2002-11-18 21:39:47 +01:00
|
|
|
%%%----------------------------------------------------------------------
|
|
|
|
%%% File : xml_stream.erl
|
2007-12-24 13:58:05 +01:00
|
|
|
%%% Author : Alexey Shchepin <alexey@process-one.net>
|
2004-12-03 23:54:02 +01:00
|
|
|
%%% Purpose : Parse XML streams
|
2007-12-24 13:58:05 +01:00
|
|
|
%%% Created : 17 Nov 2002 by Alexey Shchepin <alexey@process-one.net>
|
|
|
|
%%%
|
|
|
|
%%%
|
2013-01-24 15:25:13 +01:00
|
|
|
%%% ejabberd, Copyright (C) 2002-2013 ProcessOne
|
2007-12-24 13:58:05 +01:00
|
|
|
%%%
|
|
|
|
%%% This program is free software; you can redistribute it and/or
|
|
|
|
%%% modify it under the terms of the GNU General Public License as
|
|
|
|
%%% published by the Free Software Foundation; either version 2 of the
|
|
|
|
%%% License, or (at your option) any later version.
|
|
|
|
%%%
|
|
|
|
%%% This program is distributed in the hope that it will be useful,
|
|
|
|
%%% but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
%%% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
|
|
%%% General Public License for more details.
|
2009-01-12 15:44:42 +01:00
|
|
|
%%%
|
2007-12-24 13:58:05 +01:00
|
|
|
%%% You should have received a copy of the GNU General Public License
|
|
|
|
%%% along with this program; if not, write to the Free Software
|
|
|
|
%%% Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
|
|
|
|
%%% 02111-1307 USA
|
|
|
|
%%%
|
2002-11-18 21:39:47 +01:00
|
|
|
%%%----------------------------------------------------------------------
|
|
|
|
|
|
|
|
-module(xml_stream).
|
2013-03-14 10:33:02 +01:00
|
|
|
|
2007-12-24 13:58:05 +01:00
|
|
|
-author('alexey@process-one.net').
|
2002-11-18 21:39:47 +01:00
|
|
|
|
2013-03-14 10:33:02 +01:00
|
|
|
-export([new/1, new/2, parse/2, close/1,
|
2004-12-05 21:54:55 +01:00
|
|
|
parse_element/1]).
|
2002-11-18 21:39:47 +01:00
|
|
|
|
2003-10-20 20:23:30 +02:00
|
|
|
%% Integer tags found in the first position of every event tuple decoded
%% from the parser port's reply (see process_data/3 and
%% process_element_events/2, which match on them).
-define(XML_START, 0).
-define(XML_END,   1).
-define(XML_CDATA, 2).
-define(XML_ERROR, 3).

%% Command codes handed to port_control/3: PARSE_COMMAND is used by
%% parse/2 for incremental stream input, PARSE_FINAL_COMMAND by
%% parse_element/1 for a complete, self-contained document.
-define(PARSE_COMMAND,       0).
-define(PARSE_FINAL_COMMAND, 1).
|
2004-12-01 23:48:53 +01:00
|
|
|
|
2013-03-14 10:33:02 +01:00
|
|
|
%% State of one streaming parser instance, threaded through parse/2.
-record(xml_stream_state,
        {
         %% Process that receives the xmlstream* events via
         %% gen_fsm:send_event/2.
         callback_pid = self() :: pid(),
         %% Port opened on the "expat_erl" driver by new/2.
         port                  :: port(),
         %% Elements currently being assembled, innermost first.
         stack = []            :: stack(),
         %% Bytes fed to the parser since the counter was last reset
         %% (parse/2 resets it whenever the stack has exactly one entry).
         size = 0              :: non_neg_integer(),
         %% Threshold above which parse/2 reports "XML stanza is too big".
         maxsize = infinity    :: non_neg_integer() | infinity
        }).
|
|
|
|
|
|
|
|
%% Events delivered to the callback process.  This module itself emits
%% xmlstreamstart, xmlstreamelement, xmlstreamend and xmlstreamerror;
%% xmlstreamraw and xmlstreamcdata are part of the exported type but are
%% not produced by the code in this file.
-type xml_stream_el() :: {xmlstreamraw, binary()} |
                         {xmlstreamcdata, binary()} |
                         {xmlstreamelement, xmlel()} |
                         {xmlstreamend, binary()} |
                         {xmlstreamstart, binary(), [attr()]} |
                         {xmlstreamerror, binary()}.

-type xml_stream_state() :: #xml_stream_state{}.

%% Elements under construction, innermost first.  In streaming mode
%% process_data/3 keeps the atom 'xmlstreamstart' as the bottom entry to
%% mark that the stream header has been seen, so the atom has to be part
%% of the element type (the previous declaration, [xmlel()], did not
%% describe the values actually stored in the state record).
-type stack() :: [xmlel() | xmlstreamstart].

%% Shape of the terms obtained by decoding the parser port's reply.
-type event() :: {?XML_START, {binary(), [attr()]}} |
                 {?XML_END, binary()} |
                 {?XML_CDATA, binary()} |
                 {?XML_ERROR, binary()}.

-export_type([xml_stream_state/0, xml_stream_el/0]).
|
|
|
|
|
|
|
|
-include("jlib.hrl").
|
2004-12-03 23:54:02 +01:00
|
|
|
|
2002-11-18 21:39:47 +01:00
|
|
|
%% Fold one parser event into the element stack and return the new stack.
%%
%% CallbackPid - process notified through gen_fsm:send_event/2
%% Stack       - elements under construction, innermost first; the atom
%%               'xmlstreamstart' sits at the bottom once the stream
%%               header has been seen, and [] means no stream is open
%% Data        - one {Tag, Payload} event with Tag being ?XML_START,
%%               ?XML_END, ?XML_CDATA or ?XML_ERROR
%%
%% Events sent to CallbackPid:
%%   {xmlstreamstart, Name, Attrs}  on the first START (empty stack)
%%   {xmlstreamelement, El}         when a direct child of the stream closes
%%   {xmlstreamend, Name}           when the stream element itself closes
%%   {xmlstreamerror, Err}          on a parser error
%%
%% Every send is wrapped in 'catch' so that a failing send does not
%% abort the parse (presumably to tolerate a dead callback process).
%%
%% Note: an END event on an empty stack matches no clause and raises
%% case_clause.
process_data(CallbackPid, Stack, Data) ->
    case Data of
        {?XML_START, {Name, Attrs}} ->
            if
                Stack == [] ->
                    catch gen_fsm:send_event(CallbackPid,
                                             {xmlstreamstart, Name, Attrs}),
                    %% There is no need to store name or attributes of
                    %% stream opening element as it is not used
                    %% anymore.
                    [xmlstreamstart];
                true ->
                    [#xmlel{name = Name, attrs = Attrs, children = []}
                     | Stack]
            end;
        {?XML_END, EndName} ->
            case Stack of
                %% Closing tag of the stream itself.
                [xmlstreamstart] ->
                    catch gen_fsm:send_event(CallbackPid,
                                             {xmlstreamend, EndName}),
                    [];
                %% A top-level stanza is complete: deliver it.  Children
                %% were accumulated in reverse, so restore document order.
                [#xmlel{name = Name, attrs = Attrs, children = Els},
                 xmlstreamstart] ->
                    NewEl = #xmlel{name = Name, attrs = Attrs,
                                   children = lists:reverse(Els)},
                    catch gen_fsm:send_event(CallbackPid,
                                             {xmlstreamelement, NewEl}),
                    [xmlstreamstart];
                %% A nested element is complete: attach it to its parent.
                [#xmlel{name = Name, attrs = Attrs, children = Els},
                 #xmlel{name = Name1, attrs = Attrs1, children = Els1}
                 | Tail] ->
                    NewEl = #xmlel{name = Name, attrs = Attrs,
                                   children = lists:reverse(Els)},
                    [#xmlel{name = Name1, attrs = Attrs1,
                            children = [NewEl | Els1]}
                     | Tail]
            end;
        {?XML_CDATA, CData} ->
            case Stack of
                %% Character data directly inside the stream element is
                %% discarded.
                [xmlstreamstart] ->
                    [xmlstreamstart];
                %% Merge CDATA nodes if they are contiguous
                %% This does not change the semantic: the split in
                %% several CDATA nodes depends on the TCP/IP packet
                %% fragmentation
                [#xmlel{name = Name, attrs = Attrs,
                        children = [{xmlcdata, PreviousCData} | Els]}
                 | Tail] ->
                    [#xmlel{name = Name, attrs = Attrs,
                            children =
                                [{xmlcdata,
                                  iolist_to_binary([PreviousCData, CData])}
                                 | Els]}
                     | Tail];
                %% No previous CDATA
                [#xmlel{name = Name, attrs = Attrs, children = Els}
                 | Tail] ->
                    [#xmlel{name = Name, attrs = Attrs,
                            children = [{xmlcdata, CData} | Els]}
                     | Tail];
                [] -> []
            end;
        {?XML_ERROR, Err} ->
            catch gen_fsm:send_event(CallbackPid, {xmlstreamerror, Err}),
            %% Hand the stack back unchanged.  Returning the result of
            %% the send (the atom 'ok') would be stored as the stack by
            %% the caller and make the next START/END/CDATA event build
            %% an improper list or crash with case_clause.
            Stack
    end.
|
|
|
|
|
2013-03-14 10:33:02 +01:00
|
|
|
-spec new(pid()) -> xml_stream_state().

%% Create a streaming parser reporting to CallbackPid, with no limit on
%% stanza size.
new(CallbackPid) ->
    new(CallbackPid, infinity).

-spec new(pid(), non_neg_integer() | infinity) -> xml_stream_state().

%% Create a streaming parser reporting to CallbackPid.  MaxSize is the
%% byte threshold checked by parse/2, or 'infinity' for no limit.
%% Opens a port on the "expat_erl" driver; release it with close/1.
new(CallbackPid, MaxSize) ->
    ExpatPort = open_port({spawn, "expat_erl"}, [binary]),
    %% 'stack' and 'size' take their record defaults ([] and 0).
    #xml_stream_state{callback_pid = CallbackPid,
                      port = ExpatPort,
                      maxsize = MaxSize}.
|
2004-12-03 23:54:02 +01:00
|
|
|
|
2013-03-14 10:33:02 +01:00
|
|
|
-spec parse(xml_stream_state(), iodata()) -> xml_stream_state().

%% Feed a chunk of input to the parser port and process the resulting
%% events.
%%
%% Each event decoded from the port's reply is folded through
%% process_data/3, which notifies the callback process and updates the
%% element stack.  The running byte counter is reset to 0 whenever the
%% stack holds exactly one entry after an event (the state
%% process_data/3 leaves when only the stream-open marker remains);
%% otherwise it keeps accumulating.  If the counter ends up above
%% 'maxsize', {xmlstreamerror, <<"XML stanza is too big">>} is sent to
%% the callback process.  Parsing is not stopped by this function; the
%% updated state is returned in every case.
parse(#xml_stream_state{callback_pid = CallbackPid,
                        port = Port, stack = Stack, size = Size,
                        maxsize = MaxSize} =
          State,
      Str) ->
    %% The spec admits any iodata(); byte_size/1 raises badarg on a
    %% list, whereas iolist_size/1 handles binaries and iolists alike
    %% and returns the same value for a plain binary.
    StrSize = iolist_size(Str),
    Res = port_control(Port, ?PARSE_COMMAND, Str),
    {NewStack, NewSize} =
        lists:foldl(fun (Data, {St, Sz}) ->
                            NewSt = process_data(CallbackPid, St, Data),
                            case NewSt of
                                [_] -> {NewSt, 0};
                                _ -> {NewSt, Sz}
                            end
                    end,
                    {Stack, Size + StrSize},
                    binary_to_term(Res)),
    %% With MaxSize = infinity the comparison is never true: in Erlang
    %% term order every number sorts before every atom.
    if NewSize > MaxSize ->
           catch gen_fsm:send_event(CallbackPid,
                                    {xmlstreamerror,
                                     <<"XML stanza is too big">>});
       true -> ok
    end,
    State#xml_stream_state{stack = NewStack,
                           size = NewSize}.
|
|
|
|
|
|
|
|
-spec close(xml_stream_state()) -> true.

%% Release the parser port held by a state created with new/1,2.
close(#xml_stream_state{port = ExpatPort}) ->
    erlang:port_close(ExpatPort).
|
2004-12-05 21:54:55 +01:00
|
|
|
|
2013-03-14 10:33:02 +01:00
|
|
|
-spec parse_element(iodata()) -> xmlel() |
                                 {error, parse_error} |
                                 {error, binary()}.

%% Parse a complete XML document holding a single root element.
%%
%% Opens a temporary port on the "expat_erl" driver, submits Str with
%% the final-parse command and rebuilds the element tree from the
%% returned events.  Returns the root #xmlel{} on success,
%% {error, Err} when the parser reports an error, or
%% {error, parse_error} when the events do not form exactly one
%% complete element.
parse_element(Str) ->
    Port = open_port({spawn, "expat_erl"}, [binary]),
    %% Close the port even when port_control/3 raises (e.g. badarg on
    %% malformed input data); otherwise the port would stay open for as
    %% long as the calling process lives.
    Res = try
              port_control(Port, ?PARSE_FINAL_COMMAND, Str)
          after
              port_close(Port)
          end,
    process_element_events(binary_to_term(Res)).
|
|
|
|
|
|
|
|
%% Rebuild an element tree from a list of parser events, starting with
%% no open elements.
process_element_events(Events) ->
    process_element_events(Events, []).

-spec process_element_events([event()], stack()) -> xmlel() |
                                                    {error, parse_error} |
                                                    {error, binary()}.

%% Consume events one at a time, keeping the currently open elements on
%% Stack (innermost first).  Running out of events before the root
%% element has been closed is a parse error.
process_element_events([], _Stack) ->
    {error, parse_error};
process_element_events([Event | Rest], Stack) ->
    case Event of
        {?XML_START, {Name, Attrs}} ->
            Opened = #xmlel{name = Name, attrs = Attrs, children = []},
            process_element_events(Rest, [Opened | Stack]);
        {?XML_END, _EndName} ->
            close_top_element(Rest, Stack);
        {?XML_CDATA, CData} ->
            process_element_events(Rest, push_cdata(CData, Stack));
        {?XML_ERROR, Err} ->
            {error, Err}
    end.

%% Handle an END event: finish the innermost element.  If it was the
%% root, it is the result provided no events remain; otherwise it is
%% attached to its parent and processing continues.
close_top_element(Rest, Stack) ->
    case Stack of
        [#xmlel{name = Name, attrs = Attrs, children = RevChildren}
         | Parents] ->
            %% Children were accumulated in reverse order.
            Closed = #xmlel{name = Name, attrs = Attrs,
                            children = lists:reverse(RevChildren)},
            case Parents of
                [] ->
                    case Rest of
                        [] -> Closed;
                        _ -> {error, parse_error}
                    end;
                [#xmlel{name = PName, attrs = PAttrs, children = PChildren}
                 | Older] ->
                    Parent = #xmlel{name = PName, attrs = PAttrs,
                                    children = [Closed | PChildren]},
                    process_element_events(Rest, [Parent | Older])
            end
    end.

%% Handle a CDATA event: prepend the text to the innermost open
%% element's children.  Text outside any element is dropped.
push_cdata(CData, Stack) ->
    case Stack of
        [#xmlel{name = Name, attrs = Attrs, children = Children}
         | Outer] ->
            [#xmlel{name = Name, attrs = Attrs,
                    children = [{xmlcdata, CData} | Children]}
             | Outer];
        [] ->
            []
    end.
|