2002-11-18 21:39:47 +01:00
|
|
|
%%%----------------------------------------------------------------------
|
|
|
|
%%% File : xml_stream.erl
|
|
|
|
%%% Author : Alexey Shchepin <alexey@sevcom.net>
|
2004-12-03 23:54:02 +01:00
|
|
|
%%% Purpose : Parse XML streams
|
2002-11-18 21:39:47 +01:00
|
|
|
%%% Created : 17 Nov 2002 by Alexey Shchepin <alexey@sevcom.net>
|
|
|
|
%%% Id : $Id$
|
|
|
|
%%%----------------------------------------------------------------------
|
|
|
|
|
|
|
|
-module(xml_stream).
|
|
|
|
-author('alexey@sevcom.net').
|
|
|
|
-vsn('$Revision$ ').
|
|
|
|
|
2006-04-13 04:08:24 +02:00
|
|
|
-export([new/1,
|
2006-04-22 05:35:13 +02:00
|
|
|
new/2,
|
2004-12-03 23:54:02 +01:00
|
|
|
parse/2,
|
2004-12-05 21:54:55 +01:00
|
|
|
close/1,
|
|
|
|
parse_element/1]).
|
2002-11-18 21:39:47 +01:00
|
|
|
|
2003-10-20 20:23:30 +02:00
|
|
|
%% Tags of the {Tag, Payload} events decoded from the port reply; they are
%% matched in process_data/3 and process_element_events/2.
%% NOTE(review): the numeric values presumably mirror constants in the
%% expat_erl driver -- confirm before changing any of them.
-define(XML_START, 0).
-define(XML_END,   1).
-define(XML_CDATA, 2).
-define(XML_ERROR, 3).

%% Command codes handed to port_control/3: PARSE_COMMAND is used by
%% parse/2, PARSE_FINAL_COMMAND by parse_element/1.
-define(PARSE_COMMAND,       0).
-define(PARSE_FINAL_COMMAND, 1).

%% State threaded through new/2, parse/2 and close/1.
-record(xml_stream_state,
        {callback_pid,  % process that receives the xmlstream* events
         port,          % port opened on expat_erl
         stack,         % open elements, innermost first
         size,          % running byte count compared against maxsize
         maxsize}).     % limit checked in parse/2 (new/1 passes 'infinity')
|
2004-12-03 23:54:02 +01:00
|
|
|
|
2002-11-18 21:39:47 +01:00
|
|
|
%% @doc Fold one parser event into the stack of currently open elements.
%%
%% `Data' is a `{Tag, Payload}' event whose tag is one of the ?XML_*
%% macros. `Stack' is a list of `{xmlelement, Name, Attrs, RevChildren}'
%% tuples, innermost element first, with children kept in reverse order
%% until the element is closed.
%%
%% Returns the new stack. Stream-level happenings are reported to
%% `CallbackPid' through gen_fsm:send_event/2:
%%   {xmlstreamstart, Name, Attrs} - the root element was opened
%%   {xmlstreamelement, El}        - a direct child of the root completed
%%   {xmlstreamend, Name}          - the root element was closed
%%   {xmlstreamerror, Err}         - the parser reported an error
%% Each send is wrapped in `catch', so a failing send is ignored.
process_data(CallbackPid, Stack, Data) ->
    case Data of
        {?XML_START, {Name, Attrs}} ->
            %% Only the root element is announced when it opens; nested
            %% elements are reported once they are complete.
            if
                Stack == [] ->
                    catch gen_fsm:send_event(CallbackPid,
                                             {xmlstreamstart, Name, Attrs});
                true ->
                    ok
            end,
            [{xmlelement, Name, Attrs, []} | Stack];
        {?XML_END, EndName} ->
            case Stack of
                [{xmlelement, Name, Attrs, Els} | Tail] ->
                    %% Children were accumulated newest-first.
                    NewEl = {xmlelement, Name, Attrs, lists:reverse(Els)},
                    case Tail of
                        [] ->
                            %% The root itself was closed.
                            catch gen_fsm:send_event(CallbackPid,
                                                     {xmlstreamend, EndName}),
                            Tail;
                        [_] ->
                            %% A direct child of the root was closed: hand it
                            %% to the callback instead of storing it in the
                            %% root element.
                            catch gen_fsm:send_event(CallbackPid,
                                                     {xmlstreamelement, NewEl}),
                            Tail;
                        [{xmlelement, Name1, Attrs1, Els1} | Tail1] ->
                            %% Deeper element: attach it to its parent.
                            [{xmlelement, Name1, Attrs1, [NewEl | Els1]} |
                             Tail1]
                    end
            end;
        {?XML_CDATA, CData} ->
            case Stack of
                [El] ->
                    %% Character data directly under the root is dropped.
                    [El];
                %% Merge CDATA nodes if they are contiguous.
                %% This does not change the semantic: the split in
                %% several CDATA nodes depends on the TCP/IP packet
                %% fragmentation.
                [{xmlelement, Name, Attrs,
                  [{xmlcdata, PreviousCData} | Els]} | Tail] ->
                    %% list_to_binary/1 replaces concat_binary/1, which is
                    %% no longer available in current OTP releases.
                    Merged = list_to_binary([PreviousCData, CData]),
                    [{xmlelement, Name, Attrs,
                      [{xmlcdata, Merged} | Els]} | Tail];
                %% No previous CDATA
                [{xmlelement, Name, Attrs, Els} | Tail] ->
                    [{xmlelement, Name, Attrs, [{xmlcdata, CData} | Els]} |
                     Tail];
                [] ->
                    []
            end;
        {?XML_ERROR, Err} ->
            catch gen_fsm:send_event(CallbackPid, {xmlstreamerror, Err}),
            %% Keep the stack as it was. Returning the result of the send
            %% (normally 'ok') would replace the stack with a non-list and
            %% break every later event handled with that state.
            Stack
    end.
|
|
|
|
|
|
|
|
|
2004-12-03 23:54:02 +01:00
|
|
|
%% @doc Create a parser state for `CallbackPid' using `infinity' as the
%% maximum size. parse/2 compares an integer byte count against that
%% value with `>', which is never true for an atom, so no size error is
%% raised for states created here.
new(CallbackPid) ->
    new(CallbackPid, infinity).
|
|
|
|
|
|
|
|
%% @doc Open a port on `expat_erl' and return a fresh parser state.
%%
%% `CallbackPid' is the process that parse/2 sends events to; `MaxSize'
%% is the limit parse/2 checks its running byte count against.
%% The returned state starts with an empty element stack and a size of 0.
%% NOTE(review): presumably the expat_erl driver must already be loaded
%% for open_port/2 to succeed -- confirm against the application start-up.
new(CallbackPid, MaxSize) ->
    ExpatPort = open_port({spawn, expat_erl}, [binary]),
    #xml_stream_state{maxsize = MaxSize,
                      size = 0,
                      stack = [],
                      port = ExpatPort,
                      callback_pid = CallbackPid}.
|
2004-12-03 23:54:02 +01:00
|
|
|
|
|
|
|
|
|
|
|
%% @doc Feed one chunk of input (`Str', a list or a binary) to the parser
%% and return the updated state.
%%
%% The chunk is sent to the port with ?PARSE_COMMAND; the reply is decoded
%% with binary_to_term/1 into a list of events, each of which is folded
%% into the element stack by process_data/3 (which also notifies the
%% callback process).
%%
%% Size accounting: the chunk length is added to the running size, and the
%% running size is reset to 0 every time the stack is back to exactly one
%% open element after an event. If the resulting size exceeds `maxsize',
%% an {xmlstreamerror, "XML stanza is too big"} event is sent; the state
%% is still updated and returned.
parse(#xml_stream_state{callback_pid = CallbackPid,
                        port = Port,
                        stack = Stack,
                        size = Size,
                        maxsize = MaxSize} = State, Str) ->
    ChunkSize = if
                    is_binary(Str) -> byte_size(Str);
                    is_list(Str) -> length(Str)
                end,
    Reply = port_control(Port, ?PARSE_COMMAND, Str),
    %% Apply one event; the byte counter restarts whenever only one
    %% element remains open.
    Step = fun(Event, {OpenEls, Bytes}) ->
                   case process_data(CallbackPid, OpenEls, Event) of
                       [_] = AtRoot -> {AtRoot, 0};
                       Other -> {Other, Bytes}
                   end
           end,
    {NewStack, NewSize} =
        lists:foldl(Step, {Stack, Size + ChunkSize}, binary_to_term(Reply)),
    case NewSize > MaxSize of
        true ->
            catch gen_fsm:send_event(CallbackPid,
                                     {xmlstreamerror, "XML stanza is too big"});
        false ->
            ok
    end,
    State#xml_stream_state{stack = NewStack, size = NewSize}.
|
2004-12-03 23:54:02 +01:00
|
|
|
|
|
|
|
%% @doc Close the port stored in the parser state. The state must not be
%% passed to parse/2 afterwards, since its port is no longer open.
close(#xml_stream_state{port = ExpatPort}) ->
    erlang:port_close(ExpatPort).
|
2004-12-05 21:54:55 +01:00
|
|
|
|
|
|
|
|
|
|
|
%% @doc Parse `Str' as one complete XML element.
%%
%% A temporary port is opened on `expat_erl', the whole input is sent
%% with ?PARSE_FINAL_COMMAND, and the decoded event list is turned into a
%% tree by process_element_events/1.
%%
%% Returns `{xmlelement, Name, Attrs, Children}' on success or
%% `{error, Reason}' when the events do not describe exactly one element
%% or the parser reported an error.
parse_element(Str) ->
    Port = open_port({spawn, expat_erl}, [binary]),
    %% Close the port even when port_control/3 raises; otherwise a caller
    %% that catches the exception would leave the port open for the rest
    %% of the process lifetime.
    Res = try
              port_control(Port, ?PARSE_FINAL_COMMAND, Str)
          after
              port_close(Port)
          end,
    process_element_events(binary_to_term(Res)).
|
|
|
|
|
|
|
|
%% @doc Build an element tree from a list of parser events, starting with
%% no open elements. See process_element_events/2 for the result shape.
process_element_events(EventList) ->
    process_element_events(EventList, []).
|
|
|
|
|
|
|
|
%% @doc Consume parser events one by one, keeping the currently open
%% elements in `Stack' (innermost first, children in reverse order).
%%
%% Result:
%%   {xmlelement, Name, Attrs, Children} - the outermost element was
%%                                         closed by the very last event
%%   {error, parse_error}                - the events ran out before the
%%                                         outermost element was closed,
%%                                         or more events follow its end
%%   {error, Err}                        - the parser emitted an error
%%                                         event carrying `Err'
%% Character data seen while no element is open is discarded.
process_element_events([], _Stack) ->
    {error, parse_error};
process_element_events([Event | Rest], Stack) ->
    case Event of
        {?XML_ERROR, Reason} ->
            {error, Reason};
        {?XML_START, {Name, Attrs}} ->
            Opened = {xmlelement, Name, Attrs, []},
            process_element_events(Rest, [Opened | Stack]);
        {?XML_CDATA, Text} ->
            case Stack of
                [] ->
                    process_element_events(Rest, []);
                [{xmlelement, Name, Attrs, Kids} | Outer] ->
                    WithText = {xmlelement, Name, Attrs,
                                [{xmlcdata, Text} | Kids]},
                    process_element_events(Rest, [WithText | Outer])
            end;
        {?XML_END, _EndName} ->
            case Stack of
                [{xmlelement, Name, Attrs, Kids} | Outer] ->
                    %% Restore document order of the children.
                    Closed = {xmlelement, Name, Attrs, lists:reverse(Kids)},
                    case {Outer, Rest} of
                        {[], []} ->
                            %% Outermost element closed by the last event.
                            Closed;
                        {[], _} ->
                            %% Trailing events after the outermost element.
                            {error, parse_error};
                        {[{xmlelement, PName, PAttrs, PKids} | Up], _} ->
                            Parent = {xmlelement, PName, PAttrs,
                                      [Closed | PKids]},
                            process_element_events(Rest, [Parent | Up])
                    end
            end
    end.
|
|
|
|
|