2002-11-20 21:19:20 +01:00
|
|
|
%%%----------------------------------------------------------------------
|
|
|
|
%%% File : xml.erl
|
|
|
|
%%% Author : Alexey Shchepin <alexey@sevcom.net>
|
|
|
|
%%% Purpose : XML utils
|
|
|
|
%%% Created : 20 Nov 2002 by Alexey Shchepin <alexey@sevcom.net>
|
|
|
|
%%% Id : $Id$
|
|
|
|
%%%----------------------------------------------------------------------
|
|
|
|
|
|
|
|
-module(xml).
|
|
|
|
-author('alexey@sevcom.net').
|
|
|
|
-vsn('$Revision$ ').
|
|
|
|
|
2003-01-26 21:16:53 +01:00
|
|
|
-export([element_to_string/1,
|
|
|
|
crypt/1,
|
|
|
|
remove_cdata/1,
|
2002-12-08 18:23:21 +01:00
|
|
|
get_cdata/1, get_tag_cdata/1,
|
2003-01-01 20:54:44 +01:00
|
|
|
get_attr/2, get_attr_s/2,
|
2003-01-02 22:01:12 +01:00
|
|
|
get_tag_attr/2, get_tag_attr_s/2,
|
2007-06-05 03:50:28 +02:00
|
|
|
get_subtag/2, get_subtag_cdata/2,
|
2004-01-18 21:42:09 +01:00
|
|
|
get_path_s/2,
|
|
|
|
replace_tag_attr/3]).
|
2002-11-20 21:19:20 +01:00
|
|
|
|
2004-04-10 21:15:02 +02:00
|
|
|
%% Serialize an XML term into a deep list (iolist) of characters/binaries.
%%
%%   {xmlelement, Name, Attrs, Els}
%%       Rendered as <Name attrs>children</Name>, or as the self-closing
%%       form <Name attrs/> when the element has no children.
%%   {xmlcdata, CData} with a binary CData
%%       Not entity-escaped; handed to make_text_node/1, which wraps the
%%       data in CDATA sections when it contains characters needing it.
%%   {xmlcdata, CData} otherwise
%%       Entity-escaped with crypt/1.
%%
%% Any other term raises a case_clause error.
element_to_string(El) ->
    case El of
        {xmlelement, Name, Attrs, []} ->
            [$<, Name, attrs_to_list(Attrs), $/, $>];
        {xmlelement, Name, Attrs, Els} ->
            [$<, Name, attrs_to_list(Attrs), $>,
             [element_to_string(E) || E <- Els],
             $<, $/, Name, $>];
        %% is_binary/1 replaces the obsolete binary/1 guard test and
        %% matches the guards used by crypt/1.
        {xmlcdata, CData} when is_binary(CData) ->
            make_text_node(CData);
        %% Every binary is caught by the clause above, so only
        %% non-binary character data reaches crypt/1 here.
        {xmlcdata, CData} ->
            crypt(CData)
    end.
|
|
|
|
|
|
|
|
%% Render every {Name, Value} attribute pair with attr_to_list/1,
%% keeping the original order.
attrs_to_list(Attrs) ->
    lists:map(fun attr_to_list/1, Attrs).
|
|
|
|
|
|
|
|
%% Render one attribute as: space, escaped name, =, then the escaped
%% value enclosed in single quotes.
attr_to_list({Name, Value}) ->
    EscapedName = crypt(Name),
    EscapedValue = crypt(Value),
    [$\s, EscapedName, $=, $', EscapedValue, $'].
|
2003-05-18 18:41:15 +02:00
|
|
|
|
2006-04-06 01:56:16 +02:00
|
|
|
%% Escape the five characters that have predefined XML entities
%% (&, <, >, " and ') so the result can be used in text nodes and in
%% quoted attribute values.
%%
%% S is a string (list of characters) or a binary; a binary is first
%% converted to a list. Returns a deep list in which each special
%% character is replaced by its entity string and every other character
%% is kept as is. Other argument types raise function_clause.
crypt(S) when is_list(S) ->
    [case C of
         $& -> "&amp;";
         $< -> "&lt;";
         $> -> "&gt;";
         $" -> "&quot;";
         $' -> "&apos;";
         _ -> C
     end || C <- S];
crypt(S) when is_binary(S) ->
    crypt(binary_to_list(S)).
|
2007-07-30 19:35:00 +02:00
|
|
|
|
|
|
|
%% Build the binary text node for the binary CData, depending on the
%% verdict of cdata_need_escape/1:
%%   none               -> CData is returned unchanged;
%%   cdata              -> CData is wrapped in a single CDATA section;
%%   {cdata, EndTokens} -> CData contains CDATA end tokens, so it is split
%%                         into several CDATA sections by escape_cdata/2.
%% list_to_binary/1 is used for concatenation in place of the obsolete
%% concat_binary/1 BIF.
make_text_node(CData) ->
    case cdata_need_escape(CData) of
        none ->
            CData;
        cdata ->
            list_to_binary([<<"<![CDATA[">>, CData, <<"]]>">>]);
        {cdata, EndTokens} ->
            list_to_binary(escape_cdata(CData, EndTokens))
    end.
|
|
|
|
|
|
|
|
%% Returns the kind of escaping a binary text node needs:
%%   none               - no character requires escaping;
%%   cdata              - the text contains < or & and no CDATA end token;
%%   {cdata, Positions} - the text contains at least one CDATA end token.
%% Positions is a list of integers: for every end token, the zero-based
%% offset of its second ] character, in increasing order, so that the
%% token can later be split in two.
cdata_need_escape(CData) ->
    cdata_need_escape(CData, 0, false, []).

%% A CDATA end token: remember where to split it and skip its 3 bytes.
cdata_need_escape(<<"]]>", Tail/binary>>, Offset, _Needed, Ends) ->
    cdata_need_escape(Tail, Offset + 3, true, [Offset + 1 | Ends]);
%% Only < and & need to be escaped in an XML text node.
%% See reference: http://www.w3.org/TR/xml11/#syntax
cdata_need_escape(<<Char, Tail/binary>>, Offset, _Needed, Ends)
  when Char =:= $<; Char =:= $& ->
    cdata_need_escape(Tail, Offset + 1, true, Ends);
%% Any other byte leaves the verdict untouched.
cdata_need_escape(<<_, Tail/binary>>, Offset, Needed, Ends) ->
    cdata_need_escape(Tail, Offset + 1, Needed, Ends);
%% End of input: report what was found.
cdata_need_escape(<<>>, _Offset, false, _Ends) ->
    none;
cdata_need_escape(<<>>, _Offset, true, []) ->
    cdata;
cdata_need_escape(<<>>, _Offset, true, Ends) ->
    {cdata, lists:reverse(Ends)}.
|
|
|
|
|
|
|
|
%% Escape character data that contains CDATA end tokens.
%% EndTokens is the list of split positions (integers) produced by
%% cdata_need_escape/1. This is expected to be a very rare case: the data
%% has to be emitted as several CDATA sections, each end token being cut
%% in the middle so that it never appears whole inside a section.
%% See example: http://en.wikipedia.org/wiki/CDATA#Uses_of_CDATA_sections
%% Returns the list of binaries making up the sections, in output order.
escape_cdata(CData, EndTokens) ->
    escape_cdata(CData, 0, EndTokens, []).

%% Nothing left and no more split points: the sections were accumulated
%% in reverse, so put them back in order.
escape_cdata(<<>>, _Offset, [], Sections) ->
    lists:reverse(Sections);
%% No more split points: the remaining data forms the last section.
escape_cdata(Tail, _Offset, [], Sections) ->
    lists:reverse([<<"]]>">>, Tail, <<"<![CDATA[">> | Sections]);
%% Cut just after the split point; Offset is the position of the first
%% byte of CData within the original binary.
escape_cdata(CData, Offset, [End | Ends], Sections) ->
    {Chunk, Tail} = split_binary(CData, End - Offset + 1),
    escape_cdata(Tail, End + 1, Ends,
                 [<<"]]>">>, Chunk, <<"<![CDATA[">> | Sections]).
|
2003-01-26 21:16:53 +01:00
|
|
|
|
2007-07-30 11:09:24 +02:00
|
|
|
%% Predicate used by remove_cdata/1: true for an xmlelement tuple,
%% false for any other term.
remove_cdata_p(Node) ->
    case Node of
        {xmlelement, _, _, _} -> true;
        _ -> false
    end.
|
2002-11-20 21:19:20 +01:00
|
|
|
|
2003-01-09 20:59:16 +01:00
|
|
|
%% Keep only the members of L accepted by remove_cdata_p/1 (the
%% xmlelement tuples), preserving their order.
remove_cdata(L) ->
    lists:filter(fun remove_cdata_p/1, L).
|
|
|
|
|
2002-11-20 21:19:20 +01:00
|
|
|
%% Concatenate the contents of all {xmlcdata, CData} members of L, in
%% order, and return the result as a flat string. Members of any other
%% shape are ignored.
get_cdata(L) ->
    Collected = get_cdata(L, ""),
    binary_to_list(list_to_binary(Collected)).

%% Append each character-data chunk found in the list to the deep list S.
get_cdata(Els, S) ->
    lists:foldl(fun({xmlcdata, CData}, Acc) -> [Acc, CData];
                   (_Other, Acc) -> Acc
                end, S, Els).
|
2002-12-08 18:23:21 +01:00
|
|
|
|
2007-07-30 11:09:24 +02:00
|
|
|
%% Return the concatenated character data found directly among the
%% children of an xmlelement (see get_cdata/1).
get_tag_cdata({xmlelement, _, _, Children}) ->
    get_cdata(Children).
|
2002-11-20 21:19:20 +01:00
|
|
|
|
|
|
|
%% Look up AttrName in the attribute list Attrs.
%% Returns {value, Val} for the first {AttrName, Val} pair found,
%% or false when there is none.
get_attr(AttrName, Attrs) ->
    case lists:keyfind(AttrName, 1, Attrs) of
        {_, Val} -> {value, Val};
        _ -> false
    end.
|
|
|
|
|
|
|
|
%% Same lookup as get_attr/2, but returns the bare value, or the empty
%% string when the attribute is absent.
get_attr_s(AttrName, Attrs) ->
    case lists:keyfind(AttrName, 1, Attrs) of
        {_, Val} -> Val;
        _ -> ""
    end.
|
|
|
|
|
2007-07-30 11:09:24 +02:00
|
|
|
%% get_attr/2 applied to the attribute list of an xmlelement:
%% returns {value, Val} or false.
get_tag_attr(AttrName, {xmlelement, _, TagAttrs, _}) ->
    get_attr(AttrName, TagAttrs).
|
|
|
|
|
2007-07-30 11:09:24 +02:00
|
|
|
%% get_attr_s/2 applied to the attribute list of an xmlelement:
%% returns the value, or "" when the attribute is absent.
get_tag_attr_s(AttrName, {xmlelement, _, TagAttrs, _}) ->
    get_attr_s(AttrName, TagAttrs).
|
2002-11-24 21:36:57 +01:00
|
|
|
|
2003-01-02 22:01:12 +01:00
|
|
|
|
|
|
|
%% Return the first direct child element of the given xmlelement whose
%% name is Name, or false when there is no such child.
get_subtag({xmlelement, _TagName, _TagAttrs, Children}, Name) ->
    get_subtag1(Children, Name).
|
|
|
|
|
|
|
|
%% Scan a list of children for the first xmlelement named Name.
%% Returns that element, or false when the list is exhausted.
get_subtag1([{xmlelement, Name, _, _} = Found | _], Name) ->
    Found;
get_subtag1([_ | Others], Name) ->
    get_subtag1(Others, Name);
get_subtag1([], _) ->
    false.
|
|
|
|
|
2007-06-05 03:50:28 +02:00
|
|
|
%% Return the character data of the first child of Tag named Name
%% (see get_subtag/2 and get_tag_cdata/1), or "" when Tag has no such
%% child.
get_subtag_cdata(Tag, Name) ->
    Found = get_subtag(Tag, Name),
    case Found =:= false of
        true -> "";
        false -> get_tag_cdata(Found)
    end.
|
2003-01-02 22:01:12 +01:00
|
|
|
|
|
|
|
%% Follow a path inside an element. The path is a list of steps:
%%   {elem, Name} - descend into the first child element named Name;
%%   {attr, Name} - (last step only) return the value of attribute Name,
%%                  or "" when it is absent;
%%   cdata        - (last step only) return the element's character data.
%% An empty path returns the element itself. When an {elem, Name} step
%% finds no matching child, the result is "".
get_path_s(El, []) ->
    El;
get_path_s(El, [{elem, Name} | Rest]) ->
    Child = get_subtag(El, Name),
    if
        Child =:= false -> "";
        true -> get_path_s(Child, Rest)
    end;
get_path_s(El, [{attr, Name}]) ->
    get_tag_attr_s(Name, El);
get_path_s(El, [cdata]) ->
    get_tag_cdata(El).
|
|
|
|
|
2004-01-18 21:42:09 +01:00
|
|
|
|
|
|
|
%% Return the element with attribute Attr set to Value: the first existing
%% pair keyed by Attr (if any) is removed and {Attr, Value} is placed at
%% the head of the attribute list.
replace_tag_attr(Attr, Value, {xmlelement, Name, Attrs, Els}) ->
    {xmlelement, Name, [{Attr, Value} | lists:keydelete(Attr, 1, Attrs)], Els}.
|
|
|
|
|
|
|
|
|