mirror of
https://github.com/processone/ejabberd.git
synced 2024-10-13 15:16:49 +02:00
* src/xml.erl: Better escaping management with CDATA. We only add CDATA enclosure when needed. CDATA end token is properly escaped.
SVN Revision: 844
This commit is contained in:
parent
f6095694b1
commit
bdb2c6820f
@ -1,5 +1,8 @@
|
||||
2007-07-30 Mickael Remond <mickael.remond@process-one.net>
|
||||
|
||||
* src/xml.erl: Better escaping management with CDATA. We only add
|
||||
CDATA enclosure when needed. CDATA end token is properly escaped.
|
||||
|
||||
* src/xml.erl: Only wrap xmldata nodes in xml cdata "tag" if
|
||||
bigger than 50 bytes. Shorter xmlcdata nodes will be escaped.
|
||||
|
||||
|
76
src/xml.erl
76
src/xml.erl
@ -20,9 +20,6 @@
|
||||
get_path_s/2,
|
||||
replace_tag_attr/3]).
|
||||
|
||||
%% XML CDATA bigger than this will be enclosed in CDATA XML "tag"
|
||||
-define(CDATA_BINARY_THRESHOLD, 50).
|
||||
|
||||
element_to_string(El) ->
|
||||
case El of
|
||||
{xmlelement, Name, Attrs, Els} ->
|
||||
@ -35,11 +32,9 @@ element_to_string(El) ->
|
||||
[$<, Name, attrs_to_list(Attrs), $/, $>]
|
||||
end;
|
||||
%% We do not crypt CDATA binary, but we enclose it in XML CDATA
|
||||
%% if they are long enough to be worth it.
|
||||
{xmlcdata, CData} when binary(CData), size(CData) > ?CDATA_BINARY_THRESHOLD ->
|
||||
CDATA1 = <<"<![CDATA[">>,
|
||||
CDATA2 = <<"]]>">>,
|
||||
concat_binary([CDATA1, CData, CDATA2]);
|
||||
{xmlcdata, CData}
|
||||
when binary(CData) ->
|
||||
make_text_node(CData);
|
||||
%% We crypt list and short binaries (implies a conversion to
|
||||
%% list).
|
||||
{xmlcdata, CData} ->
|
||||
@ -63,6 +58,71 @@ crypt(S) when is_list(S) ->
|
||||
end || C <- S];
|
||||
crypt(S) when is_binary(S) ->
|
||||
crypt(binary_to_list(S)).
|
||||
|
||||
%% Make a cdata_binary depending on what characters it contains
|
||||
make_text_node(CData) ->
|
||||
case cdata_need_escape(CData) of
|
||||
cdata ->
|
||||
CDATA1 = <<"<![CDATA[">>,
|
||||
CDATA2 = <<"]]>">>,
|
||||
concat_binary([CDATA1, CData, CDATA2]);
|
||||
none ->
|
||||
CData;
|
||||
{cdata, EndTokens} ->
|
||||
EscapedCData = escape_cdata(CData, EndTokens),
|
||||
concat_binary(EscapedCData)
|
||||
end.
|
||||
|
||||
%% Returns escape type needed for the text node
|
||||
%% none, cdata, {cdata, [Positions]}
|
||||
%% Positions is a list a integer containing positions of CDATA end
|
||||
%% tokens, so that they can be escaped
|
||||
cdata_need_escape(CData) ->
|
||||
cdata_need_escape(CData, 0, false, []).
|
||||
cdata_need_escape(<<>>, _, false, _) ->
|
||||
none;
|
||||
cdata_need_escape(<<>>, _, true, []) ->
|
||||
cdata;
|
||||
cdata_need_escape(<<>>, _, true, CDataEndTokens) ->
|
||||
{cdata, lists:reverse(CDataEndTokens)};
|
||||
cdata_need_escape(<<$],$],$>,Rest/binary>>, CurrentPosition,
|
||||
_XMLEscape, CDataEndTokens) ->
|
||||
NewPosition = CurrentPosition + 3,
|
||||
cdata_need_escape(Rest, NewPosition, true,
|
||||
[CurrentPosition+1|CDataEndTokens]);
|
||||
%% Only <, & need to be escaped in XML text node
|
||||
%% See reference: http://www.w3.org/TR/xml11/#syntax
|
||||
cdata_need_escape(<<$<,Rest/binary>>, CurrentPosition,
|
||||
_XMLEscape, CDataEndTokens) ->
|
||||
cdata_need_escape(Rest, CurrentPosition+1, true, CDataEndTokens);
|
||||
cdata_need_escape(<<$&,Rest/binary>>, CurrentPosition,
|
||||
_XMLEscape, CDataEndTokens) ->
|
||||
cdata_need_escape(Rest, CurrentPosition+1, true, CDataEndTokens);
|
||||
cdata_need_escape(<<_:8,Rest/binary>>, CurrentPosition,
|
||||
XMLEscape, CDataEndTokens) ->
|
||||
cdata_need_escape(Rest, CurrentPosition+1, XMLEscape,
|
||||
CDataEndTokens).
|
||||
|
||||
%% escape cdata that contain CDATA end tokens
|
||||
%% EndTokens is a list of position of end tokens (integer)
|
||||
%% This is supposed to be a very rare case: You need to generate several
|
||||
%% fields, splitting it in the middle of the end token.
|
||||
%% See example: http://en.wikipedia.org/wiki/CDATA#Uses_of_CDATA_sections
|
||||
escape_cdata(CData, EndTokens) ->
|
||||
escape_cdata(CData, 0, EndTokens, []).
|
||||
escape_cdata(<<>>, _CurrentPosition, [], Acc) ->
|
||||
lists:reverse(Acc);
|
||||
escape_cdata(Rest, CurrentPosition, [], Acc) ->
|
||||
CDATA1 = <<"<![CDATA[">>,
|
||||
CDATA2 = <<"]]>">>,
|
||||
escape_cdata(<<>>, CurrentPosition, [], [CDATA2, Rest, CDATA1|Acc]);
|
||||
escape_cdata(CData, Index, [Pos|Positions], Acc) ->
|
||||
CDATA1 = <<"<![CDATA[">>,
|
||||
CDATA2 = <<"]]>">>,
|
||||
Split = Pos-Index,
|
||||
{Part, Rest} = split_binary(CData, Split+1),
|
||||
%% Note: We build the list in reverse to optimize construction
|
||||
escape_cdata(Rest, Pos+1, Positions, [CDATA2, Part, CDATA1|Acc]).
|
||||
|
||||
remove_cdata_p({xmlelement, _Name, _Attrs, _Els}) -> true;
|
||||
remove_cdata_p(_) -> false.
|
||||
|
Loading…
Reference in New Issue
Block a user