2004-04-10 21:15:02 +02:00
|
|
|
%%%----------------------------------------------------------------------
|
2015-09-02 15:02:46 +02:00
|
|
|
%%% File : ejabberd_idna.erl
|
2007-12-24 12:41:41 +01:00
|
|
|
%%% Author : Alexey Shchepin <alexey@process-one.net>
|
2004-04-10 21:15:02 +02:00
|
|
|
%%% Purpose : Support for IDNA (RFC3490)
|
2007-12-24 12:41:41 +01:00
|
|
|
%%% Created : 10 Apr 2004 by Alexey Shchepin <alexey@process-one.net>
|
|
|
|
%%%
|
|
|
|
%%%
|
2016-01-13 12:29:14 +01:00
|
|
|
%%% ejabberd, Copyright (C) 2002-2016 ProcessOne
|
2007-12-24 12:41:41 +01:00
|
|
|
%%%
|
|
|
|
%%% This program is free software; you can redistribute it and/or
|
|
|
|
%%% modify it under the terms of the GNU General Public License as
|
|
|
|
%%% published by the Free Software Foundation; either version 2 of the
|
|
|
|
%%% License, or (at your option) any later version.
|
|
|
|
%%%
|
|
|
|
%%% This program is distributed in the hope that it will be useful,
|
|
|
|
%%% but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
%%% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
|
|
%%% General Public License for more details.
|
2009-01-12 15:44:42 +01:00
|
|
|
%%%
|
2014-02-22 11:27:40 +01:00
|
|
|
%%% You should have received a copy of the GNU General Public License along
|
|
|
|
%%% with this program; if not, write to the Free Software Foundation, Inc.,
|
|
|
|
%%% 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
2007-12-24 12:41:41 +01:00
|
|
|
%%%
|
2004-04-10 21:15:02 +02:00
|
|
|
%%%----------------------------------------------------------------------
|
|
|
|
|
2015-09-02 15:02:46 +02:00
|
|
|
-module(ejabberd_idna).
|
2013-03-14 10:33:02 +01:00
|
|
|
|
2007-12-24 12:41:41 +01:00
|
|
|
-author('alexey@process-one.net').
|
2004-04-10 21:15:02 +02:00
|
|
|
|
|
|
|
-export([domain_utf8_to_ascii/1,
|
2013-03-14 10:33:02 +01:00
|
|
|
domain_ucs2_to_ascii/1,
|
|
|
|
utf8_to_ucs2/1]).
|
2004-04-10 21:15:02 +02:00
|
|
|
|
2013-06-27 18:45:04 +02:00
|
|
|
-ifdef(TEST).
|
|
|
|
-include_lib("eunit/include/eunit.hrl").
|
|
|
|
-endif.
|
|
|
|
|
2013-03-14 10:33:02 +01:00
|
|
|
%% Convert a UTF-8 encoded domain name to its ASCII (IDNA, RFC 3490) form.
%%
%% Returns the ASCII domain as a binary, or `false' when the name cannot be
%% converted. utf8_to_ucs2/1 raises function_clause for byte sequences it has
%% no clause for (for example a multi-byte sequence cut short at the end of
%% the input); that case is mapped to `false' here so the result always
%% agrees with the spec. A non-binary argument still fails with badarg.
-spec domain_utf8_to_ascii(binary()) -> false | binary().
domain_utf8_to_ascii(Domain) ->
    %% Only the decoding step is protected; domain_ucs2_to_ascii/1 already
    %% reports its own failures as `false'.
    try utf8_to_ucs2(Domain) of
        CodePoints -> domain_ucs2_to_ascii(CodePoints)
    catch
        error:function_clause -> false
    end.
|
|
|
|
|
|
|
|
%% Decode a UTF-8 encoded binary into a list of Unicode code points.
%%
%% One-, two-, three- and four-byte sequences are decoded. Continuation
%% bytes are not validated: only their low six bits are used. A lead byte
%% of 248 or more, or a multi-byte sequence cut short at the end of the
%% input, matches no clause and raises function_clause.
-spec utf8_to_ucs2(binary()) -> [non_neg_integer()].
utf8_to_ucs2(S) ->
    utf8_to_ucs2(binary_to_list(S), "").

%% Worker: walks the byte list and accumulates code points in reverse order.
utf8_to_ucs2([], R) ->
    lists:reverse(R);
%% Single byte (ASCII).
utf8_to_ucs2([C | S], R) when C < 128 ->
    utf8_to_ucs2(S, [C | R]);
%% Two-byte sequence: 5 + 6 payload bits.
utf8_to_ucs2([C1, C2 | S], R) when C1 < 224 ->
    utf8_to_ucs2(S, [((C1 band 31) bsl 6) bor (C2 band 63) | R]);
%% Three-byte sequence: 4 + 6 + 6 payload bits.
utf8_to_ucs2([C1, C2, C3 | S], R) when C1 < 240 ->
    utf8_to_ucs2(S,
                 [((C1 band 15) bsl 12) bor ((C2 band 63) bsl 6)
                  bor (C3 band 63)
                  | R]);
%% Four-byte sequence: 3 + 6 + 6 + 6 payload bits (code points above U+FFFF).
utf8_to_ucs2([C1, C2, C3, C4 | S], R) when C1 < 248 ->
    utf8_to_ucs2(S,
                 [((C1 band 7) bsl 18) bor ((C2 band 63) bsl 12)
                  bor ((C3 band 63) bsl 6) bor (C4 band 63)
                  | R]).
|
2004-04-10 21:15:02 +02:00
|
|
|
|
2013-06-27 18:45:04 +02:00
|
|
|
%% Convert a domain name given as a list of Unicode code points to its
%% ASCII (IDNA) form.
%%
%% Returns the ASCII domain as a binary, or `false' when the conversion
%% raises an error (the label checks in this module fail by assertive
%% matching, i.e. with badmatch / case_clause style errors).
-spec domain_ucs2_to_ascii(list()) -> false | binary().
domain_ucs2_to_ascii(Domain) ->
    %% try ... of keeps iolist_to_binary/1 outside the protected part, so
    %% only a failed conversion is turned into `false'.
    try domain_ucs2_to_ascii1(Domain) of
        Res -> iolist_to_binary(Res)
    catch
        error:_Reason -> false
    end.
|
|
|
|
|
|
|
|
%% Split the domain into labels, convert every label with to_ascii/1 and
%% glue the results back together with ASCII dots.
%%
%% Labels are separated by any of U+002E, U+3002, U+FF0E or U+FF61.
%% string:tokens/2 drops empty labels, so repeated or trailing separators
%% vanish from the result. A failure in to_ascii/1 propagates to the caller.
domain_ucs2_to_ascii1(Domain) ->
    Separators = [16#002E, 16#3002, 16#FF0E, 16#FF61],
    Labels = string:tokens(Domain, Separators),
    %% Prefix every converted label with a dot, then remove the leading one.
    Dotted = lists:append([[$. | to_ascii(Label)] || Label <- Labels]),
    string:strip(Dotted, left, $.).
|
|
|
|
|
2007-12-07 00:15:04 +01:00
|
|
|
%% Convert a single domain label (a list of code points) to its ASCII form.
%%
%% Domain names are already nameprep'ed in ejabberd, so that step is
%% skipped here. Every rule below is enforced with an assertive match: a
%% label that breaks a rule makes this function raise an error.
to_ascii(Name) ->
    %% Within the ASCII range only letters, digits and the hyphen are allowed.
    IsForbidden =
        fun(C) ->
                (C >= 0 andalso C =< 44) orelse
                    (C >= 46 andalso C =< 47) orelse
                    (C >= 58 andalso C =< 64) orelse
                    (C >= 91 andalso C =< 96) orelse
                    (C >= 123 andalso C =< 127)
        end,
    false = lists:any(IsForbidden, Name),
    %% The label must be non-empty and must not start or end with a hyphen.
    case Name of
        [First | _] when First /= $- ->
            true = lists:last(Name) /= $-
    end,
    HasNonAscii = lists:any(fun(C) -> C > 127 end, Name),
    ASCIIName =
        if
            HasNonAscii ->
                %% A label that needs encoding must not already carry the
                %% ACE prefix.
                true = not lists:prefix("xn--", Name),
                "xn--" ++ punycode_encode(Name);
            true ->
                Name
        end,
    %% The resulting label must be 1 to 63 characters long.
    Len = length(ASCIIName),
    true = Len >= 1 andalso Len =< 63,
    ASCIIName.
|
|
|
|
|
|
|
|
%%% PUNYCODE (RFC3492)
|
|
|
|
|
2013-03-14 10:33:02 +01:00
|
|
|
%% Bootstring parameter values used by the Punycode routines below.

%% Number base of the variable-length integers (digits a-z and 0-9).
-define(BASE, 36).

%% Lower and upper bound of the per-digit threshold.
-define(TMIN, 1).

-define(TMAX, 26).

%% Constants used by adapt/3 and adapt1/2 when recomputing the bias.
-define(SKEW, 38).

-define(DAMP, 700).

%% Bias in effect before the first delta has been encoded.
-define(INITIAL_BIAS, 72).

%% First code point treated as non-basic (128).
-define(INITIAL_N, 16#80).
|
2004-04-10 21:15:02 +02:00
|
|
|
|
|
|
|
%% Punycode-encode a list of code points (the "xn--" prefix is not added).
%%
%% The basic (=< 127) code points are copied first, in their original
%% order and followed by a "-" delimiter when there is at least one of
%% them; the digits produced by punycode_encode1/9 for the non-basic code
%% points come after that.
punycode_encode(Input) ->
    {Basic, NonBasic} = lists:partition(fun(C) -> C =< 127 end, Input),
    BasicCount = length(Basic),
    Literal = case BasicCount of
                  0 -> "";
                  _ -> Basic ++ "-"
              end,
    %% Non-basic code points are handled in increasing order, each distinct
    %% value once; the count of handled code points starts at BasicCount.
    Digits = punycode_encode1(Input, lists:usort(NonBasic),
                              BasicCount, BasicCount, length(Input),
                              ?INITIAL_N, 0, ?INITIAL_BIAS, ""),
    Literal ++ Digits.
|
|
|
|
|
2013-03-14 10:33:02 +01:00
|
|
|
%% Main Punycode encoding loop (RFC 3492, section 6.3).
%%
%% Input     - the complete list of code points being encoded
%% SNonBasic - distinct non-basic code points still to handle, ascending
%% B         - number of basic code points in Input
%% H         - number of code points handled so far
%% L         - length of Input
%% N         - current code point value of the decoder state
%% Delta     - pending delta, Bias - current bias
%% Out       - output digits accumulated so far, in reverse order
%%
%% Returns the output digits in their final order.
punycode_encode1(Input, [M | SNonBasic], B, H, L, N, Delta, Bias, Out)
  when H < L ->
    %% Advance the state from N to M (this is the "let n = m" step).
    Delta1 = Delta + (M - N) * (H + 1),
    Step =
        fun(C, {ADelta, ABias, AH, AOut}) when C < M ->
                %% Code point handled earlier (or basic): one more position
                %% to skip.
                {ADelta + 1, ABias, AH, AOut};
           (C, {ADelta, ABias, AH, AOut}) when C == M ->
                Encoded = punycode_encode_delta(ADelta, ABias, AOut),
                %% The bias must be adapted with the running count AH, not
                %% with the count H from before this pass: M may occur
                %% several times in Input and AH grows with every occurrence.
                Adapted = adapt(ADelta, AH + 1, AH == B),
                {0, Adapted, AH + 1, Encoded};
           (_C, Acc) ->
                %% Larger code points are handled in a later pass.
                Acc
        end,
    {NewDelta, NewBias, NewH, NewOut} =
        lists:foldl(Step, {Delta1, Bias, H, Out}, Input),
    %% "increment delta and n" before moving on to the next code point.
    punycode_encode1(Input, SNonBasic, B, NewH, L, M + 1,
                     NewDelta + 1, NewBias, NewOut);
punycode_encode1(_Input, _SNonBasic, _B, _H, _L, _N, _Delta, _Bias, Out) ->
    lists:reverse(Out).
|
|
|
|
|
|
|
|
%% Encode Delta as a variable-length integer and push its digits onto Out
%% (Out is kept in reverse order). Encoding starts at position K = ?BASE.
punycode_encode_delta(Delta, Bias, Out) ->
    punycode_encode_delta(Delta, Bias, Out, ?BASE).

%% K is the position of the digit being produced; it grows by ?BASE per digit.
punycode_encode_delta(Delta, Bias, Out, K) ->
    %% Threshold for this digit: K - Bias clamped to the range ?TMIN..?TMAX.
    Threshold = max(?TMIN, min(?TMAX, K - Bias)),
    case Delta < Threshold of
        true ->
            %% Last digit.
            [codepoint(Delta) | Out];
        false ->
            Span = ?BASE - Threshold,
            Rest = Delta - Threshold,
            Digit = Threshold + Rest rem Span,
            punycode_encode_delta(Rest div Span, Bias,
                                  [codepoint(Digit) | Out], K + ?BASE)
    end.
|
|
|
|
|
|
|
|
%% Compute the new bias after a delta has been encoded.
%%
%% Delta is scaled down by ?DAMP when FirstTime is `true' and halved
%% otherwise, increased by its share per code point (NumPoints), and then
%% handed to adapt1/2, which produces the bias.
adapt(Delta, NumPoints, FirstTime) ->
    Divisor = case FirstTime of
                  true -> ?DAMP;
                  _ -> 2
              end,
    Scaled = Delta div Divisor,
    adapt1(Scaled + Scaled div NumPoints, 0).
|
|
|
|
|
|
|
|
%% Second half of the bias adaptation: repeatedly divide Delta by
%% (?BASE - ?TMIN), adding ?BASE to K each time, while Delta exceeds
%% ((?BASE - ?TMIN) * ?TMAX) div 2; then derive the bias from K and what is
%% left of Delta.
adapt1(Delta, K) when Delta > ((?BASE - ?TMIN) * ?TMAX) div 2 ->
    adapt1(Delta div (?BASE - ?TMIN), K + ?BASE);
adapt1(Delta, K) ->
    K + ((?BASE - ?TMIN + 1) * Delta) div (Delta + ?SKEW).
|
2007-12-07 00:15:04 +01:00
|
|
|
|
2004-04-10 21:15:02 +02:00
|
|
|
%% Map a digit value to its character: 0..25 become $a..$z and 26..35
%% become $0..$9. Any other value fails with if_clause.
codepoint(C) ->
    if
        C >= 0, C =< 25 -> $a + C;
        C >= 26, C =< 35 -> C + 22   % 26 maps to $0 (48)
    end.
|
2013-06-27 18:45:04 +02:00
|
|
|
|
|
|
|
%%%===================================================================
|
|
|
|
%%% Unit tests
|
|
|
|
%%%===================================================================
|
|
|
|
-ifdef(TEST).
|
|
|
|
|
|
|
|
%% A domain made only of ASCII letters and a dot is returned unchanged.
acsii_test() ->
    Domain = <<"test.org">>,
    ?assertEqual(Domain, domain_utf8_to_ascii(Domain)).
|
|
|
|
|
|
|
|
%% A domain with two non-ASCII labels: every label is Punycode-encoded and
%% gets the "xn--" prefix.
utf8_test() ->
    Utf8Domain = <<208,180,208,190,208,188,208,181,208,189,  % first label
                   46,                                       % "."
                   209,128,209,132>>,                        % second label
    Expected = <<"xn--d1acufc.xn--p1ai">>,
    ?assertEqual(Expected, domain_utf8_to_ascii(Utf8Domain)).
|
|
|
|
|
|
|
|
-endif.
|