2011-12-08 12:39:58 +01:00
|
|
|
%%%----------------------------------------------------------------------
|
|
|
|
%%% File : ejabberd_regexp.erl
|
|
|
|
%%% Author : Badlop
|
2020-06-02 22:52:21 +02:00
|
|
|
%%% Purpose : Frontend to the OTP re module
|
2011-12-08 12:39:58 +01:00
|
|
|
%%% Created : 8 Dec 2011 by Badlop
|
|
|
|
%%%
|
|
|
|
%%%
|
2022-02-10 17:21:43 +01:00
|
|
|
%%% ejabberd, Copyright (C) 2002-2022 ProcessOne
|
2011-12-08 12:39:58 +01:00
|
|
|
%%%
|
|
|
|
%%% This program is free software; you can redistribute it and/or
|
|
|
|
%%% modify it under the terms of the GNU General Public License as
|
|
|
|
%%% published by the Free Software Foundation; either version 2 of the
|
|
|
|
%%% License, or (at your option) any later version.
|
|
|
|
%%%
|
|
|
|
%%% This program is distributed in the hope that it will be useful,
|
|
|
|
%%% but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
%%% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
|
|
%%% General Public License for more details.
|
|
|
|
%%%
|
2014-02-22 11:27:40 +01:00
|
|
|
%%% You should have received a copy of the GNU General Public License along
|
|
|
|
%%% with this program; if not, write to the Free Software Foundation, Inc.,
|
|
|
|
%%% 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
2011-12-08 12:39:58 +01:00
|
|
|
%%%
|
|
|
|
%%%----------------------------------------------------------------------
|
|
|
|
|
|
|
|
-module(ejabberd_regexp).
|
2013-03-14 10:33:02 +01:00
|
|
|
|
2020-06-02 22:52:21 +02:00
|
|
|
-export([run/2, split/2, replace/3, greplace/3, sh_to_awk/1]).
|
2011-12-08 12:39:58 +01:00
|
|
|
|
2013-03-14 10:33:02 +01:00
|
|
|
%% @doc Check whether `String' matches `Regexp'.
%% The subject and pattern are handed to re:run/3 with capturing
%% disabled (`{capture, none}') and the `unicode' option enabled, so
%% only the match/nomatch outcome is reported.
%% NOTE(review): re:run/3 is documented to raise badarg for an invalid
%% pattern unless `report_errors' is given, so the `{error, any()}'
%% branch of the spec may be unreachable -- confirm before relying on it.
-spec run(binary(), binary()) -> match | nomatch | {error, any()}.
run(Subject, Pattern) ->
    Options = [{capture, none}, unicode],
    re:run(Subject, Pattern, Options).
|
2011-12-08 12:39:58 +01:00
|
|
|
|
2013-03-14 10:33:02 +01:00
|
|
|
%% @doc Split `String' at every match of `Regexp'.
%% Delegates to re:split/3, asking for the pieces to be returned as
%% binaries.
-spec split(binary(), binary()) -> [binary()].
split(Subject, Pattern) ->
    Options = [{return, binary}],
    re:split(Subject, Pattern, Options).
|
2011-12-08 12:39:58 +01:00
|
|
|
|
2013-03-14 10:33:02 +01:00
|
|
|
%% @doc Replace the first match of `Regexp' in `String' with `New'.
%% Delegates to re:replace/4 without the `global' option and asks for
%% the result as a single binary.
-spec replace(binary(), binary(), binary()) -> binary().
replace(Subject, Pattern, Replacement) ->
    Options = [{return, binary}],
    re:replace(Subject, Pattern, Replacement, Options).
|
2011-12-08 12:39:58 +01:00
|
|
|
|
2013-03-14 10:33:02 +01:00
|
|
|
%% @doc Replace every match of `Regexp' in `String' with `New'.
%% Same as replace/3 but passes the `global' option to re:replace/4;
%% the result is returned as a single binary.
-spec greplace(binary(), binary(), binary()) -> binary().
greplace(Subject, Pattern, Replacement) ->
    Options = [global, {return, binary}],
    re:replace(Subject, Pattern, Replacement, Options).
|
2018-01-15 10:31:06 +01:00
|
|
|
|
|
|
|
%% This code was copied and adapted from xmerl_regexp.erl
|
|
|
|
|
2013-03-14 10:33:02 +01:00
|
|
|
%% @doc Convert a shell-style glob pattern into an anchored regular
%% expression.
%% The translated pattern is wrapped as `^(' ... `)$': the opening part
%% is added here, the closing part is produced by sh_to_awk_1/1 when it
%% reaches the end of the input. The resulting iolist is flattened into
%% one binary.
-spec sh_to_awk(binary()) -> binary().
sh_to_awk(Sh) ->
    Translated = sh_to_awk_1(Sh),
    iolist_to_binary([<<"^(">>, Translated]).
|
|
|
|
|
|
|
|
%% Translate the part of a glob that lies outside any bracket
%% expression, one byte at a time, returning an iolist.
%% End of input: close the group opened by sh_to_awk/1 and anchor it.
sh_to_awk_1(<<>>) ->
    <<")$">>;
%% `*' stands for any string.
sh_to_awk_1(<<$*, Rest/binary>>) ->
    [<<".*">>, sh_to_awk_1(Rest)];
%% `?' stands for any single character.
sh_to_awk_1(<<$?, Rest/binary>>) ->
    [$., sh_to_awk_1(Rest)];
%% A class holding only `^' is emitted as an escaped caret instead of
%% a bracket expression.
sh_to_awk_1(<<$[, $^, $], Rest/binary>>) ->
    [<<"\\^">>, sh_to_awk_1(Rest)];
%% A class that starts with `^': the caret is held back (flag `true')
%% and re-emitted at the end of the class by sh_to_awk_3/2.
sh_to_awk_1(<<$[, $^, Rest/binary>>) ->
    [$[, sh_to_awk_2(Rest, true)];
%% `[!...]' is rewritten with `^' in place of `!'.
sh_to_awk_1(<<$[, $!, Rest/binary>>) ->
    [<<"[^">>, sh_to_awk_2(Rest, false)];
%% Ordinary class: copy the opening bracket and scan the members.
sh_to_awk_1(<<$[, Rest/binary>>) ->
    [$[, sh_to_awk_2(Rest, false)];
%% Any other byte is copied; bytes that sh_special_char/1 reports as
%% special are prefixed with a backslash so they are taken literally.
sh_to_awk_1(<<Byte, Rest/binary>>) ->
    Tail = sh_to_awk_1(Rest),
    case sh_special_char(Byte) of
        true -> [$\\, Byte | Tail];
        false -> [Byte | Tail]
    end.
|
|
|
|
|
|
|
|
%% Handle the first position inside a bracket expression.
%% A `]' found right after the opening bracket is copied as a member of
%% the class; in every case the rest of the class is scanned by
%% sh_to_awk_3/2. `CaretPending' is passed through unchanged.
sh_to_awk_2(Class, CaretPending) ->
    case Class of
        <<$], Rest/binary>> ->
            [$] | sh_to_awk_3(Rest, CaretPending)];
        _ ->
            sh_to_awk_3(Class, CaretPending)
    end.
|
|
|
|
|
|
|
|
%% Copy the members of a bracket expression up to its closing `]'.
%% When the second argument is `true' a `^' was held back by
%% sh_to_awk_1/1 and is emitted just before the class is closed (or
%% before the end-of-pattern suffix if the class is never closed).
%% Input exhausted inside the class: emit only the end-of-pattern suffix.
sh_to_awk_3(<<>>, false) ->
    sh_to_awk_1(<<>>);
%% Input exhausted with a caret pending: emit the caret, then the suffix.
sh_to_awk_3(<<>>, true) ->
    [$^ | sh_to_awk_1(<<>>)];
%% Closing bracket, nothing pending: resume normal translation.
sh_to_awk_3(<<$], Rest/binary>>, false) ->
    [$] | sh_to_awk_1(Rest)];
%% Closing bracket with a caret pending: place the caret last in the class.
sh_to_awk_3(<<$], Rest/binary>>, true) ->
    [<<"^]">>, sh_to_awk_1(Rest)];
%% Any other byte is a class member and is copied unchanged.
sh_to_awk_3(<<Byte, Rest/binary>>, CaretPending) ->
    [Byte | sh_to_awk_3(Rest, CaretPending)].
|
2018-01-15 10:31:06 +01:00
|
|
|
|
|
|
|
%% Test if a character is special in a regular expression and must
%% therefore be backslash-escaped when it appears literally in a glob.
%% Returns `true' for | * + ? ( ) \ ^ $ . [ ] { } and the double quote,
%% `false' for every other character.
-spec sh_special_char(char()) -> boolean().
sh_special_char($|) -> true;
sh_special_char($*) -> true;
sh_special_char($+) -> true;
sh_special_char($?) -> true;
sh_special_char($() -> true;
sh_special_char($)) -> true;
sh_special_char($\\) -> true;
sh_special_char($^) -> true;
sh_special_char($$) -> true;
sh_special_char($.) -> true;
sh_special_char($[) -> true;
sh_special_char($]) -> true;
%% Braces delimit `{n}' / `{n,m}' quantifiers in the syntax accepted by
%% the re module, so an unescaped glob such as `a{2}' would match "aa"
%% instead of the literal text; escape them like the other operators.
sh_special_char(${) -> true;
sh_special_char($}) -> true;
sh_special_char($") -> true;
sh_special_char(_C) -> false.
|