25
1
mirror of https://github.com/processone/ejabberd.git synced 2024-11-28 16:34:13 +01:00

* src/ejabberd_s2s_out.erl: Fix long timeout when reconnecting s2s

after a remote server crash (EJAB-540)
* src/ejabberd_s2s_in.erl: Likewise
* src/ejabberd_s2s.erl: Likewise
* doc/guide.tex: Likewise

SVN Revision: 1296
This commit is contained in:
Badlop 2008-04-18 12:19:45 +00:00
parent 981a4a9cd8
commit ee6aae8211
5 changed files with 70 additions and 3 deletions

View File

@ -1,3 +1,11 @@
2008-04-18 Badlop <badlop@process-one.net>
* src/ejabberd_s2s_out.erl: Fix long timeout when reconnecting s2s
after a remote server crash (EJAB-540)
* src/ejabberd_s2s_in.erl: Likewise
* src/ejabberd_s2s.erl: Likewise
* doc/guide.tex: Likewise
2008-04-16 Badlop <badlop@process-one.net> 2008-04-16 Badlop <badlop@process-one.net>
* doc/guide.tex: Clarification: PEP is enabled in default config * doc/guide.tex: Clarification: PEP is enabled in default config

View File

@ -786,6 +786,9 @@ There are some additional global options:
Defines if incoming and outgoing s2s connections with a specific remote host are allowed or denied. Defines if incoming and outgoing s2s connections with a specific remote host are allowed or denied.
This allows to restrict ejabberd to only stablish s2s connections This allows to restrict ejabberd to only stablish s2s connections
with a small list of trusted servers, or to block some specific servers. with a small list of trusted servers, or to block some specific servers.
\titem{\{s2s\_max\_retry\_delay, Seconds\}} \ind{options!s2s\_max\_retry\_delay}
The maximum delay to wait before retrying to connect after a failed connection attempt.
Specified in seconds. The default value is 300 seconds (5 minutes).
\end{description} \end{description}
For example, the following simple configuration defines: For example, the following simple configuration defines:

View File

@ -34,6 +34,7 @@
route/3, route/3,
have_connection/1, have_connection/1,
has_key/2, has_key/2,
get_connections_pids/1,
try_register/1, try_register/1,
remove_connection/3, remove_connection/3,
dirty_get_connections/0, dirty_get_connections/0,
@ -108,6 +109,14 @@ has_key(FromTo, Key) ->
true true
end. end.
%% Return the pids stored in the s2s table entries for FromTo.
%% If the dirty read does not produce a list (for example, it exits),
%% an empty list is returned instead.
get_connections_pids(FromTo) ->
    Result = (catch mnesia:dirty_read(s2s, FromTo)),
    if
        is_list(Result) ->
            lists:map(fun(Entry) -> Entry#s2s.pid end, Result);
        true ->
            []
    end.
try_register(FromTo) -> try_register(FromTo) ->
Key = randoms:get_string(), Key = randoms:get_string(),
MaxS2SConnectionsNumber = max_s2s_connections_number(FromTo), MaxS2SConnectionsNumber = max_s2s_connections_number(FromTo),

View File

@ -352,6 +352,7 @@ stream_established({xmlstreamelement, El}, StateData) ->
case {ejabberd_s2s:allow_host(To, From), case {ejabberd_s2s:allow_host(To, From),
lists:member(LTo, ejabberd_router:dirty_get_all_domains())} of lists:member(LTo, ejabberd_router:dirty_get_all_domains())} of
{true, true} -> {true, true} ->
ejabberd_s2s_out:terminate_if_waiting_delay(To, From),
ejabberd_s2s_out:start(To, From, ejabberd_s2s_out:start(To, From,
{verify, self(), {verify, self(),
Key, StateData#state.streamid}), Key, StateData#state.streamid}),

View File

@ -33,6 +33,7 @@
-export([start/3, -export([start/3,
start_link/3, start_link/3,
start_connection/1, start_connection/1,
terminate_if_waiting_delay/2,
stop_connection/1]). stop_connection/1]).
%% p1_fsm callbacks (same as gen_fsm) %% p1_fsm callbacks (same as gen_fsm)
@ -67,6 +68,7 @@
db_enabled = true, db_enabled = true,
try_auth = true, try_auth = true,
myname, server, queue, myname, server, queue,
delay_to_retry = undefined_delay,
new = false, verify = false, new = false, verify = false,
timer}). timer}).
@ -92,6 +94,10 @@
%% -define(FSMLIMITS, [{max_queue, 2000}]). %% -define(FSMLIMITS, [{max_queue, 2000}]).
-define(FSMTIMEOUT, 5000). -define(FSMTIMEOUT, 5000).
%% Maximum delay to wait before retrying to connect after a failed attempt.
%% Specified in milliseconds. Default value is 5 minutes.
-define(MAX_RETRY_DELAY, 300000).
-define(STREAM_HEADER, -define(STREAM_HEADER,
"<?xml version='1.0'?>" "<?xml version='1.0'?>"
"<stream:stream " "<stream:stream "
@ -217,7 +223,7 @@ open_socket(init, StateData) ->
{error, _Reason} -> {error, _Reason} ->
?INFO_MSG("s2s connection: ~s -> ~s (remote server not found)", ?INFO_MSG("s2s connection: ~s -> ~s (remote server not found)",
[StateData#state.myname, StateData#state.server]), [StateData#state.myname, StateData#state.server]),
wait_before_reconnect(StateData, 300000) wait_before_reconnect(StateData)
%%{stop, normal, StateData} %%{stop, normal, StateData}
end; end;
open_socket(stop, StateData) -> open_socket(stop, StateData) ->
@ -768,6 +774,12 @@ handle_info({timeout, Timer, _}, _StateName,
?INFO_MSG("Closing connection with ~s: timeout", [StateData#state.server]), ?INFO_MSG("Closing connection with ~s: timeout", [StateData#state.server]),
{stop, normal, StateData}; {stop, normal, StateData};
handle_info(terminate_if_waiting_before_retry, wait_before_retry, StateData) ->
{stop, normal, StateData};
handle_info(terminate_if_waiting_before_retry, StateName, StateData) ->
{next_state, StateName, StateData, get_timeout_interval(StateName)};
handle_info(_, StateName, StateData) -> handle_info(_, StateName, StateData) ->
{next_state, StateName, StateData, get_timeout_interval(StateName)}. {next_state, StateName, StateData, get_timeout_interval(StateName)}.
@ -989,7 +1001,7 @@ log_s2s_out(false, _, _) -> ok;
log_s2s_out(_, Myname, Server) -> log_s2s_out(_, Myname, Server) ->
?INFO_MSG("Trying to open s2s connection: ~s -> ~s",[Myname, Server]). ?INFO_MSG("Trying to open s2s connection: ~s -> ~s",[Myname, Server]).
%% Calcultate timeout depending on which state we are in: %% Calculate timeout depending on which state we are in:
%% Can return integer > 0 | infinity %% Can return integer > 0 | infinity
get_timeout_interval(StateName) -> get_timeout_interval(StateName) ->
case StateName of case StateName of
@ -1005,11 +1017,45 @@ get_timeout_interval(StateName) ->
%% This function is intended to be called at the end of a state %% This function is intended to be called at the end of a state
%% function that want to wait for a reconnect delay before stopping. %% function that want to wait for a reconnect delay before stopping.
wait_before_reconnect(StateData, Delay) -> wait_before_reconnect(StateData) ->
%% bounce queue manage by process and Erlang message queue %% bounce queue manage by process and Erlang message queue
bounce_queue(StateData#state.queue, ?ERR_REMOTE_SERVER_NOT_FOUND), bounce_queue(StateData#state.queue, ?ERR_REMOTE_SERVER_NOT_FOUND),
bounce_messages(?ERR_REMOTE_SERVER_NOT_FOUND), bounce_messages(?ERR_REMOTE_SERVER_NOT_FOUND),
cancel_timer(StateData#state.timer), cancel_timer(StateData#state.timer),
Delay = case StateData#state.delay_to_retry of
undefined_delay ->
%% The initial delay is random between 1 and 15 seconds
%% Return a random integer between 1000 and 15000
{_, _, MicroSecs} = now(),
(MicroSecs rem 14000) + 1000;
D1 ->
%% Duplicate the delay with each successive failed
%% reconnection attempt, but don't exceed the max
lists:min([D1 * 2, get_max_retry_delay()])
end,
Timer = erlang:start_timer(Delay, self(), []), Timer = erlang:start_timer(Delay, self(), []),
{next_state, wait_before_retry, StateData#state{timer=Timer, {next_state, wait_before_retry, StateData#state{timer=Timer,
delay_to_retry = Delay,
queue = queue:new()}}. queue = queue:new()}}.
%% @doc Get the maximum allowed delay for retry to reconnect (in milliseconds).
%% The default value is 5 minutes.
%% The option {s2s_max_retry_delay, Seconds} can be used (in seconds).
%% Only a non-negative integer is honoured; any other value (unset,
%% negative, non-integer) falls back to the default. The result caps the
%% delay handed to erlang:start_timer/3, which rejects negative times.
%% @spec () -> integer()
get_max_retry_delay() ->
    case ejabberd_config:get_local_option(s2s_max_retry_delay) of
        Seconds when is_integer(Seconds), Seconds >= 0 ->
            %% The option is expressed in seconds; timers use milliseconds.
            Seconds * 1000;
        _ ->
            ?MAX_RETRY_DELAY
    end.
%% Send terminate_if_waiting_before_retry to every connection pid that
%% ejabberd_s2s has registered for {From, To}. Each receiving process
%% decides, based on its own state, whether to stop.
terminate_if_waiting_delay(From, To) ->
    Notify = fun(Pid) -> Pid ! terminate_if_waiting_before_retry end,
    lists:foreach(Notify, ejabberd_s2s:get_connections_pids({From, To})).