From 2e12fd2b11952ee3fc28db1c37a92ccd7a646b30 Mon Sep 17 00:00:00 2001 From: Badlop Date: Fri, 18 Apr 2008 12:19:45 +0000 Subject: [PATCH] * src/ejabberd_s2s_out.erl: Fix long timeout when reconnecting s2s after a remote server crash (EJAB-540) * src/ejabberd_s2s_in.erl: Likewise * src/ejabberd_s2s.erl: Likewise * doc/guide.tex: Likewise SVN Revision: 1296 --- ChangeLog | 8 +++++++ doc/guide.tex | 3 +++ src/ejabberd_s2s.erl | 9 +++++++ src/ejabberd_s2s_in.erl | 1 + src/ejabberd_s2s_out.erl | 52 +++++++++++++++++++++++++++++++++++++--- 5 files changed, 70 insertions(+), 3 deletions(-) diff --git a/ChangeLog b/ChangeLog index 3be079095..0063a4d1b 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,11 @@ +2008-04-18 Badlop + + * src/ejabberd_s2s_out.erl: Fix long timeout when reconnecting s2s + after a remote server crash (EJAB-540) + * src/ejabberd_s2s_in.erl: Likewise + * src/ejabberd_s2s.erl: Likewise + * doc/guide.tex: Likewise + 2008-04-16 Badlop * doc/guide.tex: Clarification: PEP is enabled in default config diff --git a/doc/guide.tex b/doc/guide.tex index 634bfdcc3..94dca9f21 100644 --- a/doc/guide.tex +++ b/doc/guide.tex @@ -786,6 +786,9 @@ There are some additional global options: Defines if incoming and outgoing s2s connections with a specific remote host are allowed or denied. This allows to restrict ejabberd to only stablish s2s connections with a small list of trusted servers, or to block some specific servers. + \titem{\{s2s\_max\_retry\_delay, Seconds\}} \ind{options!s2s\_max\_retry\_delay} + The maximum allowed delay for retry to connect after a failed connection attempt. + Specified in seconds. The default value is 300 seconds (5 minutes). 
\end{description} For example, the following simple configuration defines: diff --git a/src/ejabberd_s2s.erl b/src/ejabberd_s2s.erl index c72af5aec..b500388c5 100644 --- a/src/ejabberd_s2s.erl +++ b/src/ejabberd_s2s.erl @@ -34,6 +34,7 @@ route/3, have_connection/1, has_key/2, + get_connections_pids/1, try_register/1, remove_connection/3, dirty_get_connections/0, @@ -108,6 +109,14 @@ has_key(FromTo, Key) -> true end. +get_connections_pids(FromTo) -> + case catch mnesia:dirty_read(s2s, FromTo) of + L when is_list(L) -> + [Connection#s2s.pid || Connection <- L]; + _ -> + [] + end. + try_register(FromTo) -> Key = randoms:get_string(), MaxS2SConnectionsNumber = max_s2s_connections_number(FromTo), diff --git a/src/ejabberd_s2s_in.erl b/src/ejabberd_s2s_in.erl index 623339845..cd57e3d1e 100644 --- a/src/ejabberd_s2s_in.erl +++ b/src/ejabberd_s2s_in.erl @@ -352,6 +352,7 @@ stream_established({xmlstreamelement, El}, StateData) -> case {ejabberd_s2s:allow_host(To, From), lists:member(LTo, ejabberd_router:dirty_get_all_domains())} of {true, true} -> + ejabberd_s2s_out:terminate_if_waiting_delay(To, From), ejabberd_s2s_out:start(To, From, {verify, self(), Key, StateData#state.streamid}), diff --git a/src/ejabberd_s2s_out.erl b/src/ejabberd_s2s_out.erl index 8eddc41eb..e5f8ed254 100644 --- a/src/ejabberd_s2s_out.erl +++ b/src/ejabberd_s2s_out.erl @@ -33,6 +33,7 @@ -export([start/3, start_link/3, start_connection/1, + terminate_if_waiting_delay/2, stop_connection/1]). %% p1_fsm callbacks (same as gen_fsm) @@ -67,6 +68,7 @@ db_enabled = true, try_auth = true, myname, server, queue, + delay_to_retry = undefined_delay, new = false, verify = false, timer}). @@ -92,6 +94,10 @@ %% -define(FSMLIMITS, [{max_queue, 2000}]). -define(FSMTIMEOUT, 5000). +%% Maximum delay to wait before retrying to connect after a failed attempt. +%% Specified in milliseconds. Default value is 5 minutes. +-define(MAX_RETRY_DELAY, 300000). 
+ -define(STREAM_HEADER, "" " {error, _Reason} -> ?INFO_MSG("s2s connection: ~s -> ~s (remote server not found)", [StateData#state.myname, StateData#state.server]), - wait_before_reconnect(StateData, 300000) + wait_before_reconnect(StateData) %%{stop, normal, StateData} end; open_socket(stop, StateData) -> @@ -768,6 +774,12 @@ handle_info({timeout, Timer, _}, _StateName, ?INFO_MSG("Closing connection with ~s: timeout", [StateData#state.server]), {stop, normal, StateData}; +handle_info(terminate_if_waiting_before_retry, wait_before_retry, StateData) -> + {stop, normal, StateData}; + +handle_info(terminate_if_waiting_before_retry, StateName, StateData) -> + {next_state, StateName, StateData, get_timeout_interval(StateName)}; + handle_info(_, StateName, StateData) -> {next_state, StateName, StateData, get_timeout_interval(StateName)}. @@ -989,7 +1001,7 @@ log_s2s_out(false, _, _) -> ok; log_s2s_out(_, Myname, Server) -> ?INFO_MSG("Trying to open s2s connection: ~s -> ~s",[Myname, Server]). -%% Calcultate timeout depending on which state we are in: +%% Calculate timeout depending on which state we are in: %% Can return integer > 0 | infinity get_timeout_interval(StateName) -> case StateName of @@ -1005,11 +1017,45 @@ get_timeout_interval(StateName) -> %% This function is intended to be called at the end of a state %% function that want to wait for a reconnect delay before stopping. 
-wait_before_reconnect(StateData, Delay) -> +wait_before_reconnect(StateData) -> %% bounce queue manage by process and Erlang message queue bounce_queue(StateData#state.queue, ?ERR_REMOTE_SERVER_NOT_FOUND), bounce_messages(?ERR_REMOTE_SERVER_NOT_FOUND), cancel_timer(StateData#state.timer), + Delay = case StateData#state.delay_to_retry of + undefined_delay -> + %% The initial delay is random between 1 and 15 seconds + %% Return a random integer between 1000 and 14999 + {_, _, MicroSecs} = now(), + (MicroSecs rem 14000) + 1000; + D1 -> + %% Double the delay with each successive failed + %% reconnection attempt, but don't exceed the max + lists:min([D1 * 2, get_max_retry_delay()]) + end, Timer = erlang:start_timer(Delay, self(), []), {next_state, wait_before_retry, StateData#state{timer=Timer, + delay_to_retry = Delay, queue = queue:new()}}. + +%% @doc Get the maximum allowed delay for retry to reconnect (in milliseconds). +%% The default value is 5 minutes. +%% The option {s2s_max_retry_delay, Seconds} can be used (in seconds). +%% @spec () -> integer() +get_max_retry_delay() -> + case ejabberd_config:get_local_option(s2s_max_retry_delay) of + Seconds when is_integer(Seconds) -> + Seconds*1000; + _ -> + ?MAX_RETRY_DELAY + end. + +%% Terminate s2s_out connections that are in state wait_before_retry +terminate_if_waiting_delay(From, To) -> + FromTo = {From, To}, + Pids = ejabberd_s2s:get_connections_pids(FromTo), + lists:foreach( + fun(Pid) -> + Pid ! terminate_if_waiting_before_retry + end, + Pids).