* src/ejabberd_s2s_out.erl: Fix long timeout when reconnecting s2s

after a remote server crash (EJAB-540)
* src/ejabberd_s2s_in.erl: Likewise
* src/ejabberd_s2s.erl: Likewise
* doc/guide.tex: Likewise

SVN Revision: 1296
This commit is contained in:
Badlop 2008-04-18 12:19:45 +00:00
parent 8d23ef3ed0
commit 2e12fd2b11
5 changed files with 70 additions and 3 deletions

View File

@ -1,3 +1,11 @@
2008-04-18 Badlop <badlop@process-one.net>
* src/ejabberd_s2s_out.erl: Fix long timeout when reconnecting s2s
after a remote server crash (EJAB-540)
* src/ejabberd_s2s_in.erl: Likewise
* src/ejabberd_s2s.erl: Likewise
* doc/guide.tex: Likewise
2008-04-16 Badlop <badlop@process-one.net>
* doc/guide.tex: Clarification: PEP is enabled in default config

View File

@ -786,6 +786,9 @@ There are some additional global options:
Defines if incoming and outgoing s2s connections with a specific remote host are allowed or denied.
This allows to restrict ejabberd to only stablish s2s connections
with a small list of trusted servers, or to block some specific servers.
\titem{\{s2s\_max\_retry\_delay, Seconds\}} \ind{options!s2s\_max\_retry\_delay}
The maximum allowed delay for retry to connect after a failed connection attempt.
Specified in seconds. The default value is 300 seconds (5 minutes).
\end{description}
For example, the following simple configuration defines:

View File

@ -34,6 +34,7 @@
route/3,
have_connection/1,
has_key/2,
get_connections_pids/1,
try_register/1,
remove_connection/3,
dirty_get_connections/0,
@ -108,6 +109,14 @@ has_key(FromTo, Key) ->
true
end.
get_connections_pids(FromTo) ->
case catch mnesia:dirty_read(s2s, FromTo) of
L when is_list(L) ->
[Connection#s2s.pid || Connection <- L];
_ ->
[]
end.
try_register(FromTo) ->
Key = randoms:get_string(),
MaxS2SConnectionsNumber = max_s2s_connections_number(FromTo),

View File

@ -352,6 +352,7 @@ stream_established({xmlstreamelement, El}, StateData) ->
case {ejabberd_s2s:allow_host(To, From),
lists:member(LTo, ejabberd_router:dirty_get_all_domains())} of
{true, true} ->
ejabberd_s2s_out:terminate_if_waiting_delay(To, From),
ejabberd_s2s_out:start(To, From,
{verify, self(),
Key, StateData#state.streamid}),

View File

@ -33,6 +33,7 @@
-export([start/3,
start_link/3,
start_connection/1,
terminate_if_waiting_delay/2,
stop_connection/1]).
%% p1_fsm callbacks (same as gen_fsm)
@ -67,6 +68,7 @@
db_enabled = true,
try_auth = true,
myname, server, queue,
delay_to_retry = undefined_delay,
new = false, verify = false,
timer}).
@ -92,6 +94,10 @@
%% -define(FSMLIMITS, [{max_queue, 2000}]).
-define(FSMTIMEOUT, 5000).
%% Maximum delay to wait before retrying to connect after a failed attempt.
%% Specified in miliseconds. Default value is 5 minutes.
-define(MAX_RETRY_DELAY, 300000).
-define(STREAM_HEADER,
"<?xml version='1.0'?>"
"<stream:stream "
@ -217,7 +223,7 @@ open_socket(init, StateData) ->
{error, _Reason} ->
?INFO_MSG("s2s connection: ~s -> ~s (remote server not found)",
[StateData#state.myname, StateData#state.server]),
wait_before_reconnect(StateData, 300000)
wait_before_reconnect(StateData)
%%{stop, normal, StateData}
end;
open_socket(stop, StateData) ->
@ -768,6 +774,12 @@ handle_info({timeout, Timer, _}, _StateName,
?INFO_MSG("Closing connection with ~s: timeout", [StateData#state.server]),
{stop, normal, StateData};
handle_info(terminate_if_waiting_before_retry, wait_before_retry, StateData) ->
{stop, normal, StateData};
handle_info(terminate_if_waiting_before_retry, StateName, StateData) ->
{next_state, StateName, StateData, get_timeout_interval(StateName)};
handle_info(_, StateName, StateData) ->
{next_state, StateName, StateData, get_timeout_interval(StateName)}.
@ -989,7 +1001,7 @@ log_s2s_out(false, _, _) -> ok;
log_s2s_out(_, Myname, Server) ->
?INFO_MSG("Trying to open s2s connection: ~s -> ~s",[Myname, Server]).
%% Calcultate timeout depending on which state we are in:
%% Calculate timeout depending on which state we are in:
%% Can return integer > 0 | infinity
get_timeout_interval(StateName) ->
case StateName of
@ -1005,11 +1017,45 @@ get_timeout_interval(StateName) ->
%% This function is intended to be called at the end of a state
%% function that want to wait for a reconnect delay before stopping.
wait_before_reconnect(StateData, Delay) ->
wait_before_reconnect(StateData) ->
%% bounce queue manage by process and Erlang message queue
bounce_queue(StateData#state.queue, ?ERR_REMOTE_SERVER_NOT_FOUND),
bounce_messages(?ERR_REMOTE_SERVER_NOT_FOUND),
cancel_timer(StateData#state.timer),
Delay = case StateData#state.delay_to_retry of
undefined_delay ->
%% The initial delay is random between 1 and 15 seconds
%% Return a random integer between 1000 and 15000
{_, _, MicroSecs} = now(),
(MicroSecs rem 14000) + 1000;
D1 ->
%% Duplicate the delay with each successive failed
%% reconnection attempt, but don't exceed the max
lists:min([D1 * 2, get_max_retry_delay()])
end,
Timer = erlang:start_timer(Delay, self(), []),
{next_state, wait_before_retry, StateData#state{timer=Timer,
delay_to_retry = Delay,
queue = queue:new()}}.
%% @doc Get the maximum allowed delay for retry to reconnect (in miliseconds).
%% The default value is 5 minutes.
%% The option {s2s_max_retry_delay, Seconds} can be used (in seconds).
%% @spec () -> integer()
get_max_retry_delay() ->
case ejabberd_config:get_local_option(s2s_max_retry_delay) of
Seconds when is_integer(Seconds) ->
Seconds*1000;
_ ->
?MAX_RETRY_DELAY
end.
%% Terminate s2s_out connections that are in state wait_before_retry
terminate_if_waiting_delay(From, To) ->
FromTo = {From, To},
Pids = ejabberd_s2s:get_connections_pids(FromTo),
lists:foreach(
fun(Pid) ->
Pid ! terminate_if_waiting_before_retry
end,
Pids).