25
1
mirror of https://github.com/processone/ejabberd.git synced 2024-12-22 17:28:25 +01:00

ejabberd_systemd: Avoid using gen_server timeout

Don't (ab)use the gen_server timeout mechanism for pinging the systemd
watchdog.  Under certain conditions (e.g., the process receiving sys
messages), the gen_server timeout might not be triggered as expected.

Fixes #4054, fixes #4058.
This commit is contained in:
Holger Weiss 2023-08-09 01:54:12 +02:00
parent f0db7623d1
commit 6c7e85d3d8

View File

@ -44,9 +44,8 @@
{socket :: gen_udp:socket() | undefined, {socket :: gen_udp:socket() | undefined,
destination :: inet:local_address() | undefined, destination :: inet:local_address() | undefined,
interval :: pos_integer() | undefined, interval :: pos_integer() | undefined,
last_ping :: integer() | undefined}). timer :: reference() | undefined}).
-type watchdog_timeout() :: pos_integer() | hibernate.
-type state() :: #state{}. -type state() :: #state{}.
%%-------------------------------------------------------------------- %%--------------------------------------------------------------------
@ -71,8 +70,7 @@ stopping() ->
%%-------------------------------------------------------------------- %%--------------------------------------------------------------------
%% gen_server callbacks. %% gen_server callbacks.
%%-------------------------------------------------------------------- %%--------------------------------------------------------------------
-spec init(any()) -spec init(any()) -> {ok, state()} | {stop, term()}.
-> {ok, state()} | {ok, state(), watchdog_timeout()} | {stop, term()}.
init(_Opts) -> init(_Opts) ->
process_flag(trap_exit, true), process_flag(trap_exit, true),
case os:getenv("NOTIFY_SOCKET") of case os:getenv("NOTIFY_SOCKET") of
@ -84,17 +82,10 @@ init(_Opts) ->
Destination = {local, Path}, Destination = {local, Path},
case gen_udp:open(0, [local]) of case gen_udp:open(0, [local]) of
{ok, Socket} -> {ok, Socket} ->
Interval = get_watchdog_interval(),
State = #state{socket = Socket, State = #state{socket = Socket,
destination = Destination, destination = Destination,
interval = Interval}, interval = get_watchdog_interval()},
if is_integer(Interval), Interval > 0 -> {ok, maybe_start_timer(State)};
?INFO_MSG("Watchdog notifications enabled", []),
{ok, set_last_ping(State), Interval};
true ->
?INFO_MSG("Watchdog notifications disabled", []),
{ok, State}
end;
{error, Reason} -> {error, Reason} ->
?CRITICAL_MSG("Cannot open IPC socket: ~p", [Reason]), ?CRITICAL_MSG("Cannot open IPC socket: ~p", [Reason]),
{stop, Reason} {stop, Reason}
@ -105,47 +96,48 @@ init(_Opts) ->
end. end.
-spec handle_call(term(), {pid(), term()}, state()) -spec handle_call(term(), {pid(), term()}, state())
-> {reply, {error, badarg}, state(), watchdog_timeout()}. -> {reply, {error, badarg}, state()}.
handle_call(Request, From, State) -> handle_call(Request, From, State) ->
?ERROR_MSG("Got unexpected request from ~p: ~p", [From, Request]), ?ERROR_MSG("Got unexpected request from ~p: ~p", [From, Request]),
{reply, {error, badarg}, State, get_timeout(State)}. {reply, {error, badarg}, State}.
-spec handle_cast({notify, binary()} | term(), state()) -spec handle_cast({notify, binary()} | term(), state()) -> {noreply, state()}.
-> {noreply, state(), watchdog_timeout()}.
handle_cast({notify, Notification}, handle_cast({notify, Notification},
#state{destination = undefined} = State) -> #state{destination = undefined} = State) ->
?DEBUG("No NOTIFY_SOCKET, dropping ~s notification", [Notification]), ?DEBUG("No NOTIFY_SOCKET, dropping ~s notification", [Notification]),
{noreply, State, get_timeout(State)}; {noreply, State};
handle_cast({notify, Notification}, State) -> handle_cast({notify, Notification}, State) ->
try notify(State, Notification) try notify(State, Notification)
catch _:Err -> catch _:Err ->
?ERROR_MSG("Cannot send ~s notification: ~p", [Notification, Err]) ?ERROR_MSG("Cannot send ~s notification: ~p", [Notification, Err])
end, end,
{noreply, State, get_timeout(State)}; {noreply, State};
handle_cast(Msg, State) -> handle_cast(Msg, State) ->
?ERROR_MSG("Got unexpected message: ~p", [Msg]), ?ERROR_MSG("Got unexpected message: ~p", [Msg]),
{noreply, State, get_timeout(State)}. {noreply, State}.
-spec handle_info(timeout | term(), state()) -spec handle_info(ping_watchdog | term(), state()) -> {noreply, state()}.
-> {noreply, state(), watchdog_timeout()}. handle_info(ping_watchdog , #state{interval = Interval} = State)
handle_info(timeout, #state{interval = Interval} = State)
when is_integer(Interval), Interval > 0 -> when is_integer(Interval), Interval > 0 ->
try notify(State, <<"WATCHDOG=1">>) try notify(State, <<"WATCHDOG=1">>)
catch _:Err -> catch _:Err ->
?ERROR_MSG("Cannot ping watchdog: ~p", [Err]) ?ERROR_MSG("Cannot ping watchdog: ~p", [Err])
end, end,
{noreply, set_last_ping(State), Interval}; {noreply, start_timer(State)};
handle_info(Info, State) -> handle_info(Info, State) ->
?ERROR_MSG("Got unexpected info: ~p", [Info]), ?ERROR_MSG("Got unexpected info: ~p", [Info]),
{noreply, State, get_timeout(State)}. {noreply, State}.
-spec terminate(normal | shutdown | {shutdown, term()} | term(), state()) -> ok. -spec terminate(normal | shutdown | {shutdown, term()} | term(), state()) -> ok.
terminate(Reason, #state{socket = undefined}) -> terminate(Reason, #state{socket = Socket} = State) ->
?DEBUG("Terminating ~s (~p)", [?MODULE, Reason]), ?DEBUG("Terminating ~s (~p)", [?MODULE, Reason]),
ok; cancel_timer(State),
terminate(Reason, #state{socket = Socket}) -> case Socket of
?DEBUG("Closing socket and terminating ~s (~p)", [?MODULE, Reason]), undefined ->
ok = gen_udp:close(Socket). ok;
_Socket ->
gen_udp:close(Socket)
end.
-spec code_change({down, term()} | term(), state(), term()) -> {ok, state()}. -spec code_change({down, term()} | term(), state(), term()) -> {ok, state()}.
code_change(_OldVsn, State, _Extra) -> code_change(_OldVsn, State, _Extra) ->
@ -166,24 +158,22 @@ get_watchdog_interval() ->
undefined undefined
end. end.
-spec get_timeout(state()) -> watchdog_timeout(). -spec maybe_start_timer(state()) -> state().
get_timeout(#state{interval = undefined}) -> maybe_start_timer(#state{interval = Interval} = State)
?DEBUG("Watchdog interval is undefined, hibernating", []), when is_integer(Interval), Interval > 0 ->
hibernate; ?INFO_MSG("Watchdog notifications enabled", []),
get_timeout(#state{interval = Interval, last_ping = LastPing}) -> start_timer(State);
case Interval - (erlang:monotonic_time(millisecond) - LastPing) of maybe_start_timer(State) ->
Timeout when Timeout > 0 -> ?INFO_MSG("Watchdog notifications disabled", []),
?DEBUG("Calculated new timeout value: ~B", [Timeout]), State.
Timeout;
_ ->
?DEBUG("Calculated new timeout value: 1", []),
1
end.
-spec set_last_ping(state()) -> state(). -spec start_timer(state()) -> state().
set_last_ping(State) -> start_timer(#state{interval = Interval} = State) ->
LastPing = erlang:monotonic_time(millisecond), State#state{timer = erlang:send_after(Interval, self(), ping_watchdog)}.
State#state{last_ping = LastPing}.
-spec cancel_timer(state()) -> ok.
cancel_timer(#state{timer = Timer}) ->
misc:cancel_timer(Timer).
-spec notify(state(), binary()) -> ok. -spec notify(state(), binary()) -> ok.
notify(#state{socket = Socket, destination = Destination}, notify(#state{socket = Socket, destination = Destination},