2007-04-12 07:31:53 +02:00
|
|
|
%%%-------------------------------------------------------------------
|
|
|
|
%%% File : ejabberd_system_monitor.erl
|
|
|
|
%%% Author : Alexey Shchepin <alexey@process-one.net>
|
|
|
|
%%% Description : Ejabberd watchdog
|
|
|
|
%%% Created : 21 Mar 2007 by Alexey Shchepin <alexey@process-one.net>
|
2007-12-24 12:41:41 +01:00
|
|
|
%%%
|
|
|
|
%%%
|
2011-02-14 13:47:22 +01:00
|
|
|
%%% ejabberd, Copyright (C) 2002-2011 ProcessOne
|
2007-12-24 12:41:41 +01:00
|
|
|
%%%
|
|
|
|
%%% This program is free software; you can redistribute it and/or
|
|
|
|
%%% modify it under the terms of the GNU General Public License as
|
|
|
|
%%% published by the Free Software Foundation; either version 2 of the
|
|
|
|
%%% License, or (at your option) any later version.
|
|
|
|
%%%
|
|
|
|
%%% This program is distributed in the hope that it will be useful,
|
|
|
|
%%% but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
%%% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
|
|
%%% General Public License for more details.
|
2009-01-19 15:47:33 +01:00
|
|
|
%%%
|
2007-12-24 12:41:41 +01:00
|
|
|
%%% You should have received a copy of the GNU General Public License
|
|
|
|
%%% along with this program; if not, write to the Free Software
|
|
|
|
%%% Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
|
|
|
|
%%% 02111-1307 USA
|
|
|
|
%%%
|
2007-04-12 07:31:53 +02:00
|
|
|
%%%-------------------------------------------------------------------
|
|
|
|
|
|
|
|
-module(ejabberd_system_monitor).
|
|
|
|
-author('alexey@process-one.net').
|
|
|
|
|
|
|
|
-behaviour(gen_server).
|
|
|
|
|
|
|
|
%% API
|
|
|
|
-export([start_link/0,
|
|
|
|
process_command/3,
|
|
|
|
process_remote_command/1]).
|
|
|
|
|
|
|
|
%% gen_server callbacks
|
|
|
|
-export([init/1, handle_call/3, handle_cast/2, handle_info/2,
|
|
|
|
terminate/2, code_change/3]).
|
|
|
|
|
2008-10-10 17:23:58 +02:00
|
|
|
-include_lib("exmpp/include/exmpp.hrl").
|
|
|
|
|
2007-04-12 07:31:53 +02:00
|
|
|
-include("ejabberd.hrl").
|
|
|
|
|
|
|
|
-record(state, {}).
|
|
|
|
|
|
|
|
%%====================================================================
|
|
|
|
%% API
|
|
|
|
%%====================================================================
|
|
|
|
%%--------------------------------------------------------------------
|
|
|
|
%% Function: start_link() -> {ok,Pid} | ignore | {error,Error}
|
|
|
|
%% Description: Starts the server
|
|
|
|
%%--------------------------------------------------------------------
|
|
|
|
start_link() ->
|
2009-03-03 19:45:26 +01:00
|
|
|
LH = case ejabberd_config:get_local_option(watchdog_large_heap) of
|
|
|
|
I when is_integer(I) -> I;
|
|
|
|
_ -> 1000000
|
|
|
|
end,
|
|
|
|
Opts = [{large_heap, LH}],
|
|
|
|
gen_server:start_link({local, ?MODULE}, ?MODULE, Opts, []).
|
2007-04-12 07:31:53 +02:00
|
|
|
|
|
|
|
process_command(From, To, Packet) ->
|
2009-06-01 18:42:07 +02:00
|
|
|
case {exmpp_jid:prep_node(To), exmpp_jid:prep_resource(To) } of
|
2009-01-03 16:15:38 +01:00
|
|
|
{undefined, <<"watchdog">>} ->
|
2008-10-10 17:23:58 +02:00
|
|
|
case Packet#xmlel.name of
|
|
|
|
'message' ->
|
2009-01-03 16:15:38 +01:00
|
|
|
case lists:any(fun(J) ->
|
2009-06-01 18:49:00 +02:00
|
|
|
exmpp_jid:compare(J,From)
|
2009-01-03 16:15:38 +01:00
|
|
|
end, get_admin_jids()) of
|
2007-04-12 07:31:53 +02:00
|
|
|
true ->
|
2008-10-10 17:23:58 +02:00
|
|
|
Body = exmpp_xml:get_path(
|
|
|
|
Packet, [{element, 'body'}, cdata_as_list]),
|
2007-04-12 07:31:53 +02:00
|
|
|
spawn(fun() ->
|
|
|
|
process_flag(priority, high),
|
|
|
|
process_command1(From, To, Body)
|
|
|
|
end),
|
|
|
|
stop;
|
|
|
|
false ->
|
|
|
|
ok
|
|
|
|
end;
|
|
|
|
_ ->
|
|
|
|
ok
|
|
|
|
end;
|
|
|
|
_ ->
|
|
|
|
ok
|
|
|
|
end.
|
|
|
|
|
|
|
|
%%====================================================================
|
|
|
|
%% gen_server callbacks
|
|
|
|
%%====================================================================
|
|
|
|
|
|
|
|
%%--------------------------------------------------------------------
|
|
|
|
%% Function: init(Args) -> {ok, State} |
|
|
|
|
%% {ok, State, Timeout} |
|
|
|
|
%% ignore |
|
|
|
|
%% {stop, Reason}
|
|
|
|
%% Description: Initiates the server
|
|
|
|
%%--------------------------------------------------------------------
|
2009-03-03 19:45:26 +01:00
|
|
|
init(Opts) ->
|
|
|
|
LH = proplists:get_value(large_heap, Opts),
|
2007-04-12 07:31:53 +02:00
|
|
|
process_flag(priority, high),
|
2009-03-03 19:45:26 +01:00
|
|
|
erlang:system_monitor(self(), [{large_heap, LH}]),
|
2007-04-12 07:31:53 +02:00
|
|
|
lists:foreach(
|
|
|
|
fun(Host) ->
|
2009-01-03 16:15:38 +01:00
|
|
|
ejabberd_hooks:add(local_send_to_resource_hook,
|
|
|
|
list_to_binary(Host),
|
2007-04-12 07:31:53 +02:00
|
|
|
?MODULE, process_command, 50)
|
|
|
|
end, ?MYHOSTS),
|
|
|
|
{ok, #state{}}.
|
|
|
|
|
|
|
|
%%--------------------------------------------------------------------
|
|
|
|
%% Function: %% handle_call(Request, From, State) -> {reply, Reply, State} |
|
|
|
|
%% {reply, Reply, State, Timeout} |
|
|
|
|
%% {noreply, State} |
|
|
|
|
%% {noreply, State, Timeout} |
|
|
|
|
%% {stop, Reason, Reply, State} |
|
|
|
|
%% {stop, Reason, State}
|
|
|
|
%% Description: Handling call messages
|
|
|
|
%%--------------------------------------------------------------------
|
2009-03-03 19:45:26 +01:00
|
|
|
handle_call({get, large_heap}, _From, State) ->
|
|
|
|
{reply, get_large_heap(), State};
|
|
|
|
handle_call({set, large_heap, NewValue}, _From, State) ->
|
|
|
|
MonSettings = erlang:system_monitor(self(), [{large_heap, NewValue}]),
|
|
|
|
OldLH = get_large_heap(MonSettings),
|
|
|
|
NewLH = get_large_heap(),
|
|
|
|
{reply, {lh_changed, OldLH, NewLH}, State};
|
2007-04-12 07:31:53 +02:00
|
|
|
handle_call(_Request, _From, State) ->
|
|
|
|
Reply = ok,
|
|
|
|
{reply, Reply, State}.
|
|
|
|
|
2009-03-03 19:45:26 +01:00
|
|
|
get_large_heap() ->
|
|
|
|
MonSettings = erlang:system_monitor(),
|
|
|
|
get_large_heap(MonSettings).
|
|
|
|
get_large_heap(MonSettings) ->
|
|
|
|
{_MonitorPid, Options} = MonSettings,
|
|
|
|
proplists:get_value(large_heap, Options).
|
|
|
|
|
2007-04-12 07:31:53 +02:00
|
|
|
%%--------------------------------------------------------------------
|
|
|
|
%% Function: handle_cast(Msg, State) -> {noreply, State} |
|
|
|
|
%% {noreply, State, Timeout} |
|
|
|
|
%% {stop, Reason, State}
|
|
|
|
%% Description: Handling cast messages
|
|
|
|
%%--------------------------------------------------------------------
|
|
|
|
handle_cast(_Msg, State) ->
|
|
|
|
{noreply, State}.
|
|
|
|
|
|
|
|
%%--------------------------------------------------------------------
|
|
|
|
%% Function: handle_info(Info, State) -> {noreply, State} |
|
|
|
|
%% {noreply, State, Timeout} |
|
|
|
|
%% {stop, Reason, State}
|
|
|
|
%% Description: Handling all non call/cast messages
|
|
|
|
%%--------------------------------------------------------------------
|
|
|
|
handle_info({monitor, Pid, large_heap, Info}, State) ->
|
|
|
|
spawn(fun() ->
|
|
|
|
process_flag(priority, high),
|
|
|
|
process_large_heap(Pid, Info)
|
|
|
|
end),
|
|
|
|
{noreply, State};
|
|
|
|
handle_info(_Info, State) ->
|
|
|
|
{noreply, State}.
|
|
|
|
|
|
|
|
%%--------------------------------------------------------------------
|
|
|
|
%% Function: terminate(Reason, State) -> void()
|
|
|
|
%% Description: This function is called by a gen_server when it is about to
|
|
|
|
%% terminate. It should be the opposite of Module:init/1 and do any necessary
|
|
|
|
%% cleaning up. When it returns, the gen_server terminates with Reason.
|
|
|
|
%% The return value is ignored.
|
|
|
|
%%--------------------------------------------------------------------
|
|
|
|
terminate(_Reason, _State) ->
|
|
|
|
ok.
|
|
|
|
|
|
|
|
%%--------------------------------------------------------------------
|
|
|
|
%% Func: code_change(OldVsn, State, Extra) -> {ok, NewState}
|
|
|
|
%% Description: Convert process state when code is changed
|
|
|
|
%%--------------------------------------------------------------------
|
|
|
|
code_change(_OldVsn, State, _Extra) ->
|
|
|
|
{ok, State}.
|
|
|
|
|
|
|
|
%%--------------------------------------------------------------------
|
|
|
|
%%% Internal functions
|
|
|
|
%%--------------------------------------------------------------------
|
|
|
|
|
|
|
|
process_large_heap(Pid, Info) ->
|
|
|
|
Host = ?MYNAME,
|
|
|
|
case ejabberd_config:get_local_option(watchdog_admins) of
|
|
|
|
JIDs when is_list(JIDs),
|
|
|
|
JIDs /= [] ->
|
|
|
|
DetailedInfo = detailed_info(Pid),
|
|
|
|
Body = io_lib:format(
|
2009-03-03 19:45:26 +01:00
|
|
|
"(~w) The process ~w is consuming too much memory:~n~p~n"
|
2007-04-12 07:31:53 +02:00
|
|
|
"~s",
|
|
|
|
[node(), Pid, Info, DetailedInfo]),
|
2009-06-01 18:26:00 +02:00
|
|
|
From = exmpp_jid:make(undefined, Host, <<"watchdog">>),
|
2007-04-12 07:31:53 +02:00
|
|
|
lists:foreach(
|
|
|
|
fun(S) ->
|
2008-10-10 17:23:58 +02:00
|
|
|
try
|
2009-06-01 18:35:55 +02:00
|
|
|
JID = exmpp_jid:parse(S),
|
2008-10-10 17:23:58 +02:00
|
|
|
send_message(From, JID, Body)
|
|
|
|
catch
|
|
|
|
_ ->
|
|
|
|
ok
|
2007-04-12 07:31:53 +02:00
|
|
|
end
|
|
|
|
end, JIDs);
|
|
|
|
_ ->
|
|
|
|
ok
|
|
|
|
end.
|
|
|
|
|
|
|
|
send_message(From, To, Body) ->
|
|
|
|
ejabberd_router:route(
|
|
|
|
From, To,
|
2008-10-10 17:23:58 +02:00
|
|
|
exmpp_message:chat(lists:flatten(Body))).
|
2007-04-12 07:31:53 +02:00
|
|
|
|
|
|
|
get_admin_jids() ->
|
|
|
|
case ejabberd_config:get_local_option(watchdog_admins) of
|
|
|
|
JIDs when is_list(JIDs) ->
|
|
|
|
lists:flatmap(
|
|
|
|
fun(S) ->
|
2008-10-10 17:23:58 +02:00
|
|
|
try
|
2009-06-01 18:35:55 +02:00
|
|
|
JID = exmpp_jid:parse(S),
|
2009-06-01 18:39:36 +02:00
|
|
|
[{exmpp_jid:prep_node(JID),
|
2009-06-01 18:37:15 +02:00
|
|
|
exmpp_jid:prep_domain(JID),
|
2009-06-01 18:42:07 +02:00
|
|
|
exmpp_jid:prep_resource(JID)}]
|
2008-10-10 17:23:58 +02:00
|
|
|
catch
|
|
|
|
_ ->
|
|
|
|
[]
|
2007-04-12 07:31:53 +02:00
|
|
|
end
|
|
|
|
end, JIDs);
|
|
|
|
_ ->
|
|
|
|
[]
|
|
|
|
end.
|
|
|
|
|
|
|
|
detailed_info(Pid) ->
|
|
|
|
case process_info(Pid, dictionary) of
|
|
|
|
{dictionary, Dict} ->
|
|
|
|
case lists:keysearch('$ancestors', 1, Dict) of
|
|
|
|
{value, {'$ancestors', [Sup | _]}} ->
|
|
|
|
case Sup of
|
|
|
|
ejabberd_c2s_sup ->
|
|
|
|
c2s_info(Pid);
|
|
|
|
ejabberd_s2s_out_sup ->
|
|
|
|
s2s_out_info(Pid);
|
|
|
|
ejabberd_service_sup ->
|
|
|
|
service_info(Pid);
|
|
|
|
_ ->
|
|
|
|
detailed_info1(Pid)
|
|
|
|
end;
|
|
|
|
_ ->
|
|
|
|
detailed_info1(Pid)
|
|
|
|
end;
|
|
|
|
_ ->
|
|
|
|
detailed_info1(Pid)
|
|
|
|
end.
|
|
|
|
|
|
|
|
detailed_info1(Pid) ->
|
|
|
|
io_lib:format(
|
|
|
|
"~p", [[process_info(Pid, current_function),
|
|
|
|
process_info(Pid, initial_call),
|
|
|
|
process_info(Pid, message_queue_len),
|
|
|
|
process_info(Pid, links),
|
|
|
|
process_info(Pid, dictionary),
|
|
|
|
process_info(Pid, heap_size),
|
|
|
|
process_info(Pid, stack_size)
|
|
|
|
]]).
|
|
|
|
|
|
|
|
c2s_info(Pid) ->
|
|
|
|
["Process type: c2s",
|
|
|
|
check_send_queue(Pid),
|
|
|
|
"\n",
|
|
|
|
io_lib:format("Command to kill this process: kill ~s ~w",
|
|
|
|
[atom_to_list(node()), Pid])].
|
|
|
|
|
|
|
|
s2s_out_info(Pid) ->
|
|
|
|
FromTo = mnesia:dirty_select(
|
|
|
|
s2s, [{{s2s, '$1', Pid, '_'}, [], ['$1']}]),
|
|
|
|
["Process type: s2s_out",
|
|
|
|
case FromTo of
|
|
|
|
[{From, To}] ->
|
|
|
|
"\n" ++ io_lib:format("S2S connection: from ~s to ~s",
|
|
|
|
[From, To]);
|
|
|
|
_ ->
|
|
|
|
""
|
|
|
|
end,
|
|
|
|
check_send_queue(Pid),
|
|
|
|
"\n",
|
|
|
|
io_lib:format("Command to kill this process: kill ~s ~w",
|
|
|
|
[atom_to_list(node()), Pid])].
|
|
|
|
|
|
|
|
service_info(Pid) ->
|
|
|
|
Routes = mnesia:dirty_select(
|
|
|
|
route, [{{route, '$1', Pid, '_'}, [], ['$1']}]),
|
|
|
|
["Process type: s2s_out",
|
|
|
|
case Routes of
|
|
|
|
[Route] ->
|
|
|
|
"\nServiced domain: " ++ Route;
|
|
|
|
_ ->
|
|
|
|
""
|
|
|
|
end,
|
|
|
|
check_send_queue(Pid),
|
|
|
|
"\n",
|
|
|
|
io_lib:format("Command to kill this process: kill ~s ~w",
|
|
|
|
[atom_to_list(node()), Pid])].
|
|
|
|
|
|
|
|
check_send_queue(Pid) ->
|
|
|
|
case {process_info(Pid, current_function),
|
|
|
|
process_info(Pid, message_queue_len)} of
|
|
|
|
{{current_function, MFA}, {message_queue_len, MLen}} ->
|
|
|
|
if
|
|
|
|
MLen > 100 ->
|
|
|
|
case MFA of
|
|
|
|
{prim_inet, send, 2} ->
|
|
|
|
"\nPossible reason: the process is blocked "
|
|
|
|
"trying to send data over its TCP connection.";
|
|
|
|
{M, F, A} ->
|
|
|
|
["\nPossible reason: the process can't process "
|
|
|
|
"messages faster than they arrive. ",
|
|
|
|
io_lib:format("Current function is ~w:~w/~w",
|
|
|
|
[M, F, A])
|
|
|
|
]
|
|
|
|
end;
|
|
|
|
true ->
|
|
|
|
""
|
|
|
|
end;
|
|
|
|
_ ->
|
|
|
|
""
|
|
|
|
end.
|
|
|
|
|
|
|
|
process_command1(From, To, Body) ->
|
|
|
|
process_command2(string:tokens(Body, " "), From, To).
|
|
|
|
|
|
|
|
process_command2(["kill", SNode, SPid], From, To) ->
|
|
|
|
Node = list_to_atom(SNode),
|
|
|
|
remote_command(Node, [kill, SPid], From, To);
|
2009-03-03 19:45:26 +01:00
|
|
|
process_command2(["showlh", SNode], From, To) ->
|
|
|
|
Node = list_to_atom(SNode),
|
|
|
|
remote_command(Node, [showlh], From, To);
|
|
|
|
process_command2(["setlh", SNode, NewValueString], From, To) ->
|
|
|
|
Node = list_to_atom(SNode),
|
|
|
|
NewValue = list_to_integer(NewValueString),
|
|
|
|
remote_command(Node, [setlh, NewValue], From, To);
|
2007-04-12 07:31:53 +02:00
|
|
|
process_command2(["help"], From, To) ->
|
|
|
|
send_message(To, From, help());
|
|
|
|
process_command2(_, From, To) ->
|
|
|
|
send_message(To, From, help()).
|
|
|
|
|
2009-03-03 19:45:26 +01:00
|
|
|
|
2007-04-12 07:31:53 +02:00
|
|
|
help() ->
|
|
|
|
"Commands:\n"
|
2009-03-03 19:45:26 +01:00
|
|
|
" kill <node> <pid>\n"
|
|
|
|
" showlh <node>\n"
|
|
|
|
" setlh <node> <integer>".
|
|
|
|
|
2007-04-12 07:31:53 +02:00
|
|
|
|
|
|
|
remote_command(Node, Args, From, To) ->
|
|
|
|
Message =
|
|
|
|
case rpc:call(Node, ?MODULE, process_remote_command, [Args]) of
|
|
|
|
{badrpc, Reason} ->
|
|
|
|
io_lib:format("Command failed:~n~p", [Reason]);
|
|
|
|
Result ->
|
|
|
|
Result
|
|
|
|
end,
|
|
|
|
send_message(To, From, Message).
|
|
|
|
|
|
|
|
process_remote_command([kill, SPid]) ->
|
|
|
|
exit(list_to_pid(SPid), kill),
|
|
|
|
"ok";
|
2009-03-03 19:45:26 +01:00
|
|
|
process_remote_command([showlh]) ->
|
|
|
|
Res = gen_server:call(ejabberd_system_monitor, {get, large_heap}),
|
|
|
|
io_lib:format("Current large heap: ~p", [Res]);
|
|
|
|
process_remote_command([setlh, NewValue]) ->
|
|
|
|
{lh_changed, OldLH, NewLH} = gen_server:call(ejabberd_system_monitor, {set, large_heap, NewValue}),
|
|
|
|
io_lib:format("Result of set large heap: ~p --> ~p", [OldLH, NewLH]);
|
2007-04-12 07:31:53 +02:00
|
|
|
process_remote_command(_) ->
|
|
|
|
throw(unknown_command).
|
|
|
|
|