diff --git a/ChangeLog b/ChangeLog index 57e20b244..bb69000da 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,11 @@ +2007-04-09 Alexey Shchepin + + * src/ejabberd_sm.erl: Minor optimisation + + * src/ejabberd_system_monitor.erl: Experimental watchdog + * src/ejabberd_sup.erl: Likewise + * src/ejabberd_config.erl: Likewise + 2007-03-22 Mickael Remond * src/guide.tex: Fixed typo. @@ -95,7 +103,7 @@ * src/cyrsasl_plain.erl: bad-auth error code replaced by not-authorized (EJAB-187). - + * src/aclocal.m4: configure --with-erlang option is now working (Thanks to Jerome Sautret) (EJAB-186). diff --git a/src/ejabberd_config.erl b/src/ejabberd_config.erl index 199f374f3..35b9212a9 100644 --- a/src/ejabberd_config.erl +++ b/src/ejabberd_config.erl @@ -123,6 +123,8 @@ process_term(Term, State) -> add_option({domain_balancing, Domain}, Balancing, State); {domain_balancing_component_number, Domain, N} -> add_option({domain_balancing_component_number, Domain}, N, State); + {watchdog_admins, Admins} -> + add_option(watchdog_admins, Admins, State); {loglevel, Loglevel} -> ejabberd_loglevel:set(Loglevel), State; diff --git a/src/ejabberd_sm.erl b/src/ejabberd_sm.erl index c320b4bef..4b67ef719 100644 --- a/src/ejabberd_sm.erl +++ b/src/ejabberd_sm.erl @@ -429,6 +429,10 @@ route_message(From, To, Packet) -> case xml:get_tag_attr_s("type", Packet) of "error" -> ok; + "groupchat" -> + bounce_offline_message(From, To, Packet); + "headline" -> + bounce_offline_message(From, To, Packet); _ -> case ejabberd_auth:is_user_exists(LUser, LServer) of true -> diff --git a/src/ejabberd_sup.erl b/src/ejabberd_sup.erl index e093b6a2b..af2c16f8f 100644 --- a/src/ejabberd_sup.erl +++ b/src/ejabberd_sup.erl @@ -40,6 +40,13 @@ init([]) -> brutal_kill, worker, [ejabberd_node_groups]}, + SystemMonitor = + {ejabberd_system_monitor, + {ejabberd_system_monitor, start_link, []}, + permanent, + brutal_kill, + worker, + [ejabberd_system_monitor]}, Router = {ejabberd_router, {ejabberd_router, start_link, []}, 
@@ -150,6 +157,7 @@ init([]) ->
           [Hooks,
            StringPrep,
            NodeGroups,
+           SystemMonitor,
            Router,
            SM,
            S2S,
diff --git a/src/ejabberd_system_monitor.erl b/src/ejabberd_system_monitor.erl
new file mode 100644
index 000000000..3192b7763
--- /dev/null
+++ b/src/ejabberd_system_monitor.erl
@@ -0,0 +1,313 @@
+%%%-------------------------------------------------------------------
+%%% File        : ejabberd_system_monitor.erl
+%%% Author      : Alexey Shchepin
+%%% Description : Ejabberd watchdog
+%%% Created     : 21 Mar 2007 by Alexey Shchepin
+%%% Id          : $Id$
+%%%-------------------------------------------------------------------
+
+-module(ejabberd_system_monitor).
+-author('alexey@process-one.net').
+-vsn('$Revision$ ').
+
+-behaviour(gen_server).
+
+%% API
+-export([start_link/0,
+         process_command/3,
+         process_remote_command/1]).
+
+%% gen_server callbacks
+-export([init/1, handle_call/3, handle_cast/2, handle_info/2,
+         terminate/2, code_change/3]).
+
+-include("ejabberd.hrl").
+-include("jlib.hrl").
+
+-record(state, {}).
+
+%%====================================================================
+%% API
+%%====================================================================
+%%--------------------------------------------------------------------
+%% Function: start_link() -> {ok,Pid} | ignore | {error,Error}
+%% Description: Starts the watchdog server, registered locally under
+%% the module name.
+%%--------------------------------------------------------------------
+start_link() ->
+    gen_server:start_link({local, ?MODULE}, ?MODULE, [], []).
+
+%%--------------------------------------------------------------------
+%% Function: process_command(From, To, Packet) -> stop | ok
+%% Description: Hook callback (installed by init/1 on
+%% local_send_to_resource_hook). Only "message" stanzas addressed to
+%% the server JID with resource "watchdog" and sent by one of the
+%% configured watchdog admins are treated as commands; they are run in
+%% a separate high-priority process and 'stop' is returned. Everything
+%% else yields 'ok'.
+%% NOTE(review): 'stop' presumably ends the hook chain - confirm against
+%% ejabberd_hooks.
+%%--------------------------------------------------------------------
+process_command(From, #jid{luser = "", lresource = "watchdog"} = To, Packet) ->
+    %% Anything routed here is expected to be an xmlelement tuple.
+    {xmlelement, Name, _Attrs, _Els} = Packet,
+    case Name =:= "message" andalso is_watchdog_admin(From) of
+        true ->
+            %% The body is extracted in the caller, the command itself
+            %% runs in its own process.
+            Body = xml:get_path_s(Packet, [{elem, "body"}, cdata]),
+            Worker = fun() ->
+                             process_flag(priority, high),
+                             process_command1(From, To, Body)
+                     end,
+            spawn(Worker),
+            stop;
+        false ->
+            ok
+    end;
+process_command(_From, _To, _Packet) ->
+    ok.
+
+%% True when the bare, lower-cased sender JID is listed in the
+%% watchdog_admins option.
+is_watchdog_admin(From) ->
+    BareJID = jlib:jid_tolower(jlib:jid_remove_resource(From)),
+    lists:member(BareJID, get_admin_jids()).
+
+%%====================================================================
+%% gen_server callbacks
+%%====================================================================
+
+%%--------------------------------------------------------------------
+%% Function: init(Args) -> {ok, State}
+%% Description: Raises the server's priority, subscribes this process
+%% to large_heap system monitor events (threshold 1000000) and installs
+%% process_command/3 on local_send_to_resource_hook for every
+%% configured host.
+%%--------------------------------------------------------------------
+init([]) ->
+    process_flag(priority, high),
+    erlang:system_monitor(self(), [{large_heap, 1000000}]),
+    InstallHook =
+        fun(Host) ->
+                ejabberd_hooks:add(local_send_to_resource_hook, Host,
+                                   ?MODULE, process_command, 50)
+        end,
+    lists:foreach(InstallHook, ?MYHOSTS),
+    {ok, #state{}}.
+
+%%--------------------------------------------------------------------
+%% Function: handle_call(Request, From, State) -> {reply, ok, State}
+%% Description: No synchronous requests are defined; every call is
+%% answered with 'ok' and the state is left untouched.
+%%--------------------------------------------------------------------
+handle_call(_Request, _From, State) ->
+    {reply, ok, State}.
+
+%%--------------------------------------------------------------------
+%% Function: handle_cast(Msg, State) -> {noreply, State}
+%% Description: No casts are defined; they are ignored.
+%%--------------------------------------------------------------------
+handle_cast(_Msg, State) ->
+    {noreply, State}.
+
+%%--------------------------------------------------------------------
+%% Function: handle_info(Info, State) -> {noreply, State}
+%% Description: Handles large_heap notifications requested in init/1.
+%% The report is built and sent from a separate high-priority process;
+%% the server itself returns immediately. Any other message is dropped.
+%%--------------------------------------------------------------------
+handle_info({monitor, Pid, large_heap, Info}, State) ->
+    Reporter = fun() ->
+                       process_flag(priority, high),
+                       process_large_heap(Pid, Info)
+               end,
+    spawn(Reporter),
+    {noreply, State};
+handle_info(_Info, State) ->
+    {noreply, State}.
+
+%%--------------------------------------------------------------------
+%% Function: terminate(Reason, State) -> ok
+%% Description: Nothing is cleaned up here; the return value is ignored
+%% by gen_server.
+%%--------------------------------------------------------------------
+terminate(_Reason, _State) ->
+    ok.
+
+%%--------------------------------------------------------------------
+%% Func: code_change(OldVsn, State, Extra) -> {ok, NewState}
+%% Description: The state needs no conversion on code upgrade.
+%%--------------------------------------------------------------------
+code_change(_OldVsn, State, _Extra) ->
+    {ok, State}.
+
+%%--------------------------------------------------------------------
+%%% Internal functions
+%%--------------------------------------------------------------------
+
+%% Builds a report about Pid (the process named in a large_heap event)
+%% and sends it, from the "watchdog" resource of the main host, to every
+%% JID string in the watchdog_admins option. Does nothing when the
+%% option is unset or empty; entries that do not parse are skipped.
+process_large_heap(Pid, Info) ->
+    Host = ?MYNAME,
+    case ejabberd_config:get_local_option(watchdog_admins) of
+        [_ | _] = Admins ->
+            Details = detailed_info(Pid),
+            Report = io_lib:format(
+                       "(~w) The process ~w is consuming too much memory: ~w.~n~s",
+                       [node(), Pid, Info, Details]),
+            Watchdog = jlib:make_jid("", Host, "watchdog"),
+            lists:foreach(
+              fun(AdminStr) ->
+                      notify_admin(jlib:string_to_jid(AdminStr),
+                                   Watchdog, Report)
+              end, Admins);
+        _ ->
+            ok
+    end.
+
+%% Sends Report to one parsed admin JID; 'error' marks an unparsable
+%% entry and is ignored.
+notify_admin(error, _Watchdog, _Report) ->
+    ok;
+notify_admin(AdminJID, Watchdog, Report) ->
+    send_message(Watchdog, AdminJID, Report).
+
+%% Routes a chat message from From to To whose body is the flattened
+%% Body (a possibly deep character list).
+send_message(From, To, Body) ->
+    Text = lists:flatten(Body),
+    BodyEl = {xmlelement, "body", [], [{xmlcdata, Text}]},
+    Stanza = {xmlelement, "message", [{"type", "chat"}], [BodyEl]},
+    ejabberd_router:route(From, To, Stanza).
+
+%% Returns the watchdog_admins option as a list of lower-cased JIDs,
+%% silently dropping entries that do not parse. Yields [] when the
+%% option is not a list.
+get_admin_jids() ->
+    case ejabberd_config:get_local_option(watchdog_admins) of
+        Admins when is_list(Admins) ->
+            [LJID || AdminStr <- Admins,
+                     LJID <- lowered_jid(jlib:string_to_jid(AdminStr))];
+        _ ->
+            []
+    end.
+
+%% Zero-or-one element list holding the lower-cased form of a parsed JID.
+lowered_jid(error) ->
+    [];
+lowered_jid(JID) ->
+    [jlib:jid_tolower(JID)].
+
+%% Picks a report format from the first entry of the process's
+%% '$ancestors' dictionary key: c2s, outgoing s2s and service processes
+%% get a tailored description, anything else a generic process_info dump.
+detailed_info(Pid) ->
+    case first_ancestor(Pid) of
+        ejabberd_c2s_sup ->
+            c2s_info(Pid);
+        ejabberd_s2s_out_sup ->
+            s2s_out_info(Pid);
+        ejabberd_service_sup ->
+            service_info(Pid);
+        _ ->
+            detailed_info1(Pid)
+    end.
+
+%% First element of Pid's '$ancestors' list, or 'undefined' when the
+%% dictionary cannot be read or holds no such non-empty list.
+first_ancestor(Pid) ->
+    case process_info(Pid, dictionary) of
+        {dictionary, Dict} ->
+            case lists:keysearch('$ancestors', 1, Dict) of
+                {value, {'$ancestors', [Sup | _]}} ->
+                    Sup;
+                _ ->
+                    undefined
+            end;
+        _ ->
+            undefined
+    end.
+
+%% Generic report: pretty-printed list of selected process_info items.
+detailed_info1(Pid) ->
+    Items = [current_function,
+             initial_call,
+             message_queue_len,
+             links,
+             dictionary,
+             heap_size,
+             stack_size],
+    io_lib:format("~p", [[process_info(Pid, Item) || Item <- Items]]).
+
+%% Report text for a client-to-server connection process.
+c2s_info(Pid) ->
+    ["Process type: c2s",
+     check_send_queue(Pid),
+     "\n",
+     kill_command_hint(Pid)].
+
+%% Report text for an outgoing server-to-server connection process.
+%% The connection's {From, To} pair is looked up in the mnesia table
+%% 's2s' by pid and shown when exactly one row matches.
+%% NOTE(review): the match spec assumes s2s rows are 4-tuples with the
+%% pid in the third position - confirm against the s2s record.
+s2s_out_info(Pid) ->
+    FromTo = mnesia:dirty_select(
+               s2s, [{{s2s, '$1', Pid, '_'}, [], ['$1']}]),
+    ["Process type: s2s_out",
+     case FromTo of
+         [{From, To}] ->
+             "\n" ++ io_lib:format("S2S connection: from ~s to ~s",
+                                   [From, To]);
+         _ ->
+             ""
+     end,
+     check_send_queue(Pid),
+     "\n",
+     kill_command_hint(Pid)].
+
+%% Report text for an external component (service) process. The served
+%% domain is looked up in the mnesia table 'route' by pid and shown when
+%% exactly one row matches.
+%% The type label used to read "s2s_out" (copied from s2s_out_info/1);
+%% it now names the actual process type.
+service_info(Pid) ->
+    Routes = mnesia:dirty_select(
+               route, [{{route, '$1', Pid, '_'}, [], ['$1']}]),
+    ["Process type: service",
+     case Routes of
+         [Route] ->
+             "\nServiced domain: " ++ Route;
+         _ ->
+             ""
+     end,
+     check_send_queue(Pid),
+     "\n",
+     kill_command_hint(Pid)].
+
+%% The ready-to-paste watchdog command that kills Pid on this node.
+kill_command_hint(Pid) ->
+    io_lib:format("Command to kill this process: kill ~s ~w",
+                  [atom_to_list(node()), Pid]).
+
+%% Returns a "Possible reason" hint when Pid has more than 100 queued
+%% messages, and "" otherwise (including when the process information
+%% cannot be read).
+check_send_queue(Pid) ->
+    case {process_info(Pid, current_function),
+          process_info(Pid, message_queue_len)} of
+        {{current_function, MFA}, {message_queue_len, MLen}}
+          when MLen > 100 ->
+            send_queue_reason(MFA);
+        _ ->
+            ""
+    end.
+
+%% Explains a long message queue from the function the process is
+%% currently executing.
+send_queue_reason({prim_inet, send, 2}) ->
+    "\nPossible reason: the process is blocked "
+        "trying to send data over its TCP connection.";
+send_queue_reason({M, F, A}) ->
+    ["\nPossible reason: the process can't process "
+     "messages faster than they arrive.  ",
+     io_lib:format("Current function is ~w:~w/~w", [M, F, A])].
+
+%% Splits the message body into words and dispatches the command.
+%% Tabs and line breaks separate words too, so a trailing newline added
+%% by the client does not end up inside the last argument.
+process_command1(From, To, Body) ->
+    process_command2(string:tokens(Body, " \t\r\n"), From, To).
+
+%% Executes one tokenised watchdog command and replies to the sender
+%% (replies go from To, the watchdog JID, back to From).
+%%   ["kill", Node, Pid] - kill Pid on Node
+%%   anything else       - reply with the help text
+process_command2(["kill", SNode, SPid], From, To) ->
+    %% Atoms are never garbage collected, so the node name taken from
+    %% the message text must not be allowed to create new ones. The
+    %% names of connected nodes already exist as atoms; a name that is
+    %% not a known atom therefore cannot be a reachable cluster node.
+    try list_to_existing_atom(SNode) of
+        Node ->
+            remote_command(Node, [kill, SPid], From, To)
+    catch
+        error:badarg ->
+            send_message(To, From,
+                         io_lib:format("Command failed:~nunknown node ~s",
+                                       [SNode]))
+    end;
+process_command2(_, From, To) ->
+    %% Covers "help" as well as every unrecognised command.
+    send_message(To, From, help()).
+
+%% Help text listing the supported commands with their arguments.
+help() ->
+    "Commands:\n"
+        "  kill <node> <pid>".
+
+%% Runs process_remote_command(Args) on Node via rpc and sends the
+%% outcome back to the requester (from To, the watchdog JID, to From).
+%% rpc:call hands back a thrown term as if it were a normal result, so
+%% only a character list is accepted as a reply text; any other result
+%% (for instance 'unknown_command' thrown by a node that does not know
+%% the command) is reported as a failure instead of crashing in
+%% send_message/3.
+remote_command(Node, Args, From, To) ->
+    Message =
+        case rpc:call(Node, ?MODULE, process_remote_command, [Args]) of
+            {badrpc, Reason} ->
+                io_lib:format("Command failed:~n~p", [Reason]);
+            Result when is_list(Result) ->
+                Result;
+            Other ->
+                io_lib:format("Command failed:~n~p", [Other])
+        end,
+    send_message(To, From, Message).
+
+%% Executes a watchdog command on the local node; called through rpc by
+%% remote_command/4.
+%%   [kill, SPid] - SPid is the textual form of a local pid; the process
+%%                  is killed unconditionally and "ok" is returned. A
+%%                  malformed SPid makes list_to_pid/1 fail with badarg.
+%%   anything else throws unknown_command.
+process_remote_command([kill, SPid]) ->
+    exit(list_to_pid(SPid), kill),
+    "ok";
+process_remote_command(_) ->
+    throw(unknown_command).
+