25
1
mirror of https://github.com/processone/ejabberd.git synced 2024-11-20 16:15:59 +01:00

Merge branch 'cluster_fix'

This commit is contained in:
Christophe Romain 2015-11-20 14:52:36 +01:00
commit 4480749a52
7 changed files with 83 additions and 277 deletions

View File

@ -183,8 +183,6 @@ install: all copy-files
# Binary C programs # Binary C programs
$(INSTALL) -d $(PBINDIR) $(INSTALL) -d $(PBINDIR)
$(INSTALL) -m 750 $(O_USER) tools/captcha.sh $(PBINDIR) $(INSTALL) -m 750 $(O_USER) tools/captcha.sh $(PBINDIR)
$(INSTALL) -m 750 $(O_USER) tools/joincluster $(PBINDIR)
$(INSTALL) -m 750 $(O_USER) tools/leavecluster $(PBINDIR)
# #
# Copy lite.sql # Copy lite.sql
[ -d deps/sqlite3 ] && $(INSTALL) -d $(SQLDIR) || true [ -d deps/sqlite3 ] && $(INSTALL) -d $(SQLDIR) || true

View File

@ -447,16 +447,6 @@ check_start()
} }
} }
# cluster setup
# Run the joincluster tool located in $EJABBERD_BIN_PATH through $EXEC_CMD,
# forwarding every argument this function received.
join_cluster()
{
# The tool path and "$*" are inside one quoted string, so $EXEC_CMD receives
# the whole command line as a single word.
$EXEC_CMD "$EJABBERD_BIN_PATH/joincluster $*"
}
# Run the leavecluster tool located in $EJABBERD_BIN_PATH through $EXEC_CMD,
# forwarding every argument this function received.
leave_cluster()
{
# The tool path and "$*" are inside one quoted string, so $EXEC_CMD receives
# the whole command line as a single word.
$EXEC_CMD "$EJABBERD_BIN_PATH/leavecluster $*"
}
# allow sync calls # allow sync calls
wait_for_status() wait_for_status()
{ {
@ -489,7 +479,5 @@ case $ARGS in
' etop') etop;; ' etop') etop;;
' started') wait_for_status 0 30 2;; # wait 30x2s before timeout ' started') wait_for_status 0 30 2;; # wait 30x2s before timeout
' stopped') wait_for_status 3 15 2 && stop_epmd;; # wait 15x2s before timeout ' stopped') wait_for_status 3 15 2 && stop_epmd;; # wait 15x2s before timeout
' join_cluster'*) join_cluster ${ARGS# join_cluster};;
' leave_cluster'*) leave_cluster ${ARGS# leave_cluster};;
*) ctl $ARGS;; *) ctl $ARGS;;
esac esac

View File

@ -33,6 +33,8 @@
stop_kindly/2, send_service_message_all_mucs/2, stop_kindly/2, send_service_message_all_mucs/2,
registered_vhosts/0, registered_vhosts/0,
reload_config/0, reload_config/0,
%% Cluster
join_cluster/1, leave_cluster/1, list_cluster/0,
%% Erlang %% Erlang
update_list/0, update/1, update_list/0, update/1,
%% Accounts %% Accounts
@ -146,6 +148,22 @@ commands() ->
args = [], args = [],
result = {res, rescode}}, result = {res, rescode}},
#ejabberd_commands{name = join_cluster, tags = [cluster],
desc = "Join this node into the cluster handled by Node",
module = ?MODULE, function = join_cluster,
args = [{node, binary}],
result = {res, rescode}},
#ejabberd_commands{name = leave_cluster, tags = [cluster],
desc = "Remove node handled by Node from the cluster",
module = ?MODULE, function = leave_cluster,
args = [{node, binary}],
result = {res, rescode}},
#ejabberd_commands{name = list_cluster, tags = [cluster],
desc = "List nodes that are part of the cluster handled by Node",
module = ?MODULE, function = list_cluster,
args = [],
result = {nodes, {list, {node, atom}}}},
#ejabberd_commands{name = import_file, tags = [mnesia], #ejabberd_commands{name = import_file, tags = [mnesia],
desc = "Import user data from jabberd14 spool file", desc = "Import user data from jabberd14 spool file",
module = ?MODULE, function = import_file, module = ?MODULE, function = import_file,
@ -373,6 +391,19 @@ reload_config() ->
acl:start(), acl:start(),
shaper:start(). shaper:start().
%%%
%%% Cluster management
%%%
%% @doc Admin command handler: convert the node name received as a binary
%% into an atom and hand it to ejabberd_cluster:join/1, whose result is
%% returned unchanged.
join_cluster(NodeBin) ->
    NodeName = binary_to_list(NodeBin),
    Node = list_to_atom(NodeName),
    ejabberd_cluster:join(Node).
%% @doc Admin command handler: convert the node name received as a binary
%% into an atom and hand it to ejabberd_cluster:leave/1, whose result is
%% returned unchanged.
leave_cluster(NodeBin) ->
    NodeName = binary_to_list(NodeBin),
    Node = list_to_atom(NodeName),
    ejabberd_cluster:leave(Node).
%% @doc Admin command handler: return the node list reported by
%% ejabberd_cluster:get_nodes/0.
list_cluster() ->
    Nodes = ejabberd_cluster:get_nodes(),
    Nodes.
%%% %%%
%%% Migration management %%% Migration management
%%% %%%

View File

@ -27,6 +27,7 @@
%% API %% API
-export([get_nodes/0, call/4, multicall/3, multicall/4]). -export([get_nodes/0, call/4, multicall/3, multicall/4]).
-export([join/1, leave/1]).
-include("ejabberd.hrl"). -include("ejabberd.hrl").
-include("logger.hrl"). -include("logger.hrl").
@ -51,3 +52,53 @@ multicall(Module, Function, Args) ->
multicall(Nodes, Module, Function, Args) -> multicall(Nodes, Module, Function, Args) ->
rpc:multicall(Nodes, Module, Function, Args, 5000). rpc:multicall(Nodes, Module, Function, Args, 5000).
%% @doc Make the local node join the mnesia cluster that `Node' belongs to.
%%
%% Returns `{error, {not_master, Node}}' when `Node' is the local node, and
%% `{error, {no_ping, Node}}' when `Node' does not answer net_adm:ping/1.
%% Otherwise ejabberd and mnesia are stopped, the LOCAL mnesia schema is
%% deleted, mnesia is restarted and connected to `Node', and ejabberd is
%% started again. The value returned is the result of that final
%% application:start/1.
-spec join(node()) -> ok | {error, any()}.
join(Node) ->
    case {node(), net_adm:ping(Node)} of
        {Node, _} ->
            {error, {not_master, Node}};
        {_, pong} ->
            application:stop(ejabberd),
            application:stop(mnesia),
            mnesia:delete_schema([node()]),
            application:start(mnesia),
            mnesia:change_config(extra_db_nodes, [Node]),
            mnesia:change_table_copy_type(schema, node(), disc_copies),
            %% Table copies are added from a separate process, so ejabberd
            %% is started without waiting for the replication to finish.
            spawn(fun() ->
                          Tables = mnesia:system_info(tables) -- [schema],
                          lists:foreach(
                            fun(Table) -> replicate_table(Node, Table) end,
                            Tables)
                  end),
            application:start(ejabberd);
        _ ->
            {error, {no_ping, Node}}
    end.

%% Add a local copy of Table, using the storage type the table has on Node.
%% A failed lookup or a failed copy is logged instead of being silently
%% dropped; the remaining tables are still processed by the caller.
replicate_table(Node, Table) ->
    case call(Node, mnesia, table_info, [Table, storage_type]) of
        {badrpc, RpcReason} ->
            ?ERROR_MSG("Cannot get storage type of table ~p on node ~p: ~p",
                       [Table, Node, RpcReason]);
        Type ->
            case mnesia:add_table_copy(Table, node(), Type) of
                {atomic, ok} ->
                    ok;
                {aborted, CopyReason} ->
                    ?ERROR_MSG("Cannot add ~p copy of table ~p from node ~p: ~p",
                               [Type, Table, Node, CopyReason])
            end
    end.
%% @doc Remove `Node' from the cluster.
%%
%% Three cases, selected by comparing `Node' with the local node and by
%% pinging it:
%%   - `Node' is the local node: delegate to leave/2 with the other nodes
%%     returned by get_nodes/0.
%%   - `Node' is a reachable remote node: ask that node to run leave/1 on
%%     itself (10 second RPC timeout).
%%   - `Node' is unreachable: drop its schema copy from the local mnesia.
%%
%% An RPC failure is reported as `{error, {badrpc, Reason}}' so that the
%% result always matches the declared return type.
-spec leave(node()) -> ok | {error, any()}.
leave(Node) ->
    case {node(), net_adm:ping(Node)} of
        {Node, _} ->
            Cluster = get_nodes() -- [Node],
            leave(Cluster, Node);
        {_, pong} ->
            case rpc:call(Node, ?MODULE, leave, [Node], 10000) of
                {badrpc, Reason} -> {error, {badrpc, Reason}};
                Result -> Result
            end;
        {_, pang} ->
            case mnesia:del_table_copy(schema, Node) of
                {atomic, ok} -> ok;
                {aborted, Reason} -> {error, Reason}
            end
    end.
%% Detach the local node `Node' from the cluster, given the list of the
%% other cluster nodes.
%%
%% With no other node there is nothing to leave, so an error is returned.
%% Otherwise ejabberd and mnesia are stopped locally, the first remaining
%% node is asked to drop this node's schema copy, and a spawned process
%% deletes the local schema and halts the VM with status 0.
%% NOTE(review): the result of the remote del_table_copy call is not
%% checked before the local schema is deleted - confirm this is intended.
leave([], Node) ->
    {error, {no_cluster, Node}};
leave([Master | _Others], Node) ->
    application:stop(ejabberd),
    application:stop(mnesia),
    call(Master, mnesia, del_table_copy, [schema, Node]),
    %% Done in a separate process; presumably so that this function can
    %% still return `ok' to its caller before the VM goes down.
    WipeAndHalt = fun() ->
                          mnesia:delete_schema([node()]),
                          erlang:halt(0)
                  end,
    spawn(WipeAndHalt),
    ok.

View File

@ -80,7 +80,7 @@ start() ->
end end
end, end,
Node = list_to_atom(SNode1), Node = list_to_atom(SNode1),
Status = case ejabberd_cluster:call(Node, ?MODULE, process, [Args]) of Status = case rpc:call(Node, ?MODULE, process, [Args], 60000) of
{badrpc, Reason} -> {badrpc, Reason} ->
print("Failed RPC connection to the node ~p: ~p~n", print("Failed RPC connection to the node ~p: ~p~n",
[Node, Reason]), [Node, Reason]),

View File

@ -1,153 +0,0 @@
#!/bin/sh
# Add the current ejabberd node in a cluster
# copyright (c) 2010-2015 ProcessOne
# Return Code:
# 0 : groovy baby
# 10 : ejabberdctl not found
# 11 : erl not found
# 12 : erlc not found
# 20 : database dir can not be created
# 21 : database dir not writable
# 22 : temporary dir can not be created
# 30 : network issue
# 31 : node names incompatibility
# Print an error message on stderr and abort the script.
#   $1: message text, printed after the "Error: " prefix
#   $2: exit status; defaults to 1 when omitted, so a failure can never be
#       reported with the status of the preceding echo (0)
error()
{
    echo "Error: $1" >&2
    exit "${2:-1}"
}
# Show the data-loss warning and wait for a line of input, unless
# NO_WARNINGS is set to a non-empty value. The variable is quoted so that a
# value containing spaces cannot break the test expression.
[ -z "${NO_WARNINGS:-}" ] && {
    echo "--------------------------------------------------------------------"
    echo ""
    echo "ejabberd cluster configuration"
    echo ""
    echo "This ejabberd node will be configured for use in an ejabberd cluster."
    echo "IMPORTANT: all local data from the database will be lost, and"
    echo "cluster database will be initialized. All data from the master"
    echo "node will be replicated to this one."
    echo ""
    echo "--------------------------------------------------------------------"
    echo "Press any key to continue, or Ctrl+C to stop now"
    read foo
    echo ""
}
# Determine the remote master node name (REMOTE): prompt for it when the
# script was started without arguments, otherwise take the first argument.
# A real if/else is used instead of "A && { .. } || { .. }", where the
# second group would also run if the last command of the first one failed.
if [ $# -eq 0 ]; then
    echo "Make sure you have a running remote master ejabberd node"
    echo "Before continuing, you must copy the ~/.erlang.cookie file from"
    echo "remote master node and check ejabberd.cfg compatibility."
    echo "e.g. hosts definition must match on all nodes"
    echo ""
    echo "The remote master node name is defined as ERLANG_NODE into"
    echo "ejabberdctl.cfg on that remote node."
    echo ""
    # printf instead of "echo -n", which is not portable across sh variants
    printf '%s' "Remote master node name: "
    read REMOTE
    echo ""
else
    echo "Using passed parameter for remote master node name: $1"
    REMOTE=$1
fi
# Locate ejabberdctl, erl and erlc, and stop the local ejabberd node.
# PA is the temporary directory used later to build the helper module.
PA=/tmp/clustersetup_$$
CTL=$(which ejabberdctl)
# ejabberdctl is not in PATH: fall back to the bin/ directory that sits next
# to the directory containing this script.
[ -x "$CTL" ] || {
    HERE=`which "$0"`
    BASE=`dirname "$HERE"`/..
    ROOTDIR=`cd "$BASE"; pwd`
    PATH=$ROOTDIR/bin:$PATH
    PA=$ROOTDIR/clustersetup_$$
    CTL=$(which ejabberdctl)
}
echo "Using commands:"
[ -x "$CTL" ] && echo $CTL || error "can't find ejabberdctl" 10
# Run the stop command as a child process. With "exec" the shell would be
# replaced by ejabberdctl and nothing below this line would ever run.
$CTL stop 2>/dev/null >/dev/null
# ERL is not assigned anywhere in this script, so it has to come from the
# environment. The tests are quoted because an unquoted empty value turns
# "[ -x ]" into a test that always succeeds.
ERLC=${ERL:-}c
[ -x "${ERL:-}" ] && echo $ERL || error "can't find erl" 11
[ -x "$ERLC" ] && echo $ERLC || error "can't find erlc" 12
echo ""
# Pick the Erlang naming flag for the local node: a name containing a dot is
# treated as fully qualified (-name), any other name as short (-sname).
case "$ERLANG_NODE" in
    *.*) NAME=-name ;;
    *)   NAME=-sname ;;
esac
CLUSTERSETUP=clustersetup
CLUSTERSETUP_ERL=$PA/$CLUSTERSETUP.erl
# Same classification for the remote master node.
case "$REMOTE" in
    *.*) REMOTENAME=-name ;;
    *)   REMOTENAME=-sname ;;
esac
# Both nodes must use the same kind of name; abort with status 31 otherwise.
if [ "$REMOTENAME" != "$NAME" ]; then
    echo "IMPORTANT!: node names are incompatible"
    echo "Remote node name is $REMOTE"
    echo "Local node name is $ERLANG_NODE"
    echo ""
    echo "Both node names must be short or fqdn names."
    echo "Using short and fqdn names is impossible."
    echo ""
    error "incompatible node names" 31
fi
# From here on, abort on any failing command and on any unset variable.
set -o errexit
set -o nounset
# Empty the mnesia spool directory, or create it when it does not exist.
# ${SPOOL_DIR:?} aborts when the variable is unset OR empty: with an empty
# value the unquoted original would have expanded to "rm -Rf /*".
[ -d "${SPOOL_DIR:?SPOOL_DIR is not set}" ] && rm -Rf "$SPOOL_DIR"/* || mkdir -p "$SPOOL_DIR" || error "$SPOOL_DIR cannot be created" 20
[ -w "$SPOOL_DIR" ] || error "$SPOOL_DIR directory is not writable" 21
# Create the temporary build directory and work from inside it.
mkdir -p "$PA" || error "$PA cannot be created" 22
cd "$PA"
# Write the Erlang helper module to $CLUSTERSETUP_ERL. The heredoc delimiter
# is unquoted, so the shell substitutes $CLUSTERSETUP and $REMOTE into the
# generated source before it is written.
# The generated start/0 pings the remote node. On pong it sets the local
# schema table to disc_copies, then walks the table list reported by the
# remote node (skipping schema, s2s and session) and adds a local copy of
# each table with the storage type it has on the remote node, and halts
# with status 0. On pang it prints a hint and halts with status 1.
cat <<EOF > $CLUSTERSETUP_ERL
-module($CLUSTERSETUP).
-export([start/0]).
set_table_copy(Table, _Node, {badrpc, Reason}) ->
io:format("Error: cannot get storage type for table ~p on node $REMOTE:~n ~p~n",[Table, Reason]);
set_table_copy(Table, Node, Type) ->
io:format("setting table ~p to mode ~p~n",[Table, Type]),
case mnesia:add_table_copy(Table, Node, Type) of
{aborted, _} ->
mnesia:change_table_copy_type(Table, Node, Type);
_ ->
ok
end.
set_tables({badrpc, Reason}) ->
io:format("ERROR: cannot get tables list on $REMOTE : ~p~n",[Reason]);
set_tables([]) ->
ok;
set_tables([schema | Tables]) ->
set_tables(Tables);
set_tables([s2s | Tables]) ->
set_tables(Tables);
set_tables([session | Tables]) ->
set_tables(Tables);
set_tables([Table | Tables]) ->
set_table_copy(Table, node(),
rpc:call('$REMOTE', mnesia, table_info, [Table, storage_type])),
set_tables(Tables).
start() ->
io:format("~n",[]),
R = case net_adm:ping('$REMOTE') of
pong ->
set_table_copy(schema, node(), disc_copies),
set_tables(rpc:call('$REMOTE', mnesia, system_info, [tables])),
0;
pang ->
io:format("node ~p is not reachable, please check epmd port, and FIREWALL_WINDOW ports~n", ['$REMOTE']),
1
end,
halt(R).
EOF
# Compile the generated module and run it in a temporary Erlang node that
# is connected to the remote master's mnesia.
# The script runs under errexit, so a failing erlc or erl (the generated
# module halts with status 1 when the remote node is unreachable) would
# abort before the cleanup below. The EXIT trap removes the temporary
# directory in that case too.
trap 'rm -Rf "$PA"' EXIT
$ERLC -o $PA $CLUSTERSETUP_ERL
$ERL $NAME $ERLANG_NODE -pa $PA $KERNEL_OPTS -mnesia extra_db_nodes "['$REMOTE']" dir "\"$SPOOL_DIR\"" -s mnesia -s $CLUSTERSETUP start
cd -
rm -Rf $PA
# Normal path: cleanup is done, the trap is no longer needed.
trap - EXIT
echo "End."
echo "Check that there is no error in the above messages."

View File

@ -1,109 +0,0 @@
#!/bin/sh
# Remove the current ejabberd node in a cluster
# copyright (c) 2010-2015 ProcessOne
# Return Code:
# 0 : groovy baby
# 10 : ejabberdctl not found
# 11 : erl not found
# 12 : erlc not found
# 22 : temporary dir can not be created
# Print an error message on stderr and abort the script.
#   $1: message text, printed after the "Error: " prefix
#   $2: exit status; defaults to 1 when omitted, so a failure can never be
#       reported with the status of the preceding echo (0)
error()
{
    echo "Error: $1" >&2
    exit "${2:-1}"
}
# Show the warning and wait for a line of input, unless NO_WARNINGS is set
# to a non-empty value. The variable is quoted so that a value containing
# spaces cannot break the test expression.
[ -z "${NO_WARNINGS:-}" ] && {
    echo "--------------------------------------------------------------------"
    echo ""
    echo "ejabberd cluster configuration"
    echo ""
    echo "This ejabberd node will be removed from the cluster."
    echo "IMPORTANT: this node will be stopped. At least one other clustered"
    echo "node must be running."
    echo ""
    echo "--------------------------------------------------------------------"
    echo "Press any key to continue, or Ctrl+C to stop now"
    read foo
    echo ""
}
# Locate ejabberdctl, erl and erlc, stop the local ejabberd node, and
# prepare the temporary build directory (PA).
PA=/tmp/clustersetup_$$
CTL=$(which ejabberdctl)
# ejabberdctl is not in PATH: fall back to the bin/ directory that sits next
# to the directory containing this script.
[ -x "$CTL" ] || {
    HERE=`which "$0"`
    BASE=`dirname "$HERE"`/..
    ROOTDIR=`cd "$BASE"; pwd`
    PATH=$ROOTDIR/bin:$PATH
    PA=$ROOTDIR/clustersetup_$$
    CTL=$(which ejabberdctl)
}
echo "Using commands:"
[ -x "$CTL" ] && echo $CTL || error "can't find ejabberdctl" 10
# Run the stop command as a child process. With "exec" the shell would be
# replaced by ejabberdctl and nothing below this line would ever run.
$CTL stop 2>/dev/null >/dev/null
# ERL is not assigned anywhere in this script, so it has to come from the
# environment. The tests are quoted because an unquoted empty value turns
# "[ -x ]" into a test that always succeeds.
ERLC=${ERL:-}c
[ -x "${ERL:-}" ] && echo $ERL || error "can't find erl" 11
[ -x "$ERLC" ] && echo $ERLC || error "can't find erlc" 12
echo ""
$CTL stopped
CLUSTERSETUP=clustersetup
CLUSTERSETUP_ERL=$PA/$CLUSTERSETUP.erl
# From here on, abort on any failing command and on any unset variable.
set -o errexit
set -o nounset
mkdir -p $PA || error "$PA cannot be created" 22
cd $PA
# Write the Erlang helper module to $CLUSTERSETUP_ERL. The heredoc delimiter
# is unquoted, so the shell substitutes $CLUSTERSETUP into the generated
# source before it is written.
# The generated start/0 looks for other running mnesia nodes. If there are
# none it prints an error. Otherwise it removes this node's copy of every
# local table except schema, stops mnesia, asks the first other node to
# drop this node's schema copy and, when that succeeds, deletes the local
# schema. It always halts with status 0, including after an error message.
cat <<EOF > $CLUSTERSETUP_ERL
-module($CLUSTERSETUP).
-export([start/0]).
del_table_copy(Table, Node) ->
case mnesia:del_table_copy(Table, Node) of
{aborted, Reason} -> io:format("Error: can not remove ~p table: ~p~n", [Table, Reason]);
_ -> io:format("table ~p removed from cluster~n", [Table])
end.
del_tables([],_) ->
ok;
del_tables([schema | Tables], Node) ->
del_tables(Tables, Node);
del_tables([Table | Tables], Node) ->
del_table_copy(Table, Node),
del_tables(Tables, Node).
start() ->
io:format("~n",[]),
Removed = node(),
case mnesia:system_info(running_db_nodes)--[Removed] of
[] -> io:format("Error: no other node running in the cluster~n");
Nodes ->
del_tables(mnesia:system_info(local_tables), Removed),
mnesia:stop(),
case rpc:call(hd(Nodes), mnesia, del_table_copy, [schema, Removed]) of
{badrpc,Reason} -> io:format("Error: can not unregister node ~p from cluster: ~p~n", [Removed, Reason]);
{aborted,Reason} -> io:format("Error: can not unregister node ~p from cluster: ~p~n", [Removed, Reason]);
{atomic, ok} ->
mnesia:delete_schema([Removed]),
io:format("node ~p removed from cluster~n", [Removed])
end
end,
halt(0).
EOF
# Compile the generated module and run it in a temporary Erlang node that
# uses the local mnesia directory.
# NAME is never assigned in this script, and under nounset an unset NAME
# aborts the erl command line below. Keep a value supplied by the
# environment; otherwise derive it from ERLANG_NODE: a name containing a
# dot is fully qualified (-name), any other name is short (-sname).
case "$ERLANG_NODE" in
    *.*) NAME=${NAME:--name} ;;
    *)   NAME=${NAME:--sname} ;;
esac
# The script runs under errexit, so a failing erlc or erl would abort before
# the cleanup below. The EXIT trap removes the temporary directory in that
# case too.
trap 'rm -Rf "$PA"' EXIT
$ERLC -o $PA $CLUSTERSETUP_ERL
$ERL $NAME $ERLANG_NODE -pa $PA $KERNEL_OPTS -mnesia dir "\"$SPOOL_DIR\"" -s mnesia -s $CLUSTERSETUP start
cd -
rm -Rf $PA
# Normal path: cleanup is done, the trap is no longer needed.
trap - EXIT
echo "End."
echo "Check that there is no error in the above messages."