From 7ef85dddea8f58cea86ae19b16ddf62cc99f8caf Mon Sep 17 00:00:00 2001 From: Eric Cestari Date: Fri, 7 Jan 2011 15:12:51 +0100 Subject: [PATCH] Refactoring of ejabberd_http_fileserver.erl - not a gen_server anymore. Should be way faster now (no more message passing between processes) - configuration stored in mochiglobal - support for etag - support for gzip compression: - static (if a foo.gz is in the same dir as requested foo, it will be served) - always (will always gzip, will use static is available) - false, don't gzip - logfile is now in another module. --- src/mochiglobal.erl | 107 +++++++ src/web/mod_http_fileserver.erl | 414 ++++++++++------------------ src/web/mod_http_fileserver_log.erl | 167 +++++++++++ 3 files changed, 424 insertions(+), 264 deletions(-) create mode 100644 src/mochiglobal.erl create mode 100644 src/web/mod_http_fileserver_log.erl diff --git a/src/mochiglobal.erl b/src/mochiglobal.erl new file mode 100644 index 000000000..c740b8781 --- /dev/null +++ b/src/mochiglobal.erl @@ -0,0 +1,107 @@ +%% @author Bob Ippolito +%% @copyright 2010 Mochi Media, Inc. +%% @doc Abuse module constant pools as a "read-only shared heap" (since erts 5.6) +%% [1]. +-module(mochiglobal). +-author("Bob Ippolito "). +-export([get/1, get/2, put/2, delete/1]). + +-spec get(atom()) -> any() | undefined. +%% @equiv get(K, undefined) +get(K) -> + get(K, undefined). + +-spec get(atom(), T) -> any() | T. +%% @doc Get the term for K or return Default. +get(K, Default) -> + get(K, Default, key_to_module(K)). + +get(_K, Default, Mod) -> + try Mod:term() + catch error:undef -> + Default + end. + +-spec put(atom(), any()) -> ok. +%% @doc Store term V at K, replaces an existing term if present. +put(K, V) -> + put(K, V, key_to_module(K)). + +put(_K, V, Mod) -> + Bin = compile(Mod, V), + code:purge(Mod), + code:load_binary(Mod, atom_to_list(Mod) ++ ".erl", Bin), + ok. + +-spec delete(atom()) -> boolean(). +%% @doc Delete term stored at K, no-op if non-existent. +delete(K) -> + delete(K, key_to_module(K)). + +delete(_K, Mod) -> + code:purge(Mod), + code:delete(Mod). + +-spec key_to_module(atom()) -> atom(). +key_to_module(K) -> + list_to_atom("mochiglobal:" ++ atom_to_list(K)). + +-spec compile(atom(), any()) -> binary(). +compile(Module, T) -> + {ok, Module, Bin} = compile:forms(forms(Module, T), + [verbose, report_errors]), + Bin. + +-spec forms(atom(), any()) -> [erl_syntax:syntaxTree()]. +forms(Module, T) -> + [erl_syntax:revert(X) || X <- term_to_abstract(Module, term, T)]. + +-spec term_to_abstract(atom(), atom(), any()) -> [erl_syntax:syntaxTree()]. +term_to_abstract(Module, Getter, T) -> + [%% -module(Module). + erl_syntax:attribute( + erl_syntax:atom(module), + [erl_syntax:atom(Module)]), + %% -export([Getter/0]). + erl_syntax:attribute( + erl_syntax:atom(export), + [erl_syntax:list( + [erl_syntax:arity_qualifier( + erl_syntax:atom(Getter), + erl_syntax:integer(0))])]), + %% Getter() -> T. + erl_syntax:function( + erl_syntax:atom(Getter), + [erl_syntax:clause([], none, [erl_syntax:abstract(T)])])]. + +%% +%% Tests +%% +-include_lib("eunit/include/eunit.hrl"). +-ifdef(TEST). +get_put_delete_test() -> + K = '$$test$$mochiglobal', + delete(K), + ?assertEqual( + bar, + get(K, bar)), + try + ?MODULE:put(K, baz), + ?assertEqual( + baz, + get(K, bar)), + ?MODULE:put(K, wibble), + ?assertEqual( + wibble, + ?MODULE:get(K)) + after + delete(K) + end, + ?assertEqual( + bar, + get(K, bar)), + ?assertEqual( + undefined, + ?MODULE:get(K)), + ok. +-endif. diff --git a/src/web/mod_http_fileserver.erl b/src/web/mod_http_fileserver.erl index ab4e1a5bd..0302d3527 100644 --- a/src/web/mod_http_fileserver.erl +++ b/src/web/mod_http_fileserver.erl @@ -26,45 +26,20 @@ -module(mod_http_fileserver). -author('mmirra@process-one.net'). - +-author('ecestari@process-one.net'). -behaviour(gen_mod). --behaviour(gen_server). %% gen_mod callbacks -export([start/2, stop/1]). -%% API --export([start_link/2]). - -%% gen_server callbacks --export([init/1, handle_call/3, handle_cast/2, handle_info/2, - terminate/2, code_change/3]). - %% request_handlers callbacks -export([process/2]). -%% ejabberd_hooks callbacks --export([reopen_log/1]). - -include("ejabberd.hrl"). -include("jlib.hrl"). -include_lib("kernel/include/file.hrl"). -%%-include("ejabberd_http.hrl"). -%% TODO: When ejabberd-modules SVN gets the new ejabberd_http.hrl, delete this code: --record(request, {method, - path, - q = [], - us, - auth, - lang = "", - data = "", - ip, - host, % string() - port, % integer() - tp, % transfer protocol = http | https - headers - }). +-include("ejabberd_http.hrl"). -ifdef(SSL40). -define(STRING2LOWER, string). @@ -76,11 +51,6 @@ -endif. -endif. --record(state, {host, docroot, accesslog, accesslogfd, directory_indices, - custom_headers, default_content_type, content_types = []}). - --define(PROCNAME, ejabberd_mod_http_fileserver). - %% Response is {DataSize, Code, [{HeaderKey, HeaderValue}], Data} -define(HTTP_ERR_FILE_NOT_FOUND, {-1, 404, [], "Not found"}). -define(HTTP_ERR_FORBIDDEN, {-1, 403, [], "Forbidden"}). @@ -91,7 +61,7 @@ {".html", "text/html"}, {".jar", "application/java-archive"}, {".jpeg", "image/jpeg"}, - {".jpg", "image/jpeg"}, + {".jpg", "image/jpeg"}, {".js", "text/javascript"}, {".png", "image/png"}, {".txt", "text/plain"}, @@ -101,81 +71,69 @@ -compile(export_all). -%%==================================================================== -%% gen_mod callbacks -%%==================================================================== start(Host, Opts) -> - Proc = get_proc_name(Host), - ChildSpec = - {Proc, - {?MODULE, start_link, [Host, Opts]}, - transient, % if process crashes abruptly, it gets restarted - 1000, - worker, - [?MODULE]}, - supervisor:start_child(ejabberd_sup, ChildSpec). - -stop(Host) -> - Proc = get_proc_name(Host), - gen_server:call(Proc, stop), - supervisor:terminate_child(ejabberd_sup, Proc), - supervisor:delete_child(ejabberd_sup, Proc). - -%%==================================================================== -%% API -%%==================================================================== -%%-------------------------------------------------------------------- -%% Function: start_link() -> {ok,Pid} | ignore | {error,Error} -%% Description: Starts the server -%%-------------------------------------------------------------------- -start_link(Host, Opts) -> - Proc = get_proc_name(Host), - gen_server:start_link({local, Proc}, ?MODULE, [Host, Opts], []). - -%%==================================================================== -%% gen_server callbacks -%%==================================================================== -%%-------------------------------------------------------------------- -%% Function: init(Args) -> {ok, State} | -%% {ok, State, Timeout} | -%% ignore | -%% {stop, Reason} -%% Description: Initiates the server -%%-------------------------------------------------------------------- -init([Host, Opts]) -> - try initialize(Host, Opts) of - {DocRoot, AccessLog, AccessLogFD, DirectoryIndices, - CustomHeaders, DefaultContentType, ContentTypes} -> - {ok, #state{host = Host, - accesslog = AccessLog, - accesslogfd = AccessLogFD, - docroot = DocRoot, - directory_indices = DirectoryIndices, - custom_headers = CustomHeaders, - default_content_type = DefaultContentType, - content_types = ContentTypes}} - catch - throw:Reason -> - {stop, Reason} - end. - -initialize(Host, Opts) -> DocRoot = gen_mod:get_opt(docroot, Opts, undefined), + set_default_host(Host, Opts), + conf_store(Host, docroot, DocRoot), check_docroot_defined(DocRoot, Host), DRInfo = check_docroot_exists(DocRoot), check_docroot_is_dir(DRInfo, DocRoot), check_docroot_is_readable(DRInfo, DocRoot), AccessLog = gen_mod:get_opt(accesslog, Opts, undefined), - AccessLogFD = try_open_log(AccessLog, Host), + start_log(Host, AccessLog), DirectoryIndices = gen_mod:get_opt(directory_indices, Opts, []), + conf_store(Host, directory_indices, DirectoryIndices), + ServeStaticGzip = gen_mod:get_opt(serve_gzip, Opts, false), + conf_store(Host, serve_gzip, ServeStaticGzip), CustomHeaders = gen_mod:get_opt(custom_headers, Opts, []), + conf_store(Host, custom_headers, CustomHeaders), DefaultContentType = gen_mod:get_opt(default_content_type, Opts, ?DEFAULT_CONTENT_TYPE), + conf_store(Host, default_content_type, DefaultContentType), ContentTypes = build_list_content_types(gen_mod:get_opt(content_types, Opts, []), ?DEFAULT_CONTENT_TYPES), - ?INFO_MSG("initialize: ~n ~p", [ContentTypes]),%+++ - {DocRoot, AccessLog, AccessLogFD, DirectoryIndices, - CustomHeaders, DefaultContentType, ContentTypes}. + conf_store(Host, content_types, ContentTypes), + ?INFO_MSG("initialize: ~n ~p", [ContentTypes]), + ok. + +% Defines host that will answer request if hostname is not recognized. +% The first configured host will be used. +set_default_host(Host, _Opts)-> + case mochiglobal:get(http_default_host) of + undefined -> + ?DEBUG("Setting default host to ~p", [Host]), + mochiglobal:put(http_default_host, Host); + _ -> + ok + end. + +conf_store(Host, Key, Value)-> + R = case mochiglobal:get(Key) of + undefined -> [{Host, Value}]; + A -> + case lists:keymember(Host, 1, A) of + true -> lists:keyreplace(Host, 1, A,{Host, Value}); + false -> [{Host, Value}|A] + end + end, + mochiglobal:put(Key, R). + +conf_get(Host, Key, Default) -> + case mochiglobal:get(Key) of + undefined-> Default; + A -> + case lists:keyfind(Host, 1, A) of + {Host, Val} -> Val; + false -> + case mochiglobal:get(http_default_host) of + Host -> % stop recursion here + Default; + DefaultHost -> + conf_get(DefaultHost, Key, Default) + end + end + end. + %% @spec (AdminCTs::[CT], Default::[CT]) -> [CT] %% where CT = {Extension::string(), Value} @@ -183,6 +141,12 @@ initialize(Host, Opts) -> %% @doc Return a unified list without duplicates. %% Elements of AdminCTs have more priority. %% If a CT is declared as 'undefined', then it is not included in the result. + +start_log(_Host, undefined)-> + ok; +start_log(Host, FileName) -> + mod_http_fileserver_log:start(Host, FileName). + build_list_content_types(AdminCTsUnsorted, DefaultCTsUnsorted) -> AdminCTs = lists:ukeysort(1, AdminCTsUnsorted), DefaultCTs = lists:ukeysort(1, DefaultCTsUnsorted), @@ -213,79 +177,10 @@ check_docroot_is_readable(DRInfo, DocRoot) -> read_write -> ok; _ -> throw({docroot_not_readable, DocRoot}) end. - -try_open_log(undefined, _Host) -> - undefined; -try_open_log(FN, Host) -> - FD = try open_log(FN) of - FD1 -> FD1 - catch - throw:{cannot_open_accesslog, FN, Reason} -> - ?ERROR_MSG("Cannot open access log file: ~p~nReason: ~p", [FN, Reason]), - undefined - end, - ejabberd_hooks:add(reopen_log_hook, Host, ?MODULE, reopen_log, 50), - FD. - -%%-------------------------------------------------------------------- -%% Function: handle_call(Request, From, State) -> {reply, Reply, State} | -%% {reply, Reply, State, Timeout} | -%% {noreply, State} | -%% {noreply, State, Timeout} | -%% {stop, Reason, Reply, State} | -%% {stop, Reason, State} -%% Description: Handling call messages -%%-------------------------------------------------------------------- -handle_call({serve, LocalPath}, _From, State) -> - Reply = serve(LocalPath, State#state.docroot, State#state.directory_indices, - State#state.custom_headers, - State#state.default_content_type, State#state.content_types), - {reply, Reply, State}; -handle_call(_Request, _From, State) -> - {reply, ok, State}. - -%%-------------------------------------------------------------------- -%% Function: handle_cast(Msg, State) -> {noreply, State} | -%% {noreply, State, Timeout} | -%% {stop, Reason, State} -%% Description: Handling cast messages -%%-------------------------------------------------------------------- -handle_cast({add_to_log, FileSize, Code, Request}, State) -> - add_to_log(State#state.accesslogfd, FileSize, Code, Request), - {noreply, State}; -handle_cast(reopen_log, State) -> - FD2 = reopen_log(State#state.accesslog, State#state.accesslogfd), - {noreply, State#state{accesslogfd = FD2}}; -handle_cast(_Msg, State) -> - {noreply, State}. - -%%-------------------------------------------------------------------- -%% Function: handle_info(Info, State) -> {noreply, State} | -%% {noreply, State, Timeout} | -%% {stop, Reason, State} -%% Description: Handling all non call/cast messages -%%-------------------------------------------------------------------- -handle_info(_Info, State) -> - {noreply, State}. - -%%-------------------------------------------------------------------- -%% Function: terminate(Reason, State) -> void() -%% Description: This function is called by a gen_server when it is about to -%% terminate. It should be the opposite of Module:init/1 and do any necessary -%% cleaning up. When it returns, the gen_server terminates with Reason. -%% The return value is ignored. -%%-------------------------------------------------------------------- -terminate(_Reason, State) -> - close_log(State#state.accesslogfd), - ejabberd_hooks:delete(reopen_log_hook, State#state.host, ?MODULE, reopen_log, 50), + +stop(_Host) -> ok. -%%-------------------------------------------------------------------- -%% Func: code_change(OldVsn, State, Extra) -> {ok, NewState} -%% Description: Convert process state when code is changed -%%-------------------------------------------------------------------- -code_change(_OldVsn, State, _Extra) -> - {ok, State}. %%==================================================================== %% request_handlers callbacks @@ -295,114 +190,122 @@ code_change(_OldVsn, State, _Extra) -> %% @doc Handle an HTTP request. %% LocalPath is the part of the requested URL path that is "local to the module". %% Returns the page to be sent back to the client and/or HTTP status code. + process(LocalPath, Request) -> ?DEBUG("Requested ~p", [LocalPath]), - try gen_server:call(get_proc_name(Request#request.host), {serve, LocalPath}) of - {FileSize, Code, Headers, Contents} -> - add_to_log(FileSize, Code, Request), - {Code, Headers, Contents} - catch - exit:{noproc, _} -> - ?ERROR_MSG("Received an HTTP request with Host ~p, but couldn't find the related " - "ejabberd virtual host", [Request#request.host]), - ejabberd_web:error(not_found) - end. - -serve(LocalPath, DocRoot, DirectoryIndices, CustomHeaders, DefaultContentType, ContentTypes) -> + Host = Request#request.host, + ClientHeaders = Request#request.headers, + DirectoryIndices = conf_get(Host, directory_indices, undefined), + CustomHeaders = conf_get(Host, custom_headers, undefined), + DefaultContentType = conf_get(Host, default_content_type, undefined), + ContentTypes = conf_get(Host, content_types, undefined), + Static = conf_get(Host, serve_gzip, undefined), + DocRoot = conf_get(Host, docroot, undefined), FileName = filename:join(filename:split(DocRoot) ++ LocalPath), - case file:read_file_info(FileName) of + {FileSize, Code, Headers, Contents} = case file:read_file_info(FileName) of {error, enoent} -> ?HTTP_ERR_FILE_NOT_FOUND; {error, eacces} -> ?HTTP_ERR_FORBIDDEN; {ok, #file_info{type = directory}} -> serve_index(FileName, DirectoryIndices, CustomHeaders, DefaultContentType, - ContentTypes); - {ok, FileInfo} -> serve_file(FileInfo, FileName, - CustomHeaders, - DefaultContentType, - ContentTypes) - end. + ContentTypes, Static); + {ok, FileInfo} -> + case should_serve(FileInfo, ClientHeaders) of + true ->serve_file(FileInfo, FileName, + CustomHeaders, + DefaultContentType, + ContentTypes, Static); + false -> + {0, 304, [], []} + end + end, + mod_http_fileserver_log:add_to_log(Host,FileSize, Code, Request), + {Code, Headers, Contents}. +should_serve(FileInfo, Headers) -> + lists:foldl(fun({Header, Fun}, Acc)-> + case lists:keyfind(Header, 1, Headers) of + {_, Val} -> + Fun(FileInfo,Val); + _O -> + Acc + end + end, true, [{'If-None-Match',fun etag/2} + ]). +etag(FileInfo, Etag)-> + case httpd_util:create_etag(FileInfo) of + Etag -> + false; + _ -> + true + end. +modified(FileInfo, LastModified)-> + AfterDate = calendar:datetime_to_gregorian_seconds( + httpd_util:convert_request_date(LastModified)), + Mtime = calendar:datetime_to_gregorian_seconds(FileInfo#file_info.mtime), + ?DEBUG("Modified : ~p > ~p (serving: ~p)", [Mtime, AfterDate,Mtime > AfterDate]), + Mtime > AfterDate. + %% Troll through the directory indices attempting to find one which %% works, if none can be found, return a 404. -serve_index(_FileName, [], _CH, _DefaultContentType, _ContentTypes) -> +serve_index(_FileName, [], _CH, _DefaultContentType, _ContentTypes, _Static) -> ?HTTP_ERR_FILE_NOT_FOUND; -serve_index(FileName, [Index | T], CH, DefaultContentType, ContentTypes) -> +serve_index(FileName, [Index | T], CH, DefaultContentType, ContentTypes, Static) -> IndexFileName = filename:join([FileName] ++ [Index]), case file:read_file_info(IndexFileName) of - {error, _Error} -> serve_index(FileName, T, CH, DefaultContentType, ContentTypes); - {ok, #file_info{type = directory}} -> serve_index(FileName, T, CH, DefaultContentType, ContentTypes); - {ok, FileInfo} -> serve_file(FileInfo, IndexFileName, CH, DefaultContentType, ContentTypes) + {error, _Error} -> serve_index(FileName, T, CH, DefaultContentType, ContentTypes, Static); + {ok, #file_info{type = directory}} -> serve_index(FileName, T, CH, DefaultContentType, ContentTypes, Static); + {ok, FileInfo} -> serve_file(FileInfo, IndexFileName, CH, DefaultContentType, ContentTypes, Static) end. %% Assume the file exists if we got this far and attempt to read it in %% and serve it up. -serve_file(FileInfo, FileName, CustomHeaders, DefaultContentType, ContentTypes) -> + +serve_file(FileInfo, FileName, CustomHeaders, DefaultContentType, ContentTypes, false) -> ?DEBUG("Delivering: ~s", [FileName]), - {ok, FileContents} = file:read_file(FileName), ContentType = content_type(FileName, DefaultContentType, ContentTypes), + {ok, FileContents} = file:read_file(FileName), {FileInfo#file_info.size, 200, [{"Server", "ejabberd"}, {"Last-Modified", last_modified(FileInfo)}, {"Content-Type", ContentType} | CustomHeaders], - FileContents}. - -%%---------------------------------------------------------------------- -%% Log file -%%---------------------------------------------------------------------- - -open_log(FN) -> - case file:open(FN, [append]) of - {ok, FD} -> - FD; - {error, Reason} -> - throw({cannot_open_accesslog, FN, Reason}) + FileContents}; + +serve_file(FileInfo, FileName, CustomHeaders, DefaultContentType, ContentTypes, Gzip) -> + ?DEBUG("Delivering: ~s", [FileName]), + ContentType = content_type(FileName, DefaultContentType, ContentTypes), + CompressedFileName = FileName ++ ".gz", + case file:read_file_info(CompressedFileName) of + {ok, FileInfoCompressed} -> %Found compressed + ?INFO_MSG("Found compressed: ~s", [FileName]), + {ok, FileContents} = file:read_file(CompressedFileName), + {FileInfoCompressed#file_info.size, + 200, [{"Server", "ejabberd"}, + {"Last-Modified", last_modified(FileInfoCompressed)}, + {"Content-Type", ContentType}, + {"Etag", httpd_util:create_etag(FileInfoCompressed)}, + {"Content-Encoding", "gzip"} | CustomHeaders], + FileContents}; + {error, _} -> + {FileContents, Size} = case Gzip of + static -> + {ok, Content} = file:read_file(FileName), + {Content, FileInfo#file_info.size}; + always -> + {ok, Content} = file:read_file(FileName), + Compressed = zlib:gzip(Content), + {Compressed, size(Compressed)} + end, + {Size, + 200, [{"Server", "ejabberd"}, + {"Last-Modified", last_modified(FileInfo)}, + {"Etag", httpd_util:create_etag(FileInfo)}, + {"Content-Type", ContentType}, + {"Content-Encoding", "gzip"} | CustomHeaders], + FileContents} end. -close_log(FD) -> - file:close(FD). - -reopen_log(undefined, undefined) -> - ok; -reopen_log(FN, FD) -> - close_log(FD), - open_log(FN). - -reopen_log(Host) -> - gen_server:cast(get_proc_name(Host), reopen_log). - -add_to_log(FileSize, Code, Request) -> - gen_server:cast(get_proc_name(Request#request.host), - {add_to_log, FileSize, Code, Request}). - -add_to_log(undefined, _FileSize, _Code, _Request) -> - ok; -add_to_log(File, FileSize, Code, Request) -> - {{Year, Month, Day}, {Hour, Minute, Second}} = calendar:local_time(), - IP = ip_to_string(element(1, Request#request.ip)), - Path = join(Request#request.path, "/"), - Query = case join(lists:map(fun(E) -> lists:concat([element(1, E), "=", element(2, E)]) end, - Request#request.q), "&") of - [] -> - ""; - String -> - [$? | String] - end, - UserAgent = find_header('User-Agent', Request#request.headers, "-"), - Referer = find_header('Referer', Request#request.headers, "-"), - %% Pseudo Combined Apache log format: - %% 127.0.0.1 - - [28/Mar/2007:18:41:55 +0200] "GET / HTTP/1.1" 302 303 "-" "tsung" - %% TODO some fields are harcoded/missing: - %% The date/time integers should have always 2 digits. For example day "7" should be "07" - %% Month should be 3*letter, not integer 1..12 - %% Missing time zone = (`+' | `-') 4*digit - %% Missing protocol version: HTTP/1.1 - %% For reference: http://httpd.apache.org/docs/2.2/logs.html - io:format(File, "~s - - [~p/~p/~p:~p:~p:~p] \"~s /~s~s\" ~p ~p ~p ~p~n", - [IP, Day, Month, Year, Hour, Minute, Second, Request#request.method, Path, Query, Code, - FileSize, Referer, UserAgent]). - find_header(Header, Headers, Default) -> case lists:keysearch(Header, 1, Headers) of {value, {_, Value}} -> Value; @@ -413,15 +316,6 @@ find_header(Header, Headers, Default) -> %% Utilities %%---------------------------------------------------------------------- -get_proc_name(Host) -> gen_mod:get_module_proc(Host, ?PROCNAME). - -join([], _) -> - ""; -join([E], _) -> - E; -join([H | T], Separator) -> - lists:foldl(fun(E, Acc) -> lists:concat([Acc, Separator, E]) end, H, T). - content_type(Filename, DefaultContentType, ContentTypes) -> Extension = ?STRING2LOWER:to_lower(filename:extension(Filename)), case lists:keysearch(Extension, 1, ContentTypes) of @@ -432,11 +326,3 @@ content_type(Filename, DefaultContentType, ContentTypes) -> last_modified(FileInfo) -> Then = FileInfo#file_info.mtime, httpd_util:rfc1123_date(Then). - -%% Convert IP address tuple to string representation. Accepts either -%% IPv4 or IPv6 address tuples. -ip_to_string(Address) when size(Address) == 4 -> - join(tuple_to_list(Address), "."); -ip_to_string(Address) when size(Address) == 8 -> - Parts = lists:map(fun (Int) -> io_lib:format("~.16B", [Int]) end, tuple_to_list(Address)), - ?STRING2LOWER:to_lower(lists:flatten(join(Parts, ":"))). diff --git a/src/web/mod_http_fileserver_log.erl b/src/web/mod_http_fileserver_log.erl new file mode 100644 index 000000000..b76f947c8 --- /dev/null +++ b/src/web/mod_http_fileserver_log.erl @@ -0,0 +1,167 @@ +-module (mod_http_fileserver_log). + +-behaviour (gen_server). + +-export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2, code_change/3]). + +-export ([start_link/2,start/2, stop/1, add_to_log/4,reopen_log/1]). + +-include("ejabberd.hrl"). +-include("jlib.hrl"). +-include("ejabberd_http.hrl"). +-include_lib("kernel/include/file.hrl"). + +-define(PROCNAME, ejabberd_mod_http_fileserver_log). + +-record(state, {host,accesslog, accesslogfd}). +%% Public API + +start(Host, Filename) -> + Proc =gen_mod:get_module_proc(Host, ?PROCNAME), + ChildSpec = + {Proc, + {?MODULE, start_link, [Host, Filename]}, + transient, % if process crashes abruptly, it gets restarted + 1000, + worker, + [?MODULE]}, + supervisor:start_child(ejabberd_sup, ChildSpec). + +stop(Host) -> + Proc = gen_mod:get_module_proc(Host, ?PROCNAME), + gen_server:call(Proc, stop), + supervisor:terminate_child(ejabberd_sup, Proc), + supervisor:delete_child(ejabberd_sup, Proc). + +start_link(Host, Filename) -> + Proc = gen_mod:get_module_proc(Host, ?PROCNAME), + gen_server:start_link({local, Proc}, ?MODULE, [Host, Filename], []). + +add_to_log(Host,FileSize, Code, Request) -> + gen_server:cast(gen_mod:get_module_proc(Host, ?PROCNAME), + {add_to_log, FileSize, Code, Request}). + +reopen_log(Host) -> + gen_server:cast(gen_mod:get_module_proc(Host, ?PROCNAME), reopen_log). + +%% Server implementation, a.k.a.: callbacks + +init([Host, Filename]) -> + try try_open_log(Filename, Host) of + AccessLogFD -> + ?DEBUG("File opened !", []), + {ok, #state{host = Host, + accesslog = Filename, + accesslogfd = AccessLogFD}} + catch + throw:Reason -> + {stop, Reason} + end. + +try_open_log(FN, Host) -> + FD = try open_log(FN) of + FD1 -> FD1 + catch + throw:{cannot_open_accesslog, FN, Reason} -> + ?ERROR_MSG("Cannot open access log file: ~p~nReason: ~p", [FN, Reason]), + undefined + end, + %HostB = list_to_binary(Host), + ejabberd_hooks:add(reopen_log_hook, Host, ?MODULE, reopen_log, 50), + FD. + +handle_call(_Request, _From, State) -> + {reply, ok, State}. + +handle_cast({add_to_log, FileSize, Code, Request}, State) -> + add_to_log2(State#state.accesslogfd, FileSize, Code, Request), + {noreply, State}; +handle_cast(reopen_log, State) -> + FD2 = reopen_log(State#state.accesslog, State#state.accesslogfd), + {noreply, State#state{accesslogfd = FD2}}; +handle_cast(_Msg, State) -> + {noreply, State}. + +handle_info(_Info, State) -> + {noreply, State}. + +terminate(_Reason, State) -> + close_log(State#state.accesslogfd), + ejabberd_hooks:delete(reopen_log_hook, State#state.host, ?MODULE, reopen_log, 50), + ok. + +code_change(_OldVsn, State, _Extra) -> + {ok, State}. + + +%%---------------------------------------------------------------------- +%% Log file +%%---------------------------------------------------------------------- + +open_log(FN) -> + case file:open(FN, [append]) of + {ok, FD} -> + FD; + {error, Reason} -> + throw({cannot_open_accesslog, FN, Reason}) + end. + +close_log(FD) -> + file:close(FD). + +reopen_log(undefined, undefined) -> + ok; +reopen_log(FN, FD) -> + ?DEBUG("reopening logs", []), + close_log(FD), + open_log(FN). + + + +add_to_log2(undefined, _FileSize, _Code, _Request) -> + ok; +add_to_log2(File, FileSize, Code, Request) -> + {{Year, Month, Day}, {Hour, Minute, Second}} = calendar:local_time(), + IP = ip_to_string(element(1, Request#request.ip)), + Path = join(Request#request.path, "/"), + Query = case join(lists:map(fun(E) -> lists:concat([element(1, E), "=", element(2, E)]) end, + Request#request.q), "&") of + [] -> + ""; + String -> + [$? | String] + end, + UserAgent = find_header('User-Agent', Request#request.headers, "-"), + Referer = find_header('Referer', Request#request.headers, "-"), + %% Pseudo Combined Apache log format: + %% 127.0.0.1 - - [28/Mar/2007:18:41:55 +0200] "GET / HTTP/1.1" 302 303 "-" "tsung" + %% TODO some fields are harcoded/missing: + %% The date/time integers should have always 2 digits. For example day "7" should be "07" + %% Month should be 3*letter, not integer 1..12 + %% Missing time zone = (`+' | `-') 4*digit + %% Missing protocol version: HTTP/1.1 + %% For reference: http://httpd.apache.org/docs/2.2/logs.html + io:format(File, "~s - - [~p/~p/~p:~p:~p:~p] \"~s /~s~s\" ~p ~p ~p ~p~n", + [IP, Day, Month, Year, Hour, Minute, Second, Request#request.method, Path, Query, Code, + FileSize, Referer, UserAgent]). + +find_header(Header, Headers, Default) -> + case lists:keysearch(Header, 1, Headers) of + {value, {_, Value}} -> Value; + false -> Default + end. + +join([], _) -> + ""; +join([E], _) -> + E; +join([H | T], Separator) -> + lists:foldl(fun(E, Acc) -> lists:concat([Acc, Separator, E]) end, H, T). + +%% Convert IP address tuple to string representation. Accepts either +%% IPv4 or IPv6 address tuples. +ip_to_string(Address) when size(Address) == 4 -> + join(tuple_to_list(Address), "."); +ip_to_string(Address) when size(Address) == 8 -> + Parts = lists:map(fun (Int) -> io_lib:format("~.16B", [Int]) end, tuple_to_list(Address)), + string:to_lower(lists:flatten(join(Parts, ":"))). \ No newline at end of file