src/recon.erl - couchdb-recon - Git at Google

 %%% @author Fred Hebert <mononcqc@ferd.ca>
 %%%  [http://ferd.ca/]
 %%% @doc Recon, as a module, provides access to the high-level functionality
 %%% contained in the Recon application.
 %%%
 %%% It has functions in five main categories:
 %%%
 %%% <dl>
 %%%     <dt>1. State information</dt>
 %%%     <dd>Process information is everything that has to do with the
 %%%         general state of the node. Functions such as {@link info/1}
 %%%         and {@link info/3} are wrappers to provide more details than
 %%%         `erlang:process_info/2', while providing it in a production-safe
 %%%         manner.</dd>
 %%%     <dd>{@link proc_count/2} and {@link proc_window/3} are to be used
 %%%         when you require information about processes in a larger sense:
 %%%         biggest consumers of given process information (say memory or
 %%%         reductions), either absolutely or over a sliding time window,
 %%%         respectively.</dd>
 %%%     <dd>{@link bin_leak/1} is a function that can be used to try and
 %%%         see if your Erlang node is leaking refc binaries. See the function
 %%%         itself for more details.</dd>
 %%%     <dd>Functions to access node statistics, in a manner somewhat similar
 %%%         to what <a href="https://github.com/ferd/vmstats">vmstats</a>
 %%%         provides as a library. There are 3 of them:
 %%%         {@link node_stats_print/2}, which displays them,
 %%%         {@link node_stats_list/2}, which returns them in a list, and
 %%%         {@link node_stats/4}, which provides a fold-like interface
 %%%         for stats gathering.</dd>
 %%%
 %%%     <dt>2. OTP tools</dt>
 %%%     <dd>This category provides tools to interact with pieces of OTP
 %%%         more easily. At this point, the only function included is
 %%%         {@link get_state/1}, which works as a wrapper around
 %%%         `sys:get_state/1' in R16B01, and provides the required
 %%%         functionality for older versions of Erlang.</dd>
 %%%
 %%%     <dt>3. Code Handling</dt>
 %%%     <dd>Specific functions are in `recon' for the sole purpose
 %%%         of interacting with source and compiled code.
 %%%         {@link remote_load/1} and {@link remote_load/2} will allow
 %%%         to take a local module, and load it remotely (in a diskless
 %%%         manner) on another Erlang node you're connected to.</dd>
 %%%     <dd>{@link source/1} allows to print the source of a loaded module,
 %%%         in case it's not available in the currently running node.</dd>
 %%%
 %%%     <dt>4. Ports and Sockets</dt>
 %%%     <dd>To make it simpler to debug some network-related issues,
 %%%         recon contains functions to deal with Erlang ports (raw, file
 %%%         handles, or inet). Functions {@link tcp/0}, {@link udp/0},
 %%%         {@link sctp/0}, {@link files/0}, and {@link port_types/0} will
 %%%         list all the Erlang ports of a given type. The latter function
 %%%         prints counts of all individual types.</dd>
 %%%     <dd>Finally, the functions {@link inet_count/2} and {@link inet_window/3}
 %%%         provide the absolute or sliding window functionality of
 %%%         {@link proc_count/2} and {@link proc_window/3} to inet ports
 %%%         and connections currently on the node.</dd>
 %%%
 %%%     <dt>5. RPC</dt>
 %%%     <dd>These are wrappers to make RPC work simpler with clusters of
 %%%         Erlang nodes. Default RPC mechanisms (from the `rpc' module)
 %%%         make it somewhat painful to call shell-defined funs over node
 %%%         boundaries. The functions {@link rpc/1}, {@link rpc/2}, and
 %%%         {@link rpc/3} will do it with a simpler interface.</dd>
 %%%     <dd>Additionally, when you're running diagnostic code on remote
 %%%         nodes and want to know which node evaluated what result, using
 %%%         {@link named_rpc/1}, {@link named_rpc/2}, and {@link named_rpc/3}
 %%%         will wrap the results in a tuple that tells you which node it's
 %%%         coming from, making it easier to identify bad nodes.</dd>
 %%% </dl>
 %%% @end
 -module(recon).
 -export([info/1,info/3,
          proc_count/2, proc_window/3,
          bin_leak/1,
          node_stats_print/2, node_stats_list/2, node_stats/4]).
 -export([get_state/1]).
 -export([remote_load/1, remote_load/2,
          source/1]).
 -export([tcp/0, udp/0, sctp/0, files/0, port_types/0,
          inet_count/2, inet_window/3]).
 -export([rpc/1, rpc/2, rpc/3,
          named_rpc/1, named_rpc/2, named_rpc/3]).

 %%%%%%%%%%%%%
 %%% TYPES %%%
 %%%%%%%%%%%%%
 %% A process entry: the pid, the value of the attribute that was asked for,
 %% and a non-empty list of identifying details (a name atom and/or the
 %% `current_function' / `initial_call' MFAs).
 -type proc_attrs() :: {pid(),
                        Attr::_,
                        [Name::atom()
                        |{current_function, mfa()}
                        |{initial_call, mfa()}, ...]}.
 %% A port entry: the port, the value of the attribute that was asked for,
 %% and a property list of further details about that port.
 -type inet_attrs() :: {port(),
                        Attr::_,
                        [{atom(), term()}]}.

 %% Every form accepted where a process must be designated: a pid, a locally
 %% registered name, a string, a `{global, Name}' or `{via, Module, Name}'
 %% registration, or a triple of non-negative integers.
 -type pid_term() :: pid() | atom() | string()
                   | {global, term()} | {via, module(), term()}
                   | {non_neg_integer(), non_neg_integer(), non_neg_integer()}.

 -export_type([proc_attrs/0, inet_attrs/0, pid_term/0]).
 %%%%%%%%%%%%%%%%%%
 %%% PUBLIC API %%%
 %%%%%%%%%%%%%%%%%%

 %%% Process Info %%%

 %% @doc Convenience form of {@link info/1} for a pid given as its three
 %% integer components: `info(A, B, C)' inspects the process `<A.B.C>'.
 -spec info(N,N,N) -> [{atom(), [{atom(),term()}]},...] when
       N :: non_neg_integer().
 info(A, B, C) ->
     Pid = recon_lib:triple_to_pid(A, B, C),
     info(Pid).

 %% @doc Returns process information grouped by theme (`meta', `signals',
 %% `location', `memory' and `work'), in the spirit of
 %% `erlang:process_info/1'.
 %%
 %% Only the items listed in the function body are requested. The content of
 %% the mailbox is deliberately not among them, since it can grow without
 %% bound and is unsafe to copy on a production node; only its length
 %% (`message_queue_len') is reported. A few items that
 %% `erlang:process_info/1' does not return by default (`monitors',
 %% `monitored_by', ...) are included.
 %%
 %% The process can be designated by any {@type pid_term()}: a pid, a locally
 %% registered atom, `{global, Name}', `{via, Module, Name}', a string such as
 %% `"<0.39.0>"' or a triple such as `{0,39,0}'. The conversion is delegated to
 %% `recon_lib:term_to_pid/1'.
 -spec info(pid_term()) -> [{Type, [{Key, Value}]},...] when
       Type :: meta | signals | location | memory | work,
       Key :: registered_name | dictionary | group_leader | status
            | links | monitors | monitored_by | trap_exit | initial_call
            | current_stacktrace | memory | message_queue_len | heap_size
            | total_heap_size | garbage_collection | reductions,
       Value :: term().
 info(PidTerm) ->
     Pid = recon_lib:term_to_pid(PidTerm),
     %% One process_info/2 call per section, in the order listed here.
     Sections = [{meta, [registered_name, dictionary, group_leader, status]},
                 {signals, [links, monitors, monitored_by, trap_exit]},
                 {location, [initial_call, current_stacktrace]},
                 {memory, [memory, message_queue_len, heap_size,
                           total_heap_size, garbage_collection]},
                 {work, [reductions]}],
     [{Section, erlang:process_info(Pid, Items)} || {Section, Items} <- Sections].

 %% @doc Samples attribute `AttrName' on every process (through
 %% `recon_lib:proc_attrs/1') and returns at most `Num' entries, ordered
 %% from the largest attribute value to the smallest.
 %% @todo Implement this function so it only stores `Num' entries in
 %% memory at any given time, instead of as many as there are
 %% processes.
 -spec proc_count(AttributeName, Num) -> [proc_attrs()] when
       AttributeName :: atom(),
       Num :: non_neg_integer().
 proc_count(AttrName, Num) ->
     Entries = recon_lib:proc_attrs(AttrName),
     %% Strict comparison on the attribute value (2nd element): biggest first.
     Bigger = fun({_, Left, _}, {_, Right, _}) -> Left > Right end,
     Ranked = lists:usort(Bigger, Entries),
     lists:sublist(Ranked, Num).

 %% @doc Samples attribute `AttrName' on every process twice, `Time'
 %% milliseconds apart, and returns at most `Num' entries ordered from the
 %% largest to the smallest value computed over that window.
 %%
 %% This is mostly useful when the processes of the node are short-lived,
 %% too short to be inspected with other tools, and you want to know which
 %% kind of process is eating through a lot of resources.
 %%
 %% The result is a snapshot over a sliding window. A program's timeline
 %% during sampling might look like this:
 %%
 %%  `--w---- [Sample1] ---x-------------y----- [Sample2] ---z--->'
 %%
 %% Some processes live from `w' to `x', some from `y' to `z', and some
 %% from `x' to `y'. Those are only partially observed, so their figures are
 %% not very significant. If most of your processes live for about the
 %% duration `x'...`y', pick a sampling time smaller than that, so that many
 %% processes span both samples (the equivalent of `w' to `z'). Otherwise the
 %% results are skewed: long-lived processes, which had ten times longer to
 %% accumulate data (say reductions), look like bottlenecks when they are not.
 %%
 %% Warning: two full snapshots are gathered and then combined (see
 %% `recon_lib:sliding_window/2'). This can take a heavy toll on memory when
 %% the node runs many tens of thousands of processes.
 -spec proc_window(AttributeName, Num, Milliseconds) -> [proc_attrs()] when
       AttributeName :: atom(),
       Num :: non_neg_integer(),
       Milliseconds :: pos_integer().
 proc_window(AttrName, Num, Time) ->
     TakeSnapshot = fun() -> recon_lib:proc_attrs(AttrName) end,
     {Before, After} = recon_lib:sample(Time, TakeSnapshot),
     Window = recon_lib:sliding_window(Before, After),
     %% Strict comparison on the attribute value (2nd element): biggest first.
     Bigger = fun({_, Left, _}, {_, Right, _}) -> Left > Right end,
     lists:sublist(lists:usort(Bigger, Window), Num).

 %% @doc Refc binaries can be leaking when barely-busy processes route them
 %% around and do little else, or when extremely busy processes reach a stable
 %% amount of memory allocated and do the vast majority of their work with refc
 %% binaries. When this happens, it may take a very long while before references
 %% get deallocated and refc binaries get to be garbage collected, leading to
 %% Out Of Memory crashes.
 %%
 %% For each process of the node, this function counts the refc binary
 %% references it holds, forces a garbage collection, and counts again. Each
 %% entry carries `length(After) - length(Before)', so a process that freed
 %% many references has a large negative value. Entries are sorted in
 %% ascending order and the first `N' are returned: the processes that freed
 %% the most binaries, potentially highlighting leaks.
 %%
 %% A process that cannot be inspected (typically because it died during the
 %% scan) is reported as `{Pid, 0, []}' rather than aborting the whole scan.
 %%
 %% See <a href="http://www.erlang.org/doc/efficiency_guide/binaryhandling.html#id65722">The efficiency guide</a>
 %% for more details on refc binaries
 -spec bin_leak(pos_integer()) -> [proc_attrs()].
 bin_leak(N) ->
     lists:sublist(
         lists:usort(
             %% Ascending on {Delta, Pid}: the most negative deltas come first.
             fun({K1,V1,_},{K2,V2,_}) -> {V1,K1} =< {V2,K2} end,
             [try
                 {ok, {_,Pre,Id}} = recon_lib:proc_attrs(binary, Pid),
                 erlang:garbage_collect(Pid),
                 {ok, {_,Post,_}} = recon_lib:proc_attrs(binary, Pid),
                 {Pid, length(Post)-length(Pre), Id}
             catch
                 %% Deliberate best effort. The fallback must be a 3-tuple
                 %% like every other entry, otherwise the sort fun above
                 %% fails with function_clause.
                 _:_ -> {Pid, 0, []}
             end || Pid <- processes()]),
         N).

 %% @doc Shorthand for `node_stats(N, Interval, fun(X,_) -> io:format("~p~n",[X]) end, ok)':
 %% prints each statistics sample as it is gathered. The accumulator is not
 %% used; it is whatever the last `io:format/2' call returned.
 -spec node_stats_print(Repeat, Interval) -> term() when
       Repeat :: non_neg_integer(),
       Interval :: pos_integer().
 node_stats_print(N, Interval) ->
     PrintSample = fun(Sample, _Ignored) -> io:format("~p~n", [Sample]) end,
     node_stats(N, Interval, PrintSample, ok).

 %% @doc Shorthand for `node_stats(N, Interval, fun(X,Acc) -> [X|Acc] end, [])'
 %% followed by a reversal, so that the samples come back in the order in
 %% which they were gathered.
 -spec node_stats_list(Repeat, Interval) -> [Stats] when
       Repeat :: non_neg_integer(),
       Interval :: pos_integer(),
       Stats :: {[Absolutes::{atom(),term()}],
                 [Increments::{atom(),term()}]}.
 node_stats_list(N, Interval) ->
     Collect = fun(Sample, Samples) -> [Sample | Samples] end,
     NewestFirst = node_stats(N, Interval, Collect, []),
     lists:reverse(NewestFirst).

 %% @doc Gathers statistics `N' times, waiting `Interval' milliseconds between
 %% each run, and accumulates results using a folding function `FoldFun'.
 %% The function will gather statistics in two forms: Absolutes and Increments.
 %%
 %% Absolutes are values that keep changing with time, and are useful to know
 %% about as a datapoint: process count, size of the run queue, error_logger
 %% queue length, and the memory of the node (total, processes, atoms, binaries,
 %% and ets tables). The error_logger queue length is reported as `0' when no
 %% process is registered as `error_logger' on the node.
 %%
 %% Increments are values that are mostly useful when compared to a previous
 %% one to have an idea what they're doing, because otherwise they'd never
 %% stop increasing: bytes in and out of the node, number of garbage collector
 %% runs, words of memory that were garbage collected, and the global reductions
 %% count for the node.
 %%
 %% The scheduling of the runs and the folding itself are delegated to
 %% `recon_lib:time_fold/6'.
 -spec node_stats(N, Interval, FoldFun, Acc) -> Acc when
       N :: non_neg_integer(),
       Interval :: pos_integer(),
       FoldFun :: fun((Stats, Acc) -> Acc),
       Acc :: term(),
       Stats :: {[Absolutes::{atom(),term()}],
                 [Increments::{atom(),term()}]}.
 node_stats(N, Interval, FoldFun, Init) ->
     %% Stats takes the counters seen at the previous run and returns
     %% {Sample, NewCounters}.
     Stats = fun({{OldIn,OldOut},{OldGCs,OldWords,_}}) ->
         %% Absolutes
         ProcC = erlang:system_info(process_count),
         RunQ = erlang:statistics(run_queue),
         %% whereis/1 returns undefined when error_logger is not registered,
         %% and process_info/2 returns undefined if the process died in
         %% between; neither case may crash the whole stats run.
         LogQ = case whereis(error_logger) of
             undefined -> 0;
             LoggerPid ->
                 case process_info(LoggerPid, message_queue_len) of
                     {message_queue_len, QLen} -> QLen;
                     undefined -> 0
                 end
         end,
         %% Mem (Absolutes)
         Mem = erlang:memory(),
         Tot = proplists:get_value(total, Mem),
         ProcM = proplists:get_value(processes_used,Mem),
         Atom = proplists:get_value(atom_used,Mem),
         Bin = proplists:get_value(binary, Mem),
         Ets = proplists:get_value(ets, Mem),
         %% Incremental
         {{input,In},{output,Out}} = erlang:statistics(io),
         GC={GCs,Words,_} = erlang:statistics(garbage_collection),
         BytesIn = In-OldIn,
         BytesOut = Out-OldOut,
         GCCount = GCs-OldGCs,
         GCWords = Words-OldWords,
         %% Second element: reductions since the previous call to
         %% erlang:statistics(reductions).
         {_, Reds} = erlang:statistics(reductions),
         %% Stats Results
         {{[{process_count,ProcC}, {run_queue,RunQ},
            {error_logger_queue_len,LogQ}, {memory_total,Tot},
            {memory_procs,ProcM}, {memory_atoms,Atom},
            {memory_bin,Bin}, {memory_ets,Ets}],
           [{bytes_in,BytesIn}, {bytes_out,BytesOut},
            {gc_count,GCCount}, {gc_words_reclaimed,GCWords},
            {reductions,Reds}]},
          %% New State
          {{In,Out}, GC}}
     end,
     %% Baseline counters, so that the first sample is already a difference.
     {{input,In},{output,Out}} = erlang:statistics(io),
     Gc = erlang:statistics(garbage_collection),
     recon_lib:time_fold(N, Interval, Stats, {{In,Out}, Gc}, FoldFun, Init).

 %%% OTP & Manipulations %%%

 %% @doc Returns the internal state of an OTP process.
 %% `sys:get_state/1' is tried first (available from R16B01). When that call
 %% fails with `undef', the state is extracted from the `sys:get_status/1'
 %% report instead; that fallback only handles `gen_server' and `gen_fsm'
 %% processes.
 -spec get_state(pid_term()) -> term().
 get_state(PidTerm) ->
     Target = recon_lib:term_to_pid(PidTerm),
     try
         sys:get_state(Target)
     catch
         error:undef ->
             %% The state lives in the last `{data, Props}' entry of the
             %% fifth item of the status report, under a behaviour-specific
             %% key.
             case sys:get_status(Target) of
                 {status, _, {module, gen_server}, Items} ->
                     {data, Fields} = lists:last(lists:nth(5, Items)),
                     proplists:get_value("State", Fields);
                 {status, _, {module, gen_fsm}, Items} ->
                     {data, Fields} = lists:last(lists:nth(5, Items)),
                     proplists:get_value("StateData", Fields)
             end
     end.

 %%% Code & Stuff %%%

 %% @doc Loads `Mod' on every node currently returned by `nodes()'.
 %% Equivalent to `remote_load(nodes(), Mod)'.
 -spec remote_load(module()) -> term().
 remote_load(Mod) ->
     Connected = nodes(),
     remote_load(Connected, Mod).

 %% @doc Loads one or more modules remotely, in a diskless manner. Allows to
 %% share code loaded locally with a remote node that doesn't have it.
 %%
 %% `Nodes' is a single node name or a list of them. The second argument is a
 %% module name or a list of module names. For a single module the result is
 %% the `rpc:multicall/4' result (`{Results, BadNodes}'); for a list of
 %% modules it is the list of those results, one per module. An empty node
 %% list is passed to `rpc:multicall/4' unchanged.
 %%
 %% The object code comes from `code:get_object_code/1' on the local node;
 %% the call fails with a `badmatch' if it cannot be found. Arguments of any
 %% other shape fail with `function_clause'.
 -spec remote_load(Nodes, Mods) -> term() when
       Nodes :: [node()] | node(),
       Mods :: module() | [module()].
 remote_load(Node, Mod) when is_atom(Node) ->
     %% Single node: normalise to a list. `[]' is not an atom, so an empty
     %% node list never ends up wrapped as `[[]]'.
     remote_load([Node], Mod);
 remote_load(Nodes, Mod) when is_list(Nodes), is_atom(Mod) ->
     {Mod, Bin, File} = code:get_object_code(Mod),
     rpc:multicall(Nodes, code, load_binary, [Mod, File, Bin]);
 remote_load(Nodes, Modules) when is_list(Nodes), is_list(Modules) ->
     [remote_load(Nodes, Mod) || Mod <- Modules].

 %% @doc Rebuilds the source code of a module from the abstract code stored
 %% in its beam file, which requires the module to have been compiled with
 %% `debug_info'. The beam file is located with `code:which/1'.
 %% Types and typed records are sadly not formatted the way they look in the
 %% original module: they are shown in the intermediary form used in the AST,
 %% although still inside the right module attributes.
 %% @todo Figure out a way to pretty-print typespecs and records.
 -spec source(module()) -> iolist().
 source(Module) ->
     BeamPath = code:which(Module),
     {ok, {_, [{abstract_code, {_, Forms}}]}} =
         beam_lib:chunks(BeamPath, [abstract_code]),
     FormList = erl_syntax:form_list(Forms),
     erl_prettypr:format(FormList).

 %%% Ports Info %%%

 %% @doc Lists the ports of the node whose `name' is `"tcp_inet"', that is
 %% the TCP ports (the data type) currently open.
 -spec tcp() -> [port()].
 tcp() ->
     DriverName = "tcp_inet",
     recon_lib:port_list(name, DriverName).

 %% @doc Lists the ports of the node whose `name' is `"udp_inet"', that is
 %% the UDP ports (the data type) currently open.
 -spec udp() -> [port()].
 udp() ->
     DriverName = "udp_inet",
     recon_lib:port_list(name, DriverName).

 %% @doc Lists the ports of the node whose `name' is `"sctp_inet"', that is
 %% the SCTP ports (the data type) currently open.
 -spec sctp() -> [port()].
 sctp() ->
     DriverName = "sctp_inet",
     recon_lib:port_list(name, DriverName).

 %% @doc Lists the ports of the node whose `name' is `"efile"', that is the
 %% file handles currently open.
 -spec files() -> [port()].
 files() ->
     DriverName = "efile",
     recon_lib:port_list(name, DriverName).

 %% @doc Shows the different port types found on the node, each with the
 %% number of ports of that type. Entries are ordered by decreasing count,
 %% and by increasing type name among equal counts.
 %%
 %% NOTE(review): the `{Type, Count}' element shape is taken from the sort
 %% fun below, which treats the second element as the count; confirm against
 %% `recon_lib:count/1'.
 -spec port_types() -> [{Type::string(), Count::pos_integer()}].
 port_types() ->
     Names = [Name || {_, Name} <- recon_lib:port_list(name)],
     lists:usort(
         %% sorts by biggest count, smallest type
         fun({TypeA, CountA}, {TypeB, CountB}) ->
             {CountA, TypeB} > {CountB, TypeA}
         end,
         recon_lib:count(Names)
     ).

 %% @doc Samples attribute `Attr' on all inet ports (TCP, UDP, SCTP) through
 %% `recon_lib:inet_attrs/1' and returns at most `Num' entries, ordered from
 %% the largest value to the smallest.
 %%
 %% `Attr' can be the number of octets (bytes) sent, received, or both
 %% (`send_oct', `recv_oct', `oct', respectively), or the number of packets
 %% sent, received, or both (`send_cnt', `recv_cnt', `cnt', respectively).
 %% Individual absolute values for each metric will be returned in the 3rd
 %% position of the resulting tuple.
 %%
 %% @todo Implement this function so it only stores `Num' entries in
 %% memory at any given time, instead of as many as there are
 %% ports.
 -spec inet_count(AttributeName, Num) -> [inet_attrs()] when
       AttributeName :: 'recv_cnt' | 'recv_oct' | 'send_cnt' | 'send_oct'
                      | 'cnt' | 'oct',
       Num :: non_neg_integer().
 inet_count(Attr, Num) ->
     Entries = recon_lib:inet_attrs(Attr),
     %% Strict comparison on the attribute value (2nd element): biggest first.
     Bigger = fun({_, Left, _}, {_, Right, _}) -> Left > Right end,
     Ranked = lists:usort(Bigger, Entries),
     lists:sublist(Ranked, Num).

 %% @doc Samples attribute `Attr' on all inet ports (TCP, UDP, SCTP) twice,
 %% `Time' milliseconds apart, and returns at most `Num' entries ordered from
 %% the largest to the smallest value computed over that window.
 %%
 %% Warning: two full snapshots are gathered and then combined (see
 %% `recon_lib:sliding_window/2'). This can take a heavy toll on memory when
 %% many tens of thousands of ports are open.
 %%
 %% `Attr' can be the number of octets (bytes) sent, received, or both
 %% (`send_oct', `recv_oct', `oct', respectively), or the number of packets
 %% sent, received, or both (`send_cnt', `recv_cnt', `cnt', respectively).
 %% Individual absolute values for each metric will be returned in the 3rd
 %% position of the resulting tuple.
 -spec inet_window(AttributeName, Num, Milliseconds) -> [inet_attrs()] when
       AttributeName :: 'recv_cnt' | 'recv_oct' | 'send_cnt' | 'send_oct'
                      | 'cnt' | 'oct',
       Num :: non_neg_integer(),
       Milliseconds :: pos_integer().
 inet_window(Attr, Num, Time) when is_atom(Attr) ->
     TakeSnapshot = fun() -> recon_lib:inet_attrs(Attr) end,
     {Before, After} = recon_lib:sample(Time, TakeSnapshot),
     Window = recon_lib:sliding_window(Before, After),
     %% Strict comparison on the attribute value (2nd element): biggest first.
     Bigger = fun({_, Left, _}, {_, Right, _}) -> Left > Right end,
     lists:sublist(lists:usort(Bigger, Window), Num).


 %%% RPC Utils %%%

 %% @doc Runs `Fun' on the local node and on every connected node.
 %% Shorthand for `rpc([node()|nodes()], Fun)'.
 -spec rpc(fun(() -> term())) -> {[Success::_],[Fail::_]}.
 rpc(Fun) ->
     Everyone = [node() | nodes()],
     rpc(Everyone, Fun).

 %% @doc Runs `Fun' on `Nodes' without any time limit.
 %% Shorthand for `rpc(Nodes, Fun, infinity)'.
 -spec rpc(node()|[node(),...], fun(() -> term())) -> {[Success::_],[Fail::_]}.
 rpc(Nodes, Fun) ->
     Timeout = infinity,
     rpc(Nodes, Fun, Timeout).

 %% @doc Runs an arbitrary fun (of arity 0) over one or more nodes, through
 %% `rpc:multicall/5', and returns its `{Results, BadNodes}' result.
 %% A single node name is accepted in place of a node list.
 -spec rpc(node()|[node(),...], fun(() -> term()), timeout()) -> {[Success::_],[Fail::_]}.
 rpc(Node, Fun, Timeout) when is_atom(Node) ->
     %% Single node: normalise to a one-element list.
     rpc([Node], Fun, Timeout);
 rpc([_|_] = Nodes, Fun, Timeout) when is_function(Fun, 0) ->
     NoArgs = [],
     rpc:multicall(Nodes, erlang, apply, [Fun, NoArgs], Timeout).

 %% @doc Runs `Fun' on the local node and on every connected node, tagging
 %% each result with the node that produced it.
 %% Shorthand for `named_rpc([node()|nodes()], Fun)'.
 -spec named_rpc(fun(() -> term())) -> {[Success::_],[Fail::_]}.
 named_rpc(Fun) ->
     Everyone = [node() | nodes()],
     named_rpc(Everyone, Fun).

 %% @doc Runs `Fun' on `Nodes' without any time limit, tagging each result
 %% with the node that produced it.
 %% Shorthand for `named_rpc(Nodes, Fun, infinity)'.
 -spec named_rpc(node()|[node(),...], fun(() -> term())) -> {[Success::_],[Fail::_]}.
 named_rpc(Nodes, Fun) ->
     Timeout = infinity,
     named_rpc(Nodes, Fun, Timeout).

 %% @doc Runs an arbitrary fun (of arity 0) over one or more nodes, through
 %% `rpc:multicall/5'. Each successful result is returned as
 %% `{Node, Result}', where `Node' is the node that evaluated the fun.
 %% A single node name is accepted in place of a node list.
 -spec named_rpc(node()|[node(),...], fun(() -> term()), timeout()) -> {[Success::_],[Fail::_]}.
 named_rpc(Node, Fun, Timeout) when is_atom(Node) ->
     %% Single node: normalise to a one-element list.
     named_rpc([Node], Fun, Timeout);
 named_rpc([_|_] = Nodes, Fun, Timeout) when is_function(Fun, 0) ->
     %% node() is evaluated where the wrapper runs, i.e. on the remote side.
     Tagged = fun() -> {node(), Fun()} end,
     rpc:multicall(Nodes, erlang, apply, [Tagged, []], Timeout).
	%%% @author Fred Hebert <mononcqc@ferd.ca>
	%%% [http://ferd.ca/]
	%%% @doc Recon, as a module, provides access to the high-level functionality
	%%% contained in the Recon application.
	%%%
	%%% It has functions in five main categories:
	%%%
	%%% <dl>
	%%% <dt>1. State information</dt>
	%%% <dd>Process information is everything that has to do with the
	%%% general state of the node. Functions such as {@link info/1}
	%%% and {@link info/3} are wrappers to provide more details than
	%%% `erlang:process_info/2', while providing it in a production-safe
	%%% manner.</dd>
	%%% <dd>{@link proc_count/2} and {@link proc_window/3} are to be used
	%%% when you require information about processes in a larger sense:
	%%% biggest consumers of given process information (say memory or
	%%% reductions), either absolutely or over a sliding time window,
	%%% respectively.</dd>
	%%% <dd>{@link bin_leak/1} is a function that can be used to try and
	%%% see if your Erlang node is leaking refc binaries. See the function
	%%% itself for more details.</dd>
	%%% <dd>Functions to access node statistics, in a manner somewhat similar
	%%% to what <a href="https://github.com/ferd/vmstats">vmstats</a>
	%%% provides as a library. There are 3 of them:
	%%% {@link node_stats_print/2}, which displays them,
	%%% {@link node_stats_list/2}, which returns them in a list, and
	%%% {@link node_stats/4}, which provides a fold-like interface
	%%% for stats gathering.</dd>
	%%%
	%%% <dt>2. OTP tools</dt>
	%%% <dd>This category provides tools to interact with pieces of OTP
	%%% more easily. At this point, the only function included is
	%%% {@link get_state/1}, which works as a wrapper around
	%%% `sys:get_state/1' in R16B01, and provides the required
	%%% functionality for older versions of Erlang.</dd>
	%%%
	%%% <dt>3. Code Handling</dt>
	%%% <dd>Specific functions are in `recon' for the sole purpose
	%%% of interacting with source and compiled code.
	%%% {@link remote_load/1} and {@link remote_load/2} will allow
	%%% to take a local module, and load it remotely (in a diskless
	%%% manner) on another Erlang node you're connected to.</dd>
	%%% <dd>{@link source/1} allows to print the source of a loaded module,
	%%% in case it's not available in the currently running node.</dd>
	%%%
	%%% <dt>4. Ports and Sockets</dt>
	%%% <dd>To make it simpler to debug some network-related issues,
	%%% recon contains functions to deal with Erlang ports (raw, file
	%%% handles, or inet). Functions {@link tcp/0}, {@link udp/0},
	%%% {@link sctp/0}, {@link files/0}, and {@link port_types/0} will
	%%% list all the Erlang ports of a given type. The latter function
	%%% prints counts of all individual types.</dd>
	%%% <dd>Finally, the functions {@link inet_count/2} and {@link inet_window/3}
	%%% provide the absolute or sliding window functionality of
	%%% {@link proc_count/2} and {@link proc_window/3} to inet ports
	%%% and connections currently on the node.</dd>
	%%%
	%%% <dt>5. RPC</dt>
	%%% <dd>These are wrappers to make RPC work simpler with clusters of
	%%% Erlang nodes. Default RPC mechanisms (from the `rpc' module)
	%%% make it somewhat painful to call shell-defined funs over node
	%%% boundaries. The functions {@link rpc/1}, {@link rpc/2}, and
	%%% {@link rpc/3} will do it with a simpler interface.</dd>
	%%% <dd>Additionally, when you're running diagnostic code on remote
	%%% nodes and want to know which node evaluated what result, using
	%%% {@link named_rpc/1}, {@link named_rpc/2}, and {@link named_rpc/3}
	%%% will wrap the results in a tuple that tells you which node it's
	%%% coming from, making it easier to identify bad nodes.</dd>
	%%% </dl>
	%%% @end
	-module(recon).
	-export([info/1,info/3,
	proc_count/2, proc_window/3,
	bin_leak/1,
	node_stats_print/2, node_stats_list/2, node_stats/4]).
	-export([get_state/1]).
	-export([remote_load/1, remote_load/2,
	source/1]).
	-export([tcp/0, udp/0, sctp/0, files/0, port_types/0,
	inet_count/2, inet_window/3]).
	-export([rpc/1, rpc/2, rpc/3,
	named_rpc/1, named_rpc/2, named_rpc/3]).

	%%%%%%%%%%%%%
	%%% TYPES %%%
	%%%%%%%%%%%%%
	%% A process entry: the pid, the value of the attribute that was asked for,
	%% and a non-empty list of identifying details (a name atom and/or the
	%% `current_function' / `initial_call' MFAs).
	-type proc_attrs() :: {pid(),
	                       Attr::_,
	                       [Name::atom()
	                       |{current_function, mfa()}
	                       |{initial_call, mfa()}, ...]}.
	%% A port entry: the port, the value of the attribute that was asked for,
	%% and a property list of further details about that port.
	-type inet_attrs() :: {port(),
	                       Attr::_,
	                       [{atom(), term()}]}.

	%% Every form accepted where a process must be designated: a pid, a locally
	%% registered name, a string, a `{global, Name}' or `{via, Module, Name}'
	%% registration, or a triple of non-negative integers.
	-type pid_term() :: pid() | atom() | string()
	                  | {global, term()} | {via, module(), term()}
	                  | {non_neg_integer(), non_neg_integer(), non_neg_integer()}.

	-export_type([proc_attrs/0, inet_attrs/0, pid_term/0]).
	%%%%%%%%%%%%%%%%%%
	%%% PUBLIC API %%%
	%%%%%%%%%%%%%%%%%%

	%%% Process Info %%%

	%% @doc Convenience form of {@link info/1} for a pid given as its three
	%% integer components: `info(A, B, C)' inspects the process `<A.B.C>'.
	-spec info(N,N,N) -> [{atom(), [{atom(),term()}]},...] when
	      N :: non_neg_integer().
	info(A, B, C) ->
	    Pid = recon_lib:triple_to_pid(A, B, C),
	    info(Pid).

	%% @doc Returns process information grouped by theme (`meta', `signals',
	%% `location', `memory' and `work'), in the spirit of
	%% `erlang:process_info/1'.
	%%
	%% Only the items listed in the function body are requested. The content of
	%% the mailbox is deliberately not among them, since it can grow without
	%% bound and is unsafe to copy on a production node; only its length
	%% (`message_queue_len') is reported. A few items that
	%% `erlang:process_info/1' does not return by default (`monitors',
	%% `monitored_by', ...) are included.
	%%
	%% The process can be designated by any {@type pid_term()}: a pid, a locally
	%% registered atom, `{global, Name}', `{via, Module, Name}', a string such as
	%% `"<0.39.0>"' or a triple such as `{0,39,0}'. The conversion is delegated to
	%% `recon_lib:term_to_pid/1'.
	-spec info(pid_term()) -> [{Type, [{Key, Value}]},...] when
	      Type :: meta | signals | location | memory | work,
	      Key :: registered_name | dictionary | group_leader | status
	           | links | monitors | monitored_by | trap_exit | initial_call
	           | current_stacktrace | memory | message_queue_len | heap_size
	           | total_heap_size | garbage_collection | reductions,
	      Value :: term().
	info(PidTerm) ->
	    Pid = recon_lib:term_to_pid(PidTerm),
	    %% One process_info/2 call per section, in the order listed here.
	    Sections = [{meta, [registered_name, dictionary, group_leader, status]},
	                {signals, [links, monitors, monitored_by, trap_exit]},
	                {location, [initial_call, current_stacktrace]},
	                {memory, [memory, message_queue_len, heap_size,
	                          total_heap_size, garbage_collection]},
	                {work, [reductions]}],
	    [{Section, erlang:process_info(Pid, Items)} || {Section, Items} <- Sections].

	%% @doc Samples attribute `AttrName' on every process (through
	%% `recon_lib:proc_attrs/1') and returns at most `Num' entries, ordered
	%% from the largest attribute value to the smallest.
	%% @todo Implement this function so it only stores `Num' entries in
	%% memory at any given time, instead of as many as there are
	%% processes.
	-spec proc_count(AttributeName, Num) -> [proc_attrs()] when
	      AttributeName :: atom(),
	      Num :: non_neg_integer().
	proc_count(AttrName, Num) ->
	    Entries = recon_lib:proc_attrs(AttrName),
	    %% Strict comparison on the attribute value (2nd element): biggest first.
	    Bigger = fun({_, Left, _}, {_, Right, _}) -> Left > Right end,
	    Ranked = lists:usort(Bigger, Entries),
	    lists:sublist(Ranked, Num).

	%% @doc Samples attribute `AttrName' on every process twice, `Time'
	%% milliseconds apart, and returns at most `Num' entries ordered from the
	%% largest to the smallest value computed over that window.
	%%
	%% This is mostly useful when the processes of the node are short-lived,
	%% too short to be inspected with other tools, and you want to know which
	%% kind of process is eating through a lot of resources.
	%%
	%% The result is a snapshot over a sliding window. A program's timeline
	%% during sampling might look like this:
	%%
	%% `--w---- [Sample1] ---x-------------y----- [Sample2] ---z--->'
	%%
	%% Some processes live from `w' to `x', some from `y' to `z', and some
	%% from `x' to `y'. Those are only partially observed, so their figures are
	%% not very significant. If most of your processes live for about the
	%% duration `x'...`y', pick a sampling time smaller than that, so that many
	%% processes span both samples (the equivalent of `w' to `z'). Otherwise the
	%% results are skewed: long-lived processes, which had ten times longer to
	%% accumulate data (say reductions), look like bottlenecks when they are not.
	%%
	%% Warning: two full snapshots are gathered and then combined (see
	%% `recon_lib:sliding_window/2'). This can take a heavy toll on memory when
	%% the node runs many tens of thousands of processes.
	-spec proc_window(AttributeName, Num, Milliseconds) -> [proc_attrs()] when
	      AttributeName :: atom(),
	      Num :: non_neg_integer(),
	      Milliseconds :: pos_integer().
	proc_window(AttrName, Num, Time) ->
	    TakeSnapshot = fun() -> recon_lib:proc_attrs(AttrName) end,
	    {Before, After} = recon_lib:sample(Time, TakeSnapshot),
	    Window = recon_lib:sliding_window(Before, After),
	    %% Strict comparison on the attribute value (2nd element): biggest first.
	    Bigger = fun({_, Left, _}, {_, Right, _}) -> Left > Right end,
	    lists:sublist(lists:usort(Bigger, Window), Num).

	%% @doc Refc binaries can be leaking when barely-busy processes route them
	%% around and do little else, or when extremely busy processes reach a stable
	%% amount of memory allocated and do the vast majority of their work with refc
	%% binaries. When this happens, it may take a very long while before references
	%% get deallocated and refc binaries get to be garbage collected, leading to
	%% Out Of Memory crashes.
	%%
	%% For each process of the node, this function counts the refc binary
	%% references it holds, forces a garbage collection, and counts again. Each
	%% entry carries `length(After) - length(Before)', so a process that freed
	%% many references has a large negative value. Entries are sorted in
	%% ascending order and the first `N' are returned: the processes that freed
	%% the most binaries, potentially highlighting leaks.
	%%
	%% A process that cannot be inspected (typically because it died during the
	%% scan) is reported as `{Pid, 0, []}' rather than aborting the whole scan.
	%%
	%% See <a href="http://www.erlang.org/doc/efficiency_guide/binaryhandling.html#id65722">The efficiency guide</a>
	%% for more details on refc binaries
	-spec bin_leak(pos_integer()) -> [proc_attrs()].
	bin_leak(N) ->
	    lists:sublist(
	        lists:usort(
	            %% Ascending on {Delta, Pid}: the most negative deltas come first.
	            fun({K1,V1,_},{K2,V2,_}) -> {V1,K1} =< {V2,K2} end,
	            [try
	                {ok, {_,Pre,Id}} = recon_lib:proc_attrs(binary, Pid),
	                erlang:garbage_collect(Pid),
	                {ok, {_,Post,_}} = recon_lib:proc_attrs(binary, Pid),
	                {Pid, length(Post)-length(Pre), Id}
	            catch
	                %% Deliberate best effort. The fallback must be a 3-tuple
	                %% like every other entry, otherwise the sort fun above
	                %% fails with function_clause.
	                _:_ -> {Pid, 0, []}
	            end || Pid <- processes()]),
	        N).

	%% @doc Shorthand for `node_stats(N, Interval, fun(X,_) -> io:format("~p~n",[X]) end, ok)':
	%% prints each statistics sample as it is gathered. The accumulator is not
	%% used; it is whatever the last `io:format/2' call returned.
	-spec node_stats_print(Repeat, Interval) -> term() when
	      Repeat :: non_neg_integer(),
	      Interval :: pos_integer().
	node_stats_print(N, Interval) ->
	    PrintSample = fun(Sample, _Ignored) -> io:format("~p~n", [Sample]) end,
	    node_stats(N, Interval, PrintSample, ok).

	%% @doc Shorthand for `node_stats(N, Interval, fun(X,Acc) -> [X|Acc] end, [])'
	%% followed by a reversal, so that the samples come back in the order in
	%% which they were gathered.
	-spec node_stats_list(Repeat, Interval) -> [Stats] when
	      Repeat :: non_neg_integer(),
	      Interval :: pos_integer(),
	      Stats :: {[Absolutes::{atom(),term()}],
	                [Increments::{atom(),term()}]}.
	node_stats_list(N, Interval) ->
	    Collect = fun(Sample, Samples) -> [Sample | Samples] end,
	    NewestFirst = node_stats(N, Interval, Collect, []),
	    lists:reverse(NewestFirst).

	%% @doc Gathers statistics `N' times, waiting `Interval' milliseconds between
	%% each run, and accumulates results using a folding function `FoldFun'.
	%% The function will gather statistics in two forms: Absolutes and Increments.
	%%
	%% Absolutes are values that keep changing with time, and are useful to know
	%% about as a datapoint: process count, size of the run queue, error_logger
	%% queue length, and the memory of the node (total, processes, atoms, binaries,
	%% and ets tables). The error_logger queue length is reported as `undefined'
	%% when no process is registered as `error_logger'.
	%%
	%% Increments are values that are mostly useful when compared to a previous
	%% one to have an idea what they're doing, because otherwise they'd never
	%% stop increasing: bytes in and out of the node, number of garbage collector
	%% runs, words of memory that were garbage collected, and the global reductions
	%% count for the node.
	-spec node_stats(N, Interval, FoldFun, Acc) -> Acc when
	    N :: non_neg_integer(),
	    Interval :: pos_integer(),
	    FoldFun :: fun((Stats, Acc) -> Acc),
	    Acc :: term(),
	    Stats :: {[Absolutes::{atom(),term()}],
	              [Increments::{atom(),term()}]}.
	node_stats(N, Interval, FoldFun, Init) ->
	    %% Stats takes the previous I/O and GC counters, and returns the new
	    %% sample together with the counters to diff against next time.
	    Stats = fun({{OldIn,OldOut},{OldGCs,OldWords,_}}) ->
	        %% Absolutes
	        ProcC = erlang:system_info(process_count),
	        RunQ = erlang:statistics(run_queue),
	        %% error_logger is not guaranteed to be registered, and it may
	        %% exit between the lookup and the process_info/2 call; neither
	        %% case should take the whole stats run down.
	        LogQ = case whereis(error_logger) of
	            undefined ->
	                undefined;
	            LoggerPid ->
	                case process_info(LoggerPid, message_queue_len) of
	                    {message_queue_len, Len} -> Len;
	                    undefined -> undefined
	                end
	        end,
	        %% Mem (Absolutes)
	        Mem = erlang:memory(),
	        Tot = proplists:get_value(total, Mem),
	        ProcM = proplists:get_value(processes_used,Mem),
	        Atom = proplists:get_value(atom_used,Mem),
	        Bin = proplists:get_value(binary, Mem),
	        Ets = proplists:get_value(ets, Mem),
	        %% Incremental
	        {{input,In},{output,Out}} = erlang:statistics(io),
	        GC={GCs,Words,_} = erlang:statistics(garbage_collection),
	        BytesIn = In-OldIn,
	        BytesOut = Out-OldOut,
	        GCCount = GCs-OldGCs,
	        GCWords = Words-OldWords,
	        %% Second element: reductions since the previous call to
	        %% erlang:statistics(reductions).
	        {_, Reds} = erlang:statistics(reductions),
	        %% Stats Results
	        {{[{process_count,ProcC}, {run_queue,RunQ},
	           {error_logger_queue_len,LogQ}, {memory_total,Tot},
	           {memory_procs,ProcM}, {memory_atoms,Atom},
	           {memory_bin,Bin}, {memory_ets,Ets}],
	          [{bytes_in,BytesIn}, {bytes_out,BytesOut},
	           {gc_count,GCCount}, {gc_words_reclaimed,GCWords},
	           {reductions,Reds}]},
	         %% New State
	         {{In,Out}, GC}}
	    end,
	    %% Seed the incremental counters so the first sample is a real delta.
	    {{input,In},{output,Out}} = erlang:statistics(io),
	    Gc = erlang:statistics(garbage_collection),
	    recon_lib:time_fold(N, Interval, Stats, {{In,Out}, Gc}, FoldFun, Init).

	%%% OTP & Manipulations %%%

	%% @doc Returns the internal state of an OTP process.
	%% `sys:get_state/1' is used directly where it exists (R16B01+). On older
	%% versions of OTP, where that call fails with `undef', the state is
	%% extracted from the result of `sys:get_status/1' instead; only
	%% `gen_server' and `gen_fsm' processes are handled by that fallback.
	-spec get_state(pid_term()) -> term().
	get_state(PidTerm) ->
	    Proc = recon_lib:term_to_pid(PidTerm),
	    try
	        sys:get_state(Proc)
	    catch
	        error:undef ->
	            %% The fifth entry of the status data is a list whose last
	            %% element is a `{data, Proplist}' tuple holding the state.
	            case sys:get_status(Proc) of
	                {status, _, {module, gen_server}, Misc} ->
	                    Report = lists:nth(5, Misc),
	                    {data, Fields} = lists:last(Report),
	                    proplists:get_value("State", Fields);
	                {status, _, {module, gen_fsm}, Misc} ->
	                    Report = lists:nth(5, Misc),
	                    {data, Fields} = lists:last(Report),
	                    proplists:get_value("StateData", Fields)
	            end
	    end.

	%%% Code & Stuff %%%

	%% @doc Loads `Mod' on every node currently returned by `nodes()'.
	%% Equivalent to `remote_load(nodes(), Mod)'.
	-spec remote_load(module()) -> term().
	remote_load(Mod) ->
	    Peers = nodes(),
	    remote_load(Peers, Mod).

	%% @doc Loads one or more modules remotely, in a diskless manner. Allows to
	%% share code loaded locally with a remote node that doesn't have it.
	%%
	%% `Nodes' is either a single node or a list of nodes; an empty list is
	%% accepted and contacts no node. The second argument is either one module
	%% name, in which case the result of `rpc:multicall/4' is returned, or a
	%% list of module names, in which case one such result is returned per
	%% module, in order. Any other combination of arguments fails with
	%% `function_clause'.
	-spec remote_load(Nodes, Mods) -> term() when
	    Nodes :: [node()] | node(),
	    Mods :: module() | [module()].
	remote_load(Nodes, Mod) when is_list(Nodes), is_atom(Mod) ->
	    %% Ship the locally available object code to every target node.
	    {Mod, Bin, File} = code:get_object_code(Mod),
	    rpc:multicall(Nodes, code, load_binary, [Mod, File, Bin]);
	remote_load(Nodes, Modules) when is_list(Nodes), is_list(Modules) ->
	    [remote_load(Nodes, Mod) || Mod <- Modules];
	remote_load(Node, Mod) when is_atom(Node) ->
	    %% The guard matters: without it, a list of nodes paired with an
	    %% invalid module argument would be re-wrapped in a list forever.
	    remote_load([Node], Mod).

	%% @doc Rebuilds the source code of a module that was compiled with
	%% `debug_info', by pretty-printing the abstract code stored in its beam
	%% file. Types and typed records are unfortunately not rendered the way
	%% they were written in the original module: they show up in the
	%% intermediary form used in the AST, although they remain in the right
	%% module attributes.
	%% @todo Figure out a way to pretty-print typespecs and records.
	-spec source(module()) -> iolist().
	source(Module) ->
	    BeamPath = code:which(Module),
	    %% Fails with badmatch when the abstract code chunk is not available.
	    {ok, {_Mod, [{abstract_code, {_Vsn, Forms}}]}} =
	        beam_lib:chunks(BeamPath, [abstract_code]),
	    FormList = erl_syntax:form_list(Forms),
	    erl_prettypr:format(FormList).

	%%% Ports Info %%%

	%% @doc Returns the list of all TCP ports (the data type) open on the node,
	%% i.e. the ports whose name is `"tcp_inet"'.
	-spec tcp() -> [port()].
	tcp() ->
	    PortName = "tcp_inet",
	    recon_lib:port_list(name, PortName).

	%% @doc Returns the list of all UDP ports (the data type) open on the node,
	%% i.e. the ports whose name is `"udp_inet"'.
	-spec udp() -> [port()].
	udp() ->
	    PortName = "udp_inet",
	    recon_lib:port_list(name, PortName).

	%% @doc Returns the list of all SCTP ports (the data type) open on the node,
	%% i.e. the ports whose name is `"sctp_inet"'.
	-spec sctp() -> [port()].
	sctp() ->
	    PortName = "sctp_inet",
	    recon_lib:port_list(name, PortName).

	%% @doc Returns the list of all file handles open on the node, i.e. the
	%% ports whose name is `"efile"'.
	-spec files() -> [port()].
	files() ->
	    PortName = "efile",
	    recon_lib:port_list(name, PortName).

	%% @doc Shows a list of all different ports on the node with their respective
	%% types. Each entry pairs a port type (the port's name) with the number of
	%% ports of that type; entries with the biggest count come first, and equal
	%% counts are ordered by smallest type.
	-spec port_types() -> [{Type::string(), Count::pos_integer()}].
	port_types() ->
	    %% One name per port on the node; recon_lib:count/1 tallies them.
	    Names = [Name || {_, Name} <- recon_lib:port_list(name)],
	    lists:usort(
	        %% sorts by biggest count, smallest type
	        fun({KA,VA}, {KB,VB}) -> {VA,KB} > {VB,KA} end,
	        recon_lib:count(Names)
	    ).

	%% @doc Reads one attribute from every inet port (TCP, UDP, SCTP) and
	%% returns the `Num' biggest consumers.
	%%
	%% The attribute can be the number of octets (bytes) sent, received, or
	%% both (`send_oct', `recv_oct', `oct', respectively), or the number of
	%% packets sent, received, or both (`send_cnt', `recv_cnt', `cnt',
	%% respectively). Individual absolute values for each metric will be
	%% returned in the 3rd position of the resulting tuple.
	%%
	%% @todo Implement this function so it only stores `Num' entries in
	%% memory at any given time, instead of as many as there are
	%% ports.
	-spec inet_count(AttributeName, Num) -> [inet_attrs()] when
	    AttributeName :: 'recv_cnt' | 'recv_oct' | 'send_cnt' | 'send_oct'
	                   | 'cnt' | 'oct',
	    Num :: non_neg_integer().
	inet_count(Attr, Num) ->
	    %% Largest second element first.
	    Descending = fun({_, Left, _}, {_, Right, _}) -> Left > Right end,
	    Ranked = lists:usort(Descending, recon_lib:inet_attrs(Attr)),
	    lists:sublist(Ranked, Num).

	%% @doc Reads one attribute from every inet port (TCP, UDP, SCTP) and
	%% returns the biggest entries, over a sliding time window.
	%%
	%% Warning: this function depends on data gathered at two snapshots, and then
	%% building a dictionary with entries to differentiate them. This can take a
	%% heavy toll on memory when you have many dozens of thousands of ports open.
	%%
	%% The attribute can be the number of octets (bytes) sent, received, or
	%% both (`send_oct', `recv_oct', `oct', respectively), or the number of
	%% packets sent, received, or both (`send_cnt', `recv_cnt', `cnt',
	%% respectively). Individual absolute values for each metric will be
	%% returned in the 3rd position of the resulting tuple.
	-spec inet_window(AttributeName, Num, Milliseconds) -> [inet_attrs()] when
	    AttributeName :: 'recv_cnt' | 'recv_oct' | 'send_cnt' | 'send_oct'
	                   | 'cnt' | 'oct',
	    Num :: non_neg_integer(),
	    Milliseconds :: pos_integer().
	inet_window(Attr, Num, Time) when is_atom(Attr) ->
	    %% recon_lib:sample/2 hands back the two snapshots to compare.
	    Snapshot = fun() -> recon_lib:inet_attrs(Attr) end,
	    {Before, After} = recon_lib:sample(Time, Snapshot),
	    %% Largest second element first.
	    Descending = fun({_, Left, _}, {_, Right, _}) -> Left > Right end,
	    Ranked = lists:usort(Descending, recon_lib:sliding_window(Before, After)),
	    lists:sublist(Ranked, Num).


	%%% RPC Utils %%%

	%% @doc Runs `Fun' on the local node and on every node in `nodes()'.
	%% Shorthand for `rpc([node()|nodes()], Fun)'.
	-spec rpc(fun(() -> term())) -> {[Success::_],[Fail::_]}.
	rpc(Fun) ->
	    Targets = [node() | nodes()],
	    rpc(Targets, Fun).

	%% @doc Runs `Fun' on the given node(s) without any time limit.
	%% Shorthand for `rpc(Nodes, Fun, infinity)'.
	-spec rpc(node()|[node(),...], fun(() -> term())) -> {[Success::_],[Fail::_]}.
	rpc(Nodes, Fun) ->
	    Timeout = infinity,
	    rpc(Nodes, Fun, Timeout).

	%% @doc Runs an arbitrary fun (of arity 0) over one or more nodes, through
	%% `rpc:multicall/5'. A single node name is treated as a one-element list.
	-spec rpc(node()|[node(),...], fun(() -> term()), timeout()) -> {[Success::_],[Fail::_]}.
	rpc(Node, Fun, Timeout) when is_atom(Node) ->
	    rpc([Node], Fun, Timeout);
	rpc([_|_] = Nodes, Fun, Timeout) when is_function(Fun, 0) ->
	    CallArgs = [Fun, []],
	    rpc:multicall(Nodes, erlang, apply, CallArgs, Timeout).

	%% @doc Runs `Fun' on the local node and on every node in `nodes()',
	%% tagging results with node names.
	%% Shorthand for `named_rpc([node()|nodes()], Fun)'.
	-spec named_rpc(fun(() -> term())) -> {[Success::_],[Fail::_]}.
	named_rpc(Fun) ->
	    Targets = [node() | nodes()],
	    named_rpc(Targets, Fun).

	%% @doc Runs `Fun' on the given node(s) without any time limit, tagging
	%% results with node names.
	%% Shorthand for `named_rpc(Nodes, Fun, infinity)'.
	-spec named_rpc(node()|[node(),...], fun(() -> term())) -> {[Success::_],[Fail::_]}.
	named_rpc(Nodes, Fun) ->
	    Timeout = infinity,
	    named_rpc(Nodes, Fun, Timeout).

	%% @doc Runs an arbitrary fun (of arity 0) over one or more nodes, and
	%% pairs each result with the name of the node that computed it, as a
	%% `{Node, Result}' tuple. A single node name is treated as a one-element
	%% list.
	-spec named_rpc(node()|[node(),...], fun(() -> term()), timeout()) -> {[Success::_],[Fail::_]}.
	named_rpc(Node, Fun, Timeout) when is_atom(Node) ->
	    named_rpc([Node], Fun, Timeout);
	named_rpc([_|_] = Nodes, Fun, Timeout) when is_function(Fun, 0) ->
	    %% node() is evaluated inside the wrapper, i.e. on the node running it.
	    Tagged = fun() -> {node(), Fun()} end,
	    rpc:multicall(Nodes, erlang, apply, [Tagged, []], Timeout).