Fix recon_lib failing on long port or process lists
Nodes with lots of processes or ports can see their proc_count,
proc_window, inet_count, or inet_window functions fail due to a race
condition where:
1. The list of ports or processes is created;
2. The ports or processes are iteratively polled for their properties;
3. Some port or process closes;
4. A badmatch error occurs and the entire function fails.
The error specifically happens in the functions of arity 2 in recon_lib
that fetch the properties of each port or process.
The interface of these functions is being changed to:
- account for the error
- return {ok, State} or {error, Reason} depending on the case
Moreover, the functions of arity 1 in recon_lib that make use of them
are changing so that their list comprehensions filter out failed lookups
(ports or processes that closed in the meantime) -- which we do not care
about anyway.
A similar change is included to respect the new API in recon's refc
binary leak function.
diff --git a/README.md b/README.md
index 97053cd..77bc4cf 100644
--- a/README.md
+++ b/README.md
@@ -12,5 +12,10 @@
Changelog
---------
+- 0.4.0: fixed bug where nodes with lots of processes or ports could see their
+ count or window functions fail because a process or socket closed between the
+ time the function started and before it finished. This ends up changing the
+ API in `recon_lib` for the window and count functions that take a specific
+ pid as an argument.
- 0.3.1: factored out some logic from `recon:info/1` into `recon_lib:term_to_pid`
and allowed arbitrary terms to be used for pids in `recon:get_state/1`.
diff --git a/src/recon.app.src b/src/recon.app.src
index 107fce2..5ed65f7 100644
--- a/src/recon.app.src
+++ b/src/recon.app.src
@@ -1,6 +1,6 @@
{application, recon,
[{description, "Diagnostic tools for production use"},
- {vsn, "0.3.1"},
+ {vsn, "0.4.0"},
{modules, [recon]},
{registered, []},
{applications, [kernel, stdlib]}]}.
diff --git a/src/recon.erl b/src/recon.erl
index 5cf4049..82280d5 100644
--- a/src/recon.erl
+++ b/src/recon.erl
@@ -209,9 +209,9 @@
lists:usort(
fun({K1,V1,_},{K2,V2,_}) -> {V1,K1} =< {V2,K2} end,
[try
- {_,Pre,Id} = recon_lib:proc_attrs(binary, Pid),
+ {ok, {_,Pre,Id}} = recon_lib:proc_attrs(binary, Pid),
erlang:garbage_collect(Pid),
- {_,Post,_} = recon_lib:proc_attrs(binary, Pid),
+ {ok, {_,Post,_}} = recon_lib:proc_attrs(binary, Pid),
{Pid, length(Post)-length(Pre), Id}
catch
_:_ -> {Pid, 0}
diff --git a/src/recon_lib.erl b/src/recon_lib.erl
index 56378af..7084c58 100644
--- a/src/recon_lib.erl
+++ b/src/recon_lib.erl
@@ -67,16 +67,19 @@
-spec proc_attrs(term()) -> [recon:proc_attrs()].
proc_attrs(AttrName) ->
[Attrs || Pid <- processes() -- [self()],
- Attrs <- [proc_attrs(AttrName, Pid)]].
+ {ok, Attrs} <- [proc_attrs(AttrName, Pid)]].
%% @doc Returns the attributes of a given process. This form of attributes
%% is standard for most comparison functions for processes in recon.
--spec proc_attrs(term(), pid()) -> recon:proc_attrs().
+-spec proc_attrs(term(), pid()) -> {ok, recon:proc_attrs()} | {error, term()}.
proc_attrs(AttrName, Pid) ->
- [{_, Attr}, {registered_name,Name}, Init, Cur] =
- process_info(Pid, [AttrName, registered_name,
- current_function, initial_call]),
- {Pid, Attr, [Name || is_atom(Name)]++[Init, Cur]}.
+ case process_info(Pid, [AttrName, registered_name,
+ current_function, initial_call]) of
+ [{_, Attr}, {registered_name,Name}, Init, Cur] ->
+ {ok, {Pid, Attr, [Name || is_atom(Name)]++[Init, Cur]}};
+ undefined ->
+ {error, undefined}
+ end.
%% @doc Returns the attributes ({@link recon:inet_attrs()}) of
%% all inet ports (UDP, SCTP, TCP) of the node.
@@ -88,12 +91,13 @@
Name =:= "udp_inet" orelse
Name =:= "sctp_inet"],
[Attrs || Port <- Ports,
- Attrs <- [inet_attrs(AttrName, Port)]].
+ {ok, Attrs} <- [inet_attrs(AttrName, Port)]].
%% @doc Returns the attributes required for a given inet port (UDP,
%% SCTP, TCP). This form of attributes is standard for most comparison
%% functions for processes in recon.
--spec inet_attrs(AttributeName, port()) -> recon:inet_attrs() when
+-spec inet_attrs(AttributeName, port()) -> {ok,recon:inet_attrs()}
+ | {error,term()} when
AttributeName :: 'recv_cnt' | 'recv_oct' | 'send_cnt' | 'send_oct'
| 'cnt' | 'oct'.
inet_attrs(Attr, Port) ->
@@ -102,9 +106,13 @@
oct -> [recv_oct, send_oct];
_ -> [Attr]
end,
- {ok, Props} = inet:getstat(Port, Attrs),
- ValSum = lists:foldl(fun({_,X},Y) -> X+Y end, 0, Props),
- {Port,ValSum,Props}.
+ case inet:getstat(Port, Attrs) of
+ {ok, Props} ->
+ ValSum = lists:foldl(fun({_,X},Y) -> X+Y end, 0, Props),
+ {ok, {Port,ValSum,Props}};
+ {error, Reason} ->
+ {error, Reason}
+ end.
%% @doc Equivalent of `pid(X,Y,Z)' in the Erlang shell.