blob: d882fa9de3ad53fe9a8e243458fc571bf2bb1db4 [file] [log] [blame]
%% taken from http://crunchyd.com/scutil/
%% All code here is MIT Licensed
%% http://scutil.com/license.html
-module(folsom_statistics_scutil).
-export([
arithmetic_mean/1,
geometric_mean/1,
harmonic_mean/1,
kendall_correlation/2,
spearman_correlation/2,
pearson_correlation/2
]).
-compile([native]).
% http://en.wikipedia.org/wiki/Arithmetic_mean
arithmetic_mean([]) ->
0.0;
arithmetic_mean(List) when is_list(List) ->
lists:sum(List) / length(List).
% http://en.wikipedia.org/wiki/Geometric_mean
geometric_mean([]) ->
0.0;
geometric_mean(List) when is_list(List) ->
math:exp(arithmetic_mean([ math:log(X) || X<-List ])).
% http://en.wikipedia.org/wiki/Harmonic_mean
harmonic_mean([]) ->
0.0;
harmonic_mean(List) when is_list(List) ->
length(List) / lists:sum([ 1/X || X<-List ]).
% seems to match the value returned by the 'cor' (method="kendal") R function
% http://en.wikipedia.org/wiki/Kendall_tau_rank_correlation_coefficient
kendall_correlation(List1, List2) when is_list(List1), is_list(List2) ->
{RA,_} = lists:unzip(tied_ordered_ranking(List1)),
{RB,_} = lists:unzip(tied_ordered_ranking(List2)),
Ordering = lists:keysort(1, lists:zip(RA,RB)),
{_,OrdB} = lists:unzip(Ordering),
N = length(List1),
P = lists:sum(kendall_right_of(OrdB, [])),
-(( (4*P) / (N * (N - 1))) - 1).
% seems to match the value returned by the 'cor' (method="spearman") R function
% http://en.wikipedia.org/wiki/Spearman%27s_rank_correlation_coefficient
spearman_correlation(List1, List2) when is_list(List1), is_list(List2) ->
{TR1,_} = lists:unzip(simple_ranking(List1)),
{TR2,_} = lists:unzip(simple_ranking(List2)),
Numerator = 6 * lists:sum([ square(D1-D2) || {D1,D2} <- lists:zip(TR1,TR2) ]),
Denominator = math:pow(length(List1),3)-length(List1),
1-(Numerator/Denominator).
% seems to match the value returned by the 'cor' (method="pearson") R function
% http://en.wikipedia.org/wiki/Pearson_correlation_coefficient
pearson_correlation(List1, List2) when is_list(List1), is_list(List2) ->
SumXY = lists:sum([A*B || {A,B} <- lists:zip(List1,List2) ]), % the sum of the products of each matched pair
SumX = lists:sum(List1),
SumY = lists:sum(List2),
SumXX = lists:sum([L*L || L<-List1]), % the sums of the squared items
SumYY = lists:sum([L*L || L<-List2]),
N = length(List1),
case math:sqrt( ( (N*SumXX)-(SumX*SumX) ) * ( (N*SumYY)-(SumY*SumY) ) ) of
0 ->
0.0; % some nasty value sets otherwise cause divide by zero
0.0 ->
0.0; % eg [ [1,1,1,1,1], [1,1,2,1,2] ]
Denom ->
Numer = (N*SumXY) - (SumX * SumY),
(Numer/Denom)
end.
%%%===================================================================
%%% Internal functions
%%%==================================================================
simple_ranking(List) when is_list(List) ->
lists:zip(lists:seq(1,length(List)),lists:reverse(lists:sort(List))).
tied_ranking(List) ->
tied_rank_worker(simple_ranking(List), [], no_prev_value).
tied_ordered_ranking(List) when is_list(List) ->
tied_ordered_ranking(List, tied_ranking(List), []).
tied_ordered_ranking([], [], Work) ->
lists:reverse(Work);
tied_ordered_ranking([Front|Rem], Ranks, Work) ->
{value,Item} = lists:keysearch(Front,2,Ranks),
{IRank,Front} = Item,
tied_ordered_ranking(Rem, Ranks--[Item], [{IRank,Front}]++Work).
square(Value) ->
math:pow(Value, 2).
kendall_right_of([], Work) ->
lists:reverse(Work);
kendall_right_of([F|R], Work) ->
kendall_right_of(R, [kendall_right_of_item(F,R)]++Work).
kendall_right_of_item(B, Rem) ->
length([R || R <- Rem, R < B]).
tied_add_prev(Work, {FoundAt, NewValue}) ->
lists:duplicate( length(FoundAt), {lists:sum(FoundAt)/length(FoundAt), NewValue} ) ++ Work.
tied_rank_worker([], Work, PrevValue) ->
lists:reverse(tied_add_prev(Work, PrevValue));
tied_rank_worker([Item|Remainder], Work, PrevValue) ->
case PrevValue of
no_prev_value ->
{BaseRank,BaseVal} = Item,
tied_rank_worker(Remainder, Work, {[BaseRank],BaseVal});
{FoundAt,OldVal} ->
case Item of
{Id,OldVal} ->
tied_rank_worker(Remainder, Work, {[Id]++FoundAt,OldVal});
{Id,NewVal} ->
tied_rank_worker(Remainder, tied_add_prev(Work, PrevValue), {[Id],NewVal})
end
end.