Merge pull request #15 from rodo/master

Add unit test on uncovered function, move test data from src/ to test/
diff --git a/src/bear.erl b/src/bear.erl
index 3feb0bc..7d9eed9 100644
--- a/src/bear.erl
+++ b/src/bear.erl
@@ -524,15 +524,3 @@
     erlang:max(1, V);
 perc(P, Len) when is_float(P), 0 =< P, P =< 1 ->
     erlang:max(1, round(P * Len)).
-
-
-test_values() ->
-    [1,1,1,1,1,1,1,
-     2,2,2,2,2,2,2,
-     3,3,3,3,3,3,3,3,3,3,3,3,3,3,
-     4,4,4,4,4,4,4,4,4,4,4,4,4,4,
-     5,5,5,5,5,5,5,5,5,5,5,5,5,5,
-     6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
-     7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
-     8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
-     9,9,9,9,9,9,9].
diff --git a/test/bear_test.erl b/test/bear_test.erl
index 5b2b4e1..ce35ea3 100644
--- a/test/bear_test.erl
+++ b/test/bear_test.erl
@@ -30,7 +30,7 @@
 
 -include_lib("eunit/include/eunit.hrl").
 
--define(PRECISION, 1.0e15).
+-define(PRECISION_DIGIT, 6).
 
 get_statistics_1_empty_test() ->
     %% get_statistics/1
@@ -54,28 +54,29 @@
 get_statistics_1_regular_test() ->
     %% get_statistics/1
     %% Non empty set of values
-    Percentile = [{50, 5},{75, 8},{90, 9},{95, 10},{99, 10},{999, 10}],
-    Stats = bear:get_statistics(lists:seq(1,10)),
+    Percentile = [{50, -10},{75, 23},{90, 43},{95, 46},{99, 50},{999, 50}],
+    Stats = bear:get_statistics(sample1()),
 
     {geometric_mean, Geometric} = lists:keyfind(geometric_mean, 1, Stats),
     {harmonic_mean, Harmonic} = lists:keyfind(harmonic_mean, 1, Stats),
     {variance, Variance} = lists:keyfind(variance, 1, Stats),
     {standard_deviation, StandardDeviation} = lists:keyfind(standard_deviation, 1, Stats),
     {kurtosis, Kurtosis} = lists:keyfind(kurtosis, 1, Stats),
+    {skewness, Skewness} = lists:keyfind(skewness, 1, Stats),
 
-    ?assertEqual({min, 1}, lists:keyfind(min, 1, Stats)),
-    ?assertEqual({max, 10}, lists:keyfind(max, 1, Stats)),
-    ?assertEqual({arithmetic_mean, 5.5}, lists:keyfind(arithmetic_mean, 1, Stats)),
-    ?assertEqual(4528728688116766, erlang:trunc(?PRECISION * Geometric)),
-    ?assertEqual(3414171521474055, erlang:trunc(?PRECISION * Harmonic)),
-    ?assertEqual({median, 5}, lists:keyfind(median, 1, Stats)),
-    ?assertEqual(9166666666666666, erlang:trunc(?PRECISION * Variance)),
-    ?assertEqual(3027650354097491, erlang:trunc(?PRECISION * StandardDeviation)),
-    ?assertEqual({skewness, 0.0}, lists:keyfind(skewness, 1, Stats)),
-    ?assertEqual(-1561636363636363, erlang:trunc(?PRECISION * Kurtosis)),
+    ?assertEqual({min, -49}, lists:keyfind(min, 1, Stats)),
+    ?assertEqual({max, 50}, lists:keyfind(max, 1, Stats)),
+    ?assertEqual({arithmetic_mean, -1.66}, lists:keyfind(arithmetic_mean, 1, Stats)),
+    ?assertEqual(true, approx(4.08326, Geometric)),
+    ?assertEqual(true, approx(54.255629738, Harmonic)),
+    ?assertEqual({median, -10}, lists:keyfind(median, 1, Stats)),
+    ?assertEqual(true, approx(921.0453061, Variance)),
+    ?assertEqual(true, approx(30.348728, StandardDeviation)),
+    ?assertEqual(true, approx(0.148722, Skewness)),
+    ?assertEqual(true, approx(-1.2651687, Kurtosis)),
     ?assertEqual({percentile, Percentile}, lists:keyfind(percentile, 1, Stats)),
-    ?assertEqual({histogram, [{6,6},{11,4},{16,0}]}, lists:keyfind(histogram, 1, Stats)),
-    ?assertEqual({n, 10}, lists:keyfind(n, 1, Stats)).
+    ?assertEqual({histogram, [{-20,16},{11,16},{41,12},{71,6}]}, lists:keyfind(histogram, 1, Stats)),
+    ?assertEqual({n, 50}, lists:keyfind(n, 1, Stats)).
 
 get_statistics_2_1_test() ->
     %% get_statistics/2
@@ -152,26 +153,33 @@
     C = bear:update_bin(4, [4], Dict),
     ?assertEqual(1, dict:fetch(4, C)).
 
-get_covariance_test() ->
+get_covariance_exceptions_test() ->
     %% Array 1 is too short
     ?assertEqual(0.0, bear:get_covariance([], [2,1,2,3,4,5,6])),
     %% Array 2 is too short
     ?assertEqual(0.0, bear:get_covariance([1,2,3,4,5,6], [])),
     %% diffenrent arry length
-    ?assertEqual(0.0, bear:get_covariance([1,2,3,4,5,6], [1,2,3,4,5,6,7])),
+    ?assertEqual(0.0, bear:get_covariance([1,2,3,4,5,6], [1,2,3,4,5,6,7])).
+
+get_covariance_regular_test() ->
     %% Usual case
-    ?assertEqual(-30944444444444444, erlang:trunc(?PRECISION * bear:get_covariance([11,2,3,41,5,9], [34,2,23,4,5,6]))).
+    %% The result differs from the one R computes, because R uses an unbiased estimate
+    %% http://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Covariance
+    ?assertEqual(true, approx(170.813599, bear:get_covariance(sample1(),sample2()))).
 
 ranks_of_test() ->
     ?assertEqual([4.0,3.0,1.0,2.0], bear:ranks_of([3,4,15,6])).
 
-get_pearson_correlation_test() ->
+get_pearson_correlation_exceptions_test() ->
     ?assertEqual(0.0, bear:get_pearson_correlation([], 42)),
     ?assertEqual(0.0, bear:get_pearson_correlation(42, [])),
     ?assertEqual(0.0, bear:get_pearson_correlation(lists:seq(1,10), lists:seq(1,11))),
     ?assertEqual(1.0, bear:get_pearson_correlation(lists:seq(1,10), lists:seq(1,10))),
-    ?assertEqual(1.0, bear:get_pearson_correlation(lists:seq(0,10), lists:seq(5,15))),
-    ?assertEqual(1.0, bear:get_pearson_correlation(lists:seq(40,60,2), lists:seq(10,20))).
+    ?assertEqual(1.0, bear:get_pearson_correlation(lists:seq(0,10), lists:seq(5,15))).
+
+get_pearson_correlation_regular_test() ->
+    %% Target value was calculated with R
+    ?assertEqual(true, approx(0.2068785, bear:get_pearson_correlation(sample1(), sample2()))).
 
 get_pearson_correlation_nullresult_test() ->
     %% The two series do not correlate
@@ -193,25 +201,33 @@
     ?assertEqual(3, bear:get_bin_count(9, 15, 3)),
     ?assertEqual(4, bear:get_bin_count(10.2, 20.2, 4)).
 
-get_kendall_correlation_test()->
+get_kendall_correlation_exceptions_test()->
     ?assertEqual(0.0, bear:get_kendall_correlation([], [])),
     ?assertEqual(0.0, bear:get_kendall_correlation([], [1,2,3,4,5,6,7])),
     ?assertEqual(0.0, bear:get_kendall_correlation([1,2,3,4,5,6,7],[])),
-    ?assertEqual(0.0, bear:get_kendall_correlation(lists:seq(1,10),lists:seq(1,11))),
-    ?assertEqual(1.0, bear:get_kendall_correlation([1,2,3,4,5,6,7], [2,3,4,5,6,7,9])).
+    ?assertEqual(0.0, bear:get_kendall_correlation(lists:seq(1,10),lists:seq(1,11))).
 
-get_spearman_correlation_test()->
+get_kendall_correlation_regular_test()->
+    Kendall = bear:get_kendall_correlation(sample1(order), sample2(order)),
+    ?assertEqual(true, approx(0.9787755, Kendall)).
+
+kendall_correlation_test()->
+    Kendall = bear:kendall_correlation(sample1(order), sample2(order)),
+    ?assertEqual(true, approx(0.9787755, Kendall)).
+
+get_spearman_correlation_exceptions_test()->
     ?assertEqual(0.0, bear:get_spearman_correlation([], [])),
     ?assertEqual(0.0, bear:get_spearman_correlation([], [1,2,3,4,5,6,7])),
     ?assertEqual(0.0, bear:get_spearman_correlation([1,2,3,4,5,6,7],[])),
-    ?assertEqual(0.0, bear:get_spearman_correlation(lists:seq(1,10),lists:seq(1,11))),
-    ?assertEqual(1.0, bear:get_spearman_correlation([1,2,3,4,5,6,7], [2,3,4,5,6,7,9])).
+    ?assertEqual(0.0, bear:get_spearman_correlation(lists:seq(1,10),lists:seq(1,11))).
 
+get_spearman_correlation_regular_test()->
+    ?assertEqual(true, approx(0.997888, bear:get_spearman_correlation(sample1(order), sample2(order)))).
 
 math_log_test() ->
     ?assertEqual(1, bear:math_log(0)),
     ?assertEqual(1.0, bear:math_log(0.0)),
-    ?assertEqual(3737669618283368, erlang:trunc(?PRECISION * bear:math_log(42))).
+    ?assertEqual(true, approx(3.737669618283368, bear:math_log(42))).
 
 inverse_test() ->
     ?assertEqual(0, bear:inverse(0)),
@@ -236,12 +252,25 @@
     ?assertEqual([{2.0,5},{2.0,5},{2.0,5},{2.0,5},{2.0,5},{2.0,5}],
                  bear:tied_rank_worker([{2.0,5},{2.0,5}], [{2.0,5}], {[1,2,3], 5})).
 
+perc_test() ->
+    ?assertEqual(14, bear:perc(36, 40)),
+    ?assertEqual(5, bear:perc(900, 5)),
+    ?assertEqual(5, bear:perc(0.9, 5)).
+
+get_statistics_subset_nev_test() ->
+    %% Not enough values case
+    ?assertEqual([], bear:get_statistics_subset([1,2], [])).
+
+get_statistics_subset_regular_test() ->
+    %% Regular case
+    ?assertEqual([{max, 50},{min, -49}], bear:get_statistics_subset(sample1(), [max,min])).
+
 subset_test() ->
-    Stats = bear:get_statistics(bear:test_values()),
+    Stats = bear:get_statistics(test_values()),
     match_values(Stats).
 
 full_subset_test() ->
-    Stats = bear:get_statistics(bear:test_values()),
+    Stats = bear:get_statistics(test_values()),
     match_values2(Stats).
 
 negative_test() ->
@@ -255,7 +284,7 @@
     [{min, -10}] = bear:get_statistics_subset(Values, [min]).
 
 match_values([H|T]) ->
-    Res = bear:get_statistics_subset(bear:test_values(), [mk_item(H)]),
+    Res = bear:get_statistics_subset(test_values(), [mk_item(H)]),
     Res = [H],
     match_values(T);
 match_values([]) ->
@@ -268,5 +297,61 @@
 
 match_values2(Stats) ->
     Items = [mk_item(I) || I <- Stats],
-    Stats = bear:get_statistics_subset(bear:test_values(), Items),
+    Stats = bear:get_statistics_subset(test_values(), Items),
     ok.
+
+test_values() ->
+    [1,1,1,1,1,1,1,
+     2,2,2,2,2,2,2,
+     3,3,3,3,3,3,3,3,3,3,3,3,3,3,
+     4,4,4,4,4,4,4,4,4,4,4,4,4,4,
+     5,5,5,5,5,5,5,5,5,5,5,5,5,5,
+     6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
+     7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
+     8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
+     9,9,9,9,9,9,9].
+
+negative_values() ->
+    %% All values are negative
+    [-1,-1,-1,-1,-1,-1,-1,
+     -2,-2,-2,-2,-2,-2,-2,
+     -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
+     -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
+     -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
+     -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
+     -7,-7,-7,-7,-7,-7,-7,-7,-7,-7,-7,-7,-7,-7,-7,-7,-7,-7,-7,-7,-7,
+     -8,-8,-8,-8,-8,-8,-8,-8,-8,-8,-8,-8,-8,-8,-8,-8,-8,-8,-8,-8,-8,
+     -9,-9,-9,-9,-9,-9,-9].
+
+%% True when Low =< Value =< High (both bounds included), false otherwise.
+between(Value, Low, High) -> Low =< Value andalso Value =< High.
+
+approx(Target, Value) ->
+    Delta = math:pow(10, - ?PRECISION_DIGIT),
+    %% Strictly inside Target +/- Delta -> true; otherwise hand back Value itself.
+    case (Value > Target - Delta) andalso (Value < Target + Delta) of
+        true -> true;
+        false -> Value
+    end.
+
+check_sample_test() ->
+    ?assertEqual(50, length(sample1())),
+    ?assertEqual(50, length(sample1(order))),
+    ?assertEqual(50, length(sample2())),
+    ?assertEqual(50, length(sample2(order))).
+
+%% sample1/0 passed through lists:sort/1; only the atom `order' is accepted.
+sample1(order) -> lists:sort(sample1()).
+
+%% sample2/0 passed through lists:sort/1; only the atom `order' is accepted.
+sample2(order) -> lists:sort(sample2()).
+
+sample1() ->
+    %% data from file bear/samples/data.csv
+    %% first column X
+    [-16,-18,-47,22,-18,36,25,49,-24,15,36,-10,-21,43,-35,1,-24,10,33,-21,-18,-36,-36,-43,-37,-10,23,50,31,-49,43,46,22,-43,12,-47,15,-14,6,-31,46,-8,0,-46,-16,-22,6,10,38,-11].
+
+sample2() ->
+    %% data from file bear/samples/data.csv
+    %% second column Y
+    [33,20,-35,16,-19,8,25,3,4,10,36,-20,-41,43,28,39,-30,3,-47,-23,17,-6,-50,16,-26,-49,8,-31,24,16,32,27,-19,-32,-17,1,-37,25,-50,-32,-42,-22,25,18,-34,-37,7,-13,16,10].