Moved tests into separate folder. Added PropEr test of encoding. Moved more logic into backends to allow for more optimizations.
diff --git a/Makefile b/Makefile
index 4a2adb1..7dd9884 100644
--- a/Makefile
+++ b/Makefile
@@ -1,2 +1,2 @@
perf_report:
- erl -pa deps/*/ebin ebin -noshell -run hyper perf_report -s init stop
\ No newline at end of file
+ erl -pa deps/*/ebin ebin -noshell -run hyper perf_report -s init stop
diff --git a/rebar.config b/rebar.config
index 8c460c9..66e9137 100644
--- a/rebar.config
+++ b/rebar.config
@@ -1,9 +1,10 @@
-{cover_enabled, true}.
+%% {cover_enabled, true}.
{deps, [
{basho_stats, "",
{git, "https://github.com/knutin/basho_stats.git", {branch, "master"}}},
{bisect, "",
- {git, "https://github.com/knutin/bisect.git", {branch, "master"}}}
+ {git, "https://github.com/knutin/bisect.git", {branch, "master"}}},
+ {proper, "", {git,"https://github.com/manopapad/proper.git", "master"}}
%% {erlang_murmurhash, "",
%% {git, "https://github.com/thekvs/erlang-murmurhash.git", {branch, "master"}}},
]}.
diff --git a/src/hyper.erl b/src/hyper.erl
index 2982473..dc770be 100644
--- a/src/hyper.erl
+++ b/src/hyper.erl
@@ -4,11 +4,11 @@
%% research.google.com/en//pubs/archive/40671.pdf
-module(hyper).
%%-compile(native).
--include_lib("eunit/include/eunit.hrl").
--export([new/1, new/2, insert/2, card/1, union/1, union/2, intersect_card/2]).
--export([to_json/1, from_json/1, from_json/2, compact/1]).
--export([perf_report/0, bytes/1]).
+-export([new/1, new/2, insert/2, insert_many/2]).
+-export([union/1, union/2]).
+-export([card/1, intersect_card/2]).
+-export([to_json/1, from_json/1, from_json/2, compact/1, bytes/1]).
-type precision() :: 4..16.
-type registers() :: any().
@@ -21,6 +21,8 @@
-export_type([filter/0, precision/0, registers/0]).
+%% Exported for testing
+-export([run_of_zeroes/1, perf_report/0]).
%%
%% API
@@ -43,18 +45,17 @@
ZeroCount = run_of_zeroes(RegisterValue) + 1,
- case Mod:get(Index, Registers) of
- {ok, Small} when Small < ZeroCount ->
- Hyper#hyper{registers = {Mod, Mod:set(Index, ZeroCount, Registers)}};
- {ok, Large} when ZeroCount =< Large ->
- Hyper;
- undefined ->
- Hyper#hyper{registers = {Mod, Mod:set(Index, ZeroCount, Registers)}}
- end;
+ %% Registers are only allowed to increase, implement by backend
+ Hyper#hyper{registers = {Mod, Mod:set(Index, ZeroCount, Registers)}};
insert(_Value, _Hyper) ->
error(badarg).
+-spec insert_many([value()], filter()) -> filter().
+insert_many(L, Hyper) ->
+ lists:foldl(fun insert/2, Hyper, L).
+
+
-spec union([filter()]) -> filter().
union(Filters) when is_list(Filters) ->
@@ -199,208 +200,6 @@
%% TESTS
%%
-
-hyper_test_() ->
- {foreach, fun () -> ok end, fun (_) -> ok end,
- [
- ?_test(basic_t()),
- ?_test(serialization_t()),
- ?_test(backend_t()),
- ?_test(encoding_t()),
- ?_test(register_sum_t()),
- {timeout, 10, ?_test(error_range_t())},
- ?_test(many_union_t()),
- ?_test(union_t()),
- ?_test(small_big_union_t()),
- ?_test(intersect_card_t())
- ]}.
-
-basic_t() ->
- lists:foreach(fun (Mod) ->
- ?assertEqual(1, trunc(card(insert(<<"1">>, new(4, Mod)))))
- end, [hyper_bisect, hyper_binary, hyper_gb, hyper_array]).
-
-
-serialization_t() ->
- Mod = hyper_binary,
- Hyper = compact(insert_many(generate_unique(10), new(5, Mod))),
-
- {hyper_binary, L} = Hyper#hyper.registers,
- {hyper_binary, R} = (compact(from_json(to_json(Hyper), Mod)))#hyper.registers,
-
- ?assertEqual(trunc(card(Hyper)), trunc(card(from_json(to_json(Hyper), Mod)))),
- ?assertEqual(Hyper#hyper.p, (from_json(to_json(Hyper), Mod))#hyper.p),
- ?assertEqual(L, R).
-
-
-backend_t() ->
- Values = generate_unique(15),
-
- Gb = compact(insert_many(Values, new(7, hyper_gb))),
- Array = compact(insert_many(Values, new(7, hyper_array))),
- Bisect = compact(insert_many(Values, new(7, hyper_bisect))),
- Binary = compact(insert_many(Values, new(7, hyper_binary))),
-
- ?assertEqual(card(Gb), card(Array)),
- ?assertEqual(card(Gb), card(Bisect)),
- ?assertEqual(card(Gb), card(Binary)),
-
- {hyper_gb , GbRegisters} = Gb#hyper.registers,
- {hyper_array , ArrayRegisters} = Array#hyper.registers,
- {hyper_bisect, BisectRegisters} = Bisect#hyper.registers,
- {hyper_binary, BinaryRegisters} = Binary#hyper.registers,
-
- %% error_logger:info_msg("Gb: ~p~nArray: ~p~nBisect: ~p~nBinary: ~p~n",
- %% [hyper_gb:encode_registers(GbRegisters),
- %% hyper_array:encode_registers(ArrayRegisters),
- %% hyper_bisect:encode_registers(BisectRegisters),
- %% hyper_binary:encode_registers(BinaryRegisters)]),
-
- ?assertEqual(hyper_gb:encode_registers(GbRegisters),
- hyper_array:encode_registers(ArrayRegisters)),
-
- ?assertEqual(hyper_gb:encode_registers(GbRegisters),
- hyper_bisect:encode_registers(BisectRegisters)),
-
- ?assertEqual(hyper_gb:encode_registers(GbRegisters),
- hyper_binary:encode_registers(BinaryRegisters)),
-
- {_, {GbSerialized, _}} = (from_json(to_json(Array), hyper_gb))#hyper.registers,
- ?assertEqual(gb_trees:to_list(element(1, GbRegisters)),
- gb_trees:to_list(GbSerialized)),
-
- ?assertEqual(card(Gb), card(from_json(to_json(Array), hyper_gb))),
- ?assertEqual(Array, from_json(to_json(Array), hyper_array)),
- ?assertEqual(Array, from_json(to_json(Bisect), hyper_array)),
- ?assertEqual(Bisect, from_json(to_json(Array), hyper_bisect)),
-
- ?assertEqual(to_json(Gb), to_json(Array)),
- ?assertEqual(to_json(Gb), to_json(Bisect)),
- ?assertEqual(to_json(Gb), to_json(Binary)).
-
-
-
-encoding_t() ->
- Hyper = insert_many(generate_unique(10), new(4)),
- ?assertEqual(trunc(card(Hyper)), trunc(card(from_json(to_json(Hyper))))).
-
-
-register_sum_t() ->
- Mods = [hyper_array, hyper_gb, hyper_bisect, hyper_binary],
- P = 4,
- M = trunc(math:pow(2, P)),
-
- SetRegisters = [1, 5, 10],
- RegisterValue = 3,
-
- ExpectedSum =
- (math:pow(2, -0) * M)
- - (math:pow(2, -0) * length(SetRegisters))
- + (math:pow(2, -RegisterValue) * length(SetRegisters)),
-
- [begin
- Registers = lists:foldl(fun (I, Acc) ->
- Mod:set(I, RegisterValue, Acc)
- end, Mod:new(P), SetRegisters),
- ?assertEqual({Mod, ExpectedSum}, {Mod, Mod:register_sum(Registers)})
- end || Mod <- Mods].
-
-
-error_range_t() ->
- Mods = [hyper_gb, hyper_array, hyper_bisect, hyper_binary],
- Run = fun (Cardinality, P, Mod) ->
- lists:foldl(fun (V, H) ->
- insert(V, H)
- end, new(P, Mod), generate_unique(Cardinality))
- end,
- ExpectedError = 0.02,
- P = 14,
- random:seed(1, 2, 3),
-
- [begin
- Estimate = trunc(card(Run(Card, P, Mod))),
- ?assert(abs(Estimate - Card) < Card * ExpectedError)
- end || Card <- lists:seq(1000, 50000, 5000),
- Mod <- Mods].
-
-many_union_t() ->
- random:seed(1, 2, 3),
- Card = 1000,
- NumSets = 3,
-
- Sets = [sets:from_list(generate_unique(Card)) || _ <- lists:seq(1, NumSets)],
- Filters = lists:map(fun (S) ->
- insert_many(sets:to_list(S),
- new(10, hyper_bisect))
- end, Sets),
-
- ?assert(abs(sets:size(sets:union(Sets)) - card(union(Filters)))
- < (Card * NumSets) * 0.1).
-
-
-
-union_t() ->
- random:seed(1, 2, 3),
-
- LeftDistinct = sets:from_list(generate_unique(10000)),
-
- RightDistinct = sets:from_list(generate_unique(5000)
- ++ lists:sublist(sets:to_list(LeftDistinct),
- 5000)),
-
- LeftHyper = insert_many(sets:to_list(LeftDistinct), new(13)),
- RightHyper = insert_many(sets:to_list(RightDistinct), new(13)),
-
- UnionHyper = union(LeftHyper, RightHyper),
- Intersection = card(LeftHyper) + card(RightHyper) - card(UnionHyper),
-
- ?assert(abs(card(UnionHyper) - sets:size(sets:union(LeftDistinct, RightDistinct)))
- < 200),
- ?assert(abs(Intersection - sets:size(
- sets:intersection(LeftDistinct, RightDistinct)))
- < 200).
-
-small_big_union_t() ->
- random:seed(1, 2, 3),
- SmallCard = 100,
- BigCard = 15000, % switches to dense at 10922 items
-
- SmallSet = sets:from_list(generate_unique(SmallCard)),
- BigSet = sets:from_list(generate_unique(BigCard)),
-
- SmallHyper = insert_many(sets:to_list(SmallSet), new(15, hyper_bisect)),
- BigHyper = insert_many(sets:to_list(BigSet), new(15, hyper_bisect)),
- ?assertMatch({hyper_bisect, {sparse, _, _, _}}, SmallHyper#hyper.registers),
- ?assertMatch({hyper_bisect, {dense, _}}, BigHyper#hyper.registers),
-
- UnionHyper = union(SmallHyper, BigHyper),
- TrueUnion = sets:size(sets:union(SmallSet, BigSet)),
- ?assert(abs(card(UnionHyper) - TrueUnion) < TrueUnion * 0.01).
-
-
-
-intersect_card_t() ->
- random:seed(1, 2, 3),
-
- LeftDistinct = sets:from_list(generate_unique(10000)),
-
- RightDistinct = sets:from_list(generate_unique(5000)
- ++ lists:sublist(sets:to_list(LeftDistinct),
- 5000)),
-
- LeftHyper = insert_many(sets:to_list(LeftDistinct), new(13)),
- RightHyper = insert_many(sets:to_list(RightDistinct), new(13)),
-
- IntersectCard = intersect_card(LeftHyper, RightHyper),
-
- ?assert(IntersectCard =< hyper:card(hyper:union(LeftHyper, RightHyper))),
-
- %% NOTE: we can't really say much about the error here,
- %% so just pick something and see if the intersection makes sense
- Error = 0.05,
- ?assert((abs(5000 - IntersectCard) / 5000) =< Error).
-
-
%% report_wrapper_test_() ->
%% [{timeout, 600000000, ?_test(estimate_report())}].
@@ -472,8 +271,6 @@
Int = random:uniform(100000000000000),
random_bytes([<<Int:64/integer>> | Acc], N-1).
-insert_many(L, Hyper) ->
- lists:foldl(fun insert/2, Hyper, L).
%%
@@ -503,6 +300,7 @@
R = [begin
+ M = trunc(math:pow(2, P)),
InsertUs = Time(fun (Values, H) ->
insert_many(Values, H)
end,
@@ -520,13 +318,13 @@
Filter = insert_many(generate_unique(Card), new(P, Mod)),
- {Mod, Registers} = Filter#hyper.registers,
- Fill = Mod:fold(fun (_, V, Acc) when V > 0 -> Acc+1;
- (_, _, Acc) -> Acc
- end, 0, Registers),
- Bytes = bytes(Filter),
- {Mod, P, Card, Fill, Bytes,
+ {Mod, Registers} = Filter#hyper.registers,
+ Bytes = Mod:encode_registers(Registers),
+ Filled = lists:filter(fun (I) -> binary:at(Bytes, I) =/= 0 end,
+ lists:seq(0, M-1)),
+
+ {Mod, P, Card, length(Filled) / M, bytes(Filter),
InsertUs / Card, UnionUs, CardUs, ToJsonUs}
end || Mod <- Mods,
@@ -548,7 +346,7 @@
lists:foreach(fun ({Mod, P, Card, Fill, Bytes,
AvgInsertUs, AvgUnionUs, AvgCardUs, AvgToJsonUs}) ->
M = trunc(math:pow(2, P)),
- Filled = lists:flatten(io_lib:format("~.2f", [Fill / M])),
+ Filled = lists:flatten(io_lib:format("~.2f", [Fill])),
AvgInsertUsL = lists:flatten(
io_lib:format("~.2f", [AvgInsertUs])),
diff --git a/src/hyper_array.erl b/src/hyper_array.erl
index f4469d7..de94f9e 100644
--- a/src/hyper_array.erl
+++ b/src/hyper_array.erl
@@ -1,5 +1,5 @@
-module(hyper_array).
--export([new/1, get/2, set/3, fold/3, max_merge/2, bytes/1]).
+-export([new/1, set/3, fold/3, max_merge/2, bytes/1]).
-export([register_sum/1, zero_count/1, encode_registers/1, decode_registers/2, compact/1]).
-behaviour(hyper_register).
@@ -7,30 +7,24 @@
M = trunc(math:pow(2, P)),
array:new([{size, M}, {fixed, true}, {default, 0}]).
-get(Index, A) ->
- case array:get(Index, A) of
- 0 ->
- undefined;
- Value ->
- {ok, Value}
- end.
-
-
set(Index, Value, A) ->
- array:set(Index, Value, A).
+ case array:get(Index, A) of
+ R when R > Value ->
+ A;
+ _ ->
+ array:set(Index, Value, A)
+ end.
fold(F, Acc, A) ->
array:sparse_foldl(F, Acc, A).
max_merge(Left, Right) ->
fold(fun (Index, L, Registers) ->
- case get(Index, Registers) of
- {ok, R} when R < L ->
+ case array:get(Index, Registers) of
+ R when R < L ->
set(Index, L, Registers);
- {ok, _} ->
- Registers;
- undefined ->
- set(Index, L, Registers)
+ _ ->
+ Registers
end
end, Right, Left).
diff --git a/src/hyper_binary.erl b/src/hyper_binary.erl
index 5155006..865ae12 100644
--- a/src/hyper_binary.erl
+++ b/src/hyper_binary.erl
@@ -1,7 +1,7 @@
-module(hyper_binary).
-include_lib("eunit/include/eunit.hrl").
--export([new/1, get/2, set/3, fold/3, compact/1, max_merge/1, max_merge/2, bytes/1]).
+-export([new/1, set/3, fold/3, compact/1, max_merge/1, max_merge/2, bytes/1]).
-export([register_sum/1, zero_count/1, encode_registers/1, decode_registers/2]).
-behaviour(hyper_register).
@@ -17,31 +17,22 @@
M = trunc(math:pow(2, P)),
{dense, binary:copy(<<0>>, M), [], 0}.
-
set(Index, Value, {dense, B, Tmp, TmpCount}) ->
- NewTmp = orddict:store(Index, Value, Tmp),
- case TmpCount < 50 of
- true ->
- {dense, B, NewTmp, TmpCount+1};
- false ->
- {dense, merge_tmp(B, NewTmp), [], 0}
+ case binary:at(B, Index) of
+ R when R < Value ->
+ case lists:keyfind(Index, 1, Tmp) of
+ {Index, TmpR} when TmpR > Value ->
+ {dense, B, Tmp, TmpCount};
+ _ ->
+ {dense, B, lists:keystore(Index, 1, Tmp, {Index, Value}), TmpCount+1}
+ end;
+ _ ->
+ {dense, B, Tmp, TmpCount}
end.
-get(Index, {dense, B, Tmp, _TmpCount}) ->
- case binary:at(B, Index) of
- 0 ->
- case orddict:find(Index, Tmp) of
- {ok, V} ->
- {ok, V};
- error ->
- undefined
- end;
- V ->
- {ok, V}
- end.
compact({dense, B, Tmp, _TmpCount}) ->
- {dense, merge_tmp(B, Tmp), [], 0}.
+ {dense, merge_tmp(B, lists:sort(Tmp)), [], 0}.
fold(F, Acc, {dense, B, Tmp, _}) ->
do_fold(F, Acc, merge_tmp(B, Tmp)).
@@ -122,13 +113,23 @@
merge_test() ->
- Buffered = lists:foldl(fun (I, Acc) -> set(I, I, Acc) end,
- new(4), lists:seq(0, 15)),
- Compact = compact(Buffered),
- ?assertEqual(undefined, get(0, Compact)),
- ?assertEqual({ok, 3}, get(3, Compact)),
- ?assertEqual({ok, 15}, get(15, Compact)),
- ?assertError(badarg, get(16, Compact)).
+ Tmp1 = [{1, 1},
+ {3, 3},
+ {9, 3},
+ {15, 15}],
+ Tmp2 = [{3, 5},
+ {9, 2},
+ {10, 5}],
+
+ {dense, NewB, [], 0} = new(4),
+ {dense, Compact, [], 0} = compact({dense, NewB, Tmp1, length(Tmp1)}),
+ {dense, Compact2, [], 0} = compact({dense, Compact, Tmp2, length(Tmp2)}),
+
+ ?assertEqual(<<0, 1, 0, 5,
+ 0, 0, 0, 0,
+ 0, 2, 5, 0,
+ 0, 0, 0, 15>>, Compact2).
+
serialize_test() ->
H = compact(lists:foldl(fun (I, Acc) -> set(I, I, Acc) end,
@@ -136,4 +137,3 @@
{dense, B, _, _} = H,
?assertEqual(B, encode_registers(H)),
?assertEqual(H, decode_registers(encode_registers(H), 4)).
-
diff --git a/src/hyper_bisect.erl b/src/hyper_bisect.erl
index dd2f358..cbfbff2 100644
--- a/src/hyper_bisect.erl
+++ b/src/hyper_bisect.erl
@@ -1,7 +1,7 @@
-module(hyper_bisect).
-include_lib("eunit/include/eunit.hrl").
--export([new/1, get/2, set/3, fold/3, max_merge/1, max_merge/2, bytes/1]).
+-export([new/1, set/3, fold/3, max_merge/1, max_merge/2, bytes/1]).
-export([register_sum/1, zero_count/1, encode_registers/1, decode_registers/2, compact/1]).
-behaviour(hyper_register).
@@ -19,40 +19,32 @@
{sparse, bisect:new(?KEY_SIZE div 8, ?VALUE_SIZE div 8), P, Threshold}.
-get(Index, {sparse, B, _, _}) ->
+set(Index, Value, {sparse, B, P, Threshold} = S) ->
case bisect:find(B, <<Index:?KEY_SIZE/integer>>) of
- not_found ->
- undefined;
- <<Value:?VALUE_SIZE/integer>> ->
- {ok, Value}
+ <<R:?VALUE_SIZE/integer>> when R > Value->
+ S;
+ _ ->
+ NewB = bisect:insert(B,
+ <<Index:?KEY_SIZE/integer>>,
+ <<Value:?VALUE_SIZE/integer>>),
+ case bisect:num_keys(NewB) < Threshold of
+ true ->
+ {sparse, NewB, P, Threshold};
+ false ->
+ {dense, bisect2dense(NewB, P)}
+ end
end;
-get(Index, {dense, B}) ->
+set(Index, Value, {dense, B} = D) ->
case binary:at(B, Index) of
- 0 ->
- undefined;
- V ->
- {ok, V}
+ R when R > Value ->
+ D;
+ _ ->
+ <<Left:Index/binary, _:?VALUE_SIZE, Right/binary>> = B,
+ {dense, iolist_to_binary([Left, <<Value:?VALUE_SIZE/integer>>, Right])}
end.
-set(Index, Value, {sparse, B, P, Threshold}) ->
- NewB = bisect:insert(B,
- <<Index:?KEY_SIZE/integer>>,
- <<Value:?VALUE_SIZE/integer>>),
-
- case bisect:num_keys(NewB) < Threshold of
- true ->
- {sparse, NewB, P, Threshold};
- false ->
- {dense, bisect2dense(NewB, P)}
- end;
-
-set(Index, Value, {dense, B}) ->
- <<Left:Index/binary, _:?VALUE_SIZE, Right/binary>> = B,
- {dense, iolist_to_binary([Left, <<Value:?VALUE_SIZE/integer>>, Right])}.
-
-
fold(F, Acc, {sparse, B, _, _}) ->
InterfaceF = fun (<<Index:?KEY_SIZE/integer>>,
@@ -235,8 +227,3 @@
0, 0, 0, 5>>,
?assertEqual(M, size(Expected)),
?assertEqual(Expected, Dense).
-
-set_dense_test() ->
- P = 4,
- B = set(2, 2, {dense, binary:copy(<<0>>, trunc(math:pow(2, P)))}),
- ?assertEqual({ok, 2}, get(2, B)).
diff --git a/src/hyper_gb.erl b/src/hyper_gb.erl
index 6c1bcfd..e5627d9 100644
--- a/src/hyper_gb.erl
+++ b/src/hyper_gb.erl
@@ -1,5 +1,5 @@
-module(hyper_gb).
--export([new/1, get/2, set/3, fold/3, max_merge/2, bytes/1]).
+-export([new/1, set/3, fold/3, max_merge/2, bytes/1]).
-export([register_sum/1, zero_count/1, encode_registers/1, decode_registers/2, compact/1]).
-include_lib("eunit/include/eunit.hrl").
@@ -17,7 +17,13 @@
end.
set(Index, Value, {T, M}) ->
- {gb_trees:enter(Index, Value, T), M}.
+ case gb_trees:lookup(Index, T) of
+ {value, R} when R > Value->
+ {T, M};
+ _ ->
+ {gb_trees:enter(Index, Value, T), M}
+ end.
+
max_merge(Small, Big) ->
fold(fun (Index, L, Registers) ->
diff --git a/src/hyper_register.erl b/src/hyper_register.erl
index b0baa18..717a41e 100644
--- a/src/hyper_register.erl
+++ b/src/hyper_register.erl
@@ -1,7 +1,6 @@
-module(hyper_register).
-callback new(P :: hyper:precision()) -> hyper:registers().
--callback get(Index :: integer(), hyper:registers()) -> {ok, integer()} | undefined.
-callback set(Index :: integer(), Value :: integer(),
hyper:registers()) -> hyper:registers().
-callback max_merge(hyper:registers(), hyper:registers()) -> hyper:registers().
diff --git a/test/hyper_test.erl b/test/hyper_test.erl
new file mode 100644
index 0000000..9f719e8
--- /dev/null
+++ b/test/hyper_test.erl
@@ -0,0 +1,339 @@
+-module(hyper_test).
+-include_lib("proper/include/proper.hrl").
+-include_lib("eunit/include/eunit.hrl").
+
+-record(hyper, {p, registers}). % copy of #hyper in hyper.erl
+
+hyper_test_() ->
+ {foreach, fun () -> ok end, fun (_) -> ok end,
+ [
+ ?_test(basic_t()),
+ ?_test(serialization_t()),
+ {timeout, 30, ?_test(backend_t())},
+ ?_test(encoding_t()),
+ ?_test(register_sum_t()),
+ {timeout, 30, ?_test(error_range_t())},
+ ?_test(many_union_t()),
+ ?_test(union_t()),
+ ?_test(small_big_union_t()),
+ ?_test(intersect_card_t()),
+ {timeout, 600, fun () -> [] = proper:module(?MODULE) end}
+ ]}.
+
+basic_t() ->
+ [?assertEqual(1, trunc(
+ hyper:card(
+ hyper:insert(<<"1">>, hyper:new(4, Mod)))))
+ || Mod <- [hyper_bisect, hyper_binary, hyper_gb, hyper_array]].
+
+
+serialization_t() ->
+ Mod = hyper_binary,
+ Hyper = hyper:compact(hyper:insert_many(generate_unique(10), hyper:new(5, Mod))),
+
+ {hyper_binary, L} = Hyper#hyper.registers,
+ {hyper_binary, R} = (hyper:compact(
+ hyper:from_json(
+ hyper:to_json(Hyper), Mod)))#hyper.registers,
+
+ ?assertEqual(trunc(hyper:card(Hyper)),
+ trunc(
+ hyper:card(
+ hyper:from_json(
+ hyper:to_json(Hyper), Mod)))),
+ ?assertEqual(Hyper#hyper.p, (hyper:from_json(
+ hyper:to_json(Hyper), Mod))#hyper.p),
+ ?assertEqual(L, R).
+
+
+backend_t() ->
+ Values = generate_unique(1000000),
+ P = 9,
+ M = trunc(math:pow(2, P)),
+
+ Gb = hyper:compact(hyper:insert_many(Values, hyper:new(P, hyper_gb))),
+ Array = hyper:compact(hyper:insert_many(Values, hyper:new(P, hyper_array))),
+ Bisect = hyper:compact(hyper:insert_many(Values, hyper:new(P, hyper_bisect))),
+ Binary = hyper:compact(hyper:insert_many(Values, hyper:new(P, hyper_binary))),
+
+
+ {hyper_gb , GbRegisters} = Gb#hyper.registers,
+ {hyper_array , ArrayRegisters} = Array#hyper.registers,
+ {hyper_bisect, BisectRegisters} = Bisect#hyper.registers,
+ {hyper_binary, BinaryRegisters} = Binary#hyper.registers,
+
+ %% error_logger:info_msg("Gb: ~p~nArray: ~p~nBisect: ~p~nBinary: ~p~n",
+ %% [hyper_gb:encode_registers(GbRegisters),
+ %% hyper_array:encode_registers(ArrayRegisters),
+ %% hyper_bisect:encode_registers(BisectRegisters),
+ %% hyper_binary:encode_registers(BinaryRegisters)]),
+
+
+ ExpectedRegisters = lists:foldl(
+ fun (Value, Registers) ->
+ Hash = crypto:hash(sha, Value),
+ <<Index:P, RegisterValue:P/bitstring,
+ _/bitstring>> = Hash,
+ ZeroCount = hyper:run_of_zeroes(RegisterValue)
+ + 1,
+
+ case orddict:find(Index, Registers) of
+ {ok, R} when R > ZeroCount ->
+ Registers;
+ _ ->
+ orddict:store(Index, ZeroCount, Registers)
+ end
+ end, orddict:new(), Values),
+ ExpectedBytes = iolist_to_binary(
+ [begin
+ case orddict:find(I, ExpectedRegisters) of
+ {ok, V} ->
+ <<V:8/integer>>;
+ error ->
+ <<0>>
+ end
+ end || I <- lists:seq(0, M-1)]),
+
+ ?assertEqual(ExpectedBytes, hyper_gb:encode_registers(GbRegisters)),
+ ?assertEqual(ExpectedBytes, hyper_array:encode_registers(ArrayRegisters)),
+ ?assertEqual(ExpectedBytes, hyper_bisect:encode_registers(BisectRegisters)),
+ ?assertEqual(ExpectedBytes, hyper_binary:encode_registers(BinaryRegisters)),
+
+ ?assertEqual(hyper:card(Gb),
+ hyper:card(hyper:from_json(hyper:to_json(Array), hyper_gb))),
+ ?assertEqual(Array, hyper:from_json(hyper:to_json(Array), hyper_array)),
+ ?assertEqual(Array, hyper:from_json(hyper:to_json(Bisect), hyper_array)),
+ ?assertEqual(Bisect, hyper:from_json(hyper:to_json(Array), hyper_bisect)),
+
+ ?assertEqual(hyper:to_json(Gb), hyper:to_json(Array)),
+ ?assertEqual(hyper:to_json(Gb), hyper:to_json(Bisect)),
+ ?assertEqual(hyper:to_json(Gb), hyper:to_json(Binary)),
+
+ ?assertEqual(hyper:card(Gb), hyper:card(Array)),
+ ?assertEqual(hyper:card(Gb), hyper:card(Bisect)),
+ ?assertEqual(hyper:card(Gb), hyper:card(Binary)).
+
+
+
+
+encoding_t() ->
+ Hyper = hyper:insert_many(generate_unique(10), hyper:new(4)),
+ ?assertEqual(trunc(hyper:card(Hyper)),
+ trunc(hyper:card(hyper:from_json(hyper:to_json(Hyper))))).
+
+
+register_sum_t() ->
+ Mods = [hyper_array, hyper_gb, hyper_bisect, hyper_binary],
+ P = 4,
+ M = trunc(math:pow(2, P)),
+
+ SetRegisters = [1, 5, 10],
+ RegisterValue = 3,
+
+ ExpectedSum =
+ (math:pow(2, -0) * M)
+ - (math:pow(2, -0) * length(SetRegisters))
+ + (math:pow(2, -RegisterValue) * length(SetRegisters)),
+
+ [begin
+ Registers = lists:foldl(fun (I, Acc) ->
+ Mod:set(I, RegisterValue, Acc)
+ end, Mod:new(P), SetRegisters),
+ ?assertEqual({Mod, ExpectedSum}, {Mod, Mod:register_sum(Registers)})
+ end || Mod <- Mods].
+
+
+error_range_t() ->
+ Mods = [hyper_gb, hyper_array, hyper_bisect, hyper_binary],
+ Run = fun (Cardinality, P, Mod) ->
+ lists:foldl(fun (V, H) ->
+ hyper:insert(V, H)
+ end, hyper:new(P, Mod), generate_unique(Cardinality))
+ end,
+ ExpectedError = 0.02,
+ P = 14,
+ random:seed(1, 2, 3),
+
+ [begin
+ Estimate = trunc(hyper:card(Run(Card, P, Mod))),
+ ?assert(abs(Estimate - Card) < Card * ExpectedError)
+ end || Card <- lists:seq(1000, 50000, 5000),
+ Mod <- Mods].
+
+many_union_t() ->
+ random:seed(1, 2, 3),
+ Card = 1000,
+ NumSets = 3,
+
+ Sets = [sets:from_list(generate_unique(Card)) || _ <- lists:seq(1, NumSets)],
+ Filters = lists:map(fun (S) ->
+ hyper:insert_many(sets:to_list(S),
+ hyper:new(10, hyper_bisect))
+ end, Sets),
+
+ ?assert(abs(sets:size(sets:union(Sets)) - hyper:card(hyper:union(Filters)))
+ < (Card * NumSets) * 0.1).
+
+
+
+union_t() ->
+ random:seed(1, 2, 3),
+
+ LeftDistinct = sets:from_list(generate_unique(10000)),
+
+ RightDistinct = sets:from_list(generate_unique(5000)
+ ++ lists:sublist(sets:to_list(LeftDistinct),
+ 5000)),
+
+ LeftHyper = hyper:insert_many(sets:to_list(LeftDistinct), hyper:new(13)),
+ RightHyper = hyper:insert_many(sets:to_list(RightDistinct), hyper:new(13)),
+
+ UnionHyper = hyper:union(LeftHyper, RightHyper),
+ Intersection = hyper:card(LeftHyper)
+ + hyper:card(RightHyper) - hyper:card(UnionHyper),
+
+ ?assert(abs(hyper:card(UnionHyper) -
+ sets:size(sets:union(LeftDistinct, RightDistinct)))
+ < 200),
+ ?assert(abs(Intersection - sets:size(
+ sets:intersection(LeftDistinct, RightDistinct)))
+ < 200).
+
+small_big_union_t() ->
+ random:seed(1, 2, 3),
+ SmallCard = 100,
+ BigCard = 15000, % switches to dense at 10922 items
+
+ SmallSet = sets:from_list(generate_unique(SmallCard)),
+ BigSet = sets:from_list(generate_unique(BigCard)),
+
+ SmallHyper = hyper:insert_many(sets:to_list(SmallSet),
+ hyper:new(15, hyper_bisect)),
+ BigHyper = hyper:insert_many(sets:to_list(BigSet),
+ hyper:new(15, hyper_bisect)),
+ ?assertMatch({hyper_bisect, {sparse, _, _, _}}, SmallHyper#hyper.registers),
+ ?assertMatch({hyper_bisect, {dense, _}}, BigHyper#hyper.registers),
+
+ UnionHyper = hyper:union(SmallHyper, BigHyper),
+ TrueUnion = sets:size(sets:union(SmallSet, BigSet)),
+ ?assert(abs(hyper:card(UnionHyper) - TrueUnion) < TrueUnion * 0.01).
+
+
+
+intersect_card_t() ->
+ random:seed(1, 2, 3),
+
+ LeftDistinct = sets:from_list(generate_unique(10000)),
+
+ RightDistinct = sets:from_list(generate_unique(5000)
+ ++ lists:sublist(sets:to_list(LeftDistinct),
+ 5000)),
+
+ LeftHyper = hyper:insert_many(sets:to_list(LeftDistinct), hyper:new(13)),
+ RightHyper = hyper:insert_many(sets:to_list(RightDistinct), hyper:new(13)),
+
+ IntersectCard = hyper:intersect_card(LeftHyper, RightHyper),
+
+ ?assert(IntersectCard =< hyper:card(hyper:union(LeftHyper, RightHyper))),
+
+ %% NOTE: we can't really say much about the error here,
+ %% so just pick something and see if the intersection makes sense
+ Error = 0.05,
+ ?assert((abs(5000 - IntersectCard) / 5000) =< Error).
+
+
+%%
+%% PROPERTIES
+%%
+
+backends() ->
+ [hyper_gb, hyper_array, hyper_bisect, hyper_binary].
+
+gen_values() ->
+ %% ?LET(S, oneof(lists:seq(1, 100000, 10)), generate_unique(S)).
+ ?SIZED(Size, generate_unique(Size*4)).
+
+expected_bytes(P, Values) ->
+ M = trunc(math:pow(2, P)),
+ ExpectedRegisters = lists:foldl(
+ fun (Value, Registers) ->
+ Hash = crypto:hash(sha, Value),
+ <<Index:P, RegisterValue:P/bitstring,
+ _/bitstring>> = Hash,
+ ZeroCount = hyper:run_of_zeroes(RegisterValue)
+ + 1,
+
+ case orddict:find(Index, Registers) of
+ {ok, R} when R > ZeroCount ->
+ Registers;
+ _ ->
+ orddict:store(Index, ZeroCount, Registers)
+ end
+ end, orddict:new(), Values),
+ iolist_to_binary(
+ [begin
+ case orddict:find(I, ExpectedRegisters) of
+ {ok, V} ->
+ <<V:8/integer>>;
+ error ->
+ <<0>>
+ end
+ end || I <- lists:seq(0, M-1)]).
+
+
+prop_getset() ->
+ ?FORALL({Mod, P, Values}, {oneof(backends()), range(4, 16), gen_values()},
+ begin
+ L = lists:map(
+ fun (Value) ->
+ Hash = crypto:hash(sha, Value),
+ <<Index:P, RegisterValue:P/bitstring,
+ _/bitstring>> = Hash,
+ ZeroCount = hyper:run_of_zeroes(RegisterValue)
+ + 1,
+ {Index, ZeroCount}
+ end, Values),
+
+ R = lists:foldl(
+ fun ({Index, ZeroCount}, Register) ->
+ Mod:set(Index, ZeroCount, Register)
+ end, Mod:new(P), L),
+ case Mod:encode_registers(Mod:compact(R))
+ =:= expected_bytes(P, Values) of
+ true ->
+ true;
+ false ->
+ error_logger:info_msg("values~n~pencoded~n~p~nexpected~n~p~n",
+ [L,
+ Mod:encode_registers(R),
+ expected_bytes(P, Values)]),
+ false
+ end
+ end).
+
+
+%%
+%% HELPERS
+%%
+
+
+generate_unique(N) ->
+ generate_unique(lists:usort(random_bytes(N)), N).
+
+
+generate_unique(L, N) ->
+ case length(L) of
+ N ->
+ L;
+ Less ->
+ generate_unique(lists:usort(random_bytes(N - Less) ++ L), N)
+ end.
+
+
+random_bytes(N) ->
+ random_bytes([], N).
+
+random_bytes(Acc, 0) -> Acc;
+random_bytes(Acc, N) ->
+ Int = random:uniform(100000000000000),
+ random_bytes([<<Int:64/integer>> | Acc], N-1).