Moved more heavy lifting into the backend module to allow for more optimizations. Introduced hyper_binary, using a fixed size binary as an array.
diff --git a/rebar.config b/rebar.config
index b4a0172..8c460c9 100644
--- a/rebar.config
+++ b/rebar.config
@@ -1,3 +1,4 @@
+{cover_enabled, true}.
{deps, [
{basho_stats, "",
{git, "https://github.com/knutin/basho_stats.git", {branch, "master"}}},
diff --git a/src/hyper.erl b/src/hyper.erl
index b30b581..2982473 100644
--- a/src/hyper.erl
+++ b/src/hyper.erl
@@ -7,7 +7,7 @@
-include_lib("eunit/include/eunit.hrl").
-export([new/1, new/2, insert/2, card/1, union/1, union/2, intersect_card/2]).
--export([to_json/1, from_json/1, from_json/2]).
+-export([to_json/1, from_json/1, from_json/2, compact/1]).
-export([perf_report/0, bytes/1]).
-type precision() :: 4..16.
@@ -28,7 +28,7 @@
-spec new(precision()) -> filter().
new(P) ->
- new(P, hyper_gb).
+ new(P, hyper_binary).
-spec new(precision(), module()) -> filter().
new(P, Mod) when 4 =< P andalso P =< 16 andalso is_atom(Mod) ->
@@ -62,9 +62,13 @@
case lists:usort(lists:map(fun (#hyper{p = P, registers = {Mod, _}}) ->
{P, Mod}
end, Filters)) of
- [{_P, _Mod}] ->
- [First | Rest] = Filters,
- lists:foldl(fun union/2, First, Rest)
+ [{_P, Mod}] ->
+ Registers = lists:map(fun (#hyper{registers = {_, R}}) ->
+ R
+ end, Filters),
+
+ [First | _] = Filters,
+ First#hyper{registers = {Mod, Mod:max_merge(Registers)}}
end.
union(#hyper{registers = {Mod, SmallRegisters}} = Small,
@@ -84,10 +88,11 @@
-spec card(filter()) -> float().
-card(#hyper{registers = {Mod, Registers}, p = P}) ->
+card(#hyper{registers = {Mod, Registers0}, p = P}) ->
M = trunc(pow(2, P)),
+ Registers = Mod:compact(Registers0),
- RegisterSum = register_sum(M-1, Mod, Registers, 0),
+ RegisterSum = Mod:register_sum(Registers),
E = alpha(M) * pow(M, 2) / RegisterSum,
Ep = case E =< 5 * M of
@@ -95,7 +100,7 @@
false -> E
end,
- V = count_zeros(M-1, Mod, Registers, 0),
+ V = Mod:zero_count(Registers),
H = case V of
0 ->
@@ -114,16 +119,21 @@
bytes(#hyper{registers = {Mod, Registers}}) ->
Mod:bytes(Registers).
+compact(#hyper{registers = {Mod, Registers}} = Hyper) ->
+ Hyper#hyper{registers = {Mod, Mod:compact(Registers)}}.
+
%%
%% SERIALIZATION
%%
-spec to_json(filter()) -> any().
-to_json(Hyper) ->
- M = trunc(pow(2, Hyper#hyper.p)),
+to_json(#hyper{p = P, registers = {Mod, Registers}}) ->
+ Compact = Mod:compact(Registers),
{[
- {<<"p">>, Hyper#hyper.p},
- {<<"registers">>, encode_registers(M, Hyper#hyper.registers)}
+ {<<"p">>, P},
+ {<<"registers">>, base64:encode(
+ zlib:gzip(
+ Mod:encode_registers(Compact)))}
]}.
-spec from_json(any()) -> filter().
@@ -133,46 +143,14 @@
-spec from_json(any(), module()) -> filter().
from_json({Struct}, Mod) ->
P = proplists:get_value(<<"p">>, Struct),
- {_, Registers} = lists:foldl(fun (0, {I, A}) ->
- {I+1, A};
- (V, {I, A}) ->
- {I+1, Mod:set(I, V, A)}
- end,
- {0, Mod:new(P)},
- decode_registers(
- proplists:get_value(<<"registers">>, Struct))),
+ Bytes = zlib:gunzip(
+ base64:decode(
+ proplists:get_value(<<"registers">>, Struct))),
+ Registers = Mod:decode_registers(Bytes, P),
#hyper{p = P, registers = {Mod, Registers}}.
-encode_registers(M, Registers) ->
- ByteEncoded = encode_registers(M-1, Registers, []),
- base64:encode(
- zlib:gzip(
- iolist_to_binary(ByteEncoded))).
-
-encode_registers(I, _Registers, ByteEncoded) when I < 0 ->
- ByteEncoded;
-
-encode_registers(I, {Mod, Registers}, ByteEncoded) when I >= 0 ->
- Byte = case Mod:get(I, Registers) of
- {ok, V} ->
- <<V:8/integer>>;
- undefined ->
- <<0>>
- end,
- encode_registers(I - 1, {Mod, Registers}, [Byte | ByteEncoded]).
-
-
-decode_registers(B) ->
- ByteEncoded = zlib:gunzip(base64:decode(B)),
- decode_registers(ByteEncoded, []).
-
-decode_registers(<<>>, Acc) ->
- lists:reverse(Acc);
-decode_registers(<<I:8/integer, Rest/binary>>, Acc) ->
- decode_registers(Rest, [I | Acc]).
-
%%
%% HELPERS
%%
@@ -198,27 +176,6 @@
end.
-register_sum(I, _Mod, _Registers, Sum) when I < 0 ->
- Sum;
-register_sum(I, Mod, Registers, Sum) when I >= 0 ->
- Val = case Mod:get(I, Registers) of
- {ok, R} ->
- pow(2, -R);
- undefined ->
- pow(2, -0)
- end,
- register_sum(I - 1, Mod, Registers, Sum + Val).
-
-count_zeros(I, _Mod, _Registers, Count) when I < 0 ->
- Count;
-count_zeros(I, Mod, Registers, Count) when I >= 0 ->
- Val = case Mod:get(I, Registers) =:= undefined of
- true -> 1;
- false -> 0
- end,
- count_zeros(I - 1, Mod, Registers, Count + Val).
-
-
estimate_bias(E, P) ->
BiasVector = list_to_tuple(hyper_const:bias_data(P)),
@@ -243,60 +200,130 @@
%%
-basic_test() ->
- ?assertEqual(1, trunc(card(insert(<<"1">>, new(4, hyper_bisect))))),
- ?assertEqual(1, trunc(card(insert(<<"1">>, new(4, hyper_gb))))),
- ?assertEqual(1, trunc(card(insert(<<"1">>, new(4, hyper_array))))).
+hyper_test_() ->
+ {foreach, fun () -> ok end, fun (_) -> ok end,
+ [
+ ?_test(basic_t()),
+ ?_test(serialization_t()),
+ ?_test(backend_t()),
+ ?_test(encoding_t()),
+ ?_test(register_sum_t()),
+ {timeout, 10, ?_test(error_range_t())},
+ ?_test(many_union_t()),
+ ?_test(union_t()),
+ ?_test(small_big_union_t()),
+ ?_test(intersect_card_t())
+ ]}.
+
+basic_t() ->
+ lists:foreach(fun (Mod) ->
+ ?assertEqual(1, trunc(card(insert(<<"1">>, new(4, Mod)))))
+ end, [hyper_bisect, hyper_binary, hyper_gb, hyper_array]).
-serialization_test() ->
- Hyper = insert_many(generate_unique(10), new(14, hyper_gb)),
+serialization_t() ->
+ Mod = hyper_binary,
+ Hyper = compact(insert_many(generate_unique(10), new(5, Mod))),
- {hyper_gb, L} = Hyper#hyper.registers,
- {hyper_gb, R} = (from_json(to_json(Hyper)))#hyper.registers,
+ {hyper_binary, L} = Hyper#hyper.registers,
+ {hyper_binary, R} = (compact(from_json(to_json(Hyper), Mod)))#hyper.registers,
- ?assertEqual(trunc(card(Hyper)), trunc(card(from_json(to_json(Hyper))))),
- ?assertEqual(Hyper#hyper.p, (from_json(to_json(Hyper)))#hyper.p),
- ?assertEqual(gb_trees:to_list(L), gb_trees:to_list(R)).
+ ?assertEqual(trunc(card(Hyper)), trunc(card(from_json(to_json(Hyper), Mod)))),
+ ?assertEqual(Hyper#hyper.p, (from_json(to_json(Hyper), Mod))#hyper.p),
+ ?assertEqual(L, R).
-backend_test() ->
- Values = generate_unique(1000),
+backend_t() ->
+ Values = generate_unique(15),
- Gb = insert_many(Values, new(14, hyper_gb)),
- Array = insert_many(Values, new(14, hyper_array)),
- Bisect = insert_many(Values, new(14, hyper_bisect)),
+ Gb = compact(insert_many(Values, new(7, hyper_gb))),
+ Array = compact(insert_many(Values, new(7, hyper_array))),
+ Bisect = compact(insert_many(Values, new(7, hyper_bisect))),
+ Binary = compact(insert_many(Values, new(7, hyper_binary))),
?assertEqual(card(Gb), card(Array)),
?assertEqual(card(Gb), card(Bisect)),
+ ?assertEqual(card(Gb), card(Binary)),
+
+ {hyper_gb , GbRegisters} = Gb#hyper.registers,
+ {hyper_array , ArrayRegisters} = Array#hyper.registers,
+ {hyper_bisect, BisectRegisters} = Bisect#hyper.registers,
+ {hyper_binary, BinaryRegisters} = Binary#hyper.registers,
+
+ %% error_logger:info_msg("Gb: ~p~nArray: ~p~nBisect: ~p~nBinary: ~p~n",
+ %% [hyper_gb:encode_registers(GbRegisters),
+ %% hyper_array:encode_registers(ArrayRegisters),
+ %% hyper_bisect:encode_registers(BisectRegisters),
+ %% hyper_binary:encode_registers(BinaryRegisters)]),
+
+ ?assertEqual(hyper_gb:encode_registers(GbRegisters),
+ hyper_array:encode_registers(ArrayRegisters)),
+
+ ?assertEqual(hyper_gb:encode_registers(GbRegisters),
+ hyper_bisect:encode_registers(BisectRegisters)),
+
+ ?assertEqual(hyper_gb:encode_registers(GbRegisters),
+ hyper_binary:encode_registers(BinaryRegisters)),
+
+ {_, {GbSerialized, _}} = (from_json(to_json(Array), hyper_gb))#hyper.registers,
+ ?assertEqual(gb_trees:to_list(element(1, GbRegisters)),
+ gb_trees:to_list(GbSerialized)),
+
+ ?assertEqual(card(Gb), card(from_json(to_json(Array), hyper_gb))),
+ ?assertEqual(Array, from_json(to_json(Array), hyper_array)),
+ ?assertEqual(Array, from_json(to_json(Bisect), hyper_array)),
+ ?assertEqual(Bisect, from_json(to_json(Array), hyper_bisect)),
?assertEqual(to_json(Gb), to_json(Array)),
?assertEqual(to_json(Gb), to_json(Bisect)),
-
- ?assertEqual(Array, from_json(to_json(Array), hyper_array)),
- ?assertEqual(Array, from_json(to_json(Bisect), hyper_array)),
- ?assertEqual(Bisect, from_json(to_json(Array), hyper_bisect)).
+ ?assertEqual(to_json(Gb), to_json(Binary)).
-encoding_test() ->
- Hyper = insert_many(generate_unique(100000), new(14)),
+
+encoding_t() ->
+ Hyper = insert_many(generate_unique(10), new(4)),
?assertEqual(trunc(card(Hyper)), trunc(card(from_json(to_json(Hyper))))).
-error_range_test() ->
- Run = fun (Cardinality, P) ->
+register_sum_t() ->
+ Mods = [hyper_array, hyper_gb, hyper_bisect, hyper_binary],
+ P = 4,
+ M = trunc(math:pow(2, P)),
+
+ SetRegisters = [1, 5, 10],
+ RegisterValue = 3,
+
+ ExpectedSum =
+ (math:pow(2, -0) * M)
+ - (math:pow(2, -0) * length(SetRegisters))
+ + (math:pow(2, -RegisterValue) * length(SetRegisters)),
+
+ [begin
+ Registers = lists:foldl(fun (I, Acc) ->
+ Mod:set(I, RegisterValue, Acc)
+ end, Mod:new(P), SetRegisters),
+ ?assertEqual({Mod, ExpectedSum}, {Mod, Mod:register_sum(Registers)})
+ end || Mod <- Mods].
+
+
+error_range_t() ->
+ Mods = [hyper_gb, hyper_array, hyper_bisect, hyper_binary],
+ Run = fun (Cardinality, P, Mod) ->
lists:foldl(fun (V, H) ->
insert(V, H)
- end, new(P), generate_unique(Cardinality))
+ end, new(P, Mod), generate_unique(Cardinality))
end,
ExpectedError = 0.02,
P = 14,
- lists:foreach(fun (Card) ->
- Estimate = trunc(card(Run(Card, P))),
- ?assert(abs(Estimate - Card) < Card * ExpectedError)
- end, lists:seq(10000, 50000, 10000)).
+ random:seed(1, 2, 3),
-many_union_test() ->
+ [begin
+ Estimate = trunc(card(Run(Card, P, Mod))),
+ ?assert(abs(Estimate - Card) < Card * ExpectedError)
+ end || Card <- lists:seq(1000, 50000, 5000),
+ Mod <- Mods].
+
+many_union_t() ->
random:seed(1, 2, 3),
Card = 1000,
NumSets = 3,
@@ -312,7 +339,7 @@
-union_test() ->
+union_t() ->
random:seed(1, 2, 3),
LeftDistinct = sets:from_list(generate_unique(10000)),
@@ -333,7 +360,7 @@
sets:intersection(LeftDistinct, RightDistinct)))
< 200).
-small_big_union_test() ->
+small_big_union_t() ->
random:seed(1, 2, 3),
SmallCard = 100,
BigCard = 15000, % switches to dense at 10922 items
@@ -352,7 +379,7 @@
-intersect_card_test() ->
+intersect_card_t() ->
random:seed(1, 2, 3),
LeftDistinct = sets:from_list(generate_unique(10000)),
@@ -415,7 +442,7 @@
length(Estimations))),
- {_, Mean, _, _, Sd} = basho_stats_histogram:summary_stats(Hist),
+ {_, Mean, _, _, _} = basho_stats_histogram:summary_stats(Hist),
P99 = basho_stats_histogram:quantile(0.99, Hist),
P1 = basho_stats_histogram:quantile(0.01, Hist),
@@ -456,8 +483,8 @@
perf_report() ->
Ps = [15],
Cards = [1, 100, 1000, 5000, 10000, 15000, 25000, 50000, 100000, 1000000],
- Mods = [hyper_gb, hyper_array, hyper_bisect],
- Repeats = 10,
+ Mods = [hyper_gb, hyper_array, hyper_bisect, hyper_binary],
+ Repeats = 1,
random:seed(1, 2, 3),
@@ -471,7 +498,7 @@
end),
receive {Pid, ElapsedUs} -> ElapsedUs end
end,
- [Run() || _ <- lists:seq(1, Repeats)]
+ lists:sum([Run() || _ <- lists:seq(1, Repeats)]) / Repeats
end,
@@ -488,6 +515,9 @@
CardUs = Time(fun card/1,
[insert_many(generate_unique(Card), new(P, Mod))]),
+ ToJsonUs = Time(fun to_json/1,
+ [insert_many(generate_unique(Card), new(P, Mod))]),
+
Filter = insert_many(generate_unique(Card), new(P, Mod)),
{Mod, Registers} = Filter#hyper.registers,
@@ -497,44 +527,47 @@
Bytes = bytes(Filter),
{Mod, P, Card, Fill, Bytes,
- lists:sum(InsertUs) / Card / Repeats,
- lists:sum(UnionUs) / Repeats,
- lists:sum(CardUs) / Repeats}
+ InsertUs / Card, UnionUs, CardUs, ToJsonUs}
end || Mod <- Mods,
P <- Ps,
Card <- Cards],
- io:format("~s ~s ~s ~s ~s ~s ~s ~s~n",
+ io:format("~s ~s ~s ~s ~s ~s ~s ~s ~s~n",
[string:left("module" , 12, $ ),
- string:left("precision" , 10, $ ),
- string:right("card" , 10, $ ),
- string:right("fill" , 10, $ ),
+ string:left("P" , 4, $ ),
+ string:right("card" , 8, $ ),
+ string:right("fill" , 6, $ ),
string:right("bytes" , 10, $ ),
string:right("insert us" , 10, $ ),
string:right("union ms" , 10, $ ),
- string:right("card ms" , 10, $ )
+ string:right("card ms" , 10, $ ),
+ string:right("json ms" , 10, $ )
]),
- lists:foreach(fun ({Mod, P, Card, Fill, Bytes, AvgInsertUs, AvgUnionUs, AvgCardUs}) ->
+ lists:foreach(fun ({Mod, P, Card, Fill, Bytes,
+ AvgInsertUs, AvgUnionUs, AvgCardUs, AvgToJsonUs}) ->
M = trunc(math:pow(2, P)),
Filled = lists:flatten(io_lib:format("~.2f", [Fill / M])),
+
AvgInsertUsL = lists:flatten(
io_lib:format("~.2f", [AvgInsertUs])),
-
UnionMs = lists:flatten(
io_lib:format("~.2f", [AvgUnionUs / 1000])),
CardMs = lists:flatten(
io_lib:format("~.2f", [AvgCardUs / 1000])),
- io:format("~s ~s ~s ~s ~s ~s ~s ~s~n",
+ ToJsonMs = lists:flatten(
+ io_lib:format("~.2f", [AvgToJsonUs / 1000])),
+ io:format("~s ~s ~s ~s ~s ~s ~s ~s ~s~n",
[
string:left(atom_to_list(Mod) , 12, $ ),
- string:left(integer_to_list(P) , 10, $ ),
- string:right(integer_to_list(Card) , 10, $ ),
- string:right(Filled , 10, $ ),
+ string:left(integer_to_list(P) , 4, $ ),
+ string:right(integer_to_list(Card) , 8, $ ),
+ string:right(Filled , 6, $ ),
string:right(integer_to_list(Bytes), 10, $ ),
string:right(AvgInsertUsL , 10, $ ),
string:right(UnionMs , 10, $ ),
- string:right(CardMs , 10, $ )
+ string:right(CardMs , 10, $ ),
+ string:right(ToJsonMs , 10, $ )
])
end, R).
diff --git a/src/hyper_array.erl b/src/hyper_array.erl
index 3446a17..f4469d7 100644
--- a/src/hyper_array.erl
+++ b/src/hyper_array.erl
@@ -1,5 +1,6 @@
-module(hyper_array).
-export([new/1, get/2, set/3, fold/3, max_merge/2, bytes/1]).
+-export([register_sum/1, zero_count/1, encode_registers/1, decode_registers/2, compact/1]).
-behaviour(hyper_register).
new(P) ->
@@ -35,3 +36,34 @@
bytes(A) ->
erts_debug:flat_size(A) * 8.
+
+register_sum(A) ->
+ array:foldl(fun (_, Value, Sum) ->
+ Sum + math:pow(2, -Value)
+ end, 0, A).
+
+zero_count(A) ->
+ array:foldl(fun (_, 0, Sum) -> Sum + 1;
+ (_, _, Sum) -> Sum
+ end, 0, A).
+
+compact(A) ->
+ A.
+
+encode_registers(A) ->
+ iolist_to_binary(
+ lists:reverse(
+ array:foldl(fun (_, V, Acc) -> [<<V:8/integer>> | Acc] end,
+ [], A))).
+
+decode_registers(Bytes, P) ->
+ do_decode_registers(Bytes, 0, new(P)).
+
+do_decode_registers(<<>>, _, A) ->
+ A;
+do_decode_registers(<<Value:8/integer, Rest/binary>>, I, A) ->
+ NewA = case Value of
+ 0 -> A;
+ N -> array:set(I, N, A)
+ end,
+ do_decode_registers(Rest, I+1, NewA).
diff --git a/src/hyper_binary.erl b/src/hyper_binary.erl
new file mode 100644
index 0000000..5155006
--- /dev/null
+++ b/src/hyper_binary.erl
@@ -0,0 +1,139 @@
+-module(hyper_binary).
+-include_lib("eunit/include/eunit.hrl").
+
+-export([new/1, get/2, set/3, fold/3, compact/1, max_merge/1, max_merge/2, bytes/1]).
+-export([register_sum/1, zero_count/1, encode_registers/1, decode_registers/2]).
+
+-behaviour(hyper_register).
+
+-define(KEY_SIZE, 16).
+-define(REPEAT_SIZE, 16).
+-define(VALUE_SIZE, 8).
+-define(MARKER, "rle").
+
+%% TODO: rle
+
+new(P) ->
+ M = trunc(math:pow(2, P)),
+ {dense, binary:copy(<<0>>, M), [], 0}.
+
+
+set(Index, Value, {dense, B, Tmp, TmpCount}) ->
+ NewTmp = orddict:store(Index, Value, Tmp),
+ case TmpCount < 50 of
+ true ->
+ {dense, B, NewTmp, TmpCount+1};
+ false ->
+ {dense, merge_tmp(B, NewTmp), [], 0}
+ end.
+
+get(Index, {dense, B, Tmp, _TmpCount}) ->
+ case binary:at(B, Index) of
+ 0 ->
+ case orddict:find(Index, Tmp) of
+ {ok, V} ->
+ {ok, V};
+ error ->
+ undefined
+ end;
+ V ->
+ {ok, V}
+ end.
+
+compact({dense, B, Tmp, _TmpCount}) ->
+ {dense, merge_tmp(B, Tmp), [], 0}.
+
+fold(F, Acc, {dense, B, Tmp, _}) ->
+ do_fold(F, Acc, merge_tmp(B, Tmp)).
+
+max_merge([First | Rest]) ->
+ lists:foldl(fun (B, Acc) ->
+ max_merge(B, Acc)
+ end, First, Rest).
+
+max_merge({dense, Small, SmallTmp, _}, {dense, Big, BigTmp, _}) ->
+ {dense, do_merge(merge_tmp(Small, SmallTmp), merge_tmp(Big, BigTmp)), [], 0}.
+
+bytes({dense, B, _, _}) ->
+ erlang:byte_size(B).
+
+
+register_sum(B) ->
+ fold(fun (_, V, Acc) -> Acc + math:pow(2, -V) end, 0, B).
+
+zero_count(B) ->
+ fold(fun (_, 0, Acc) -> Acc + 1;
+ (_, _, Acc) -> Acc
+ end, 0, B).
+
+encode_registers({dense, B, _, _}) ->
+ B.
+
+decode_registers(Bytes, _P) ->
+ {dense, Bytes, [], 0}.
+
+
+%%
+%% INTERNALS
+%%
+
+do_merge(<<>>, <<>>) ->
+ [];
+do_merge(<<Left:?VALUE_SIZE, SmallRest/binary>>,
+ <<Right:?VALUE_SIZE, BigRest/binary>>) ->
+ [max(Left, Right) | do_merge(SmallRest, BigRest)].
+
+do_fold(F, Acc, B) ->
+ do_fold(F, Acc, B, 0).
+
+do_fold(_, Acc, <<>>, _) ->
+ Acc;
+do_fold(F, Acc, <<Value:?VALUE_SIZE/integer, Rest/binary>>, Index) ->
+ do_fold(F, F(Index, Value, Acc), Rest, Index+1).
+
+merge_tmp(B, L) ->
+ iolist_to_binary(
+ lists:reverse(
+ merge_tmp(B, lists:sort(L), -1, []))).
+
+merge_tmp(B, [], _PrevIndex, Acc) ->
+ [B | Acc];
+
+merge_tmp(B, [{Index, Value} | Rest], PrevIndex, Acc) ->
+ I = Index - PrevIndex - 1,
+ case B of
+ <<Left:I/binary, _:?VALUE_SIZE/integer, Right/binary>> ->
+ NewAcc = [<<Value:?VALUE_SIZE/integer>>, Left | Acc],
+ %% error_logger:info_msg("B: ~p~nindex: ~p, previndex: ~p, i: ~p~n"
+ %% "left: ~p~nright: ~p, right size: ~p~n"
+ %% "new acc: ~p~n",
+ %% [B, Index, PrevIndex, I, Left, Right,
+ %% byte_size(Right),
+ %% iolist_to_binary(lists:reverse(NewAcc))]),
+ merge_tmp(Right, Rest, Index, NewAcc);
+ <<Left:I/binary>> ->
+ [<<Value:?VALUE_SIZE/integer>>, Left | Acc]
+ end.
+
+
+%%
+%% TESTS
+%%
+
+
+merge_test() ->
+ Buffered = lists:foldl(fun (I, Acc) -> set(I, I, Acc) end,
+ new(4), lists:seq(0, 15)),
+ Compact = compact(Buffered),
+ ?assertEqual(undefined, get(0, Compact)),
+ ?assertEqual({ok, 3}, get(3, Compact)),
+ ?assertEqual({ok, 15}, get(15, Compact)),
+ ?assertError(badarg, get(16, Compact)).
+
+serialize_test() ->
+ H = compact(lists:foldl(fun (I, Acc) -> set(I, I, Acc) end,
+ new(4), lists:seq(0, 15))),
+ {dense, B, _, _} = H,
+ ?assertEqual(B, encode_registers(H)),
+ ?assertEqual(H, decode_registers(encode_registers(H), 4)).
+
diff --git a/src/hyper_bisect.erl b/src/hyper_bisect.erl
index 34cc7da..dd2f358 100644
--- a/src/hyper_bisect.erl
+++ b/src/hyper_bisect.erl
@@ -1,7 +1,8 @@
-module(hyper_bisect).
-include_lib("eunit/include/eunit.hrl").
--export([new/1, get/2, set/3, fold/3, max_merge/2, bytes/1]).
+-export([new/1, get/2, set/3, fold/3, max_merge/1, max_merge/2, bytes/1]).
+-export([register_sum/1, zero_count/1, encode_registers/1, decode_registers/2, compact/1]).
-behaviour(hyper_register).
-define(KEY_SIZE, 16).
@@ -64,6 +65,13 @@
do_dense_fold(F, Acc, B).
+max_merge(Registers) ->
+ [First | Rest] = Registers,
+ lists:foldl(fun (R, Acc) ->
+ max_merge(R, Acc)
+ end, First, Rest).
+
+
max_merge({sparse, Small, P, T}, {sparse, Big, P, T}) ->
{sparse, bisect:merge(Small, Big), P, T};
@@ -84,10 +92,79 @@
do_dense_merge(Dense, bisect2dense(Sparse, P))))}.
+compact(B) ->
+ B.
+
bytes({sparse, Sparse, _, _}) -> bisect:size(Sparse);
bytes({dense, Dense}) -> erlang:byte_size(Dense).
+register_sum(B) ->
+ M = case B of
+ {dense, Bytes} -> erlang:byte_size(Bytes);
+ {sparse, _, P, _} -> trunc(math:pow(2, P))
+ end,
+
+ {MaxI, Sum} = fold(fun (Index, Value, {I, Acc}) ->
+ Zeros = Index - I - 1,
+ {Index, Acc + math:pow(2, -Value)
+ + (math:pow(2, -0) * Zeros)}
+ end, {-1, 0}, B),
+ Sum + (M - 1 - MaxI) * math:pow(2, -0).
+
+zero_count({dense, _} = B) ->
+ fold(fun (_, 0, Acc) -> Acc + 1;
+ (_, _, Acc) -> Acc
+ end, 0, B);
+zero_count({sparse, B, P, _}) ->
+ M = trunc(math:pow(2, P)),
+ M - bisect:num_keys(B).
+
+encode_registers({dense, B}) ->
+ B;
+encode_registers({sparse, B, P, _}) ->
+ M = trunc(math:pow(2, P)),
+ iolist_to_binary(
+ encode_registers(M-1, B, [])).
+
+
+encode_registers(I, _B, ByteEncoded) when I < 0 ->
+ ByteEncoded;
+
+encode_registers(I, B, ByteEncoded) when I >= 0 ->
+ Byte = case bisect:find(B, <<I:?KEY_SIZE/integer>>) of
+ not_found ->
+ <<0>>;
+ V ->
+ V % already encoded
+ end,
+ encode_registers(I - 1, B, [Byte | ByteEncoded]).
+
+
+
+decode_registers(Bytes, P) ->
+ DenseSize = trunc(math:pow(2, P)),
+ EntrySize = (?KEY_SIZE + ?VALUE_SIZE) div 8,
+ Threshold = DenseSize div EntrySize,
+
+ L = do_decode_registers(Bytes, 0),
+ case length(L) < Threshold of
+ true ->
+ B = bisect:new(?KEY_SIZE div 8, ?VALUE_SIZE div 8),
+ {sparse, bisect:from_orddict(B, L), P, Threshold};
+ false ->
+ {dense, Bytes}
+ end.
+
+do_decode_registers(<<>>, _) ->
+ [];
+do_decode_registers(<<0, Rest/binary>>, I) ->
+ do_decode_registers(Rest, I+1);
+do_decode_registers(<<Value:?VALUE_SIZE, Rest/binary>>, I) ->
+ [{<<I:?KEY_SIZE/integer>>, <<Value:?VALUE_SIZE/integer>>}
+ | do_decode_registers(Rest, I+1)].
+
+
%%
%% INTERNAL
%%
diff --git a/src/hyper_gb.erl b/src/hyper_gb.erl
index 9793245..6c1bcfd 100644
--- a/src/hyper_gb.erl
+++ b/src/hyper_gb.erl
@@ -1,11 +1,14 @@
-module(hyper_gb).
-export([new/1, get/2, set/3, fold/3, max_merge/2, bytes/1]).
+-export([register_sum/1, zero_count/1, encode_registers/1, decode_registers/2, compact/1]).
+-include_lib("eunit/include/eunit.hrl").
+
-behaviour(hyper_register).
-new(_P) ->
- gb_trees:empty().
+new(P) ->
+ {gb_trees:empty(), trunc(math:pow(2, P))}.
-get(Index, T) ->
+get(Index, {T, _M}) ->
case gb_trees:lookup(Index, T) of
{value, V} ->
{ok, V};
@@ -13,22 +16,22 @@
undefined
end.
-set(Index, Value, T) ->
- gb_trees:enter(Index, Value, T).
+set(Index, Value, {T, M}) ->
+ {gb_trees:enter(Index, Value, T), M}.
-max_merge(Left, Right) ->
+max_merge(Small, Big) ->
fold(fun (Index, L, Registers) ->
case get(Index, Registers) of
- {ok, R} when R < L ->
- set(Index, L, Registers);
- {ok, _} ->
- Registers;
- undefined ->
- set(Index, L, Registers)
- end
- end, Right, Left).
+ {ok, R} when R < L ->
+ set(Index, L, Registers);
+ {ok, _} ->
+ Registers;
+ undefined ->
+ set(Index, L, Registers)
+ end
+ end, Big, Small).
-fold(F, A, {_, T}) when is_function(F, 3) ->
+fold(F, A, {{_, T}, _M}) when is_function(F, 3) ->
fold_1(F, A, T).
fold_1(F, Acc0, {Key, Value, Small, Big}) ->
@@ -38,5 +41,84 @@
fold_1(_, Acc, _) ->
Acc.
-bytes(T) ->
+bytes({T, _}) ->
erts_debug:flat_size(T) * 8.
+
+
+register_sum({T, M}) ->
+ {MaxI, Sum} = fold(fun (Index, Value, {I, Acc}) ->
+ Zeroes = Index - I - 1,
+ {Index, Acc + math:pow(2, -Value) +
+ (math:pow(2, -0) * Zeroes)}
+ end, {-1, 0}, {T, M}),
+ Sum + (M - 1 - MaxI) * math:pow(2, -0).
+
+
+zero_count({T, M}) ->
+ M - gb_trees:size(T).
+
+compact(T) ->
+ T.
+
+encode_registers({T, M}) ->
+ iolist_to_binary(
+ encode_registers(M-1, T, [])).
+
+encode_registers(I, _T, ByteEncoded) when I < 0 ->
+ ByteEncoded;
+
+encode_registers(I, T, ByteEncoded) when I >= 0 ->
+ Byte = case gb_trees:lookup(I, T) of
+ {value, V} ->
+ <<V:8/integer>>;
+ none ->
+ <<0>>
+ end,
+ encode_registers(I - 1, T, [Byte | ByteEncoded]).
+
+
+decode_registers(Bytes, P) ->
+ L = do_decode_registers(Bytes, 0),
+ T = gb_trees:from_orddict(L),
+ M = trunc(math:pow(2, P)),
+ {T, M}.
+
+
+do_decode_registers(<<>>, _) ->
+ [];
+do_decode_registers(<<0:8/integer, Rest/binary>>, I) ->
+ do_decode_registers(Rest, I+1);
+do_decode_registers(<<Value:8/integer, Rest/binary>>, I) ->
+ [{I, Value} | do_decode_registers(Rest, I+1)].
+
+%%
+%% TESTS
+%%
+
+sum_test() ->
+ T = set(3, 5, set(1, 1, new(4))),
+
+ ?assertEqual(lists:sum([
+ math:pow(2, -0), % 0
+ math:pow(2, -1), % 1
+ math:pow(2, -0), % 2
+ math:pow(2, -5), % 3
+ math:pow(2, -0), % 4
+ math:pow(2, -0), % 5
+ math:pow(2, -0), % 6
+ math:pow(2, -0), % 7
+ math:pow(2, -0), % 8
+ math:pow(2, -0), % 9
+ math:pow(2, -0), % 10
+ math:pow(2, -0), % 11
+ math:pow(2, -0), % 12
+ math:pow(2, -0), % 13
+ math:pow(2, -0), % 14
+ math:pow(2, -0) % 5
+ ]),
+ register_sum(T)).
+
+zero_test() ->
+ P = 4, M = 16,
+ T = set(3, 5, set(1, 1, new(P))),
+ ?assertEqual(M - 2, zero_count(T)).
diff --git a/src/hyper_register.erl b/src/hyper_register.erl
index e8ef122..b0baa18 100644
--- a/src/hyper_register.erl
+++ b/src/hyper_register.erl
@@ -4,7 +4,6 @@
-callback get(Index :: integer(), hyper:registers()) -> {ok, integer()} | undefined.
-callback set(Index :: integer(), Value :: integer(),
hyper:registers()) -> hyper:registers().
--callback fold(fun(), Acc :: any(), hyper:registers()) -> Acc :: any().
-callback max_merge(hyper:registers(), hyper:registers()) -> hyper:registers().