Use (now bug-free) bisect:bulk_insert/2 for merging two bisects.
diff --git a/src/hyper_bisect.erl b/src/hyper_bisect.erl
index 54236de..f2f7c69 100644
--- a/src/hyper_bisect.erl
+++ b/src/hyper_bisect.erl
@@ -59,19 +59,31 @@
max_merge(Registers) ->
[First | Rest] = Registers,
- lists:foldl(fun (R, Acc) ->
- max_merge(R, Acc)
- end, First, Rest).
+ lists:foldl(fun max_merge/2, First, Rest).
+
max_merge({sparse, Small, P, T}, {sparse, Big, P, T}) ->
- {sparse, bisect:merge(Small, Big), P, T};
+ ToInsert = bisect:foldl(Small,
+ fun (Index, Value, Acc) ->
+ case bisect:find(Big, Index) of
+ not_found ->
+ [{Index, Value} | Acc];
+ V when V >= Value ->
+ Acc;
+ V when V < Value ->
+ [{Index, Value} | Acc]
+ end
+ end,
+ []),
+ Merged = bisect:bulk_insert(Big, lists:sort(ToInsert)),
+ {sparse, Merged, P, T};
max_merge({dense, Left}, {dense, Right}) ->
- {dense, iolist_to_binary(
- lists:reverse(
- do_dense_merge(Left, Right)))};
+ Merged = iolist_to_binary(
+ do_dense_merge(Left, Right)),
+ {dense, Merged};
max_merge({dense, Dense}, {sparse, Sparse, P, _}) ->
{dense, iolist_to_binary(
@@ -117,20 +129,7 @@
encode_registers({sparse, B, P, _}) ->
M = trunc(math:pow(2, P)),
iolist_to_binary(
- encode_registers(M-1, B, [])).
-
-
-encode_registers(I, _B, ByteEncoded) when I < 0 ->
- ByteEncoded;
-
-encode_registers(I, B, ByteEncoded) when I >= 0 ->
- Byte = case bisect:find(B, <<I:?KEY_SIZE/integer>>) of
- not_found ->
- <<0>>;
- V ->
- V % already encoded
- end,
- encode_registers(I - 1, B, [Byte | ByteEncoded]).
+ do_encode_registers(M-1, B, [])).
@@ -153,14 +152,6 @@
{dense, Bytes}
end.
-do_decode_registers(<<>>, _) ->
- [];
-do_decode_registers(<<0, Rest/binary>>, I) ->
- do_decode_registers(Rest, I+1);
-do_decode_registers(<<Value:?VALUE_SIZE, Rest/binary>>, I) ->
- [{<<I:?KEY_SIZE/integer>>, <<Value:?VALUE_SIZE/integer>>}
- | do_decode_registers(Rest, I+1)].
-
%%
%% INTERNAL
@@ -168,7 +159,8 @@
do_dense_merge(<<>>, <<>>) ->
[];
-do_dense_merge(<<Left, LeftRest/binary>>, <<Right, RightRest/binary>>) ->
+do_dense_merge(<<Left:?VALUE_SIZE/integer, LeftRest/binary>>,
+ <<Right:?VALUE_SIZE/integer, RightRest/binary>>) ->
[max(Left, Right) | do_dense_merge(LeftRest, RightRest)].
@@ -209,6 +201,28 @@
iolist_to_binary(lists:reverse(Filled)).
+do_encode_registers(I, _B, ByteEncoded) when I < 0 ->
+ ByteEncoded;
+do_encode_registers(I, B, ByteEncoded) when I >= 0 ->
+ Byte = case bisect:find(B, <<I:?KEY_SIZE/integer>>) of
+ not_found ->
+ <<0>>;
+ V ->
+ V % already encoded
+ end,
+ do_encode_registers(I - 1, B, [Byte | ByteEncoded]).
+
+
+do_decode_registers(<<>>, _) ->
+ [];
+do_decode_registers(<<0, Rest/binary>>, I) ->
+ do_decode_registers(Rest, I+1);
+do_decode_registers(<<Value:?VALUE_SIZE, Rest/binary>>, I) ->
+ [{<<I:?KEY_SIZE/integer>>, <<Value:?VALUE_SIZE/integer>>}
+ | do_decode_registers(Rest, I+1)].
+
+
+
%%
%% TESTS
%%
diff --git a/src/hyper_gb.erl b/src/hyper_gb.erl
index 18c0870..b344762 100644
--- a/src/hyper_gb.erl
+++ b/src/hyper_gb.erl
@@ -1,5 +1,5 @@
-module(hyper_gb).
--export([new/1, set/3, fold/3, max_merge/2, bytes/1]).
+-export([new/1, set/3, max_merge/1, max_merge/2, bytes/1]).
-export([register_sum/1, zero_count/1, encode_registers/1, decode_registers/2, compact/1]).
-include_lib("eunit/include/eunit.hrl").
@@ -24,6 +24,10 @@
{gb_trees:enter(Index, Value, T), M}
end.
+max_merge(Registers) ->
+ [First | Rest] = Registers,
+ lists:foldl(fun (R, Acc) -> max_merge(R, Acc) end,
+ First, Rest).
max_merge(Small, Big) ->
fold(fun (Index, L, Registers) ->
diff --git a/test/hyper_test.erl b/test/hyper_test.erl
index d56d0f8..3d2438d 100644
--- a/test/hyper_test.erl
+++ b/test/hyper_test.erl
@@ -182,13 +182,27 @@
random:seed(1, 2, 3),
Card = 1000,
NumSets = 3,
+ Mod = hyper_bisect,
+ P = 15,
Sets = [sets:from_list(generate_unique(Card)) || _ <- lists:seq(1, NumSets)],
Filters = lists:map(fun (S) ->
hyper:insert_many(sets:to_list(S),
- hyper:new(10, hyper_bisect))
+ hyper:new(P, Mod))
end, Sets),
+ ExpectedFilter = hyper:compact(
+ hyper:insert_many(
+ lists:flatten([sets:to_list(S) || S <- Sets]),
+ hyper:new(P, Mod))),
+ H = hyper:compact(hyper:union(Filters)),
+
+ {Mod, ExpectedRegisters} = ExpectedFilter#hyper.registers,
+ {Mod, ActualRegisters} = H#hyper.registers,
+
+ ?assertEqual(Mod:encode_registers(ExpectedRegisters),
+ Mod:encode_registers(ActualRegisters)),
+
?assert(abs(sets:size(sets:union(Sets)) - hyper:card(hyper:union(Filters)))
< (Card * NumSets) * 0.1).