Optimizations for buffering writes in hyper_binary.
diff --git a/src/hyper.erl b/src/hyper.erl
index 6775b92..89e385e 100644
--- a/src/hyper.erl
+++ b/src/hyper.erl
@@ -64,7 +64,9 @@
{P, Mod}
end, Filters)) of
[{_P, Mod}] ->
- Registers = lists:map(fun (#hyper{registers = {_, R}}) ->
+ Registers = lists:map(fun (H) ->
+ Compact = compact(H),
+ #hyper{registers = {_, R}} = Compact,
R
end, Filters),
@@ -72,11 +74,8 @@
First#hyper{registers = {Mod, Mod:max_merge(Registers)}}
end.
-union(#hyper{registers = {Mod, SmallRegisters}} = Small,
- #hyper{registers = {Mod, BigRegisters}} = Big)
- when Small#hyper.p =:= Big#hyper.p ->
- NewRegisters = Mod:max_merge(SmallRegisters, BigRegisters),
- Big#hyper{registers = {Mod, NewRegisters}}.
+union(Small, Big) ->
+ union([Small, Big]).
@@ -283,7 +282,6 @@
Mods = [hyper_gb, hyper_array, hyper_bisect, hyper_binary],
Repeats = 10,
- random:seed(1, 2, 3),
Time = fun (F, Args) ->
Run = fun () ->
@@ -300,12 +298,15 @@
R = [begin
+ io:format("."),
+ random:seed(1, 2, 3),
+
M = trunc(math:pow(2, P)),
InsertUs = Time(fun (Values, H) ->
insert_many(Values, H)
end,
[generate_unique(Card), new(P, Mod)]),
- ReusableH = insert_many(generate_unique(Card), new(P, Mod)),
+ ReusableH = compact(insert_many(generate_unique(Card), new(P, Mod))),
UnionUs = Time(fun union/2,
[insert_many(generate_unique(Card div 10), new(P, Mod)),
@@ -329,7 +330,7 @@
end || Mod <- Mods,
P <- Ps,
Card <- Cards],
-
+ io:format("~n"),
io:format("~s ~s ~s ~s ~s ~s ~s ~s ~s~n",
[string:left("module" , 12, $ ),
string:left("P" , 4, $ ),
@@ -344,7 +345,6 @@
lists:foreach(fun ({Mod, P, Card, Fill, Bytes,
AvgInsertUs, AvgUnionUs, AvgCardUs, AvgToJsonUs}) ->
- M = trunc(math:pow(2, P)),
Filled = lists:flatten(io_lib:format("~.2f", [Fill])),
AvgInsertUsL = lists:flatten(
diff --git a/src/hyper_array.erl b/src/hyper_array.erl
index de94f9e..1f6367b 100644
--- a/src/hyper_array.erl
+++ b/src/hyper_array.erl
@@ -1,5 +1,5 @@
-module(hyper_array).
--export([new/1, set/3, fold/3, max_merge/2, bytes/1]).
+-export([new/1, set/3, fold/3, max_merge/1, max_merge/2, bytes/1]).
-export([register_sum/1, zero_count/1, encode_registers/1, decode_registers/2, compact/1]).
-behaviour(hyper_register).
@@ -18,6 +18,10 @@
fold(F, Acc, A) ->
array:sparse_foldl(F, Acc, A).
+max_merge(As) ->
+ [First | Rest] = As,
+ lists:foldl(fun max_merge/2, First, Rest).
+
max_merge(Left, Right) ->
fold(fun (Index, L, Registers) ->
case array:get(Index, Registers) of
diff --git a/src/hyper_binary.erl b/src/hyper_binary.erl
index 46238e7..4a64eb9 100644
--- a/src/hyper_binary.erl
+++ b/src/hyper_binary.erl
@@ -20,19 +20,12 @@
set(Index, Value, {dense, B, Tmp, TmpCount}) ->
case binary:at(B, Index) of
R when R < Value ->
- case lists:keyfind(Index, 1, Tmp) of
- {Index, TmpR} when TmpR > Value ->
- {dense, B, Tmp, TmpCount};
- _ ->
- New = {dense, B,
- lists:keystore(Index, 1, Tmp, {Index, Value}),
- TmpCount + 1},
- case TmpCount < 100 of
- true ->
- New;
- false ->
- compact(New)
- end
+ New = {dense, B, [{Index, Value} | Tmp], TmpCount + 1},
+ case TmpCount < 100 of
+ true ->
+ New;
+ false ->
+ compact(New)
end;
_ ->
{dense, B, Tmp, TmpCount}
@@ -40,18 +33,28 @@
compact({dense, B, Tmp, _TmpCount}) ->
- {dense, merge_tmp(B, lists:sort(Tmp)), [], 0}.
+ MaxR = lists:foldl(fun ({I, V}, Acc) ->
+ case orddict:find(I, Acc) of
+ {ok, R} when R >= V ->
+ Acc;
+ _ ->
+ orddict:store(I, V, Acc)
+ end
+ end, orddict:new(), Tmp),
+ NewB = merge_tmp(B, MaxR),
+ {dense, NewB, [], 0}.
-fold(F, Acc, {dense, B, Tmp, _}) ->
- do_fold(F, Acc, merge_tmp(B, Tmp)).
+fold(F, Acc, {dense, B, [], _}) ->
+ do_fold(F, Acc, B).
max_merge([First | Rest]) ->
lists:foldl(fun (B, Acc) ->
max_merge(B, Acc)
end, First, Rest).
-max_merge({dense, Small, SmallTmp, _}, {dense, Big, BigTmp, _}) ->
- {dense, do_merge(merge_tmp(Small, SmallTmp), merge_tmp(Big, BigTmp)), [], 0}.
+max_merge({dense, Small, [], _}, {dense, Big, [], _}) ->
+ Merged = iolist_to_binary(do_merge(Small, Big)),
+ {dense, Merged, [], 0}.
bytes({dense, B, _, _}) ->
erlang:byte_size(B).
@@ -99,6 +102,7 @@
do_fold(F, Acc, <<Value:?VALUE_SIZE/integer, Rest/binary>>, Index) ->
do_fold(F, F(Index, Value, Acc), Rest, Index+1).
+
merge_tmp(B, L) ->
iolist_to_binary(
lists:reverse(
@@ -109,6 +113,7 @@
merge_tmp(B, [{Index, Value} | Rest], PrevIndex, Acc) ->
I = Index - PrevIndex - 1,
+
case B of
<<Left:I/binary, _:?VALUE_SIZE/integer, Right/binary>> ->
NewAcc = [<<Value:?VALUE_SIZE/integer>>, Left | Acc],
diff --git a/src/hyper_bisect.erl b/src/hyper_bisect.erl
index f2f7c69..bc23396 100644
--- a/src/hyper_bisect.erl
+++ b/src/hyper_bisect.erl
@@ -19,20 +19,18 @@
{sparse, bisect:new(?KEY_SIZE div 8, ?VALUE_SIZE div 8), P, Threshold}.
-set(Index, Value, {sparse, B, P, Threshold} = S) ->
- case bisect:find(B, <<Index:?KEY_SIZE/integer>>) of
- <<R:?VALUE_SIZE/integer>> when R > Value->
- S;
- _ ->
- NewB = bisect:insert(B,
- <<Index:?KEY_SIZE/integer>>,
- <<Value:?VALUE_SIZE/integer>>),
- case bisect:num_keys(NewB) < Threshold of
- true ->
- {sparse, NewB, P, Threshold};
- false ->
- {dense, bisect2dense(NewB, P)}
- end
+set(Index, Value, {sparse, B, P, Threshold}) ->
+ V = <<Value:?VALUE_SIZE/integer>>,
+ UpdateF = fun (OldValue) when OldValue >= V -> OldValue;
+ (OldValue) when OldValue < V -> V
+ end,
+ NewB = bisect:update(B, <<Index:?KEY_SIZE/integer>>, V, UpdateF),
+
+ case bisect:num_keys(NewB) < Threshold of
+ true ->
+ {sparse, NewB, P, Threshold};
+ false ->
+ {dense, bisect2dense(NewB, P)}
end;
set(Index, Value, {dense, B} = D) ->
diff --git a/src/hyper_gb.erl b/src/hyper_gb.erl
index b344762..7627437 100644
--- a/src/hyper_gb.erl
+++ b/src/hyper_gb.erl
@@ -124,7 +124,7 @@
math:pow(2, -0), % 12
math:pow(2, -0), % 13
math:pow(2, -0), % 14
- math:pow(2, -0) % 5
+ math:pow(2, -0) % 15
]),
register_sum(T)).
diff --git a/test/hyper_test.erl b/test/hyper_test.erl
index 3d2438d..d3db19a 100644
--- a/test/hyper_test.erl
+++ b/test/hyper_test.erl
@@ -157,7 +157,8 @@
Registers = lists:foldl(fun (I, Acc) ->
Mod:set(I, RegisterValue, Acc)
end, Mod:new(P), SetRegisters),
- ?assertEqual({Mod, ExpectedSum}, {Mod, Mod:register_sum(Registers)})
+ Compact = Mod:compact(Registers),
+ ?assertEqual({Mod, ExpectedSum}, {Mod, Mod:register_sum(Compact)})
end || Mod <- Mods].
@@ -182,29 +183,40 @@
random:seed(1, 2, 3),
Card = 1000,
NumSets = 3,
- Mod = hyper_bisect,
- P = 15,
- Sets = [sets:from_list(generate_unique(Card)) || _ <- lists:seq(1, NumSets)],
- Filters = lists:map(fun (S) ->
- hyper:insert_many(sets:to_list(S),
- hyper:new(P, Mod))
- end, Sets),
+ [begin
+ M = trunc(math:pow(2, P)),
+ Error = 1.04 / math:sqrt(M),
- ExpectedFilter = hyper:compact(
- hyper:insert_many(
- lists:flatten([sets:to_list(S) || S <- Sets]),
- hyper:new(P, Mod))),
- H = hyper:compact(hyper:union(Filters)),
+ Sets = [sets:from_list(generate_unique(Card))
+ || _ <- lists:seq(1, NumSets)],
+ Filters = [hyper:insert_many(sets:to_list(S), hyper:new(P, Mod))
+ || S <- Sets],
- {Mod, ExpectedRegisters} = ExpectedFilter#hyper.registers,
- {Mod, ActualRegisters} = H#hyper.registers,
+ ExpectedFilter = hyper:compact(
+ hyper:insert_many(
+ lists:flatten([sets:to_list(S) || S <- Sets]),
+ hyper:new(P, Mod))),
+ H = hyper:union(Filters),
- ?assertEqual(Mod:encode_registers(ExpectedRegisters),
- Mod:encode_registers(ActualRegisters)),
+ {Mod, ExpectedRegisters} = ExpectedFilter#hyper.registers,
+ {Mod, ActualRegisters} = H#hyper.registers,
- ?assert(abs(sets:size(sets:union(Sets)) - hyper:card(hyper:union(Filters)))
- < (Card * NumSets) * 0.1).
+ ?assertEqual(Mod:encode_registers(ExpectedRegisters),
+ Mod:encode_registers(ActualRegisters)),
+
+ Delta = abs(sets:size(sets:union(Sets)) - hyper:card(hyper:union(Filters))),
+ case Delta < (Card * NumSets * Error) of
+ true ->
+ ok;
+ false ->
+ error_logger:info_msg("too high error, expected ~.2f%, actual ~.2f%~n"
+ "~p, p = ~p, card = ~p",
+ [Error, Delta / (Card * NumSets), Mod, P, Card]),
+ ?assert(false)
+ end
+ end || Mod <- backends(),
+ P <- [15]].
@@ -220,7 +232,7 @@
LeftHyper = hyper:insert_many(sets:to_list(LeftDistinct), hyper:new(13)),
RightHyper = hyper:insert_many(sets:to_list(RightDistinct), hyper:new(13)),
- UnionHyper = hyper:union(LeftHyper, RightHyper),
+ UnionHyper = hyper:union([LeftHyper, RightHyper]),
Intersection = hyper:card(LeftHyper)
+ hyper:card(RightHyper) - hyper:card(UnionHyper),
@@ -370,7 +382,9 @@
true ->
true;
false ->
- error_logger:info_msg("values~n~pencoded~n~p~nexpected~n~p~n",
+ error_logger:info_msg("values~n~p~n"
+ "encoded~n~p~n"
+ "expected~n~p~n",
[L,
Mod:encode_registers(R),
expected_bytes(P, Values)]),