blob: 4a64eb9bb7e270cd7eb11e7633dab206dbc47c4e [file] [log] [blame]
-module(hyper_binary).
-include_lib("eunit/include/eunit.hrl").
-export([new/1, set/3, fold/3, compact/1, max_merge/1, max_merge/2, bytes/1]).
-export([register_sum/1, zero_count/1, encode_registers/1, decode_registers/2]).
-behaviour(hyper_register).
-define(KEY_SIZE, 16).
-define(REPEAT_SIZE, 16).
-define(VALUE_SIZE, 8).
-define(MARKER, "rle").
%% TODO: rle
new(P) ->
M = trunc(math:pow(2, P)),
{dense, binary:copy(<<0>>, M), [], 0}.
set(Index, Value, {dense, B, Tmp, TmpCount}) ->
case binary:at(B, Index) of
R when R < Value ->
New = {dense, B, [{Index, Value} | Tmp], TmpCount + 1},
case TmpCount < 100 of
true ->
New;
false ->
compact(New)
end;
_ ->
{dense, B, Tmp, TmpCount}
end.
compact({dense, B, Tmp, _TmpCount}) ->
MaxR = lists:foldl(fun ({I, V}, Acc) ->
case orddict:find(I, Acc) of
{ok, R} when R >= V ->
Acc;
_ ->
orddict:store(I, V, Acc)
end
end, orddict:new(), Tmp),
NewB = merge_tmp(B, MaxR),
{dense, NewB, [], 0}.
fold(F, Acc, {dense, B, [], _}) ->
do_fold(F, Acc, B).
max_merge([First | Rest]) ->
lists:foldl(fun (B, Acc) ->
max_merge(B, Acc)
end, First, Rest).
max_merge({dense, Small, [], _}, {dense, Big, [], _}) ->
Merged = iolist_to_binary(do_merge(Small, Big)),
{dense, Merged, [], 0}.
bytes({dense, B, _, _}) ->
erlang:byte_size(B).
register_sum(B) ->
fold(fun (_, V, Acc) -> Acc + math:pow(2, -V) end, 0, B).
zero_count(B) ->
fold(fun (_, 0, Acc) -> Acc + 1;
(_, _, Acc) -> Acc
end, 0, B).
encode_registers({dense, B, _, _}) ->
B.
decode_registers(Bytes, P) ->
M = m(P),
case Bytes of
<<B:M/binary>> ->
{dense, B, [], 0};
<<B:M/binary, 0>> ->
{dense, B, [], 0}
end.
%%
%% INTERNALS
%%
m(P) ->
trunc(math:pow(2, P)).
do_merge(<<>>, <<>>) ->
[];
do_merge(<<Left:?VALUE_SIZE, SmallRest/binary>>,
<<Right:?VALUE_SIZE, BigRest/binary>>) ->
[max(Left, Right) | do_merge(SmallRest, BigRest)].
do_fold(F, Acc, B) ->
do_fold(F, Acc, B, 0).
do_fold(_, Acc, <<>>, _) ->
Acc;
do_fold(F, Acc, <<Value:?VALUE_SIZE/integer, Rest/binary>>, Index) ->
do_fold(F, F(Index, Value, Acc), Rest, Index+1).
merge_tmp(B, L) ->
iolist_to_binary(
lists:reverse(
merge_tmp(B, lists:sort(L), -1, []))).
merge_tmp(B, [], _PrevIndex, Acc) ->
[B | Acc];
merge_tmp(B, [{Index, Value} | Rest], PrevIndex, Acc) ->
I = Index - PrevIndex - 1,
case B of
<<Left:I/binary, _:?VALUE_SIZE/integer, Right/binary>> ->
NewAcc = [<<Value:?VALUE_SIZE/integer>>, Left | Acc],
%% error_logger:info_msg("B: ~p~nindex: ~p, previndex: ~p, i: ~p~n"
%% "left: ~p~nright: ~p, right size: ~p~n"
%% "new acc: ~p~n",
%% [B, Index, PrevIndex, I, Left, Right,
%% byte_size(Right),
%% iolist_to_binary(lists:reverse(NewAcc))]),
merge_tmp(Right, Rest, Index, NewAcc);
<<Left:I/binary>> ->
[<<Value:?VALUE_SIZE/integer>>, Left | Acc]
end.
%%
%% TESTS
%%
merge_test() ->
Tmp1 = [{1, 1},
{3, 3},
{9, 3},
{15, 15}],
Tmp2 = [{3, 5},
{9, 2},
{10, 5}],
{dense, NewB, [], 0} = new(4),
{dense, Compact, [], 0} = compact({dense, NewB, Tmp1, length(Tmp1)}),
{dense, Compact2, [], 0} = compact({dense, Compact, Tmp2, length(Tmp2)}),
?assertEqual(<<0, 1, 0, 5,
0, 0, 0, 0,
0, 2, 5, 0,
0, 0, 0, 15>>, Compact2).
serialize_test() ->
H = compact(lists:foldl(fun (I, Acc) -> set(I, I, Acc) end,
new(4), lists:seq(0, 15))),
{dense, B, _, _} = H,
?assertEqual(B, encode_registers(H)),
?assertEqual(H, decode_registers(encode_registers(H), 4)).