blob: 94150418e9eb24ee3d176cbae58f129a6127edae [file] [log] [blame]
% Licensed under the Apache License, Version 2.0 (the "License"); you may not
% use this file except in compliance with the License. You may obtain a copy of
% the License at
%
% http://www.apache.org/licenses/LICENSE-2.0
%
% Unless required by applicable law or agreed to in writing, software
% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
% License for the specific language governing permissions and limitations under
% the License.
%% @doc Data structure used to represent document edit histories.
%% A key tree is used to represent the edit history of a document. Each node of
%% the tree represents a particular version. Relations between nodes represent
%% the order that these edits were applied. For instance, a set of three edits
%% would produce a tree of versions A->B->C indicating that edit C was based on
%% version B which was in turn based on A. In a world without replication (and
%% no ability to disable MVCC checks), all histories would be forced to be
%% linear lists of edits due to constraints imposed by MVCC (ie, new edits must
%% be based on the current version). However, we have replication, so we must
%% deal with not so easy cases, which lead to trees.
%%
%% Consider a document in state A. This doc is replicated to a second node. We
%% then edit the document on each node leaving it in two different states, B
%% and C. We now have two key trees, A->B and A->C. When we go to replicate a
%% second time, the key tree must combine these two trees which gives us
%% A->(B|C). This is how conflicts are introduced. In terms of the key tree, we
%% say that we have two leaves (B and C) that are not deleted. The presense of
%% the multiple leaves indicate conflict. To remove a conflict, one of the
%% edits (B or C) can be deleted, which results in, A->(B|C->D) where D is an
%% edit that is specially marked with the a deleted=true flag.
%%
%% What makes this a bit more complicated is that there is a limit to the
%% number of revisions kept, specified in couch_db.hrl (default is 1000). When
%% this limit is exceeded only the last 1000 are kept. This comes in to play
%% when branches are merged. The comparison has to begin at the same place in
%% the branches. A revision id is of the form N-XXXXXXX where N is the current
%% revision depth. So each path will have a start number, calculated in
%% couch_doc:to_path using the formula N - length(RevIds) + 1 So, .eg. if a doc
%% was edit 1003 times this start number would be 4, indicating that 3
%% revisions were truncated.
%%
%% This comes into play in @see merge_at/3 which recursively walks down one
%% tree or the other until they begin at the same revision.
-module(couch_key_tree).
-export([
count_leafs/1,
find_missing/2,
fold/3,
get/2,
get_all_leafs/1,
get_all_leafs_full/1,
get_full_key_paths/2,
get_key_leafs/2,
map/2,
map_leafs/2,
mapfold/3,
multi_merge/2,
merge/2,
remove_leafs/2,
stem/2
]).
-include_lib("couch/include/couch_db.hrl").
-type treenode() :: {Key::term(), Value::term(), [Node::treenode()]}.
-type tree() :: {Depth::pos_integer(), [treenode()]}.
-type revtree() :: [tree()].
%% @doc Merge multiple paths into the given tree.
-spec multi_merge(revtree(), tree()) -> revtree().
multi_merge(RevTree, Trees) ->
lists:foldl(fun(Tree, RevTreeAcc) ->
{NewRevTree, _} = merge(RevTreeAcc, Tree),
NewRevTree
end, RevTree, lists:sort(Trees)).
%% @doc Merge a path into a tree.
-spec merge(revtree(), tree() | path()) ->
{revtree(), new_leaf | new_branch | internal_node}.
merge(RevTree, Tree) ->
{Merged, Result} = merge_tree(RevTree, Tree, []),
{lists:sort(Merged), Result}.
%% @private
%% @doc Attempt to merge Tree into each branch of the RevTree.
%% If it can't find a branch that the new tree merges into, add it as a
%% new branch in the RevTree.
-spec merge_tree(revtree(), tree() | path(), revtree()) ->
{revtree(), new_leaf | new_branch | internal_node}.
merge_tree([], Tree, []) ->
{[Tree], new_leaf};
merge_tree([], Tree, MergeAcc) ->
{[Tree|MergeAcc], new_branch};
merge_tree([{Depth, Nodes} | Rest], {IDepth, INodes}=Tree, MergeAcc) ->
% For the intrepid observer following along at home, notice what we're
% doing here with (Depth - IDepth). This tells us which of the two
% branches (Nodes or INodes) we need to seek into. If Depth > IDepth
% that means we need go into INodes to find where we line up with
% Nodes. If Depth < IDepth, its obviously the other way. If it turns
% out that (Depth - IDepth) == 0, then we know that this is where
% we begin our actual merge operation (ie, looking for key matches).
% Its helpful to note that this whole moving into sub-branches is due
% to how we store trees that have been stemmed. When a path is
% stemmed so that the root node is lost, we wrap it in a tuple with
% the number keys that have been droped. This number is the depth
% value that's used throughout this module.
case merge_at([Nodes], Depth - IDepth, [INodes]) of
{[Merged], Result} ->
NewDepth = erlang:min(Depth, IDepth),
{Rest ++ [{NewDepth, Merged} | MergeAcc], Result};
fail ->
merge_tree(Rest, Tree, [{Depth, Nodes} | MergeAcc])
end.
%% @private
%% @doc Locate the point at which merging can start.
%% Because of stemming we may need to seek into one of the branches
%% before we can start comparing node keys. If one of the branches
%% ends up running out of nodes we know that these two branches can
%% not be merged.
-spec merge_at([node()], integer(), [node()]) ->
{revtree(), new_leaf | new_branch | internal_node} | fail.
merge_at(_Nodes, _Pos, []) ->
fail;
merge_at([], _Pos, _INodes) ->
fail;
merge_at(Nodes, Pos, [{IK, IV, [NextINode]}]) when Pos > 0 ->
% Depth was bigger than IDepth, so we need to discard from the
% insert path to find where it might start matching.
case merge_at(Nodes, Pos - 1, [NextINode]) of
{Merged, Result} -> {[{IK, IV, Merged}], Result};
fail -> fail
end;
merge_at(_Nodes, Pos, [{_IK, _IV, []}]) when Pos > 0 ->
% We've run out of path on the insert side, there's no way we can
% merge with this branch
fail;
merge_at([{K, V, SubTree} | Sibs], Pos, INodes) when Pos < 0 ->
% When Pos is negative, Depth was less than IDepth, so we
% need to discard from the revision tree path
case merge_at(SubTree, Pos + 1, INodes) of
{Merged, Result} ->
{[{K, V, Merged} | Sibs], Result};
fail ->
% Merging along the subtree failed. We need to also try
% merging the insert branch against the siblings of this
% node.
case merge_at(Sibs, Pos, INodes) of
{Merged, Result} -> {[{K, V, SubTree} | Merged], Result};
fail -> fail
end
end;
merge_at([{K, V1, Nodes} | Sibs], 0, [{K, V2, INodes}]) ->
% Keys are equal. At this point we have found a possible starting
% position for our merge to take place.
{Merged, Result} = merge_extend(Nodes, INodes),
{[{K, value_pref(V1, V2), Merged} | Sibs], Result};
merge_at([{K1, _, _} | _], 0, [{K2, _, _}]) when K1 > K2 ->
% Siblings keys are ordered, no point in continuing
fail;
merge_at([Tree | Sibs], 0, INodes) ->
% INodes key comes after this key, so move on to the next sibling.
case merge_at(Sibs, 0, INodes) of
{Merged, Result} -> {[Tree | Merged], Result};
fail -> fail
end.
-spec merge_extend(revtree(), revtree()) ->
{revtree(), new_leaf | new_branch | internal_node}.
merge_extend([], B) when B =/= [] ->
% Most likely the insert branch simply extends this one, so the new
% branch is exactly B. Its also possible that B is a branch because
% its key sorts greater than all siblings of an internal node. This
% condition is checked in the last clause of this function and the
% new_leaf result is fixed to be new_branch.
{B, new_leaf};
merge_extend(A, []) ->
% Insert branch ends an internal node in our original revtree()
% so the end result is exactly our original revtree.
{A, internal_node};
merge_extend([{K, V1, SubA} | NextA], [{K, V2, SubB}]) ->
% Here we're simply extending the path to the next deeper
% level in the two branches.
{Merged, Result} = merge_extend(SubA, SubB),
{[{K, value_pref(V1, V2), Merged} | NextA], Result};
merge_extend([{K1, _, _}=NodeA | Rest], [{K2, _, _}=NodeB]) when K1 > K2 ->
% Keys are ordered so we know this is where the insert branch needs
% to be inserted into the tree. We also know that this creates a new
% branch so we have a new leaf to report.
{[NodeB, NodeA | Rest], new_branch};
merge_extend([Tree | RestA], NextB) ->
% Here we're moving on to the next sibling to try and extend our
% merge even deeper. The length check is due to the fact that the
% key in NextB might be larger than the largest key in RestA which
% means we've created a new branch.
{Merged, Result0} = merge_extend(RestA, NextB),
Result = case length(Merged) == length(RestA) of
true -> Result0;
false -> new_branch
end,
{[Tree | Merged], Result}.
find_missing(_Tree, []) ->
[];
find_missing([], SeachKeys) ->
SeachKeys;
find_missing([{Start, {Key, Value, SubTree}} | RestTree], SeachKeys) ->
PossibleKeys = [{KeyPos, KeyValue} || {KeyPos, KeyValue} <- SeachKeys, KeyPos >= Start],
ImpossibleKeys = [{KeyPos, KeyValue} || {KeyPos, KeyValue} <- SeachKeys, KeyPos < Start],
Missing = find_missing_simple(Start, [{Key, Value, SubTree}], PossibleKeys),
find_missing(RestTree, ImpossibleKeys ++ Missing).
find_missing_simple(_Pos, _Tree, []) ->
[];
find_missing_simple(_Pos, [], SeachKeys) ->
SeachKeys;
find_missing_simple(Pos, [{Key, _, SubTree} | RestTree], SeachKeys) ->
PossibleKeys = [{KeyPos, KeyValue} || {KeyPos, KeyValue} <- SeachKeys, KeyPos >= Pos],
ImpossibleKeys = [{KeyPos, KeyValue} || {KeyPos, KeyValue} <- SeachKeys, KeyPos < Pos],
SrcKeys2 = PossibleKeys -- [{Pos, Key}],
SrcKeys3 = find_missing_simple(Pos + 1, SubTree, SrcKeys2),
ImpossibleKeys ++ find_missing_simple(Pos, RestTree, SrcKeys3).
filter_leafs([], _Keys, FilteredAcc, RemovedKeysAcc) ->
{FilteredAcc, RemovedKeysAcc};
filter_leafs([{Pos, [{LeafKey, _}|_]} = Path |Rest], Keys, FilteredAcc, RemovedKeysAcc) ->
FilteredKeys = lists:delete({Pos, LeafKey}, Keys),
if FilteredKeys == Keys ->
% this leaf is not a key we are looking to remove
filter_leafs(Rest, Keys, [Path | FilteredAcc], RemovedKeysAcc);
true ->
% this did match a key, remove both the node and the input key
filter_leafs(Rest, FilteredKeys, FilteredAcc, [{Pos, LeafKey} | RemovedKeysAcc])
end.
% Removes any branches from the tree whose leaf node(s) are in the Keys
remove_leafs(Trees, Keys) ->
% flatten each branch in a tree into a tree path
Paths = get_all_leafs_full(Trees),
% filter out any that are in the keys list.
{FilteredPaths, RemovedKeys} = filter_leafs(Paths, Keys, [], []),
SortedPaths = lists:sort(
[{Pos + 1 - length(Path), Path} || {Pos, Path} <- FilteredPaths]
),
% convert paths back to trees
NewTree = lists:foldl(
fun({StartPos, Path},TreeAcc) ->
[SingleTree] = lists:foldl(
fun({K,V},NewTreeAcc) -> [{K,V,NewTreeAcc}] end, [], Path),
{NewTrees, _} = merge(TreeAcc, {StartPos, SingleTree}),
NewTrees
end, [], SortedPaths),
{NewTree, RemovedKeys}.
% get the leafs in the tree matching the keys. The matching key nodes can be
% leafs or an inner nodes. If an inner node, then the leafs for that node
% are returned.
get_key_leafs(Tree, Keys) ->
get_key_leafs(Tree, Keys, []).
get_key_leafs(_, [], Acc) ->
{Acc, []};
get_key_leafs([], Keys, Acc) ->
{Acc, Keys};
get_key_leafs([{Pos, Tree}|Rest], Keys, Acc) ->
{Gotten, RemainingKeys} = get_key_leafs_simple(Pos, [Tree], Keys, []),
get_key_leafs(Rest, RemainingKeys, Gotten ++ Acc).
get_key_leafs_simple(_Pos, _Tree, [], _PathAcc) ->
{[], []};
get_key_leafs_simple(_Pos, [], Keys, _PathAcc) ->
{[], Keys};
get_key_leafs_simple(Pos, [{Key, _, SubTree}=Tree | RestTree], Keys, PathAcc) ->
case lists:delete({Pos, Key}, Keys) of
Keys ->
% Same list, key not found
NewPathAcc = [Key | PathAcc],
{ChildLeafs, Keys2} = get_key_leafs_simple(Pos + 1, SubTree, Keys, NewPathAcc),
{SiblingLeafs, Keys3} = get_key_leafs_simple(Pos, RestTree, Keys2, PathAcc),
{ChildLeafs ++ SiblingLeafs, Keys3};
Keys2 ->
% This is a key we were looking for, get all descendant
% leafs while removing any requested key we find. Notice
% that this key will be returned by get_key_leafs_simple2
% if it's a leaf so there's no need to return it here.
{ChildLeafs, Keys3} = get_key_leafs_simple2(Pos, [Tree], Keys2, PathAcc),
{SiblingLeafs, Keys4} = get_key_leafs_simple(Pos, RestTree, Keys3, PathAcc),
{ChildLeafs ++ SiblingLeafs, Keys4}
end.
get_key_leafs_simple2(_Pos, [], Keys, _PathAcc) ->
% No more tree to deal with so no more keys to return.
{[], Keys};
get_key_leafs_simple2(Pos, [{Key, Value, []} | RestTree], Keys, PathAcc) ->
% This is a leaf as defined by having an empty list of
% child nodes. The assertion is a bit subtle but the function
% clause match means its a leaf.
Keys2 = lists:delete({Pos, Key}, Keys),
{SiblingLeafs, Keys3} = get_key_leafs_simple2(Pos, RestTree, Keys2, PathAcc),
{[{Value, {Pos, [Key | PathAcc]}} | SiblingLeafs], Keys3};
get_key_leafs_simple2(Pos, [{Key, _Value, SubTree} | RestTree], Keys, PathAcc) ->
% This isn't a leaf. Recurse into the subtree and then
% process any sibling branches.
Keys2 = lists:delete({Pos, Key}, Keys),
NewPathAcc = [Key | PathAcc],
{ChildLeafs, Keys3} = get_key_leafs_simple2(Pos + 1, SubTree, Keys2, NewPathAcc),
{SiblingLeafs, Keys4} = get_key_leafs_simple2(Pos, RestTree, Keys3, PathAcc),
{ChildLeafs ++ SiblingLeafs, Keys4}.
get(Tree, KeysToGet) ->
{KeyPaths, KeysNotFound} = get_full_key_paths(Tree, KeysToGet),
FixedResults = [ {Value, {Pos, [Key0 || {Key0, _} <- Path]}} || {Pos, [{_Key, Value}|_]=Path} <- KeyPaths],
{FixedResults, KeysNotFound}.
get_full_key_paths(Tree, Keys) ->
get_full_key_paths(Tree, Keys, []).
get_full_key_paths(_, [], Acc) ->
{Acc, []};
get_full_key_paths([], Keys, Acc) ->
{Acc, Keys};
get_full_key_paths([{Pos, Tree}|Rest], Keys, Acc) ->
{Gotten, RemainingKeys} = get_full_key_paths(Pos, [Tree], Keys, []),
get_full_key_paths(Rest, RemainingKeys, Gotten ++ Acc).
get_full_key_paths(_Pos, _Tree, [], _KeyPathAcc) ->
{[], []};
get_full_key_paths(_Pos, [], KeysToGet, _KeyPathAcc) ->
{[], KeysToGet};
get_full_key_paths(Pos, [{KeyId, Value, SubTree} | RestTree], KeysToGet, KeyPathAcc) ->
KeysToGet2 = KeysToGet -- [{Pos, KeyId}],
CurrentNodeResult =
case length(KeysToGet2) =:= length(KeysToGet) of
true -> % not in the key list.
[];
false -> % this node is the key list. return it
[{Pos, [{KeyId, Value} | KeyPathAcc]}]
end,
{KeysGotten, KeysRemaining} = get_full_key_paths(Pos + 1, SubTree, KeysToGet2, [{KeyId, Value} | KeyPathAcc]),
{KeysGotten2, KeysRemaining2} = get_full_key_paths(Pos, RestTree, KeysRemaining, KeyPathAcc),
{CurrentNodeResult ++ KeysGotten ++ KeysGotten2, KeysRemaining2}.
get_all_leafs_full(Tree) ->
get_all_leafs_full(Tree, []).
get_all_leafs_full([], Acc) ->
Acc;
get_all_leafs_full([{Pos, Tree} | Rest], Acc) ->
get_all_leafs_full(Rest, get_all_leafs_full_simple(Pos, [Tree], []) ++ Acc).
get_all_leafs_full_simple(_Pos, [], _KeyPathAcc) ->
[];
get_all_leafs_full_simple(Pos, [{KeyId, Value, []} | RestTree], KeyPathAcc) ->
[{Pos, [{KeyId, Value} | KeyPathAcc]} | get_all_leafs_full_simple(Pos, RestTree, KeyPathAcc)];
get_all_leafs_full_simple(Pos, [{KeyId, Value, SubTree} | RestTree], KeyPathAcc) ->
get_all_leafs_full_simple(Pos + 1, SubTree, [{KeyId, Value} | KeyPathAcc]) ++ get_all_leafs_full_simple(Pos, RestTree, KeyPathAcc).
get_all_leafs(Trees) ->
get_all_leafs(Trees, []).
get_all_leafs([], Acc) ->
Acc;
get_all_leafs([{Pos, Tree}|Rest], Acc) ->
get_all_leafs(Rest, get_all_leafs_simple(Pos, [Tree], []) ++ Acc).
get_all_leafs_simple(_Pos, [], _KeyPathAcc) ->
[];
get_all_leafs_simple(Pos, [{KeyId, Value, []} | RestTree], KeyPathAcc) ->
[{Value, {Pos, [KeyId | KeyPathAcc]}} | get_all_leafs_simple(Pos, RestTree, KeyPathAcc)];
get_all_leafs_simple(Pos, [{KeyId, _Value, SubTree} | RestTree], KeyPathAcc) ->
get_all_leafs_simple(Pos + 1, SubTree, [KeyId | KeyPathAcc]) ++ get_all_leafs_simple(Pos, RestTree, KeyPathAcc).
count_leafs([]) ->
0;
count_leafs([{_Pos,Tree}|Rest]) ->
count_leafs_simple([Tree]) + count_leafs(Rest).
count_leafs_simple([]) ->
0;
count_leafs_simple([{_Key, _Value, []} | RestTree]) ->
1 + count_leafs_simple(RestTree);
count_leafs_simple([{_Key, _Value, SubTree} | RestTree]) ->
count_leafs_simple(SubTree) + count_leafs_simple(RestTree).
fold(_Fun, Acc, []) ->
Acc;
fold(Fun, Acc0, [{Pos, Tree}|Rest]) ->
Acc1 = fold_simple(Fun, Acc0, Pos, [Tree]),
fold(Fun, Acc1, Rest).
fold_simple(_Fun, Acc, _Pos, []) ->
Acc;
fold_simple(Fun, Acc0, Pos, [{Key, Value, SubTree} | RestTree]) ->
Type = if SubTree == [] -> leaf; true -> branch end,
Acc1 = Fun({Pos, Key}, Value, Type, Acc0),
Acc2 = fold_simple(Fun, Acc1, Pos+1, SubTree),
fold_simple(Fun, Acc2, Pos, RestTree).
map(_Fun, []) ->
[];
map(Fun, [{Pos, Tree}|Rest]) ->
case erlang:fun_info(Fun, arity) of
{arity, 2} ->
[NewTree] = map_simple(fun(A,B,_C) -> Fun(A,B) end, Pos, [Tree]),
[{Pos, NewTree} | map(Fun, Rest)];
{arity, 3} ->
[NewTree] = map_simple(Fun, Pos, [Tree]),
[{Pos, NewTree} | map(Fun, Rest)]
end.
map_simple(_Fun, _Pos, []) ->
[];
map_simple(Fun, Pos, [{Key, Value, SubTree} | RestTree]) ->
Value2 = Fun({Pos, Key}, Value,
if SubTree == [] -> leaf; true -> branch end),
[{Key, Value2, map_simple(Fun, Pos + 1, SubTree)} | map_simple(Fun, Pos, RestTree)].
mapfold(_Fun, Acc, []) ->
{[], Acc};
mapfold(Fun, Acc, [{Pos, Tree} | Rest]) ->
{[NewTree], Acc2} = mapfold_simple(Fun, Acc, Pos, [Tree]),
{Rest2, Acc3} = mapfold(Fun, Acc2, Rest),
{[{Pos, NewTree} | Rest2], Acc3}.
mapfold_simple(_Fun, Acc, _Pos, []) ->
{[], Acc};
mapfold_simple(Fun, Acc, Pos, [{Key, Value, SubTree} | RestTree]) ->
{Value2, Acc2} = Fun({Pos, Key}, Value,
if SubTree == [] -> leaf; true -> branch end, Acc),
{SubTree2, Acc3} = mapfold_simple(Fun, Acc2, Pos + 1, SubTree),
{RestTree2, Acc4} = mapfold_simple(Fun, Acc3, Pos, RestTree),
{[{Key, Value2, SubTree2} | RestTree2], Acc4}.
map_leafs(_Fun, []) ->
[];
map_leafs(Fun, [{Pos, Tree}|Rest]) ->
[NewTree] = map_leafs_simple(Fun, Pos, [Tree]),
[{Pos, NewTree} | map_leafs(Fun, Rest)].
map_leafs_simple(_Fun, _Pos, []) ->
[];
map_leafs_simple(Fun, Pos, [{Key, Value, []} | RestTree]) ->
Value2 = Fun({Pos, Key}, Value),
[{Key, Value2, []} | map_leafs_simple(Fun, Pos, RestTree)];
map_leafs_simple(Fun, Pos, [{Key, Value, SubTree} | RestTree]) ->
[{Key, Value, map_leafs_simple(Fun, Pos + 1, SubTree)} | map_leafs_simple(Fun, Pos, RestTree)].
stem(Trees, Limit) ->
try
{_, Branches} = lists:foldl(fun(Tree, {Seen, TreeAcc}) ->
{NewSeen, NewBranches} = stem_tree(Tree, Limit, Seen),
{NewSeen, NewBranches ++ TreeAcc}
end, {sets:new(), []}, Trees),
lists:sort(Branches)
catch throw:dupe_keys ->
repair_tree(Trees, Limit)
end.
stem_tree({Depth, Child}, Limit, Seen) ->
case stem_tree(Depth, Child, Limit, Seen) of
{NewSeen, _, NewChild, NewBranches} ->
{NewSeen, [{Depth, NewChild} | NewBranches]};
{NewSeen, _, NewBranches} ->
{NewSeen, NewBranches}
end.
stem_tree(_Depth, {Key, _Val, []} = Leaf, Limit, Seen) ->
{check_key(Key, Seen), Limit - 1, Leaf, []};
stem_tree(Depth, {Key, Val, Children}, Limit, Seen0) ->
Seen1 = check_key(Key, Seen0),
FinalAcc = lists:foldl(fun(Child, Acc) ->
{SeenAcc, LimitPosAcc, ChildAcc, BranchAcc} = Acc,
case stem_tree(Depth + 1, Child, Limit, SeenAcc) of
{NewSeenAcc, LimitPos, NewChild, NewBranches} ->
NewLimitPosAcc = erlang:max(LimitPos, LimitPosAcc),
NewChildAcc = [NewChild | ChildAcc],
NewBranchAcc = NewBranches ++ BranchAcc,
{NewSeenAcc, NewLimitPosAcc, NewChildAcc, NewBranchAcc};
{NewSeenAcc, LimitPos, NewBranches} ->
NewLimitPosAcc = erlang:max(LimitPos, LimitPosAcc),
NewBranchAcc = NewBranches ++ BranchAcc,
{NewSeenAcc, NewLimitPosAcc, ChildAcc, NewBranchAcc}
end
end, {Seen1, -1, [], []}, Children),
{FinalSeen, FinalLimitPos, FinalChildren, FinalBranches} = FinalAcc,
case FinalLimitPos of
N when N > 0, length(FinalChildren) > 0 ->
FinalNode = {Key, Val, lists:reverse(FinalChildren)},
{FinalSeen, FinalLimitPos - 1, FinalNode, FinalBranches};
0 when length(FinalChildren) > 0 ->
NewBranches = lists:map(fun(Child) ->
{Depth + 1, Child}
end, lists:reverse(FinalChildren)),
{FinalSeen, -1, NewBranches ++ FinalBranches};
N when N < 0, length(FinalChildren) == 0 ->
{FinalSeen, FinalLimitPos - 1, FinalBranches}
end.
check_key(Key, Seen) ->
case sets:is_element(Key, Seen) of
true ->
throw(dupe_keys);
false ->
sets:add_element(Key, Seen)
end.
repair_tree(Trees, Limit) ->
% flatten each branch in a tree into a tree path, sort by starting rev #
Paths = lists:sort(lists:map(fun({Pos, Path}) ->
StemmedPath = lists:sublist(Path, Limit),
{Pos + 1 - length(StemmedPath), StemmedPath}
end, get_all_leafs_full(Trees))),
% convert paths back to trees
lists:foldl(
fun({StartPos, Path},TreeAcc) ->
[SingleTree] = lists:foldl(
fun({K,V},NewTreeAcc) -> [{K,V,NewTreeAcc}] end, [], Path),
{NewTrees, _} = merge(TreeAcc, {StartPos, SingleTree}),
NewTrees
end, [], Paths).
value_pref(Tuple, _) when is_tuple(Tuple),
(tuple_size(Tuple) == 3 orelse tuple_size(Tuple) == 4) ->
Tuple;
value_pref(_, Tuple) when is_tuple(Tuple),
(tuple_size(Tuple) == 3 orelse tuple_size(Tuple) == 4) ->
Tuple;
value_pref(?REV_MISSING, Other) ->
Other;
value_pref(Other, ?REV_MISSING) ->
Other;
value_pref(Last, _) ->
Last.