src/couch/src/couch_key_tree.erl - couchdb - Git at Google

 % Licensed under the Apache License, Version 2.0 (the "License"); you may not
 % use this file except in compliance with the License. You may obtain a copy of
 % the License at
 %
 %   http://www.apache.org/licenses/LICENSE-2.0
 %
 % Unless required by applicable law or agreed to in writing, software
 % distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 % WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 % License for the specific language governing permissions and limitations under
 % the License.

 %% @doc Data structure used to represent document edit histories.

 %% A key tree is used to represent the edit history of a document. Each node of
 %% the tree represents a particular version. Relations between nodes represent
 %% the order that these edits were applied. For instance, a set of three edits
 %% would produce a tree of versions A->B->C indicating that edit C was based on
 %% version B which was in turn based on A. In a world without replication (and
 %% no ability to disable MVCC checks), all histories would be forced to be
 %% linear lists of edits due to constraints imposed by MVCC (ie, new edits must
 %% be based on the current version). However, we have replication, so we must
 %% deal with not so easy cases, which lead to trees.
 %%
 %% Consider a document in state A. This doc is replicated to a second node. We
 %% then edit the document on each node leaving it in two different states, B
 %% and C. We now have two key trees, A->B and A->C. When we go to replicate a
 %% second time, the key tree must combine these two trees which gives us
 %% A->(B|C). This is how conflicts are introduced. In terms of the key tree, we
 %% say that we have two leaves (B and C) that are not deleted. The presence of
 %% the multiple leaves indicate conflict. To remove a conflict, one of the
 %% edits (B or C) can be deleted, which results in, A->(B|C->D) where D is an
 %% edit that is specially marked with the a deleted=true flag.
 %%
 %% What makes this a bit more complicated is that there is a limit to the
 %% number of revisions kept, specified in couch_db.hrl (default is 1000). When
 %% this limit is exceeded only the last 1000 are kept. This comes in to play
 %% when branches are merged. The comparison has to begin at the same place in
 %% the branches. A revision id is of the form N-XXXXXXX where N is the current
 %% revision depth. So each path will have a start number, calculated in
 %% couch_doc:to_path using the formula N - length(RevIds) + 1 So, .eg. if a doc
 %% was edit 1003 times this start number would be 4, indicating that 3
 %% revisions were truncated.
 %%
 %% This comes into play in @see merge_at/3 which recursively walks down one
 %% tree or the other until they begin at the same revision.

 -module(couch_key_tree).

 -export([
     count_leafs/1,
     find_missing/2,
     fold/3,
     get/2,
     get_all_leafs/1,
     get_all_leafs_full/1,
     get_full_key_paths/2,
     get_key_leafs/2,
     map/2,
     map_leafs/2,
     mapfold/3,
     multi_merge/2,
     merge/2,
     remove_leafs/2,
     stem/2
 ]).

 -include_lib("couch/include/couch_db.hrl").
 -type treenode() :: {Key :: term(), Value :: term(), [Node :: treenode()]}.
 -type tree() :: {Depth :: pos_integer(), [treenode()]}.
 -type revtree() :: [tree()].

 %% @doc Merge multiple paths into the given tree.
 -spec multi_merge(revtree(), tree()) -> revtree().
 multi_merge(RevTree, Trees) ->
     lists:foldl(
         fun(Tree, RevTreeAcc) ->
             {NewRevTree, _} = merge(RevTreeAcc, Tree),
             NewRevTree
         end,
         RevTree,
         lists:sort(Trees)
     ).

 %% @doc Merge a path into a tree.
 -spec merge(revtree(), tree() | path()) ->
     {revtree(), new_leaf | new_branch | internal_node}.
 merge(RevTree, Tree) ->
     {Merged, Result} = merge_tree(RevTree, Tree, []),
     {lists:sort(Merged), Result}.

 %% @private
 %% @doc Attempt to merge Tree into each branch of the RevTree.
 %% If it can't find a branch that the new tree merges into, add it as a
 %% new branch in the RevTree.
 -spec merge_tree(revtree(), tree() | path(), revtree()) ->
     {revtree(), new_leaf | new_branch | internal_node}.
 merge_tree([], Tree, []) ->
     {[Tree], new_leaf};
 merge_tree([], Tree, MergeAcc) ->
     {[Tree | MergeAcc], new_branch};
 merge_tree([{Depth, Nodes} | Rest], {IDepth, INodes} = Tree, MergeAcc) ->
     % For the intrepid observer following along at home, notice what we're
     % doing here with (Depth - IDepth). This tells us which of the two
     % branches (Nodes or INodes) we need to seek into. If Depth > IDepth
     % that means we need go into INodes to find where we line up with
     % Nodes. If Depth < IDepth, its obviously the other way. If it turns
     % out that (Depth - IDepth) == 0, then we know that this is where
     % we begin our actual merge operation (ie, looking for key matches).
     % Its helpful to note that this whole moving into sub-branches is due
     % to how we store trees that have been stemmed. When a path is
     % stemmed so that the root node is lost, we wrap it in a tuple with
     % the number keys that have been dropped. This number is the depth
     % value that's used throughout this module.
     case merge_at([Nodes], Depth - IDepth, [INodes]) of
         {[Merged], Result} ->
             NewDepth = erlang:min(Depth, IDepth),
             {Rest ++ [{NewDepth, Merged} | MergeAcc], Result};
         fail ->
             merge_tree(Rest, Tree, [{Depth, Nodes} | MergeAcc])
     end.

 %% @private
 %% @doc Locate the point at which merging can start.
 %% Because of stemming we may need to seek into one of the branches
 %% before we can start comparing node keys. If one of the branches
 %% ends up running out of nodes we know that these two branches can
 %% not be merged.
 -spec merge_at([node()], integer(), [node()]) ->
     {revtree(), new_leaf | new_branch | internal_node} | fail.
 merge_at(_Nodes, _Pos, []) ->
     fail;
 merge_at([], _Pos, _INodes) ->
     fail;
 merge_at(Nodes, Pos, [{IK, IV, [NextINode]}]) when Pos > 0 ->
     % Depth was bigger than IDepth, so we need to discard from the
     % insert path to find where it might start matching.
     case merge_at(Nodes, Pos - 1, [NextINode]) of
         {Merged, Result} -> {[{IK, IV, Merged}], Result};
         fail -> fail
     end;
 merge_at(_Nodes, Pos, [{_IK, _IV, []}]) when Pos > 0 ->
     % We've run out of path on the insert side, there's no way we can
     % merge with this branch
     fail;
 merge_at([{K, V, SubTree} | Sibs], Pos, INodes) when Pos < 0 ->
     % When Pos is negative, Depth was less than IDepth, so we
     % need to discard from the revision tree path
     case merge_at(SubTree, Pos + 1, INodes) of
         {Merged, Result} ->
             {[{K, V, Merged} | Sibs], Result};
         fail ->
             % Merging along the subtree failed. We need to also try
             % merging the insert branch against the siblings of this
             % node.
             case merge_at(Sibs, Pos, INodes) of
                 {Merged, Result} -> {[{K, V, SubTree} | Merged], Result};
                 fail -> fail
             end
     end;
 merge_at([{K, V1, Nodes} | Sibs], 0, [{K, V2, INodes}]) ->
     % Keys are equal. At this point we have found a possible starting
     % position for our merge to take place.
     {Merged, Result} = merge_extend(Nodes, INodes),
     {[{K, value_pref(V1, V2), Merged} | Sibs], Result};
 merge_at([{K1, _, _} | _], 0, [{K2, _, _}]) when K1 > K2 ->
     % Siblings keys are ordered, no point in continuing
     fail;
 merge_at([Tree | Sibs], 0, INodes) ->
     % INodes key comes after this key, so move on to the next sibling.
     case merge_at(Sibs, 0, INodes) of
         {Merged, Result} -> {[Tree | Merged], Result};
         fail -> fail
     end.

 -spec merge_extend(revtree(), revtree()) ->
     {revtree(), new_leaf | new_branch | internal_node}.
 merge_extend([], B) when B =/= [] ->
     % Most likely the insert branch simply extends this one, so the new
     % branch is exactly B. Its also possible that B is a branch because
     % its key sorts greater than all siblings of an internal node. This
     % condition is checked in the last clause of this function and the
     % new_leaf result is fixed to be new_branch.
     {B, new_leaf};
 merge_extend(A, []) ->
     % Insert branch ends an internal node in our original revtree()
     % so the end result is exactly our original revtree.
     {A, internal_node};
 merge_extend([{K, V1, SubA} | NextA], [{K, V2, SubB}]) ->
     % Here we're simply extending the path to the next deeper
     % level in the two branches.
     {Merged, Result} = merge_extend(SubA, SubB),
     {[{K, value_pref(V1, V2), Merged} | NextA], Result};
 merge_extend([{K1, _, _} = NodeA | Rest], [{K2, _, _} = NodeB]) when K1 > K2 ->
     % Keys are ordered so we know this is where the insert branch needs
     % to be inserted into the tree. We also know that this creates a new
     % branch so we have a new leaf to report.
     {[NodeB, NodeA | Rest], new_branch};
 merge_extend([Tree | RestA], NextB) ->
     % Here we're moving on to the next sibling to try and extend our
     % merge even deeper. The length check is due to the fact that the
     % key in NextB might be larger than the largest key in RestA which
     % means we've created a new branch.
     {Merged, Result0} = merge_extend(RestA, NextB),
     Result =
         case length(Merged) == length(RestA) of
             true -> Result0;
             false -> new_branch
         end,
     {[Tree | Merged], Result}.

 find_missing(Tree, SearchKeys) ->
     find_missing_sorted(Tree, lists:sort(SearchKeys)).

 find_missing_sorted(_Tree, []) ->
     [];
 find_missing_sorted([], SearchKeys) ->
     SearchKeys;
 find_missing_sorted([{Start, {Key, Value, SubTree}} | RestTree], SearchKeys) ->
     {Impossible, Possible} = lists:splitwith(fun({K, _}) -> K < Start end, SearchKeys),
     Missing = find_missing_simple(Start, [{Key, Value, SubTree}], Possible),
     find_missing_sorted(RestTree, Impossible ++ Missing).

 find_missing_simple(_Pos, _Tree, []) ->
     [];
 find_missing_simple(_Pos, [], SearchKeys) ->
     SearchKeys;
 find_missing_simple(Pos, [{Key, _, SubTree} | RestTree], SearchKeys) ->
     {Impossible, Possible} = lists:splitwith(fun({K, _}) -> K < Pos end, SearchKeys),
     SrcKeys2 = Possible -- [{Pos, Key}],
     SrcKeys3 = find_missing_simple(Pos + 1, SubTree, SrcKeys2),
     Impossible ++ find_missing_simple(Pos, RestTree, SrcKeys3).

 filter_leafs([], _Keys, FilteredAcc, RemovedKeysAcc) ->
     {FilteredAcc, RemovedKeysAcc};
 filter_leafs([{Pos, [{LeafKey, _} | _]} = Path | Rest], Keys, FilteredAcc, RemovedKeysAcc) ->
     FilteredKeys = lists:delete({Pos, LeafKey}, Keys),
     if
         FilteredKeys == Keys ->
             % this leaf is not a key we are looking to remove
             filter_leafs(Rest, Keys, [Path | FilteredAcc], RemovedKeysAcc);
         true ->
             % this did match a key, remove both the node and the input key
             filter_leafs(Rest, FilteredKeys, FilteredAcc, [{Pos, LeafKey} | RemovedKeysAcc])
     end.

 % Removes any branches from the tree whose leaf node(s) are in the Keys
 remove_leafs(Trees, Keys) ->
     % flatten each branch in a tree into a tree path
     Paths = get_all_leafs_full(Trees),

     % filter out any that are in the keys list.
     {FilteredPaths, RemovedKeys} = filter_leafs(Paths, Keys, [], []),

     SortedPaths = lists:sort(
         [{Pos + 1 - length(Path), Path} || {Pos, Path} <- FilteredPaths]
     ),

     % convert paths back to trees
     NewTree = lists:foldl(
         fun({StartPos, Path}, TreeAcc) ->
             [SingleTree] = lists:foldl(
                 fun({K, V}, NewTreeAcc) -> [{K, V, NewTreeAcc}] end, [], Path
             ),
             {NewTrees, _} = merge(TreeAcc, {StartPos, SingleTree}),
             NewTrees
         end,
         [],
         SortedPaths
     ),
     {NewTree, RemovedKeys}.

 % get the leafs in the tree matching the keys. The matching key nodes can be
 % leafs or an inner nodes. If an inner node, then the leafs for that node
 % are returned.
 get_key_leafs(Tree, Keys) ->
     get_key_leafs(Tree, Keys, []).

 get_key_leafs(_, [], Acc) ->
     {Acc, []};
 get_key_leafs([], Keys, Acc) ->
     {Acc, Keys};
 get_key_leafs([{Pos, Tree} | Rest], Keys, Acc) ->
     {Gotten, RemainingKeys} = get_key_leafs_simple(Pos, [Tree], Keys, []),
     get_key_leafs(Rest, RemainingKeys, Gotten ++ Acc).

 get_key_leafs_simple(_Pos, _Tree, [], _PathAcc) ->
     {[], []};
 get_key_leafs_simple(_Pos, [], Keys, _PathAcc) ->
     {[], Keys};
 get_key_leafs_simple(Pos, [{Key, _, SubTree} = Tree | RestTree], Keys, PathAcc) ->
     case lists:delete({Pos, Key}, Keys) of
         Keys ->
             % Same list, key not found
             NewPathAcc = [Key | PathAcc],
             {ChildLeafs, Keys2} = get_key_leafs_simple(Pos + 1, SubTree, Keys, NewPathAcc),
             {SiblingLeafs, Keys3} = get_key_leafs_simple(Pos, RestTree, Keys2, PathAcc),
             {ChildLeafs ++ SiblingLeafs, Keys3};
         Keys2 ->
             % This is a key we were looking for, get all descendant
             % leafs while removing any requested key we find. Notice
             % that this key will be returned by get_key_leafs_simple2
             % if it's a leaf so there's no need to return it here.
             {ChildLeafs, Keys3} = get_key_leafs_simple2(Pos, [Tree], Keys2, PathAcc),
             {SiblingLeafs, Keys4} = get_key_leafs_simple(Pos, RestTree, Keys3, PathAcc),
             {ChildLeafs ++ SiblingLeafs, Keys4}
     end.

 get_key_leafs_simple2(_Pos, [], Keys, _PathAcc) ->
     % No more tree to deal with so no more keys to return.
     {[], Keys};
 get_key_leafs_simple2(Pos, [{Key, Value, []} | RestTree], Keys, PathAcc) ->
     % This is a leaf as defined by having an empty list of
     % child nodes. The assertion is a bit subtle but the function
     % clause match means its a leaf.
     Keys2 = lists:delete({Pos, Key}, Keys),
     {SiblingLeafs, Keys3} = get_key_leafs_simple2(Pos, RestTree, Keys2, PathAcc),
     {[{Value, {Pos, [Key | PathAcc]}} | SiblingLeafs], Keys3};
 get_key_leafs_simple2(Pos, [{Key, _Value, SubTree} | RestTree], Keys, PathAcc) ->
     % This isn't a leaf. Recurse into the subtree and then
     % process any sibling branches.
     Keys2 = lists:delete({Pos, Key}, Keys),
     NewPathAcc = [Key | PathAcc],
     {ChildLeafs, Keys3} = get_key_leafs_simple2(Pos + 1, SubTree, Keys2, NewPathAcc),
     {SiblingLeafs, Keys4} = get_key_leafs_simple2(Pos, RestTree, Keys3, PathAcc),
     {ChildLeafs ++ SiblingLeafs, Keys4}.

 get(Tree, KeysToGet) ->
     {KeyPaths, KeysNotFound} = get_full_key_paths(Tree, KeysToGet),
     FixedResults = [
         {Value, {Pos, [Key0 || {Key0, _} <- Path]}}
      || {Pos, [{_Key, Value} | _] = Path} <- KeyPaths
     ],
     {FixedResults, KeysNotFound}.

 get_full_key_paths(Tree, Keys) ->
     get_full_key_paths(Tree, Keys, []).

 get_full_key_paths(_, [], Acc) ->
     {Acc, []};
 get_full_key_paths([], Keys, Acc) ->
     {Acc, Keys};
 get_full_key_paths([{Pos, Tree} | Rest], Keys, Acc) ->
     {Gotten, RemainingKeys} = get_full_key_paths(Pos, [Tree], Keys, []),
     get_full_key_paths(Rest, RemainingKeys, Gotten ++ Acc).

 get_full_key_paths(_Pos, _Tree, [], _KeyPathAcc) ->
     {[], []};
 get_full_key_paths(_Pos, [], KeysToGet, _KeyPathAcc) ->
     {[], KeysToGet};
 get_full_key_paths(Pos, [{KeyId, Value, SubTree} | RestTree], KeysToGet, KeyPathAcc) ->
     KeysToGet2 = KeysToGet -- [{Pos, KeyId}],
     CurrentNodeResult =
         case length(KeysToGet2) =:= length(KeysToGet) of
             % not in the key list.
             true ->
                 [];
             % this node is the key list. return it
             false ->
                 [{Pos, [{KeyId, Value} | KeyPathAcc]}]
         end,
     {KeysGotten, KeysRemaining} = get_full_key_paths(Pos + 1, SubTree, KeysToGet2, [
         {KeyId, Value} | KeyPathAcc
     ]),
     {KeysGotten2, KeysRemaining2} = get_full_key_paths(Pos, RestTree, KeysRemaining, KeyPathAcc),
     {CurrentNodeResult ++ KeysGotten ++ KeysGotten2, KeysRemaining2}.

 get_all_leafs_full(Tree) ->
     get_all_leafs_full(Tree, []).

 get_all_leafs_full([], Acc) ->
     Acc;
 get_all_leafs_full([{Pos, Tree} | Rest], Acc) ->
     get_all_leafs_full(Rest, get_all_leafs_full_simple(Pos, [Tree], []) ++ Acc).

 get_all_leafs_full_simple(_Pos, [], _KeyPathAcc) ->
     [];
 get_all_leafs_full_simple(Pos, [{KeyId, Value, []} | RestTree], KeyPathAcc) ->
     [{Pos, [{KeyId, Value} | KeyPathAcc]} | get_all_leafs_full_simple(Pos, RestTree, KeyPathAcc)];
 get_all_leafs_full_simple(Pos, [{KeyId, Value, SubTree} | RestTree], KeyPathAcc) ->
     get_all_leafs_full_simple(Pos + 1, SubTree, [{KeyId, Value} | KeyPathAcc]) ++
         get_all_leafs_full_simple(Pos, RestTree, KeyPathAcc).

 get_all_leafs(Trees) ->
     get_all_leafs(Trees, []).

 get_all_leafs([], Acc) ->
     Acc;
 get_all_leafs([{Pos, Tree} | Rest], Acc) ->
     get_all_leafs(Rest, get_all_leafs_simple(Pos, [Tree], []) ++ Acc).

 get_all_leafs_simple(_Pos, [], _KeyPathAcc) ->
     [];
 get_all_leafs_simple(Pos, [{KeyId, Value, []} | RestTree], KeyPathAcc) ->
     [{Value, {Pos, [KeyId | KeyPathAcc]}} | get_all_leafs_simple(Pos, RestTree, KeyPathAcc)];
 get_all_leafs_simple(Pos, [{KeyId, _Value, SubTree} | RestTree], KeyPathAcc) ->
     get_all_leafs_simple(Pos + 1, SubTree, [KeyId | KeyPathAcc]) ++
         get_all_leafs_simple(Pos, RestTree, KeyPathAcc).

 count_leafs([]) ->
     0;
 count_leafs([{_Pos, Tree} | Rest]) ->
     count_leafs_simple([Tree]) + count_leafs(Rest).

 count_leafs_simple([]) ->
     0;
 count_leafs_simple([{_Key, _Value, []} | RestTree]) ->
     1 + count_leafs_simple(RestTree);
 count_leafs_simple([{_Key, _Value, SubTree} | RestTree]) ->
     count_leafs_simple(SubTree) + count_leafs_simple(RestTree).

 fold(_Fun, Acc, []) ->
     Acc;
 fold(Fun, Acc0, [{Pos, Tree} | Rest]) ->
     Acc1 = fold_simple(Fun, Acc0, Pos, [Tree]),
     fold(Fun, Acc1, Rest).

 fold_simple(_Fun, Acc, _Pos, []) ->
     Acc;
 fold_simple(Fun, Acc0, Pos, [{Key, Value, SubTree} | RestTree]) ->
     Type =
         if
             SubTree == [] -> leaf;
             true -> branch
         end,
     Acc1 = Fun({Pos, Key}, Value, Type, Acc0),
     Acc2 = fold_simple(Fun, Acc1, Pos + 1, SubTree),
     fold_simple(Fun, Acc2, Pos, RestTree).

 map(_Fun, []) ->
     [];
 map(Fun, [{Pos, Tree} | Rest]) ->
     case erlang:fun_info(Fun, arity) of
         {arity, 2} ->
             [NewTree] = map_simple(fun(A, B, _C) -> Fun(A, B) end, Pos, [Tree]),
             [{Pos, NewTree} | map(Fun, Rest)];
         {arity, 3} ->
             [NewTree] = map_simple(Fun, Pos, [Tree]),
             [{Pos, NewTree} | map(Fun, Rest)]
     end.

 map_simple(_Fun, _Pos, []) ->
     [];
 map_simple(Fun, Pos, [{Key, Value, SubTree} | RestTree]) ->
     Value2 = Fun(
         {Pos, Key},
         Value,
         if
             SubTree == [] -> leaf;
             true -> branch
         end
     ),
     [{Key, Value2, map_simple(Fun, Pos + 1, SubTree)} | map_simple(Fun, Pos, RestTree)].

 mapfold(_Fun, Acc, []) ->
     {[], Acc};
 mapfold(Fun, Acc, [{Pos, Tree} | Rest]) ->
     {[NewTree], Acc2} = mapfold_simple(Fun, Acc, Pos, [Tree]),
     {Rest2, Acc3} = mapfold(Fun, Acc2, Rest),
     {[{Pos, NewTree} | Rest2], Acc3}.

 mapfold_simple(_Fun, Acc, _Pos, []) ->
     {[], Acc};
 mapfold_simple(Fun, Acc, Pos, [{Key, Value, SubTree} | RestTree]) ->
     {Value2, Acc2} = Fun(
         {Pos, Key},
         Value,
         if
             SubTree == [] -> leaf;
             true -> branch
         end,
         Acc
     ),
     {SubTree2, Acc3} = mapfold_simple(Fun, Acc2, Pos + 1, SubTree),
     {RestTree2, Acc4} = mapfold_simple(Fun, Acc3, Pos, RestTree),
     {[{Key, Value2, SubTree2} | RestTree2], Acc4}.

 map_leafs(_Fun, []) ->
     [];
 map_leafs(Fun, [{Pos, Tree} | Rest]) ->
     [NewTree] = map_leafs_simple(Fun, Pos, [Tree]),
     [{Pos, NewTree} | map_leafs(Fun, Rest)].

 map_leafs_simple(_Fun, _Pos, []) ->
     [];
 map_leafs_simple(Fun, Pos, [{Key, Value, []} | RestTree]) ->
     Value2 = Fun({Pos, Key}, Value),
     [{Key, Value2, []} | map_leafs_simple(Fun, Pos, RestTree)];
 map_leafs_simple(Fun, Pos, [{Key, Value, SubTree} | RestTree]) ->
     [{Key, Value, map_leafs_simple(Fun, Pos + 1, SubTree)} | map_leafs_simple(Fun, Pos, RestTree)].

 stem(Trees, Limit) ->
     try
         {_, Branches} = lists:foldl(
             fun(Tree, {Seen, TreeAcc}) ->
                 {NewSeen, NewBranches} = stem_tree(Tree, Limit, Seen),
                 {NewSeen, NewBranches ++ TreeAcc}
             end,
             {#{}, []},
             Trees
         ),
         lists:sort(Branches)
     catch
         throw:dupe_keys ->
             repair_tree(Trees, Limit)
     end.

 stem_tree({Depth, Child}, Limit, Seen) ->
     case stem_tree(Depth, Child, Limit, Seen) of
         {NewSeen, _, NewChild, NewBranches} ->
             {NewSeen, [{Depth, NewChild} | NewBranches]};
         {NewSeen, _, NewBranches} ->
             {NewSeen, NewBranches}
     end.

 stem_tree(_Depth, {Key, _Val, []} = Leaf, Limit, Seen) ->
     {check_key(Key, Seen), Limit - 1, Leaf, []};
 stem_tree(Depth, {Key, Val, Children}, Limit, Seen0) ->
     Seen1 = check_key(Key, Seen0),
     FinalAcc = lists:foldl(
         fun(Child, Acc) ->
             {SeenAcc, LimitPosAcc, ChildAcc, BranchAcc} = Acc,
             case stem_tree(Depth + 1, Child, Limit, SeenAcc) of
                 {NewSeenAcc, LimitPos, NewChild, NewBranches} ->
                     NewLimitPosAcc = erlang:max(LimitPos, LimitPosAcc),
                     NewChildAcc = [NewChild | ChildAcc],
                     NewBranchAcc = NewBranches ++ BranchAcc,
                     {NewSeenAcc, NewLimitPosAcc, NewChildAcc, NewBranchAcc};
                 {NewSeenAcc, LimitPos, NewBranches} ->
                     NewLimitPosAcc = erlang:max(LimitPos, LimitPosAcc),
                     NewBranchAcc = NewBranches ++ BranchAcc,
                     {NewSeenAcc, NewLimitPosAcc, ChildAcc, NewBranchAcc}
             end
         end,
         {Seen1, -1, [], []},
         Children
     ),
     {FinalSeen, FinalLimitPos, FinalChildren, FinalBranches} = FinalAcc,
     case FinalLimitPos of
         N when N > 0, FinalChildren =/= [] ->
             FinalNode = {Key, Val, lists:reverse(FinalChildren)},
             {FinalSeen, FinalLimitPos - 1, FinalNode, FinalBranches};
         0 when FinalChildren =/= [] ->
             NewBranches = lists:map(
                 fun(Child) ->
                     {Depth + 1, Child}
                 end,
                 lists:reverse(FinalChildren)
             ),
             {FinalSeen, -1, NewBranches ++ FinalBranches};
         N when N < 0, FinalChildren =:= [] ->
             {FinalSeen, FinalLimitPos - 1, FinalBranches}
     end.

 check_key(Key, Seen) ->
     case Seen of
         #{Key := true} ->
             throw(dupe_keys);
         _ ->
             Seen#{Key => true}
     end.

 repair_tree(Trees, Limit) ->
     % flatten each branch in a tree into a tree path, sort by starting rev #
     Paths = lists:sort(
         lists:map(
             fun({Pos, Path}) ->
                 StemmedPath = lists:sublist(Path, Limit),
                 {Pos + 1 - length(StemmedPath), StemmedPath}
             end,
             get_all_leafs_full(Trees)
         )
     ),

     % convert paths back to trees
     lists:foldl(
         fun({StartPos, Path}, TreeAcc) ->
             [SingleTree] = lists:foldl(
                 fun({K, V}, NewTreeAcc) -> [{K, V, NewTreeAcc}] end, [], Path
             ),
             {NewTrees, _} = merge(TreeAcc, {StartPos, SingleTree}),
             NewTrees
         end,
         [],
         Paths
     ).

 value_pref(Tuple, _) when
     is_tuple(Tuple),
     (tuple_size(Tuple) == 3 orelse tuple_size(Tuple) == 4)
 ->
     Tuple;
 value_pref(_, Tuple) when
     is_tuple(Tuple),
     (tuple_size(Tuple) == 3 orelse tuple_size(Tuple) == 4)
 ->
     Tuple;
 value_pref(?REV_MISSING, Other) ->
     Other;
 value_pref(Other, ?REV_MISSING) ->
     Other;
 value_pref(Last, _) ->
     Last.