| % Licensed under the Apache License, Version 2.0 (the "License"); you may not |
| % use this file except in compliance with the License. You may obtain a copy of |
| % the License at |
| % |
| % http://www.apache.org/licenses/LICENSE-2.0 |
| % |
| % Unless required by applicable law or agreed to in writing, software |
| % distributed under the License is distributed on an "AS IS" BASIS, WITHOUT |
| % WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the |
| % License for the specific language governing permissions and limitations under |
| % the License. |
| |
| -module(couch_key_tree). |
| |
| -export([merge/2, find_missing/2, get_key_leafs/2, get_full_key_paths/2, get/2]). |
| -export([map/2, get_all_leafs/1, count_leafs/1, remove_leafs/2, |
| get_all_leafs_full/1,stem/2,map_leafs/2]). |
| |
| % a key tree looks like this: |
| % Tree -> [] or [{Key, Value, ChildTree} | SiblingTree] |
| % ChildTree -> Tree |
| % SiblingTree -> [] or [{SiblingKey, Value, Tree} | Tree] |
| % And each Key < SiblingKey |
| |
| |
| % partial trees arranged by how much they are cut off. |
| |
| merge(A, B) -> |
| {Merged, HasConflicts} = |
| lists:foldl( |
| fun(InsertTree, {AccTrees, AccConflicts}) -> |
| {ok, Merged, Conflicts} = merge_one(AccTrees, InsertTree, [], false), |
| {Merged, Conflicts or AccConflicts} |
| end, |
| {A, false}, B), |
| if HasConflicts or |
| ((length(Merged) /= length(A)) and (length(Merged) /= length(B))) -> |
| Conflicts = conflicts; |
| true -> |
| Conflicts = no_conflicts |
| end, |
| {lists:sort(Merged), Conflicts}. |
| |
| merge_one([], Insert, OutAcc, ConflictsAcc) -> |
| {ok, [Insert | OutAcc], ConflictsAcc}; |
| merge_one([{Start, Tree}|Rest], {StartInsert, TreeInsert}, OutAcc, ConflictsAcc) -> |
| if Start =< StartInsert -> |
| StartA = Start, |
| StartB = StartInsert, |
| TreeA = Tree, |
| TreeB = TreeInsert; |
| true -> |
| StartB = Start, |
| StartA = StartInsert, |
| TreeB = Tree, |
| TreeA = TreeInsert |
| end, |
| case merge_at([TreeA], StartB - StartA, TreeB) of |
| {ok, [CombinedTrees], Conflicts} -> |
| merge_one(Rest, {StartA, CombinedTrees}, OutAcc, Conflicts or ConflictsAcc); |
| no -> |
| merge_one(Rest, {StartB, TreeB}, [{StartA, TreeA} | OutAcc], ConflictsAcc) |
| end. |
| |
| merge_at([], _Place, _Insert) -> |
| no; |
| merge_at([{Key, Value, SubTree}|Sibs], 0, {InsertKey, InsertValue, InsertSubTree}) -> |
| if Key == InsertKey -> |
| {Merge, Conflicts} = merge_simple(SubTree, InsertSubTree), |
| {ok, [{Key, Value, Merge} | Sibs], Conflicts}; |
| true -> |
| case merge_at(Sibs, 0, {InsertKey, InsertValue, InsertSubTree}) of |
| {ok, Merged, Conflicts} -> |
| {ok, [{Key, Value, SubTree} | Merged], Conflicts}; |
| no -> |
| no |
| end |
| end; |
| merge_at([{Key, Value, SubTree}|Sibs], Place, Insert) -> |
| case merge_at(SubTree, Place - 1,Insert) of |
| {ok, Merged, Conflicts} -> |
| {ok, [{Key, Value, Merged} | Sibs], Conflicts}; |
| no -> |
| case merge_at(Sibs, Place, Insert) of |
| {ok, Merged, Conflicts} -> |
| {ok, [{Key, Value, SubTree} | Merged], Conflicts}; |
| no -> |
| no |
| end |
| end. |
| |
| % key tree functions |
| merge_simple([], B) -> |
| {B, false}; |
| merge_simple(A, []) -> |
| {A, false}; |
| merge_simple([ATree | ANextTree], [BTree | BNextTree]) -> |
| {AKey, AValue, ASubTree} = ATree, |
| {BKey, _BValue, BSubTree} = BTree, |
| if |
| AKey == BKey -> |
| %same key |
| {MergedSubTree, Conflict1} = merge_simple(ASubTree, BSubTree), |
| {MergedNextTree, Conflict2} = merge_simple(ANextTree, BNextTree), |
| {[{AKey, AValue, MergedSubTree} | MergedNextTree], Conflict1 or Conflict2}; |
| AKey < BKey -> |
| {MTree, _} = merge_simple(ANextTree, [BTree | BNextTree]), |
| {[ATree | MTree], true}; |
| true -> |
| {MTree, _} = merge_simple([ATree | ANextTree], BNextTree), |
| {[BTree | MTree], true} |
| end. |
| |
| find_missing(_Tree, []) -> |
| []; |
| find_missing([], SeachKeys) -> |
| SeachKeys; |
| find_missing([{Start, {Key, Value, SubTree}} | RestTree], SeachKeys) -> |
| PossibleKeys = [{KeyPos, KeyValue} || {KeyPos, KeyValue} <- SeachKeys, KeyPos >= Start], |
| ImpossibleKeys = [{KeyPos, KeyValue} || {KeyPos, KeyValue} <- SeachKeys, KeyPos < Start], |
| Missing = find_missing_simple(Start, [{Key, Value, SubTree}], PossibleKeys), |
| find_missing(RestTree, ImpossibleKeys ++ Missing). |
| |
| find_missing_simple(_Pos, _Tree, []) -> |
| []; |
| find_missing_simple(_Pos, [], SeachKeys) -> |
| SeachKeys; |
| find_missing_simple(Pos, [{Key, _, SubTree} | RestTree], SeachKeys) -> |
| PossibleKeys = [{KeyPos, KeyValue} || {KeyPos, KeyValue} <- SeachKeys, KeyPos >= Pos], |
| ImpossibleKeys = [{KeyPos, KeyValue} || {KeyPos, KeyValue} <- SeachKeys, KeyPos < Pos], |
| |
| SrcKeys2 = PossibleKeys -- [{Pos, Key}], |
| SrcKeys3 = find_missing_simple(Pos + 1, SubTree, SrcKeys2), |
| ImpossibleKeys ++ find_missing_simple(Pos, RestTree, SrcKeys3). |
| |
| |
| filter_leafs([], _Keys, FilteredAcc, RemovedKeysAcc) -> |
| {FilteredAcc, RemovedKeysAcc}; |
| filter_leafs([{Pos, [{LeafKey, _}|_]} = Path |Rest], Keys, FilteredAcc, RemovedKeysAcc) -> |
| FilteredKeys = lists:delete({Pos, LeafKey}, Keys), |
| if FilteredKeys == Keys -> |
| % this leaf is not a key we are looking to remove |
| filter_leafs(Rest, Keys, [Path | FilteredAcc], RemovedKeysAcc); |
| true -> |
| % this did match a key, remove both the node and the input key |
| filter_leafs(Rest, FilteredKeys, FilteredAcc, [{Pos, LeafKey} | RemovedKeysAcc]) |
| end. |
| |
| % Removes any branches from the tree whose leaf node(s) are in the Keys |
| remove_leafs(Trees, Keys) -> |
| % flatten each branch in a tree into a tree path |
| Paths = get_all_leafs_full(Trees), |
| |
| % filter out any that are in the keys list. |
| {FilteredPaths, RemovedKeys} = filter_leafs(Paths, Keys, [], []), |
| |
| % convert paths back to trees |
| NewTree = lists:foldl( |
| fun({PathPos, Path},TreeAcc) -> |
| [SingleTree] = lists:foldl( |
| fun({K,V},NewTreeAcc) -> [{K,V,NewTreeAcc}] end, [], Path), |
| {NewTrees, _} = merge(TreeAcc, [{PathPos + 1 - length(Path), SingleTree}]), |
| NewTrees |
| end, [], FilteredPaths), |
| {NewTree, RemovedKeys}. |
| |
| |
| % get the leafs in the tree matching the keys. The matching key nodes can be |
| % leafs or an inner nodes. If an inner node, then the leafs for that node |
| % are returned. |
| get_key_leafs(Tree, Keys) -> |
| get_key_leafs(Tree, Keys, []). |
| |
| get_key_leafs(_, [], Acc) -> |
| {Acc, []}; |
| get_key_leafs([], Keys, Acc) -> |
| {Acc, Keys}; |
| get_key_leafs([{Pos, Tree}|Rest], Keys, Acc) -> |
| {Gotten, RemainingKeys} = get_key_leafs_simple(Pos, [Tree], Keys, []), |
| get_key_leafs(Rest, RemainingKeys, Gotten ++ Acc). |
| |
| get_key_leafs_simple(_Pos, _Tree, [], _KeyPathAcc) -> |
| {[], []}; |
| get_key_leafs_simple(_Pos, [], KeysToGet, _KeyPathAcc) -> |
| {[], KeysToGet}; |
| get_key_leafs_simple(Pos, [{Key, _Value, SubTree}=Tree | RestTree], KeysToGet, KeyPathAcc) -> |
| case lists:delete({Pos, Key}, KeysToGet) of |
| KeysToGet -> % same list, key not found |
| {LeafsFound, KeysToGet2} = get_key_leafs_simple(Pos + 1, SubTree, KeysToGet, [Key | KeyPathAcc]), |
| {RestLeafsFound, KeysRemaining} = get_key_leafs_simple(Pos, RestTree, KeysToGet2, KeyPathAcc), |
| {LeafsFound ++ RestLeafsFound, KeysRemaining}; |
| KeysToGet2 -> |
| LeafsFound = get_all_leafs_simple(Pos, [Tree], KeyPathAcc), |
| LeafKeysFound = [LeafKeyFound || {LeafKeyFound, _} <- LeafsFound], |
| KeysToGet2 = KeysToGet2 -- LeafKeysFound, |
| {RestLeafsFound, KeysRemaining} = get_key_leafs_simple(Pos, RestTree, KeysToGet2, KeyPathAcc), |
| {LeafsFound ++ RestLeafsFound, KeysRemaining} |
| end. |
| |
| get(Tree, KeysToGet) -> |
| {KeyPaths, KeysNotFound} = get_full_key_paths(Tree, KeysToGet), |
| FixedResults = [ {Value, {Pos, [Key0 || {Key0, _} <- Path]}} || {Pos, [{_Key, Value}|_]=Path} <- KeyPaths], |
| {FixedResults, KeysNotFound}. |
| |
| get_full_key_paths(Tree, Keys) -> |
| get_full_key_paths(Tree, Keys, []). |
| |
| get_full_key_paths(_, [], Acc) -> |
| {Acc, []}; |
| get_full_key_paths([], Keys, Acc) -> |
| {Acc, Keys}; |
| get_full_key_paths([{Pos, Tree}|Rest], Keys, Acc) -> |
| {Gotten, RemainingKeys} = get_full_key_paths(Pos, [Tree], Keys, []), |
| get_full_key_paths(Rest, RemainingKeys, Gotten ++ Acc). |
| |
| |
| get_full_key_paths(_Pos, _Tree, [], _KeyPathAcc) -> |
| {[], []}; |
| get_full_key_paths(_Pos, [], KeysToGet, _KeyPathAcc) -> |
| {[], KeysToGet}; |
| get_full_key_paths(Pos, [{KeyId, Value, SubTree} | RestTree], KeysToGet, KeyPathAcc) -> |
| KeysToGet2 = KeysToGet -- [{Pos, KeyId}], |
| CurrentNodeResult = |
| case length(KeysToGet2) == length(KeysToGet) of |
| true -> % not in the key list. |
| []; |
| false -> % this node is the key list. return it |
| [{Pos, [{KeyId, Value} | KeyPathAcc]}] |
| end, |
| {KeysGotten, KeysRemaining} = get_full_key_paths(Pos + 1, SubTree, KeysToGet2, [{KeyId, Value} | KeyPathAcc]), |
| {KeysGotten2, KeysRemaining2} = get_full_key_paths(Pos, RestTree, KeysRemaining, KeyPathAcc), |
| {CurrentNodeResult ++ KeysGotten ++ KeysGotten2, KeysRemaining2}. |
| |
| get_all_leafs_full(Tree) -> |
| get_all_leafs_full(Tree, []). |
| |
| get_all_leafs_full([], Acc) -> |
| Acc; |
| get_all_leafs_full([{Pos, Tree} | Rest], Acc) -> |
| get_all_leafs_full(Rest, get_all_leafs_full_simple(Pos, [Tree], []) ++ Acc). |
| |
| get_all_leafs_full_simple(_Pos, [], _KeyPathAcc) -> |
| []; |
| get_all_leafs_full_simple(Pos, [{KeyId, Value, []} | RestTree], KeyPathAcc) -> |
| [{Pos, [{KeyId, Value} | KeyPathAcc]} | get_all_leafs_full_simple(Pos, RestTree, KeyPathAcc)]; |
| get_all_leafs_full_simple(Pos, [{KeyId, Value, SubTree} | RestTree], KeyPathAcc) -> |
| get_all_leafs_full_simple(Pos + 1, SubTree, [{KeyId, Value} | KeyPathAcc]) ++ get_all_leafs_full_simple(Pos, RestTree, KeyPathAcc). |
| |
| get_all_leafs(Trees) -> |
| get_all_leafs(Trees, []). |
| |
| get_all_leafs([], Acc) -> |
| Acc; |
| get_all_leafs([{Pos, Tree}|Rest], Acc) -> |
| get_all_leafs(Rest, get_all_leafs_simple(Pos, [Tree], []) ++ Acc). |
| |
| get_all_leafs_simple(_Pos, [], _KeyPathAcc) -> |
| []; |
| get_all_leafs_simple(Pos, [{KeyId, Value, []} | RestTree], KeyPathAcc) -> |
| [{Value, {Pos, [KeyId | KeyPathAcc]}} | get_all_leafs_simple(Pos, RestTree, KeyPathAcc)]; |
| get_all_leafs_simple(Pos, [{KeyId, _Value, SubTree} | RestTree], KeyPathAcc) -> |
| get_all_leafs_simple(Pos + 1, SubTree, [KeyId | KeyPathAcc]) ++ get_all_leafs_simple(Pos, RestTree, KeyPathAcc). |
| |
| |
| count_leafs([]) -> |
| 0; |
| count_leafs([{_Pos,Tree}|Rest]) -> |
| count_leafs_simple([Tree]) + count_leafs(Rest). |
| |
| count_leafs_simple([]) -> |
| 0; |
| count_leafs_simple([{_Key, _Value, []} | RestTree]) -> |
| 1 + count_leafs_simple(RestTree); |
| count_leafs_simple([{_Key, _Value, SubTree} | RestTree]) -> |
| count_leafs_simple(SubTree) + count_leafs_simple(RestTree). |
| |
| |
| map(_Fun, []) -> |
| []; |
| map(Fun, [{Pos, Tree}|Rest]) -> |
| case erlang:fun_info(Fun, arity) of |
| {arity, 2} -> |
| [NewTree] = map_simple(fun(A,B,_C) -> Fun(A,B) end, Pos, [Tree]), |
| [{Pos, NewTree} | map(Fun, Rest)]; |
| {arity, 3} -> |
| [NewTree] = map_simple(Fun, Pos, [Tree]), |
| [{Pos, NewTree} | map(Fun, Rest)] |
| end. |
| |
| map_simple(_Fun, _Pos, []) -> |
| []; |
| map_simple(Fun, Pos, [{Key, Value, SubTree} | RestTree]) -> |
| Value2 = Fun({Pos, Key}, Value, |
| if SubTree == [] -> leaf; true -> branch end), |
| [{Key, Value2, map_simple(Fun, Pos + 1, SubTree)} | map_simple(Fun, Pos, RestTree)]. |
| |
| |
| map_leafs(_Fun, []) -> |
| []; |
| map_leafs(Fun, [{Pos, Tree}|Rest]) -> |
| [NewTree] = map_leafs_simple(Fun, Pos, [Tree]), |
| [{Pos, NewTree} | map_leafs(Fun, Rest)]. |
| |
| map_leafs_simple(_Fun, _Pos, []) -> |
| []; |
| map_leafs_simple(Fun, Pos, [{Key, Value, []} | RestTree]) -> |
| Value2 = Fun({Pos, Key}, Value), |
| [{Key, Value2, []} | map_leafs_simple(Fun, Pos, RestTree)]; |
| map_leafs_simple(Fun, Pos, [{Key, Value, SubTree} | RestTree]) -> |
| [{Key, Value, map_leafs_simple(Fun, Pos + 1, SubTree)} | map_leafs_simple(Fun, Pos, RestTree)]. |
| |
| |
| stem(Trees, Limit) -> |
| % flatten each branch in a tree into a tree path |
| Paths = get_all_leafs_full(Trees), |
| |
| Paths2 = [{Pos, lists:sublist(Path, Limit)} || {Pos, Path} <- Paths], |
| |
| % convert paths back to trees |
| lists:foldl( |
| fun({PathPos, Path},TreeAcc) -> |
| [SingleTree] = lists:foldl( |
| fun({K,V},NewTreeAcc) -> [{K,V,NewTreeAcc}] end, [], Path), |
| {NewTrees, _} = merge(TreeAcc, [{PathPos + 1 - length(Path), SingleTree}]), |
| NewTrees |
| end, [], Paths2). |
| |
| % Tests moved to test/etap/06?-*.t |
| |