blob: f48b181edae7fad6d683550c73d77af0869a8bdb [file] [log] [blame]
// for a better overview of what this is doing, read:
// https://github.com/apache/couchdb-couch/blob/master/src/couch_key_tree.erl
//
// But for a quick intro, CouchDB uses a revision tree to store a documents
// history, A -> B -> C, when a document has conflicts, that is a branch in the
// tree, A -> (B1 | B2 -> C), We store these as a nested array in the format
//
// KeyTree = [Path ... ]
// Path = {pos: position_from_root, ids: Tree}
// Tree = [Key, Opts, [Tree, ...]], in particular single node: [Key, []]
import rootToLeaf from './rootToLeaf';
import traverseRevTree from './traverseRevTree';
function sortByPos(a, b) {
return a.pos - b.pos;
}
// classic binary search
function binarySearch(arr, item, comparator) {
var low = 0;
var high = arr.length;
var mid;
while (low < high) {
mid = (low + high) >>> 1;
if (comparator(arr[mid], item) < 0) {
low = mid + 1;
} else {
high = mid;
}
}
return low;
}
// assuming the arr is sorted, insert the item in the proper place
function insertSorted(arr, item, comparator) {
var idx = binarySearch(arr, item, comparator);
arr.splice(idx, 0, item);
}
// Turn a path as a flat array into a tree with a single branch.
// If any should be stemmed from the beginning of the array, that's passed
// in as the second argument
function pathToTree(path, numStemmed) {
var root;
var leaf;
for (var i = numStemmed, len = path.length; i < len; i++) {
var node = path[i];
var currentLeaf = [node.id, node.opts, []];
if (leaf) {
leaf[2].push(currentLeaf);
leaf = currentLeaf;
} else {
root = leaf = currentLeaf;
}
}
return root;
}
// compare the IDs of two trees
function compareTree(a, b) {
return a[0] < b[0] ? -1 : 1;
}
// Merge two trees together
// The roots of tree1 and tree2 must be the same revision
function mergeTree(in_tree1, in_tree2) {
var queue = [{tree1: in_tree1, tree2: in_tree2}];
var conflicts = false;
while (queue.length > 0) {
var item = queue.pop();
var tree1 = item.tree1;
var tree2 = item.tree2;
if (tree1[1].status || tree2[1].status) {
tree1[1].status =
(tree1[1].status === 'available' ||
tree2[1].status === 'available') ? 'available' : 'missing';
}
for (var i = 0; i < tree2[2].length; i++) {
if (!tree1[2][0]) {
conflicts = 'new_leaf';
tree1[2][0] = tree2[2][i];
continue;
}
var merged = false;
for (var j = 0; j < tree1[2].length; j++) {
if (tree1[2][j][0] === tree2[2][i][0]) {
queue.push({tree1: tree1[2][j], tree2: tree2[2][i]});
merged = true;
}
}
if (!merged) {
conflicts = 'new_branch';
insertSorted(tree1[2], tree2[2][i], compareTree);
}
}
}
return {conflicts: conflicts, tree: in_tree1};
}
function doMerge(tree, path, dontExpand) {
var restree = [];
var conflicts = false;
var merged = false;
var res;
if (!tree.length) {
return {tree: [path], conflicts: 'new_leaf'};
}
for (var i = 0, len = tree.length; i < len; i++) {
var branch = tree[i];
if (branch.pos === path.pos && branch.ids[0] === path.ids[0]) {
// Paths start at the same position and have the same root, so they need
// merged
res = mergeTree(branch.ids, path.ids);
restree.push({pos: branch.pos, ids: res.tree});
conflicts = conflicts || res.conflicts;
merged = true;
} else if (dontExpand !== true) {
// The paths start at a different position, take the earliest path and
// traverse up until it as at the same point from root as the path we
// want to merge. If the keys match we return the longer path with the
// other merged After stemming we dont want to expand the trees
var t1 = branch.pos < path.pos ? branch : path;
var t2 = branch.pos < path.pos ? path : branch;
var diff = t2.pos - t1.pos;
var candidateParents = [];
var trees = [];
trees.push({ids: t1.ids, diff: diff, parent: null, parentIdx: null});
while (trees.length > 0) {
var item = trees.pop();
if (item.diff === 0) {
if (item.ids[0] === t2.ids[0]) {
candidateParents.push(item);
}
continue;
}
var elements = item.ids[2];
for (var j = 0, elementsLen = elements.length; j < elementsLen; j++) {
trees.push({
ids: elements[j],
diff: item.diff - 1,
parent: item.ids,
parentIdx: j
});
}
}
var el = candidateParents[0];
if (!el) {
restree.push(branch);
} else {
res = mergeTree(el.ids, t2.ids);
el.parent[2][el.parentIdx] = res.tree;
restree.push({pos: t1.pos, ids: t1.ids});
conflicts = conflicts || res.conflicts;
merged = true;
}
} else {
restree.push(branch);
}
}
// We didnt find
if (!merged) {
restree.push(path);
}
restree.sort(sortByPos);
return {
tree: restree,
conflicts: conflicts || 'internal_node'
};
}
// To ensure we dont grow the revision tree infinitely, we stem old revisions
function stem(tree, depth) {
// First we break out the tree into a complete list of root to leaf paths
var paths = rootToLeaf(tree);
var maybeStem = {};
var result;
for (var i = 0, len = paths.length; i < len; i++) {
// Then for each path, we cut off the start of the path based on the
// `depth` to stem to, and generate a new set of flat trees
var path = paths[i];
var stemmed = path.ids;
var numStemmed = Math.max(0, stemmed.length - depth);
var stemmedNode = {
pos: path.pos + numStemmed,
ids: pathToTree(stemmed, numStemmed)
};
for (var s = 0; s < numStemmed; s++) {
var rev = (path.pos + s) + '-' + stemmed[s].id;
maybeStem[rev] = true;
}
// Then we remerge all those flat trees together, ensuring that we dont
// connect trees that would go beyond the depth limit
if (result) {
result = doMerge(result, stemmedNode, true).tree;
} else {
result = [stemmedNode];
}
}
traverseRevTree(result, function (isLeaf, pos, revHash) {
// some revisions may have been removed in a branch but not in another
delete maybeStem[pos + '-' + revHash];
});
return {
tree: result,
revs: Object.keys(maybeStem)
};
}
function merge(tree, path, depth) {
var newTree = doMerge(tree, path);
var stemmed = stem(newTree.tree, depth);
return {
tree: stemmed.tree,
stemmedRevs: stemmed.revs,
conflicts: newTree.conflicts
};
}
export default merge;