| "use strict"; |
| |
| // Simulations show these probabilities for a single change |
| // 93.1% that one group is invalidated |
| // 4.8% that two groups are invalidated |
| // 1.1% that 3 groups are invalidated |
| // 0.1% that 4 or more groups are invalidated |
| // |
| // And these for removing/adding 10 lexically adjacent files |
| // 64.5% that one group is invalidated |
| // 24.8% that two groups are invalidated |
| // 7.8% that 3 groups are invalidated |
| // 2.7% that 4 or more groups are invalidated |
| // |
| // And these for removing/adding 3 random files |
| // 0% that one group is invalidated |
| // 3.7% that two groups are invalidated |
| // 80.8% that 3 groups are invalidated |
| // 12.3% that 4 groups are invalidated |
| // 3.2% that 5 or more groups are invalidated |
| |
| /** |
| * |
| * @param {string} a key |
| * @param {string} b key |
| * @returns {number} the similarity as number |
| */ |
| const similarity = (a, b) => { |
| const l = Math.min(a.length, b.length); |
| let dist = 0; |
| for (let i = 0; i < l; i++) { |
| const ca = a.charCodeAt(i); |
| const cb = b.charCodeAt(i); |
| dist += Math.max(0, 10 - Math.abs(ca - cb)); |
| } |
| return dist; |
| }; |
| |
| /** |
| * @param {string} a key |
| * @param {string} b key |
| * @returns {string} the common part and a single char for the difference |
| */ |
| const getName = (a, b) => { |
| const l = Math.min(a.length, b.length); |
| let r = ""; |
| for (let i = 0; i < l; i++) { |
| const ca = a.charAt(i); |
| const cb = b.charAt(i); |
| r += ca; |
| if (ca === cb) { |
| continue; |
| } |
| return r; |
| } |
| return a; |
| }; |
| |
| /** |
| * @template T |
| */ |
| class Node { |
| /** |
| * @param {T} item item |
| * @param {string} key key |
| * @param {number} size size |
| */ |
| constructor(item, key, size) { |
| this.item = item; |
| this.key = key; |
| this.size = size; |
| } |
| } |
| |
| /** |
| * @template T |
| */ |
| class Group { |
| /** |
| * @param {Node<T>[]} nodes nodes |
| * @param {number[]} similarities similarities between the nodes (length = nodes.length - 1) |
| */ |
| constructor(nodes, similarities) { |
| this.nodes = nodes; |
| this.similarities = similarities; |
| this.size = nodes.reduce((size, node) => size + node.size, 0); |
| /** @type {string} */ |
| this.key = undefined; |
| } |
| } |
| |
| /** |
| * @template T |
| * @typedef {Object} GroupedItems<T> |
| * @property {string} key |
| * @property {T[]} items |
| * @property {number} size |
| */ |
| |
| /** |
| * @template T |
| * @typedef {Object} Options |
| * @property {number} maxSize maximum size of a group |
| * @property {number} minSize minimum size of a group (preferred over maximum size) |
| * @property {Iterable<T>} items a list of items |
| * @property {function(T): number} getSize function to get size of an item |
| * @property {function(T): string} getKey function to get the key of an item |
| */ |
| |
| /** |
| * @template T |
| * @param {Options<T>} options options object |
| * @returns {GroupedItems<T>[]} grouped items |
| */ |
| module.exports = ({ maxSize, minSize, items, getSize, getKey }) => { |
| /** @type {Group<T>[]} */ |
| const result = []; |
| |
| const nodes = Array.from( |
| items, |
| item => new Node(item, getKey(item), getSize(item)) |
| ); |
| |
| /** @type {Node<T>[]} */ |
| const initialNodes = []; |
| |
| // lexically ordering of keys |
| nodes.sort((a, b) => { |
| if (a.key < b.key) return -1; |
| if (a.key > b.key) return 1; |
| return 0; |
| }); |
| |
| // return nodes bigger than maxSize directly as group |
| for (const node of nodes) { |
| if (node.size >= maxSize) { |
| result.push(new Group([node], [])); |
| } else { |
| initialNodes.push(node); |
| } |
| } |
| |
| if (initialNodes.length > 0) { |
| // calculate similarities between lexically adjacent nodes |
| /** @type {number[]} */ |
| const similarities = []; |
| for (let i = 1; i < initialNodes.length; i++) { |
| const a = initialNodes[i - 1]; |
| const b = initialNodes[i]; |
| similarities.push(similarity(a.key, b.key)); |
| } |
| |
| const initialGroup = new Group(initialNodes, similarities); |
| |
| if (initialGroup.size < minSize) { |
| // We hit an edgecase where the working set is already smaller than minSize |
| // We merge it with the smallest result node to keep minSize intact |
| if (result.length > 0) { |
| const smallestGroup = result.reduce((min, group) => |
| min.size > group.size ? group : min |
| ); |
| for (const node of initialGroup.nodes) smallestGroup.nodes.push(node); |
| smallestGroup.nodes.sort((a, b) => { |
| if (a.key < b.key) return -1; |
| if (a.key > b.key) return 1; |
| return 0; |
| }); |
| } else { |
| // There are no other nodes |
| // We use all nodes and have to accept that it's smaller than minSize |
| result.push(initialGroup); |
| } |
| } else { |
| const queue = [initialGroup]; |
| |
| while (queue.length) { |
| const group = queue.pop(); |
| // only groups bigger than maxSize need to be splitted |
| if (group.size < maxSize) { |
| result.push(group); |
| continue; |
| } |
| |
| // find unsplittable area from left and right |
| // going minSize from left and right |
| // at least one node need to be included otherwise we get stuck |
| let left = 0; |
| let leftSize = 0; |
| while (leftSize <= minSize) { |
| leftSize += group.nodes[left].size; |
| left++; |
| } |
| let right = group.nodes.length - 1; |
| let rightSize = 0; |
| while (rightSize <= minSize) { |
| rightSize += group.nodes[right].size; |
| right--; |
| } |
| |
| if (left - 1 > right) { |
| // can't split group while holding minSize |
| // because minSize is preferred of maxSize we return |
| // the group here even while it's too big |
| // To avoid this make sure maxSize > minSize * 3 |
| result.push(group); |
| continue; |
| } |
| if (left <= right) { |
| // when there is a area between left and right |
| // we look for best split point |
| // we split at the minimum similarity |
| // here key space is separated the most |
| let best = left - 1; |
| let bestSimilarity = group.similarities[best]; |
| for (let i = left; i <= right; i++) { |
| const similarity = group.similarities[i]; |
| if (similarity < bestSimilarity) { |
| best = i; |
| bestSimilarity = similarity; |
| } |
| } |
| left = best + 1; |
| right = best; |
| } |
| |
| // create two new groups for left and right area |
| // and queue them up |
| const rightNodes = [group.nodes[right + 1]]; |
| /** @type {number[]} */ |
| const rightSimilaries = []; |
| for (let i = right + 2; i < group.nodes.length; i++) { |
| rightSimilaries.push(group.similarities[i - 1]); |
| rightNodes.push(group.nodes[i]); |
| } |
| queue.push(new Group(rightNodes, rightSimilaries)); |
| |
| const leftNodes = [group.nodes[0]]; |
| /** @type {number[]} */ |
| const leftSimilaries = []; |
| for (let i = 1; i < left; i++) { |
| leftSimilaries.push(group.similarities[i - 1]); |
| leftNodes.push(group.nodes[i]); |
| } |
| queue.push(new Group(leftNodes, leftSimilaries)); |
| } |
| } |
| } |
| |
| // lexically ordering |
| result.sort((a, b) => { |
| if (a.nodes[0].key < b.nodes[0].key) return -1; |
| if (a.nodes[0].key > b.nodes[0].key) return 1; |
| return 0; |
| }); |
| |
| // give every group a name |
| for (let i = 0; i < result.length; i++) { |
| const group = result[i]; |
| const first = group.nodes[0]; |
| const last = group.nodes[group.nodes.length - 1]; |
| let name = getName(first.key, last.key); |
| group.key = name; |
| } |
| |
| // return the results |
| return result.map(group => { |
| /** @type {GroupedItems} */ |
| return { |
| key: group.key, |
| items: group.nodes.map(node => node.item), |
| size: group.size |
| }; |
| }); |
| }; |