blob: 784881c3c14d551c27988f66cc3978d29808fe9b [file] [log] [blame]
/**
* @license
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
import seek from 'dom-seek';
import { TextQuoteSelector } from '../../../selector/src';
import { DomScope } from '../types';
import { ownerDocument, rangeFromScope } from '../scope';
export async function describeTextQuote(
range: Range,
scope: DomScope = ownerDocument(range).documentElement,
): Promise<TextQuoteSelector> {
const exact = range.toString();
const result: TextQuoteSelector = { type: 'TextQuoteSelector', exact };
const { prefix, suffix } = calculateContextForDisambiguation(range, result, scope);
result.prefix = prefix;
result.suffix = suffix;
return result
}
function calculateContextForDisambiguation(
range: Range,
selector: TextQuoteSelector,
scope: DomScope,
): { prefix?: string, suffix?: string } {
const exactText = selector.exact;
const scopeText = rangeFromScope(scope).toString();
const targetStartIndex = getRangeTextPosition(range, scope);
const targetEndIndex = targetStartIndex + exactText.length;
// Find all matches of the text in the scope.
const stringMatches: number[] = [];
let fromIndex = 0;
while (fromIndex <= scopeText.length) {
const matchIndex = scopeText.indexOf(exactText, fromIndex);
if (matchIndex === -1) break;
stringMatches.push(matchIndex);
fromIndex = matchIndex + 1;
}
// Count for each undesired match the required prefix and suffix lengths, such that either of them
// would have invalidated the match.
const affixLengthPairs: Array<[number, number]> = [];
for (const matchStartIndex of stringMatches) {
const matchEndIndex = matchStartIndex + exactText.length
// Skip the found match if it is the actual target.
if (matchStartIndex === targetStartIndex)
continue;
// Count how many characters before & after them the false match and target have in common.
const sufficientPrefixLength = charactersNeededToBeUnique(
scopeText.substring(0, targetStartIndex),
scopeText.substring(0, matchStartIndex),
true,
);
const sufficientSuffixLength = charactersNeededToBeUnique(
scopeText.substring(targetEndIndex),
scopeText.substring(matchEndIndex),
false,
);
affixLengthPairs.push([sufficientPrefixLength, sufficientSuffixLength]);
}
// Find the prefix and suffix that would invalidate all mismatches, using the minimal characters
// for prefix and suffix combined.
const [prefixLength, suffixLength] = minimalSolution(affixLengthPairs);
const prefix = scopeText.substring(targetStartIndex - prefixLength, targetStartIndex);
const suffix = scopeText.substring(targetEndIndex, targetEndIndex + suffixLength);
return { prefix, suffix };
}
function charactersNeededToBeUnique(target: string, impostor: string, reverse: boolean = false) {
// Count how many characters the two strings have in common.
let overlap = 0;
while (reverse
? target[target.length - 1 - overlap] === impostor[impostor.length - 1 - overlap]
: target[overlap] === impostor[overlap]
)
overlap++;
if (overlap === target.length)
return Infinity; // (no substring of target can make it distinguishable from its impostor)
else
return overlap + 1;
}
function minimalSolution(requirements: Array<[number, number]>): [number, number] {
// Ensure we try solutions with an empty prefix or suffix.
requirements.push([0, 0]);
// Build all the pairs and order them by their sums.
const pairs = requirements.flatMap(l => requirements.map<[number, number]>(r => [l[0], r[1]]));
pairs.sort((a, b) => a[0] + a[1] - (b[0] + b[1]));
// Find the first pair that satisfies every requirement.
for (const pair of pairs) {
const [p0, p1] = pair;
if (requirements.every(([r0, r1]) => r0 <= p0 || r1 <= p1)) {
return pair;
}
}
// Return the largest pairing (unreachable).
return pairs[pairs.length - 1];
}
// Get the index of the first character of range within the text of scope.
function getRangeTextPosition(range: Range, scope: DomScope): number {
const scopeAsRange = rangeFromScope(scope);
const iter = document.createNodeIterator(
scopeAsRange.commonAncestorContainer,
NodeFilter.SHOW_TEXT,
{
acceptNode(node: Text) {
// Only reveal nodes within the range
return scopeAsRange.intersectsNode(node)
? NodeFilter.FILTER_ACCEPT
: NodeFilter.FILTER_REJECT
},
},
);
if (isTextNode(range.startContainer))
return seek(iter, range.startContainer) + range.startOffset;
else
return seek(iter, firstTextNodeInRange(range));
}
function firstTextNodeInRange(range: Range): Text {
// Find the first text node inside the range.
const iter = document.createNodeIterator(
range.commonAncestorContainer,
NodeFilter.SHOW_TEXT,
{
acceptNode(node: Text) {
// Only reveal nodes within the range; and skip any empty text nodes.
return range.intersectsNode(node) && node.length > 0
? NodeFilter.FILTER_ACCEPT
: NodeFilter.FILTER_REJECT
},
},
);
const node = iter.nextNode() as Text | null;
if (node === null) throw new Error('Range contains no text nodes');
return node;
}
function isTextNode(node: Node): node is Text {
return node.nodeType === Node.TEXT_NODE;
}