| /** |
| * @license |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, |
| * software distributed under the License is distributed on an |
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| * KIND, either express or implied. See the License for the |
| * specific language governing permissions and limitations |
| * under the License. |
| */ |
| |
| import seek from 'dom-seek'; |
| |
| import { TextQuoteSelector } from '../../../selector/src'; |
| import { DomScope } from '../types'; |
| import { ownerDocument, rangeFromScope } from '../scope'; |
| |
| export async function describeTextQuote( |
| range: Range, |
| scope: DomScope = ownerDocument(range).documentElement, |
| ): Promise<TextQuoteSelector> { |
| const exact = range.toString(); |
| |
| const result: TextQuoteSelector = { type: 'TextQuoteSelector', exact }; |
| |
| const { prefix, suffix } = calculateContextForDisambiguation(range, result, scope); |
| result.prefix = prefix; |
| result.suffix = suffix; |
| |
| return result |
| } |
| |
| function calculateContextForDisambiguation( |
| range: Range, |
| selector: TextQuoteSelector, |
| scope: DomScope, |
| ): { prefix?: string, suffix?: string } { |
| const exactText = selector.exact; |
| const scopeText = rangeFromScope(scope).toString(); |
| const targetStartIndex = getRangeTextPosition(range, scope); |
| const targetEndIndex = targetStartIndex + exactText.length; |
| |
| // Find all matches of the text in the scope. |
| const stringMatches: number[] = []; |
| let fromIndex = 0; |
| while (fromIndex <= scopeText.length) { |
| const matchIndex = scopeText.indexOf(exactText, fromIndex); |
| if (matchIndex === -1) break; |
| stringMatches.push(matchIndex); |
| fromIndex = matchIndex + 1; |
| } |
| |
| // Count for each undesired match the required prefix and suffix lengths, such that either of them |
| // would have invalidated the match. |
| const affixLengthPairs: Array<[number, number]> = []; |
| for (const matchStartIndex of stringMatches) { |
| const matchEndIndex = matchStartIndex + exactText.length |
| |
| // Skip the found match if it is the actual target. |
| if (matchStartIndex === targetStartIndex) |
| continue; |
| |
| // Count how many characters before & after them the false match and target have in common. |
| const sufficientPrefixLength = charactersNeededToBeUnique( |
| scopeText.substring(0, targetStartIndex), |
| scopeText.substring(0, matchStartIndex), |
| true, |
| ); |
| const sufficientSuffixLength = charactersNeededToBeUnique( |
| scopeText.substring(targetEndIndex), |
| scopeText.substring(matchEndIndex), |
| false, |
| ); |
| affixLengthPairs.push([sufficientPrefixLength, sufficientSuffixLength]); |
| } |
| |
| // Find the prefix and suffix that would invalidate all mismatches, using the minimal characters |
| // for prefix and suffix combined. |
| const [prefixLength, suffixLength] = minimalSolution(affixLengthPairs); |
| const prefix = scopeText.substring(targetStartIndex - prefixLength, targetStartIndex); |
| const suffix = scopeText.substring(targetEndIndex, targetEndIndex + suffixLength); |
| return { prefix, suffix }; |
| } |
| |
| function charactersNeededToBeUnique(target: string, impostor: string, reverse: boolean = false) { |
| // Count how many characters the two strings have in common. |
| let overlap = 0; |
| while (reverse |
| ? target[target.length - 1 - overlap] === impostor[impostor.length - 1 - overlap] |
| : target[overlap] === impostor[overlap] |
| ) |
| overlap++; |
| if (overlap === target.length) |
| return Infinity; // (no substring of target can make it distinguishable from its impostor) |
| else |
| return overlap + 1; |
| } |
| |
| function minimalSolution(requirements: Array<[number, number]>): [number, number] { |
| // Ensure we try solutions with an empty prefix or suffix. |
| requirements.push([0, 0]); |
| |
| // Build all the pairs and order them by their sums. |
| const pairs = requirements.flatMap(l => requirements.map<[number, number]>(r => [l[0], r[1]])); |
| pairs.sort((a, b) => a[0] + a[1] - (b[0] + b[1])); |
| |
| // Find the first pair that satisfies every requirement. |
| for (const pair of pairs) { |
| const [p0, p1] = pair; |
| if (requirements.every(([r0, r1]) => r0 <= p0 || r1 <= p1)) { |
| return pair; |
| } |
| } |
| |
| // Return the largest pairing (unreachable). |
| return pairs[pairs.length - 1]; |
| } |
| |
| // Get the index of the first character of range within the text of scope. |
| function getRangeTextPosition(range: Range, scope: DomScope): number { |
| const scopeAsRange = rangeFromScope(scope); |
| const iter = document.createNodeIterator( |
| scopeAsRange.commonAncestorContainer, |
| NodeFilter.SHOW_TEXT, |
| { |
| acceptNode(node: Text) { |
| // Only reveal nodes within the range |
| return scopeAsRange.intersectsNode(node) |
| ? NodeFilter.FILTER_ACCEPT |
| : NodeFilter.FILTER_REJECT |
| }, |
| }, |
| ); |
| if (isTextNode(range.startContainer)) |
| return seek(iter, range.startContainer) + range.startOffset; |
| else |
| return seek(iter, firstTextNodeInRange(range)); |
| } |
| |
| function firstTextNodeInRange(range: Range): Text { |
| // Find the first text node inside the range. |
| const iter = document.createNodeIterator( |
| range.commonAncestorContainer, |
| NodeFilter.SHOW_TEXT, |
| { |
| acceptNode(node: Text) { |
| // Only reveal nodes within the range; and skip any empty text nodes. |
| return range.intersectsNode(node) && node.length > 0 |
| ? NodeFilter.FILTER_ACCEPT |
| : NodeFilter.FILTER_REJECT |
| }, |
| }, |
| ); |
| const node = iter.nextNode() as Text | null; |
| if (node === null) throw new Error('Range contains no text nodes'); |
| return node; |
| } |
| |
| function isTextNode(node: Node): node is Text { |
| return node.nodeType === Node.TEXT_NODE; |
| } |