packages/dom/src/text-quote/describe.ts - incubator-annotator - Git at Google

 /**
  * @license
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information
  * regarding copyright ownership.  The ASF licenses this file
  * to you under the Apache License, Version 2.0 (the
  * "License"); you may not use this file except in compliance
  * with the License.  You may obtain a copy of the License at
  *
  *   http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing,
  * software distributed under the License is distributed on an
  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
  * KIND, either express or implied.  See the License for the
  * specific language governing permissions and limitations
  * under the License.
  */

 import seek from 'dom-seek';

 import { TextQuoteSelector } from '../../../selector/src';
 import { DomScope } from '../types';
 import { ownerDocument, rangeFromScope } from '../scope';

 export async function describeTextQuote(
   range: Range,
   scope: DomScope = ownerDocument(range).documentElement,
 ): Promise<TextQuoteSelector> {
   const exact = range.toString();

   const result: TextQuoteSelector = { type: 'TextQuoteSelector', exact };

   const { prefix, suffix } = calculateContextForDisambiguation(range, result, scope);
   result.prefix = prefix;
   result.suffix = suffix;

   return result
 }

 function calculateContextForDisambiguation(
   range: Range,
   selector: TextQuoteSelector,
   scope: DomScope,
 ): { prefix?: string, suffix?: string } {
   const exactText = selector.exact;
   const scopeText = rangeFromScope(scope).toString();
   const targetStartIndex = getRangeTextPosition(range, scope);
   const targetEndIndex = targetStartIndex + exactText.length;

   // Find all matches of the text in the scope.
   const stringMatches: number[] = [];
   let fromIndex = 0;
   while (fromIndex <= scopeText.length) {
     const matchIndex = scopeText.indexOf(exactText, fromIndex);
     if (matchIndex === -1) break;
     stringMatches.push(matchIndex);
     fromIndex = matchIndex + 1;
   }

   // Count for each undesired match the required prefix and suffix lengths, such that either of them
   // would have invalidated the match.
   const affixLengthPairs: Array<[number, number]> = [];
   for (const matchStartIndex of stringMatches) {
     const matchEndIndex = matchStartIndex + exactText.length

     // Skip the found match if it is the actual target.
     if (matchStartIndex === targetStartIndex)
       continue;

     // Count how many characters before & after them the false match and target have in common.
     const sufficientPrefixLength = charactersNeededToBeUnique(
       scopeText.substring(0, targetStartIndex),
       scopeText.substring(0, matchStartIndex),
       true,
     );
     const sufficientSuffixLength = charactersNeededToBeUnique(
       scopeText.substring(targetEndIndex),
       scopeText.substring(matchEndIndex),
       false,
     );
     affixLengthPairs.push([sufficientPrefixLength, sufficientSuffixLength]);
   }

   // Find the prefix and suffix that would invalidate all mismatches, using the minimal characters
   // for prefix and suffix combined.
   const [prefixLength, suffixLength] = minimalSolution(affixLengthPairs);
   const prefix = scopeText.substring(targetStartIndex - prefixLength, targetStartIndex);
   const suffix = scopeText.substring(targetEndIndex, targetEndIndex + suffixLength);
   return { prefix, suffix };
 }

 function charactersNeededToBeUnique(target: string, impostor: string, reverse: boolean = false) {
   // Count how many characters the two strings have in common.
   let overlap = 0;
   while (reverse
     ? target[target.length - 1 - overlap] === impostor[impostor.length - 1 - overlap]
     : target[overlap] === impostor[overlap]
   )
     overlap++;
   if (overlap === target.length)
     return Infinity; // (no substring of target can make it distinguishable from its impostor)
   else
     return overlap + 1;
 }

 function minimalSolution(requirements: Array<[number, number]>): [number, number] {
   // Ensure we try solutions with an empty prefix or suffix.
   requirements.push([0, 0]);

   // Build all the pairs and order them by their sums.
   const pairs = requirements.flatMap(l => requirements.map<[number, number]>(r => [l[0], r[1]]));
   pairs.sort((a, b) => a[0] + a[1] - (b[0] + b[1]));

   // Find the first pair that satisfies every requirement.
   for (const pair of pairs) {
     const [p0, p1] = pair;
     if (requirements.every(([r0, r1]) => r0 <= p0 || r1 <= p1)) {
       return pair;
     }
   }

   // Return the largest pairing (unreachable).
   return pairs[pairs.length - 1];
 }

 // Get the index of the first character of range within the text of scope.
 function getRangeTextPosition(range: Range, scope: DomScope): number {
   const scopeAsRange = rangeFromScope(scope);
   const iter = document.createNodeIterator(
     scopeAsRange.commonAncestorContainer,
     NodeFilter.SHOW_TEXT,
     {
       acceptNode(node: Text) {
         // Only reveal nodes within the range
         return scopeAsRange.intersectsNode(node)
           ? NodeFilter.FILTER_ACCEPT
           : NodeFilter.FILTER_REJECT
       },
     },
   );
   if (isTextNode(range.startContainer))
     return seek(iter, range.startContainer) + range.startOffset;
   else
     return seek(iter, firstTextNodeInRange(range));
 }

 function firstTextNodeInRange(range: Range): Text {
   // Find the first text node inside the range.
   const iter = document.createNodeIterator(
     range.commonAncestorContainer,
     NodeFilter.SHOW_TEXT,
     {
       acceptNode(node: Text) {
         // Only reveal nodes within the range; and skip any empty text nodes.
         return range.intersectsNode(node) && node.length > 0
           ? NodeFilter.FILTER_ACCEPT
           : NodeFilter.FILTER_REJECT
       },
     },
   );
   const node = iter.nextNode() as Text | null;
   if (node === null) throw new Error('Range contains no text nodes');
   return node;
 }

 function isTextNode(node: Node): node is Text {
   return node.nodeType === Node.TEXT_NODE;
 }
	/**
	* @license
	* Licensed to the Apache Software Foundation (ASF) under one
	* or more contributor license agreements. See the NOTICE file
	* distributed with this work for additional information
	* regarding copyright ownership. The ASF licenses this file
	* to you under the Apache License, Version 2.0 (the
	* "License"); you may not use this file except in compliance
	* with the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing,
	* software distributed under the License is distributed on an
	* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
	* KIND, either express or implied. See the License for the
	* specific language governing permissions and limitations
	* under the License.
	*/

	import seek from 'dom-seek';

	import { TextQuoteSelector } from '../../../selector/src';
	import { DomScope } from '../types';
	import { ownerDocument, rangeFromScope } from '../scope';

	export async function describeTextQuote(
	range: Range,
	scope: DomScope = ownerDocument(range).documentElement,
	): Promise<TextQuoteSelector> {
	const exact = range.toString();

	const result: TextQuoteSelector = { type: 'TextQuoteSelector', exact };

	const { prefix, suffix } = calculateContextForDisambiguation(range, result, scope);
	result.prefix = prefix;
	result.suffix = suffix;

	return result
	}

	function calculateContextForDisambiguation(
	range: Range,
	selector: TextQuoteSelector,
	scope: DomScope,
	): { prefix?: string, suffix?: string } {
	const exactText = selector.exact;
	const scopeText = rangeFromScope(scope).toString();
	const targetStartIndex = getRangeTextPosition(range, scope);
	const targetEndIndex = targetStartIndex + exactText.length;

	// Find all matches of the text in the scope.
	const stringMatches: number[] = [];
	let fromIndex = 0;
	while (fromIndex <= scopeText.length) {
	const matchIndex = scopeText.indexOf(exactText, fromIndex);
	if (matchIndex === -1) break;
	stringMatches.push(matchIndex);
	fromIndex = matchIndex + 1;
	}

	// Count for each undesired match the required prefix and suffix lengths, such that either of them
	// would have invalidated the match.
	const affixLengthPairs: Array<[number, number]> = [];
	for (const matchStartIndex of stringMatches) {
	const matchEndIndex = matchStartIndex + exactText.length

	// Skip the found match if it is the actual target.
	if (matchStartIndex === targetStartIndex)
	continue;

	// Count how many characters before & after them the false match and target have in common.
	const sufficientPrefixLength = charactersNeededToBeUnique(
	scopeText.substring(0, targetStartIndex),
	scopeText.substring(0, matchStartIndex),
	true,
	);
	const sufficientSuffixLength = charactersNeededToBeUnique(
	scopeText.substring(targetEndIndex),
	scopeText.substring(matchEndIndex),
	false,
	);
	affixLengthPairs.push([sufficientPrefixLength, sufficientSuffixLength]);
	}

	// Find the prefix and suffix that would invalidate all mismatches, using the minimal characters
	// for prefix and suffix combined.
	const [prefixLength, suffixLength] = minimalSolution(affixLengthPairs);
	const prefix = scopeText.substring(targetStartIndex - prefixLength, targetStartIndex);
	const suffix = scopeText.substring(targetEndIndex, targetEndIndex + suffixLength);
	return { prefix, suffix };
	}

	function charactersNeededToBeUnique(target: string, impostor: string, reverse: boolean = false) {
	// Count how many characters the two strings have in common.
	let overlap = 0;
	while (reverse
	? target[target.length - 1 - overlap] === impostor[impostor.length - 1 - overlap]
	: target[overlap] === impostor[overlap]
	)
	overlap++;
	if (overlap === target.length)
	return Infinity; // (no substring of target can make it distinguishable from its impostor)
	else
	return overlap + 1;
	}

	function minimalSolution(requirements: Array<[number, number]>): [number, number] {
	// Ensure we try solutions with an empty prefix or suffix.
	requirements.push([0, 0]);

	// Build all the pairs and order them by their sums.
	const pairs = requirements.flatMap(l => requirements.map<[number, number]>(r => [l[0], r[1]]));
	pairs.sort((a, b) => a[0] + a[1] - (b[0] + b[1]));

	// Find the first pair that satisfies every requirement.
	for (const pair of pairs) {
	const [p0, p1] = pair;
	if (requirements.every(([r0, r1]) => r0 <= p0 \|\| r1 <= p1)) {
	return pair;
	}
	}

	// Return the largest pairing (unreachable).
	return pairs[pairs.length - 1];
	}

	// Get the index of the first character of range within the text of scope.
	function getRangeTextPosition(range: Range, scope: DomScope): number {
	const scopeAsRange = rangeFromScope(scope);
	const iter = document.createNodeIterator(
	scopeAsRange.commonAncestorContainer,
	NodeFilter.SHOW_TEXT,
	{
	acceptNode(node: Text) {
	// Only reveal nodes within the range
	return scopeAsRange.intersectsNode(node)
	? NodeFilter.FILTER_ACCEPT
	: NodeFilter.FILTER_REJECT
	},
	},
	);
	if (isTextNode(range.startContainer))
	return seek(iter, range.startContainer) + range.startOffset;
	else
	return seek(iter, firstTextNodeInRange(range));
	}

	function firstTextNodeInRange(range: Range): Text {
	// Find the first text node inside the range.
	const iter = document.createNodeIterator(
	range.commonAncestorContainer,
	NodeFilter.SHOW_TEXT,
	{
	acceptNode(node: Text) {
	// Only reveal nodes within the range; and skip any empty text nodes.
	return range.intersectsNode(node) && node.length > 0
	? NodeFilter.FILTER_ACCEPT
	: NodeFilter.FILTER_REJECT
	},
	},
	);
	const node = iter.nextNode() as Text \| null;
	if (node === null) throw new Error('Range contains no text nodes');
	return node;
	}

	function isTextNode(node: Node): node is Text {
	return node.nodeType === Node.TEXT_NODE;
	}