| /** |
| * @license |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, |
| * software distributed under the License is distributed on an |
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| * KIND, either express or implied. See the License for the |
| * specific language governing permissions and limitations |
| * under the License. |
| */ |
| |
| import { assert } from 'chai'; |
| import { createTextQuoteSelectorMatcher } from '../../src/text-quote/match'; |
| import { TextQuoteSelector } from '../../../selector/src/types'; |
| import { DomScope } from '../../src/types'; |
| import testCases from './match-cases'; |
| import { evaluateXPath, RangeInfo } from './utils'; |
| |
| const domParser = new window.DOMParser(); |
| |
| describe('createTextQuoteSelectorMatcher', () => { |
| for (const [name, { html, selector, expected }] of Object.entries(testCases)) { |
| it(`works for case: '${name}'`, async () => { |
| const doc = domParser.parseFromString(html, 'text/html'); |
| await testMatcher(doc, doc, selector, expected); |
| }); |
| } |
| |
| it('handles adjacent text nodes', async () => { |
| const { html, selector } = testCases['simple']; |
| const doc = domParser.parseFromString(html, 'text/html'); |
| const textNode = evaluateXPath(doc, '//b/text()') as Text; |
| |
| for (let index = textNode.length - 1; index > 0; index--) |
| textNode.splitText(index); |
| // console.log([...textNode.parentNode.childNodes].map(node => node.textContent)) |
| // → 'l', 'o', 'r', 'e', 'm', … |
| |
| await testMatcher(doc, doc, selector, [ |
| { |
| startContainerXPath: '//b/text()[13]', |
| startOffset: 0, |
| endContainerXPath: '//b/text()[21]', |
| endOffset: 0, |
| }, |
| ]); |
| }); |
| |
| it('handles empty text nodes', async () => { |
| const { html, selector } = testCases['simple']; |
| const doc = domParser.parseFromString(html, 'text/html'); |
| |
| const textNode = evaluateXPath(doc, '//b/text()') as Text; |
| textNode.splitText(textNode.length); |
| textNode.splitText(20); |
| textNode.splitText(20); |
| textNode.splitText(17); |
| textNode.splitText(17); |
| textNode.splitText(12); |
| textNode.splitText(12); |
| textNode.splitText(0); |
| // console.log([...textNode.parentNode.childNodes].map(node => node.textContent)) |
| // → '', 'lorem ipsum ', '', 'dolor', '', ' am', '', 'et yada yada', '' |
| |
| await testMatcher(doc, doc, selector, [ |
| { |
| startContainerXPath: '//b/text()[4]', // "dolor" |
| startOffset: 0, |
| endContainerXPath: '//b/text()[8]', // "et yada yada" |
| endOffset: 0, |
| }, |
| ]); |
| }); |
| |
| it('works with parent of text as scope', async () => { |
| const { html, selector, expected } = testCases['simple']; |
| const doc = domParser.parseFromString(html, 'text/html'); |
| |
| await testMatcher(doc, evaluateXPath(doc, '//b'), selector, expected); |
| }); |
| |
| it('works with parent of text as scope, when matching its first characters', async () => { |
| const { html, selector, expected } = testCases['first characters']; |
| const doc = domParser.parseFromString(html, 'text/html'); |
| |
| await testMatcher(doc, evaluateXPath(doc, '//b'), selector, expected); |
| }); |
| |
| it('works with parent of text as scope, when matching its first characters, with an empty text node', async () => { |
| const { html, selector } = testCases['first characters']; |
| const doc = domParser.parseFromString(html, 'text/html'); |
| |
| const textNode = evaluateXPath(doc, '//b/text()') as Text; |
| textNode.splitText(0); |
| |
| await testMatcher(doc, evaluateXPath(doc, '//b'), selector, [ |
| { |
| startContainerXPath: '//b/text()[2]', |
| startOffset: 0, |
| endContainerXPath: '//b/text()[2]', |
| endOffset: 11, |
| }, |
| ]); |
| }); |
| |
| it('works when scope is a Range within one text node', async () => { |
| const { html, selector, expected } = testCases['simple']; |
| const doc = domParser.parseFromString(html, 'text/html'); |
| |
| // Use the substring ‘ipsum dolor amet’ as scope. |
| const scope = document.createRange(); |
| scope.setStart(evaluateXPath(doc, '//b/text()'), 6); |
| scope.setEnd(evaluateXPath(doc, '//b/text()'), 22); |
| await testMatcher(doc, scope, selector, expected); |
| }); |
| |
| it('works when scope is a Range with both ends inside text nodes', async () => { |
| const { html, selector, expected } = testCases['across elements']; |
| const doc = domParser.parseFromString(html, 'text/html'); |
| |
| // Use the substring ‘sum dolor am’ as scope. |
| const scope = document.createRange(); |
| scope.setStart(evaluateXPath(doc, '//i/text()'), 2); |
| scope.setEnd(evaluateXPath(doc, '//u/text()'), 2); |
| await testMatcher(doc, scope, selector, expected); |
| }); |
| |
| it('works when scope is a Range with both ends inside elements', async () => { |
| const { html, selector, expected } = testCases['across elements']; |
| const doc = domParser.parseFromString(html, 'text/html'); |
| |
| const scope = document.createRange(); |
| scope.setStart(evaluateXPath(doc, '//b'), 1); // before the <i> |
| scope.setEnd(evaluateXPath(doc, '//b'), 4); // before the " yada yada" |
| await testMatcher(doc, scope, selector, expected); |
| }); |
| |
| it('ignores quote when scope is an empty range', async () => { |
| const { html, selector } = testCases['simple']; |
| const doc = domParser.parseFromString(html, 'text/html'); |
| |
| const scope = document.createRange(); |
| await testMatcher(doc, scope, selector, []); |
| }); |
| |
| it('ignores quote extending just beyond scope', async () => { |
| const { html, selector } = testCases['simple']; |
| const doc = domParser.parseFromString(html, 'text/html'); |
| |
| const scope = document.createRange(); |
| scope.setStart(evaluateXPath(doc, '//b/text()'), 0); |
| scope.setEnd(evaluateXPath(doc, '//b/text()'), 19); |
| await testMatcher(doc, scope, selector, []); |
| }); |
| |
| it('ignores quote starting just before scope', async () => { |
| const { html, selector } = testCases['simple']; |
| const doc = domParser.parseFromString(html, 'text/html'); |
| |
| const scope = document.createRange(); |
| scope.setStart(evaluateXPath(doc, '//b/text()'), 13); |
| scope.setEnd(evaluateXPath(doc, '//b/text()'), 32); |
| await testMatcher(doc, scope, selector, []); |
| }); |
| }); |
| |
| async function testMatcher( |
| doc: Document, |
| scope: DomScope, |
| selector: TextQuoteSelector, |
| expected: RangeInfo[], |
| ) { |
| const matcher = createTextQuoteSelectorMatcher(selector); |
| const matches = []; |
| for await (const value of matcher(scope)) |
| matches.push(value); |
| assert.equal(matches.length, expected.length); |
| matches.forEach((match, i) => { |
| const expectedRange = expected[i]; |
| const expectedStartContainer = evaluateXPath(doc, expectedRange.startContainerXPath); |
| const expectedEndContainer = evaluateXPath(doc, expectedRange.endContainerXPath); |
| assert(match.startContainer === expectedStartContainer, |
| `unexpected start container: ${prettyNodeName(match.startContainer)}; ` |
| + `expected ${prettyNodeName(expectedStartContainer)}` |
| ); |
| assert.equal(match.startOffset, expectedRange.startOffset); |
| assert(match.endContainer === evaluateXPath(doc, expectedRange.endContainerXPath), |
| `unexpected end container: ${prettyNodeName(match.endContainer)}; ` |
| + `expected ${prettyNodeName(expectedEndContainer)}` |
| ); |
| assert.equal(match.endOffset, expectedRange.endOffset); |
| }); |
| } |
| |
| function prettyNodeName(node: Node) { |
| switch (node.nodeType) { |
| case Node.TEXT_NODE: |
| const text = (node as Text).nodeValue; |
| return `#text "${text.length > 50 ? text.substring(0, 50) + '…' : text}"`; |
| case Node.ELEMENT_NODE: |
| return `<${(node as Element).tagName.toLowerCase()}>`; |
| default: |
| return node.nodeName.toLowerCase(); |
| } |
| } |