blob: 2ca29e66d5291621c77993a068282e51137652a9 [file] [log] [blame]
/**
* @license
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
import type {
TextQuoteSelector,
DescribeTextQuoteOptions,
} from '@apache-annotator/selector';
import type { RangeInfo } from '../utils';
/**
 * One text-quote description fixture.
 *
 * NOTE(review): presumably consumed by the describe-text-quote test suite,
 * which is not visible from this file — confirm against the caller.
 */
interface DescribeTextQuoteTestCase {
  /** HTML markup for the fixture. */
  html: string;
  /** Start/end boundaries of the range under test. */
  range: RangeInfo;
  /** Options paired with this case; an empty object where none are set. */
  options: DescribeTextQuoteOptions;
  /** The selector this case expects for `range`. */
  expected: TextQuoteSelector;
}

/** A set of fixtures keyed by a human-readable case name. */
export interface DescribeTextQuoteTestCases {
  [name: string]: DescribeTextQuoteTestCase;
}
/**
 * Fixtures whose `options` object is empty.
 *
 * Every case except 'across elements' has both range boundaries in the text
 * node matched by `//b/text()`.
 */
export const testCasesWithoutOptions: DescribeTextQuoteTestCases = ((): DescribeTextQuoteTestCases => {
  const boldTextNode = '//b/text()';
  const loremHtml = '<b>lorem ipsum dolor amet yada yada</b>';
  const annotateHtml = '<b>To annotate or not to annotate</b>';

  // Range whose start and end both lie in the <b> element's text node.
  const withinBold = (startOffset: number, endOffset: number): RangeInfo => ({
    startContainerXPath: boldTextNode,
    startOffset,
    endContainerXPath: boldTextNode,
    endOffset,
  });

  return {
    'no context': {
      html: loremHtml,
      range: withinBold(12, 22),
      options: {},
      expected: {
        type: 'TextQuoteSelector',
        exact: 'dolor amet',
        prefix: '',
        suffix: '',
      },
    },

    'use prefix to complete word': {
      html: loremHtml,
      range: withinBold(14, 22),
      options: {},
      expected: {
        type: 'TextQuoteSelector',
        exact: 'lor amet',
        prefix: 'do',
        suffix: '',
      },
    },

    'use suffix to complete word': {
      html: loremHtml,
      range: withinBold(12, 20),
      options: {},
      expected: {
        type: 'TextQuoteSelector',
        exact: 'dolor am',
        prefix: '',
        suffix: 'et',
      },
    },

    // 'not' also occurs inside both occurrences of 'annotate'.
    'add context to disambiguate': {
      html: annotateHtml,
      range: withinBold(15, 18),
      options: {},
      expected: {
        type: 'TextQuoteSelector',
        exact: 'not',
        prefix: 'or ',
        suffix: ' to',
      },
    },

    'only prefix for end of text': {
      html: annotateHtml,
      range: withinBold(22, 30),
      options: {},
      expected: {
        type: 'TextQuoteSelector',
        exact: 'annotate',
        prefix: 'to ',
        suffix: '',
      },
    },

    'only suffix for start of text': {
      html: '<b>annotate or not to annotate, yada yada</b>',
      range: withinBold(0, 8),
      options: {},
      expected: {
        type: 'TextQuoteSelector',
        exact: 'annotate',
        prefix: '',
        suffix: ' or',
      },
    },

    'multiple, overlapping false matches': {
      html: '<b>a a a a a a a a a a</b>',
      range: withinBold(8, 13),
      options: {},
      expected: {
        type: 'TextQuoteSelector',
        exact: 'a a a',
        prefix: 'a a a a ',
        suffix: ' a a a',
      },
    },

    // Collapsed range: start and end offsets are equal.
    'empty quote': {
      html: annotateHtml,
      range: withinBold(11, 11),
      options: {},
      expected: {
        type: 'TextQuoteSelector',
        exact: '',
        prefix: 'To annotate',
        suffix: ' or',
      },
    },

    // Starts in the <u> text node and ends in the third text node of <b>.
    'across elements': {
      html: '<b>To annotate or <i>not</i> to <u>anno</u>tat</b>e',
      range: {
        startContainerXPath: '//u/text()',
        startOffset: 0,
        endContainerXPath: '//b/text()[3]',
        endOffset: 2,
      },
      options: {},
      expected: {
        type: 'TextQuoteSelector',
        exact: 'annota',
        prefix: 'to ',
        suffix: 'te',
      },
    },
  };
})();
/**
 * Fixtures that all set `minimalContext: true`.
 *
 * Every range here starts and ends in the text node matched by `//b/text()`.
 */
export const testCasesWithMinimalContext: DescribeTextQuoteTestCases = ((): DescribeTextQuoteTestCases => {
  const boldTextNode = '//b/text()';
  const sentenceHtml = '<b>To annotate or not to annotate</b>';
  const sentenceWithPeriodHtml = '<b>To annotate or not to annotate.</b>';

  // Range whose start and end both lie in the <b> element's text node.
  const withinBold = (startOffset: number, endOffset: number): RangeInfo => ({
    startContainerXPath: boldTextNode,
    startOffset,
    endContainerXPath: boldTextNode,
    endOffset,
  });

  return {
    'no context': {
      html: '<b>lorem ipsum dolor amet yada yada</b>',
      range: withinBold(12, 20),
      options: { minimalContext: true },
      expected: {
        type: 'TextQuoteSelector',
        exact: 'dolor am',
        prefix: '',
        suffix: '',
      },
    },

    'minimal prefix': {
      html: sentenceWithPeriodHtml,
      range: withinBold(22, 26),
      options: { minimalContext: true },
      expected: {
        type: 'TextQuoteSelector',
        exact: 'anno',
        prefix: 'to ',
        suffix: '',
      },
    },

    'minimal suffix': {
      html: sentenceWithPeriodHtml,
      range: withinBold(7, 11),
      options: { minimalContext: true },
      expected: {
        type: 'TextQuoteSelector',
        exact: 'tate',
        prefix: '',
        suffix: ' ',
      },
    },

    // Note the lower-case leading 'to' in this document.
    'use suffix for start of text': {
      html: '<b>to annotate or not to annotate.</b>',
      range: withinBold(0, 2),
      options: { minimalContext: true },
      expected: {
        type: 'TextQuoteSelector',
        exact: 'to',
        prefix: '',
        suffix: ' annotate ',
      },
    },

    'use prefix for end of text': {
      html: sentenceHtml,
      range: withinBold(26, 30),
      options: { minimalContext: true },
      expected: {
        type: 'TextQuoteSelector',
        exact: 'tate',
        prefix: 'to anno',
        suffix: '',
      },
    },

    'multiple, overlapping false matches': {
      html: '<b>aaaaaaaaaa</b>',
      range: withinBold(4, 7),
      options: { minimalContext: true },
      expected: {
        type: 'TextQuoteSelector',
        exact: 'aaa',
        prefix: 'aaaa',
        suffix: 'aaa',
      },
    },

    // Collapsed range: start and end offsets are equal.
    'empty quote': {
      html: sentenceHtml,
      range: withinBold(11, 11),
      options: { minimalContext: true },
      expected: {
        type: 'TextQuoteSelector',
        exact: '',
        prefix: 'e',
        suffix: ' ',
      },
    },
  };
})();
/**
 * Fixtures that all set `minimumQuoteLength: 10`.
 *
 * All three cases share one document and keep both range boundaries in the
 * text node matched by `//b/text()`.
 */
export const testCasesWithMinimumQuoteLength: DescribeTextQuoteTestCases = ((): DescribeTextQuoteTestCases => {
  const boldTextNode = '//b/text()';
  const loremHtml = '<b>lorem ipsum dolor amet yada yada</b>';

  // Range whose start and end both lie in the <b> element's text node.
  const withinBold = (startOffset: number, endOffset: number): RangeInfo => ({
    startContainerXPath: boldTextNode,
    startOffset,
    endContainerXPath: boldTextNode,
    endOffset,
  });

  return {
    'balance prefix and suffix': {
      html: loremHtml,
      range: withinBold(12, 17),
      options: { minimumQuoteLength: 10 },
      expected: {
        type: 'TextQuoteSelector',
        exact: 'dolor',
        prefix: 'ipsum ',
        suffix: ' amet',
      },
    },

    'use prefix for end of text': {
      html: loremHtml,
      range: withinBold(28, 30),
      options: { minimumQuoteLength: 10 },
      expected: {
        type: 'TextQuoteSelector',
        exact: 'ya',
        prefix: 'amet yada ',
        suffix: 'da',
      },
    },

    'use suffix for start of text': {
      html: loremHtml,
      range: withinBold(2, 3),
      options: { minimumQuoteLength: 10 },
      expected: {
        type: 'TextQuoteSelector',
        exact: 'r',
        prefix: 'lo',
        suffix: 'em ipsum',
      },
    },
  };
})();
/**
 * Fixtures built around very long words.
 *
 * The first two cases set `maxWordLength: 10`; the last passes empty options
 * and expects a 50-character suffix.
 */
export const testCasesWithMaxWordLength: DescribeTextQuoteTestCases = ((): DescribeTextQuoteTestCases => {
  const boldTextNode = '//b/text()';
  const longWordHtml =
    '<b>Surely counterantidisintermediationism is too long to quote.</b>';

  // Range whose start and end both lie in the <b> element's text node.
  const withinBold = (startOffset: number, endOffset: number): RangeInfo => ({
    startContainerXPath: boldTextNode,
    startOffset,
    endContainerXPath: boldTextNode,
    endOffset,
  });

  return {
    'too long prefix': {
      html: longWordHtml,
      range: withinBold(28, 31),
      options: { maxWordLength: 10 },
      expected: {
        type: 'TextQuoteSelector',
        exact: 'dia',
        prefix: 'disinterme',
        suffix: 'tionism',
      },
    },

    'too long suffix': {
      html: longWordHtml,
      range: withinBold(14, 18),
      options: { maxWordLength: 10 },
      expected: {
        type: 'TextQuoteSelector',
        exact: 'anti',
        prefix: 'counter',
        suffix: 'disinterme',
      },
    },

    // No maxWordLength given; the expected suffix is exactly 50 characters.
    'default should be 50': {
      html:
        '<b>The chromosome is ACATATTACGTTAGATATGACACCCATATAGTTATTTATAAGATGGGACAGATATTAGTTTAAAAA</b>',
      range: withinBold(18, 27),
      options: {},
      expected: {
        type: 'TextQuoteSelector',
        exact: 'ACATATTAC',
        prefix: '',
        suffix: 'GTTAGATATGACACCCATATAGTTATTTATAAGATGGGACAGATATTAGT',
      },
    },
  };
})();