Make match & describe handle empty quotes ‘correctly’

What constitutes correct behaviour could be debated, but it seems reasonable to match at every possible position.

commit: 1ae61df90a5b070c225abc56666d601f94128dd5 [log] [tgz]
author: Gerben <gerben@treora.com> Wed May 27 17:32:30 2020 +0200
committer: Gerben <gerben@treora.com> Thu Jun 18 16:23:43 2020 +0200
tree: 581e7367db87805b2ae67c92002b468a94f5807a
parent: 771b9a3eb1752357585b9d4c5e4e3bf217380746 [diff]
diff --git a/packages/dom/src/text-quote/describe.ts b/packages/dom/src/text-quote/describe.ts
index 572e218..784881c 100644
--- a/packages/dom/src/text-quote/describe.ts
+++ b/packages/dom/src/text-quote/describe.ts

@@ -52,7 +52,7 @@
   // Find all matches of the text in the scope.
   const stringMatches: number[] = [];
   let fromIndex = 0;
-  while (fromIndex < scopeText.length) {
+  while (fromIndex <= scopeText.length) {
     const matchIndex = scopeText.indexOf(exactText, fromIndex);
     if (matchIndex === -1) break;
     stringMatches.push(matchIndex);

diff --git a/packages/dom/src/text-quote/match.ts b/packages/dom/src/text-quote/match.ts
index f32afce..18b077e 100644
--- a/packages/dom/src/text-quote/match.ts
+++ b/packages/dom/src/text-quote/match.ts

@@ -54,7 +54,7 @@
       : 0;
 
     let fromIndex = 0;
-    while (fromIndex < scopeText.length) {
+    while (fromIndex <= scopeText.length) {
       // Find the quote with its prefix and suffix in the string.
       const patternStartIndex = scopeText.indexOf(searchPattern, fromIndex);
       if (patternStartIndex === -1) return;

diff --git a/packages/dom/test/text-quote-describe-cases.ts b/packages/dom/test/text-quote-describe-cases.ts
index cfc1435..4f34c92 100644
--- a/packages/dom/test/text-quote-describe-cases.ts
+++ b/packages/dom/test/text-quote-describe-cases.ts

@@ -83,9 +83,23 @@
       suffix: '',
     },
   },
+  'empty quote': {
+    html: '<b>To annotate or not to annotate</b>',
+    range: {
+      startContainerXPath: '//b/text()',
+      startOffset: 11,
+      endContainerXPath: '//b/text()',
+      endOffset: 11,
+    },
+    expected: {
+      type: 'TextQuoteSelector',
+      exact: '',
+      prefix: 'e',
+      suffix: ' ',
+    },
+  },
 
   // TODO test for:
-  // emtpy range
   // empty scope
   // custom scope
   // element edges, across elements, etc.

diff --git a/packages/dom/test/text-quote-match-cases.ts b/packages/dom/test/text-quote-match-cases.ts
index 33d66de..0fd757a 100644
--- a/packages/dom/test/text-quote-match-cases.ts
+++ b/packages/dom/test/text-quote-match-cases.ts

@@ -264,6 +264,72 @@
       },
     ],
   },
+  'empty quote': {
+    html: '<b>lorem</b>',
+    selector: {
+      type: 'TextQuoteSelector',
+      exact: '',
+    },
+    // A five character string contains six spots to find an empty string
+    expected: Array(6).fill(null).map((_, i) => ({
+      startContainerXPath: '//b/text()',
+      startOffset: i,
+      endContainerXPath: '//b/text()',
+      endOffset: i,
+    }))
+  },
+  'empty quote, with prefix': {
+    html: '<b>lorem ipsum dolor amet yada yada</b>',
+    selector: {
+      type: 'TextQuoteSelector',
+      exact: '',
+      prefix: 'dolor',
+    },
+    expected: [{
+      startContainerXPath: '//b/text()',
+      startOffset: 17,
+      endContainerXPath: '//b/text()',
+      endOffset: 17,
+    }]
+  },
+  'empty quote, with suffix': {
+    html: '<b>lorem ipsum dolor amet yada yada</b>',
+    selector: {
+      type: 'TextQuoteSelector',
+      exact: '',
+      suffix: 'i',
+    },
+    expected: [{
+      startContainerXPath: '//b/text()',
+      startOffset: 6,
+      endContainerXPath: '//b/text()',
+      endOffset: 6,
+    }]
+  },
+  'empty quote, with prefix and suffix': {
+    html: '<b>lorem ipsum dolor amet yada yada</b>',
+    selector: {
+      type: 'TextQuoteSelector',
+      exact: '',
+      prefix: 'lorem ',
+      suffix: 'ipsum',
+    },
+    expected: [{
+      startContainerXPath: '//b/text()',
+      startOffset: 6,
+      endContainerXPath: '//b/text()',
+      endOffset: 6,
+    }]
+  },
+  'empty quote, no matches': {
+    html: '<b>lorem ipsum dolor amet yada yada</b>',
+    selector: {
+      type: 'TextQuoteSelector',
+      exact: '',
+      prefix: 'X',
+    },
+    expected: [],
+  }
 };
 
 export default testCases;
commit	1ae61df90a5b070c225abc56666d601f94128dd5	[log] [tgz]
author	Gerben <gerben@treora.com>	Wed May 27 17:32:30 2020 +0200
committer	Gerben <gerben@treora.com>	Thu Jun 18 16:23:43 2020 +0200
tree	581e7367db87805b2ae67c92002b468a94f5807a
parent	771b9a3eb1752357585b9d4c5e4e3bf217380746 [diff]