Merge pull request #73 from apache/fragment-tests

Add tests for fragment-identifier and (attempt to) improve it
diff --git a/packages/fragment-identifier/src/fragment.pegjs b/packages/fragment-identifier/src/fragment.pegjs
index 871e94a..7e4d5b5 100644
--- a/packages/fragment-identifier/src/fragment.pegjs
+++ b/packages/fragment-identifier/src/fragment.pegjs
@@ -1,15 +1,13 @@
 {
-    function collect() {
-      var ret = {};
-      var len = arguments.length;
-      for (var i=0; i<len; i++) {
-        for (var p in arguments[i]) {
-          if (arguments[i].hasOwnProperty(p)) {
-            ret[p] = arguments[i][p];
-          }
-        }
-      }
-      return ret;
+    function debug(input, range) {
+      // Prints the location of the parser.
+      // Use e.g. in an action: { debug(input, range); return text(); }
+      const [start, end] = range();
+      const underline = (end > start + 1)
+        ? ' '.repeat(start) + '\\' + '_'.repeat(end - start - 2) + '/'
+        : ' '.repeat(start) + '^';
+      console.log(input);
+      console.log(underline);
     }
 }
 
@@ -26,11 +24,7 @@
 params
     = k1: key_value_pair k2:("," key_value_pair)*
         {
-            var f = k1;
-            for( var i = 0; i < k2.length; i++ ) {
-                f = collect(f, k2[i][1])
-            }
-            return f;
+            return k2.reduce((acc, cur) => Object.assign(acc, cur[1]), k1);
         }
 
 key_value_pair
@@ -67,5 +61,15 @@
 atom
     = chars:validchar+ { return chars.join(""); }
 
+// FIXME
+// While an opening parenthesis is always valid, a closing parenthesis
+// *might* be part of the value, but could also be the delimiter that
+// closes a selector(…) or state(…). The attempt below uses a look-ahead
+// to not match ')' before a comma or before another ')', or at the end
+// of the input. However, it will fail if a last param’s value ends with
+// ')', or if a key or value contains '))'. For example:
+//   selector(type=TextQuoteSelector,exact=example%20(that%20fails))
+//   selector(type=TextQuoteSelector,exact=another))failure)
 validchar
-    = [a-zA-Z0-9\<\>\/\[\]\:%+@.\-!\$\&\;*_]
+    = [a-zA-Z0-9<>/[\]:%+@.\-!$&;*_~';(]
+    / $( ")" &[^,)] )
diff --git a/packages/fragment-identifier/src/index.js b/packages/fragment-identifier/src/index.js
index 0ba2e4d..39513ae 100644
--- a/packages/fragment-identifier/src/index.js
+++ b/packages/fragment-identifier/src/index.js
@@ -31,10 +31,10 @@
       let value = resource[key];
       if (value instanceof Object) value = value.valueOf();
       if (value instanceof Object) {
-        value = stringify(value);
-        return `${encodeURIComponent(key)}=${value}`;
+        return `${encode(key)}=${stringify(value)}`;
+      } else {
+        return `${encode(key)}=${encode(value)}`;
       }
-      return [key, value].map(encodeURIComponent).join('=');
     })
     .join(',');
 
@@ -42,3 +42,9 @@
   if (/State$/.test(resource.type)) return `state(${data})`;
   throw new TypeError('Resource must be a Selector or State');
 }
+
+function encode(string) {
+  return encodeURIComponent(string)
+    .replace(/\(/g, '%28')
+    .replace(/\)/g, '%29')
+}
diff --git a/packages/fragment-identifier/test/index.js b/packages/fragment-identifier/test/index.js
new file mode 100644
index 0000000..cfba7f9
--- /dev/null
+++ b/packages/fragment-identifier/test/index.js
@@ -0,0 +1,126 @@
+/**
+ * @license
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+import { parse, stringify } from '../src';
+
+// Test examples from the spec: https://www.w3.org/TR/2017/NOTE-selectors-states-20170223/#json-examples-converted-to-fragment-identifiers
+import specExamplesRaw from './spec-examples.json';
+
+// The JSON file has the full examples; pull out the parts we need.
+const specExamples = Object.fromEntries(Object.entries(specExamplesRaw).map(
+  ([name, { uri, obj: { selector, state } }]) =>
+    [name, { fragId: uri.split('#')[1], selector, state }]
+));
+
+const specialCasesToStringify = {
+  'Value with parentheses (to be percent-encoded)': {
+    fragId: 'selector(type=TextQuoteSelector,exact=example%20%28with%20parentheses%29)',
+    selector: {
+      type: 'TextQuoteSelector',
+      exact: 'example (with parentheses)',
+    },
+  },
+};
+
+describe('stringify', () => {
+  // Test examples in the spec, ignoring their URI encoding
+  for (const [name, example] of Object.entries(specExamples)) {
+    it(`should properly stringify (disregarding URI-encoding): '${name}'`, () => {
+      const result = stringify(example.selector || example.state);
+      assert.equal(
+        decodeURIComponent(result),
+        decodeURIComponent(example.fragId)
+      );
+    });
+  }
+
+  for (const [name, example] of Object.entries(specialCasesToStringify)) {
+    it(`should properly stringify: '${name}'`, () => {
+      const result = stringify(example.selector || example.state);
+      assert.equal(result, example.fragId);
+    });
+  }
+});
+
+const specialCasesToParse = {
+  'One closing parenthesis inside a value': {
+    fragId: 'selector(type=TextQuoteSelector,exact=(not)%20a%20problem)',
+    selector: {
+      type: 'TextQuoteSelector',
+      exact: '(not) a problem',
+    },
+  },
+
+  'Two closing parenthesis inside a value': {
+    fragId: 'selector(type=TextQuoteSelector,exact=Hey))%20this%20breaks)',
+    selector: {
+      type: 'TextQuoteSelector',
+      exact: 'Hey)) this breaks',
+    },
+  },
+
+  'Two closing parentheses: one of value, one of selector': {
+    fragId: 'selector(type=TextQuoteSelector,exact=example%20(that%20fails))',
+    selector: {
+      type: 'TextQuoteSelector',
+      exact: 'example (that fails)',
+    },
+  },
+
+  'Three closing parentheses: one of the value, two of nested selectors': {
+    // Example from <https://github.com/w3c/web-annotation/issues/443>
+    fragId: `
+      selector(
+        type=RangeSelector,
+        startSelector=selector(type=TextQuoteSelector,exact=(but),
+        endSelector=selector(type=TextQuoteSelector,exact=crazy))
+      )
+      `.replace(/\s/g, ''),
+    selector: {
+      type: 'RangeSelector',
+      startSelector: {
+        type: 'TextQuoteSelector',
+        exact: '(but',
+      },
+      endSelector: {
+        type: 'TextQuoteSelector',
+        exact: 'crazy)',
+      },
+    },
+  },
+};
+
+describe('parse', () => {
+  const allCasesToParse = { ...specExamples, ...specialCasesToParse };
+  for (const [name, example] of Object.entries(allCasesToParse)) {
+    it(`should properly parse: ${name}`, () => {
+      const expected = (example.selector !== undefined)
+        ? { selector: example.selector }
+        : { state: example.state };
+      const result = parse(example.fragId);
+      assert.deepEqual(result, expected);
+    });
+  }
+
+  it('should throw when given an unknown type of fragment identifier', () => {
+    assert.throws(() => parse('section4'));
+    assert.throws(() => parse('t=3,8'));
+  });
+});
diff --git a/packages/fragment-identifier/test/spec-examples.json b/packages/fragment-identifier/test/spec-examples.json
new file mode 100644
index 0000000..832e02b
--- /dev/null
+++ b/packages/fragment-identifier/test/spec-examples.json
@@ -0,0 +1,180 @@
+{
+  "Example 2 <=> 16: Fragment selector": {
+    "uri": "http://example.org/video1#selector(type=FragmentSelector,conformsTo=http://www.w3.org/TR/media-frags/,value=t%3D30%2C60)",
+    "obj": {
+      "source": "http://example.org/video1",
+      "selector": {
+        "type": "FragmentSelector",
+        "conformsTo": "http://www.w3.org/TR/media-frags/",
+        "value": "t=30,60"
+      }
+    }
+  },
+
+  "Example 3 <=> 17: CSS selector": {
+    "uri": "http://example.org/page1.html#selector(type=CssSelector,value=%23elemid%20>%20.elemclass%20+%20p)",
+    "obj": {
+      "source": "http://example.org/page1.html",
+      "selector": {
+        "type": "CssSelector",
+        "value": "#elemid > .elemclass + p"
+      }
+    }
+  },
+
+  "Example 4 <=> 18: XPath selector": {
+    "uri": "http://example.org/page1.html#selector(type=XPathSelector,value=/html/body/p[2]/table/tr[2]/td[3]/span)",
+    "obj": {
+      "source": "http://example.org/page1.html",
+      "selector": {
+        "type": "XPathSelector",
+        "value": "/html/body/p[2]/table/tr[2]/td[3]/span"
+      }
+    }
+  },
+
+  "Example 5 <=> 19: TextQuote selector": {
+    "uri": "http://example.org/page1#selector(type=TextQuoteSelector,exact=annotation,prefix=this%20is%20an%20,suffix=%20that%20has%20some)",
+    "obj": {
+      "source": "http://example.org/page1",
+      "selector": {
+        "type": "TextQuoteSelector",
+        "exact": "annotation",
+        "prefix": "this is an ",
+        "suffix": " that has some"
+      }
+    }
+  },
+
+  "Example 6 <=> 20: TextPosition selector": {
+    "uri": "http://example.org/ebook1#selector(type=TextPositionSelector,start=412,end=795)",
+    "obj": {
+      "source": "http://example.org/ebook1",
+      "selector": {
+        "type": "TextPositionSelector",
+        "start": 412,
+        "end": 795
+      }
+    }
+  },
+
+  "Example 7 <=> 21: Data position selector": {
+    "uri": "http://example.org/diskimg1#selector(type=DataPositionSelector,start=4096,end=4104)",
+    "obj": {
+      "source": "http://example.org/diskimg1",
+      "selector": {
+        "type": "DataPositionSelector",
+        "start": 4096,
+        "end": 4104
+      }
+    }
+  },
+
+  "Example 8 <=> 22: SVG selector; external SVG": {
+    "uri": "http://example.org/map1#selector(type=SvgSelector,id=http://example.org/svg1)",
+    "obj": {
+      "source": "http://example.org/map1",
+      "selector": {
+        "type": "SvgSelector",
+        "id": "http://example.org/svg1"
+      }
+    }
+  },
+
+  "Example 9 <=> 23: SVG selector; embedded SVG": {
+    "uri": "http://example.org/map1#selector(type=SvgSelector,value=<svg:svg>%20...%20</svg:svg>)",
+    "obj": {
+      "source": "http://example.org/map1",
+      "selector": {
+        "type": "SvgSelector",
+        "value": "<svg:svg> ... </svg:svg>"
+      }
+    }
+  },
+
+  "Example 10 <=> 24: Range selector": {
+    "uri": "http://example.org/page1.html#selector(type=RangeSelector,startSelector=selector(type=XPathSelector,value=//table[1]/tr[1]/td[2]),endSelector=selector(type=XPathSelector,value=//table[1]/tr[1]/td[4]))",
+    "obj": {
+      "source": "http://example.org/page1.html",
+      "selector": {
+        "type": "RangeSelector",
+        "startSelector": {
+        "type": "XPathSelector",
+        "value": "//table[1]/tr[1]/td[2]"
+        },
+        "endSelector": {
+        "type": "XPathSelector",
+        "value": "//table[1]/tr[1]/td[4]"
+        }
+      }
+    }
+  },
+
+  "Example 11 <=> 25: Selector refinement": {
+    "uri": "http://example.org/page1#selector(type=FragmentSelector,value=para5,refinedBy=selector(type=TextQuoteSelector,exact=Selected%20Text,prefix=text%20before%20the%20,suffix=%20and%20text%20after%20it))",
+    "obj": {
+      "source": "http://example.org/page1",
+      "selector": {
+        "type": "FragmentSelector",
+        "value": "para5",
+        "refinedBy": {
+        "type": "TextQuoteSelector",
+        "exact": "Selected Text",
+        "prefix": "text before the ",
+        "suffix": " and text after it"
+        }
+      }
+    }
+  },
+
+  "Example 13 <=> 26: Time state": {
+    "uri": "http://example.org/page1#state(type=TimeState,cached=http://archive.example.org/copy1,sourceDate=2015-07-20T13:30:00Z)",
+    "obj": {
+      "source": "http://example.org/page1",
+      "state": {
+        "type": "TimeState",
+        "cached": "http://archive.example.org/copy1",
+        "sourceDate": "2015-07-20T13:30:00Z"
+      }
+    }
+  },
+
+  "Example 14 <=> 27: HTTP request state": {
+    "uri": "http://example.org/resource1#state(type=HttpRequestState,value=Accept:%20application/pdf)",
+    "obj": {
+      "source": "http://example.org/resource1",
+      "state": {
+        "type": "HttpRequestState",
+        "value": "Accept: application/pdf"
+      }
+    }
+  },
+
+  "Example 15 <=> 28: Refinement of states": {
+    "uri": "http://example.org/ebook1#state(type=TimeState,sourceDate=2016-02-01T12:05:23Z,refinedBy=state(type=HttpRequestState,value=Accept:%20application/epub+zip))",
+    "obj": {
+      "source": "http://example.org/ebook1",
+      "state": {
+        "type": "TimeState",
+        "sourceDate": "2016-02-01T12:05:23Z",
+        "refinedBy": {
+        "type": "HttpRequestState",
+        "value": "Accept: application/epub+zip"
+        }
+      }
+    }
+  },
+
+  "Example 29: Serializing IRI to URL (and vice versa)": {
+    "uri": "http://jp.example.org/page1#selector(type=TextQuoteSelector,exact=%E3%83%9A%E3%83%B3%E3%82%92,prefix=%E7%A7%81%E3%81%AF%E3%80%81,suffix=%E6%8C%81%E3%81%A3%E3%81%A6%E3%81%84%E3%81%BE%E3%81%99)",
+    "obj": {
+      "source": "http://jp.example.org/page1",
+      "selector": {
+        "type": "TextQuoteSelector",
+        "exact": "ペンを",
+        "prefix": "私は、",
+        "suffix": "持っています"
+      }
+    }
+  }
+}