Fix for bug XMLBEANS-412. Contributed by Jerry Sy. All checkin tests pass. git-svn-id: https://svn.apache.org/repos/asf/xmlbeans/trunk@1330466 13f79535-47bb-0310-9956-ffa450edef68

commit: b89130603bf0c27c57ba143bfa7340ccd7865b67 [log] [tgz]
author: Cezar Cristian Andrei <cezar@apache.org> Wed Apr 25 18:24:37 2012 +0000
committer: Cezar Cristian Andrei <cezar@apache.org> Wed Apr 25 18:24:37 2012 +0000
tree: 4dc50562a88432e81029bbb7c96b68a05d8b6567
parent: ded094b8937b3eee9a8cc5045eb2e2f451cd387b [diff]
diff --git a/src/typeimpl/org/apache/xmlbeans/impl/regex/ParserForXMLSchema.java b/src/typeimpl/org/apache/xmlbeans/impl/regex/ParserForXMLSchema.java
index c040df2..ff41545 100644
--- a/src/typeimpl/org/apache/xmlbeans/impl/regex/ParserForXMLSchema.java
+++ b/src/typeimpl/org/apache/xmlbeans/impl/regex/ParserForXMLSchema.java

@@ -19,7 +19,7 @@
 import java.util.Locale;
 
 /**
- * A regular expression parser for the XML Shema.
+ * A regular expression parser for the XML Schema.
  *
  * @author TAMURA Kent &lt;kent@trl.ibm.co.jp&gt;
  */
@@ -159,7 +159,7 @@
      * from-to-range    ::= cc-normal-c '-' cc-normal-c
      *
      * @param useNrage Ignored.
-     * @return This returns no NrageToken.
+     * @return This returns no NrangeToken.
      */
     protected RangeToken parseCharacterClass(boolean useNrange) throws ParseException {
         this.setContext(S_INBRACKETS);
@@ -234,9 +234,9 @@
                 if (type == T_CHAR) {
                     if (c == '[')  throw this.ex("parser.cc.6", this.offset-2);
                     if (c == ']')  throw this.ex("parser.cc.7", this.offset-2);
-                    // (radup) XMLSchema 1.0 allows the '-' as the first character of a range,
-                    // but it looks like XMLSchema 1.1 will prohibit it - track this
-                    if (c == '-' && !firstloop)  throw this.ex("parser.cc.8", this.offset-2);
+                    //https://issues.apache.org/jira/browse/XMLBEANS-412
+                    // An unescaped single '-' is valid immediately after '[' or immediately before ']' (positive range only).
+                    if (c== '-' && !firstloop && this.chardata!=']') throw this.ex("parser.cc.8", this.offset-2);
                 }
                 if (this.read() != T_CHAR || this.chardata != '-') { // Here is no '-'.
                     tok.addRange(c, c);
@@ -245,19 +245,27 @@
                     this.next(); // Skips '-'
                     if ((type = this.read()) == T_EOF)  throw this.ex("parser.cc.2", this.offset);
                                                 // c '-' ']' -> '-' is a single-range.
-                    if ((type == T_CHAR && this.chardata == ']')
-                        || type == T_XMLSCHEMA_CC_SUBTRACTION) {
+                    if (type == T_XMLSCHEMA_CC_SUBTRACTION) {
                         throw this.ex("parser.cc.8", this.offset-1);
+                    } else if (type == T_CHAR && this.chardata == ']') {
+                        //'-' occurs after a single-range but before ']'
+                        tok.addRange(c,c);
+                        tok.addRange('-','-');
                     } else {
                         int rangeend = this.chardata;
                         if (type == T_CHAR) {
                             if (rangeend == '[')  throw this.ex("parser.cc.6", this.offset-1);
                             if (rangeend == ']')  throw this.ex("parser.cc.7", this.offset-1);
-                            if (rangeend == '-')  throw this.ex("parser.cc.8", this.offset-2);
+                            if (rangeend == '-')  {
+                                this.next();
+                                if (this.chardata!=']')
+                                    throw this.ex("parser.cc.8", this.offset-2);
+                            }
                         }
                         else if (type == T_BACKSOLIDUS)
                             rangeend = this.decodeEscaped();
-                        this.next();
+                        if (rangeend!='-' || this.chardata!=']')
+                            this.next();
 
                         if (c > rangeend)  throw this.ex("parser.ope.3", this.offset-1);
                         tok.addRange(c, rangeend);

diff --git a/test/src/misc/checkin/XMLBEANS412Test.java b/test/src/misc/checkin/XMLBEANS412Test.java
new file mode 100644
index 0000000..096b255
--- /dev/null
+++ b/test/src/misc/checkin/XMLBEANS412Test.java

@@ -0,0 +1,99 @@
+package misc.checkin;

+

+import junit.framework.Assert;

+import junit.framework.Test;

+import junit.framework.TestCase;

+import junit.framework.TestSuite;

+import org.apache.xmlbeans.impl.regex.ParseException;

+import org.apache.xmlbeans.impl.regex.RegularExpression;

+

+import java.util.StringTokenizer;

+

+public class XMLBEANS412Test extends TestCase

+{

+    static String PassedPosCharGroups = "-,\\-,--,\\--,---,\\---,--\\-,\\--\\-,-\\--,\\-\\--,-a,\\-a,a-,"+

+            "a\\-,a-b,a\\-b,a\\--,-a-z,\\-a-z,a-z-,a-z\\-,a-z\\-0-9,a\\-z-,a\\-z\\-,a\\-z\\-0-9,"+

+            "-0-9,0-9-,0-9aaa,0-9a-,a-z\\--/,A-F0-9.+-,-A-F0-9.+,A-F0-9.+\\-,\\-A-F0-9.+";

+

+    static String FailedPosCharGroups =  "[a--],[a-z-0-9],[a\\-z-0-9],[0-9--],[0-9a--],[0-9-a],[0-9-a-z]";

+    static String MiscPassedPatterns = "([\\.a-zA-Z0-9_-])+@([a-zA-Z0-9_-])+(([a-zA-Z0-9_-])*\\.([a-zA-Z0-9_-])+)+";

+

+    public XMLBEANS412Test(String name)

+    {

+        super(name);

+    }

+

+    public static Test suite()

+    {

+        return new TestSuite(XMLBEANS412Test.class);

+    }

+

+    public void testPassedPosCharGroupPatterns()

+    {

+        StringTokenizer tok = new StringTokenizer(PassedPosCharGroups,",");

+        while (tok.hasMoreElements()) {

+            String pattern = "[" + tok.nextToken() + "]";

+            try {

+                new RegularExpression(pattern, "X");

+            } catch (ParseException e) {

+                Assert.fail("Pattern " + pattern + " failed due to " + e.getMessage());

+            }

+        }

+    }

+

+    public void testNegatedPassedPosCharGroupPatterns()

+    {

+        StringTokenizer tok = new StringTokenizer(PassedPosCharGroups,",");

+        while (tok.hasMoreElements()) {

+            String pattern = "[^" + tok.nextToken() + "]";

+            try {

+                new RegularExpression(pattern, "X");

+            } catch (ParseException e) {

+                Assert.fail("Pattern " + pattern + " failed due to " + e.getMessage());

+            }

+        }

+

+

+    }

+

+    public void testFailedPosCharGroupPatterns()

+    {

+        StringTokenizer tok = new StringTokenizer(FailedPosCharGroups,",");

+        while (tok.hasMoreElements()) {

+            String pattern = "[" + tok.nextToken() + "]";

+            try {

+                new RegularExpression(pattern,"X");

+            } catch (ParseException e) {

+                continue;

+            }

+            Assert.fail("Pattern " + pattern + " did not fail.");

+        }

+    }

+

+    public void testNegatedFailedPosCharGroupPatterns()

+    {

+        StringTokenizer tok = new StringTokenizer(FailedPosCharGroups,",");

+        while (tok.hasMoreElements()) {

+            String pattern = "[^" + tok.nextToken() + "]";

+            try {

+                new RegularExpression(pattern,"X");

+            } catch (ParseException e) {

+                continue;

+            }

+            Assert.fail("Pattern " + pattern + " did not fail.");

+        }

+    }

+

+    public void testMiscPassedPatterns() {

+        StringTokenizer tok = new StringTokenizer(MiscPassedPatterns,",");

+        while (tok.hasMoreElements()) {

+            String pattern = tok.nextToken();

+            try {

+                new RegularExpression(pattern, "X");

+            } catch (ParseException e) {

+                Assert.fail("Pattern " + pattern + " failed due to " + e.getMessage());

+            }

+        }

+

+    }

+}
commit	b89130603bf0c27c57ba143bfa7340ccd7865b67	[log] [tgz]
author	Cezar Cristian Andrei <cezar@apache.org>	Wed Apr 25 18:24:37 2012 +0000
committer	Cezar Cristian Andrei <cezar@apache.org>	Wed Apr 25 18:24:37 2012 +0000
tree	4dc50562a88432e81029bbb7c96b68a05d8b6567
parent	ded094b8937b3eee9a8cc5045eb2e2f451cd387b [diff]