PIG-4889: Replacing backslash fails as lexical error (knoguchi)


git-svn-id: https://svn.apache.org/repos/asf/pig/trunk@1743583 13f79535-47bb-0310-9956-ffa450edef68
diff --git a/CHANGES.txt b/CHANGES.txt
index b266a68..1694098 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -123,6 +123,8 @@
 
 BUG FIXES
 
+PIG-4889: Replacing backslash fails as lexical error (knoguchi)
+
 PIG-4880: Overlapping of parameter substitution names inside&outside a macro fails with NPE (knoguchi)
 
 PIG-4881: TestBuiltin.testUniqueID failing on hadoop-1.x (knoguchi)
diff --git a/src/org/apache/pig/tools/pigscript/parser/PigScriptParser.jj b/src/org/apache/pig/tools/pigscript/parser/PigScriptParser.jj
index af882ea..fbb92f4 100644
--- a/src/org/apache/pig/tools/pigscript/parser/PigScriptParser.jj
+++ b/src/org/apache/pig/tools/pigscript/parser/PigScriptParser.jj
@@ -23,6 +23,7 @@
   STATIC = false;
   // Case is ignored in keywords
   IGNORE_CASE = true;
+  // Uncomment to trace parser productions when debugging: DEBUG_PARSER = true;
   JAVA_UNICODE_ESCAPE = true;
 }
 
@@ -292,7 +293,8 @@
 
 <IN_STRING> MORE :
 {
-	<"\\'">
+	<"\\\\">
+|	<"\\'">
 |	<"'"> { SwitchTo(prevState);}
 |	<("\n" | "\r" | "\r\n")> {secondary_prompt();}
 |	<(~[])>
diff --git a/test/org/apache/pig/test/TestPigScriptParser.java b/test/org/apache/pig/test/TestPigScriptParser.java
index d7f8f32..7824ff1 100644
--- a/test/org/apache/pig/test/TestPigScriptParser.java
+++ b/test/org/apache/pig/test/TestPigScriptParser.java
@@ -30,6 +30,9 @@
 
 import org.apache.pig.ExecType;
 import org.apache.pig.PigServer;
+import org.apache.pig.builtin.mock.Storage;
+import org.apache.pig.builtin.mock.Storage.Data;
+import static org.apache.pig.builtin.mock.Storage.tuple;
 import org.apache.pig.data.Tuple;
 import org.apache.pig.impl.PigContext;
 import org.apache.pig.impl.logicalLayer.FrontendException;
@@ -156,6 +159,79 @@
         }
     }
 
+    @Test
+    public void testBackSlashOnly() throws Exception {
+        PigServer pig = new PigServer(Util.getLocalTestMode());
+        Data data = Storage.resetData(pig);
+        data.set("input", tuple("abc"), tuple("\\bcd"), tuple("'cde"), tuple("def\\\\"));
+
+        String query =
+            "A = load 'input' USING mock.Storage() as (a0:chararray);\n"
+            // The Java literal below escapes each backslash, so the line Pig actually sees is
+            // B = FILTER A by STARTSWITH(a0,'\\');   (one escaped backslash in the Pig script)
+            + "B = FILTER A by STARTSWITH(a0,'\\\\');\n"
+            + "store B into 'out' using mock.Storage;" ;
+
+        Util.registerMultiLineQuery(pig, query);
+        List<Tuple> list = data.get("out");
+
+        assertEquals("There should be only one match", 1, list.size());
+        Tuple t = list.get(0);
+        assertEquals("result should have only one field", 1, t.size() );
+        assertEquals("\\bcd",(String) t.get(0));
+    }
+
+
+    @Test
+    public void testBackSlashSingleQuote() throws Exception {
+        PigServer pig = new PigServer(Util.getLocalTestMode());
+        Data data = Storage.resetData(pig);
+        data.set("input", tuple("abc"), tuple("\\bcd"), tuple("'cde"), tuple("def\\\\"));
+
+        String query =
+            "A = load 'input' USING mock.Storage() as (a0:chararray);\n"
+            // The Java literal below escapes the backslash, so the line Pig actually sees is
+            // B = FILTER A by STARTSWITH(a0,'\'');   (an escaped single quote in the Pig script)
+            + "B = FILTER A by STARTSWITH(a0,'\\'');\n"
+            + "store B into 'out' using mock.Storage;" ;
+
+        Util.registerMultiLineQuery(pig, query);
+        List<Tuple> list = data.get("out");
+
+        assertEquals("There should be only one match", 1, list.size());
+        Tuple t = list.get(0);
+        assertEquals("result should have only one field", 1, t.size() );
+        assertEquals("'cde",(String) t.get(0));
+    }
+
+    @Test
+    public void testBackSlashReplace() throws Exception {
+        PigServer pig = new PigServer(Util.getLocalTestMode());
+        Data data = Storage.resetData(pig);
+        // After Java unescapes the literals, these tuples hold
+        // 'abc', '\bcd' and 'def\\' respectively.
+        data.set("input", tuple("abc"), tuple("\\bcd"), tuple("def\\\\"));
+
+        String query =
+            "A = load 'input' USING mock.Storage() as (a0:chararray);\n"
+            // The Java literal below escapes each backslash, so the line Pig actually sees is
+            // B = FOREACH A GENERATE REPLACE(a0,'\\\\','+');
+            + "B = FOREACH A GENERATE REPLACE(a0,'\\\\\\\\','+');\n"
+            + "store B into 'out' using mock.Storage;" ;
+
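+        // REPLACE(a0,'\\\\','+') as written in the Pig script:
+        // --> the Pig parser unescapes the literal and passes the two characters \\ to the REPLACE UDF.
+        // --> the REPLACE UDF compiles those two characters as a regex (shown raw, not as a Java
+        //     literal), and that regex matches a single literal backslash.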
+
+        Util.registerMultiLineQuery(pig, query);
+        List<Tuple> list = data.get("out");
+
+        List<Tuple> expectedRes =
+                Util.getTuplesFromConstantTupleStrings(
+                        new String[] {"('abc')","('+bcd')", "('def++')"});
+        Util.checkQueryOutputsAfterSort(list, expectedRes);
+    }
     private void checkParsedConstContent(PigServer pigServer,
                                          PigContext pigContext,
                                          String query,