PIG-5362: Parameter substitution of shell cmd results doesn't handle backslash addendum (szita)

git-svn-id: https://svn.apache.org/repos/asf/pig/trunk@1876880 13f79535-47bb-0310-9956-ffa450edef68
diff --git a/CHANGES.txt b/CHANGES.txt
index d2d2442..e55dcf2 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -100,6 +100,8 @@
  
 BUG FIXES
 
+PIG-5362: Parameter substitution of shell cmd results doesn't handle backslash addendum (szita)
+
 PIG-5395: Pig build is failing due to maven repo access point change (szita)
 
 PIG-5375: NullPointerException for multi-level self unions with Tez UnionOptimizer (knoguchi)
diff --git a/src/org/apache/pig/tools/parameters/ParamLoader.jj b/src/org/apache/pig/tools/parameters/ParamLoader.jj
index 36b4620..d8e1736 100644
--- a/src/org/apache/pig/tools/parameters/ParamLoader.jj
+++ b/src/org/apache/pig/tools/parameters/ParamLoader.jj
@@ -43,16 +43,6 @@
     public void setContext(PreprocessorContext pc) {
         this.pc = pc;
     }
-
-    private static String unquote(String s)
-    {
-        if (s.charAt(0) == '\'' && s.charAt(s.length()-1) == '\'')
-            return s.substring(1, s.length()-1);
-        else if (s.charAt(0) == '"' && s.charAt(s.length()-1) == '"')
-            return s.substring(1, s.length()-1);
-        else
-            return s;
-    }
 }
 
 PARSER_END(ParamLoader)
@@ -81,16 +71,52 @@
     |
     <OTHER: ~["\"" , "'" , "`" , "a"-"z" , "A"-"Z" , "_" , "#" , "=" , " " , "\n" , "\t" , "\r"] (~["\n","\r"])* >
     |
-    <LITERAL: ("\"" ((~["\""])*("\\\"")?)* "\"")|("'" ((~["'"])*("\\\'")?)* "'") >
-    |
     <SHELLCMD: "`" (~["`"])* "`" >
     |
     <EQUALS: "="  >
     |
     <COMMENT: "#" (~["\n"])* ("\n"|"\r")  >
-
 }
 
+MORE :  // no lexical-state list, so these two rules apply in DEFAULT only
+{
+    <DOUBLE_QUOTE_START: ("\"") > : DOUBLE_QUOTE  // opening quote is kept as a MORE prefix; lexer enters DOUBLE_QUOTE
+    |
+    <SINGLE_QUOTE_START: ("'") > : SINGLE_QUOTE  // same for a single-quoted value; lexer enters SINGLE_QUOTE
+}
+
+<DOUBLE_QUOTE> TOKEN :  // an unescaped double quote closes the value and emits the token
+{
+    <DOUBLE_QUOTE_LITERAL: ("\"") > {
+        image.deleteCharAt(image.length()-1);  // drop the closing quote that was just matched
+        image.deleteCharAt(0);  // drop the first character (the opening quote kept by DOUBLE_QUOTE_START)
+        matchedToken.image = image.toString();  // token text is the value without its enclosing quotes
+    } : DEFAULT
+} 
+
+<SINGLE_QUOTE> TOKEN :  // an unescaped single quote closes the value and emits the token
+{
+    <SINGLE_QUOTE_LITERAL: ("'") > {
+        image.deleteCharAt(image.length()-1);  // drop the closing quote that was just matched
+        image.deleteCharAt(0);  // drop the first character (the opening quote kept by SINGLE_QUOTE_START)
+        matchedToken.image = image.toString();  // token text is the value without its enclosing quotes
+    } : DEFAULT 
+}
+
+<DOUBLE_QUOTE > MORE :  // NOTE(review): backslash before the closing quote always matches here, so a value cannot end in a backslash - confirm intended
+{
+    <ESCAPED_DOUBLE_QUOTE: ("\\\"")> { image.replace(image.length()-2, image.length(), "\""); }  // collapse the backslash-quote pair to a plain double quote and keep scanning
+}
+
+<SINGLE_QUOTE > MORE :  // NOTE(review): backslash before the closing quote always matches here, so a value cannot end in a backslash - confirm intended
+{
+    <ESCAPED_SINGLE_QUOTE: ("\\'")> { image.replace(image.length()-2, image.length(), "'"); }  // collapse the backslash-quote pair to a plain single quote and keep scanning
+}
+
+<DOUBLE_QUOTE,SINGLE_QUOTE> MORE:  // fallback shared by both quote states
+{
+    < (~[]) >  // any single character, line breaks included, is appended unchanged to the pending literal
+}
 
 
 boolean Parse() throws IOException  :
@@ -117,7 +143,10 @@
                 |
                 val=<SHELLCMD>  { pc.processShellCmd(id.image , val.image);}
                 |
-                val=<LITERAL> { s = unquote(val.image); pc.processOrdLine(id.image , s); }
+                val=<SINGLE_QUOTE_LITERAL> { pc.processOrdLine(id.image , val.image); }
+                |
+                val=<DOUBLE_QUOTE_LITERAL> { pc.processOrdLine(id.image , val.image); }
+            
             )
         )
         |
diff --git a/src/org/apache/pig/tools/parameters/PigFileParser.jj b/src/org/apache/pig/tools/parameters/PigFileParser.jj
index beb322a..cb584dc 100644
--- a/src/org/apache/pig/tools/parameters/PigFileParser.jj
+++ b/src/org/apache/pig/tools/parameters/PigFileParser.jj
@@ -223,25 +223,22 @@
     }: DEFAULT
 }
 
-TOKEN : 
+< DEFAULT, IN_DECLARE > TOKEN :  // whitespace and comment tokens are recognised in both states
 {
-    <NEWLINE: "\n" | "\r">
+    <NEWLINE: "\n" | "\r"> : DEFAULT  // a line break always returns the lexer to DEFAULT, leaving IN_DECLARE if active
     |    
     <SPACE: " " | "\t">
     |
     <WS: "\n" | "\r" | " " | "\t">
-}
-
-// comments(single line and multi-line)
-TOKEN : 
-{
-   <COMMENT: "--"(~["\r","\n"])*
+    |   
+    // comments(single line and multi-line)
+    <COMMENT: "--"(~["\r","\n"])*
           |  "#!" (~["\r","\n"])*
           | "/*" (~["*"])* "*" ("*" | (~["*","/"] (~["*"])* "*"))* "/"
-   >   
+    >   
 }
 
-TOKEN:
+< DEFAULT, IN_DECLARE >TOKEN:
 {
     <#LETTER : ["a"-"z", "A"-"Z"] >
     |
@@ -254,22 +251,31 @@
 
 TOKEN :
 {
-    <DECLARE: "%declare" >
+    <DECLARE: "%declare" > : IN_DECLARE  // what follows the keyword is lexed in IN_DECLARE
     |
-    <PIGDEFAULT: "%default" > 
-}
-
-
-TOKEN : 
-{
+    <PIGDEFAULT: "%default" > : IN_DECLARE  // same state switch as DECLARE
+    |
     <REGISTER: "register"> : IN_REGISTER
     |
+    <LITERAL: ("\"" ((~["\""])*("\\\"")?)* "\"")|("'" ((~["'"])*("\\\'")?)* "'") >  // DEFAULT-only: whole quoted string as one token, quotes and escapes left as written
+}
+
+< DEFAULT, IN_DECLARE > TOKEN:  // identifiers and backtick commands are recognised in both states
+{
     <IDENTIFIER: (<SPECIALCHAR>)*<LETTER>(<DIGIT> | <LETTER> | <SPECIALCHAR>)*>
     |
-    <LITERAL: ("\"" ((~["\""])*("\\\"")?)* "\"")|("'" ((~["'"])*("\\\'")?)* "'") >
+    <SHELLCMD: "`" (~["`"])* "`" > : DEFAULT  // backtick-delimited command; the lexer is in DEFAULT afterwards
+}
+
+< IN_DECLARE > MORE :  // quote handling below is entered only from IN_DECLARE, not from DEFAULT
+{
+    <DOUBLE_QUOTE_START: ("\"") > : DOUBLE_QUOTE  // opening quote is kept as a MORE prefix; lexer enters DOUBLE_QUOTE
     |
+    <SINGLE_QUOTE_START: ("'") > : SINGLE_QUOTE  // same for a single-quoted value; lexer enters SINGLE_QUOTE
+}
+
+< DEFAULT, IN_DECLARE > TOKEN:
+{
     // see others() rule for use of OTHER and NOT_OTHER_CHAR
     // others() is supposed to match 'everything else'. To ensure that others()
     // don't swallow other(all the ones above) tokens, it uses two tokens OTHER and NOT_OTHER_CHAR
@@ -281,6 +287,39 @@
     <NOT_OTHER_CHAR: ["\"" , "'" , "`" , "a"-"z" , "A"-"Z" , "_" , "#" , "=" , " " , "\n" , "\t" , "\r", "%", "/", "-", "$"] >
 }
 
+< DOUBLE_QUOTE > TOKEN :  // an unescaped double quote closes the value and emits the token
+{
+    <DOUBLE_QUOTE_LITERAL: ("\"") > {
+        image.deleteCharAt(image.length()-1);  // drop the closing quote that was just matched
+        image.deleteCharAt(0);  // drop the first character (the opening quote kept by DOUBLE_QUOTE_START)
+        matchedToken.image = image.toString();  // token text is the value without its enclosing quotes
+    } : DEFAULT
+} 
+
+< SINGLE_QUOTE > TOKEN :  // an unescaped single quote closes the value and emits the token
+{
+    <SINGLE_QUOTE_LITERAL: ("'") > {
+        image.deleteCharAt(image.length()-1);  // drop the closing quote that was just matched
+        image.deleteCharAt(0);  // drop the first character (the opening quote kept by SINGLE_QUOTE_START)
+        matchedToken.image = image.toString();  // token text is the value without its enclosing quotes
+    } : DEFAULT 
+}
+
+< DOUBLE_QUOTE > MORE :  // NOTE(review): backslash before the closing quote always matches here, so a value cannot end in a backslash - confirm intended
+{
+    <ESCAPED_DOUBLE_QUOTE: ("\\\"")> { image.replace(image.length()-2, image.length(), "\""); }  // collapse the backslash-quote pair to a plain double quote and keep scanning
+}
+
+< SINGLE_QUOTE > MORE :  // NOTE(review): backslash before the closing quote always matches here, so a value cannot end in a backslash - confirm intended
+{
+    <ESCAPED_SINGLE_QUOTE: ("\\'")> { image.replace(image.length()-2, image.length(), "'"); }  // collapse the backslash-quote pair to a plain single quote and keep scanning
+}
+
+< DOUBLE_QUOTE, SINGLE_QUOTE > MORE:  // fallback shared by both quote states
+{
+    < (~[]) >  // any single character, line breaks included, is appended unchanged to the pending literal
+}
+
 <IN_REGISTER> MORE : { " " |  "\t" | "\r" | "\n"}
 
 <IN_REGISTER> TOKEN: {
@@ -373,8 +412,9 @@
             |
             val=<SHELLCMD>  { pc.processShellCmd(id.image , val.image, overwrite); }
             |
-            val=<LITERAL> { s = unquote(val.image); pc.processOrdLine(id.image, s, overwrite); }
-
+            val=<SINGLE_QUOTE_LITERAL> { pc.processOrdLine(id.image, val.image, overwrite); }
+            |
+            val = <DOUBLE_QUOTE_LITERAL> { pc.processOrdLine(id.image, val.image, overwrite); }
         )
 }