HDFS-4329. DFSShell issues with directories with spaces in name (Cristina L. Abad via jeagles)

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/branch-2.1.0-beta@1516916 13f79535-47bb-0310-9956-ffa450edef68
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/PathData.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/PathData.java
index 88a90c6..84bb234 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/PathData.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/PathData.java
@@ -106,10 +106,12 @@
 
   /**
    * Validates the given Windows path.
-   * Throws IOException on failure.
    * @param pathString a String of the path suppliued by the user.
+   * @return true if pathString has no URI scheme but is an absolute Windows
+   * path, so the scheme was inferred; false otherwise.
+   * @throws IOException if pathString is an invalid path string
    */
-  private void ValidateWindowsPath(String pathString)
+  private static boolean checkIfSchemeInferredFromPath(String pathString)
   throws IOException
   {
     if (windowsNonUriAbsolutePath1.matcher(pathString).find()) {
@@ -118,23 +120,21 @@
         throw new IOException("Invalid path string " + pathString);
       }
 
-      inferredSchemeFromPath = true;
-      return;
+      return true;
     }
 
     // Is it a forward slash-separated absolute path?
     if (windowsNonUriAbsolutePath2.matcher(pathString).find()) {
-      inferredSchemeFromPath = true;
-      return;
+      return true;
     }
 
     // Does it look like a URI? If so then just leave it alone.
     if (potentialUri.matcher(pathString).find()) {
-      return;
+      return false;
     }
 
     // Looks like a relative path on Windows.
-    return;
+    return false;
   }
 
   /**
@@ -153,7 +153,7 @@
     setStat(stat);
 
     if (Path.WINDOWS) {
-      ValidateWindowsPath(pathString);
+      inferredSchemeFromPath = checkIfSchemeInferredFromPath(pathString);
     }
   }
 
@@ -302,7 +302,7 @@
     // check getPath() so scheme slashes aren't considered part of the path
     String separator = uri.getPath().endsWith(Path.SEPARATOR)
         ? "" : Path.SEPARATOR;
-    return uri + separator + basename;
+    return uriToString(uri, inferredSchemeFromPath) + separator + basename;
   }
   
   protected enum PathType { HAS_SCHEME, SCHEMELESS_ABSOLUTE, RELATIVE };
@@ -356,7 +356,7 @@
             if (globUri.getAuthority() == null) {
               matchUri = removeAuthority(matchUri);
             }
-            globMatch = matchUri.toString();
+            globMatch = uriToString(matchUri, false);
             break;
           case SCHEMELESS_ABSOLUTE: // take just the uri's path
             globMatch = matchUri.getPath();
@@ -438,6 +438,10 @@
    */
   @Override
   public String toString() {
+    return uriToString(uri, inferredSchemeFromPath);
+  }
+ 
+  private static String uriToString(URI uri, boolean inferredSchemeFromPath) {
     String scheme = uri.getScheme();
     // No interpretation of symbols. Just decode % escaped chars.
     String decodedRemainder = uri.getSchemeSpecificPart();
diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/cli/util/CommandExecutor.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/cli/util/CommandExecutor.java
index a250e24..79df284 100644
--- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/cli/util/CommandExecutor.java
+++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/cli/util/CommandExecutor.java
@@ -24,6 +24,9 @@
 import java.io.File;
 import java.io.PrintStream;
 import java.util.StringTokenizer;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+import java.util.ArrayList;
 
 /**
  *
@@ -32,23 +35,31 @@
 public abstract class CommandExecutor {  
   protected String[] getCommandAsArgs(final String cmd, final String masterKey,
 		                                       final String master) {
-    StringTokenizer tokenizer = new StringTokenizer(cmd, " ");
-    String[] args = new String[tokenizer.countTokens()];
-    
-    int i = 0;
-    while (tokenizer.hasMoreTokens()) {
-      args[i] = tokenizer.nextToken();
+    String regex = "\'([^\']*)\'|\"([^\"]*)\"|(\\S+)";
+    Matcher matcher = Pattern.compile(regex).matcher(cmd);
 
-      args[i] = args[i].replaceAll(masterKey, master);
-      args[i] = args[i].replaceAll("CLITEST_DATA", 
-        new File(CLITestHelper.TEST_CACHE_DATA_DIR).
-        toURI().toString().replace(' ', '+'));
-      args[i] = args[i].replaceAll("USERNAME", System.getProperty("user.name"));
+    ArrayList<String> args = new ArrayList<String>();
+    String arg = null;
 
-      i++;
-    }
-    
-    return args;
+    while (matcher.find()) {
+      if (matcher.group(1) != null) {
+        arg = matcher.group(1);
+      } else if (matcher.group(2) != null) {
+        arg = matcher.group(2);
+      } else {
+        arg = matcher.group(3);
+      }
+
+      arg = arg.replaceAll(masterKey, master);
+      arg = arg.replaceAll("CLITEST_DATA",
+         new File(CLITestHelper.TEST_CACHE_DATA_DIR).
+         toURI().toString().replace(' ', '+'));
+      arg = arg.replaceAll("USERNAME", System.getProperty("user.name"));
+
+      args.add(arg);
+     }
+
+    return args.toArray(new String[0]);
   }
   
   public Result executeCommand(final String cmd) throws Exception {
diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
index 4c6bd99..f379da6 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
+++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
@@ -3114,6 +3114,9 @@
 
     HDFS-4998. TestUnderReplicatedBlocks fails intermittently (kihwal)
 
+    HDFS-4329. DFSShell issues with directories with spaces in name (Cristina
+    L. Abad via jeagles)
+
 Release 0.23.9 - UNRELEASED
 
   INCOMPATIBLE CHANGES
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/resources/testHDFSConf.xml b/hadoop-hdfs-project/hadoop-hdfs/src/test/resources/testHDFSConf.xml
index d4a7bdc..1a36be6 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/resources/testHDFSConf.xml
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/resources/testHDFSConf.xml
@@ -443,6 +443,153 @@
       </comparators>
     </test>
 
+    <test> <!-- TESTED -->
+      <description>ls: whitespaces in an absolute path to a file</description>
+      <test-commands>
+        <command>-fs NAMENODE -mkdir -p "/a path with/whitespaces in directories"</command>
+        <command>-fs NAMENODE -touchz "/a path with/whitespaces in directories/and file names"</command>
+        <command>-fs NAMENODE -ls "/a path with/whitespaces in directories"</command>
+      </test-commands>
+      <cleanup-commands>
+        <command>-fs NAMENODE -rm -r "/a path with"</command>
+      </cleanup-commands>
+      <comparators>
+        <comparator>
+          <type>TokenComparator</type>
+          <expected-output>Found 1 items</expected-output>
+        </comparator>
+        <comparator>
+          <type>RegexpComparator</type>
+          <expected-output>^-rw-r--r--( )*1( )*[a-z]*( )*supergroup( )*0( )*[0-9]{4,}-[0-9]{2,}-[0-9]{2,} [0-9]{2,}:[0-9]{2,}( )*/a path with/whitespaces in directories/and file names</expected-output>
+        </comparator>
+      </comparators>
+    </test>
+
+    <test> <!-- TESTED -->
+      <description>ls: whitespaces in a relative path to a file</description>
+      <test-commands>
+        <command>-fs NAMENODE -mkdir -p "a path with/whitespaces in directories"</command>
+        <command>-fs NAMENODE -touchz "a path with/whitespaces in directories/and file names"</command>
+        <command>-fs NAMENODE -ls "a path with/whitespaces in directories"</command>
+      </test-commands>
+      <cleanup-commands>
+        <command>-fs NAMENODE -rm -r "a path with"</command>
+      </cleanup-commands>
+      <comparators>
+        <comparator>
+          <type>TokenComparator</type>
+          <expected-output>Found 1 items</expected-output>
+        </comparator>
+        <comparator>
+          <type>RegexpComparator</type>
+          <expected-output>^-rw-r--r--( )*1( )*[a-z]*( )*supergroup( )*0( )*[0-9]{4,}-[0-9]{2,}-[0-9]{2,} [0-9]{2,}:[0-9]{2,}( )*a path with/whitespaces in directories/and file names</expected-output>
+        </comparator>
+      </comparators>
+    </test> 
+
+    <test> <!-- TESTED -->
+      <description>ls: whitespaces in a scheme-qualified path to a file</description>
+      <test-commands>
+        <command>-fs NAMENODE -mkdir -p "NAMENODE/a path with/whitespaces in directories"</command>
+        <command>-fs NAMENODE -touchz "NAMENODE/a path with/whitespaces in directories/and file names"</command>
+        <command>-fs NAMENODE -ls "NAMENODE/a path with/whitespaces in directories"</command>
+      </test-commands>
+      <cleanup-commands>
+        <command>-fs NAMENODE -rm -r "NAMENODE/a path with"</command>
+      </cleanup-commands>
+      <comparators>
+        <comparator>
+          <type>TokenComparator</type>
+          <expected-output>Found 1 items</expected-output>
+        </comparator>
+        <comparator>
+          <type>RegexpComparator</type>
+          <expected-output>^-rw-r--r--( )*1( )*[a-z]*( )*supergroup( )*0( )*[0-9]{4,}-[0-9]{2,}-[0-9]{2,} [0-9]{2,}:[0-9]{2,}( )*NAMENODE/a path with/whitespaces in directories/and file names</expected-output>
+        </comparator>
+      </comparators>
+    </test>
+
+    <test> <!-- TESTED -->
+      <description>ls: whitespaces in an absolute path to a file, using globbing</description>
+      <test-commands>
+        <command>-fs NAMENODE -mkdir -p "/a path with/whitespaces in directories"</command>
+        <command>-fs NAMENODE -touchz "/a path with/whitespaces in directories/and file names"</command>
+        <command>-fs NAMENODE -touchz "/a path with/whitespaces in directories/and file names 2"</command>
+        <command>-fs NAMENODE -ls "/a*/w*"</command>
+      </test-commands>
+      <cleanup-commands>
+        <command>-fs NAMENODE -rm -r "/a path with"</command>
+      </cleanup-commands>
+      <comparators>
+        <comparator>
+          <type>TokenComparator</type>
+          <expected-output>Found 2 items</expected-output>
+        </comparator>
+        <comparator>
+          <type>RegexpComparator</type>
+          <expected-output>^-rw-r--r--( )*1( )*[a-z]*( )*supergroup( )*0( )*[0-9]{4,}-[0-9]{2,}-[0-9]{2,} [0-9]{2,}:[0-9]{2,}( )*/a path with/whitespaces in directories/and file names</expected-output>
+        </comparator>
+        <comparator>
+          <type>RegexpComparator</type>
+          <expected-output>^-rw-r--r--( )*1( )*[a-z]*( )*supergroup( )*0( )*[0-9]{4,}-[0-9]{2,}-[0-9]{2,} [0-9]{2,}:[0-9]{2,}( )*/a path with/whitespaces in directories/and file names 2</expected-output>
+        </comparator>
+      </comparators>
+    </test>
+
+    <test> <!-- TESTED -->
+      <description>ls: whitespaces in a relative path to a file, using globbing</description>
+      <test-commands>
+        <command>-fs NAMENODE -mkdir -p "a path with/whitespaces in directories"</command>
+        <command>-fs NAMENODE -touchz "a path with/whitespaces in directories/and file names"</command>
+        <command>-fs NAMENODE -touchz "a path with/whitespaces in directories/and file names 2"</command>
+        <command>-fs NAMENODE -ls "a*/w*"</command>
+      </test-commands>
+      <cleanup-commands>
+        <command>-fs NAMENODE -rm -r "a path with"</command>
+      </cleanup-commands>
+      <comparators>
+        <comparator>
+          <type>TokenComparator</type>
+          <expected-output>Found 2 items</expected-output>
+        </comparator>
+        <comparator>
+          <type>RegexpComparator</type>
+          <expected-output>^-rw-r--r--( )*1( )*[a-z]*( )*supergroup( )*0( )*[0-9]{4,}-[0-9]{2,}-[0-9]{2,} [0-9]{2,}:[0-9]{2,}( )*a path with/whitespaces in directories/and file names</expected-output>
+        </comparator>
+        <comparator>
+          <type>RegexpComparator</type>
+          <expected-output>^-rw-r--r--( )*1( )*[a-z]*( )*supergroup( )*0( )*[0-9]{4,}-[0-9]{2,}-[0-9]{2,} [0-9]{2,}:[0-9]{2,}( )*a path with/whitespaces in directories/and file names 2</expected-output>
+        </comparator>
+      </comparators>
+    </test>
+
+    <test> <!-- TESTED -->
+      <description>ls: whitespaces in a scheme-qualified path to a file, using globbing</description>
+      <test-commands>
+        <command>-fs NAMENODE -mkdir -p "NAMENODE/a path with/whitespaces in directories"</command>
+        <command>-fs NAMENODE -touchz "NAMENODE/a path with/whitespaces in directories/and file names"</command>
+        <command>-fs NAMENODE -touchz "NAMENODE/a path with/whitespaces in directories/and file names 2"</command>
+        <command>-fs NAMENODE -ls "NAMENODE/a*/w*"</command>
+      </test-commands>
+      <cleanup-commands>
+        <command>-fs NAMENODE -rm -r "NAMENODE/a path with"</command>
+      </cleanup-commands>
+      <comparators>
+        <comparator>
+          <type>TokenComparator</type>
+          <expected-output>Found 2 items</expected-output>
+        </comparator>
+        <comparator>
+          <type>RegexpComparator</type>
+          <expected-output>^-rw-r--r--( )*1( )*[a-z]*( )*supergroup( )*0( )*[0-9]{4,}-[0-9]{2,}-[0-9]{2,} [0-9]{2,}:[0-9]{2,}( )*NAMENODE/a path with/whitespaces in directories/and file names</expected-output>
+        </comparator>
+        <comparator>
+          <type>RegexpComparator</type>
+          <expected-output>^-rw-r--r--( )*1( )*[a-z]*( )*supergroup( )*0( )*[0-9]{4,}-[0-9]{2,}-[0-9]{2,} [0-9]{2,}:[0-9]{2,}( )*NAMENODE/a path with/whitespaces in directories/and file names 2</expected-output>
+        </comparator>
+      </comparators>
+    </test>
+
     <!-- Tests for ls -R -->
     <test> <!-- TESTED -->
       <description>ls: files/directories using absolute path</description>
@@ -6503,23 +6650,23 @@
       <comparators>
         <comparator>
           <type>TokenComparator</type>
-          <expected-output>"data15bytes-15"</expected-output>
+          <expected-output>data15bytes-15</expected-output>
         </comparator>
         <comparator>
           <type>TokenComparator</type>
-          <expected-output>"data30bytes-30"</expected-output>
+          <expected-output>data30bytes-30</expected-output>
         </comparator>
         <comparator>
           <type>TokenComparator</type>
-          <expected-output>"data60bytes-60"</expected-output>
+          <expected-output>data60bytes-60</expected-output>
         </comparator>
         <comparator>
           <type>TokenComparator</type>
-          <expected-output>"data120bytes-120"</expected-output>
+          <expected-output>data120bytes-120</expected-output>
         </comparator>
         <comparator>
           <type>TokenComparator</type>
-          <expected-output>"datadir-0"</expected-output>
+          <expected-output>datadir-0</expected-output>
         </comparator>
       </comparators>
     </test>
@@ -6542,23 +6689,23 @@
       <comparators>
         <comparator>
           <type>TokenComparator</type>
-          <expected-output>"data15bytes-15"</expected-output>
+          <expected-output>data15bytes-15</expected-output>
         </comparator>
         <comparator>
           <type>TokenComparator</type>
-          <expected-output>"data30bytes-30"</expected-output>
+          <expected-output>data30bytes-30</expected-output>
         </comparator>
         <comparator>
           <type>TokenComparator</type>
-          <expected-output>"data60bytes-60"</expected-output>
+          <expected-output>data60bytes-60</expected-output>
         </comparator>
         <comparator>
           <type>TokenComparator</type>
-          <expected-output>"data120bytes-120"</expected-output>
+          <expected-output>data120bytes-120</expected-output>
         </comparator>
         <comparator>
           <type>TokenComparator</type>
-          <expected-output>"datadir-0"</expected-output>
+          <expected-output>datadir-0</expected-output>
         </comparator>
       </comparators>
     </test>
@@ -6644,23 +6791,23 @@
       <comparators>
         <comparator>
           <type>TokenComparator</type>
-          <expected-output>"data15bytes-15"</expected-output>
+          <expected-output>data15bytes-15</expected-output>
         </comparator>
         <comparator>
           <type>TokenComparator</type>
-          <expected-output>"data30bytes-30"</expected-output>
+          <expected-output>data30bytes-30</expected-output>
         </comparator>
         <comparator>
           <type>TokenComparator</type>
-          <expected-output>"data60bytes-60"</expected-output>
+          <expected-output>data60bytes-60</expected-output>
         </comparator>
         <comparator>
           <type>TokenComparator</type>
-          <expected-output>"data120bytes-120"</expected-output>
+          <expected-output>data120bytes-120</expected-output>
         </comparator>
         <comparator>
           <type>TokenComparator</type>
-          <expected-output>"datadir-0"</expected-output>
+          <expected-output>datadir-0</expected-output>
         </comparator>
       </comparators>
     </test>
@@ -6731,23 +6878,23 @@
       <comparators>
         <comparator>
           <type>TokenComparator</type>
-          <expected-output>"data15bytes-15"</expected-output>
+          <expected-output>data15bytes-15</expected-output>
         </comparator>
         <comparator>
           <type>TokenComparator</type>
-          <expected-output>"data30bytes-30"</expected-output>
+          <expected-output>data30bytes-30</expected-output>
         </comparator>
         <comparator>
           <type>TokenComparator</type>
-          <expected-output>"data60bytes-60"</expected-output>
+          <expected-output>data60bytes-60</expected-output>
         </comparator>
         <comparator>
           <type>TokenComparator</type>
-          <expected-output>"data120bytes-120"</expected-output>
+          <expected-output>data120bytes-120</expected-output>
         </comparator>
         <comparator>
           <type>TokenComparator</type>
-          <expected-output>"datadir-0"</expected-output>
+          <expected-output>datadir-0</expected-output>
         </comparator>
       </comparators>
     </test>