MAPREDUCE-1670. RAID policies should not scan their own destination path.
(Ramkumar Vadali via dhruba)



git-svn-id: https://svn.apache.org/repos/asf/hadoop/mapreduce/trunk@990684 13f79535-47bb-0310-9956-ffa450edef68
diff --git a/CHANGES.txt b/CHANGES.txt
index 99bb30c..3b161c3 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -259,6 +259,9 @@
 
     MAPREDUCE-2022. Fixes compilation errors in TestSubmitJob. (amareshwari)
 
+    MAPREDUCE-1670. RAID policies should not scan their own destination path.
+    (Ramkumar Vadali via dhruba)
+
 Release 0.21.0 - Unreleased
 
   INCOMPATIBLE CHANGES
diff --git a/src/contrib/raid/src/java/org/apache/hadoop/raid/RaidNode.java b/src/contrib/raid/src/java/org/apache/hadoop/raid/RaidNode.java
index 84bda77..f390305 100644
--- a/src/contrib/raid/src/java/org/apache/hadoop/raid/RaidNode.java
+++ b/src/contrib/raid/src/java/org/apache/hadoop/raid/RaidNode.java
@@ -561,7 +561,7 @@
  /**
   * Returns a list of pathnames that needs raiding.
   */
-  private List<FileStatus> selectFiles(Configuration conf, Path p, String destPrefix,
+  List<FileStatus> selectFiles(Configuration conf, Path p, String destPrefix,
                                        long modTimePeriod, short srcReplication, long now) throws IOException {
 
     List<FileStatus> returnSet = new LinkedList<FileStatus>();
@@ -571,11 +571,26 @@
     FileSystem fs = FileSystem.get(destp.toUri(), conf);
     destp = destp.makeQualified(fs);
 
+    // Expand destination prefix path.
+    String destpstr = destp.toString();
+    if (!destpstr.endsWith(Path.SEPARATOR)) {
+      destpstr += Path.SEPARATOR;
+    }
+
     fs = p.getFileSystem(conf);
     FileStatus[] gpaths = fs.globStatus(p);
-    if (gpaths != null){
+    if (gpaths != null) {
       for (FileStatus onepath: gpaths) {
-        recurse(fs, conf, destp, onepath, returnSet, modTimePeriod, srcReplication, now);
+        String pathstr = onepath.getPath().makeQualified(fs).toString();
+        if (!pathstr.endsWith(Path.SEPARATOR)) {
+          pathstr += Path.SEPARATOR;
+        }
+        if (pathstr.startsWith(destpstr) || destpstr.startsWith(pathstr)) {
+          LOG.info("Skipping source " + pathstr +
+                   " because it conflicts with raid directory " + destpstr);
+        } else {
+         recurse(fs, conf, destp, onepath, returnSet, modTimePeriod, srcReplication, now);
+        }
       }
     }
     return returnSet;
diff --git a/src/contrib/raid/src/test/org/apache/hadoop/raid/TestRaidNode.java b/src/contrib/raid/src/test/org/apache/hadoop/raid/TestRaidNode.java
index c845241..6613da0 100644
--- a/src/contrib/raid/src/test/org/apache/hadoop/raid/TestRaidNode.java
+++ b/src/contrib/raid/src/test/org/apache/hadoop/raid/TestRaidNode.java
@@ -450,6 +450,59 @@
     LOG.info("Test testDistRaid completed.");
   }
   
+  /**
+   * Verifies that selectFiles() selects nothing when source and destination paths conflict.
+   * @throws Exception if any cluster setup or file selection step throws
+   */
+  public void testConflictingPaths() throws Exception {
+    LOG.info("Test testConflictingPaths started");
+    long targetReplication = 2;
+    long metaReplication   = 2;
+    long stripeLength      = 3;
+    short srcReplication = 1;
+    long modTimePeriod = 0;
+    try {
+      createClusters(false);
+      mySetup("/user/d/raidtest", srcReplication, targetReplication,
+          metaReplication, stripeLength);
+      // The RaidNode is stopped right away; only the object is needed to call selectFiles().
+      RaidNode cnode = RaidNode.createRaidNode(null, conf);
+      cnode.stop();
+      cnode.join();
+
+      createOldFile(fileSys, new Path("/user/d/raidtest/f1"), 2, 7, 8192L);
+      LOG.info("Test testConflictingPaths created test files");
+
+      long now = System.currentTimeMillis();
+
+      // Regular case: destination /raid does not overlap the source, so files must be selected.
+      LOG.info("Test testConflictingPaths testing the regular case");
+      List<FileStatus> selected = cnode.selectFiles(conf,
+          new Path("/user/d/raidtest*"), "/raid",
+          modTimePeriod, srcReplication, now);
+      assertTrue(selected.size() > 0);
+
+      // Conflicting case: source /user/d/raidtest lies under destination /user/d; expect none.
+      LOG.info("Test testConflictingPaths testing src under dest");
+      selected = cnode.selectFiles(conf,
+          new Path("/user/d/raidtest*"), "/user/d",
+          modTimePeriod, srcReplication, now);
+      assertEquals(0, selected.size());
+
+      // Conflicting case: destination /user/d/raidtest lies under source /user/d; expect none.
+      LOG.info("Test testConflictingPaths testing dest under src");
+      selected = cnode.selectFiles(conf,
+          new Path("/user/d*"), "/user/d/raidtest",
+          modTimePeriod, srcReplication, now);
+      assertEquals(0, selected.size());
+
+      LOG.info("Test testConflictingPaths succeeded.");
+    } finally {
+      stopClusters();
+    }
+    LOG.info("Test testConflictingPaths completed.");
+  }
+
   //
   // simulate a corruption at specified offset and verify that eveyrthing is good
   //