HBASE-24273 HBCK's "Orphan Regions on FileSystem" reports regions with referenced HFiles (#1613)

Signed-off-by: stack <stack@apache.org>
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HbckChore.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HbckChore.java
index b25bb15..6d41008 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HbckChore.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HbckChore.java
@@ -28,6 +28,7 @@
 
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.MetaTableAccessor;
 import org.apache.hadoop.hbase.ScheduledChore;
 import org.apache.hadoop.hbase.ServerName;
 import org.apache.hadoop.hbase.client.RegionInfo;
@@ -134,7 +135,7 @@
       loadRegionsFromInMemoryState();
       loadRegionsFromRSReport();
       try {
-        loadRegionsFromFS();
+        loadRegionsFromFS(scanForMergedParentRegions());
       } catch (IOException e) {
         LOG.warn("Failed to load the regions from filesystem", e);
       }
@@ -187,6 +188,31 @@
     }
   }
 
+  /**
+   * Scans hbase:meta and collects the encoded names of regions listed as merge parents.
+   * The table name passed to the scan is null, so all of meta is read; this is a heavy scan.
+   * @return {@link HashSet} of encoded region names; empty when no merge parents are found
+   */
+  private HashSet<String> scanForMergedParentRegions() throws IOException {
+    HashSet<String> mergedParentRegions = new HashSet<>();
+    // The trailing null argument is the table name; null means scan all of meta.
+    MetaTableAccessor.scanMetaForTableRegions(this.master.getConnection(),
+      r -> {
+        List<RegionInfo> mergeParents = MetaTableAccessor.getMergeRegions(r.rawCells());
+        if (mergeParents != null) {
+          for (RegionInfo mergeRegion : mergeParents) {
+            if (mergeRegion != null) {
+              // Non-null merge parent returned by getMergeRegions; record its encoded name.
+              mergedParentRegions.add(mergeRegion.getEncodedName());
+            }
+          }
+        }
+        return true;
+        },
+      null);
+    return mergedParentRegions;
+  }
+
   private void loadRegionsFromInMemoryState() {
     List<RegionState> regionStates =
         master.getAssignmentManager().getRegionStates().getRegionStates();
@@ -256,7 +282,7 @@
     }
   }
 
-  private void loadRegionsFromFS() throws IOException {
+  private void loadRegionsFromFS(final HashSet<String> mergedParentRegions) throws IOException {
     Path rootDir = master.getMasterFileSystem().getRootDir();
     FileSystem fs = master.getMasterFileSystem().getFileSystem();
 
@@ -271,12 +297,12 @@
           continue;
         }
         HbckRegionInfo hri = regionInfoMap.get(encodedRegionName);
-        if (hri == null) {
+        // If it is not in in-memory database and not a merged region,
+        // report it as an orphan region.
+        if (hri == null && !mergedParentRegions.contains(encodedRegionName)) {
           orphanRegionsOnFS.put(encodedRegionName, regionDir);
           continue;
         }
-        HbckRegionInfo.HdfsEntry hdfsEntry = new HbckRegionInfo.HdfsEntry(regionDir);
-        hri.setHdfsEntry(hdfsEntry);
       }
       numRegions += regionDirs.size();
     }
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/client/HConnectionTestingUtility.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/HConnectionTestingUtility.java
index 07d2a73..861d310 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/client/HConnectionTestingUtility.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/HConnectionTestingUtility.java
@@ -17,8 +17,8 @@
  */
 package org.apache.hadoop.hbase.client;
 
+import java.io.IOException;
 import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.hbase.ZooKeeperConnectionException;
 import org.mockito.Mockito;
 
 /**
@@ -39,12 +39,18 @@
    * probably not what you want.
    * @param conf configuration
    * @return ConnectionImplementation object for <code>conf</code>
-   * @throws ZooKeeperConnectionException
    */
   public static Connection getMockedConnection(final Configuration conf)
-      throws ZooKeeperConnectionException {
+      throws IOException {
     Connection connection = Mockito.mock(Connection.class);
     Mockito.when(connection.getConfiguration()).thenReturn(conf);
+
+    // Some tests call getTable() and getScanner(Scan); return mocks for both.
+    Table t = Mockito.mock(Table.class);
+    Mockito.when(connection.getTable(Mockito.any())).thenReturn(t);
+    ResultScanner rs = Mockito.mock(ResultScanner.class);
+    Mockito.when(t.getScanner((Scan)Mockito.any())).thenReturn(rs);
+
     return connection;
   }
 }
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMetaFixer.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMetaFixer.java
index f1531a5..ee67110 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMetaFixer.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMetaFixer.java
@@ -31,6 +31,7 @@
 import org.apache.hadoop.hbase.TableName;
 import org.apache.hadoop.hbase.client.RegionInfo;
 import org.apache.hadoop.hbase.client.RegionInfoBuilder;
+import org.apache.hadoop.hbase.client.Table;
 import org.apache.hadoop.hbase.master.assignment.GCRegionProcedure;
 import org.apache.hadoop.hbase.testclassification.LargeTests;
 import org.apache.hadoop.hbase.testclassification.MasterTests;
@@ -147,10 +148,12 @@
   @Test
   public void testOverlap() throws Exception {
     TableName tn = TableName.valueOf(this.name.getMethodName());
-    TEST_UTIL.createMultiRegionTable(tn, HConstants.CATALOG_FAMILY);
+    Table t = TEST_UTIL.createMultiRegionTable(tn, HConstants.CATALOG_FAMILY);
+    TEST_UTIL.loadTable(t, HConstants.CATALOG_FAMILY);
     List<RegionInfo> ris = MetaTableAccessor.getTableRegions(TEST_UTIL.getConnection(), tn);
     assertTrue(ris.size() > 5);
-    MasterServices services = TEST_UTIL.getHBaseCluster().getMaster();
+    HMaster services = TEST_UTIL.getHBaseCluster().getMaster();
+    HbckChore hbckChore = services.getHbckChore();
     services.getCatalogJanitor().scan();
     CatalogJanitor.Report report = services.getCatalogJanitor().getLastReport();
     assertTrue(report.isEmpty());
@@ -174,6 +177,9 @@
         throw new RuntimeException(e);
       }
     });
+
+    hbckChore.chore();
+    assertEquals(0, hbckChore.getOrphanRegionsOnFS().size());
   }
 
   /**