HBASE-23172 HBase Canary region success count metrics reflect column family successes, not region successes
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/tool/CanaryTool.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/tool/CanaryTool.java
index de40b81..6302df5 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/tool/CanaryTool.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/tool/CanaryTool.java
@@ -273,7 +273,7 @@
   public static class RegionStdOutSink extends StdOutSink {
     private Map<String, LongAdder> perTableReadLatency = new HashMap<>();
     private LongAdder writeLatency = new LongAdder();
-    private final Map<String, RegionTaskResult> regionMap = new ConcurrentHashMap<>();
+    private final Map<String, List<RegionTaskResult>> regionMap = new ConcurrentHashMap<>();
 
     public void publishReadFailure(ServerName serverName, RegionInfo region, Exception e) {
       incReadFailureCount();
@@ -289,10 +289,13 @@
 
     public void publishReadTiming(ServerName serverName, RegionInfo region,
         ColumnFamilyDescriptor column, long msTime) {
+      RegionTaskResult rtr = new RegionTaskResult(region, region.getTable(), serverName, column);
+      rtr.setReadSuccess();
+      rtr.setReadLatency(msTime);
+      List<RegionTaskResult> rtrs = regionMap.get(region.getRegionNameAsString());
+      rtrs.add(rtr);
+      // Note that read success count will be equal to total column family read successes.
       incReadSuccessCount();
-      RegionTaskResult res = this.regionMap.get(region.getRegionNameAsString());
-      res.setReadSuccess();
-      res.setReadLatency(msTime);
       LOG.info("Read from {} on {} {} in {}ms", region.getRegionNameAsString(), serverName,
           column.getNameAsString(), msTime);
     }
@@ -311,10 +314,13 @@
 
     public void publishWriteTiming(ServerName serverName, RegionInfo region,
         ColumnFamilyDescriptor column, long msTime) {
+      RegionTaskResult rtr = new RegionTaskResult(region, region.getTable(), serverName, column);
+      rtr.setWriteSuccess();
+      rtr.setWriteLatency(msTime);
+      List<RegionTaskResult> rtrs = regionMap.get(region.getRegionNameAsString());
+      rtrs.add(rtr);
+      // Note that write success count will be equal to total column family write successes.
       incWriteSuccessCount();
-      RegionTaskResult res = this.regionMap.get(region.getRegionNameAsString());
-      res.setWriteSuccess();
-      res.setWriteLatency(msTime);
       LOG.info("Write to {} on {} {} in {}ms",
         region.getRegionNameAsString(), serverName, column.getNameAsString(), msTime);
     }
@@ -337,7 +343,7 @@
       return this.writeLatency;
     }
 
-    public Map<String, RegionTaskResult> getRegionMap() {
+    public Map<String, List<RegionTaskResult>> getRegionMap() {
       return this.regionMap;
     }
 
@@ -1046,15 +1052,18 @@
     private RegionInfo region;
     private TableName tableName;
     private ServerName serverName;
+    private ColumnFamilyDescriptor column;
     private AtomicLong readLatency = null;
     private AtomicLong writeLatency = null;
     private boolean readSuccess = false;
     private boolean writeSuccess = false;
 
-    public RegionTaskResult(RegionInfo region, TableName tableName, ServerName serverName) {
+    public RegionTaskResult(RegionInfo region, TableName tableName, ServerName serverName,
+        ColumnFamilyDescriptor column) {
       this.region = region;
       this.tableName = tableName;
       this.serverName = serverName;
+      this.column = column;
     }
 
     public RegionInfo getRegionInfo() {
@@ -1081,6 +1090,14 @@
       return this.serverName.getServerName();
     }
 
+    public ColumnFamilyDescriptor getColumnFamily() {
+      return this.column;
+    }
+
+    public String getColumnFamilyNameAsString() {
+      return this.column.getNameAsString();
+    }
+
     public long getReadLatency() {
       if (this.readLatency == null) {
         return -1;
@@ -1566,9 +1583,8 @@
           RegionInfo region = location.getRegion();
           tasks.add(new RegionTask(admin.getConnection(), region, rs, (RegionStdOutSink)sink,
               taskType, rawScanEnabled, rwLatency));
-          Map<String, RegionTaskResult> regionMap = ((RegionStdOutSink) sink).getRegionMap();
-          regionMap.put(region.getRegionNameAsString(), new RegionTaskResult(region,
-              region.getTable(), rs));
+          Map<String, List<RegionTaskResult>> regionMap = ((RegionStdOutSink) sink).getRegionMap();
+          regionMap.put(region.getRegionNameAsString(), new ArrayList<RegionTaskResult>());
         }
         return executor.invokeAll(tasks);
       }
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/tool/TestCanaryTool.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/tool/TestCanaryTool.java
index dbbdf61..2c6c4c2 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/tool/TestCanaryTool.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/tool/TestCanaryTool.java
@@ -32,6 +32,7 @@
 import static org.mockito.Mockito.times;
 import static org.mockito.Mockito.verify;
 
+import java.util.List;
 import java.util.Map;
 import java.util.concurrent.ExecutorService;
 import java.util.concurrent.ScheduledThreadPoolExecutor;
@@ -146,6 +147,10 @@
     String[] args = { "-writeSniffing", "-t", "10000", "testCanaryRegionTaskResult" };
     assertEquals(0, ToolRunner.run(testingUtility.getConfiguration(), canary, args));
 
+    assertTrue("canary should expect to scan at least 1 region",
+        sink.getTotalExpectedRegions() > 0);
+    assertTrue("there should be no read failures", sink.getReadFailureCount() == 0);
+    assertTrue("there should be no write failures", sink.getWriteFailureCount() == 0);
     assertTrue("verify read success count > 0", sink.getReadSuccessCount() > 0);
     assertTrue("verify write success count > 0", sink.getWriteSuccessCount() > 0);
     verify(sink, atLeastOnce()).publishReadTiming(isA(ServerName.class), isA(RegionInfo.class),
@@ -153,27 +158,29 @@
     verify(sink, atLeastOnce()).publishWriteTiming(isA(ServerName.class), isA(RegionInfo.class),
         isA(ColumnFamilyDescriptor.class), anyLong());
 
-    assertTrue("canary should expect to scan at least 1 region",
-        sink.getTotalExpectedRegions() > 0);
-    Map<String, CanaryTool.RegionTaskResult> regionMap = sink.getRegionMap();
+    assertEquals("canary region success count should equal total expected regions",
+        sink.getReadSuccessCount() + sink.getWriteSuccessCount(), sink.getTotalExpectedRegions());
+    Map<String, List<CanaryTool.RegionTaskResult>> regionMap = sink.getRegionMap();
     assertFalse("verify region map has size > 0", regionMap.isEmpty());
 
     for (String regionName : regionMap.keySet()) {
-      CanaryTool.RegionTaskResult res = regionMap.get(regionName);
-      assertNotNull("verify each expected region has a RegionTaskResult object in the map", res);
-      assertNotNull("verify getRegionNameAsString()", regionName);
-      assertNotNull("verify getRegionInfo()", res.getRegionInfo());
-      assertNotNull("verify getTableName()", res.getTableName());
-      assertNotNull("verify getTableNameAsString()", res.getTableNameAsString());
-      assertNotNull("verify getServerName()", res.getServerName());
-      assertNotNull("verify getServerNameAsString()", res.getServerNameAsString());
+      for (CanaryTool.RegionTaskResult res: regionMap.get(regionName)) {
+        assertNotNull("verify getRegionNameAsString()", regionName);
+        assertNotNull("verify getRegionInfo()", res.getRegionInfo());
+        assertNotNull("verify getTableName()", res.getTableName());
+        assertNotNull("verify getTableNameAsString()", res.getTableNameAsString());
+        assertNotNull("verify getServerName()", res.getServerName());
+        assertNotNull("verify getServerNameAsString()", res.getServerNameAsString());
+        assertNotNull("verify getColumnFamily()", res.getColumnFamily());
+        assertNotNull("verify getColumnFamilyNameAsString()", res.getColumnFamilyNameAsString());
 
-      if (regionName.contains(CanaryTool.DEFAULT_WRITE_TABLE_NAME.getNameAsString())) {
-        assertTrue("write to region " + regionName + " succeeded", res.isWriteSuccess());
-        assertTrue("write took some time", res.getWriteLatency() > -1);
-      } else {
-        assertTrue("read from region " + regionName + " succeeded", res.isReadSuccess());
-        assertTrue("read took some time", res.getReadLatency() > -1);
+        if (regionName.contains(CanaryTool.DEFAULT_WRITE_TABLE_NAME.getNameAsString())) {
+          assertTrue("write to region " + regionName + " succeeded", res.isWriteSuccess());
+          assertTrue("write took some time", res.getWriteLatency() > -1);
+        } else {
+          assertTrue("read from region " + regionName + " succeeded", res.isReadSuccess());
+          assertTrue("read took some time", res.getReadLatency() > -1);
+        }
       }
     }
   }