ZOOKEEPER-4566: Create tool for recursive snapshot analysis

add a tool to recursively collect and display child count and data
stored in sub-trees

Author: Szabolcs Bukros <szabolcs@cloudera.com>

Reviewers: Andor Molnar <andor@apache.org>, Mate Szalay-Beko <symat@apache.org>

Closes #1902 from BukrosSzabolcs/ZOOKEEPER-4566
diff --git a/bin/zkSnapshotRecursiveSummaryToolkit.cmd b/bin/zkSnapshotRecursiveSummaryToolkit.cmd
new file mode 100755
index 0000000..ee89310
--- /dev/null
+++ b/bin/zkSnapshotRecursiveSummaryToolkit.cmd
@@ -0,0 +1,24 @@
+@echo off
+REM Licensed to the Apache Software Foundation (ASF) under one or more
+REM contributor license agreements.  See the NOTICE file distributed with
+REM this work for additional information regarding copyright ownership.
+REM The ASF licenses this file to You under the Apache License, Version 2.0
+REM (the "License"); you may not use this file except in compliance with
+REM the License.  You may obtain a copy of the License at
+REM
+REM     http://www.apache.org/licenses/LICENSE-2.0
+REM
+REM Unless required by applicable law or agreed to in writing, software
+REM distributed under the License is distributed on an "AS IS" BASIS,
+REM WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+REM See the License for the specific language governing permissions and
+REM limitations under the License.
+
+setlocal
+REM Load the shared environment script located in this script's own directory.
+call "%~dp0zkEnv.cmd"
+
+REM Run the snapshot summary tool, forwarding all command-line arguments unchanged.
+set ZOOMAIN=org.apache.zookeeper.server.SnapshotRecursiveSummary
+call %JAVA% -cp "%CLASSPATH%" %ZOOMAIN% %*
+endlocal
diff --git a/bin/zkSnapshotRecursiveSummaryToolkit.sh b/bin/zkSnapshotRecursiveSummaryToolkit.sh
new file mode 100755
index 0000000..f765997
--- /dev/null
+++ b/bin/zkSnapshotRecursiveSummaryToolkit.sh
@@ -0,0 +1,39 @@
+#!/usr/bin/env bash
+
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+#
+# If this script is run out of /usr/bin or some other system bin directory
+# it should be linked to and not copied. Things like java jar files are found
+# relative to the canonical path of this script.
+#
+
+# use POSIX interface, symlink is followed automatically
+ZOOBIN="${BASH_SOURCE-$0}"
+ZOOBIN="$(dirname "${ZOOBIN}")"
+ZOOBINDIR="$(cd "${ZOOBIN}"; pwd)"
+
+if [ -e "$ZOOBINDIR/../libexec/zkEnv.sh" ]; then
+  # shellcheck source=/bin/zkEnv.sh
+  . "$ZOOBINDIR"/../libexec/zkEnv.sh
+else
+  . "$ZOOBINDIR"/zkEnv.sh
+fi
+
+# JVMFLAGS is unquoted on purpose: quoted, an empty value reaches java as a bogus "" argument.
+# shellcheck disable=SC2086
+"$JAVA" -cp "$CLASSPATH" $JVMFLAGS \
+     org.apache.zookeeper.server.SnapshotRecursiveSummary "$@"
diff --git a/zookeeper-docs/src/main/resources/markdown/zookeeperTools.md b/zookeeper-docs/src/main/resources/markdown/zookeeperTools.md
index 5141766..d4abe38 100644
--- a/zookeeper-docs/src/main/resources/markdown/zookeeperTools.md
+++ b/zookeeper-docs/src/main/resources/markdown/zookeeperTools.md
@@ -23,6 +23,7 @@
     * [zkCleanup.sh](#zkCleanup)
     * [zkTxnLogToolkit.sh](#zkTxnLogToolkit)
     * [zkSnapShotToolkit.sh](#zkSnapShotToolkit)
+    * [zkSnapshotRecursiveSummaryToolkit.sh](#zkSnapshotRecursiveSummaryToolkit)
     * [zkSnapshotComparer.sh](#zkSnapshotComparer)
 
 * [Benchmark](#Benchmark)
@@ -218,6 +219,43 @@
 ./zkSnapShotToolkit.sh -json /data/zkdata/version-2/snapshot.fa01000186d
 [[1,0,{"progname":"SnapshotFormatter.java","progver":"0.01","timestamp":1559788148637},[{"name":"\/","asize":0,"dsize":0,"dev":0,"ino":1001},[{"name":"zookeeper","asize":0,"dsize":0,"dev":0,"ino":1002},{"name":"config","asize":0,"dsize":0,"dev":0,"ino":1003},[{"name":"quota","asize":0,"dsize":0,"dev":0,"ino":1004},[{"name":"test","asize":0,"dsize":0,"dev":0,"ino":1005},{"name":"zookeeper_limits","asize":52,"dsize":52,"dev":0,"ino":1006},{"name":"zookeeper_stats","asize":15,"dsize":15,"dev":0,"ino":1007}]]],{"name":"test","asize":0,"dsize":0,"dev":0,"ino":1008}]]
 ```
+<a name="zkSnapshotRecursiveSummaryToolkit"></a>
+
+### zkSnapshotRecursiveSummaryToolkit.sh
+Recursively collect and display child count and data size for a selected node.
+
+    $./zkSnapshotRecursiveSummaryToolkit.sh
+    USAGE:
+    
+    SnapshotRecursiveSummary  <snapshot_file>  <starting_node>  <max_depth>
+    
+    snapshot_file:    path to the zookeeper snapshot
+    starting_node:    the path in the zookeeper tree where the traversal should begin
+    max_depth:        defines the depth where the tool still writes to the output. 0 means there is no depth limit, every non-leaf node's stats will be displayed, 1 means it will only contain the starting node's and its children's stats, 2 adds another level and so on. This ONLY affects the level of details displayed, NOT the calculation.
+
+```bash
+# recursively collect and display child count and data for the root node and 2 levels below it
+./zkSnapshotRecursiveSummaryToolkit.sh /data/zkdata/version-2/snapshot.fa01000186d / 2
+
+/
+   children: 1250511
+   data: 1952186580
+-- /zookeeper
+--   children: 1
+--   data: 0
+-- /solr
+--   children: 1773
+--   data: 8419162
+---- /solr/configs
+----   children: 1640
+----   data: 8407643
+---- /solr/overseer
+----   children: 6
+----   data: 0
+---- /solr/live_nodes
+----   children: 3
+----   data: 0
+```
 
 <a name="zkSnapshotComparer"></a>
 
diff --git a/zookeeper-server/src/main/java/org/apache/zookeeper/server/SnapshotRecursiveSummary.java b/zookeeper-server/src/main/java/org/apache/zookeeper/server/SnapshotRecursiveSummary.java
new file mode 100644
index 0000000..2d5f30b
--- /dev/null
+++ b/zookeeper-server/src/main/java/org/apache/zookeeper/server/SnapshotRecursiveSummary.java
@@ -0,0 +1,137 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * <p>
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * <p>
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.zookeeper.server;
+
+import java.io.File;
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.Set;
+import org.apache.jute.BinaryInputArchive;
+import org.apache.jute.InputArchive;
+import org.apache.yetus.audience.InterfaceAudience;
+import org.apache.zookeeper.server.persistence.FileSnap;
+import org.apache.zookeeper.server.persistence.SnapStream;
+
+/**
+ * Recursively processes a snapshot file collecting child node count and summarizes the data size
+ * below each node.
+ * "starting_node" defines the node where the recursion starts
+ * "max_depth" defines the depth where the tool still writes to the output.
+ * 0 means there is no depth limit, every non-leaf node's stats will be displayed, 1 means it will
+ * only contain the starting node's and its children's stats, 2 adds another level and so on.
+ * This ONLY affects the level of details displayed, NOT the calculation.
+ */
+@InterfaceAudience.Public public class SnapshotRecursiveSummary {
+
+  /**
+   * USAGE: SnapshotRecursiveSummary snapshot_file starting_node max_depth (exit 2 if invalid)
+   */
+  public static void main(String[] args) throws Exception {
+    if (args.length != 3) {
+      System.err.println(getUsage());
+      System.exit(2);
+    }
+    int maxDepth = 0;
+    try {
+      maxDepth = Integer.parseInt(args[2]);
+    } catch (NumberFormatException e) {
+      System.err.println(getUsage());
+      System.exit(2);
+    }
+    new SnapshotRecursiveSummary().run(args[0], args[1], maxDepth);
+  }
+
+  /**
+   * Deserializes the snapshot file and prints the summary of startingNode's sub-tree to stdout.
+   * @throws IllegalArgumentException if startingNode does not exist in the snapshot
+   */
+  public void run(String snapshotFileName, String startingNode, int maxDepth) throws IOException {
+    File snapshotFile = new File(snapshotFileName);
+    try (InputStream is = SnapStream.getInputStream(snapshotFile)) {
+      InputArchive ia = BinaryInputArchive.getArchive(is);
+      DataTree dataTree = new DataTree();
+      Map<Long, Integer> sessions = new HashMap<>();
+      new FileSnap(null).deserialize(dataTree, sessions, ia);
+      printZnodeDetails(dataTree, startingNode, maxDepth);
+    }
+  }
+
+  private void printZnodeDetails(DataTree dataTree, String startingNode, int maxDepth) {
+    // getNode() yields null for a path missing from the snapshot; fail clearly, not with an NPE
+    if (dataTree.getNode(startingNode) == null) {
+      throw new IllegalArgumentException("Node does not exist in snapshot: " + startingNode);
+    }
+    StringBuilder builder = new StringBuilder();
+    printZnode(dataTree, startingNode, builder, 0, maxDepth);
+    System.out.println(builder);
+  }
+
+  /** Appends the stats of name's sub-tree to builder; returns {node count, data bytes}. */
+  private long[] printZnode(DataTree dataTree, String name, StringBuilder builder, int level,
+      int maxDepth) {
+    DataNode n = dataTree.getNode(name);
+    Set<String> children;
+    long dataSum;
+    synchronized (n) { // keep findbugs happy
+      dataSum = n.data == null ? 0L : n.data.length;
+      children = n.getChildren();
+    }
+    long[] result = {1L, dataSum};
+    if (children.isEmpty()) {
+      return result; // leaf nodes are counted by their parent but never printed themselves
+    }
+    String childPrefix = name.equals("/") ? name : name + "/";
+    StringBuilder childBuilder = new StringBuilder();
+    for (String child : children) {
+      long[] childResult =
+          printZnode(dataTree, childPrefix + child, childBuilder, level + 1, maxDepth);
+      result[0] += childResult[0];
+      result[1] += childResult[1];
+    }
+    // maxDepth only limits what is printed; the totals above always cover the whole sub-tree
+    if (maxDepth == 0 || level <= maxDepth) {
+      String tab = String.join("", Collections.nCopies(level, "--"));
+      builder.append(tab).append(" ").append(name).append("\n");
+      builder.append(tab).append("   children: ").append(result[0] - 1).append("\n");
+      builder.append(tab).append("   data: ").append(result[1]).append("\n");
+      builder.append(childBuilder);
+    }
+    return result;
+  }
+
+  /** Returns the multi-line command-line help text printed on invalid arguments. */
+  public static String getUsage() {
+    String newLine = System.getProperty("line.separator");
+    return String.join(newLine,
+        "USAGE:",
+        newLine,
+        "SnapshotRecursiveSummary  <snapshot_file>  <starting_node>  <max_depth>",
+        newLine,
+        "snapshot_file:    path to the zookeeper snapshot",
+        "starting_node:    the path in the zookeeper tree where the traversal should begin",
+        "max_depth:        defines the depth where the tool still writes to the output. "
+            + "0 means there is no depth limit, every non-leaf node's stats will be displayed, "
+            + "1 means it will only contain the starting node's and its children's stats, "
+            + "2 adds another level and so on. This ONLY affects the level of details displayed, "
+            + "NOT the calculation.");
+  }
+}