upgrade to Apache Bookkeeper 4.7.3 to fix DistributedLog based stateful storage (#3219)

* upgrade to bookkeeper 4.7.3

* update kubernetes deployment manifest files
diff --git a/WORKSPACE b/WORKSPACE
index 6e4f4b0..e56e40f 100644
--- a/WORKSPACE
+++ b/WORKSPACE
@@ -27,7 +27,7 @@
 powermock_version = "1.6.2"
 reef_version = "0.14.0"
 slf4j_version = "1.7.7"
-distributedlog_version = "0.5.0"
+distributedlog_version = "4.7.3"
 http_client_version = "4.5.2"
 
 # heron API server
@@ -338,7 +338,7 @@
 
 maven_jar(
   name = "io_netty_netty_all",
-  artifact = "io.netty:netty-all:4.0.21.Final"
+  artifact = "io.netty:netty-all:4.1.22.Final"
 )
 
 maven_jar(
@@ -738,7 +738,7 @@
 # bookkeeper & distributedlog dependencies
 maven_jar(
   name = "org_apache_distributedlog_core",
-  artifact = "org.apache.distributedlog:distributedlog-core:jar:shaded:" + distributedlog_version
+  artifact = "org.apache.distributedlog:distributedlog-core-shaded:" + distributedlog_version
 )
 # end bookkeeper & distributedlog dependencies
 
diff --git a/deploy/kubernetes/general/apiserver.yaml b/deploy/kubernetes/general/apiserver.yaml
index 9fc080f..b11f7ac 100644
--- a/deploy/kubernetes/general/apiserver.yaml
+++ b/deploy/kubernetes/general/apiserver.yaml
@@ -77,6 +77,10 @@
           operator: "Equal"
           effect: "NoExecute"
           tolerationSeconds: 10
+      initContainers:
+        - name: init-heron-apiserver
+          image: apache/bookkeeper:4.7.3
+          command: ['sh', '-c', '/opt/bookkeeper/bin/dlog admin bind -l /ledgers -s zookeeper:2181 -c distributedlog://zookeeper:2181/heron']
       containers:
         - name: heron-apiserver
           image: heron/heron:latest
@@ -90,7 +94,9 @@
               -D heron.kubernetes.scheduler.uri=http://localhost:8001
               -D heron.executor.docker.image=heron/heron:latest
               -D heron.class.uploader=org.apache.heron.uploader.dlog.DLUploader
-              -D heron.uploader.dlog.topologies.namespace.uri=distributedlog://zookeeper:2181/distributedlog
+              -D heron.uploader.dlog.topologies.namespace.uri=distributedlog://zookeeper:2181/heron
+              -D heron.statefulstorage.classname=org.apache.heron.statefulstorage.dlog.DlogStorage
+              -D heron.statefulstorage.dlog.namespace.uri=distributedlog://zookeeper:2181/heron
         - name: kubectl-proxy
           image: heron/kubectl:latest
           command: ["sh", "-c"]
diff --git a/deploy/kubernetes/general/bookkeeper.statefulset.yaml b/deploy/kubernetes/general/bookkeeper.statefulset.yaml
index 1476e91..89aaf2d 100644
--- a/deploy/kubernetes/general/bookkeeper.statefulset.yaml
+++ b/deploy/kubernetes/general/bookkeeper.statefulset.yaml
@@ -30,8 +30,6 @@
     BK_ledgerDirectories: "/bookkeeper/data/ledgers"
     BK_indexDirectories: "/bookkeeper/data/ledgers"
     BK_zkServers: zookeeper
-    # the default manager is flat, which is not good for supporting large number of ledgers
-    BK_ledgerManagerType: "hierarchical"
     # TODO: Issue 458: https://github.com/apache/bookkeeper/issues/458
     #BK_statsProviderClass: org.apache.bookkeeper.stats.prometheus.PrometheusMetricsProvider
     # use hostname as bookie id for StatefulSets deployment
@@ -75,7 +73,7 @@
       terminationGracePeriodSeconds: 0
       containers:
         - name: bookie
-          image: apachedistributedlog/distributedlog:0.5.0
+          image: apache/bookkeeper:4.7.3
           resources:
             requests:
               memory: "3Gi"
@@ -83,8 +81,7 @@
             limits:
               memory: "5Gi"
               cpu: "2000m"
-          # use the patched entrypoint.sh - it will automatically created the desired distributedlog namespace
-          command: [ "/bin/bash", "/opt/distributedlog/bin/entrypoint.sh" ]
+          command: [ "/bin/bash", "/opt/bookkeeper/scripts/entrypoint.sh" ]
           args: ["/opt/bookkeeper/bin/bookkeeper", "bookie"]
           ports:
             - name: bookie
@@ -162,8 +159,8 @@
     spec:
       containers:
         - name: replication-worker
-          image: apachedistributedlog/distributedlog:0.5.0
-          command: [ "/bin/bash", "/opt/bookkeeper/entrypoint.sh" ]
+          image: apache/bookkeeper:4.7.3
+          command: [ "/bin/bash", "/opt/bookkeeper/scripts/entrypoint.sh" ]
           args: ["/opt/bookkeeper/bin/bookkeeper", "autorecovery"]
           envFrom:
             - configMapRef:
diff --git a/deploy/kubernetes/general/bookkeeper.statefulset_empty.yaml b/deploy/kubernetes/general/bookkeeper.statefulset_empty.yaml
index 29ce127..219e3c2 100644
--- a/deploy/kubernetes/general/bookkeeper.statefulset_empty.yaml
+++ b/deploy/kubernetes/general/bookkeeper.statefulset_empty.yaml
@@ -30,8 +30,6 @@
     BK_ledgerDirectories: "/bookkeeper/data/ledgers"
     BK_indexDirectories: "/bookkeeper/data/ledgers"
     BK_zkServers: zookeeper
-    # the default manager is flat, which is not good for supporting large number of ledgers
-    BK_ledgerManagerType: "hierarchical"
     # TODO: Issue 458: https://github.com/apache/bookkeeper/issues/458
     #BK_statsProviderClass: org.apache.bookkeeper.stats.prometheus.PrometheusMetricsProvider
     # use hostname as bookie id for StatefulSets deployment
@@ -75,7 +73,7 @@
       terminationGracePeriodSeconds: 0
       containers:
         - name: bookie
-          image: apachedistributedlog/distributedlog:0.5.0
+          image: apache/bookkeeper:4.7.3
           resources:
             requests:
               memory: "3Gi"
@@ -83,8 +81,7 @@
             limits:
               memory: "5Gi"
               cpu: "2000m"
-          # use the patched entrypoint.sh - it will automatically created the desired distributedlog namespace
-          command: [ "/bin/bash", "/opt/distributedlog/bin/entrypoint.sh" ]
+          command: [ "/bin/bash", "/opt/bookkeeper/scripts/entrypoint.sh" ]
           args: ["/opt/bookkeeper/bin/bookkeeper", "bookie"]
           ports:
             - name: bookie
@@ -146,8 +143,8 @@
     spec:
       containers:
         - name: replication-worker
-          image: apachedistributedlog/distributedlog:0.5.0
-          command: [ "/bin/bash", "/opt/bookkeeper/entrypoint.sh" ]
+          image: apache/bookkeeper:4.7.3
+          command: [ "/bin/bash", "/opt/bookkeeper/scripts/entrypoint.sh" ]
           args: ["/opt/bookkeeper/bin/bookkeeper", "autorecovery"]
           envFrom:
             - configMapRef:
diff --git a/deploy/kubernetes/general/bookkeeper.yaml b/deploy/kubernetes/general/bookkeeper.yaml
index 66f92da..4fd985b 100644
--- a/deploy/kubernetes/general/bookkeeper.yaml
+++ b/deploy/kubernetes/general/bookkeeper.yaml
@@ -30,8 +30,6 @@
   BK_ledgerDirectories: "/bookkeeper/data/ledgers"
   BK_indexDirectories: "/bookkeeper/data/ledgers" 
   BK_zkServers: zookeeper
-  # the default manager is flat, which is not good for supporting large number of ledgers
-  BK_ledgerManagerType: "hierarchical"
   # TODO: Issue 458: https://github.com/apache/bookkeeper/issues/458
   #BK_statsProviderClass: org.apache.bookkeeper.stats.PrometheusMetricsProvider
 ---
@@ -63,7 +61,7 @@
     spec:
       containers:
         - name: bookie
-          image: apachedistributedlog/distributedlog:0.5.0
+          image: apache/bookkeeper:4.7.3
           resources:
             requests:
               memory: "3Gi"
@@ -71,8 +69,7 @@
             limits:
               memory: "5Gi"
               cpu: "2000m"
-          # use the patched entrypoint.sh - it will automatically created the desired distributedlog namespace
-          command: [ "/bin/bash", "/opt/distributedlog/bin/entrypoint.sh" ]
+          command: [ "/bin/bash", "/opt/bookkeeper/scripts/entrypoint.sh" ]
           args: ["/opt/bookkeeper/bin/bookkeeper", "bookie"]
           ports:
             - name: client
@@ -147,8 +144,8 @@
     spec:
       containers:
         - name: replication-worker
-          image: apachedistributedlog/distributedlog:0.5.0
-          command: [ "/bin/bash", "/opt/bookkeeper/entrypoint.sh" ]
+          image: apache/bookkeeper:4.7.3
+          command: [ "/bin/bash", "/opt/bookkeeper/scripts/entrypoint.sh" ]
           args: ["/opt/bookkeeper/bin/bookkeeper", "autorecovery"]
           envFrom:
             - configMapRef:
diff --git a/deploy/kubernetes/helm/templates/bookie.yaml b/deploy/kubernetes/helm/templates/bookie.yaml
index 92667d2..76e5d9a 100644
--- a/deploy/kubernetes/helm/templates/bookie.yaml
+++ b/deploy/kubernetes/helm/templates/bookie.yaml
@@ -40,8 +40,6 @@
   BK_ledgerDirectories: "/bookkeeper/data/ledgers"
   BK_indexDirectories: "/bookkeeper/data/ledgers" 
   BK_zkServers: {{ .Release.Name }}-zookeeper:{{ .Values.zookeeper.clientPort }}
-  # the default manager is flat, which is not good for supporting large number of ledgers
-  BK_ledgerManagerType: "hierarchical"
   BK_autoRecoveryDaemonEnabled: "true"
   # TODO: Issue 458: https://github.com/apache/bookkeeper/issues/458
   #BK_statsProviderClass: org.apache.bookkeeper.stats.PrometheusMetricsProvider
@@ -126,7 +124,7 @@
             limits:
               cpu: {{ $bookieCpuMax | quote }}
           # use the patched entrypoint.sh - it will automatically created the desired distributedlog namespace
-          command: [ "/bin/bash", "/opt/distributedlog/bin/entrypoint.sh" ]
+          command: [ "/bin/bash", "/opt/bookkeeper/scripts/entrypoint.sh" ]
           args: ["/opt/bookkeeper/bin/bookkeeper", "bookie"]
           ports:
             - name: client
diff --git a/deploy/kubernetes/helm/templates/tools.yaml b/deploy/kubernetes/helm/templates/tools.yaml
index 2c7b614..a6b8287 100644
--- a/deploy/kubernetes/helm/templates/tools.yaml
+++ b/deploy/kubernetes/helm/templates/tools.yaml
@@ -68,6 +68,9 @@
             - sh
             - -c
             - /opt/zookeeper/scripts/wait-for-zookeeper.sh {{ .Release.Name }}-zookeeper {{ .Values.zookeeper.clientPort }}
+        - name: init-heron-apiserver  # ZooKeeper must be addressed as <release>-zookeeper:<clientPort>
+          image: {{ .Values.bookkeeper.image }}
+          command: ['sh', '-c', '/opt/bookkeeper/bin/dlog admin bind -l /ledgers -s {{ .Release.Name }}-zookeeper:{{ .Values.zookeeper.clientPort }} -c distributedlog://{{ .Release.Name }}-zookeeper:{{ .Values.zookeeper.clientPort }}/heron']
       containers:
         - name: heron-tracker
           image: {{ .Values.image }}
@@ -127,10 +130,12 @@
               -D heron.kubernetes.scheduler.uri=http://localhost:8001
               -D heron.kubernetes.scheduler.namespace={{ .Release.Namespace }}
               -D heron.executor.docker.image={{ .Values.image }}
+              -D heron.statefulstorage.classname=org.apache.heron.statefulstorage.dlog.DlogStorage
+              -D heron.statefulstorage.dlog.namespace.uri=distributedlog://{{ .Release.Name }}-zookeeper:{{ .Values.zookeeper.clientPort }}/heron
               {{- if eq .Values.uploader.class "dlog" }}
               -D heron.class.uploader=org.apache.heron.uploader.dlog.DLUploader
               -D heron.uploader.dlog.topologies.num.replicas={{ $jobReplicas }}
-              -D heron.uploader.dlog.topologies.namespace.uri=distributedlog://{{ .Release.Name }}-zookeeper:2181/distributedlog
+              -D heron.uploader.dlog.topologies.namespace.uri=distributedlog://{{ .Release.Name }}-zookeeper:{{ .Values.zookeeper.clientPort }}/heron
               {{- else if eq .Values.uploader.class "s3" }}
               -D heron.class.uploader=org.apache.heron.uploader.s3.S3Uploader
               -D heron.uploader.s3.bucket={{ .Values.uploader.s3Bucket }}
diff --git a/deploy/kubernetes/helm/values.yaml.template b/deploy/kubernetes/helm/values.yaml.template
index fec912f..5fc2a15 100644
--- a/deploy/kubernetes/helm/values.yaml.template
+++ b/deploy/kubernetes/helm/values.yaml.template
@@ -66,7 +66,7 @@
 zkReplicas: 1
 
 bookkeeper:
-  image: apachedistributedlog/distributedlog:latest
+  image: apache/bookkeeper:4.7.3
   imagePullPolicy: IfNotPresent
 
 zookeeper:
diff --git a/deploy/kubernetes/minikube/apiserver.yaml b/deploy/kubernetes/minikube/apiserver.yaml
index 307ef0f..2a89c1a 100644
--- a/deploy/kubernetes/minikube/apiserver.yaml
+++ b/deploy/kubernetes/minikube/apiserver.yaml
@@ -63,6 +63,10 @@
         app: heron-apiserver
     spec:
       serviceAccountName: heron-apiserver
+      initContainers:
+        - name: init-heron-apiserver
+          image: apache/bookkeeper:4.7.3
+          command: ['sh', '-c', '/opt/bookkeeper/bin/dlog admin bind -l /ledgers -s zookeeper:2181 -c distributedlog://zookeeper:2181/heron']
       containers:
         - name: heron-apiserver
           image: heron/heron:latest
@@ -77,7 +81,9 @@
               -D heron.executor.docker.image=heron/heron:latest
               -D heron.class.uploader=org.apache.heron.uploader.dlog.DLUploader
               -D heron.uploader.dlog.topologies.num.replicas=1
-              -D heron.uploader.dlog.topologies.namespace.uri=distributedlog://zookeeper:2181/distributedlog
+              -D heron.uploader.dlog.topologies.namespace.uri=distributedlog://zookeeper:2181/heron
+              -D heron.statefulstorage.classname=org.apache.heron.statefulstorage.dlog.DlogStorage
+              -D heron.statefulstorage.dlog.namespace.uri=distributedlog://zookeeper:2181/heron
         - name: kubectl-proxy
           image: heron/kubectl:latest
           command: ["sh", "-c"]
diff --git a/deploy/kubernetes/minikube/bookkeeper.yaml b/deploy/kubernetes/minikube/bookkeeper.yaml
index 23fad85..d5ec668 100644
--- a/deploy/kubernetes/minikube/bookkeeper.yaml
+++ b/deploy/kubernetes/minikube/bookkeeper.yaml
@@ -30,8 +30,6 @@
   BK_ledgerDirectories: "/bookkeeper/data/ledgers"
   BK_indexDirectories: "/bookkeeper/data/ledgers" 
   BK_zkServers: zookeeper
-  # the default manager is flat, which is not good for supporting large number of ledgers
-  BK_ledgerManagerType: "hierarchical"
 ---
 
 ## BookKeeper servers need to access the local disks and the pods
@@ -56,15 +54,15 @@
       initContainers:
         # The first time, initialize BK to wipe data for minikube
         - name: bookie-format
-          image: apachedistributedlog/distributedlog:0.5.0
+          image: apache/bookkeeper:4.7.3
           args: ["/opt/bookkeeper/bin/bookkeeper", "shell", "bookieformat", "--nonInteractive", "-f", "-deleteCookie"]
-          command: [ "/bin/bash", "/opt/distributedlog/bin/entrypoint.sh" ]
+          command: [ "/bin/bash", "/opt/bookkeeper/scripts/entrypoint.sh" ]
           envFrom:
             - configMapRef:
                 name: bookie-config
       containers:
         - name: bookie
-          image: apachedistributedlog/distributedlog:0.5.0
+          image: apache/bookkeeper:4.7.3
           resources:
             requests:
               memory: "1Gi"
@@ -72,8 +70,7 @@
             limits:
               memory: "2Gi"
               cpu: "2000m"
-          # use the patched entrypoint.sh - it will automatically created the desired distributedlog namespace
-          command: [ "/bin/bash", "/opt/distributedlog/bin/entrypoint.sh" ]
+          command: [ "/bin/bash", "/opt/bookkeeper/scripts/entrypoint.sh" ]
           args: ["/opt/bookkeeper/bin/bookkeeper", "bookie"]
           ports:
             - name: client
diff --git a/heron/downloaders/src/java/BUILD b/heron/downloaders/src/java/BUILD
index c819ecf..67a5cd7 100644
--- a/heron/downloaders/src/java/BUILD
+++ b/heron/downloaders/src/java/BUILD
@@ -13,6 +13,7 @@
   "//third_party/java:commons-compress",
   "@commons_cli_commons_cli//jar",
   "@org_apache_distributedlog_core//jar",
+  "@io_netty_netty_all//jar",
 ]
 
 java_library(
diff --git a/heron/downloaders/src/java/org/apache/heron/downloader/DLDownloader.java b/heron/downloaders/src/java/org/apache/heron/downloader/DLDownloader.java
index 1618a29..6b173be 100644
--- a/heron/downloaders/src/java/org/apache/heron/downloader/DLDownloader.java
+++ b/heron/downloaders/src/java/org/apache/heron/downloader/DLDownloader.java
@@ -64,6 +64,9 @@
         parentName,
         uri.getQuery(),
         uri.getFragment());
+
+    CONF.addProperty("bkc.allowShadedLedgerManagerFactoryClass", true);
+
     Namespace ns = builder
         .clientId("heron-downloader")
         .conf(CONF)
diff --git a/heron/io/dlog/src/java/BUILD b/heron/io/dlog/src/java/BUILD
index a016f06..6c918fe 100644
--- a/heron/io/dlog/src/java/BUILD
+++ b/heron/io/dlog/src/java/BUILD
@@ -2,6 +2,7 @@
 
 dlog_deps = [
   "@org_apache_distributedlog_core//jar",
+  "@io_netty_netty_all//jar",
   "//third_party/java:dlog-java",
 ]
 
diff --git a/heron/io/dlog/src/java/org/apache/heron/dlog/Util.java b/heron/io/dlog/src/java/org/apache/heron/dlog/Util.java
index a8ff797..5ff452a 100644
--- a/heron/io/dlog/src/java/org/apache/heron/dlog/Util.java
+++ b/heron/io/dlog/src/java/org/apache/heron/dlog/Util.java
@@ -126,10 +126,12 @@
   }
 
   private static Namespace openNamespace(URI uri) throws IOException {
+    DistributedLogConfiguration distributedLogConfiguration = new DistributedLogConfiguration();
+    distributedLogConfiguration.addProperty("bkc.allowShadedLedgerManagerFactoryClass", true);
     return NamespaceBuilder.newBuilder()
         .uri(uri)
         .clientId("dlog-util")
-        .conf(new DistributedLogConfiguration())
+        .conf(distributedLogConfiguration)
         .build();
   }
 
diff --git a/heron/io/dlog/tests/java/BUILD b/heron/io/dlog/tests/java/BUILD
index 274ed23..a4fe483 100644
--- a/heron/io/dlog/tests/java/BUILD
+++ b/heron/io/dlog/tests/java/BUILD
@@ -7,6 +7,7 @@
   common_deps_files + [
     "@com_google_guava_guava//jar",
     "@org_apache_distributedlog_core//jar",
+    "@io_netty_netty_all//jar",
     "//heron/io/dlog/src/java:dlog-lib",
   ]
   
diff --git a/heron/statefulstorages/src/java/org/apache/heron/statefulstorage/dlog/DlogStorage.java b/heron/statefulstorages/src/java/org/apache/heron/statefulstorage/dlog/DlogStorage.java
index 9999f36..e53e45f 100644
--- a/heron/statefulstorages/src/java/org/apache/heron/statefulstorage/dlog/DlogStorage.java
+++ b/heron/statefulstorages/src/java/org/apache/heron/statefulstorage/dlog/DlogStorage.java
@@ -105,6 +105,8 @@
         .setNumWorkerThreads(1)                           // use 1 worker thread
         .setBKClientNumberIOThreads(1);
 
+    conf.addProperty("bkc.allowShadedLedgerManagerFactoryClass", true);
+
     return this.nsBuilderSupplier.get()
         .clientId("heron-stateful-storage")
         .conf(conf)
diff --git a/heron/statefulstorages/tests/java/BUILD b/heron/statefulstorages/tests/java/BUILD
index 629337e..c5671bc 100644
--- a/heron/statefulstorages/tests/java/BUILD
+++ b/heron/statefulstorages/tests/java/BUILD
@@ -50,6 +50,7 @@
   "//heron/statefulstorages/src/java:dlog-statefulstorage-java",
   "@com_google_guava_guava//jar",
   "@org_apache_distributedlog_core//jar",
+  "@io_netty_netty_all//jar",
 ]
 
 java_library(
diff --git a/heron/uploaders/src/java/BUILD b/heron/uploaders/src/java/BUILD
index 109ce56..0c41e4a 100644
--- a/heron/uploaders/src/java/BUILD
+++ b/heron/uploaders/src/java/BUILD
@@ -23,6 +23,7 @@
 dlog_deps_files = \
     uploader_spi_files + [
         "@org_apache_distributedlog_core//jar",
+        "@io_netty_netty_all//jar",
         "//heron/io/dlog/src/java:dlog-lib",
     ]
     
diff --git a/heron/uploaders/src/java/org/apache/heron/uploader/dlog/DLUploader.java b/heron/uploaders/src/java/org/apache/heron/uploader/dlog/DLUploader.java
index 8a537cd..c0c1bca 100644
--- a/heron/uploaders/src/java/org/apache/heron/uploader/dlog/DLUploader.java
+++ b/heron/uploaders/src/java/org/apache/heron/uploader/dlog/DLUploader.java
@@ -130,6 +130,8 @@
         .setAckQuorumSize(numReplicas)
         .setUseDaemonThread(true);                        // use daemon thread
 
+    conf.addProperty("bkc.allowShadedLedgerManagerFactoryClass", true);
+
     URI uri = URI.create(DLContext.dlTopologiesNamespaceURI(this.config));
     LOG.info(String.format(
         "Initializing distributedlog namespace for uploading topologies : %s",
diff --git a/heron/uploaders/tests/java/BUILD b/heron/uploaders/tests/java/BUILD
index 51e2e1c..c98747c 100644
--- a/heron/uploaders/tests/java/BUILD
+++ b/heron/uploaders/tests/java/BUILD
@@ -22,6 +22,7 @@
     common_deps_files + \
     spi_deps_files + [
         "@org_apache_distributedlog_core//jar",
+        "@io_netty_netty_all//jar",
         "//heron/uploaders/src/java:dlog-uploader-java",
     ]
 
diff --git a/third_party/java/BUILD b/third_party/java/BUILD
index ba59451..af66048 100644
--- a/third_party/java/BUILD
+++ b/third_party/java/BUILD
@@ -350,9 +350,11 @@
     srcs = [ "Empty.java" ],
     exports = [ 
         "@org_apache_distributedlog_core//jar",
+        "@io_netty_netty_all//jar",
     ],
     deps = [ 
         "@org_apache_distributedlog_core//jar",
+        "@io_netty_netty_all//jar",
         "@org_slf4j_slf4j_api//jar",
         "@org_slf4j_slf4j_jdk14//jar",
         "@commons_collections_commons_collections//jar",