Merge pull request #16 from apache/next-release

Merge changes for 2.1.0 into main branch
diff --git a/Dockerfile b/Dockerfile
index 0c2f1ff..d22b05c 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -13,76 +13,110 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-FROM centos:7
 
-ENV JAVA_HOME /usr/lib/jvm/java-1.8.0-openjdk
+##
+## Base image. Rocky Linux 9 with JRE 11 headless and updated CA certs.
+##
+FROM rockylinux:9 as base
 
-ARG ACCUMULO_VERSION=2.0.0
-ARG HADOOP_VERSION=3.2.1
-ARG ZOOKEEPER_VERSION=3.6.0
-ARG HADOOP_USER_NAME=accumulo
-ARG ACCUMULO_FILE=
-ARG HADOOP_FILE=
-ARG ZOOKEEPER_FILE=
+RUN set -eux; \
+  yum install -y ca-certificates java-11-openjdk-headless && \
+  update-ca-trust extract && \
+# yum is dnf on Rocky Linux 9; its cache lives under /var/cache/dnf, so purge that too.
+  yum clean all && rm -rf /var/cache/dnf /var/cache/yum
 
-ENV HADOOP_USER_NAME $HADOOP_USER_NAME
+##
+## Base image for building. Adds wget, JDK, make and gcc-c++ (for building Accumulo native libs).
+##
+FROM base as buildbase
 
-ENV APACHE_DIST_URLS \
-  https://www.apache.org/dyn/closer.cgi?action=download&filename= \
-# if the version is outdated (or we're grabbing the .asc file), we might have to pull from the dist/archive :/
-  https://www-us.apache.org/dist/ \
-  https://www.apache.org/dist/ \
-  https://archive.apache.org/dist/
+RUN set -eux; \
+  yum install -y java-11-openjdk-devel make gcc-c++ wget && \
+  update-ca-trust extract
 
-COPY README.md $ACCUMULO_FILE $HADOOP_FILE $ZOOKEEPER_FILE /tmp/
+COPY download.sh /usr/local/bin/
 
-RUN yum install -y java-1.8.0-openjdk-devel make gcc-c++ wget && \
-  set -eux; \
-  download() { \
-    local f="$1"; shift; \
-    local distFile="$1"; shift; \
-    local success=; \
-    local distUrl=; \
-    for distUrl in $APACHE_DIST_URLS; do \
-      if wget -nv -O "$f" "$distUrl$distFile"; then \
-        success=1; \
-        break; \
-      fi; \
-    done; \
-    [ -n "$success" ]; \
-  }; \
-  \
-  if [ -z "$HADOOP_FILE" ]; then \
-    download "hadoop.tar.gz" "hadoop/core/hadoop-$HADOOP_VERSION/hadoop-$HADOOP_VERSION.tar.gz"; \
-  else \
-    cp "/tmp/$HADOOP_FILE" "hadoop.tar.gz"; \
-  fi; \
-  if [ -z "$ZOOKEEPER_FILE" ]; then \
-    download "zookeeper.tar.gz" "zookeeper/zookeeper-$ZOOKEEPER_VERSION/apache-zookeeper-$ZOOKEEPER_VERSION-bin.tar.gz"; \
-  else \
-    cp "/tmp/$ZOOKEEPER_FILE" "zookeeper.tar.gz"; \
-  fi; \
-  if [ -z "$ACCUMULO_FILE" ]; then \
-    download "accumulo.tar.gz" "accumulo/$ACCUMULO_VERSION/accumulo-$ACCUMULO_VERSION-bin.tar.gz"; \
-  else \
-    cp "/tmp/$ACCUMULO_FILE" "accumulo.tar.gz"; \
-  fi && \
-  tar xzf accumulo.tar.gz -C /tmp/ && \
-  tar xzf hadoop.tar.gz -C /tmp/ && \
-  tar xzf zookeeper.tar.gz -C /tmp/ && \
-  mv /tmp/hadoop-$HADOOP_VERSION /opt/hadoop && \
-  mv /tmp/apache-zookeeper-$ZOOKEEPER_VERSION-bin /opt/zookeeper && \
-  mv /tmp/accumulo-$ACCUMULO_VERSION /opt/accumulo && \
-  /opt/accumulo/bin/accumulo-util build-native && \
-# The below line is required for Accumulo 2.0 to work with ZK 3.5 & above.  This will not be needed for Accumulo 2.1
-  sed -i 's/\${ZOOKEEPER_HOME}\/\*/\${ZOOKEEPER_HOME}\/\*\:\${ZOOKEEPER_HOME}\/lib\/\*/g' /opt/accumulo/conf/accumulo-env.sh
+##
+## Hadoop image. Download/copy and extract the Hadoop installation.
+##
+FROM buildbase as hadoop
+
+ARG HADOOP_VERSION=3.3.4 \
+  HADOOP_FILE=_NOT_SET
+
+# Copy a known file along with the optional files (that might not exist).
+# The known file, along with '*' for the optional file allows the command
+# to succeed even if the optional file does not exist. If we used an empty
+# string for the optional file default value, then this command would copy
+# the entire build context, which is not what we want.
+COPY download.sh ${HADOOP_FILE}* /tmp/
+
+RUN set -eux; \
+  download.sh "${HADOOP_FILE}" "hadoop.tar.gz" "hadoop/core/hadoop-$HADOOP_VERSION/hadoop-$HADOOP_VERSION.tar.gz"; \
+  tar xzf hadoop.tar.gz -C /tmp/; \
+  mv /tmp/hadoop-*/ /opt/hadoop; \
+  rm -rf /opt/hadoop/share/doc/hadoop
+
+##
+## Zookeeper image. Download/copy and extract the Zookeeper installation.
+##
+FROM buildbase as zookeeper
+
+ARG ZOOKEEPER_VERSION=3.8.0 \
+  ZOOKEEPER_FILE=_NOT_SET
+# Copy a known file along with the optional files (that might not exist).
+# The known file, along with '*' for the optional file allows the command
+# to succeed even if the optional file does not exist. If we used an empty
+# string for the optional file default value, then this command would copy
+# the entire build context, which is not what we want.
+COPY download.sh ${ZOOKEEPER_FILE}* /tmp/
+
+RUN set -eux; \
+  download.sh "${ZOOKEEPER_FILE}" "zookeeper.tar.gz" "zookeeper/zookeeper-$ZOOKEEPER_VERSION/apache-zookeeper-$ZOOKEEPER_VERSION-bin.tar.gz"; \
+  tar xzf zookeeper.tar.gz -C /tmp/; \
+  mv /tmp/apache-zookeeper-*/ /opt/zookeeper
+
+##
+## Accumulo image. Download/copy and extract the Accumulo installation, build native libs, and copy in properties.
+##
+FROM buildbase as accumulo
+
+ENV JAVA_HOME=/usr/lib/jvm/java-11-openjdk
+
+ARG ACCUMULO_VERSION=2.1.0 \
+  ACCUMULO_FILE=_NOT_SET
+# Copy a known file along with the optional files (that might not exist).
+# The known file, along with '*' for the optional file allows the command
+# to succeed even if the optional file does not exist. If we used an empty
+# string for the optional file default value, then this command would copy
+# the entire build context, which is not what we want.
+COPY download.sh ${ACCUMULO_FILE}* /tmp/
+
+RUN set -eux; \
+  download.sh "${ACCUMULO_FILE}" "accumulo.tar.gz" "accumulo/$ACCUMULO_VERSION/accumulo-$ACCUMULO_VERSION-bin.tar.gz"; \
+  tar xzf accumulo.tar.gz -C /tmp/; \
+  mv /tmp/accumulo-*/ /opt/accumulo; \
+  /opt/accumulo/bin/accumulo-util build-native
 
 ADD properties/ /opt/accumulo/conf/
 
-ENV HADOOP_HOME /opt/hadoop
-ENV ZOOKEEPER_HOME /opt/zookeeper
-ENV ACCUMULO_HOME /opt/accumulo
-ENV PATH "$PATH:$ACCUMULO_HOME/bin"
+##
+## Final image. Copy extracted/built installations for hadoop, zookeeper, and accumulo; set env and entrypoint.
+## JAVA_HOME uses the jre-11-openjdk alias since base installs only the headless JRE (TODO confirm path in image).
+##
+FROM base
+
+ARG HADOOP_USER_NAME=accumulo
+ENV JAVA_HOME=/usr/lib/jvm/jre-11-openjdk \
+  HADOOP_HOME=/opt/hadoop \
+  HADOOP_USER_NAME=$HADOOP_USER_NAME \
+  ZOOKEEPER_HOME=/opt/zookeeper \
+  ACCUMULO_HOME=/opt/accumulo \
+  PATH="$PATH:/opt/accumulo/bin"
+
+COPY --from=hadoop /opt/hadoop /opt/hadoop
+COPY --from=zookeeper /opt/zookeeper /opt/zookeeper
+COPY --from=accumulo /opt/accumulo /opt/accumulo
 
 ENTRYPOINT ["accumulo"]
 CMD ["help"]
diff --git a/README.md b/README.md
index a28a5a6..256907e 100644
--- a/README.md
+++ b/README.md
@@ -16,9 +16,9 @@
 
 | Software    | Version       |
 |-------------|---------------|
-| [Accumulo]  | 2.0.0         |
-| [Hadoop]    | 3.2.1         |
-| [ZooKeeper] | 3.6.0         |
+| [Accumulo]  | 2.1.0         |
+| [Hadoop]    | 3.3.4         |
+| [ZooKeeper] | 3.8.0         |
 
 If these versions do not match what is running on your cluster, you should consider building
 your own image with matching versions. However, Accumulo must be 2.0.0+. Below are instructions for
@@ -39,7 +39,7 @@
 
    Or build with an Accumulo tarball (located in same directory as DockerFile) using the command below:
 
-        docker build --build-arg ACCUMULO_VERSION=2.0.0-SNAPSHOT --build-arg ACCUMULO_FILE=accumulo-2.0.0-SNAPSHOT-bin.tar.gz -t accumulo .
+        docker build --build-arg ACCUMULO_FILE=accumulo-2.1.0-SNAPSHOT-bin.tar.gz -t accumulo .
 
 ## Image basics
 
diff --git a/download.sh b/download.sh
new file mode 100755
index 0000000..985caa3
--- /dev/null
+++ b/download.sh
@@ -0,0 +1,68 @@
+#!/usr/bin/env bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Usage: download.sh <existing_file> <download_file> <dist_file>
+#
+# Produces <download_file> in the current directory. When <existing_file> is
+# the sentinel "_NOT_SET", <dist_file> (a path relative to an Apache dist
+# root) is fetched from the first mirror that serves it; otherwise
+# /tmp/<existing_file> is moved into place and nothing is downloaded.
+
+set -euo pipefail
+
+APACHE_DIST_URLS=(
+  "https://www.apache.org/dyn/closer.cgi?action=download&filename="
+  # if the version is outdated (or we're grabbing the .asc file), we might have to pull from the dist/archive :/
+  "https://www-us.apache.org/dist/"
+  "https://www.apache.org/dist/"
+  "https://archive.apache.org/dist/"
+)
+
+# Fetch dist file $2 into local file $1, trying each mirror in order.
+# Returns non-zero (after removing any partial file) if every mirror fails.
+download() {
+  local f="$1"; shift
+  local distFile="$1"; shift
+  local distUrl=
+  for distUrl in "${APACHE_DIST_URLS[@]}"; do
+    echo "Attempting to fetch $f from $distUrl$distFile"
+    if wget -nv -O "$f" "$distUrl$distFile"; then
+      return 0
+    fi
+  done
+  # wget -O creates the output file even when the transfer fails.
+  rm -f "$f"
+  echo "Unable to fetch $distFile from any mirror" >&2
+  return 1
+}
+
+if [[ $# -ne 3 ]]; then
+  echo "Usage: $0 <existing_file> <download_file> <dist_file>" >&2
+  exit 1
+fi
+
+existing_file=$1
+download_file=$2
+dist_file=$3
+
+if [[ "$existing_file" == "_NOT_SET" ]]; then
+  download "$download_file" "$dist_file"
+else
+  [ -f "/tmp/$existing_file" ] || { echo "Existing file $existing_file does not exist" >&2; exit 1; }
+  echo "Skipping download of $existing_file"
+  mv "/tmp/$existing_file" "$download_file"
+fi