blob: 3d0aacfb7c84d39869b1928b3e9f8be26fa1b00f [file] [log] [blame]
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
FROM {{ BASE_IMAGE }}
ARG spark_uid=185
RUN groupadd --system --gid=${spark_uid} spark && \
useradd --system --uid=${spark_uid} --gid=spark spark
RUN set -ex; \
apt-get update; \
apt-get install -y gnupg2 wget bash tini libc6 libpam-modules krb5-user libnss3 procps net-tools gosu libnss-wrapper; \
mkdir -p /opt/spark; \
mkdir /opt/spark/python; \
mkdir -p /opt/spark/examples; \
mkdir -p /opt/spark/work-dir; \
chmod g+w /opt/spark/work-dir; \
touch /opt/spark/RELEASE; \
chown -R spark:spark /opt/spark; \
echo "auth required pam_wheel.so use_uid" >> /etc/pam.d/su; \
rm -rf /var/lib/apt/lists/*
# Install Apache Spark
# https://downloads.apache.org/spark/KEYS
ENV SPARK_TGZ_URL=https://archive.apache.org/dist/spark/spark-{{ SPARK_VERSION }}/spark-{{ SPARK_VERSION }}-bin-hadoop3.tgz \
SPARK_TGZ_ASC_URL=https://archive.apache.org/dist/spark/spark-{{ SPARK_VERSION }}/spark-{{ SPARK_VERSION }}-bin-hadoop3.tgz.asc \
GPG_KEY={{ SPARK_GPG_KEY }}
RUN set -ex; \
export SPARK_TMP="$(mktemp -d)"; \
cd $SPARK_TMP; \
wget -nv -O spark.tgz "$SPARK_TGZ_URL"; \
wget -nv -O spark.tgz.asc "$SPARK_TGZ_ASC_URL"; \
export GNUPGHOME="$(mktemp -d)"; \
gpg --batch --keyserver hkps://keys.openpgp.org --recv-key "$GPG_KEY" || \
gpg --batch --keyserver hkps://keyserver.ubuntu.com --recv-keys "$GPG_KEY"; \
gpg --batch --verify spark.tgz.asc spark.tgz; \
gpgconf --kill all; \
rm -rf "$GNUPGHOME" spark.tgz.asc; \
\
tar -xf spark.tgz --strip-components=1; \
chown -R spark:spark .; \
mv jars /opt/spark/; \
mv bin /opt/spark/; \
mv sbin /opt/spark/; \
mv kubernetes/dockerfiles/spark/decom.sh /opt/; \
mv examples /opt/spark/; \
mv kubernetes/tests /opt/spark/; \
mv data /opt/spark/; \
mv python/pyspark /opt/spark/python/pyspark/; \
mv python/lib /opt/spark/python/lib/; \
mv R /opt/spark/; \
chmod a+x /opt/decom.sh; \
cd ..; \
rm -rf "$SPARK_TMP";
COPY entrypoint.sh /opt/
ENV SPARK_HOME /opt/spark
WORKDIR /opt/spark/work-dir
USER spark
ENTRYPOINT [ "/opt/entrypoint.sh" ]