blob: 18927988be6a5c5862cc9a92bf86508a3f88bd23 [file] [log] [blame]
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Base image to build on. It defaults to livy-dev-base:latest; pass
# --build-arg BASE_IMAGE=<image>:<tag> (or a digest) to build on a pinned base.
ARG BASE_IMAGE=livy-dev-base:latest
FROM ${BASE_IMAGE}

# Versions of the Hadoop and Spark distributions to install. They are used to
# derive the archive names that are copied from the build context.
ARG HADOOP_VERSION=3.3.1
ARG SPARK_VERSION=3.2.3

# Directory under which both distributions are unpacked. It is created
# explicitly so that a non-default ROOT_PATH also works.
ARG ROOT_PATH=/opt
RUN mkdir -p "${ROOT_PATH}"
# Hadoop runtime environment: install location (a version-independent
# symlink), configuration directory, and the Hadoop binaries on PATH.
ENV HADOOP_HOME=${ROOT_PATH}/hadoop
ENV HADOOP_CONF_DIR=${HADOOP_HOME}/etc/hadoop
ENV PATH=${PATH}:${HADOOP_HOME}/bin

# Base name of the Hadoop distribution; ${HADOOP_PACKAGE}.tar.gz must be
# present in the build context.
ENV HADOOP_PACKAGE=hadoop-${HADOOP_VERSION}
COPY ${HADOOP_PACKAGE}.tar.gz ${HADOOP_PACKAGE}.tar.gz

# Unpack the archive under ROOT_PATH, expose it through the HADOOP_HOME
# symlink, drop the bundled documentation, and hand the tree to root.
# chown is pointed at the real directory rather than at ${HADOOP_HOME}: with -R
# and no -H/-L, chown does not traverse a symlink given on the command line, so
# chowning the symlink would leave the extracted files with the owner recorded
# in the archive.
RUN tar -xzf ${HADOOP_PACKAGE}.tar.gz -C ${ROOT_PATH}/ \
 && ln -s ${ROOT_PATH}/${HADOOP_PACKAGE} ${HADOOP_HOME} \
 && rm -rf ${HADOOP_HOME}/share/doc \
 && chown -R root:root ${ROOT_PATH}/${HADOOP_PACKAGE} \
 && rm ${HADOOP_PACKAGE}.tar.gz
# Spark runtime environment: install location (a version-independent symlink)
# and the Spark binaries on PATH.
ENV SPARK_HOME=${ROOT_PATH}/spark
# The "without-hadoop" Spark build ships no Hadoop jars, so the Hadoop
# configuration and jar directories are supplied through SPARK_DIST_CLASSPATH.
# Each directory is listed exactly once.
ENV SPARK_DIST_CLASSPATH="${HADOOP_HOME}/etc/hadoop/*:${HADOOP_HOME}/share/hadoop/common/lib/*:${HADOOP_HOME}/share/hadoop/common/*:${HADOOP_HOME}/share/hadoop/hdfs/*:${HADOOP_HOME}/share/hadoop/hdfs/lib/*:${HADOOP_HOME}/share/hadoop/yarn/lib/*:${HADOOP_HOME}/share/hadoop/yarn/*:${HADOOP_HOME}/share/hadoop/mapreduce/lib/*:${HADOOP_HOME}/share/hadoop/mapreduce/*:${HADOOP_HOME}/share/hadoop/tools/lib/*"
ENV PATH=${PATH}:${SPARK_HOME}/bin

# Base name of the Spark distribution; ${SPARK_PACKAGE}.tgz must be present in
# the build context.
ENV SPARK_PACKAGE=spark-${SPARK_VERSION}-bin-without-hadoop
COPY ${SPARK_PACKAGE}.tgz ${SPARK_PACKAGE}.tgz

# Unpack the archive under ROOT_PATH, expose it through the SPARK_HOME symlink,
# and hand the tree to root.
# chown is pointed at the real directory rather than at ${SPARK_HOME}: with -R
# and no -H/-L, chown does not traverse a symlink given on the command line, so
# chowning the symlink would leave the extracted files with the owner recorded
# in the archive.
RUN tar -xzf ${SPARK_PACKAGE}.tgz -C ${ROOT_PATH}/ \
 && ln -s ${ROOT_PATH}/${SPARK_PACKAGE} ${SPARK_HOME} \
 && chown -R root:root ${ROOT_PATH}/${SPARK_PACKAGE} \
 && rm ${SPARK_PACKAGE}.tgz
# Optional overrides: the commented-out COPY lines below are templates for
# replacing jars shipped in the distributions with private builds. The jar file
# names embed the default versions (Hadoop 3.3.1, Spark 3.2.3); adjust them if
# other versions are being installed.
#
# Uncomment the following line, or add more such lines, to replace the default Hadoop jars with private builds.
# COPY hadoop-streaming-3.3.1.jar ${HADOOP_HOME}/share/hadoop/tools/lib/hadoop-streaming-3.3.1.jar
# Uncomment the following line, or add more such lines, to replace the default Spark jars with private builds.
# COPY spark-repl_2.12-3.2.3.jar ${SPARK_HOME}/jars/spark-repl_2.12-3.2.3.jar

# Use ${SPARK_HOME} as the working directory for any instructions that follow
# and as the default directory of containers started from this image.
WORKDIR ${SPARK_HOME}