# blob: ba77deefcf1bb9dbf62f18bc795fc87beff3f9e2 [file]
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Name of the stage below (local, release or hybrid) that the `env` stage uses
# as its base image. Defaults to `local`.
ARG BUILD_ENV=local

# local: the Hadoop, Hive and Tez archives are all taken from the build context.
# The COPY instructions are ONBUILD triggers, so they do not run in this stage;
# they run in whichever stage is built FROM it.
# The base tag is pinned so rebuilds do not silently pick up a new Ubuntu release.
FROM ubuntu:24.04 AS local
# A wildcard source may match more than one file, in which case the destination
# must be a directory path ending in "/".
ONBUILD COPY hadoop-*.tar.gz /opt/
ONBUILD COPY apache-hive-*-bin.tar.gz /opt/
ONBUILD COPY apache-tez-*-bin.tar.gz /opt/
# release: the Hadoop, Hive and Tez archives are all downloaded from
# archive.apache.org. Every step is an ONBUILD trigger, so it runs in the stage
# that is built FROM this one, not in this stage itself.
# The base tag is pinned so rebuilds do not silently pick up a new Ubuntu release.
FROM ubuntu:24.04 AS release
ARG HADOOP_VERSION
ARG HIVE_VERSION
ARG TEZ_VERSION
# ca-certificates is listed explicitly because --no-install-recommends would
# otherwise leave wget unable to verify the HTTPS downloads below.
ONBUILD RUN apt-get update && \
    apt-get install -y --no-install-recommends ca-certificates wget && \
    rm -rf /var/lib/apt/lists/*
# Download straight into /opt so the result does not depend on the working
# directory and no separate "mv" layer is needed.
ONBUILD RUN wget --no-verbose --directory-prefix=/opt \
        "https://archive.apache.org/dist/tez/$TEZ_VERSION/apache-tez-$TEZ_VERSION-bin.tar.gz" && \
    wget --no-verbose --directory-prefix=/opt \
        "https://archive.apache.org/dist/hadoop/core/hadoop-$HADOOP_VERSION/hadoop-$HADOOP_VERSION.tar.gz" && \
    wget --no-verbose --directory-prefix=/opt \
        "https://archive.apache.org/dist/hive/hive-$HIVE_VERSION/apache-hive-$HIVE_VERSION-bin.tar.gz"
# hybrid: Hadoop and Tez are downloaded from archive.apache.org, while the Hive
# archive is taken from the build context. Every step is an ONBUILD trigger, so
# it runs in the stage that is built FROM this one, not in this stage itself.
# The base tag is pinned so rebuilds do not silently pick up a new Ubuntu release.
FROM ubuntu:24.04 AS hybrid
ARG HADOOP_VERSION
ARG HIVE_VERSION
ARG TEZ_VERSION
# ca-certificates is listed explicitly because --no-install-recommends would
# otherwise leave wget unable to verify the HTTPS downloads below.
ONBUILD RUN apt-get update && \
    apt-get install -y --no-install-recommends ca-certificates wget && \
    rm -rf /var/lib/apt/lists/*
# Download straight into /opt so the result does not depend on the working
# directory and no separate "mv" layer is needed.
ONBUILD RUN wget --no-verbose --directory-prefix=/opt \
        "https://archive.apache.org/dist/tez/$TEZ_VERSION/apache-tez-$TEZ_VERSION-bin.tar.gz" && \
    wget --no-verbose --directory-prefix=/opt \
        "https://archive.apache.org/dist/hadoop/core/hadoop-$HADOOP_VERSION/hadoop-$HADOOP_VERSION.tar.gz"
# Trailing "/" makes it explicit that /opt is a directory destination.
ONBUILD COPY ./apache-hive-$HIVE_VERSION-bin.tar.gz /opt/
# env: unpacks the three archives that the selected base stage placed in /opt.
FROM ${BUILD_ENV} AS env
# An ARG declared before the first FROM is only visible to FROM lines; it has to
# be redeclared (without a value) inside the stage for the echo below to print it.
ARG BUILD_ENV
RUN echo "${BUILD_ENV}"
ARG HADOOP_VERSION
ARG HIVE_VERSION
ARG TEZ_VERSION
# Extract Hadoop, Hive and Tez (in that order) into /opt, skipping the excluded
# paths, then create the /opt/tez-snapshot directory.
# All steps are chained with "&&" so that a failed extraction fails the build
# here instead of being hidden by the exit status of the final mkdir.
RUN tar -xzv \
        --exclude="hadoop-$HADOOP_VERSION/share/doc" \
        --exclude="*/jdiff" \
        --exclude="*/sources" \
        --exclude="*tests.jar" \
        --exclude="*/webapps" \
        -f /opt/hadoop-$HADOOP_VERSION.tar.gz \
        -C /opt/ && \
    tar -xzv \
        --exclude="apache-hive-$HIVE_VERSION-bin/jdbc" \
        -f /opt/apache-hive-$HIVE_VERSION-bin.tar.gz \
        -C /opt/ && \
    tar -xzv \
        --exclude="apache-tez-$TEZ_VERSION-bin/share" \
        -f /opt/apache-tez-$TEZ_VERSION-bin.tar.gz \
        -C /opt && \
    mkdir -p /opt/tez-snapshot
# run: the final runtime image.
FROM eclipse-temurin:21-jdk-ubi9-minimal AS run

# UID assigned to the "hive" user created below.
ARG UID=1000
ARG HADOOP_VERSION
ARG HIVE_VERSION
ARG TEZ_VERSION
# Optional Tez snapshot to fetch; empty by default.
ARG TEZ_SNAPSHOT_VERSION=
ARG TEZ_SNAPSHOT_REPO_URL=https://repository.apache.org/content/repositories/snapshots

# Update the base packages, install the tools listed below, clear the package
# cache, and create the "hive" account (no home directory, no login shell).
RUN set -ex \
    && microdnf update -y \
    && microdnf -y install \
        gettext \
        procps \
        wget \
        xmlstarlet \
    && microdnf clean all \
    && useradd --no-create-home --shell /sbin/nologin --comment "" --uid $UID hive
# Install locations of Hadoop, Hive and Tez, plus the Hive version the image
# was built with.
ENV HADOOP_HOME=/opt/hadoop \
    HIVE_HOME=/opt/hive \
    TEZ_HOME=/opt/tez \
    HIVE_VER=${HIVE_VERSION}
# Put the Hive and Hadoop launchers ahead of the existing PATH entries.
ENV PATH=${HIVE_HOME}/bin:${HADOOP_HOME}/bin:${PATH}

# Bring the unpacked distributions over from the env stage, owned by "hive".
COPY --from=env --chown=hive /opt/hadoop-${HADOOP_VERSION} ${HADOOP_HOME}
COPY --from=env --chown=hive /opt/apache-hive-${HIVE_VERSION}-bin ${HIVE_HOME}
COPY --from=env --chown=hive /opt/apache-tez-${TEZ_VERSION}-bin ${TEZ_HOME}
# Directory created at the end of the env stage.
COPY --from=env --chown=hive /opt/tez-snapshot /opt/tez-snapshot
# When TEZ_SNAPSHOT_VERSION is set, fetch Tez snapshot jars from the Maven snapshot repository
# and place them under /opt/tez-snapshot. At runtime, entrypoint.sh symlinks these into
# $HIVE_HOME/lib with a "0-" prefix so they sort first in bin/hive's classpath glob, ensuring
# snapshot classes take precedence over the Tez release jars bundled with Hive.
# Maven snapshot repositories use timestamped filenames (e.g. tez-api-1.0.0-20250101.jar),
# so we fetch maven-metadata.xml first to resolve the exact filename before downloading the jar.
#
# The script sticks to POSIX sh syntax ("[ ]" rather than "[[ ]]") because RUN
# executes it with /bin/sh, and it uses ${VAR:-} for the version check so that
# "set -u" cannot abort the build when the variable is absent.
RUN set -eux; \
    mkdir -p /opt/tez-snapshot-download; \
    if [ -n "${TEZ_SNAPSHOT_VERSION:-}" ]; then \
        base_url="${TEZ_SNAPSHOT_REPO_URL}/org/apache/tez"; \
        for artifact in tez-common tez-api tez-dag tez-mapreduce tez-runtime-internals tez-runtime-library; do \
            version_url="${base_url}/${artifact}/${TEZ_SNAPSHOT_VERSION}"; \
            metadata_url="${version_url}/maven-metadata.xml"; \
            metadata_file="/opt/tez-snapshot-download/${artifact}-maven-metadata.xml"; \
            echo "metadata_url=${metadata_url}"; \
            wget -q "${metadata_url}" -O "${metadata_file}"; \
            snapshot_value="$(xmlstarlet sel -t -v "string(/metadata/versioning/snapshotVersions/snapshotVersion[extension='jar' and not(classifier)]/value)" "${metadata_file}")"; \
            if [ -z "${snapshot_value}" ]; then \
                echo "No jar snapshotVersion found in ${metadata_url}" >&2; \
                exit 1; \
            fi; \
            jar_file="${artifact}-${snapshot_value}.jar"; \
            jar_url="${version_url}/${jar_file}"; \
            echo "jar_url=${jar_url}"; \
            wget -q "${jar_url}" -O "/opt/tez-snapshot/${jar_file}"; \
        done; \
        echo "Downloaded Tez snapshot jars under /opt/tez-snapshot:"; \
        ls -1 /opt/tez-snapshot/*.jar; \
    else \
        echo "TEZ_SNAPSHOT_VERSION not set. Skipping Tez snapshot download."; \
    fi; \
    rm -rf /opt/tez-snapshot-download
# Add the entrypoint script and the Hive configuration from the build context.
COPY --chown=hive entrypoint.sh /
COPY --chown=hive conf $HIVE_HOME/conf

# Make the entrypoint executable, create the directories listed below and hand
# them to "hive", then give "hive" ownership of everything under
# /opt/tez-snapshot.
RUN chmod +x /entrypoint.sh && \
    mkdir -p \
        $HIVE_HOME/data/warehouse \
        $HIVE_HOME/scratch \
        /home/hive/.beeline && \
    chown hive \
        $HIVE_HOME/data/warehouse \
        $HIVE_HOME/scratch \
        /home/hive/.beeline && \
    chown -R hive /opt/tez-snapshot

# Drop root for everything that runs in the container.
USER hive
WORKDIR $HIVE_HOME
# Documentation only: EXPOSE does not publish these ports.
EXPOSE 10000 10002 9083
# "exec" replaces the wrapper shell with the script, so the script itself
# receives signals such as the SIGTERM sent by "docker stop", regardless of
# which shell /bin/sh happens to be.
ENTRYPOINT ["sh", "-c", "exec /entrypoint.sh"]