# blob: ef3ca422fd49ecb13133236a6663c444f5d5ba41 [file] [log] [blame]
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# This Dockerfile builds a Hive MetaStore image, bundled with a Kerberos KDC, for use in testing
# Usage:
# Run the docker command below
# docker build \
# --build-arg APACHE_MIRROR="https://archive.apache.org/dist" \
# --build-arg HIVE_VERSION="2.3.9" \
# --build-arg HADOOP_VERSION="2.10.2" \
# --file build/Dockerfile.HMS \
# --tag nekyuubi/kyuubi-hive-metastore:<tag> \
# .
# Options:
# -f, --file path to this Dockerfile
# -t, --tag the target repository and tag name
# more options are listed by `docker build --help`
# Base image (JDK 8, "focal" variant of eclipse-temurin).
FROM eclipse-temurin:8-jdk-focal

# Build-time parameters; each can be overridden with --build-arg.
#   APACHE_MIRROR   base URL the Hive and Hadoop tarballs are downloaded from
#   HIVE_VERSION    Hive release to install under /opt
#   HADOOP_VERSION  Hadoop release to install under /opt
ARG APACHE_MIRROR="https://archive.apache.org/dist"
ARG HIVE_VERSION="2.3.9"
ARG HADOOP_VERSION="2.10.2"
# Setup KDC
# The three heredocs below are written verbatim into the image (the quoted
# "EOF" delimiter disables build-time variable expansion inside them).

# Directory that holds the KDC configuration files created below.
RUN mkdir -p /etc/krb5kdc

# kadmin ACL: principals matching */admin@TEST.ORG are granted all privileges (*).
COPY <<"EOF" /etc/krb5kdc/kadm5.acl
*/admin@TEST.ORG *
EOF

# KDC server configuration: port 88 for both kdc_ports and kdc_tcp_ports, and
# a single realm TEST.ORG restricted to the aes128-cts:normal enctype.
COPY <<"EOF" /etc/krb5kdc/kdc.conf
[kdcdefaults]
kdc_ports = 88
kdc_tcp_ports = 88
[realms]
TEST.ORG = {
acl_file = /etc/krb5kdc/kadm5.acl
dict_file = /usr/share/dict/words
admin_keytab = /etc/krb5kdc/kadm5.keytab
supported_enctypes = aes128-cts:normal
}
EOF

# Kerberos library configuration: TEST.ORG is the default realm, DNS-based
# realm/KDC discovery is disabled, and both the KDC and the admin server are
# expected on localhost. Log files go under /var/log.
COPY <<"EOF" /etc/krb5.conf
[logging]
default = FILE:/var/log/krb5libs.log
kdc = FILE:/var/log/krb5kdc.log
admin_server = FILE:/var/log/kadmind.log
[libdefaults]
default_realm = TEST.ORG
dns_lookup_realm = false
dns_lookup_kdc = false
forwardable = true
allow_weak_crypto = true
[realms]
TEST.ORG = {
kdc = localhost:88
admin_server = localhost
}
EOF
# Helper installed on PATH for creating a principal and exporting its keytab.
# --chmod makes the script executable in the same layer that creates it, so no
# follow-up `RUN chmod` layer is needed.
COPY --chmod=755 <<"EOF" /usr/local/bin/create-principal
#!/bin/bash
# Create a Kerberos principal with a random key in the TEST.ORG realm and
# export its key to a keytab file.
#
# Usage: create-principal -p <principal> -k <output keytab file>
#   -p  principal name without the realm (e.g. hive/localhost)
#   -k  path of the keytab file to write
# Exits 1 with a usage message when an option is missing or unknown.
set -euo pipefail

# Print the usage message to stderr and abort.
function usage() {
  echo "Usage: $0 -p <principal> -k <output keytab file>" >&2
  exit 1
}

while getopts "p:k:" o; do
  case "${o}" in
    p) principal="$OPTARG" ;;
    k) keytab="$OPTARG" ;;
    # Unknown option or missing option argument: fail instead of ignoring it.
    *) usage ;;
  esac
done

# Both options are mandatory.
if [[ ! -v principal || ! -v keytab ]]; then
  usage
fi

/usr/sbin/kadmin.local -q "addprinc -randkey ${principal}@TEST.ORG"
# -norandkey exports the existing key instead of generating a new one.
/usr/sbin/kadmin.local -q "ktadd -norandkey -k ${keytab} ${principal}"
EOF
# Install the Kerberos server/client packages, curl and busybox.
# - /usr/bin/sh is re-pointed at bash before anything else runs.
#   NOTE(review): presumably so that later shell-form RUN steps and scripts
#   invoked through `sh` get bash semantics; confirm.
# - apt-get (not apt) is used because apt's command-line interface is not
#   stable for scripted use, and DEBIAN_FRONTEND=noninteractive keeps the krb5
#   package configuration from trying to prompt during the build.
# - The apt caches are emptied in the same layer that filled them.
# - busybox applets are installed into /opt/busybox; /wait-kdc-ready.sh uses
#   the `nc` applet from there.
RUN set -xeu && \
    ln -snf /usr/bin/bash /usr/bin/sh && \
    apt-get update -qq && \
    DEBIAN_FRONTEND=noninteractive apt-get install -y \
        busybox \
        curl \
        krb5-admin-server \
        krb5-kdc \
        krb5-user && \
    rm -rf /var/lib/apt/lists/* /var/cache/apt/archives/* && \
    mkdir /opt/busybox && \
    busybox --install /opt/busybox

# CREATE KERBEROS DATABASE
# -s also creates a stash file for the master key; -P supplies the master
# password non-interactively. The password is baked into the image, which is
# only acceptable because this image is meant for testing.
RUN /usr/sbin/kdb5_util create -s -P password
# Setup Hive MetaStore
# Download the Hive and Hadoop binary tarballs from APACHE_MIRROR, unpack them
# under /opt and delete each archive in the same layer.
WORKDIR /opt
RUN set -e; \
    for url in \
        "${APACHE_MIRROR}/hive/hive-${HIVE_VERSION}/apache-hive-${HIVE_VERSION}-bin.tar.gz" \
        "${APACHE_MIRROR}/hadoop/common/hadoop-${HADOOP_VERSION}/hadoop-${HADOOP_VERSION}.tar.gz"; \
    do \
        archive="${url##*/}"; \
        wget "${url}"; \
        tar -xzf "${archive}"; \
        rm "${archive}"; \
    done

# Installation roots of the unpacked distributions, read by the start script.
ENV HIVE_HOME=/opt/apache-hive-${HIVE_VERSION}-bin \
    HADOOP_HOME=/opt/hadoop-${HADOOP_VERSION}
# Readiness probe used by /start-hive-metastore.sh in kerberos mode.
COPY <<"EOF" /wait-kdc-ready.sh
#!/bin/bash
# Wait until TCP ports 88 and 749 on localhost both accept connections.
# Polls once per $interval seconds for at most $timeout seconds.
# Exit status: 0 as soon as both ports are reachable, 1 on timeout.
timeout=60
interval=1
end_time=$((SECONDS + timeout))

while (( SECONDS < end_time )); do
  if /opt/busybox/nc localhost 88 -e true && /opt/busybox/nc localhost 749 -e true; then
    echo "KDC is now available"
    # Leave immediately on success. Re-checking the clock after the loop could
    # misreport a success on the final iteration as a timeout.
    exit 0
  fi
  echo "KDC is not available, retrying in $interval seconds..."
  sleep "$interval"
done

# Only reachable when the loop ran out of time without a successful probe.
echo "Timeout reached. KDC is still not available." >&2
exit 1
EOF
# Script that renders the Hive/Hadoop configuration and starts the metastore.
COPY <<"EOF" /start-hive-metastore.sh
#!/bin/bash
# Generate hive-site.xml and core-site.xml for the requested authentication
# mode, then start the Hive MetaStore service in the foreground.
#
# Environment:
#   HADOOP_SECURITY_AUTHENTICATION       "simple" (default) or "kerberos"
#   HIVE_METASTORE_KERBEROS_PRINCIPAL    kerberos only, default hive/localhost
#   HIVE_METASTORE_KERBEROS_KEYTAB_FILE  kerberos only, default /hive.service.keytab
#   HIVE_HOME, HADOOP_HOME               installation roots (set by the image)
#
# Exits non-zero when the auth type is invalid, when the KDC does not become
# available, or when the principal/keytab cannot be created.
set -euo pipefail

auth=${HADOOP_SECURITY_AUTHENTICATION:-simple}

if [[ "${auth}" == "kerberos" ]]; then
  # With `set -e`, a KDC timeout aborts here instead of starting a metastore
  # that cannot authenticate.
  bash /wait-kdc-ready.sh

  # Create Hive MetaStore Principal
  HIVE_METASTORE_KERBEROS_PRINCIPAL=${HIVE_METASTORE_KERBEROS_PRINCIPAL:-hive/localhost}
  HIVE_METASTORE_KERBEROS_KEYTAB_FILE=${HIVE_METASTORE_KERBEROS_KEYTAB_FILE:-/hive.service.keytab}
  # Primary component of the principal: everything before the first '/'.
  HIVE_METASTORE_KERBEROS_PRINCIPAL_NAME=${HIVE_METASTORE_KERBEROS_PRINCIPAL%%/*}

  # An already present keytab is reused as-is.
  if [[ ! -f "${HIVE_METASTORE_KERBEROS_KEYTAB_FILE}" ]]; then
    /usr/local/bin/create-principal \
      -p "${HIVE_METASTORE_KERBEROS_PRINCIPAL}" \
      -k "${HIVE_METASTORE_KERBEROS_KEYTAB_FILE}"
  fi

  cat > "${HIVE_HOME}/conf/hive-site.xml" <<_EOF
<configuration>
  <property>
    <name>hive.metastore.schema.verification</name>
    <value>false</value>
  </property>
  <property>
    <name>datanucleus.schema.autoCreateTables</name>
    <value>true</value>
  </property>
  <property>
    <name>hive.metastore.sasl.enabled</name>
    <value>true</value>
  </property>
  <property>
    <name>hive.metastore.kerberos.principal</name>
    <value>${HIVE_METASTORE_KERBEROS_PRINCIPAL}</value>
  </property>
  <property>
    <name>hive.metastore.kerberos.keytab.file</name>
    <value>${HIVE_METASTORE_KERBEROS_KEYTAB_FILE}</value>
  </property>
</configuration>
_EOF

  cat > "${HADOOP_HOME}/etc/hadoop/core-site.xml" <<_EOF
<configuration>
  <property>
    <name>hadoop.security.authentication</name>
    <value>kerberos</value>
  </property>
  <property>
    <name>hadoop.proxyuser.${HIVE_METASTORE_KERBEROS_PRINCIPAL_NAME}.hosts</name>
    <value>*</value>
  </property>
  <property>
    <name>hadoop.proxyuser.${HIVE_METASTORE_KERBEROS_PRINCIPAL_NAME}.groups</name>
    <value>*</value>
  </property>
</configuration>
_EOF

elif [[ "${auth}" == "simple" ]]; then
  cat > "${HIVE_HOME}/conf/hive-site.xml" <<_EOF
<configuration>
  <property>
    <name>hive.metastore.schema.verification</name>
    <value>false</value>
  </property>
  <property>
    <name>datanucleus.schema.autoCreateTables</name>
    <value>true</value>
  </property>
</configuration>
_EOF

  cat > "${HADOOP_HOME}/etc/hadoop/core-site.xml" <<_EOF
<configuration>
  <property>
    <name>hadoop.security.authentication</name>
    <value>simple</value>
  </property>
  <property>
    <name>hadoop.proxyuser.hive.hosts</name>
    <value>*</value>
  </property>
  <property>
    <name>hadoop.proxyuser.hive.groups</name>
    <value>*</value>
  </property>
</configuration>
_EOF

else
  echo "Invalid auth type: ${auth}" >&2
  exit 1
fi

# Replace this shell with the metastore launcher so the process that started
# this script observes the metastore's own exit status.
exec bash "${HIVE_HOME}/bin/hive" --service metastore
EOF
# Container entrypoint: starts the KDC daemons and the metastore.
COPY <<"EOF" /entrypoint.sh
#!/usr/bin/env bash
# Start krb5kdc, kadmind and the Hive MetaStore, stream the metastore log to
# stdout, and exit with the metastore start script's status once it ends.
set -euo pipefail

# Both daemons are told not to fork (-n / -nofork) and run as background jobs
# of this shell for the TEST.ORG realm.
/usr/sbin/krb5kdc -P /var/run/krb5kdc.pid -n -r TEST.ORG &
/usr/sbin/kadmind -P /var/run/kadmind.pid -nofork -r TEST.ORG &

/bin/bash /start-hive-metastore.sh &
metastore_pid=$!

# Follow the log in the background. -F keeps retrying until the file exists.
# NOTE(review): /tmp/root/hive.log is presumably Hive's default log location
# for the root user; confirm if the logging configuration changes.
tail -F /tmp/root/hive.log &

# Forward stop requests to the metastore. The interrupted `wait` below then
# returns a status above 128 and `set -e` ends this script.
trap 'kill -TERM "${metastore_pid}" 2>/dev/null || true' TERM INT

# Block on the metastore instead of on `tail`, so a failed or finished
# metastore stops the container rather than leaving it tailing a log forever.
wait "${metastore_pid}"
EOF
# Ports the container listens on:
#   9083/tcp                   Hive MetaStore Thrift Port
#   88/tcp, 88/udp, 749/tcp    KDC Ports
EXPOSE 9083/tcp 88/tcp 88/udp 749/tcp

# Exec-form entrypoint: run the startup script with bash.
ENTRYPOINT ["/usr/bin/bash", "/entrypoint.sh"]