#!/usr/bin/env bash
################################################################################
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
################################################################################
set -o pipefail
source "$(dirname "$0")"/common.sh
FLINK_TARBALL_DIR=$TEST_DATA_DIR
FLINK_TARBALL=flink.tar.gz
FLINK_DIRNAME=$(basename $FLINK_DIR)
MAX_RETRY_SECONDS=120
CLUSTER_SETUP_RETRIES=3
echo "Flink Tarball directory $FLINK_TARBALL_DIR"
echo "Flink tarball filename $FLINK_TARBALL"
echo "Flink distribution directory name $FLINK_DIRNAME"
echo "End-to-end directory $END_TO_END_DIR"
docker --version
docker-compose --version
# make sure we stop our cluster at the end
function cluster_shutdown {
    # don't call ourselves again for another signal interruption
    trap "exit 1" INT
    # don't call ourselves again for normal exit
    trap "" EXIT

    docker-compose -f $END_TO_END_DIR/test-scripts/docker-hadoop-secure-cluster/docker-compose.yml down
    rm $FLINK_TARBALL_DIR/$FLINK_TARBALL
}
trap cluster_shutdown INT
trap cluster_shutdown EXIT
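
# Registering cluster_shutdown for both INT and EXIT means the compose cluster and
# the tarball are cleaned up whether the script finishes normally, fails, or is
# interrupted with Ctrl-C; the handler re-arms the traps first so a second signal
# cannot re-enter it.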
function start_hadoop_cluster() {
    echo "Starting Hadoop cluster"
    docker-compose -f $END_TO_END_DIR/test-scripts/docker-hadoop-secure-cluster/docker-compose.yml up -d

    # wait for Kerberos to be set up
    start_time=$(date +%s)
    until docker logs master 2>&1 | grep -q "Finished master initialization"; do
        current_time=$(date +%s)
        time_diff=$((current_time - start_time))

        if [ $time_diff -ge $MAX_RETRY_SECONDS ]; then
            return 1
        else
            echo "Waiting for Hadoop cluster to come up. We have been trying for $time_diff seconds, retrying ..."
            sleep 10
        fi
    done

    # perform health checks
    if ! { [ $(docker inspect -f '{{.State.Running}}' master 2>&1) = 'true' ] &&
           [ $(docker inspect -f '{{.State.Running}}' slave1 2>&1) = 'true' ] &&
           [ $(docker inspect -f '{{.State.Running}}' slave2 2>&1) = 'true' ] &&
           [ $(docker inspect -f '{{.State.Running}}' kdc 2>&1) = 'true' ]; };
    then
        return 1
    fi

    return 0
}
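
# If the cluster repeatedly fails its health checks, the container logs are usually
# the quickest way to find out why. For example (manual debugging, not part of the
# test run):
#
#   docker logs -f master
#   docker logs kdc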
mkdir -p $FLINK_TARBALL_DIR
tar czf $FLINK_TARBALL_DIR/$FLINK_TARBALL -C $(dirname $FLINK_DIR) .
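# The tarball is created relative to the parent of $FLINK_DIR, so unpacking it on
# the master node (below) recreates the distribution under a directory named
# $FLINK_DIRNAME, which the rest of the script relies on.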
echo "Building Hadoop Docker container"
until docker build --build-arg HADOOP_VERSION=2.8.4 \
        -f $END_TO_END_DIR/test-scripts/docker-hadoop-secure-cluster/Dockerfile \
        -t flink/docker-hadoop-secure-cluster:latest \
        $END_TO_END_DIR/test-scripts/docker-hadoop-secure-cluster/;
do
    # with all the downloading and Ubuntu updating, a lot of flakiness can happen;
    # make sure we don't fail immediately
    echo "Something went wrong while building the Docker image, retrying ..."
    sleep 2
done
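
# CLUSTER_STARTED acts as a sentinel: it stays 1 until start_hadoop_cluster
# succeeds, giving the cluster $CLUSTER_SETUP_RETRIES attempts to come up before
# the test aborts.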
CLUSTER_STARTED=1
for (( i = 0; i < $CLUSTER_SETUP_RETRIES; i++ ))
do
    if start_hadoop_cluster; then
        echo "Cluster started successfully."
        CLUSTER_STARTED=0
        break # continue the test; cluster setup succeeded
    fi

    echo "ERROR: Could not start Hadoop cluster. Retrying..."
    docker-compose -f $END_TO_END_DIR/test-scripts/docker-hadoop-secure-cluster/docker-compose.yml down
done

if [[ ${CLUSTER_STARTED} -ne 0 ]]; then
    echo "ERROR: Could not start Hadoop cluster. Aborting..."
    exit 1
fi
docker cp $FLINK_TARBALL_DIR/$FLINK_TARBALL master:/home/hadoop-user/
# the master container is ready, so unpack the Flink distribution in it
docker exec -it master bash -c "tar xzf /home/hadoop-user/$FLINK_TARBALL --directory /home/hadoop-user/"
# minimal Flink config
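# - security.kerberos.login.keytab/.principal point Flink at the pre-provisioned
#   hadoop-user keytab so it can authenticate against the KDC.
# - slot.request.timeout is in milliseconds; 60000 makes a hanging slot request
#   fail the test after one minute instead of blocking for the default timeout.
# - containerized.heap-cutoff-min is in MB; lowering it leaves more of the small
#   1000 MB YARN containers requested below for the actual JVM heap.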
docker exec -it master bash -c "echo \"security.kerberos.login.keytab: /home/hadoop-user/hadoop-user.keytab\" > /home/hadoop-user/$FLINK_DIRNAME/conf/flink-conf.yaml"
docker exec -it master bash -c "echo \"security.kerberos.login.principal: hadoop-user\" >> /home/hadoop-user/$FLINK_DIRNAME/conf/flink-conf.yaml"
docker exec -it master bash -c "echo \"slot.request.timeout: 60000\" >> /home/hadoop-user/$FLINK_DIRNAME/conf/flink-conf.yaml"
docker exec -it master bash -c "echo \"containerized.heap-cutoff-min: 100\" >> /home/hadoop-user/$FLINK_DIRNAME/conf/flink-conf.yaml"
echo "Flink config:"
docker exec -it master bash -c "cat /home/hadoop-user/$FLINK_DIRNAME/conf/flink-conf.yaml"
# make the output path random in case it already exists, for example if we
# have cached Docker containers
OUTPUT_PATH=hdfs:///user/hadoop-user/wc-out-$RANDOM
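# ($RANDOM is a bash builtin that expands to a pseudo-random integer between 0 and
# 32767, which is enough to keep paths from colliding between test runs.)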
start_time=$(date +%s)
# It's important to run this with higher parallelism; otherwise the JM and TM might
# end up on the same YARN node and we would not actually test the keytab shipping.
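# For reference: -yn requests 3 TaskManager containers, -ys gives each one a single
# slot, -ytm/-yjm set the TaskManager/JobManager container memory in MB, and -p 3
# forces one subtask per TaskManager so the job spreads across the cluster.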
if docker exec -it master bash -c "export HADOOP_CLASSPATH=\`hadoop classpath\` && \
        /home/hadoop-user/$FLINK_DIRNAME/bin/flink run -m yarn-cluster -yn 3 -ys 1 -ytm 1000 -yjm 1000 \
        -p 3 /home/hadoop-user/$FLINK_DIRNAME/examples/streaming/WordCount.jar --output $OUTPUT_PATH";
then
    # reading the job output from HDFS requires a Kerberos ticket, so obtain one
    # from the keytab first and destroy it again once we are done
    docker exec -it master bash -c "kinit -kt /home/hadoop-user/hadoop-user.keytab hadoop-user"
    docker exec -it master bash -c "hdfs dfs -ls $OUTPUT_PATH"
    OUTPUT=$(docker exec -it master bash -c "hdfs dfs -cat $OUTPUT_PATH/*")
    docker exec -it master bash -c "kdestroy"
    echo "$OUTPUT"
else
    echo "Running the job failed."
    mkdir -p $TEST_DATA_DIR/logs
    echo "Hadoop logs:"
    docker cp master:/var/log/hadoop/* $TEST_DATA_DIR/logs/
    for f in $TEST_DATA_DIR/logs/*; do
        echo "$f:"
        cat $f
    done
    echo "Docker logs:"
    docker logs master
    exit 1
fi
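
# The checks below assert a few known (word,count) pairs. WordCount falls back to
# its built-in sample text when no --input is given, so these counts are stable
# across runs; a mismatch dumps the logs for debugging.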
if [[ ! "$OUTPUT" =~ "consummation,1" ]]; then
echo "Output does not contain (consummation, 1) as required"
mkdir -p $TEST_DATA_DIR/logs
echo "Hadoop logs:"
docker cp master:/var/log/hadoop/* $TEST_DATA_DIR/logs/
for f in $TEST_DATA_DIR/logs/*; do
echo "$f:"
cat $f
done
echo "Docker logs:"
docker logs master
exit 1
fi
if [[ ! "$OUTPUT" =~ "of,14" ]]; then
echo "Output does not contain (of, 14) as required"
exit 1
fi
if [[ ! "$OUTPUT" =~ "calamity,1" ]]; then
echo "Output does not contain (calamity, 1) as required"
exit 1
fi
echo "Running Job without configured keytab, the exception you see below is expected"
docker exec -it master bash -c "echo \"\" > /home/hadoop-user/$FLINK_DIRNAME/conf/flink-conf.yaml"
# verify that it doesn't work if we don't configure a keytab
OUTPUT=$(docker exec -it master bash -c "export HADOOP_CLASSPATH=\`hadoop classpath\` && \
        /home/hadoop-user/$FLINK_DIRNAME/bin/flink run \
        -m yarn-cluster -yn 3 -ys 1 -ytm 1000 -yjm 1000 -p 3 \
        /home/hadoop-user/$FLINK_DIRNAME/examples/streaming/WordCount.jar --output $OUTPUT_PATH")
echo "$OUTPUT"

if [[ ! "$OUTPUT" =~ "Hadoop security with Kerberos is enabled but the login user does not have Kerberos credentials" ]]; then
    echo "Output does not contain the Kerberos error message as required"
    exit 1
fi