#!/bin/bash
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
# Script which wraps running a test and redirects its output to a
# test log directory.
#
# If KUDU_COMPRESS_TEST_OUTPUT is non-zero, then the logs will be
# gzip-compressed while they are written.
#
# If KUDU_FLAKY_TEST_ATTEMPTS is set, and either the test being run matches one
# of the lines in the file KUDU_FLAKY_TEST_LIST or KUDU_RETRY_ALL_FAILED_TESTS
# is non-zero, then the test will be retried on failure up to the specified
# number of times. This can be used in the gerrit workflow to prevent annoying
# false -1s caused by tests that are known to be flaky in master.
#
# If KUDU_REPORT_TEST_RESULTS is non-zero, then tests are reported to the
# central test server.
#
# If KUDU_MEASURE_TEST_CPU_CONSUMPTION is non-zero, then tests will be wrapped
# in a 'perf stat' command which measures the CPU consumption on a periodic
# basis, writing the output into '<test>.<shard>.perf.txt' logs in the test log
# directory. This is handy in order to determine an appropriate value for the
# 'PROCESSORS' property passed to 'ADD_KUDU_TEST' in CMakeLists.txt.
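#
# Example invocation (the paths and values below are illustrative only):
#
#   KUDU_COMPRESS_TEST_OUTPUT=1 KUDU_FLAKY_TEST_ATTEMPTS=3 \
#     build-support/run-test.sh build/latest/bin/foo-test --gtest_filter=MyTest.*
#
# Any arguments after the test path are passed through to the test itself.
#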
# Path to the test executable or script to be run.
# May be relative or absolute.
TEST_PATH=$1
# Absolute path to the root source directory. This script is expected to live within it.
SOURCE_ROOT=$(cd $(dirname "$BASH_SOURCE")/.. ; pwd)
# Absolute path to the root build directory. The test path is expected to be within it.
BUILD_ROOT=$(cd $(dirname "$TEST_PATH")/.. ; pwd)
TEST_LOGDIR=$BUILD_ROOT/test-logs
mkdir -p $TEST_LOGDIR
TEST_DEBUGDIR=$BUILD_ROOT/test-debug
mkdir -p $TEST_DEBUGDIR
TEST_DIRNAME=$(cd $(dirname $TEST_PATH); pwd)
TEST_FILENAME=$(basename $TEST_PATH)
ABS_TEST_PATH=$TEST_DIRNAME/$TEST_FILENAME
shift
# Strip the file extension (if any); the path was already removed by basename.
# The "short" test name doesn't include the shard number.
SHORT_TEST_NAME=$(echo $TEST_FILENAME | perl -pe 's/\..+?$//')
# The full test name does include the shard number if the test is sharded.
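# For example, with GTEST_TOTAL_SHARDS=4 and GTEST_SHARD_INDEX=2 (illustrative
# values), a binary named foo-test is logged under the name "foo-test.2".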
if [ ${GTEST_TOTAL_SHARDS:-0} -gt 1 ]; then
TEST_NAME=${SHORT_TEST_NAME}.${GTEST_SHARD_INDEX:?}
else
TEST_NAME=${SHORT_TEST_NAME}
fi
# Determine whether the user has chosen to retry all failed tests, or whether
# the test is known to be flaky by checking it against the user-specified list.
TEST_EXECUTION_ATTEMPTS=1
if [ -n "$KUDU_RETRY_ALL_FAILED_TESTS" ]; then
echo "Will retry all failed tests"
TEST_IS_RETRYABLE=1
elif [ -n "$KUDU_FLAKY_TEST_LIST" ]; then
if [ -f "$KUDU_FLAKY_TEST_LIST" ]; then
if grep -q --line-regexp "$SHORT_TEST_NAME" "$KUDU_FLAKY_TEST_LIST"; then
TEST_IS_RETRYABLE=1
fi
else
echo "Flaky test list file $KUDU_FLAKY_TEST_LIST missing"
fi
fi
if [ -n "$TEST_IS_RETRYABLE" ]; then
TEST_EXECUTION_ATTEMPTS=${KUDU_FLAKY_TEST_ATTEMPTS:-1}
echo "$TEST_NAME is a retryable test. Will attempt running it"
echo "up to $TEST_EXECUTION_ATTEMPTS times."
fi
# We run each test in its own subdir to avoid core file related races.
TEST_WORKDIR=$BUILD_ROOT/test-work/$TEST_NAME
mkdir -p $TEST_WORKDIR
pushd $TEST_WORKDIR >/dev/null || exit 1
rm -f *
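# Without pipefail, a failing test piped into the log filters below would be
# masked by the filters' successful exit status.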
set -o pipefail
LOGFILE=$TEST_LOGDIR/$TEST_NAME.txt
XMLFILE=$TEST_LOGDIR/$TEST_NAME.xml
export GTEST_OUTPUT="xml:$XMLFILE" # Enable JUnit-compatible XML output.
# Remove both the compressed and uncompressed output, so the developer
# doesn't accidentally get confused and read output from a prior test
# run.
rm -f $LOGFILE $LOGFILE.gz
if [ -n "$KUDU_COMPRESS_TEST_OUTPUT" ] && [ "$KUDU_COMPRESS_TEST_OUTPUT" -ne 0 ] ; then
pipe_cmd=gzip
grep_cmd=$(which zgrep)
LOGFILE=${LOGFILE}.gz
else
pipe_cmd=cat
grep_cmd=$(which grep)
fi
# Set a 15-minute timeout for tests run via 'make test'.
# This keeps our jenkins builds from hanging if a test deadlocks or
# otherwise never exits.
#
# NOTE: this should be kept in sync with the default value of ARG_TIMEOUT
# in the definition of ADD_KUDU_TEST in the top-level CMakeLists.txt.
KUDU_TEST_TIMEOUT=${KUDU_TEST_TIMEOUT:-900}
# Allow for collecting core dumps.
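# The default of 0 disables them; set KUDU_TEST_ULIMIT_CORE=unlimited (or a
# size limit, in blocks) to enable collection.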
KUDU_TEST_ULIMIT_CORE=${KUDU_TEST_ULIMIT_CORE:-0}
ulimit -c $KUDU_TEST_ULIMIT_CORE
# Run the actual test.
for ATTEMPT_NUMBER in $(seq 1 $TEST_EXECUTION_ATTEMPTS) ; do
if [ $ATTEMPT_NUMBER -lt $TEST_EXECUTION_ATTEMPTS ]; then
# If the test fails, the test output may or may not be left behind,
# depending on whether the test cleaned up or exited immediately. Either
# way we need to clean it up. We do this by comparing the data directory
# contents before and after the test runs, and deleting anything new.
#
# The comm program requires that its two inputs be sorted.
TEST_TMPDIR_BEFORE=$(find $TEST_TMPDIR -maxdepth 1 -type d | sort)
fi
# gtest won't overwrite old junit test files, resulting in a build failure
# even when retries are successful.
rm -f $XMLFILE
if [[ $OSTYPE =~ ^darwin ]]; then
#
# For builds on Mac OS X 10.11, neither (g)addr2line nor atos translates
# addresses into line numbers. The atos utility is able to do that only
# if a load address (-l <addr>) or a reference to the currently running
# process (-p <pid>) is given. This is so even for binaries linked with
# PIE disabled.
#
addr2line_filter=cat
else
addr2line_filter="$SOURCE_ROOT/build-support/stacktrace_addr2line.pl $ABS_TEST_PATH"
fi
echo "Running $TEST_NAME, redirecting output into $LOGFILE" \
"(attempt ${ATTEMPT_NUMBER}/$TEST_EXECUTION_ATTEMPTS)"
test_command=$ABS_TEST_PATH
if [ -n "$KUDU_MEASURE_TEST_CPU_CONSUMPTION" ] && [ "$KUDU_MEASURE_TEST_CPU_CONSUMPTION" -ne 0 ] ; then
perf_file=$TEST_LOGDIR/$TEST_NAME.perf.txt
echo "Will wrap test in perf-stat, output to ${perf_file}..."
test_command="perf stat -I1000 -e task-clock -o $perf_file $test_command"
fi
$test_command "$@" --test_timeout_after $KUDU_TEST_TIMEOUT 2>&1 \
| $addr2line_filter \
| $pipe_cmd > $LOGFILE
STATUS=$?
# An empty gzipped file is corrupt because even a logically empty gzipped file
# should contain at least the gzip header. If we detect this, treat the test
# log output as a plain text file.
if [[ -f $LOGFILE && ! -s $LOGFILE && $LOGFILE =~ \.gz$ ]]; then
RENAMED_LOGFILE=${LOGFILE%%.gz}
mv $LOGFILE $RENAMED_LOGFILE
LOGFILE=$RENAMED_LOGFILE
fi
# TSAN doesn't always exit with a non-zero exit code due to a bug:
# mutex errors don't get reported through the normal error reporting infrastructure.
# So we make sure to detect this and exit 1.
#
# Additionally, certain types of failures won't show up in the standard JUnit
# XML output from gtest. We assume that gtest knows better than us and our
# regexes in most cases, but for certain errors we delete the resulting xml
# file and let our own post-processing step regenerate it.
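# zgrep honors the GREP environment variable, so point it at egrep to ensure
# the '|' alternation in the pattern below is treated as an extended regex.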
export GREP=$(which egrep)
if zgrep --silent "ThreadSanitizer|Leak check.*detected leaks" $LOGFILE ; then
echo ThreadSanitizer or leak check failures in $LOGFILE
STATUS=1
rm -f $XMLFILE
fi
if [ $ATTEMPT_NUMBER -lt $TEST_EXECUTION_ATTEMPTS ]; then
# Now delete any new test output.
TEST_TMPDIR_AFTER=$(find $TEST_TMPDIR -maxdepth 1 -type d | sort)
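# 'comm -13' emits only the lines unique to its second input, i.e. the
# directories created during this attempt.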
DIFF=$(comm -13 <(echo "$TEST_TMPDIR_BEFORE") \
<(echo "$TEST_TMPDIR_AFTER"))
for DIR in $DIFF; do
# Multiple tests may be running concurrently. To avoid deleting the
# wrong directories, constrain to only directories beginning with the
# test name.
#
# This may delete old test directories belonging to this test, but
# that's not typically a concern when rerunning flaky tests.
if [[ $DIR =~ ^$TEST_TMPDIR/$TEST_NAME ]]; then
echo Deleting leftover flaky test directory "$DIR"
rm -Rf "$DIR"
fi
done
fi
if [ -n "$KUDU_REPORT_TEST_RESULTS" ] && [ "$KUDU_REPORT_TEST_RESULTS" -ne 0 ]; then
echo Reporting results
$SOURCE_ROOT/build-support/report-test.sh "$ABS_TEST_PATH" "$LOGFILE" "$STATUS"
fi
if [ "$STATUS" -eq "0" ]; then
break
elif [ "$ATTEMPT_NUMBER" -lt "$TEST_EXECUTION_ATTEMPTS" ]; then
echo Test failed attempt number $ATTEMPT_NUMBER
echo Will retry...
fi
done
# If the test failed and the log contains a LeakSanitizer report, add a new
# test case entry to the XML file for the leak report. Otherwise Jenkins
# won't show that the test had LSAN errors.
if [[ "$STATUS" -ne "0" ]] && $grep_cmd --silent "ERROR: LeakSanitizer: detected memory leaks" $LOGFILE ; then
echo Test had memory leaks. Editing XML
perl -p -i -e '
if (m#</testsuite>#) {
print "<testcase name=\"LeakSanitizer\" status=\"run\" classname=\"LSAN\">\n";
print " <failure message=\"LeakSanitizer failed\" type=\"\">\n";
print " See txt log file for details\n";
print " </failure>\n";
print "</testcase>\n";
}' $XMLFILE
fi
# Capture and compress core file and binary.
COREFILES=$(ls | grep ^core)
if [ -n "$COREFILES" ]; then
echo Found core dump. Saving executable and core files.
gzip < $ABS_TEST_PATH > "$TEST_DEBUGDIR/$TEST_NAME.gz" || exit $?
for COREFILE in $COREFILES; do
gzip < $COREFILE > "$TEST_DEBUGDIR/$TEST_NAME.$COREFILE.gz" || exit $?
done
# Pull in any .so files as well.
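# Each relevant ldd line looks like "libfoo.so => /path/to/libfoo.so (0x...)";
# field 3 is the resolved path, filtered to libraries that live under
# BUILD_ROOT.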
for LIB in $(ldd $ABS_TEST_PATH | grep $BUILD_ROOT | awk '{print $3}'); do
LIB_NAME=$(basename $LIB)
gzip < $LIB > "$TEST_DEBUGDIR/$LIB_NAME.gz" || exit $?
done
fi
popd >/dev/null
rm -Rf $TEST_WORKDIR
exit $STATUS