#!/usr/bin/env bash
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# Strict mode: abort on a failing command, an unset variable, or a failing pipeline stage.
set -euo pipefail

# On error, print the script name, the failing line number and that line's source text.
# The launch directory is captured now because "$0" may be a relative path and later
# steps change directory (pushd/cd) before a failure can occur. Everything is quoted so
# that directories with spaces work and the echoed source line is not glob-expanded.
BUILDALL_LAUNCH_DIR=$PWD
trap 'echo "Error in $0 at line $LINENO: $(cd "$BUILDALL_LAUNCH_DIR" && awk "NR == $LINENO" "$0")"' ERR

# run buildall.sh -help to see options

# Absolute path of the directory that contains this script; it is exported as
# IMPALA_HOME further down. The output of cd is discarded so a CDPATH hit cannot leak
# into the value, and && makes a failed cd fail the assignment instead of silently
# yielding the current directory.
ROOT=$(dirname -- "$0")
ROOT=$(cd -- "$ROOT" > /dev/null && pwd)

# Grab this *before* we source impala-config.sh to see if the caller has
# kerberized environment variables already or not.
# NEEDS_RE_SOURCE_NOTE=1 makes the end of the script print a reminder to re-source
# impala-config.sh; it is not needed when a KDC realm was already in the environment.
: "${MINIKDC_REALM=}"
if [[ -n "${MINIKDC_REALM}" ]]; then
  NEEDS_RE_SOURCE_NOTE=0
else
  NEEDS_RE_SOURCE_NOTE=1
fi

# Point IMPALA_HOME at this checkout and source its configuration script. The build is
# abandoned if sourcing reports failure.
export IMPALA_HOME=$ROOT
if ! . "$ROOT/bin/impala-config.sh"; then
  echo "Bad configuration, aborting buildall."
  exit 1
fi

# Defaults that are only changeable via the command line.
CLEAN_ACTION=1             # 1: run bin/clean.sh before building (-noclean clears it)
TESTDATA_ACTION=0          # 1: load test data (-testdata or one of the snapshot options)
TESTS_ACTION=1             # 1: run the tests (-skiptests / -notests clear it)
FORMAT_CLUSTER=0           # 1: pass -format to testdata/bin/run-all.sh
FORMAT_METASTORE=0         # 1: ask for -create_metastore (only without a metastore snapshot)
FORMAT_SENTRY_POLICY_DB=0  # 1: ask for -create_sentry_policy_db
IMPALA_KERBERIZE=0         # 1: kerberized mode (-kerberize and its aliases)
SNAPSHOT_FILE=             # full path of the file given with -snapshot_file
METASTORE_SNAPSHOT_FILE=   # full path of the file given with -metastore_snapshot_file
MAKE_IMPALA_ARGS=""        # space-separated options forwarded to bin/make_impala.sh
CODE_COVERAGE=0            # 1: code coverage build (-codecoverage)
BUILD_ASAN=0               # 1: address sanitizer build (-asan)
BUILD_FE_ONLY=0            # 1: build only the frontend, then exit (-fe_only)
MAKE_CMD=make              # build tool; -ninja switches it to ninja
LZO_CMAKE_ARGS=            # space-separated options for the cmake run in $IMPALA_LZO

# Defaults that can be picked up from the environment, but are overridable through the
# commandline. An empty value counts as unset.
: "${EXPLORATION_STRATEGY:=core}"
: "${CMAKE_BUILD_TYPE:=Debug}"

# parse command line options
# TODO: We have to change this to use getopts, or something more sensible.
# The loop tests the joined argument string rather than $#, so parsing stops quietly
# once only empty-string arguments remain.
while [[ -n "$*" ]]; do
  case "$1" in
    -noclean)
      CLEAN_ACTION=0
      ;;
    -testdata)
      TESTDATA_ACTION=1
      ;;
    -skiptests)
      TESTS_ACTION=0
      ;;
    -build_shared_libs|-so)
      MAKE_IMPALA_ARGS="${MAKE_IMPALA_ARGS} -build_shared_libs"
      ;;
    -notests)
      TESTS_ACTION=0
      MAKE_IMPALA_ARGS="${MAKE_IMPALA_ARGS} -notests"
      ;;
    -format)
      FORMAT_CLUSTER=1
      FORMAT_METASTORE=1
      FORMAT_SENTRY_POLICY_DB=1
      ;;
    -format_cluster)
      FORMAT_CLUSTER=1
      ;;
    -format_metastore)
      FORMAT_METASTORE=1
      ;;
    -format_sentry_policy_db)
      FORMAT_SENTRY_POLICY_DB=1
      ;;
    -release)
      CMAKE_BUILD_TYPE=Release
      ;;
    -codecoverage)
      CODE_COVERAGE=1
      ;;
    -asan)
      BUILD_ASAN=1
      ;;
    -testpairwise)
      EXPLORATION_STRATEGY=pairwise
      ;;
    -testexhaustive)
      EXPLORATION_STRATEGY=exhaustive
      ;;
    -snapshot_file)
      SNAPSHOT_FILE=${2-}
      # Quoted on purpose: a missing value or a path with spaces must be rejected
      # here rather than slip past the existence test.
      if [[ ! -f "$SNAPSHOT_FILE" ]]; then
        echo "-snapshot_file does not exist: $SNAPSHOT_FILE"
        exit 1
      fi
      TESTDATA_ACTION=1
      # Get the full path.
      SNAPSHOT_FILE=$(readlink -f -- "$SNAPSHOT_FILE")
      # Consume the option's value; the shift after esac consumes the option itself.
      shift
      ;;
    -metastore_snapshot_file)
      METASTORE_SNAPSHOT_FILE=${2-}
      if [[ ! -f "$METASTORE_SNAPSHOT_FILE" ]]; then
        echo "-metastore_snapshot_file does not exist: $METASTORE_SNAPSHOT_FILE"
        exit 1
      fi
      TESTDATA_ACTION=1
      # Get the full path.
      METASTORE_SNAPSHOT_FILE=$(readlink -f -- "$METASTORE_SNAPSHOT_FILE")
      shift
      ;;
    -k|-kerberize|-kerberos|-kerb)
      # Export to the environment for all child process tools
      export IMPALA_KERBERIZE=1
      # nounset is relaxed only while the KDC environment file is sourced.
      set +u
      . "${MINIKDC_ENV}"
      set -u
      ;;
    -v|-debug)
      echo "Running in Debug mode"
      set -x
      ;;
    -fe_only)
      BUILD_FE_ONLY=1
      ;;
    -ninja)
      MAKE_IMPALA_ARGS+=" -ninja"
      LZO_CMAKE_ARGS+=" -GNinja"
      MAKE_CMD=ninja
      ;;
    -help|*)
      # Any unrecognised argument lands here as well; both cases exit with status 1.
      echo "buildall.sh - Builds Impala and runs all tests."
      echo "[-noclean] : Omits cleaning all packages before building. Will not kill"\
           "running Hadoop services unless any -format* is True"
      echo "[-format] : Format the minicluster, metastore db, and sentry policy db"\
           " [Default: False]"
      echo "[-format_cluster] : Format the minicluster [Default: False]"
      echo "[-format_metastore] : Format the metastore db [Default: False]"
      echo "[-format_sentry_policy_db] : Format the Sentry policy db [Default: False]"
      echo "[-release] : Release build [Default: debug]"
      echo "[-codecoverage] : Build with code coverage [Default: False]"
      echo "[-asan] : Address sanitizer build [Default: False]"
      echo "[-skiptests] : Skips execution of all tests"
      echo "[-notests] : Skips building and execution of all tests"
      echo "[-testpairwise] : Run tests in 'pairwise' mode (increases"\
           "test execution time)"
      echo "[-testexhaustive] : Run tests in 'exhaustive' mode (significantly increases"\
           "test execution time)"
      echo "[-testdata] : Loads test data. Implied as true if -snapshot_file is "\
           "specified. If -snapshot_file is not specified, data will be regenerated."
      echo "[-snapshot_file <file name>] : Load test data from a snapshot file"
      echo "[-metastore_snapshot_file <file_name>]: Load the hive metastore snapshot"
      echo "[-so|-build_shared_libs] : Dynamically link executables (default is static)"
      echo "[-kerberize] : Enable kerberos on the cluster"
      echo "[-fe_only] : Build just the frontend"
      echo "-----------------------------------------------------------------------------
Examples of common tasks:

# Build and run all tests
./buildall.sh

# Build and skip tests
./buildall.sh -skiptests

# Incrementally rebuild and skip tests. Keeps existing Hadoop services running.
./buildall.sh -skiptests -noclean

# Build, load a snapshot file, run tests
./buildall.sh -snapshot_file <file>

# Build, load the hive metastore and the hdfs snapshot, run tests
./buildall.sh -snapshot_file <file> -metastore_snapshot_file <file>

# Build, generate, and incrementally load test data without formatting the mini-cluster
# (reuses existing data in HDFS if it exists). Can be faster than loading from a
# snapshot.
./buildall.sh -testdata

# Build, format mini-cluster and metastore, load all test data, run tests
./buildall.sh -testdata -format"
      exit 1
      ;;
  esac
  shift
done

# Adjust CMAKE_BUILD_TYPE for ASAN and code coverage, if necessary.
if [[ ${CODE_COVERAGE} -eq 1 ]]; then
  # Only Debug and Release have a coverage variant; any other value is left untouched.
  case "${CMAKE_BUILD_TYPE}" in
    Debug)
      CMAKE_BUILD_TYPE=CODE_COVERAGE_DEBUG
      ;;
    Release)
      CMAKE_BUILD_TYPE=CODE_COVERAGE_RELEASE
      ;;
  esac
fi
if [[ ${BUILD_ASAN} -eq 1 ]]; then
  # The next check also catches cases where CODE_COVERAGE=1, which is not supported
  # together with BUILD_ASAN=1.
  if [[ "${CMAKE_BUILD_TYPE}" != "Debug" ]]; then
    echo "Address sanitizer build not supported for build type: ${CMAKE_BUILD_TYPE}"
    exit 1
  fi
  CMAKE_BUILD_TYPE=ADDRESS_SANITIZER
fi

# If we aren't kerberized then we certainly don't need to talk about
# re-sourcing impala-config.
if [[ ${IMPALA_KERBERIZE} -eq 0 ]]; then
  NEEDS_RE_SOURCE_NOTE=0
fi

# Loading data on a filesystem other than fs.defaultFS is not supported.
# Refuse to load test data into a non-hdfs target unless a metastore snapshot was given.
if [[ -z "$METASTORE_SNAPSHOT_FILE" && "${TARGET_FILESYSTEM}" != "hdfs" &&
      $TESTDATA_ACTION -eq 1 ]]; then
  echo "The metastore snapshot is required for loading data into ${TARGET_FILESYSTEM}"
  echo "Use the -metastore_snapshot_file command line parameter."
  exit 1
fi

# option to clean everything first
if [[ $CLEAN_ACTION -eq 1 ]]; then
  "$IMPALA_HOME/bin/clean.sh"
fi

# Populate necessary thirdparty components unless it's set to be skipped.
# An unset SKIP_TOOLCHAIN_BOOTSTRAP is treated as "false" so that `set -u` cannot
# abort the build here.
if [[ "${SKIP_TOOLCHAIN_BOOTSTRAP:-false}" = true ]]; then
  echo "SKIP_TOOLCHAIN_BOOTSTRAP is true, skipping download of Python dependencies."
  echo "SKIP_TOOLCHAIN_BOOTSTRAP is true, skipping toolchain bootstrap."
else
  echo "Downloading Python dependencies"
  # Download all the Python dependencies we need before doing anything
  # of substance. Does not re-download anything that is already present.
  # A failed download only produces warnings; the build carries on.
  if ! "$IMPALA_HOME/infra/python/deps/download_requirements"; then
    echo "Warning: Unable to download Python requirements."
    echo "Warning: bootstrap_virtualenv or other Python-based tooling may fail."
  else
    echo "Finished downloading Python dependencies"
  fi

  echo "Downloading and extracting toolchain dependencies."
  "$IMPALA_HOME/bin/bootstrap_toolchain.py"
  echo "Toolchain bootstrap complete."
fi

# Every make_impala.sh invocation below receives the final build type.
MAKE_IMPALA_ARGS="${MAKE_IMPALA_ARGS} -build_type=${CMAKE_BUILD_TYPE}"

# Frontend-only mode: configure with cmake, build the "fe" target, and stop.
if [[ $BUILD_FE_ONLY -eq 1 ]]; then
  # MAKE_IMPALA_ARGS is a space-separated option string, so it is expanded unquoted.
  "$IMPALA_HOME/bin/make_impala.sh" ${MAKE_IMPALA_ARGS} -cmake_only
  ${MAKE_CMD} fe
  exit 0
fi

# Copy the hadoop-lzo native libraries into Hadoop's native library directory when
# they are present. The glob is expanded into an array so the existence test stays
# valid when the platform pattern matches several directories or the paths contain
# spaces; the libraries are taken from the first matching directory.
lzo_native_libs=("$HADOOP_LZO"/build/native/Linux-*-*/lib/libgplcompression.so)
if [[ -e "${lzo_native_libs[0]:-}" ]]; then
  cp "${lzo_native_libs[0]%/*}"/libgplcompression.* "$HADOOP_HOME/lib/native"
else
  echo "No hadoop-lzo found"
fi

# Stop any running Impala services.
"${IMPALA_HOME}/bin/start-impala-cluster.py" --kill --force

if [[ $CLEAN_ACTION -eq 1 || $FORMAT_METASTORE -eq 1 || $FORMAT_CLUSTER -eq 1 ||
      $FORMAT_SENTRY_POLICY_DB -eq 1 || -n "$METASTORE_SNAPSHOT_FILE" ]]; then
  # Kill any processes that may be accessing postgres metastore. To be safe, this is done
  # before we make any changes to the config files.
  # errexit is suspended so that a non-zero status from kill-all.sh does not stop the build.
  set +e
  "${IMPALA_HOME}/testdata/bin/kill-all.sh"
  set -e
fi

# Collect the optional flags for create-test-configuration.sh.
CREATE_TEST_CONFIG_ARGS=""
if [[ $FORMAT_SENTRY_POLICY_DB -eq 1 ]]; then
  CREATE_TEST_CONFIG_ARGS+=" -create_sentry_policy_db"
fi

# -create_metastore is only requested when no metastore snapshot was given.
if [[ $FORMAT_METASTORE -eq 1 && -z "$METASTORE_SNAPSHOT_FILE" ]]; then
  CREATE_TEST_CONFIG_ARGS+=" -create_metastore"
fi

# Generate the Hadoop configs needed by Impala
# CREATE_TEST_CONFIG_ARGS is a space-separated flag list, so it is expanded unquoted.
"${IMPALA_HOME}/bin/create-test-configuration.sh" ${CREATE_TEST_CONFIG_ARGS}

# If a metastore snapshot exists, load it.
# The path is tested and passed quoted so that a snapshot path containing spaces works.
if [[ -n "$METASTORE_SNAPSHOT_FILE" ]]; then
  echo "Loading metastore snapshot"
  "${IMPALA_HOME}/testdata/bin/load-metastore-snapshot.sh" "$METASTORE_SNAPSHOT_FILE"
fi

# build common and backend
echo "Calling make_impala.sh ${MAKE_IMPALA_ARGS}"
# MAKE_IMPALA_ARGS is a space-separated option string, so it is expanded unquoted.
"$IMPALA_HOME/bin/make_impala.sh" ${MAKE_IMPALA_ARGS}

# Build the code in $IMPALA_LZO when that path exists. The test is quoted so that an
# empty IMPALA_LZO means "skip" instead of passing the check and failing in pushd.
if [[ -e "$IMPALA_LZO" ]]; then
  pushd "$IMPALA_LZO"
  LZO_CMAKE_ARGS+=" -DCMAKE_TOOLCHAIN_FILE=./cmake_modules/toolchain.cmake"
  # Remove the cached cmake configuration before cmake runs again.
  rm -f CMakeCache.txt
  # LZO_CMAKE_ARGS is a space-separated option string, so it is expanded unquoted.
  cmake ${LZO_CMAKE_ARGS}
  ${MAKE_CMD}
  popd
fi

# build the external data source API
pushd "${IMPALA_HOME}/ext-data-source"
"${IMPALA_HOME}/bin/mvn-quiet.sh" install -DskipTests
popd

# build frontend and copy dependencies
pushd "${IMPALA_FE_DIR}"
"${IMPALA_HOME}/bin/mvn-quiet.sh" package -DskipTests
popd

# Build the shell tarball
echo "Creating shell tarball"
"${IMPALA_HOME}/shell/make_shell_tarball.sh"

# Run testdata/bin/run-all.sh: with -format when a cluster format was requested,
# otherwise only when test data will be loaded or tests will be run.
if [[ $FORMAT_CLUSTER -eq 1 ]]; then
  "$IMPALA_HOME/testdata/bin/run-all.sh" -format
elif [[ $TESTDATA_ACTION -eq 1 || $TESTS_ACTION -eq 1 ]]; then
  "$IMPALA_HOME/testdata/bin/run-all.sh"
fi

#
# KERBEROS TODO
# There is still work to be done for kerberos.
# - The hive metastore needs to be kerberized
# - If the user principal is "impala/localhost", MR jobs complain that user
#   "impala" is not user ${USER}. But if the principal is ${USER}/localhost,
#   the impala daemons change it to impala/localhost in
#   KerberosAuthProvider::RunKinit() - and there may be other difficulties
#   down the road with getting all the permissions correct.
# - Further Beeline -> HiveServer2 -> HBase|MapReduce combo issues
# - Getting farther down the testing path, it's likely more issues will turn up
# - Further extensive testing
#
# In kerberized mode the data-load and test steps are switched off and the Impala
# daemons are started instead.
if [[ ${IMPALA_KERBERIZE} -eq 1 ]]; then
  if [[ ${TESTDATA_ACTION} -eq 1 || ${TESTS_ACTION} -eq 1 ]]; then
    echo "At this time we only support cluster creation and impala daemon"
    echo "bringup in kerberized mode. Data won't be loaded, and tests"
    echo "won't be run. The impala daemons will be started."
    TESTDATA_ACTION=0
    TESTS_ACTION=0
    "${IMPALA_HOME}/bin/start-impala-cluster.py"
  fi
fi
# END KERBEROS TODO

#
# Don't try to run tests without data!
#
# TESTWH_ITEMS counts the `hadoop fs -ls` output lines that mention test-warehouse; a
# failing hadoop command, or no matching line, counts as zero. Fewer than five such
# lines is treated as "no data loaded".
TESTWH_ITEMS=$( (hadoop fs -ls "${FILESYSTEM_PREFIX}/test-warehouse" 2> /dev/null || true) \
  | grep -c test-warehouse || true)
if [[ ${TESTS_ACTION} -eq 1 && ${TESTDATA_ACTION} -eq 0 && ${TESTWH_ITEMS} -lt 5 ]]; then
  # Stop command tracing (if -v enabled it) so the message below stays readable.
  set +x
  echo "You just asked buildall to run tests, but did not supply any data."
  echo "Running tests without data doesn't work. Exiting now."
  exit 1
fi

if [[ $TESTDATA_ACTION -eq 1 ]]; then
  # Create testdata.
  "$IMPALA_HOME/bin/create_testdata.sh"
  cd "$ROOT"
  # We have 4 cases:
  # - test-warehouse and metastore snapshots exists.
  # - Only the test-warehouse snapshot exists.
  # - Only the metastore snapshot exists.
  # - Neither of them exist.
  # The arguments are kept in an array so a snapshot path with spaces stays one word.
  CREATE_LOAD_DATA_ARGS=()
  if [[ -n "$SNAPSHOT_FILE" && -n "$METASTORE_SNAPSHOT_FILE" ]]; then
    CREATE_LOAD_DATA_ARGS=(-snapshot_file "${SNAPSHOT_FILE}" -skip_metadata_load)
  elif [[ -n "$SNAPSHOT_FILE" ]]; then
    CREATE_LOAD_DATA_ARGS=(-snapshot_file "${SNAPSHOT_FILE}")
  elif [[ -n "$METASTORE_SNAPSHOT_FILE" ]]; then
    CREATE_LOAD_DATA_ARGS=(-skip_metadata_load -skip_snapshot_load)
  fi
  # "Y" is supplied on stdin. The ${arr[@]+...} form keeps an empty array from
  # tripping `set -u` on older bash releases.
  "${IMPALA_HOME}/testdata/bin/create-load-data.sh" \
    ${CREATE_LOAD_DATA_ARGS[@]+"${CREATE_LOAD_DATA_ARGS[@]}"} <<< Y
fi

# Run the test suite with the selected exploration strategy; -c is added for code
# coverage builds.
if [[ $TESTS_ACTION -eq 1 ]]; then
  run_all_tests_args=(-e "$EXPLORATION_STRATEGY")
  if [[ $CODE_COVERAGE -ne 0 ]]; then
    run_all_tests_args+=(-c)
  fi
  "${IMPALA_HOME}/bin/run-all-tests.sh" "${run_all_tests_args[@]}"
fi

# Generate list of files for Cscope to index
"$IMPALA_HOME/bin/gen-cscope.sh"

# Remind the user to re-source impala-config.sh when this run was kerberized and the
# caller's environment had no KDC realm at startup.
if [[ ${NEEDS_RE_SOURCE_NOTE} -eq 1 ]]; then
  echo
  echo "You have just successfully created a kerberized cluster."
  echo "Congratulations! Communication with this cluster requires"
  echo "the setting of certain environment variables. These"
  echo "environment variables weren't available before the cluster"
  echo "was created. To pick them up, please source impala-config.sh:"
  echo
  echo " . ${IMPALA_HOME}/bin/impala-config.sh"
  echo
fi