| # Licensed to the Apache Software Foundation (ASF) under one |
| # or more contributor license agreements. See the NOTICE file |
| # distributed with this work for additional information |
| # regarding copyright ownership. The ASF licenses this file |
| # to you under the Apache License, Version 2.0 (the |
| # "License"); you may not use this file except in compliance |
| # with the License. You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, |
| # software distributed under the License is distributed on an |
| # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| # KIND, either express or implied. See the License for the |
| # specific language governing permissions and limitations |
| # under the License. |
| |
# Source this file from the $IMPALA_HOME directory to set up your environment. If
# $IMPALA_HOME is undefined, this script will set it to the root of the Impala source
# tree that contains this script.
| # |
# Some config variables can be overridden. All overridable variables can be overridden
# by impala-config-branch.sh, which in turn can be overridden by impala-config-local.sh.
# Some config variables in the second part of this file (e.g. locations of dependencies,
# secret keys) can also be overridden by setting environment variables before sourcing
# this file. We don't support this for variables that change between branches and
# versions (e.g. version numbers), because it creates a "sticky config variable" problem
# where an old value stays in effect when switching between branches or rebasing until
# the developer opens a new shell. We also do not support overriding of some variables
# that are computed based on the values of other variables.
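#
# For example, a setting that this file documents as overridable could be placed in
# bin/impala-config-local.sh (the value below is illustrative only):
#   export IMPALA_CLUSTER_MAX_MEM_GB=16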
| # |
# This file must be kept compatible with the bash options "set -euo pipefail". Those
# options will be set by other scripts before sourcing this file. They are not set in
# this script itself because that would require updating scripts outside this
# repository, which is not practical at this time.
| |
if ! [[ "'${IMPALA_HOME-}'" =~ [[:blank:]] ]]; then
  if [ -z "${IMPALA_HOME-}" ]; then
| if [[ ! -z "$ZSH_NAME" ]]; then |
| export IMPALA_HOME=$(dirname "$(cd $(dirname ${(%):-%x}) >/dev/null && pwd)") |
| else |
| export IMPALA_HOME=$(dirname "$(cd $(dirname "${BASH_SOURCE[0]}") >/dev/null && pwd)") |
| fi |
| fi |
| fi |
| |
| if [[ "'$IMPALA_HOME'" =~ [[:blank:]] ]]; then |
| echo "IMPALA_HOME cannot have spaces in the path" |
| exit 1 |
| fi |
| |
| export IMPALA_TOOLCHAIN=${IMPALA_TOOLCHAIN-"$IMPALA_HOME/toolchain"} |
| if [ -z "$IMPALA_TOOLCHAIN" ]; then |
| echo "IMPALA_TOOLCHAIN must be specified. Please set it to a valid directory or"\ |
| "leave it unset." |
| return 1 |
| fi |
| |
| ####################################################################################### |
| # Variables that can be overridden by impala-config-*.sh but not by environment vars. # |
| # All component versions and other variables that get updated periodically or between # |
| # branches go here to avoid the "sticky variable" problem (IMPALA-4653) where the # |
| # variable from a previously-sourced impala-config.sh overrides the new value. # |
| ####################################################################################### |
| |
| # The current Impala version that will be embedded in the Impala binary. This is |
| # also used to find the Impala frontend jar files, so the version must match |
| # the version in our Maven pom.xml files. This is validated via |
| # bin/validate-java-pom-version.sh during the build. |
| # WARNING: If changing this value, also run these commands: |
| # cd ${IMPALA_HOME}/java |
| # mvn versions:set -DnewVersion=YOUR_NEW_VERSION |
| export IMPALA_VERSION=4.5.0-SNAPSHOT |
| |
# Whether to build the backend against the Avro C++ library or the Avro C library.
# This flag is temporary, to help with the transition from the Avro C library to C++.
| export USE_AVRO_CPP=${USE_AVRO_CPP:=false} |
| |
| # The unique build id of the toolchain to use if bootstrapping. This is generated by the |
| # native-toolchain build when publishing its build artifacts. This should be changed when |
| # moving to a different build of the toolchain, e.g. when a version is bumped or a |
| # compile option is changed. The build id can be found in the output of the toolchain |
# build jobs; it is constructed from the build number and toolchain git hash prefix.
| export IMPALA_TOOLCHAIN_BUILD_ID_AARCH64=56-810d0f4757 |
| export IMPALA_TOOLCHAIN_BUILD_ID_X86_64=486-810d0f4757 |
| export IMPALA_TOOLCHAIN_REPO=\ |
| ${IMPALA_TOOLCHAIN_REPO:-https://github.com/cloudera/native-toolchain.git} |
| export IMPALA_TOOLCHAIN_BRANCH=${IMPALA_TOOLCHAIN_BRANCH:-master} |
| export IMPALA_TOOLCHAIN_COMMIT_HASH=\ |
| ${IMPALA_TOOLCHAIN_COMMIT_HASH-810d0f47572d9ac4036c34eb2ad6342d51d510cd} |
| # Compare the build ref in build IDs by removing everything up-to-and-including the |
| # first hyphen. |
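# For example, "486-810d0f4757" is compared as just "810d0f4757".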
| if [ "${IMPALA_TOOLCHAIN_BUILD_ID_AARCH64#*-}" \ |
| != "${IMPALA_TOOLCHAIN_BUILD_ID_X86_64#*-}" ]; then |
| echo "IMPALA_TOOLCHAIN_BUILD_ID_AARCH64 and IMPALA_TOOLCHAIN_BUILD_ID_X86_64 must" \ |
| "come from the same commit hash." |
| exit 1 |
| fi |
| |
| export ARCH_NAME=$(uname -p) |
| |
| # Versions of toolchain dependencies. |
| # ----------------------------------- |
| if $USE_AVRO_CPP; then |
| export IMPALA_AVRO_VERSION=1.11.1-p1 |
| else |
| export IMPALA_AVRO_VERSION=1.7.4-p5 |
| fi |
| unset IMPALA_AVRO_URL |
| export IMPALA_BINUTILS_VERSION=2.35.1 |
| unset IMPALA_BINUTILS_URL |
| export IMPALA_BOOST_VERSION=1.74.0-p1 |
| unset IMPALA_BOOST_URL |
| export IMPALA_BREAKPAD_VERSION=e09741c609dcd5f5274d40182c5e2cc9a002d5ba-p2 |
| unset IMPALA_BREAKPAD_URL |
| export IMPALA_BZIP2_VERSION=1.0.8-p2 |
| unset IMPALA_BZIP2_URL |
| export IMPALA_CCTZ_VERSION=2.2 |
| unset IMPALA_CCTZ_URL |
| export IMPALA_CMAKE_VERSION=3.22.2 |
| unset IMPALA_CMAKE_URL |
| export IMPALA_CRCUTIL_VERSION=2903870057d2f1f109b245650be29e856dc8b646 |
| unset IMPALA_CRCUTIL_URL |
| export IMPALA_CURL_VERSION=8.10.1 |
| unset IMPALA_CURL_URL |
| export IMPALA_CYRUS_SASL_VERSION=2.1.23 |
| unset IMPALA_CYRUS_SASL_URL |
| export IMPALA_FLATBUFFERS_VERSION=1.9.0-p1 |
| unset IMPALA_FLATBUFFERS_URL |
| export IMPALA_GCC_VERSION=10.4.0 |
| unset IMPALA_GCC_URL |
| export IMPALA_GDB_VERSION=12.1 |
| unset IMPALA_GDB_URL |
| export IMPALA_GFLAGS_VERSION=2.2.0-p2 |
| unset IMPALA_GFLAGS_URL |
| export IMPALA_GLOG_VERSION=0.3.5-p3 |
| unset IMPALA_GLOG_URL |
| export IMPALA_GPERFTOOLS_VERSION=2.10 |
| unset IMPALA_GPERFTOOLS_URL |
| export IMPALA_GTEST_VERSION=1.14.0 |
| unset IMPALA_GTEST_URL |
| export IMPALA_JWT_CPP_VERSION=0.5.0 |
| unset IMPALA_JWT_CPP_URL |
| export IMPALA_LIBEV_VERSION=4.20-p1 |
| unset IMPALA_LIBEV_URL |
| export IMPALA_LIBUNWIND_VERSION=1.7.2-p1 |
| unset IMPALA_LIBUNWIND_URL |
| export IMPALA_LLVM_VERSION=5.0.1-p8 |
| unset IMPALA_LLVM_URL |
| export IMPALA_LLVM_ASAN_VERSION=5.0.1-p8 |
| unset IMPALA_LLVM_ASAN_URL |
| |
| # To limit maximum memory available for the mini-cluster and CDH cluster, add the |
| # following in $IMPALA_HOME/bin/impala-config-local.sh |
| # export IMPALA_CLUSTER_MAX_MEM_GB=<value> |
| |
| # LLVM stores some files in subdirectories that are named after what |
| # version it thinks it is. We might think it is 5.0.1-p1, based on a |
| # patch we have applied, but LLVM thinks its version is 5.0.1. |
| export IMPALA_LLVM_UBSAN_BASE_VERSION=5.0.1 |
| |
| # Debug builds should use the release+asserts build to get additional coverage. |
| # Don't use the LLVM debug build because the binaries are too large to distribute. |
| export IMPALA_LLVM_DEBUG_VERSION=5.0.1-asserts-p8 |
| unset IMPALA_LLVM_DEBUG_URL |
| export IMPALA_LZ4_VERSION=1.9.3 |
| unset IMPALA_LZ4_URL |
| export IMPALA_ZSTD_VERSION=1.5.2 |
| unset IMPALA_ZSTD_URL |
| export IMPALA_OPENLDAP_VERSION=2.4.47 |
| unset IMPALA_OPENLDAP_URL |
| export IMPALA_ORC_VERSION=1.7.9-p10 |
| unset IMPALA_ORC_URL |
| export IMPALA_PROTOBUF_VERSION=3.14.0 |
| unset IMPALA_PROTOBUF_URL |
| export IMPALA_PROTOBUF_CLANG_VERSION=3.14.0-clangcompat-p2 |
| unset IMPALA_PROTOBUF_CLANG_URL |
| export IMPALA_POSTGRES_JDBC_DRIVER_VERSION=42.5.6 |
| unset IMPALA_POSTGRES_JDBC_DRIVER_URL |
| export IMPALA_PYTHON_VERSION=2.7.16 |
| unset IMPALA_PYTHON_URL |
| export IMPALA_PYTHON3_VERSION=3.8.18 |
| export IMPALA_RAPIDJSON_VERSION=1.1.0 |
| unset IMPALA_RAPIDJSON_URL |
| export IMPALA_RE2_VERSION=2023-03-01 |
| unset IMPALA_RE2_URL |
| export IMPALA_SNAPPY_VERSION=1.1.8 |
| unset IMPALA_SNAPPY_URL |
| export IMPALA_SQUEASEL_VERSION=3.3 |
| unset IMPALA_SQUEASEL_URL |
| # TPC utilities used for test/benchmark data generation. |
| export IMPALA_TPC_DS_VERSION=2.1.0-p1 |
| unset IMPALA_TPC_DS_URL |
| export IMPALA_TPC_H_VERSION=2.17.0 |
| unset IMPALA_TPC_H_URL |
| export IMPALA_ZLIB_VERSION=1.2.13 |
| unset IMPALA_ZLIB_URL |
| export IMPALA_CLOUDFLAREZLIB_VERSION=9e601a3f37 |
| unset IMPALA_CLOUDFLAREZLIB_URL |
| export IMPALA_CALLONCEHACK_VERSION=1.0.0 |
| unset IMPALA_CALLONCEHACK_URL |
| if [[ $ARCH_NAME == 'aarch64' ]]; then |
| export IMPALA_HADOOP_CLIENT_VERSION=3.3.6 |
| unset IMPALA_HADOOP_CLIENT_URL |
| fi |
| export IMPALA_MOLD_VERSION=2.4.1 |
| unset IMPALA_MOLD_URL |
| |
| # Impala JDBC driver for testing. |
| export IMPALA_SIMBA_JDBC_DRIVER_VERSION=42-2.6.32.1041 |
| |
| # Find system python versions for testing |
| export IMPALA_SYSTEM_PYTHON2="${IMPALA_SYSTEM_PYTHON2_OVERRIDE-$(command -v python2)}" |
| export IMPALA_SYSTEM_PYTHON3="${IMPALA_SYSTEM_PYTHON3_OVERRIDE-$(command -v python3)}" |
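# To pin these explicitly, the *_OVERRIDE variables used above can be set in the
# environment before sourcing this file; the paths shown are purely illustrative:
# export IMPALA_SYSTEM_PYTHON2_OVERRIDE=/usr/bin/python2.7
# export IMPALA_SYSTEM_PYTHON3_OVERRIDE=/usr/bin/python3.8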
| |
| # Additional Python versions to use when building the impala-shell prebuilt tarball |
| # via make_shell_tarball.sh. That tarball includes precompiled packages, so it can be |
| # used without additional system dependencies needed for pip install. |
| # export IMPALA_EXTRA_PACKAGE_PYTHONS=python3.6;python3.10 |
| |
| if [[ $OSTYPE == "darwin"* ]]; then |
| IMPALA_CYRUS_SASL_VERSION=2.1.26 |
| unset IMPALA_CYRUS_SASL_URL |
| IMPALA_GPERFTOOLS_VERSION=2.3 |
| unset IMPALA_GPERFTOOLS_URL |
| fi |
| |
| : ${IMPALA_TOOLCHAIN_HOST:=native-toolchain.s3.amazonaws.com} |
| export IMPALA_TOOLCHAIN_HOST |
| |
| export CDP_BUILD_NUMBER=49623641 |
| export CDP_MAVEN_REPOSITORY=\ |
| "https://${IMPALA_TOOLCHAIN_HOST}/build/cdp_components/${CDP_BUILD_NUMBER}/maven" |
| export CDP_AVRO_JAVA_VERSION=1.8.2.7.3.0.0-128 |
| export CDP_HADOOP_VERSION=3.1.1.7.3.0.0-128 |
| export CDP_HBASE_VERSION=2.4.17.7.3.0.0-128 |
| export CDP_HIVE_VERSION=3.1.3000.7.3.0.0-128 |
| export CDP_ICEBERG_VERSION=1.3.1.7.3.0.0-128 |
| export CDP_KNOX_VERSION=2.0.0.7.3.0.0-128 |
| export CDP_OZONE_VERSION=1.3.0.7.3.0.0-128 |
| export CDP_PARQUET_VERSION=1.12.3.7.3.0.0-128 |
| export CDP_RANGER_VERSION=2.4.0.7.3.0.0-128 |
| export CDP_TEZ_VERSION=0.9.1.7.3.0.0-128 |
| |
| # Ref: https://infra.apache.org/release-download-pages.html#closer |
| : ${APACHE_MIRROR:="https://www.apache.org/dyn/closer.cgi"} |
| export APACHE_MIRROR |
| export APACHE_AVRO_JAVA_VERSION=1.8.2 |
| export APACHE_HADOOP_VERSION=3.3.6 |
| export APACHE_HBASE_VERSION=2.6.0 |
| export APACHE_ICEBERG_VERSION=1.4.3 |
| export APACHE_KNOX_VERSION=1.6.1 |
| export APACHE_PARQUET_VERSION=1.12.3 |
| export APACHE_RANGER_VERSION=2.4.0 |
| export APACHE_TEZ_VERSION=0.10.2 |
| export APACHE_HIVE_VERSION=3.1.3 |
| export APACHE_HIVE_STORAGE_API_VERSION=2.7.0 |
| export APACHE_OZONE_VERSION=1.4.0 |
| |
| # Java dependencies that are not also runtime components. Declaring versions here allows |
| # other branches to override them in impala-config-branch.sh for cleaner patches. |
| export IMPALA_BOUNCY_CASTLE_VERSION=1.78 |
| export IMPALA_COMMONS_IO_VERSION=2.6 |
| export IMPALA_COS_VERSION=3.1.0-8.0.8 |
| export IMPALA_DERBY_VERSION=10.14.2.0 |
| export IMPALA_GUAVA_VERSION=32.0.1-jre |
| export IMPALA_HUDI_VERSION=0.5.0-incubating |
| export IMPALA_HTTP_CORE_VERSION=4.4.14 |
| export IMPALA_JACKSON_VERSION=2.15.3 |
| export IMPALA_JACKSON_DATABIND_VERSION=2.15.3 |
| export IMPALA_JSON_SMART_VERSION=2.4.11 |
| export IMPALA_JUNIT_VERSION=4.12 |
| export IMPALA_KITE_VERSION=1.1.0 |
| export IMPALA_LOG4J2_VERSION=2.18.0 |
| export IMPALA_ORC_JAVA_VERSION=1.7.6 |
| export IMPALA_PAC4J_VERSION=4.5.5 |
| export IMPALA_RELOAD4j_VERSION=1.2.22 |
| export IMPALA_SLF4J_VERSION=2.0.3 |
| export IMPALA_SPRINGFRAMEWORK_VERSION=5.3.37 |
| export IMPALA_XMLSEC_VERSION=2.2.6 |
| export IMPALA_OBS_VERSION=3.1.1-hw-42 |
| export IMPALA_DBCP2_VERSION=2.9.0 |
| export IMPALA_DROPWIZARD_METRICS_VERSION=4.2.26 |
| export IMPALA_AIRCOMPRESSOR_VERSION=0.27 |
| |
| # When Impala is building docker images on Redhat-based distributions, |
| # it is useful to be able to customize the base image. Some users will |
# want to use open source / free distributions like CentOS/Rocky/Alma/etc.
| # Some users will want to produce images on top of official Redhat UBI |
| # images (which have certain guarantees about maintenance, CVEs, etc). |
| # These environment variables control the base images. They default to |
| # free distributions, but Redhat UBI images are known to work. |
| export IMPALA_REDHAT7_DOCKER_BASE=${IMPALA_REDHAT7_DOCKER_BASE:-"centos:centos7.9.2009"} |
| export IMPALA_REDHAT8_DOCKER_BASE=${IMPALA_REDHAT8_DOCKER_BASE:-"rockylinux:8.5"} |
| export IMPALA_REDHAT9_DOCKER_BASE=${IMPALA_REDHAT9_DOCKER_BASE:-"rockylinux:9.2"} |
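# For example, to build on an official Redhat UBI 8 image instead (the image reference
# below is illustrative; any compatible UBI tag should work similarly):
# export IMPALA_REDHAT8_DOCKER_BASE="registry.access.redhat.com/ubi8/ubi"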
| |
| # Selects the version of Java to use when start-impala-cluster.py starts with container |
| # images (created via e.g. 'make docker_debug_java11_images'). The Java version used in |
| # these images is independent of the Java version used to compile Impala. |
| # Accepts 8, 11, 17. |
| export IMPALA_DOCKER_JAVA=${IMPALA_DOCKER_JAVA:-"8"} |
| if [ "${IMPALA_DOCKER_USE_JAVA11:-}" = "true" ]; then |
| export IMPALA_DOCKER_JAVA=11 |
| fi |
| |
| # There are multiple compatible implementations of zlib. Cloudflare Zlib is an |
| # implementation with optimizations to use platform-specific CPU features that are not |
| # in the standard Zlib implementation. When set to true, this builds and links against |
| # Cloudflare Zlib. When false, the build uses the regular Madler Zlib. This defaults |
| # to true due to the large performance benefits. |
| export IMPALA_USE_CLOUDFLARE_ZLIB=${IMPALA_USE_CLOUDFLARE_ZLIB:-"true"} |
| |
# When IMPALA_(CDP_COMPONENT)_URL variables are overridden, they may contain
# '$(platform_label)', which will be replaced with the CDP platform label by
# bootstrap_toolchain.py.
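# For example, an override in bin/impala-config-local.sh might look like this
# (hypothetical mirror URL, shown only to illustrate the placeholder):
# export IMPALA_HADOOP_URL='https://mirror.example.com/$(platform_label)/hadoop.tar.gz'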
| unset IMPALA_HADOOP_URL |
| unset IMPALA_HBASE_URL |
| unset IMPALA_HIVE_URL |
| unset IMPALA_OZONE_URL |
| unset IMPALA_KUDU_URL |
| unset IMPALA_KUDU_VERSION |
| |
| export IMPALA_KERBERIZE=false |
| |
| unset IMPALA_TOOLCHAIN_KUDU_MAVEN_REPOSITORY |
| unset IMPALA_TOOLCHAIN_KUDU_MAVEN_REPOSITORY_ENABLED |
| |
| # Source the branch and local config override files here to override any |
| # variables above or any variables below that allow overriding via environment |
| # variable. |
| . "$IMPALA_HOME/bin/impala-config-branch.sh" |
| if [ -f "$IMPALA_HOME/bin/impala-config-local.sh" ]; then |
| . "$IMPALA_HOME/bin/impala-config-local.sh" |
| fi |
| |
# IMPALA_TOOLCHAIN_PACKAGES_HOME is the location inside IMPALA_TOOLCHAIN where native
# toolchain packages are placed. The subdirectory name encodes the compiler version,
# which allows packages built with different compiler versions to coexist.
| IMPALA_TOOLCHAIN_PACKAGES_HOME=\ |
| ${IMPALA_TOOLCHAIN}/toolchain-packages-gcc${IMPALA_GCC_VERSION} |
| if ! [ -z ${NATIVE_TOOLCHAIN_HOME-} ]; then |
| IMPALA_TOOLCHAIN_PACKAGES_HOME=$(realpath ${NATIVE_TOOLCHAIN_HOME})/build |
| export SKIP_TOOLCHAIN_BOOTSTRAP=true |
| fi |
| export IMPALA_TOOLCHAIN_PACKAGES_HOME |
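# To use a locally built native-toolchain checkout (whose build/ subdirectory holds the
# packages) instead of downloading prebuilt packages, NATIVE_TOOLCHAIN_HOME can be set,
# e.g. in bin/impala-config-local.sh; the path below is illustrative only:
# export NATIVE_TOOLCHAIN_HOME="$HOME/src/native-toolchain"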
| |
| export CDP_HADOOP_URL=${CDP_HADOOP_URL-} |
| export CDP_HBASE_URL=${CDP_HBASE_URL-} |
| export CDP_HIVE_URL=${CDP_HIVE_URL-} |
| export CDP_HIVE_SOURCE_URL=${CDP_HIVE_SOURCE_URL-} |
| export CDP_OZONE_URL=${CDP_OZONE_URL-} |
| export CDP_ICEBERG_URL=${CDP_ICEBERG_URL-} |
| export CDP_RANGER_URL=${CDP_RANGER_URL-} |
| export CDP_TEZ_URL=${CDP_TEZ_URL-} |
| |
| export APACHE_HIVE_URL=${APACHE_HIVE_URL-} |
| export APACHE_HIVE_SOURCE_URL=${APACHE_HIVE_SOURCE_URL-} |
| export APACHE_OZONE_URL=${APACHE_OZONE_URL-} |
| |
| export CDP_COMPONENTS_HOME="$IMPALA_TOOLCHAIN/cdp_components-$CDP_BUILD_NUMBER" |
| export CDH_MAJOR_VERSION=7 |
| if ${USE_APACHE_COMPONENTS:=false}; then |
| export IMPALA_AVRO_JAVA_VERSION=${APACHE_AVRO_JAVA_VERSION} |
| export IMPALA_HADOOP_VERSION=${APACHE_HADOOP_VERSION} |
| export IMPALA_HBASE_VERSION=${APACHE_HBASE_VERSION} |
| export IMPALA_ICEBERG_VERSION=${APACHE_ICEBERG_VERSION} |
| export IMPALA_KNOX_VERSION=${APACHE_KNOX_VERSION} |
| export IMPALA_OZONE_VERSION=${APACHE_OZONE_VERSION} |
| export IMPALA_PARQUET_VERSION=${APACHE_PARQUET_VERSION} |
| export IMPALA_RANGER_VERSION=${RANGER_VERSION_OVERRIDE:-"$APACHE_RANGER_VERSION"} |
| export IMPALA_TEZ_VERSION=${APACHE_TEZ_VERSION} |
| export USE_APACHE_HADOOP=${USE_APACHE_HADOOP:=true} |
| export USE_APACHE_HBASE=${USE_APACHE_HBASE:=true} |
| export USE_APACHE_HIVE=${USE_APACHE_HIVE:=true} |
| export USE_APACHE_TEZ=${USE_APACHE_TEZ:=true} |
| export USE_APACHE_RANGER=${USE_APACHE_RANGER:=true} |
| export USE_APACHE_OZONE=${USE_APACHE_OZONE:=true} |
| else |
| export IMPALA_AVRO_JAVA_VERSION=${CDP_AVRO_JAVA_VERSION} |
| export IMPALA_HADOOP_VERSION=${CDP_HADOOP_VERSION} |
| export IMPALA_HADOOP_URL=${CDP_HADOOP_URL-} |
| export IMPALA_HBASE_VERSION=${CDP_HBASE_VERSION} |
| export IMPALA_HBASE_URL=${CDP_HBASE_URL-} |
| export IMPALA_ICEBERG_VERSION=${CDP_ICEBERG_VERSION} |
| export IMPALA_ICEBERG_URL=${CDP_ICEBERG_URL-} |
| export IMPALA_KNOX_VERSION=${CDP_KNOX_VERSION} |
| export IMPALA_OZONE_VERSION=${CDP_OZONE_VERSION} |
| export IMPALA_PARQUET_VERSION=${CDP_PARQUET_VERSION} |
| export IMPALA_RANGER_VERSION=${RANGER_VERSION_OVERRIDE:-"$CDP_RANGER_VERSION"} |
| export IMPALA_RANGER_URL=${CDP_RANGER_URL-} |
| export IMPALA_TEZ_VERSION=${CDP_TEZ_VERSION} |
| export IMPALA_TEZ_URL=${CDP_TEZ_URL-} |
| export USE_APACHE_HADOOP=${USE_APACHE_HADOOP:=false} |
| export USE_APACHE_HBASE=${USE_APACHE_HBASE:=false} |
| export USE_APACHE_HIVE=${USE_APACHE_HIVE:=false} |
| export USE_APACHE_TEZ=${USE_APACHE_TEZ:=false} |
| export USE_APACHE_RANGER=${USE_APACHE_RANGER:=false} |
| export USE_APACHE_OZONE=${USE_APACHE_OZONE:=false} |
| fi |
| |
| export APACHE_COMPONENTS_HOME="$IMPALA_TOOLCHAIN/apache_components" |
| |
| if $USE_APACHE_HADOOP; then |
| export HADOOP_HOME="$APACHE_COMPONENTS_HOME/hadoop-${IMPALA_HADOOP_VERSION}" |
| else |
| export HADOOP_HOME="$CDP_COMPONENTS_HOME/hadoop-${IMPALA_HADOOP_VERSION}" |
| fi |
| |
| if $USE_APACHE_HIVE; then |
  # When USE_APACHE_HIVE is set, the Apache Hive version is used both to build Impala
  # and to deploy Hive in the minicluster.
| export IMPALA_HIVE_DIST_TYPE="apache-hive" |
| export IMPALA_HIVE_VERSION=${APACHE_HIVE_VERSION} |
| export IMPALA_HIVE_URL=${APACHE_HIVE_URL-} |
| export IMPALA_HIVE_SOURCE_URL=${APACHE_HIVE_SOURCE_URL-} |
| export IMPALA_HIVE_STORAGE_API_VERSION=${APACHE_HIVE_STORAGE_API_VERSION} |
| else |
  # When USE_APACHE_HIVE is false, the CDP Hive version is used to build Impala and to
  # deploy Hive in the minicluster.
| export IMPALA_HIVE_DIST_TYPE="hive" |
| export IMPALA_HIVE_VERSION=${HIVE_VERSION_OVERRIDE:-"$CDP_HIVE_VERSION"} |
| export IMPALA_HIVE_URL=${CDP_HIVE_URL-} |
| export IMPALA_HIVE_SOURCE_URL=${CDP_HIVE_SOURCE_URL-} |
| export IMPALA_HIVE_STORAGE_API_VERSION=${HIVE_STORAGE_API_VERSION_OVERRIDE:-\ |
| "2.3.0.$IMPALA_HIVE_VERSION"} |
| fi |
| # Thrift related environment variables. |
| # IMPALA_THRIFT_POM_VERSION is used to populate IMPALA_THRIFT_JAVA_VERSION and |
| # thrift.version in java/pom.xml. |
| # IMPALA_THRIFT_PY_VERSION is used to find the thrift compiler to produce |
| # the generated Python code. The code that uses the generated Python code gets |
| # the corresponding Thrift runtime library by pip installing thrift (and does not |
| # respect this version). If upgrading IMPALA_THRIFT_PY_VERSION, also upgrade the |
| # thrift version in shell/ext-py, shell/packaging/requirements.txt, and |
| # infra/python/deps/requirements.txt. |
| export IMPALA_THRIFT_CPP_VERSION=0.16.0-p7 |
| unset IMPALA_THRIFT_CPP_URL |
| if $USE_APACHE_HIVE; then |
| # Apache Hive 3 clients can't run on thrift versions >= 0.14 (IMPALA-11801) |
| export IMPALA_THRIFT_POM_VERSION=0.11.0 |
| export IMPALA_THRIFT_JAVA_VERSION=${IMPALA_THRIFT_POM_VERSION}-p5 |
| else |
| export IMPALA_THRIFT_POM_VERSION=0.16.0 |
| export IMPALA_THRIFT_JAVA_VERSION=${IMPALA_THRIFT_POM_VERSION}-p7 |
| fi |
| unset IMPALA_THRIFT_JAVA_URL |
| export IMPALA_THRIFT_PY_VERSION=0.16.0-p7 |
| unset IMPALA_THRIFT_PY_URL |
| |
# Extract the first (major) component of the Hive version. This is used by various
# tests and scripts to enable and disable tests and functionality. The Hive source
# location can be overridden in case we want to build Impala without a complete Hive
# build.
| export IMPALA_HIVE_MAJOR_VERSION=$(echo "$IMPALA_HIVE_VERSION" | cut -d . -f 1) |
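# For example, a CDP Hive version of "3.1.3000.7.3.0.0-128" yields a major version of 3.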
| |
| # Hive 1 and 2 are no longer supported. |
| if [[ "${IMPALA_HIVE_MAJOR_VERSION}" == "1" || |
| "${IMPALA_HIVE_MAJOR_VERSION}" == "2" ]]; then |
| echo "Hive 1 and 2 are no longer supported" |
| return 1 |
| fi |
| |
| if $USE_APACHE_OZONE; then |
| export IMPALA_OZONE_VERSION=${APACHE_OZONE_VERSION} |
| export IMPALA_OZONE_URL=${APACHE_OZONE_URL-} |
| else |
| export IMPALA_OZONE_VERSION=${CDP_OZONE_VERSION} |
| export IMPALA_OZONE_URL=${CDP_OZONE_URL-} |
| fi |
| |
| # It is important to have a coherent view of the JAVA_HOME and JAVA executable. |
| # The JAVA_HOME should be determined first, then the JAVA executable should be |
| # derived from JAVA_HOME. For development, it is useful to be able to specify |
| # the JDK version as part of bin/impala-config-local.sh |
| |
| # Decision tree: |
| # if IMPALA_JAVA_HOME_OVERRIDE is set, respect it |
| # else if IMPALA_JDK_VERSION == system, look for system JDK |
# else if IMPALA_JDK_VERSION is a specific version (e.g. 8, 11, 17), look for that JDK
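#
# For example, bin/impala-config-local.sh could select a specific JDK with either of
# the following (the values and path are illustrative and depend on the local install):
#   export IMPALA_JDK_VERSION=11
#   export IMPALA_JAVA_HOME_OVERRIDE=/usr/lib/jvm/java-11-openjdk-amd64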
| |
| # Initialize IMPALA_JDK_VERSION and set package variables for Docker builds |
| . "$IMPALA_HOME/bin/impala-config-java.sh" |
| |
| if [[ -n "${IMPALA_JAVA_HOME_OVERRIDE-}" ]]; then |
| IMPALA_JDK_VERSION=override |
| fi |
| |
| DETECTED_JAVA_HOME=Invalid |
| if [[ "${IMPALA_JDK_VERSION}" == "system" ]]; then |
| # Try to detect the system's JAVA_HOME |
| # If javac exists, then the system has a Java SDK (JRE does not have javac). |
| # Follow the symbolic links and use this to determine the system's JAVA_HOME. |
| DETECTED_JAVA_HOME="/usr/java/default" |
| if [ -n "$(which javac)" ]; then |
| DETECTED_JAVA_HOME=$(dirname $(dirname $(readlink -f $(which javac)))) |
| fi |
| elif [[ "${IMPALA_JDK_VERSION}" != "override" ]]; then |
| # Now, we are looking for a specific version, and that will depend on the |
| # distribution. Currently, this is implemented for Redhat and Ubuntu. |
| DISTRIBUTION=Unknown |
| if [[ -f /etc/redhat-release ]]; then |
| echo "Identified Redhat image." |
| DISTRIBUTION=Redhat |
| else |
| source /etc/lsb-release |
| if [[ $DISTRIB_ID == Ubuntu ]]; then |
| echo "Identified Ubuntu image." |
| DISTRIBUTION=Ubuntu |
| fi |
| fi |
| if [[ "${DISTRIBUTION}" == "Unknown" ]]; then |
| echo "ERROR: auto-detection of JAVA_HOME only supported for Ubuntu and RedHat." |
| echo "Use IMPALA_JAVA_HOME_OVERRIDE to configure JAVA_HOME." |
| return 1 |
| fi |
| |
| JVMS_PATH=/usr/lib/jvm |
| if [[ "${DISTRIBUTION}" == "Ubuntu" ]]; then |
| JAVA_PACKAGE_NAME="java-${IMPALA_JDK_VERSION}-openjdk-${UBUNTU_PACKAGE_ARCH}" |
| DETECTED_JAVA_HOME="${JVMS_PATH}/${JAVA_PACKAGE_NAME}" |
| elif [[ "${DISTRIBUTION}" == "Redhat" ]]; then |
| if [[ "${IMPALA_JDK_VERSION}" == "8" ]]; then |
| DETECTED_JAVA_HOME="${JVMS_PATH}/java-1.8.0" |
| else |
| DETECTED_JAVA_HOME="${JVMS_PATH}/java-${IMPALA_JDK_VERSION}" |
| fi |
| fi |
| |
| if [[ ! -d "${DETECTED_JAVA_HOME}" ]]; then |
| echo "ERROR: Could not detect Java ${IMPALA_JDK_VERSION}."\ |
| "${DETECTED_JAVA_HOME} is not a directory." |
| return 1 |
| fi |
| fi |
| |
| # Prefer the JAVA_HOME set in the environment, but use the system's JAVA_HOME otherwise |
| export JAVA_HOME="${IMPALA_JAVA_HOME_OVERRIDE:-${DETECTED_JAVA_HOME}}" |
| if [ ! -d "$JAVA_HOME" ]; then |
| echo "JAVA_HOME must be set to the location of your JDK!" |
| return 1 |
| fi |
| export JAVA="$JAVA_HOME/bin/java" |
| if [[ ! -e "$JAVA" ]]; then |
| echo "Could not find java binary at $JAVA" >&2 |
| return 1 |
| fi |
| |
| # Java libraries required by executables and java tests. |
| export LIB_JAVA=$(find "${JAVA_HOME}/" -name libjava.so | head -1) |
| export LIB_JSIG=$(find "${JAVA_HOME}/" -name libjsig.so | head -1) |
| export LIB_JVM=$(find "${JAVA_HOME}/" -name libjvm.so | head -1) |
| |
| ######################################################################################### |
| # Below here are variables that can be overridden by impala-config-*.sh and environment # |
| # vars, variables computed based on other variables, and variables that cannot be # |
| # overridden. # |
| ######################################################################################### |
| |
| # If true, will not call $IMPALA_HOME/bin/bootstrap_toolchain.py. |
| export SKIP_TOOLCHAIN_BOOTSTRAP=${SKIP_TOOLCHAIN_BOOTSTRAP-false} |
| |
| # If true, will not download python dependencies. |
| export SKIP_PYTHON_DOWNLOAD=${SKIP_PYTHON_DOWNLOAD-false} |
| |
| # Provide isolated python egg location and ensure it's only writable by user to avoid |
| # Python warnings during testing. |
| export PYTHON_EGG_CACHE="${IMPALA_HOME}/shell/build/.python-eggs" |
| mkdir -p "${PYTHON_EGG_CACHE}" |
| chmod 755 "${PYTHON_EGG_CACHE}" |
| |
| # This flag is used in $IMPALA_HOME/cmake_modules/toolchain.cmake. |
| # If it's 0, Impala will be built with the compiler in the toolchain directory. |
| export USE_SYSTEM_GCC=${USE_SYSTEM_GCC-0} |
| |
| # Allow the linker to be set to gold, mold, or regular ld. Gold is the default |
| # as it has been for a long time. Mold is a new linker that is faster than gold. |
| # Note: This is validated in the CMake code. |
| # TODO: Add support for lld as well |
| export IMPALA_LINKER=${IMPALA_LINKER-gold} |
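# For example, to use the mold linker instead of the default:
# export IMPALA_LINKER=mold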
| |
| # Override the default compiler by setting a path to the new compiler. The default |
| # compiler depends on USE_SYSTEM_GCC and IMPALA_GCC_VERSION. The intended use case |
# is to set the compiler to distcc; in that case the user would also set
| # IMPALA_BUILD_THREADS to increase parallelism. |
| export IMPALA_CXX_COMPILER=${IMPALA_CXX_COMPILER-default} |
| |
| # Add options to 'mvn'; useful for configuring a settings file (-s). |
| export IMPALA_MAVEN_OPTIONS=${IMPALA_MAVEN_OPTIONS-} |
| |
| # If enabled, debug symbols are added to cross-compiled IR. |
| export ENABLE_IMPALA_IR_DEBUG_INFO=${ENABLE_IMPALA_IR_DEBUG_INFO-false} |
| |
| # Impala has dozens of binaries that link in all the Impala libraries. |
| # Each binary is hundreds of megabytes, and they end up taking 10s of GBs |
| # disk space for a developer environment. A large amount of the binary |
| # size is due to debug information. |
| # |
| # These are a few options for reducing the binary size and disk space |
| # usage. |
| # - IMPALA_MINIMAL_DEBUG_INFO=true changes the build to produce only |
| # minimal debuginfo (i.e. -g1). This has line tables and can do backtraces, |
| # but it doesn't include variable information and limits further |
| # debuggability. This option reduces the size of binaries by 60+%. |
| # - IMPALA_COMPRESSED_DEBUG_INFO=true changes the build to compress the |
| # debug info with gzip. This significantly reduces the size of the |
| # binary without changing the quantity of debug information. The catch |
| # is that tools need to support it. gdb is known to support it and |
| # the Breakpad scripts have been modified to handle it, but there may |
| # be other tools that do not know how to use it. This reduces the size |
| # of binaries by 50+%. |
| # - IMPALA_SPLIT_DEBUG_INFO=true changes the build to put debug info in |
| # separate .dwo files for each C++ file. Executables contain metadata |
| # pointing to these .dwo files without needing to incorporate the debug |
| # information. This allows executables to share a single copy of |
| # the debug information. It also reduces link time, as the linker does |
| # not need to process the debug info. Tools (including gdb) mostly know |
| # how to handle this split debug information. |
| # |
| # Due to the major reduction in binary size and broad support in debugging |
| # tools, compressed debug information is enabled by default. |
| export IMPALA_MINIMAL_DEBUG_INFO=${IMPALA_MINIMAL_DEBUG_INFO-false} |
| export IMPALA_COMPRESSED_DEBUG_INFO=${IMPALA_COMPRESSED_DEBUG_INFO-true} |
| export IMPALA_SPLIT_DEBUG_INFO=${IMPALA_SPLIT_DEBUG_INFO-false} |
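# For example, a developer short on disk space could set the following (illustrative;
# each of the three options above can be toggled independently):
# export IMPALA_MINIMAL_DEBUG_INFO=true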
| |
| # Download and use the CDH components from S3. It can be useful to set this to false if |
| # building against a custom local build using HIVE_SRC_DIR_OVERRIDE, |
| # HADOOP_INCLUDE_DIR_OVERRIDE, and HADOOP_LIB_DIR_OVERRIDE. |
| export DOWNLOAD_CDH_COMPONENTS=${DOWNLOAD_CDH_COMPONENTS-true} |
| export DOWNLOAD_APACHE_COMPONENTS=${DOWNLOAD_APACHE_COMPONENTS-true} |
| |
| export IS_OSX="$(if [[ "$OSTYPE" == "darwin"* ]]; then echo true; else echo false; fi)" |
| |
| export IMPALA_AUX_TEST_HOME="${IMPALA_AUX_TEST_HOME-$IMPALA_HOME/../Impala-auxiliary-tests}" |
| export TARGET_FILESYSTEM="${TARGET_FILESYSTEM-hdfs}" |
| export ERASURE_CODING="${ERASURE_CODING-false}" |
| export FILESYSTEM_PREFIX="${FILESYSTEM_PREFIX-}" |
| export S3_BUCKET="${S3_BUCKET-}" |
| export S3GUARD_ENABLED="${S3GUARD_ENABLED-false}" |
| export S3GUARD_DYNAMODB_TABLE="${S3GUARD_DYNAMODB_TABLE-}" |
| export S3GUARD_DYNAMODB_REGION="${S3GUARD_DYNAMODB_REGION-}" |
| export azure_tenant_id="${azure_tenant_id-DummyAdlsTenantId}" |
| export azure_client_id="${azure_client_id-DummyAdlsClientId}" |
| export azure_client_secret="${azure_client_secret-DummyAdlsClientSecret}" |
| export azure_data_lake_store_name="${azure_data_lake_store_name-}" |
| export azure_storage_account_name="${azure_storage_account_name-}" |
| export azure_storage_container_name="${azure_storage_container_name-}" |
| export GOOGLE_CLOUD_PROJECT_ID="${GOOGLE_CLOUD_PROJECT_ID-}" |
| export GOOGLE_CLOUD_SERVICE_ACCOUNT="${GOOGLE_CLOUD_SERVICE_ACCOUNT-}" |
| export GOOGLE_APPLICATION_CREDENTIALS="${GOOGLE_APPLICATION_CREDENTIALS-}" |
| export GCS_BUCKET="${GCS_BUCKET-}" |
| export COS_SECRET_ID="${COS_SECRET_ID-}" |
| export COS_SECRET_KEY="${COS_SECRET_KEY-}" |
| export COS_REGION="${COS_REGION-}" |
| export COS_BUCKET="${COS_BUCKET-}" |
| export OSS_ACCESS_KEY_ID="${OSS_ACCESS_KEY_ID-}" |
| export OSS_SECRET_ACCESS_KEY="${OSS_SECRET_ACCESS_KEY-}" |
| export OSS_ACCESS_ENDPOINT="${OSS_ACCESS_ENDPOINT-}" |
| export OSS_BUCKET="${OSS_BUCKET-}" |
| export HDFS_REPLICATION="${HDFS_REPLICATION-3}" |
| export ISILON_NAMENODE="${ISILON_NAMENODE-}" |
| # Internal and external interfaces that test cluster services will listen on. The |
| # internal interface is used for ports that should not be accessed from outside the |
| # host that the cluster is running on. The external interface is used for ports |
| # that may need to be accessed from outside, e.g. web UIs. |
| export INTERNAL_LISTEN_HOST="${INTERNAL_LISTEN_HOST-localhost}" |
| export EXTERNAL_LISTEN_HOST="${EXTERNAL_LISTEN_HOST-0.0.0.0}" |
| export DEFAULT_FS="${DEFAULT_FS-hdfs://${INTERNAL_LISTEN_HOST}:20500}" |
| export WAREHOUSE_LOCATION_PREFIX="${WAREHOUSE_LOCATION_PREFIX-}" |
| export LOCAL_FS="file:${WAREHOUSE_LOCATION_PREFIX}" |
| # Use different node directories for each filesystem so we don't need to recreate them |
| # from scratch when switching. |
| UNIQUE_FS_LABEL= |
| if [[ "${TARGET_FILESYSTEM}" != "hdfs" ]]; then |
| UNIQUE_FS_LABEL="${UNIQUE_FS_LABEL}-${TARGET_FILESYSTEM}" |
| fi |
| if [[ "${ERASURE_CODING}" = true ]]; then |
| UNIQUE_FS_LABEL="${UNIQUE_FS_LABEL}-ec" |
| fi |
| DEFAULT_NODES_DIR="$IMPALA_HOME/testdata/cluster/cdh$CDH_MAJOR_VERSION$UNIQUE_FS_LABEL" |
| export IMPALA_CLUSTER_NODES_DIR="${IMPALA_CLUSTER_NODES_DIR-$DEFAULT_NODES_DIR}" |
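# For example, with TARGET_FILESYSTEM=s3 and erasure coding disabled, the default is
# "$IMPALA_HOME/testdata/cluster/cdh7-s3", while a plain HDFS setup uses ".../cdh7".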
| |
| ESCAPED_DB_UID=$(sed "s/[^0-9a-zA-Z]/_/g" <<< "$UNIQUE_FS_LABEL$IMPALA_HOME") |
| if $USE_APACHE_HIVE; then |
| export HIVE_HOME="$APACHE_COMPONENTS_HOME/apache-hive-${IMPALA_HIVE_VERSION}-bin" |
| export HIVE_SRC_DIR="$APACHE_COMPONENTS_HOME/apache-hive-${IMPALA_HIVE_VERSION}-src" |
  # If Apache Hive is being used, change the metastore DB name so we don't have to
  # format the metastore DB every time we switch between Hive versions.
| export METASTORE_DB=${METASTORE_DB-"$(cut -c-59 <<< HMS$ESCAPED_DB_UID)_apache"} |
| else |
| export HIVE_HOME=${HIVE_HOME_OVERRIDE:-\ |
| "$CDP_COMPONENTS_HOME/apache-hive-${IMPALA_HIVE_VERSION}-bin"} |
| export HIVE_SRC_DIR=${HIVE_SRC_DIR_OVERRIDE:-\ |
| "${CDP_COMPONENTS_HOME}/hive-${IMPALA_HIVE_VERSION}"} |
| # Previously, there were multiple configurations and the "_cdp" included below |
| # allowed the two to be distinct. We keep this "_cdp" for historical reasons. |
| export METASTORE_DB=${METASTORE_DB-"$(cut -c-59 <<< HMS$ESCAPED_DB_UID)_cdp"} |
| fi |
| # Set the path to the hive_metastore.thrift which is used to build thrift code |
| export HIVE_METASTORE_THRIFT_DIR=${HIVE_METASTORE_THRIFT_DIR_OVERRIDE:-\ |
| "$HIVE_SRC_DIR/standalone-metastore/src/main/thrift"} |
| if $USE_APACHE_TEZ; then |
| export TEZ_HOME="$APACHE_COMPONENTS_HOME/apache-tez-${IMPALA_TEZ_VERSION}-bin" |
| else |
| export TEZ_HOME="$CDP_COMPONENTS_HOME/tez-${IMPALA_TEZ_VERSION}-minimal" |
| fi |
| |
| if $USE_APACHE_HBASE; then |
| export HBASE_HOME="$APACHE_COMPONENTS_HOME/hbase-${IMPALA_HBASE_VERSION}-hadoop3/" |
| else |
| export HBASE_HOME="$CDP_COMPONENTS_HOME/hbase-${IMPALA_HBASE_VERSION}/" |
| fi |
| if $USE_APACHE_OZONE; then |
| export OZONE_HOME="$APACHE_COMPONENTS_HOME/ozone-${IMPALA_OZONE_VERSION}/" |
| else |
| export OZONE_HOME="$CDP_COMPONENTS_HOME/ozone-${IMPALA_OZONE_VERSION}/" |
| fi |
| # Set the Hive binaries in the path |
| export PATH="$HIVE_HOME/bin:$HBASE_HOME/bin:$OZONE_HOME/bin:$PATH" |
| |
| RANGER_POLICY_DB=${RANGER_POLICY_DB-$(cut -c-63 <<< ranger$ESCAPED_DB_UID)} |
| # The DB script in Ranger expects the database name to be in lower case. |
| export RANGER_POLICY_DB=$(echo ${RANGER_POLICY_DB} | tr '[:upper:]' '[:lower:]') |
| |
| # Environment variables carrying AWS security credentials are prepared |
| # according to the following rules: |
| # |
| # Instance: Running outside EC2 || Running in EC2 | |
| # --------------------+--------+--------++--------+--------+ |
| # TARGET_FILESYSTEM | S3 | not S3 || S3 | not S3 | |
| # --------------------+--------+--------++--------+--------+ |
| # | | || | | |
| # empty | unset | dummy || unset | unset | |
| # AWS_* | | || | | |
| # env --------------+--------+--------++--------+--------+ |
| # var | | || | | |
| # not empty | export | export || export | export | |
| # | | || | | |
| # --------------------+--------+--------++--------+--------+ |
| # |
| # Legend: unset: the variable is unset |
| # export: the variable is exported with its current value |
| # dummy: the variable is set to a constant dummy value and exported |
| # |
# Running on an EC2 VM is indicated by setting RUNNING_IN_EC2 to "true" and
# exporting it from a script that runs before this one.
| |
| # Checks are performed in a subshell to avoid leaking secrets to log files. |
| if (set +x; [[ -n ${AWS_ACCESS_KEY_ID-} ]]); then |
| export AWS_ACCESS_KEY_ID |
| else |
| if [[ "${TARGET_FILESYSTEM}" == "s3" || "${RUNNING_IN_EC2:-false}" == "true" ]]; then |
| unset AWS_ACCESS_KEY_ID |
| else |
| export AWS_ACCESS_KEY_ID=DummyAccessKeyId |
| fi |
| fi |
| |
| if (set +x; [[ -n ${AWS_SECRET_ACCESS_KEY-} ]]); then |
| export AWS_SECRET_ACCESS_KEY |
| else |
| if [[ "${TARGET_FILESYSTEM}" == "s3" || "${RUNNING_IN_EC2:-false}" == "true" ]]; then |
| unset AWS_SECRET_ACCESS_KEY |
| else |
| export AWS_SECRET_ACCESS_KEY=DummySecretAccessKey |
| fi |
| fi |
| |
# AWS_SESSION_TOKEN is not set to a dummy value because it is not needed by the FE tests
| if (set +x; [[ -n ${AWS_SESSION_TOKEN-} ]]); then |
| export AWS_SESSION_TOKEN |
| else |
| unset AWS_SESSION_TOKEN |
| fi |
| |
| if [ "${TARGET_FILESYSTEM}" = "s3" ]; then |
  # We guard the S3 access check with a variable. The check hits a rate-limited AWS
  # endpoint, and sourcing this file multiple times can exceed the limit, causing the
  # check to fail.
| S3_ACCESS_VALIDATED="${S3_ACCESS_VALIDATED-0}" |
| if [[ "${S3_ACCESS_VALIDATED}" -ne 1 ]]; then |
| if ${IMPALA_HOME}/bin/check-s3-access.sh; then |
| export S3_ACCESS_VALIDATED=1 |
| export DEFAULT_FS="s3a://${S3_BUCKET}" |
| else |
| return 1 |
| fi |
| else |
| echo "S3 access already validated" |
| fi |
| # If using s3guard, verify that the dynamodb table and region are set |
| if [[ "${S3GUARD_ENABLED}" = "true" ]]; then |
| if [[ -z "${S3GUARD_DYNAMODB_TABLE}" || -z "${S3GUARD_DYNAMODB_REGION}" ]]; then |
| echo "When S3GUARD_ENABLED=true, S3GUARD_DYNAMODB_TABLE and |
| S3GUARD_DYNAMODB_REGION must be set" |
| echo "S3GUARD_DYNAMODB_TABLE: ${S3GUARD_DYNAMODB_TABLE}" |
| echo "S3GUARD_DYNAMODB_REGION: ${S3GUARD_DYNAMODB_REGION}" |
| return 1 |
| fi |
| fi |
| elif [ "${TARGET_FILESYSTEM}" = "adls" ]; then |
| # Basic error checking |
| if [[ "${azure_client_id}" = "DummyAdlsClientId" ||\ |
| "${azure_tenant_id}" = "DummyAdlsTenantId" ||\ |
| "${azure_client_secret}" = "DummyAdlsClientSecret" ]]; then |
| echo "All 3 of the following need to be assigned valid values and belong |
| to the owner of the ADLS store in order to access the filesystem: |
| azure_client_id, azure_tenant_id, azure_client_secret." |
| return 1 |
| fi |
| if [[ "${azure_data_lake_store_name}" = "" ]]; then |
| echo "azure_data_lake_store_name cannot be an empty string for ADLS" |
| return 1 |
| fi |
| DEFAULT_FS="adl://${azure_data_lake_store_name}.azuredatalakestore.net" |
| export DEFAULT_FS |
| elif [ "${TARGET_FILESYSTEM}" = "abfs" ]; then |
| # ABFS is also known as ADLS Gen2, and they can share credentials |
| # Basic error checking |
| if [[ "${azure_client_id}" = "DummyAdlsClientId" ||\ |
| "${azure_tenant_id}" = "DummyAdlsTenantId" ||\ |
| "${azure_client_secret}" = "DummyAdlsClientSecret" ]]; then |
| echo "All 3 of the following need to be assigned valid values and belong |
| to the owner of the Azure storage account in order to access the |
| filesystem: azure_client_id, azure_tenant_id, azure_client_secret." |
| return 1 |
| fi |
| if [[ "${azure_storage_account_name}" = "" ]]; then |
| echo "azure_storage_account_name cannot be an empty string for ABFS" |
| return 1 |
| fi |
| if [[ "${azure_storage_container_name}" = "" ]]; then |
| echo "azure_storage_container_name cannot be an empty string for ABFS" |
| return 1 |
| fi |
| domain="${azure_storage_account_name}.dfs.core.windows.net" |
| DEFAULT_FS="abfss://${azure_storage_container_name}@${domain}" |
| export DEFAULT_FS |
| elif [ "${TARGET_FILESYSTEM}" = "gs" ]; then |
| # Basic error checking |
| if [[ "${GOOGLE_APPLICATION_CREDENTIALS}" = "" ]]; then |
| echo "GOOGLE_APPLICATION_CREDENTIALS should be set to the JSON file that contains |
| your service account key." |
| return 1 |
| fi |
| DEFAULT_FS="gs://${GCS_BUCKET}" |
| export DEFAULT_FS |
| elif [ "${TARGET_FILESYSTEM}" = "cosn" ]; then |
| # Basic error checking |
| if [[ "${COS_SECRET_ID}" = "" ]]; then |
| echo "COS_SECRET_ID cannot be an empty string for COS" |
| return 1 |
| fi |
| if [[ "${COS_SECRET_KEY}" = "" ]]; then |
| echo "COS_SECRET_KEY cannot be an empty string for COS" |
| return 1 |
| fi |
| if [[ "${COS_REGION}" = "" ]]; then |
| echo "COS_REGION cannot be an empty string for COS" |
| return 1 |
| fi |
| if [[ "${COS_BUCKET}" = "" ]]; then |
| echo "COS_BUCKET cannot be an empty string for COS" |
| return 1 |
| fi |
| DEFAULT_FS="cosn://${COS_BUCKET}" |
| export DEFAULT_FS |
| elif [ "${TARGET_FILESYSTEM}" = "oss" ]; then |
| # Basic error checking |
| if [[ "${OSS_ACCESS_KEY_ID}" = "" ]]; then |
| echo "OSS_ACCESS_KEY_ID cannot be an empty string for OSS" |
| return 1 |
| fi |
| if [[ "${OSS_SECRET_ACCESS_KEY}" = "" ]]; then |
| echo "OSS_SECRET_ACCESS_KEY cannot be an empty string for OSS" |
| return 1 |
| fi |
| if [[ "${OSS_ACCESS_ENDPOINT}" = "" ]]; then |
| echo "OSS_ACCESS_ENDPOINT cannot be an empty string for OSS" |
| return 1 |
| fi |
| if [[ "${OSS_BUCKET}" = "" ]]; then |
| echo "OSS_BUCKET cannot be an empty string for OSS" |
| return 1 |
| fi |
| DEFAULT_FS="oss://${OSS_BUCKET}" |
| export DEFAULT_FS |
| elif [ "${TARGET_FILESYSTEM}" = "obs" ]; then |
| # Basic error checking |
| OBS_ACCESS_KEY="${OBS_ACCESS_KEY:?OBS_ACCESS_KEY cannot be an empty string for OBS}" |
| OBS_SECRET_KEY="${OBS_SECRET_KEY:?OBS_SECRET_KEY cannot be an empty string for OBS}" |
| OBS_ENDPOINT="${OBS_ENDPOINT:?OBS_ENDPOINT cannot be an empty string for OBS}" |
| OBS_BUCKET="${OBS_BUCKET:?OBS_BUCKET cannot be an empty string for OBS}" |
| DEFAULT_FS="obs://${OBS_BUCKET}" |
| export OBS_ACCESS_KEY OBS_SECRET_KEY OBS_ENDPOINT DEFAULT_FS ENABLE_OBS_FILESYSTEM=true |
| elif [ "${TARGET_FILESYSTEM}" = "isilon" ]; then |
| if [ "${ISILON_NAMENODE}" = "" ]; then |
| echo "In order to access the Isilon filesystem, ISILON_NAMENODE" |
| echo "needs to be a non-empty and valid address." |
| return 1 |
| fi |
| DEFAULT_FS="hdfs://${ISILON_NAMENODE}:8020" |
| export DEFAULT_FS |
| # isilon manages its own replication. |
| export HDFS_REPLICATION=1 |
| elif [ "${TARGET_FILESYSTEM}" = "local" ]; then |
| if [[ "${WAREHOUSE_LOCATION_PREFIX}" = "" ]]; then |
| echo "WAREHOUSE_LOCATION_PREFIX cannot be an empty string for local filesystem" |
| return 1 |
| fi |
| if [ ! -d "${WAREHOUSE_LOCATION_PREFIX}" ]; then |
| echo "'$WAREHOUSE_LOCATION_PREFIX' is not a directory on the local filesystem." |
| return 1 |
| elif [ ! -r "${WAREHOUSE_LOCATION_PREFIX}" ] || \ |
| [ ! -w "${WAREHOUSE_LOCATION_PREFIX}" ]; then |
| echo "Current user does not have read/write permissions on local filesystem path " |
| "'$WAREHOUSE_LOCATION_PREFIX'" |
| return 1 |
| fi |
| export DEFAULT_FS="${LOCAL_FS}" |
| export FILESYSTEM_PREFIX="${LOCAL_FS}" |
| elif [ "${TARGET_FILESYSTEM}" = "hdfs" ]; then |
| if [[ "${ERASURE_CODING}" = true ]]; then |
| export HDFS_ERASURECODE_POLICY="RS-3-2-1024k" |
| export ERASURECODE_POLICY="$HDFS_ERASURECODE_POLICY" |
| export HDFS_ERASURECODE_PATH="/test-warehouse" |
| fi |
| elif [ "${TARGET_FILESYSTEM}" = "ozone" ]; then |
| export USE_OZONE_ENCRYPTION=${USE_OZONE_ENCRYPTION-true} |
| export OZONE_VOLUME="impala" |
| export DEFAULT_FS="ofs://${INTERNAL_LISTEN_HOST}:9862" |
| export FILESYSTEM_PREFIX="${DEFAULT_FS}/${OZONE_VOLUME}" |
| export WAREHOUSE_LOCATION_PREFIX="/${OZONE_VOLUME}" |
| if [[ "${ERASURE_CODING}" = true ]]; then |
| export OZONE_ERASURECODE_POLICY="RS-3-2-1024k" |
| # Ozone normalizes the policy for internal storage. Use this string for tests. |
| export ERASURECODE_POLICY="rs-3-2-1024k" |
| fi |
| else |
| echo "Unsupported filesystem '$TARGET_FILESYSTEM'" |
| echo "Valid values are: hdfs, isilon, s3, abfs, adls, gs, local, ozone" |
| return 1 |
| fi |
| |
| # Directories where local cluster logs will go when running tests or loading data |
| DEFAULT_LOGS_DIR="${IMPALA_HOME}/logs" # override by setting IMPALA_LOGS_DIR env var |
| export IMPALA_LOGS_DIR="${IMPALA_LOGS_DIR:-$DEFAULT_LOGS_DIR}" |
| export IMPALA_CLUSTER_LOGS_DIR="${IMPALA_LOGS_DIR}/cluster" |
| export IMPALA_DATA_LOADING_LOGS_DIR="${IMPALA_LOGS_DIR}/data_loading" |
| export IMPALA_DATA_LOADING_SQL_DIR="${IMPALA_DATA_LOADING_LOGS_DIR}/sql" |
| export IMPALA_FE_TEST_LOGS_DIR="${IMPALA_LOGS_DIR}/fe_tests" |
| export IMPALA_FE_TEST_COVERAGE_DIR="${IMPALA_FE_TEST_LOGS_DIR}/coverage" |
| export IMPALA_BE_TEST_LOGS_DIR="${IMPALA_LOGS_DIR}/be_tests" |
| export IMPALA_EE_TEST_LOGS_DIR="${IMPALA_LOGS_DIR}/ee_tests" |
| export IMPALA_JS_TEST_LOGS_DIR="${IMPALA_LOGS_DIR}/js_tests" |
| export IMPALA_CUSTOM_CLUSTER_TEST_LOGS_DIR="${IMPALA_LOGS_DIR}/custom_cluster_tests" |
| export IMPALA_MVN_LOGS_DIR="${IMPALA_LOGS_DIR}/mvn" |
| export IMPALA_TIMEOUT_LOGS_DIR="${IMPALA_LOGS_DIR}/timeout_stacktrace" |
| # List of all Impala log dirs so they can be created by buildall.sh |
| export IMPALA_ALL_LOGS_DIRS="${IMPALA_CLUSTER_LOGS_DIR} |
| ${IMPALA_DATA_LOADING_LOGS_DIR} ${IMPALA_DATA_LOADING_SQL_DIR} |
| ${IMPALA_FE_TEST_LOGS_DIR} ${IMPALA_FE_TEST_COVERAGE_DIR} |
| ${IMPALA_BE_TEST_LOGS_DIR} ${IMPALA_EE_TEST_LOGS_DIR} |
| ${IMPALA_CUSTOM_CLUSTER_TEST_LOGS_DIR} ${IMPALA_MVN_LOGS_DIR} |
| ${IMPALA_TIMEOUT_LOGS_DIR}" |
| |
| # Compute CPUs, using cgroup limits if present and not "max" (v2) or negative (v1) |
| awk_divide_roundup='{ cores = $1/$2; print cores==int(cores) ? cores : int(cores)+1 }' |
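# For example, a cgroups v2 cpu.max of "150000 100000" (quota and period) is 1.5 cores,
# which the expression above rounds up to 2.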
| if grep -v max /sys/fs/cgroup/cpu.max >& /dev/null; then |
| # Get CPU limits under cgroups v2 |
| CORES=$(awk "$awk_divide_roundup" /sys/fs/cgroup/cpu.max) |
| echo "Detected $CORES cores from cgroups v2" |
| elif grep -v '\-' /sys/fs/cgroup/cpu/cpu.cfs_quota_us >& /dev/null; then |
| # Get CPU limits under cgroups v1 |
| CORES=$(paste /sys/fs/cgroup/cpu/cpu.cfs_quota_us /sys/fs/cgroup/cpu/cpu.cfs_period_us | |
| awk "$awk_divide_roundup") |
| echo "Detected $CORES cores from cgroups v1" |
| else |
| CORES=$(getconf _NPROCESSORS_ONLN) |
| fi |
| # Reduce the concurrency for local tests to half the number of cores in the system. |
| export NUM_CONCURRENT_TESTS="${NUM_CONCURRENT_TESTS-$((CORES / 2))}" |
| |
| export KUDU_MASTER_HOSTS="${KUDU_MASTER_HOSTS:-${INTERNAL_LISTEN_HOST}}" |
| export KUDU_MASTER_PORT="${KUDU_MASTER_PORT:-7051}" |
| export KUDU_MASTER_WEBUI_PORT="${KUDU_MASTER_WEBUI_PORT:-8051}" |
| |
| export IMPALA_FE_DIR="$IMPALA_HOME/fe" |
| export IMPALA_BE_DIR="$IMPALA_HOME/be" |
| export IMPALA_WORKLOAD_DIR="$IMPALA_HOME/testdata/workloads" |
| export IMPALA_AUX_WORKLOAD_DIR="$IMPALA_AUX_TEST_HOME/testdata/workloads" |
| export IMPALA_DATASET_DIR="$IMPALA_HOME/testdata/datasets" |
| export IMPALA_AUX_DATASET_DIR="$IMPALA_AUX_TEST_HOME/testdata/datasets" |
| export IMPALA_COMMON_DIR="$IMPALA_HOME/common" |
| export PATH="$IMPALA_TOOLCHAIN_PACKAGES_HOME/gdb-$IMPALA_GDB_VERSION/bin:$PATH" |
| export PATH="$IMPALA_TOOLCHAIN_PACKAGES_HOME/cmake-$IMPALA_CMAKE_VERSION/bin/:$PATH" |
| export PATH="$IMPALA_HOME/bin:$PATH" |
| |
| export HADOOP_CONF_DIR="$IMPALA_FE_DIR/src/test/resources" |
| # The include and lib paths are needed to pick up hdfs.h and libhdfs.* |
| # Allow overriding in case we want to point to a package/install with a different layout. |
| export HADOOP_INCLUDE_DIR=${HADOOP_INCLUDE_DIR_OVERRIDE:-"${HADOOP_HOME}/include"} |
| export HADOOP_LIB_DIR=${HADOOP_LIB_DIR_OVERRIDE:-"${HADOOP_HOME}/lib"} |
| |
| # Beware of adding entries from $HADOOP_HOME here, because they can change |
| # the order of the classpath, leading to configuration not showing up first. |
| export HADOOP_CLASSPATH="${HADOOP_CLASSPATH-}" |
| # Add Ozone Hadoop filesystem implementation when using Ozone |
| if [ "${TARGET_FILESYSTEM}" = "ozone" ]; then |
| OZONE_JAR="ozone-filesystem-hadoop3-${IMPALA_OZONE_VERSION}.jar" |
| HADOOP_CLASSPATH="${HADOOP_CLASSPATH}:${OZONE_HOME}/share/ozone/lib/${OZONE_JAR}" |
| fi |
| # Add the path containing the hadoop-aws jar, which is required to access AWS from the |
| # minicluster. |
# Please note that the * is inside quotes so it won't get expanded by bash but by
# Java; see "Understanding class path wildcards" at http://goo.gl/f0cfft
| HADOOP_CLASSPATH="${HADOOP_CLASSPATH}:${HADOOP_HOME}/share/hadoop/tools/lib/*" |
| |
| export PATH="$HADOOP_HOME/bin:$PATH" |
| |
| if $USE_APACHE_RANGER; then |
| export RANGER_HOME=${RANGER_HOME_OVERRIDE:-\ |
| "${APACHE_COMPONENTS_HOME}/ranger-${IMPALA_RANGER_VERSION}-admin"} |
| else |
| export RANGER_HOME=\ |
| ${RANGER_HOME_OVERRIDE:-"${CDP_COMPONENTS_HOME}/ranger-${IMPALA_RANGER_VERSION}-admin"} |
| fi |
| export RANGER_CONF_DIR="$IMPALA_HOME/fe/src/test/resources" |
| |
| # To configure Hive logging, there's a hive-log4j2.properties[.template] |
| # file in fe/src/test/resources. To get it into the classpath earlier |
| # than the hive-log4j2.properties file included in some Hive jars, |
| # we must set HIVE_CONF_DIR. Additionally, on Hadoop 3, because of |
| # https://issues.apache.org/jira/browse/HADOOP-15019, when HIVE_CONF_DIR happens to equal |
| # HADOOP_CONF_DIR, it gets de-duped out of its pole position in the CLASSPATH variable, |
| # so we add an extra "./" into the path to avoid that. Use HADOOP_SHELL_SCRIPT_DEBUG=true |
| # to debug issues like this. Hive may log something like: |
| # Logging initialized using configuration in file:.../fe/src/test/resources/hive-log4j2.properties |
| # |
| # To debug log4j2 loading issues, add to HADOOP_CLIENT_OPTS: |
| # -Dorg.apache.logging.log4j.simplelog.StatusLogger.level=TRACE |
| # |
| # We use a unique -Dhive.log.file to distinguish the HiveMetaStore and HiveServer2 logs. |
| export HIVE_CONF_DIR="$IMPALA_FE_DIR/./src/test/resources" |
| |
# Hive looks for jar files in the single directory given by HIVE_AUX_JARS_PATH (or in
# a list of jars specified by HIVE_AUX_JARS_PATH), plus any jars in AUX_CLASSPATH.
| # The Postgres JDBC driver is downloaded by maven when building the frontend. |
| # Export the location of Postgres JDBC driver so Ranger can pick it up. |
| export POSTGRES_JDBC_DRIVER="${IMPALA_FE_DIR}/target/dependency/postgresql-${IMPALA_POSTGRES_JDBC_DRIVER_VERSION}.jar" |
| |
| export HIVE_AUX_JARS_PATH="$POSTGRES_JDBC_DRIVER" |
# Add the iceberg-hive-runtime jar so that HiveIcebergStorageHandler is available.
# This is only needed by Apache Hive3, since CDP Hive3 already ships the
# hive-iceberg-handler jar in its lib folder.
| if $USE_APACHE_HIVE; then |
| export HIVE_AUX_JARS_PATH="$HIVE_AUX_JARS_PATH:\ |
| $IMPALA_HOME/fe/target/dependency/iceberg-hive-runtime-${IMPALA_ICEBERG_VERSION}.jar" |
| fi |
| export AUX_CLASSPATH="" |
| ### Tell hive not to use jline |
| export HADOOP_USER_CLASSPATH_FIRST=true |
| |
| # Add the jars so hive can create hbase tables. |
| export AUX_CLASSPATH="$AUX_CLASSPATH:$HBASE_HOME/lib/hbase-common-${IMPALA_HBASE_VERSION}.jar" |
| export AUX_CLASSPATH="$AUX_CLASSPATH:$HBASE_HOME/lib/hbase-client-${IMPALA_HBASE_VERSION}.jar" |
| export AUX_CLASSPATH="$AUX_CLASSPATH:$HBASE_HOME/lib/hbase-server-${IMPALA_HBASE_VERSION}.jar" |
| export AUX_CLASSPATH="$AUX_CLASSPATH:$HBASE_HOME/lib/hbase-protocol-${IMPALA_HBASE_VERSION}.jar" |
| export AUX_CLASSPATH="$AUX_CLASSPATH:$HBASE_HOME/lib/hbase-hadoop-compat-${IMPALA_HBASE_VERSION}.jar" |
| |
| export HBASE_CONF_DIR="$IMPALA_FE_DIR/src/test/resources" |
| # Suppress Ozone deprecation warning |
| export OZONE_CONF_DIR="$IMPALA_FE_DIR/src/test/resources" |
| |
| # To use a local build of Kudu, set KUDU_BUILD_DIR to the path Kudu was built in and |
| # set KUDU_CLIENT_DIR to the path KUDU was installed in. |
| # Example: |
| # git clone https://github.com/cloudera/kudu.git |
| # ...build 3rd party etc... |
| # mkdir -p $KUDU_BUILD_DIR |
| # cd $KUDU_BUILD_DIR |
| # cmake <path to Kudu source dir> |
| # make |
| # DESTDIR=$KUDU_CLIENT_DIR make install |
| export KUDU_BUILD_DIR=${KUDU_BUILD_DIR-} |
| export KUDU_CLIENT_DIR=${KUDU_CLIENT_DIR-} |
| if [[ -n "$KUDU_BUILD_DIR" && -z "$KUDU_CLIENT_DIR" ]]; then |
| echo When KUDU_BUILD_DIR is set KUDU_CLIENT_DIR must also be set. 1>&2 |
| return 1 |
| fi |
| if [[ -z "$KUDU_BUILD_DIR" && -n "$KUDU_CLIENT_DIR" ]]; then |
| echo When KUDU_CLIENT_DIR is set KUDU_BUILD_DIR must also be set. 1>&2 |
| return 1 |
| fi |
| |
| # Only applies to the minicluster Kudu (we always link against the libkudu_client for the |
| # overall build type) and does not apply when using a local Kudu build. |
| export USE_KUDU_DEBUG_BUILD=${USE_KUDU_DEBUG_BUILD-false} |
| |
| export IMPALA_KUDU_VERSION=${IMPALA_KUDU_VERSION-"e742f86f6d"} |
| export IMPALA_KUDU_HOME=${IMPALA_TOOLCHAIN_PACKAGES_HOME}/kudu-$IMPALA_KUDU_VERSION |
| export IMPALA_KUDU_JAVA_HOME=\ |
| ${IMPALA_TOOLCHAIN_PACKAGES_HOME}/kudu-${IMPALA_KUDU_VERSION}/java |
| export IMPALA_TOOLCHAIN_KUDU_MAVEN_REPOSITORY=\ |
| "file://${IMPALA_KUDU_JAVA_HOME}/repository" |
| export IMPALA_TOOLCHAIN_KUDU_MAVEN_REPOSITORY_ENABLED=true |
| |
| # Set $THRIFT_XXX_HOME to the Thrift directory in toolchain. |
| export THRIFT_CPP_HOME="${IMPALA_TOOLCHAIN_PACKAGES_HOME}/thrift-${IMPALA_THRIFT_CPP_VERSION}" |
| export THRIFT_JAVA_HOME="${IMPALA_TOOLCHAIN_PACKAGES_HOME}/thrift-${IMPALA_THRIFT_JAVA_VERSION}" |
| export THRIFT_PY_HOME="${IMPALA_TOOLCHAIN_PACKAGES_HOME}/thrift-${IMPALA_THRIFT_PY_VERSION}" |
| |
| # ASAN needs a matching version of llvm-symbolizer to symbolize stack traces. |
| export ASAN_SYMBOLIZER_PATH="${IMPALA_TOOLCHAIN_PACKAGES_HOME}/llvm-${IMPALA_LLVM_ASAN_VERSION}/bin/llvm-symbolizer" |
| |
| export CLUSTER_DIR="${IMPALA_HOME}/testdata/cluster" |
| |
# The number of parallel build processes we should run at a time. Require 2GB of
# memory per build process, as too many concurrent compilations can exhaust available
# memory and fail a build.
| if $IS_OSX; then |
| AVAILABLE_MEM=$(($(sysctl -n hw.memsize) / 1024 / 1024 / 1024)) |
| else |
| # MemTotal: 65550228 kB |
| AVAILABLE_MEM=$(awk '/MemTotal/{print int($2/1024/1024)}' /proc/meminfo) |
| fi |
| if grep -v max /sys/fs/cgroup/memory.max >& /dev/null; then |
| # Get memory limits under cgroups v2 |
| CGROUP_MEM_LIMIT=$(($(cat /sys/fs/cgroup/memory.max) / 1024 / 1024 / 1024)) |
| echo "Detected $CGROUP_MEM_LIMIT GB memory limit from cgroups v2" |
| elif grep -v '\-' /sys/fs/cgroup/memory/memory.limit_in_bytes >& /dev/null; then |
| # Get memory limits under cgroups v1 |
| CGROUP_MEM_LIMIT=$(( |
| $(cat /sys/fs/cgroup/memory/memory.limit_in_bytes) / 1024 / 1024 / 1024)) |
| echo "Detected $CGROUP_MEM_LIMIT GB memory limit from cgroups v1" |
| else |
  CGROUP_MEM_LIMIT=8589934591 # max int64 bytes in GB, i.e. effectively unlimited
| fi |
| AVAILABLE_MEM=$((AVAILABLE_MEM > $CGROUP_MEM_LIMIT ? $CGROUP_MEM_LIMIT : $AVAILABLE_MEM)) |
| BOUNDED_CONCURRENCY=$((AVAILABLE_MEM / 2)) |
| if [[ $AVAILABLE_MEM -lt 2 ]]; then |
| echo "Insufficient memory ($AVAILABLE_MEM GB) to build Impala" |
| exit 1 |
| elif [[ $BOUNDED_CONCURRENCY -lt $CORES ]]; then |
| echo "Bounding concurrency for available memory ($AVAILABLE_MEM GB)" |
| else |
| BOUNDED_CONCURRENCY=$CORES |
| fi |
| export IMPALA_BUILD_THREADS=${IMPALA_BUILD_THREADS-"${BOUNDED_CONCURRENCY}"} |
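# For example, a 16-core machine with 64 GB of memory gets IMPALA_BUILD_THREADS=16
# (bounded by cores), while the same machine limited to 8 GB by cgroups would get 4
# (bounded by memory).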
| |
| # Additional flags to pass to make or ninja. |
| export IMPALA_MAKE_FLAGS=${IMPALA_MAKE_FLAGS-} |
| |
| # Some environments (like the packaging build) might not have $USER set. Fix that here. |
| export USER="${USER-`id -un`}" |
| |
| # These arguments are, despite the name, passed to every JVM created |
| # by an impalad. |
# - Enable JNI check
#   When running Hive UDFs, this check makes them unacceptably slow (over 100x).
#   Enable it if you suspect a JNI issue.
# TODO: figure out how to turn this off only for the code that can't run with it.
| #LIBHDFS_OPTS="-Xcheck:jni -Xcheck:nabounds" |
| export LIBHDFS_OPTS="${LIBHDFS_OPTS:-} -Djava.library.path=${HADOOP_LIB_DIR}/native/" |
| LIBHDFS_OPTS+=" -XX:ErrorFile=${IMPALA_LOGS_DIR}/hs_err_pid%p.log" |
| |
| |
| # IMPALA-5080: Our use of PermGen space sometimes exceeds the default maximum while |
| # running tests that load UDF jars. |
| LIBHDFS_OPTS="${LIBHDFS_OPTS} -XX:MaxPermSize=128m" |
| |
export CLASSPATH="$IMPALA_FE_DIR/target/dependency${CLASSPATH:+:${CLASSPATH}}"
| CLASSPATH="$IMPALA_FE_DIR/target/classes:$CLASSPATH" |
| CLASSPATH="$IMPALA_FE_DIR/src/test/resources:$CLASSPATH" |
| |
| # A marker in the environment to prove that we really did source this file |
| export IMPALA_CONFIG_SOURCED=1 |
| |
| echo "IMPALA_VERSION = $IMPALA_VERSION" |
| echo "IMPALA_HOME = $IMPALA_HOME" |
| echo "HADOOP_HOME = $HADOOP_HOME" |
| echo "HADOOP_CONF_DIR = $HADOOP_CONF_DIR" |
| echo "HADOOP_INCLUDE_DIR = $HADOOP_INCLUDE_DIR" |
| echo "HADOOP_LIB_DIR = $HADOOP_LIB_DIR" |
| echo "IMPALA_CLUSTER_NODES_DIR= $IMPALA_CLUSTER_NODES_DIR" |
| echo "HIVE_HOME = $HIVE_HOME" |
| echo "HIVE_CONF_DIR = $HIVE_CONF_DIR" |
| echo "HIVE_SRC_DIR = $HIVE_SRC_DIR" |
| echo "HBASE_HOME = $HBASE_HOME" |
| echo "HBASE_CONF_DIR = $HBASE_CONF_DIR" |
| echo "OZONE_HOME = $OZONE_HOME" |
| echo "OZONE_CONF_DIR = $OZONE_CONF_DIR" |
| echo "RANGER_HOME = $RANGER_HOME" |
| echo "RANGER_CONF_DIR = $RANGER_CONF_DIR " |
| echo "THRIFT_CPP_HOME = $THRIFT_CPP_HOME" |
| echo "THRIFT_JAVA_HOME = $THRIFT_JAVA_HOME" |
| echo "THRIFT_PY_HOME = $THRIFT_PY_HOME" |
| echo "CLASSPATH = $CLASSPATH" |
| echo "LIBHDFS_OPTS = $LIBHDFS_OPTS" |
| echo "JAVA_HOME = $JAVA_HOME" |
| echo "POSTGRES_JDBC_DRIVER = $POSTGRES_JDBC_DRIVER" |
| echo "IMPALA_TOOLCHAIN = $IMPALA_TOOLCHAIN" |
| echo "IMPALA_TOOLCHAIN_PACKAGES_HOME = $IMPALA_TOOLCHAIN_PACKAGES_HOME" |
| echo "METASTORE_DB = $METASTORE_DB" |
| echo "DOWNLOAD_CDH_COMPONENTS = $DOWNLOAD_CDH_COMPONENTS" |
| echo "IMPALA_MAVEN_OPTIONS = $IMPALA_MAVEN_OPTIONS" |
| echo "IMPALA_TOOLCHAIN_HOST = $IMPALA_TOOLCHAIN_HOST" |
| echo "CDP_BUILD_NUMBER = $CDP_BUILD_NUMBER" |
| echo "CDP_COMPONENTS_HOME = $CDP_COMPONENTS_HOME" |
| if $USE_APACHE_COMPONENTS; then |
| echo "APACHE_MIRROR = $APACHE_MIRROR" |
| echo "APACHE_COMPONENTS_HOME = $APACHE_COMPONENTS_HOME" |
| fi |
| echo "IMPALA_HADOOP_VERSION = $IMPALA_HADOOP_VERSION" |
| echo "IMPALA_AVRO_JAVA_VERSION= $IMPALA_AVRO_JAVA_VERSION" |
| echo "IMPALA_PARQUET_VERSION = $IMPALA_PARQUET_VERSION" |
| echo "IMPALA_HIVE_VERSION = $IMPALA_HIVE_VERSION" |
| echo "IMPALA_TEZ_VERSION = $IMPALA_TEZ_VERSION" |
| echo "IMPALA_HBASE_VERSION = $IMPALA_HBASE_VERSION" |
| echo "IMPALA_OZONE_VERSION = $IMPALA_OZONE_VERSION" |
| echo "IMPALA_HUDI_VERSION = $IMPALA_HUDI_VERSION" |
| echo "IMPALA_KUDU_VERSION = $IMPALA_KUDU_VERSION" |
| echo "IMPALA_RANGER_VERSION = $IMPALA_RANGER_VERSION" |
| echo "IMPALA_ICEBERG_VERSION = $IMPALA_ICEBERG_VERSION" |
| echo "IMPALA_COS_VERSION = $IMPALA_COS_VERSION" |
| echo "IMPALA_OBS_VERSION = $IMPALA_OBS_VERSION" |
| echo "IMPALA_SYSTEM_PYTHON2 = $IMPALA_SYSTEM_PYTHON2" |
| echo "IMPALA_SYSTEM_PYTHON3 = $IMPALA_SYSTEM_PYTHON3" |
| echo "IMPALA_BUILD_THREADS = $IMPALA_BUILD_THREADS" |
| echo "NUM_CONCURRENT_TESTS = $NUM_CONCURRENT_TESTS" |
| |
| # Kerberos things. If the cluster exists and is kerberized, source |
| # the required environment. This is required for any hadoop tool to |
| # work. Note that if impala-config.sh is sourced before the |
| # kerberized cluster is created, it will have to be sourced again |
| # *after* the cluster is created in order to pick up these settings. |
| export MINIKDC_ENV="${IMPALA_HOME}/testdata/bin/minikdc_env.sh" |
| if "${CLUSTER_DIR}/admin" is_kerberized || |
| ( ! "${CLUSTER_DIR}/admin" cluster_exists && [[ "$IMPALA_KERBERIZE" == "true" ]] ); then |
| |
| . "${MINIKDC_ENV}" |
| echo " *** This cluster is kerberized ***" |
| echo "KRB5_KTNAME = $KRB5_KTNAME" |
| echo "KRB5_CONFIG = $KRB5_CONFIG" |
| echo "KRB5_TRACE = ${KRB5_TRACE:-}" |
| echo "HADOOP_OPTS = $HADOOP_OPTS" |
| echo " *** This cluster is kerberized ***" |
| else |
| # If the cluster *isn't* kerberized, ensure that the environment isn't |
| # polluted with kerberos items that might screw us up. We go through |
| # everything set in the minikdc environment and explicitly unset it. |
| unset `grep export "${MINIKDC_ENV}" | sed "s/.*export \([^=]*\)=.*/\1/" \ |
| | sort | uniq` |
| fi |
| |
| # Check for minimum required Java version |
| # Only issue Java version warning when running Java 7. |
| if $JAVA -version 2>&1 | grep -q 'java version "1.7'; then |
| cat << EOF |
| |
| WARNING: Your development environment is configured for Hadoop 3 and Java 7. Hadoop 3 |
| requires at least Java 8. Your JAVA binary currently points to $JAVA |
| and reports the following version: |
| |
| EOF |
| $JAVA -version |
| echo |
| fi |