IMPALA-1071: Distributable python package for impala-shell
The patch adds a set of scripts for converting the impala-shell
into a true distributable python package. The package can be
installed using familiar python commands, e.g.:
$ python setup.py (install|develop)
or
$ pip install -e /path/to/dist/dir
The entry point script, make_python_package.sh, will run as a
part of the standard sequence of steps that results from calling
buildall.sh, and will produce a gzipped tarball inside of
Impala/shell/dist as an artifact. Thereafter, make_python_package.sh
can be run manually any time.
The expectation is that an official maintainer would need to manually
upload official releases to the Python Package Index as appropriate.
Change-Id: Ib8c745bddddf6a16f0c039430152745a2f00e044
Reviewed-on: http://gerrit.cloudera.org:8080/14181
Reviewed-by: David Knupp <dknupp@cloudera.com>
Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 6d72430..c355218 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -421,6 +421,10 @@
COMMAND "${CMAKE_SOURCE_DIR}/shell/make_shell_tarball.sh"
)
+# Builds the pip-installable impala-shell source distribution into shell/dist.
+# The environment assignment is applied through "cmake -E env" and the script is
+# passed as its own COMMAND word, rather than packing assignments and the script
+# path into one quoted string (which the generated build rule may treat as a
+# single word). CLEAN_DIST is not passed: the script only reads NO_CLEAN_DIST and
+# already cleans DIST_DIR by default.
+add_custom_target(shell_pypi_package DEPENDS shell_tarball
+  COMMAND ${CMAKE_COMMAND} -E env "DIST_DIR=${CMAKE_SOURCE_DIR}/shell/dist"
+          "${CMAKE_SOURCE_DIR}/shell/packaging/make_python_package.sh"
+)
+
add_custom_target(cscope ALL DEPENDS gen-deps
COMMAND "${CMAKE_SOURCE_DIR}/bin/gen-cscope.sh"
)
diff --git a/bin/rat_exclude_files.txt b/bin/rat_exclude_files.txt
index 9e15b0a..aefaeef 100644
--- a/bin/rat_exclude_files.txt
+++ b/bin/rat_exclude_files.txt
@@ -24,6 +24,8 @@
www/index.html
lib/python/impala_py_lib/__init__.py
lib/python/impala_py_lib/jenkins/__init__.py
+shell/packaging/MANIFEST.in
+shell/packaging/requirements.txt
# See $IMPALA_HOME/LICENSE.txt
be/src/gutil/*
@@ -91,6 +93,7 @@
be/src/thirdparty/pcg-cpp-0.98/README.md
lib/python/README.md
lib/python/impala_py_lib/gdb/README.md
+shell/packaging/README.md
# http://www.apache.org/legal/src-headers.html: "Test data for which the addition of a
# source header would cause the tests to fail."
diff --git a/shell/impala_client.py b/shell/impala_client.py
index 6761e8e..4dd22a8 100755
--- a/shell/impala_client.py
+++ b/shell/impala_client.py
@@ -939,6 +939,8 @@
if t.type == TApplicationException.UNKNOWN_METHOD:
raise MissingThriftMethodException(t.message)
raise
+ except TTransportException as e:
+ raise DisconnectedException("Error communicating with impalad: %s" % e)
return (resp.version, resp.webserver_address)
def _create_query_req(self, query_str, set_query_options):
@@ -1094,4 +1096,39 @@
if t.type == TApplicationException.UNKNOWN_METHOD:
raise MissingThriftMethodException(t.message)
raise RPCException("Application Exception : %s" % t)
+ except Exception as e:
+ # This final except clause should ONLY be exercised in the case of Impala
+ # shell being installed as a standalone python package from public PyPI,
+ # rather than being included as part of a typical Impala deployment.
+ #
+ # Essentially, it's a hack that is required due to issues stemming from
+ # IMPALA-6808. Because of the way the Impala python environment has been
+ # somewhat haphazardly constructed, we end up polluting the top level Impala
+ # python environment with modules that should really be sub-modules. One of
+ # the principal places this occurs is with the various modules required by
+ # the Impala shell. This isn't a concern when the shell is invoked via a
+ # specially installed version of python that belongs to Impala, but it does
+ # become an issue when the shell is being run using the system python.
+ #
+ # When we install the shell as a standalone package, we need to construct
+ # it in such a way that all of the internal modules are contained within
+ # a top-level impala_shell namespace. However, this then breaks various
+ # imports and, in this case, exception handling in the original code.
+ # As far as I can tell, there's no clean way to address this without fully
+ # resolving IMPALA-6808.
+ #
+ # Without taking some additional measure here to recognize certain common
+ # exceptions, especially Beeswax exceptions raised by RPC calls, when
+ # errors occur during a standalone shell session, we wind up falling
+ # entirely through this block and returning nothing to the caller (which
+ # happens to be the primary command loop in impala_shell.py). This in turn
+ # has the result of disconnecting the shell in the case of, say, even simple
+ # typos in database or table names.
+ if suppress_error_on_cancel and self.is_query_cancelled:
+ raise QueryCancelledByShellException()
+ else:
+ if "BeeswaxException" in str(e):
+ raise RPCException("ERROR: %s" % e.message)
+ if "QueryNotFoundException" in str(e):
+ raise QueryStateException('Error: Stale query handle')
diff --git a/shell/packaging/MANIFEST.in b/shell/packaging/MANIFEST.in
new file mode 100644
index 0000000..ec0d80f
--- /dev/null
+++ b/shell/packaging/MANIFEST.in
@@ -0,0 +1,3 @@
+include *.txt *.md *.py
+recursive-include impala_shell *.py
+recursive-exclude impala_shell *.pyc
diff --git a/shell/packaging/README.md b/shell/packaging/README.md
new file mode 100644
index 0000000..cd40b12
--- /dev/null
+++ b/shell/packaging/README.md
@@ -0,0 +1,73 @@
+# Impala Interactive Shell
+
+You can use the Impala shell tool (impala-shell) to connect to an Impala
+service. The shell allows you to set up databases and tables, insert data,
+and issue queries. For ad hoc queries and exploration, you can submit SQL
+statements in an interactive session. The impala-shell interpreter accepts
+all the same SQL statements listed in
+[Impala SQL Statements](http://impala.apache.org/docs/build/html/topics/impala_langref_sql.html),
+plus some shell-only commands that you can use for tuning performance and
+diagnosing problems.
+
+To automate your work, you can specify command-line options to process a single
+statement or a script file. (Other avenues for Impala automation via python
+are provided by Impyla or ODBC.)
+
+## Installing
+
+```
+$ pip install impala-shell
+```
+
+## Online documentation
+
+* [Impala Shell Documentation](http://impala.apache.org/docs/build/html/topics/impala_impala_shell.html)
+* [Apache Impala Documentation](http://impala.apache.org/impala-docs.html)
+
+## Quickstart
+
+### Non-interactive mode
+
+Processing a single query, e.g., ```show tables```:
+
+```
+$ impala-shell -i impalad-host.domain.com -d some_database -q 'show tables'
+```
+
+Processing a text file with a series of queries:
+
+```
+$ impala-shell -i impalad-host.domain.com -d some_database -f /path/to/queries.sql
+```
+
+### Launching the interactive shell
+
+To connect to an impalad host at the default service port (21000):
+
+```
+$ impala-shell -i impalad-host.domain.com
+Starting Impala Shell without Kerberos authentication
+Connected to impalad-host.domain.com:21000
+Server version: impalad version 2.11.0-SNAPSHOT RELEASE (build d4596f9ca3ea32a8008cdc809a7ac9a3dea47962)
+***********************************************************************************
+Welcome to the Impala shell.
+(Impala Shell v3.0.0-SNAPSHOT (73e90d2) built on Thu Mar 8 00:59:00 PST 2018)
+
+The '-B' command line flag turns off pretty-printing for query results. Use this
+flag to remove formatting from results you want to save for later, or to benchmark
+Impala.
+***********************************************************************************
+[impalad-host.domain.com:21000] >
+```
+
+### Launching the interactive shell (secure mode)
+
+To connect to a secure host using Kerberos and SSL:
+
+```
+$ impala-shell -k --ssl -i impalad-secure-host.domain.com
+```
+
+### Disconnecting
+
+To exit the shell when running interactively, press ```Ctrl-D``` at the shell prompt.
diff --git a/shell/packaging/__init__.py b/shell/packaging/__init__.py
new file mode 100644
index 0000000..43e0baa
--- /dev/null
+++ b/shell/packaging/__init__.py
@@ -0,0 +1,40 @@
+#!/usr/bin/env python
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+from os.path import dirname, abspath
+import sys
+
+# When installing the python shell as a standalone package, this __init__ is
+# used to work around the issues stemming from IMPALA-6808. Because of the way
+# the Impala python environment has been somewhat haphazardly constructed in
+# a deployed cluster, it ends up being "polluted" with top-level modules that
+# should really be sub-modules. One of the principal places this occurs is with
+# the various modules required by the Impala shell. This isn't a concern when
+# the shell is invoked via a specially installed version of python that belongs
+# to Impala, but it does become an issue when the shell is being run using the
+# system python.
+#
+# If we want to install the shell as a standalone package, we need to construct
+# it in such a way that all of the internal modules are contained within a
+# top-level impala_shell namespace. However, this then breaks various imports
+# throughout the Impala shell code. The way this file corrects that is to
+# append the directory containing this file (the impala_shell package
+# directory) to sys.path whenever the package is imported, so that the shell's
+# internal modules can still be imported by their bare names. As far as I can
+# tell, there's no cleaner way to address this without fully resolving
+# IMPALA-6808.
+impala_shell_dir = dirname(abspath(__file__))
+sys.path.append(impala_shell_dir)
diff --git a/shell/packaging/make_python_package.sh b/shell/packaging/make_python_package.sh
new file mode 100755
index 0000000..ba95148
--- /dev/null
+++ b/shell/packaging/make_python_package.sh
@@ -0,0 +1,87 @@
+#!/bin/bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# ----------------------------------------------------------------------
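+# This script is invoked during the Impala build process, and creates
+# a distributable python package of the Impala shell. The resulting
+# archive is written to ${DIST_DIR}, which defaults to a dist/ directory
+# next to this script. The Impala build sets DIST_DIR so that the
+# archive is saved to:
+#
+#   ${IMPALA_HOME}/shell/dist/impala_shell-<version>.tar.gz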
+#
+# Until the thrift-generated python files in ${IMPALA_HOME}/shell/gen-py
+# have been created by the build process, this script will not work.
+# It also relies upon the impala_build_version.py file created by the
+# parent packaging script, ${IMPALA_HOME}/shell/make_shell_tarball.sh,
+# which needs to be run before this script will work.
+#
+# After those files exist, however, this script can be run again at will.
+
+set -eu -o pipefail
+
+WORKING_DIR="$(cd "$(dirname "$0")" ; pwd -P )"
+SHELL_HOME="${IMPALA_HOME}"/shell
+STAGING_DIR="${WORKING_DIR}"/staging
+DIST_DIR="${DIST_DIR:-$WORKING_DIR/dist}"
+PACKAGE_DIR="${STAGING_DIR}"/impala_shell_package
+MODULE_LIB_DIR="${PACKAGE_DIR}"/impala_shell
+NO_CLEAN_DIST="${NO_CLEAN_DIST:-}"
+
+assemble_package_files() {
+ mkdir -p "${MODULE_LIB_DIR}"
+
+ cp -r "${SHELL_HOME}/gen-py"/* "${MODULE_LIB_DIR}"
+ cp -r "${THRIFT_HOME}/python/lib/python2.7/site-packages/thrift" "${MODULE_LIB_DIR}"
+
+ cp "${WORKING_DIR}/__init__.py" "${MODULE_LIB_DIR}"
+ cp "${SHELL_HOME}/impala_shell.py" "${MODULE_LIB_DIR}"
+ cp "${SHELL_HOME}/impala_client.py" "${MODULE_LIB_DIR}"
+ cp "${SHELL_HOME}/option_parser.py" "${MODULE_LIB_DIR}"
+ cp "${SHELL_HOME}/shell_output.py" "${MODULE_LIB_DIR}"
+ cp "${SHELL_HOME}/impala_shell_config_defaults.py" "${MODULE_LIB_DIR}"
+ cp "${SHELL_HOME}/TSSLSocketWithWildcardSAN.py" "${MODULE_LIB_DIR}"
+
+ cp "${SHELL_HOME}/packaging/README.md" "${PACKAGE_DIR}"
+ cp "${SHELL_HOME}/packaging/MANIFEST.in" "${PACKAGE_DIR}"
+ cp "${SHELL_HOME}/packaging/requirements.txt" "${PACKAGE_DIR}"
+ cp "${SHELL_HOME}/packaging/setup.py" "${PACKAGE_DIR}"
+
+ cp "${IMPALA_HOME}/LICENSE.txt" "${PACKAGE_DIR}"
+}
+
+create_distributable_python_package() {
+ # Generate a new python package tarball in ${IMPALA_HOME}/shell/dist
+ if [[ "${NO_CLEAN_DIST}" != "true" ]]; then
+ rm -rf "${DIST_DIR}"
+ fi
+
+ mkdir -p "${DIST_DIR}"
+
+ pushd "${PACKAGE_DIR}"
+ echo "Building package..."
+ PACKAGE_TYPE="${PACKAGE_TYPE:-}" OFFICIAL="${OFFICIAL:-}" \
+ python setup.py sdist --dist-dir "${DIST_DIR}"
+ popd
+
+ if [[ "${NO_CLEAN_DIST}" != "true" ]]; then
+ rm -rf "${STAGING_DIR}"
+ fi
+}
+
+assemble_package_files
+create_distributable_python_package
diff --git a/shell/packaging/requirements.txt b/shell/packaging/requirements.txt
new file mode 100644
index 0000000..32aef56
--- /dev/null
+++ b/shell/packaging/requirements.txt
@@ -0,0 +1,8 @@
+bitarray==1.0.1
+prettytable==0.7.1
+sasl==0.2.1
+setuptools>=36.8.0
+six==1.11.0
+sqlparse==0.1.19
+thrift==0.9.3
+thrift_sasl==0.2.1
diff --git a/shell/packaging/setup.py b/shell/packaging/setup.py
new file mode 100644
index 0000000..173e0d8
--- /dev/null
+++ b/shell/packaging/setup.py
@@ -0,0 +1,169 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
+"""Set up the Impala shell python package."""
+
+import datetime
+import os
+import re
+import sys
+import time
+
+from impala_shell import impala_build_version
+from setuptools import find_packages, setup
+from textwrap import dedent
+
+CURRENT_DIR = os.path.dirname(os.path.abspath(__file__))
+
+
+def parse_requirements(requirements_file='requirements.txt'):
+ """
+ Parse requirements from the requirements file, stripping comments.
+
+ Args:
+ requirements_file: path to a requirements file
+
+ Returns:
+ a list of python packages
+ """
+ lines = []
+ with open(requirements_file) as reqs:
+ for _ in reqs:
+ line = _.split('#')[0]
+ if line.strip():
+ lines.append(line)
+ return lines
+
+
+def get_version():
+ """Generate package version string when calling 'setup.py'.
+
+ When setup.py is being used to CREATE a distribution, e.g., via setup.py sdist
+ or setup.py bdist, then use the output from impala_build_version.get_version(),
+ and append modifiers as specified by the RELEASE_TYPE and OFFICIAL environment
+ variables. By default, the package created will be a dev release, designated
+ by timestamp. For example, if get_version() returns the string 3.0.0-SNAPSHOT,
+ the package version may be something like 3.0.0.dev20180322154653.
+
+ It's also possible set an evironment variable for BUILD_VERSION to override the
+ default build value returned from impala_build_version.get_version().
+
+ E.g., to specify an offical 3.4 beta 2 release (3.4b2), one would call:
+
+ BUILD_VERSION=3.4 RELEASE_TYPE=b2 OFFICIAL=true python setup.py sdist
+
+ The generated version string will be written to a version.txt file to be
+ referenced when the distribution is installed.
+
+ When setup.py is invoked during installation, e.g., via pip install or
+ setup.py install, read the package version from the version.txt file, which
+ is presumed to contain a single line containing a valid PEP-440 version string.
+ The file should have been generated when the distribution being installed was
+ created. (Although a version.txt file can also be created manually.)
+
+ See https://www.python.org/dev/peps/pep-0440/ for more info on python
+ version strings.
+
+ Returns:
+ A package version string compliant with PEP-440
+ """
+ version_file = os.path.join(CURRENT_DIR, 'version.txt')
+
+ if not os.path.isfile(version_file):
+ # If setup.py is being executed to create a distribution, e.g., via setup.py
+ # sdist or setup.py bdist, then derive the version and WRITE the version.txt
+ # file that will later be used for installations.
+ if os.getenv('BUILD_VERSION') is not None:
+ package_version = os.getenv('BUILD_VERSION')
+ else:
+ version_match = re.search('\d+\.\d+\.\d+', impala_build_version.get_version())
+ if version_match is None:
+ sys.exit('Unable to acquire Impala version.')
+ package_version = version_match.group(0)
+
+ # packages can be marked as alpha, beta, or rc RELEASE_TYPE
+ release_type = os.getenv('RELEASE_TYPE')
+ if release_type:
+ if not re.match('(a|b|rc)\d+?', release_type):
+ msg = """\
+ RELEASE_TYPE \'{0}\' does not conform to any PEP-440 release format:
+
+ aN (for alpha releases)
+ bN (for beta releases)
+ rcN (for release candidates)
+
+ where N is the number of the release"""
+ sys.exit(dedent(msg).format(release_type))
+ package_version += release_type
+
+ # packages that are not marked OFFICIAL have ".dev" + a timestamp appended
+ if os.getenv('OFFICIAL') != 'true':
+ epoch_t = time.time()
+ ts_fmt = '%Y%m%d%H%M%S'
+ timestamp = datetime.datetime.fromtimestamp(epoch_t).strftime(ts_fmt)
+ package_version = '{0}.dev{1}'.format(package_version, timestamp)
+
+ with open('version.txt', 'w') as version_file:
+ version_file.write(package_version)
+ else:
+ # If setup.py is being invoked during installation, e.g., via pip install
+ # or setup.py install, we expect a version.txt file from which to READ the
+ # version string.
+ with open(version_file) as version_file:
+ package_version = version_file.readline()
+
+ return package_version
+
+
+setup(
+ name='impala_shell',
+ python_requires='>2.6, <3.0.0',
+ version=get_version(),
+ description='Impala Shell',
+ long_description_content_type='text/markdown',
+ long_description=open('README.md').read(),
+ author="Impala Dev",
+ author_email='dev@impala.apache.org',
+ url='https://impala.apache.org/',
+ license='Apache Software License',
+ packages=find_packages(),
+ include_package_data=True,
+ install_requires=parse_requirements(),
+ entry_points={
+ 'console_scripts': [
+ 'impala-shell = impala_shell.impala_shell:impala_shell_main'
+ ]
+ },
+ classifiers=[
+ 'Development Status :: 5 - Production/Stable',
+ 'Environment :: Console',
+ 'Intended Audience :: Developers',
+ 'Intended Audience :: End Users/Desktop',
+ 'Intended Audience :: Science/Research',
+ 'License :: OSI Approved :: Apache Software License',
+ 'Operating System :: MacOS :: MacOS X',
+ 'Operating System :: POSIX :: Linux',
+ 'Programming Language :: Python :: 2 :: Only',
+ 'Programming Language :: Python :: 2.6',
+ 'Programming Language :: Python :: 2.7',
+ 'Topic :: Database :: Front-Ends'
+ ]
+)