#!/usr/bin/env impala-python
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
# Runs the Impala query tests, first executing the tests that cannot be run in parallel
# and then executing the remaining tests in parallel. All additional command line options
# are passed to py.test.
import itertools
import multiprocessing
import os
import pytest
import sys

# We whitelist valid test directories. If a new test directory is added, update this.
VALID_TEST_DIRS = ['failure', 'query_test', 'stress', 'unittests', 'aux_query_tests',
                   'shell', 'hs2', 'catalog_service', 'metadata', 'data_errors',
                   'statestore']

TEST_DIR = os.path.join(os.environ['IMPALA_HOME'], 'tests')
TEST_RESULT_DIR = os.path.join(os.environ['IMPALA_EE_TEST_LOGS_DIR'], 'results')

# Arguments that control output logging. If additional default arguments are needed they
# should go in the pytest.ini file.
LOGGING_ARGS = '--junitxml=%(result_dir)s/TEST-impala-%(log_name)s.xml '\
               '--resultlog=%(result_dir)s/TEST-impala-%(log_name)s.log'
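# As an illustration (with hypothetical values result_dir='/logs/results' and
# log_name='serial'), the template above renders to:
#   --junitxml=/logs/results/TEST-impala-serial.xml
#   --resultlog=/logs/results/TEST-impala-serial.log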

# The number of concurrent tests defaults to the number of CPU cores in the system.
# This can be overridden by setting the NUM_CONCURRENT_TESTS environment variable.
NUM_CONCURRENT_TESTS = multiprocessing.cpu_count()
if 'NUM_CONCURRENT_TESTS' in os.environ:
  NUM_CONCURRENT_TESTS = int(os.environ['NUM_CONCURRENT_TESTS'])

# Default the number of stress clients to 4x the number of CPUs (but not exceeding the
# default maximum number of concurrent connections).
# This can be overridden by setting the NUM_STRESS_CLIENTS environment variable.
# TODO: fix the stress test so it can start more clients than available connections
# without deadlocking (e.g. close the client after each test instead of on test class
# teardown).
NUM_STRESS_CLIENTS = min(multiprocessing.cpu_count() * 4, 64)
if 'NUM_STRESS_CLIENTS' in os.environ:
  NUM_STRESS_CLIENTS = int(os.environ['NUM_STRESS_CLIENTS'])
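# For example, a hypothetical invocation overriding both settings:
#   NUM_CONCURRENT_TESTS=8 NUM_STRESS_CLIENTS=16 ./run-tests.py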


class TestExecutor(object):
  def __init__(self, exit_on_error=True):
    self._exit_on_error = exit_on_error
    self.tests_failed = False

  def run_tests(self, args):
    try:
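      # Note: older py.test releases accept a single space-separated string of
      # arguments here and split it into a list themselves; this script relies
      # on that legacy behavior.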
      exit_code = pytest.main(args)
    except:
      sys.stderr.write("Unexpected exception with pytest {}\n".format(args))
      raise
    if exit_code != 0 and self._exit_on_error:
      sys.exit(exit_code)
    self.tests_failed = exit_code != 0 or self.tests_failed


def build_test_args(log_base_name, valid_dirs):
  """
  Modify and return the command line arguments that will be passed to py.test.

  Args:
    log_base_name: the base name for the log file to write
    valid_dirs: a whitelist of sub-directories containing the desired tests (i.e.,
      those that will not get flagged with --ignore before py.test is called)

  Return:
    a modified command line for py.test

  For most test stages (e.g., serial, parallel), we augment the given command
  line arguments with a list of directories to ignore. However, when running the
  metric verification tests at the end of the test run:

  - verifiers.test_verify_metrics.TestValidateMetrics.test_metrics_are_zero
  - verifiers.test_verify_metrics.TestValidateMetrics.test_num_unused_buffers

  then we instead need to filter out args that specify other tests (otherwise,
  they would be run again), but still retain the basic config args.
  """
  logging_args = LOGGING_ARGS % {'result_dir': TEST_RESULT_DIR,
                                 'log_name': log_base_name}

  # The raw command line arguments need to be modified because of the way our
  # repo is organized. We have several non-test directories and files in our
  # tests/ path, which causes auto-discovery problems for pytest -- i.e., pytest
  # will futilely try to execute them as tests, resulting in misleading failures.
  # (There is a JIRA filed to restructure this: IMPALA-4417.)
  #
  # e.g. --ignore="comparison" --ignore="util" --ignore=etc...
  ignored_dirs = build_ignore_dir_arg_list(valid_dirs=valid_dirs)

  if valid_dirs != ['verifiers']:
    # This isn't the metrics verification stage yet, so after determining the
    # logging params and which sub-directories within tests/ to ignore, just tack
    # on any other args from sys.argv -- excluding sys.argv[0], which of course
    # is the script name
    test_args = '%s %s %s' % (ignored_dirs, logging_args, ' '.join(sys.argv[1:]))
  else:
    # When filtering, we need to account for the fact that '--foo bar' and
    # '--foo=bar' might be supplied by the user, as well as random options. E.g.,
    # if the user specified the following on the command line:
    #
    #   'run-tests.py --arg1 value1 --random_opt --arg2=value2'
    #
    # we want an iterable that, if unpacked as a list, would look like:
    #
    #   [arg1, value1, random_opt, arg2, value2]
    #
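    # As a further illustration (assuming --arg1 and --arg2 are recognized pytest
    # options while --random_opt is not), the filtering below would then keep:
    #
    #   [arg1, value1, arg2, value2]
    #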
    raw_args = itertools.chain(*[arg.split('=') for arg in sys.argv[1:]])
    kept_args = []

    for arg in raw_args:
      try:
        pytest.config.getvalue(arg.strip('-'))  # Raises ValueError if invalid arg
        kept_args += [arg, str(raw_args.next())]
      except ValueError:
        # For any arg that's not a required pytest config arg, we can filter it out
        continue
    test_args = '%s %s %s' % (ignored_dirs, logging_args, ' '.join(kept_args))

  return test_args


def build_ignore_dir_arg_list(valid_dirs):
  """Builds a string of --ignore arguments covering every test subdirectory that is
  not in valid_dirs."""
  subdirs = [subdir for subdir in os.listdir(TEST_DIR)
             if os.path.isdir(os.path.join(TEST_DIR, subdir))]
  # In bash, in single-quoted strings, single quotes cannot appear - not even escaped!
  # Instead, one must close the string with a single quote, insert a literal single
  # quote (escaped, so bash doesn't think you're starting a new string), then start
  # your single-quoted string again. That works out to the four-character sequence '\''.
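  # For example, a hypothetical directory named "don't_run" would be emitted as:
  #   --ignore='don'\''t_run'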
  return ' '.join(["--ignore='%s'" % d.replace("'", "'\\''")
                   for d in set(subdirs) - set(valid_dirs)])


if __name__ == "__main__":
  exit_on_error = '-x' in sys.argv or '--exitfirst' in sys.argv
  test_executor = TestExecutor(exit_on_error=exit_on_error)

  # If the user is just asking for --help, print the help text and exit.
  if '-h' in sys.argv[1:] or '--help' in sys.argv[1:]:
    test_executor.run_tests(sys.argv[1:])
    sys.exit(0)

  os.chdir(TEST_DIR)

  # Create the test result directory if it doesn't already exist.
  if not os.path.exists(TEST_RESULT_DIR):
    os.makedirs(TEST_RESULT_DIR)

  # First run the query tests that need to be executed serially.
  args = '-m "execute_serially" %s' % build_test_args('serial', VALID_TEST_DIRS)
  test_executor.run_tests(args)
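
  # As an illustration (with hypothetical paths), the serial-stage args above might
  # render to something like:
  #   -m "execute_serially" --ignore='comparison' --ignore='util'
  #       --junitxml=/logs/results/TEST-impala-serial.xml
  #       --resultlog=/logs/results/TEST-impala-serial.log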

  # Run the stress tests.
  args = '-m "stress" -n %d %s' %\
      (NUM_STRESS_CLIENTS, build_test_args('stress', VALID_TEST_DIRS))
  test_executor.run_tests(args)

  # Run the remaining query tests in parallel.
  args = '-m "not execute_serially and not stress" -n %d %s' %\
      (NUM_CONCURRENT_TESTS, build_test_args('parallel', VALID_TEST_DIRS))
  test_executor.run_tests(args)

  # Finally, validate impalad/statestored metrics.
  args = build_test_args(log_base_name='verify-metrics', valid_dirs=['verifiers'])
  args += ' verifiers/test_verify_metrics.py'
  test_executor.run_tests(args)

  if test_executor.tests_failed:
    sys.exit(1)