| # Licensed to the Apache Software Foundation (ASF) under one |
| # or more contributor license agreements. See the NOTICE file |
| # distributed with this work for additional information |
| # regarding copyright ownership. The ASF licenses this file |
| # to you under the Apache License, Version 2.0 (the |
| # "License"); you may not use this file except in compliance |
| # with the License. You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, |
| # software distributed under the License is distributed on an |
| # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| # KIND, either express or implied. See the License for the |
| # specific language governing permissions and limitations |
| # under the License. |
| |
| # Impala tests for queries that query metadata and set session settings |
| |
| import pytest |
| import re |
| |
| from tests.beeswax.impala_beeswax import ImpalaBeeswaxException |
| from tests.common.impala_test_suite import ImpalaTestSuite |
| from tests.common.skip import (SkipIfIsilon, SkipIfS3, SkipIfABFS, SkipIfADLS, |
| SkipIfLocal, SkipIfCatalogV2) |
| from tests.common.test_dimensions import ALL_NODES_ONLY |
| from tests.common.test_dimensions import create_exec_option_dimension |
| from tests.common.test_dimensions import create_uncompressed_text_dimension |
| from tests.util.filesystem_utils import get_fs_path |
| |
| # TODO: For these tests to pass, all table metadata must be created exhaustively. |
| # the tests should be modified to remove that requirement. |
| class TestMetadataQueryStatements(ImpalaTestSuite): |
| |
| CREATE_DATA_SRC_STMT = ("CREATE DATA SOURCE %s LOCATION '" + |
| get_fs_path("/test-warehouse/data-sources/test-data-source.jar") + |
| "' CLASS 'org.apache.impala.extdatasource.AllTypesDataSource' API_VERSION 'V1'") |
| DROP_DATA_SRC_STMT = "DROP DATA SOURCE IF EXISTS %s" |
| TEST_DATA_SRC_NAMES = ["show_test_ds1", "show_test_ds2"] |
| AVRO_SCHEMA_LOC = get_fs_path("/test-warehouse/avro_schemas/functional/alltypes.json") |
| |
| @classmethod |
| def get_workload(self): |
| return 'functional-query' |
| |
| @classmethod |
| def add_test_dimensions(cls): |
| super(TestMetadataQueryStatements, cls).add_test_dimensions() |
| sync_ddl_opts = [0, 1] |
| if cls.exploration_strategy() != 'exhaustive': |
| # Cut down on test runtime by only running with SYNC_DDL=0 |
| sync_ddl_opts = [0] |
| |
| cls.ImpalaTestMatrix.add_dimension(create_exec_option_dimension( |
| cluster_sizes=ALL_NODES_ONLY, |
| disable_codegen_options=[False], |
| batch_sizes=[0], |
| sync_ddl=sync_ddl_opts)) |
| cls.ImpalaTestMatrix.add_dimension( |
| create_uncompressed_text_dimension(cls.get_workload())) |
| |
| def test_use(self, vector): |
| self.run_test_case('QueryTest/use', vector) |
| |
| def test_show(self, vector): |
| self.run_test_case('QueryTest/show', vector) |
| |
| def test_show_stats(self, vector): |
| self.run_test_case('QueryTest/show-stats', vector, "functional") |
| |
| def test_describe_path(self, vector, unique_database): |
| self.run_test_case('QueryTest/describe-path', vector, unique_database) |
| |
| # Missing Coverage: Describe formatted compatibility between Impala and Hive when the |
| # data doesn't reside in hdfs. |
| @SkipIfIsilon.hive |
| @SkipIfS3.hive |
| @SkipIfABFS.hive |
| @SkipIfADLS.hive |
| @SkipIfLocal.hive |
| def test_describe_formatted(self, vector, unique_database): |
| # For describe formmated, we try to match Hive's output as closely as possible. |
| # However, we're inconsistent with our handling of NULLs vs theirs - Impala sometimes |
| # specifies 'NULL' where Hive uses an empty string, and Hive somtimes specifies 'null' |
| # with padding where Impala uses a sequence of blank spaces - and for now |
| # we want to leave it that way to not affect users who rely on this output. |
| def compare_describe_formatted(impala_results, hive_results): |
| for impala, hive in zip(re.split(',|\n', impala_results), |
| re.split(',|\n', hive_results)): |
| |
| if impala != hive: |
| # If they don't match, check if it's because of the inconsistent null handling. |
| impala = impala.replace(' ', '').lower() |
| hive = hive.replace(' ', '').lower() |
| if not ((impala == "'null'" and hive == "''") or |
| (impala == "''" and hive == "'null'")): |
| return False |
| return True |
| |
| # Describe a partitioned table. |
| self.exec_and_compare_hive_and_impala_hs2("describe formatted functional.alltypes", |
| compare=compare_describe_formatted) |
| self.exec_and_compare_hive_and_impala_hs2( |
| "describe formatted functional_text_lzo.alltypes", |
| compare=compare_describe_formatted) |
| |
| # Describe an unpartitioned table. |
| self.exec_and_compare_hive_and_impala_hs2("describe formatted tpch.lineitem", |
| compare=compare_describe_formatted) |
| self.exec_and_compare_hive_and_impala_hs2("describe formatted functional.jointbl", |
| compare=compare_describe_formatted) |
| |
| # Create and describe an unpartitioned and partitioned Avro table created |
| # by Impala without any column definitions. |
| # TODO: Instead of creating new tables here, change one of the existing |
| # Avro tables to be created without any column definitions. |
| self.client.execute("create database if not exists %s" % unique_database) |
| self.client.execute(( |
| "create table %s.%s with serdeproperties ('avro.schema.url'='%s') stored as avro" |
| % (unique_database, "avro_alltypes_nopart", self.AVRO_SCHEMA_LOC))) |
| self.exec_and_compare_hive_and_impala_hs2("describe formatted avro_alltypes_nopart", |
| compare=compare_describe_formatted) |
| |
| self.client.execute(( |
| "create table %s.%s partitioned by (year int, month int) " |
| "with serdeproperties ('avro.schema.url'='%s') stored as avro" |
| % (unique_database, "avro_alltypes_part", self.AVRO_SCHEMA_LOC))) |
| self.exec_and_compare_hive_and_impala_hs2("describe formatted avro_alltypes_part", |
| compare=compare_describe_formatted) |
| |
| self.exec_and_compare_hive_and_impala_hs2(\ |
| "describe formatted functional.alltypes_view_sub", |
| compare=compare_describe_formatted) |
| |
| @pytest.mark.execute_serially # due to data src setup/teardown |
| @SkipIfCatalogV2.data_sources_unsupported() |
| def test_show_data_sources(self, vector): |
| try: |
| self.__create_data_sources() |
| self.run_test_case('QueryTest/show-data-sources', vector) |
| finally: |
| self.__drop_data_sources() |
| |
| def __drop_data_sources(self): |
| for name in self.TEST_DATA_SRC_NAMES: |
| self.client.execute(self.DROP_DATA_SRC_STMT % (name,)) |
| |
| def __create_data_sources(self): |
| self.__drop_data_sources() |
| for name in self.TEST_DATA_SRC_NAMES: |
| self.client.execute(self.CREATE_DATA_SRC_STMT % (name,)) |
| |
| @SkipIfS3.hive |
| @SkipIfABFS.hive |
| @SkipIfADLS.hive |
| @SkipIfIsilon.hive |
| @SkipIfLocal.hive |
| @pytest.mark.execute_serially # because of use of hardcoded database |
| def test_describe_db(self, vector, cluster_properties): |
| self.__test_describe_db_cleanup() |
| try: |
| self.client.execute("create database impala_test_desc_db1") |
| self.client.execute("create database impala_test_desc_db2 " |
| "comment 'test comment'") |
| self.client.execute("create database impala_test_desc_db3 " |
| "location '" + get_fs_path("/testdb") + "'") |
| self.client.execute("create database impala_test_desc_db4 comment 'test comment' " |
| "location \"" + get_fs_path("/test2.db") + "\"") |
| self.run_stmt_in_hive("create database hive_test_desc_db comment 'test comment' " |
| "with dbproperties('pi' = '3.14', 'e' = '2.82')") |
| if cluster_properties.is_event_polling_enabled(): |
| # Using HMS event processor - wait until the database shows up. |
| self.wait_for_db_to_appear("hive_test_desc_db", timeout_s=30) |
| else: |
| # Invalidate metadata to pick up hive-created db. |
| self.client.execute("invalidate metadata") |
| self.run_test_case('QueryTest/describe-db', vector) |
| if not cluster_properties.is_catalog_v2_cluster(): |
| self.run_test_case('QueryTest/describe-hive-db', vector) |
| finally: |
| self.__test_describe_db_cleanup() |
| |
| def __test_describe_db_cleanup(self): |
| self.cleanup_db('hive_test_desc_db') |
| self.cleanup_db('impala_test_desc_db1') |
| self.cleanup_db('impala_test_desc_db2') |
| self.cleanup_db('impala_test_desc_db3') |
| self.cleanup_db('impala_test_desc_db4') |