# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# Impala tests for queries that query metadata and set session settings

import pytest
import re
from tests.beeswax.impala_beeswax import ImpalaBeeswaxException
from tests.common.impala_test_suite import ImpalaTestSuite
from tests.common.skip import (SkipIfIsilon, SkipIfS3, SkipIfABFS, SkipIfADLS,
SkipIfLocal, SkipIfCatalogV2)
from tests.common.test_dimensions import ALL_NODES_ONLY
from tests.common.test_dimensions import create_exec_option_dimension
from tests.common.test_dimensions import create_uncompressed_text_dimension
from tests.util.filesystem_utils import get_fs_path


# TODO: For these tests to pass, all table metadata must be created exhaustively.
# The tests should be modified to remove that requirement.
class TestMetadataQueryStatements(ImpalaTestSuite):
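  """Tests for queries that query metadata (USE, SHOW, DESCRIBE) and set session
  settings."""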
CREATE_DATA_SRC_STMT = ("CREATE DATA SOURCE %s LOCATION '" +
get_fs_path("/test-warehouse/data-sources/test-data-source.jar") +
"' CLASS 'org.apache.impala.extdatasource.AllTypesDataSource' API_VERSION 'V1'")
DROP_DATA_SRC_STMT = "DROP DATA SOURCE IF EXISTS %s"
TEST_DATA_SRC_NAMES = ["show_test_ds1", "show_test_ds2"]
AVRO_SCHEMA_LOC = get_fs_path("/test-warehouse/avro_schemas/functional/alltypes.json")

  @classmethod
  def get_workload(cls):
return 'functional-query'

  @classmethod
def add_test_dimensions(cls):
super(TestMetadataQueryStatements, cls).add_test_dimensions()
sync_ddl_opts = [0, 1]
if cls.exploration_strategy() != 'exhaustive':
# Cut down on test runtime by only running with SYNC_DDL=0
sync_ddl_opts = [0]
cls.ImpalaTestMatrix.add_dimension(create_exec_option_dimension(
cluster_sizes=ALL_NODES_ONLY,
disable_codegen_options=[False],
batch_sizes=[0],
sync_ddl=sync_ddl_opts))
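    # Restrict the table format dimension to uncompressed text; these tests
    # exercise metadata statements, not scanners.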
cls.ImpalaTestMatrix.add_dimension(
create_uncompressed_text_dimension(cls.get_workload()))

  def test_use(self, vector):
self.run_test_case('QueryTest/use', vector)

  def test_show(self, vector):
self.run_test_case('QueryTest/show', vector)

  def test_show_stats(self, vector):
self.run_test_case('QueryTest/show-stats', vector, "functional")

  def test_describe_path(self, vector, unique_database):
self.run_test_case('QueryTest/describe-path', vector, unique_database)

  # Missing coverage: DESCRIBE FORMATTED compatibility between Impala and Hive when
  # the data doesn't reside in HDFS.
@SkipIfIsilon.hive
@SkipIfS3.hive
@SkipIfABFS.hive
@SkipIfADLS.hive
@SkipIfLocal.hive
def test_describe_formatted(self, vector, unique_database):
    # For DESCRIBE FORMATTED we try to match Hive's output as closely as possible.
    # However, our handling of NULLs is inconsistent with theirs: Impala sometimes
    # prints 'NULL' where Hive uses an empty string, and Hive sometimes prints 'null'
    # with padding where Impala uses a run of blank spaces. For now we want to leave
    # it that way so as not to affect users who rely on this output.
def compare_describe_formatted(impala_results, hive_results):
for impala, hive in zip(re.split(',|\n', impala_results),
re.split(',|\n', hive_results)):
if impala != hive:
# If they don't match, check if it's because of the inconsistent null handling.
impala = impala.replace(' ', '').lower()
hive = hive.replace(' ', '').lower()
if not ((impala == "'null'" and hive == "''") or
(impala == "''" and hive == "'null'")):
return False
return True
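    # E.g. an Impala field of 'NULL' compared against an empty Hive field (or vice
    # versa) is accepted; any other differing field pair fails the comparison.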

    # Describe a partitioned table.
self.exec_and_compare_hive_and_impala_hs2("describe formatted functional.alltypes",
compare=compare_describe_formatted)
self.exec_and_compare_hive_and_impala_hs2(
"describe formatted functional_text_lzo.alltypes",
compare=compare_describe_formatted)

    # Describe an unpartitioned table.
self.exec_and_compare_hive_and_impala_hs2("describe formatted tpch.lineitem",
compare=compare_describe_formatted)
self.exec_and_compare_hive_and_impala_hs2("describe formatted functional.jointbl",
compare=compare_describe_formatted)

    # Create and describe unpartitioned and partitioned Avro tables created by
    # Impala without any column definitions.
    # TODO: Instead of creating new tables here, change one of the existing
    # Avro tables to be created without any column definitions.
self.client.execute("create database if not exists %s" % unique_database)
self.client.execute((
"create table %s.%s with serdeproperties ('avro.schema.url'='%s') stored as avro"
% (unique_database, "avro_alltypes_nopart", self.AVRO_SCHEMA_LOC)))
self.exec_and_compare_hive_and_impala_hs2("describe formatted avro_alltypes_nopart",
compare=compare_describe_formatted)
self.client.execute((
"create table %s.%s partitioned by (year int, month int) "
"with serdeproperties ('avro.schema.url'='%s') stored as avro"
% (unique_database, "avro_alltypes_part", self.AVRO_SCHEMA_LOC)))
self.exec_and_compare_hive_and_impala_hs2("describe formatted avro_alltypes_part",
compare=compare_describe_formatted)
    self.exec_and_compare_hive_and_impala_hs2(
        "describe formatted functional.alltypes_view_sub",
        compare=compare_describe_formatted)

  @pytest.mark.execute_serially  # due to data source setup/teardown
@SkipIfCatalogV2.data_sources_unsupported()
def test_show_data_sources(self, vector):
try:
self.__create_data_sources()
self.run_test_case('QueryTest/show-data-sources', vector)
finally:
self.__drop_data_sources()

  def __drop_data_sources(self):
for name in self.TEST_DATA_SRC_NAMES:
self.client.execute(self.DROP_DATA_SRC_STMT % (name,))

  def __create_data_sources(self):
self.__drop_data_sources()
for name in self.TEST_DATA_SRC_NAMES:
self.client.execute(self.CREATE_DATA_SRC_STMT % (name,))

  @SkipIfS3.hive
@SkipIfABFS.hive
@SkipIfADLS.hive
@SkipIfIsilon.hive
@SkipIfLocal.hive
  @pytest.mark.execute_serially  # because of the hardcoded database names
def test_describe_db(self, vector, cluster_properties):
self.__test_describe_db_cleanup()
try:
self.client.execute("create database impala_test_desc_db1")
self.client.execute("create database impala_test_desc_db2 "
"comment 'test comment'")
self.client.execute("create database impala_test_desc_db3 "
"location '" + get_fs_path("/testdb") + "'")
self.client.execute("create database impala_test_desc_db4 comment 'test comment' "
"location \"" + get_fs_path("/test2.db") + "\"")
self.run_stmt_in_hive("create database hive_test_desc_db comment 'test comment' "
"with dbproperties('pi' = '3.14', 'e' = '2.82')")
if cluster_properties.is_event_polling_enabled():
# Using HMS event processor - wait until the database shows up.
self.wait_for_db_to_appear("hive_test_desc_db", timeout_s=30)
else:
# Invalidate metadata to pick up hive-created db.
self.client.execute("invalidate metadata")
self.run_test_case('QueryTest/describe-db', vector)
if not cluster_properties.is_catalog_v2_cluster():
self.run_test_case('QueryTest/describe-hive-db', vector)
finally:
self.__test_describe_db_cleanup()

  def __test_describe_db_cleanup(self):
self.cleanup_db('hive_test_desc_db')
self.cleanup_db('impala_test_desc_db1')
self.cleanup_db('impala_test_desc_db2')
self.cleanup_db('impala_test_desc_db3')
self.cleanup_db('impala_test_desc_db4')