| # Licensed to the Apache Software Foundation (ASF) under one |
| # or more contributor license agreements. See the NOTICE file |
| # distributed with this work for additional information |
| # regarding copyright ownership. The ASF licenses this file |
| # to you under the Apache License, Version 2.0 (the |
| # "License"); you may not use this file except in compliance |
| # with the License. You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, |
| # software distributed under the License is distributed on an |
| # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| # KIND, either express or implied. See the License for the |
| # specific language governing permissions and limitations |
| # under the License. |
| |
| # Targeted tests for date type. |
| |
| import pytest |
| from tests.common.file_utils import create_table_and_copy_files |
| from tests.common.impala_test_suite import ImpalaTestSuite |
| from tests.common.skip import SkipIfS3, SkipIfABFS, SkipIfADLS, SkipIfLocal |
| from tests.common.test_dimensions import (create_exec_option_dimension_from_dict, |
| create_client_protocol_dimension, hs2_parquet_constraint) |
| from tests.shell.util import ImpalaShell |
| |
| |
| class TestDateQueries(ImpalaTestSuite): |
| @classmethod |
| def get_workload(cls): |
| return 'functional-query' |
| |
| @classmethod |
| def add_test_dimensions(cls): |
| super(TestDateQueries, cls).add_test_dimensions() |
| cls.ImpalaTestMatrix.add_dimension( |
| create_exec_option_dimension_from_dict({ |
| 'batch_size': [0, 1], |
| 'disable_codegen': ['false', 'true'], |
| 'disable_codegen_rows_threshold': [0]})) |
| # DATE type is only supported for text, parquet and avro fileformat on HDFS and HBASE. |
| cls.ImpalaTestMatrix.add_constraint(lambda v: |
| v.get_value('table_format').file_format in ('text', 'hbase', 'parquet') |
| or (v.get_value('table_format').file_format == 'avro' |
| and v.get_value('table_format').compression_codec == 'snap')) |
| |
| # Run these queries through both beeswax and HS2 to get coverage of date returned |
| # via both protocols. |
| cls.ImpalaTestMatrix.add_dimension(create_client_protocol_dimension()) |
| cls.ImpalaTestMatrix.add_constraint(hs2_parquet_constraint) |
| |
| def test_queries(self, vector): |
| if vector.get_value('table_format').file_format == 'avro': |
| # Avro date test queries are in a separate test file. |
| # - Hive2 uses Julian Calendar for writing dates before 1582-10-15, whereas Impala |
| # uses proleptic Gregorian Calendar. This affects the results Impala gets when |
| # querying avro tables written by Hive2. |
| # - Hive3 on the other hand uses proleptic Gregorian Calendar to write dates. |
| self.run_test_case('QueryTest/avro_date', vector) |
| else: |
| self.run_test_case('QueryTest/date', vector) |
| |
| def test_partitioning(self, vector, unique_database): |
| """ Test partitioning by DATE. """ |
| # This test specifies databases explicitly. No need to execute it for anything other |
| # than text fileformat. |
| if vector.get_value('table_format').file_format != 'text': |
| pytest.skip() |
| self.run_test_case('QueryTest/date-partitioning', vector, use_db=unique_database) |
| |
| @SkipIfS3.qualified_path |
| @SkipIfABFS.qualified_path |
| @SkipIfADLS.qualified_path |
| @SkipIfLocal.qualified_path |
| def test_fileformat_support(self, vector, unique_database): |
| """ Test that scanning and writing DATE is supported for text and parquet tables. |
| Test that scanning DATE is supported for avro tables as well. |
| """ |
| # This test specifies databases and locations explicitly. No need to execute it for |
| # anything other than text fileformat on HDFS. |
| if vector.get_value('table_format').file_format != 'text': |
| pytest.skip() |
| |
| # Parquet table with date column. |
| TABLE_NAME = "parquet_date_tbl" |
| CREATE_SQL = "CREATE TABLE {0}.{1} (date_col DATE) STORED AS PARQUET".format( |
| unique_database, TABLE_NAME) |
| create_table_and_copy_files(self.client, CREATE_SQL, unique_database, TABLE_NAME, |
| ["/testdata/data/date_tbl.parquet"]) |
| # Avro table with date column. |
| TABLE_NAME = "avro_date_tbl" |
| CREATE_SQL = "CREATE TABLE {0}.{1} (date_col DATE) STORED AS AVRO".format( |
| unique_database, TABLE_NAME) |
| create_table_and_copy_files(self.client, CREATE_SQL, unique_database, TABLE_NAME, |
| ["/testdata/data/date_tbl.avro"]) |
| # Orc table with date column. |
| TABLE_NAME = "orc_date_tbl" |
| CREATE_SQL = "CREATE TABLE {0}.{1} (date_col DATE) STORED AS ORC".format( |
| unique_database, TABLE_NAME) |
| create_table_and_copy_files(self.client, CREATE_SQL, unique_database, TABLE_NAME, |
| ["/testdata/data/date_tbl.orc"]) |
| |
| # Partitioned table with parquet and avro partitions. |
| TABLE_NAME = "date_tbl" |
| CREATE_SQL = """CREATE TABLE {0}.{1} (date_col DATE) |
| PARTITIONED BY (date_part DATE)""".format(unique_database, TABLE_NAME) |
| self.client.execute(CREATE_SQL) |
| # Add partitions. |
| ADD_PART_SQL = """ALTER TABLE {0}.{1} ADD PARTITION (date_part='1899-12-31') |
| LOCATION '/test-warehouse/{0}.db/parquet_date_tbl' |
| PARTITION (date_part='1999-12-31') |
| LOCATION '/test-warehouse/{0}.db/avro_date_tbl' |
| """.format(unique_database, TABLE_NAME) |
| self.client.execute(ADD_PART_SQL) |
| # Parquet fileformat. |
| SET_PART_FF_SQL = """ALTER TABLE {0}.{1} PARTITION (date_part='1899-12-31') |
| SET FILEFORMAT PARQUET""".format(unique_database, TABLE_NAME) |
| self.client.execute(SET_PART_FF_SQL) |
| # Avro fileformat. |
| SET_PART_FF_SQL = """ALTER TABLE {0}.{1} PARTITION (date_part='1999-12-31') |
| SET FILEFORMAT AVRO""".format(unique_database, TABLE_NAME) |
| self.client.execute(SET_PART_FF_SQL) |
| # After adding the avro partition, metadata has to be invalidated, otherwise querying |
| # the table will fail with stale metadata error. |
| self.client.execute("INVALIDATE METADATA {0}.{1}".format(unique_database, TABLE_NAME)) |
| |
| # Test scanning/writing tables with different fileformats. |
| self.run_test_case('QueryTest/date-fileformat-support', vector, |
| use_db=unique_database) |