| ==== |
| ---- QUERY |
| # IMPALA-867: Test computing stats on Avro tables created by Hive with |
| # matching/mismatched column definitions and Avro schema. |
| # Clone the used tables here. |
| create table avro_hive_alltypes |
| like functional_avro_snap.alltypes; |
| create table avro_hive_alltypes_extra_coldef |
| like functional_avro_snap.alltypes_extra_coldef; |
| create table avro_hive_alltypes_missing_coldef |
| like functional_avro_snap.alltypes_missing_coldef; |
| create table avro_hive_alltypes_type_mismatch |
| like functional_avro_snap.alltypes_type_mismatch; |
| create table avro_hive_no_avro_schema |
| like functional_avro_snap.no_avro_schema; |
| ==== |
| ---- QUERY |
| # Avro table with matching column definitions and Avro schema |
| compute stats avro_hive_alltypes |
| ---- RESULTS |
| 'Updated 0 partition(s) and 11 column(s).' |
| ---- TYPES |
| STRING |
| ==== |
| ---- QUERY |
| show table stats avro_hive_alltypes |
| ---- LABELS |
| YEAR, MONTH, #ROWS, #FILES, SIZE, BYTES CACHED, CACHE REPLICATION, FORMAT, INCREMENTAL STATS, LOCATION, EC POLICY |
| ---- RESULTS |
| 'Total','',0,0,'0B','0B','','','','','' |
| ---- TYPES |
| STRING, STRING, BIGINT, BIGINT, STRING, STRING, STRING, STRING, STRING, STRING, STRING |
| ==== |
| ---- QUERY |
| show column stats avro_hive_alltypes |
| ---- LABELS |
| COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE, #TRUES, #FALSES |
| ---- RESULTS |
| 'id','INT',0,0,4,4,-1,-1 |
| 'bool_col','BOOLEAN',2,0,1,1,0,0 |
| 'tinyint_col','INT',0,0,4,4,-1,-1 |
| 'smallint_col','INT',0,0,4,4,-1,-1 |
| 'int_col','INT',0,0,4,4,-1,-1 |
| 'bigint_col','BIGINT',0,0,8,8,-1,-1 |
| 'float_col','FLOAT',0,0,4,4,-1,-1 |
| 'double_col','DOUBLE',0,0,8,8,-1,-1 |
| 'date_string_col','STRING',0,0,0,0,-1,-1 |
| 'string_col','STRING',0,0,0,0,-1,-1 |
| 'timestamp_col','STRING',0,0,0,0,-1,-1 |
| 'year','INT',0,0,4,4,-1,-1 |
| 'month','INT',0,0,4,4,-1,-1 |
| ---- TYPES |
| STRING, STRING, BIGINT, BIGINT, BIGINT, DOUBLE, BIGINT, BIGINT |
| ==== |
| ---- QUERY |
| # Non-empty Avro table with matching column definitions and Avro schema |
| create external table avro_hive_alltypes_ext |
| like functional_avro_snap.alltypes; |
| alter table avro_hive_alltypes_ext |
| set location '$FILESYSTEM_PREFIX/test-warehouse/alltypes_avro_snap'; |
| alter table avro_hive_alltypes_ext recover partitions; |
| compute stats avro_hive_alltypes_ext; |
| ---- RESULTS |
| 'Updated 24 partition(s) and 11 column(s).' |
| ---- TYPES |
| STRING |
| ==== |
| ---- QUERY |
| show table stats avro_hive_alltypes_ext |
| ---- LABELS |
| YEAR, MONTH, #ROWS, #FILES, SIZE, BYTES CACHED, CACHE REPLICATION, FORMAT, INCREMENTAL STATS, LOCATION, EC POLICY |
| ---- RESULTS |
| '2009','1',310,1,regex:.*,'NOT CACHED','NOT CACHED','AVRO','false',regex:.*,'$ERASURECODE_POLICY' |
| '2009','2',280,1,regex:.*,'NOT CACHED','NOT CACHED','AVRO','false',regex:.*,'$ERASURECODE_POLICY' |
| '2009','3',310,1,regex:.*,'NOT CACHED','NOT CACHED','AVRO','false',regex:.*,'$ERASURECODE_POLICY' |
| '2009','4',300,1,regex:.*,'NOT CACHED','NOT CACHED','AVRO','false',regex:.*,'$ERASURECODE_POLICY' |
| '2009','5',310,1,regex:.*,'NOT CACHED','NOT CACHED','AVRO','false',regex:.*,'$ERASURECODE_POLICY' |
| '2009','6',300,1,regex:.*,'NOT CACHED','NOT CACHED','AVRO','false',regex:.*,'$ERASURECODE_POLICY' |
| '2009','7',310,1,regex:.*,'NOT CACHED','NOT CACHED','AVRO','false',regex:.*,'$ERASURECODE_POLICY' |
| '2009','8',310,1,regex:.*,'NOT CACHED','NOT CACHED','AVRO','false',regex:.*,'$ERASURECODE_POLICY' |
| '2009','9',300,1,regex:.*,'NOT CACHED','NOT CACHED','AVRO','false',regex:.*,'$ERASURECODE_POLICY' |
| '2009','10',310,1,regex:.*,'NOT CACHED','NOT CACHED','AVRO','false',regex:.*,'$ERASURECODE_POLICY' |
| '2009','11',300,1,regex:.*,'NOT CACHED','NOT CACHED','AVRO','false',regex:.*,'$ERASURECODE_POLICY' |
| '2009','12',310,1,regex:.*,'NOT CACHED','NOT CACHED','AVRO','false',regex:.*,'$ERASURECODE_POLICY' |
| '2010','1',310,1,regex:.*,'NOT CACHED','NOT CACHED','AVRO','false',regex:.*,'$ERASURECODE_POLICY' |
| '2010','2',280,1,regex:.*,'NOT CACHED','NOT CACHED','AVRO','false',regex:.*,'$ERASURECODE_POLICY' |
| '2010','3',310,1,regex:.*,'NOT CACHED','NOT CACHED','AVRO','false',regex:.*,'$ERASURECODE_POLICY' |
| '2010','4',300,1,regex:.*,'NOT CACHED','NOT CACHED','AVRO','false',regex:.*,'$ERASURECODE_POLICY' |
| '2010','5',310,1,regex:.*,'NOT CACHED','NOT CACHED','AVRO','false',regex:.*,'$ERASURECODE_POLICY' |
| '2010','6',300,1,regex:.*,'NOT CACHED','NOT CACHED','AVRO','false',regex:.*,'$ERASURECODE_POLICY' |
| '2010','7',310,1,regex:.*,'NOT CACHED','NOT CACHED','AVRO','false',regex:.*,'$ERASURECODE_POLICY' |
| '2010','8',310,1,regex:.*,'NOT CACHED','NOT CACHED','AVRO','false',regex:.*,'$ERASURECODE_POLICY' |
| '2010','9',300,1,regex:.*,'NOT CACHED','NOT CACHED','AVRO','false',regex:.*,'$ERASURECODE_POLICY' |
| '2010','10',310,1,regex:.*,'NOT CACHED','NOT CACHED','AVRO','false',regex:.*,'$ERASURECODE_POLICY' |
| '2010','11',300,1,regex:.*,'NOT CACHED','NOT CACHED','AVRO','false',regex:.*,'$ERASURECODE_POLICY' |
| '2010','12',310,1,regex:.*,'NOT CACHED','NOT CACHED','AVRO','false',regex:.*,'$ERASURECODE_POLICY' |
| 'Total','',7300,24,regex:.*,'0B','','','','','' |
| ---- TYPES |
| STRING, STRING, BIGINT, BIGINT, STRING, STRING, STRING, STRING, STRING, STRING, STRING |
| ==== |
| ---- QUERY |
| show column stats avro_hive_alltypes_ext |
| ---- LABELS |
| COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE, #TRUES, #FALSES |
| ---- RESULTS |
| 'id','INT',7300,0,4,4,-1,-1 |
| 'bool_col','BOOLEAN',2,0,1,1,3650,3650 |
| 'tinyint_col','INT',10,0,4,4,-1,-1 |
| 'smallint_col','INT',10,0,4,4,-1,-1 |
| 'int_col','INT',10,0,4,4,-1,-1 |
| 'bigint_col','BIGINT',10,0,8,8,-1,-1 |
| 'float_col','FLOAT',10,0,4,4,-1,-1 |
| 'double_col','DOUBLE',10,0,8,8,-1,-1 |
| 'date_string_col','STRING',736,0,8,8,-1,-1 |
| 'string_col','STRING',10,0,1,1,-1,-1 |
| 'timestamp_col','STRING',7224,0,22,21.66438293457031,-1,-1 |
| 'year','INT',2,0,4,4,-1,-1 |
| 'month','INT',12,0,4,4,-1,-1 |
| ---- TYPES |
| STRING, STRING, BIGINT, BIGINT, BIGINT, DOUBLE, BIGINT, BIGINT |
| ==== |
| ---- QUERY |
| # Non-empty Avro table with matching column definitions and Avro schema, but with |
| # different partition schema. Note that we use INT for tinyint_col and smallint_col, |
| # and STRING for timestamp_col. See HIVE_TO_AVRO_TYPE_MAP in |
| # testdata/bin/generate-schema-statements.py |
| create external table avro_hive_alltypes_str_part ( |
| id int, |
| bool_col boolean, |
| tinyint_col int, |
| smallint_col int, |
| int_col int, |
| bigint_col bigint, |
| float_col float, |
| double_col double, |
| date_string_col string, |
| string_col string, |
| timestamp_col string |
| ) partitioned by ( |
| year string, |
| month string |
| ) |
| stored as avro |
| location '$FILESYSTEM_PREFIX/test-warehouse/alltypes_avro_snap'; |
| alter table avro_hive_alltypes_str_part recover partitions; |
| compute stats avro_hive_alltypes_str_part; |
| ---- RESULTS |
| 'Updated 24 partition(s) and 11 column(s).' |
| ---- TYPES |
| STRING |
| ==== |
| ---- QUERY |
| show table stats avro_hive_alltypes_str_part |
| ---- LABELS |
| YEAR, MONTH, #ROWS, #FILES, SIZE, BYTES CACHED, CACHE REPLICATION, FORMAT, INCREMENTAL STATS, LOCATION, EC POLICY |
| ---- RESULTS |
| '2009','1',310,1,regex:.*,'NOT CACHED','NOT CACHED','AVRO','false',regex:.*,'$ERASURECODE_POLICY' |
| '2009','2',280,1,regex:.*,'NOT CACHED','NOT CACHED','AVRO','false',regex:.*,'$ERASURECODE_POLICY' |
| '2009','3',310,1,regex:.*,'NOT CACHED','NOT CACHED','AVRO','false',regex:.*,'$ERASURECODE_POLICY' |
| '2009','4',300,1,regex:.*,'NOT CACHED','NOT CACHED','AVRO','false',regex:.*,'$ERASURECODE_POLICY' |
| '2009','5',310,1,regex:.*,'NOT CACHED','NOT CACHED','AVRO','false',regex:.*,'$ERASURECODE_POLICY' |
| '2009','6',300,1,regex:.*,'NOT CACHED','NOT CACHED','AVRO','false',regex:.*,'$ERASURECODE_POLICY' |
| '2009','7',310,1,regex:.*,'NOT CACHED','NOT CACHED','AVRO','false',regex:.*,'$ERASURECODE_POLICY' |
| '2009','8',310,1,regex:.*,'NOT CACHED','NOT CACHED','AVRO','false',regex:.*,'$ERASURECODE_POLICY' |
| '2009','9',300,1,regex:.*,'NOT CACHED','NOT CACHED','AVRO','false',regex:.*,'$ERASURECODE_POLICY' |
| '2009','10',310,1,regex:.*,'NOT CACHED','NOT CACHED','AVRO','false',regex:.*,'$ERASURECODE_POLICY' |
| '2009','11',300,1,regex:.*,'NOT CACHED','NOT CACHED','AVRO','false',regex:.*,'$ERASURECODE_POLICY' |
| '2009','12',310,1,regex:.*,'NOT CACHED','NOT CACHED','AVRO','false',regex:.*,'$ERASURECODE_POLICY' |
| '2010','1',310,1,regex:.*,'NOT CACHED','NOT CACHED','AVRO','false',regex:.*,'$ERASURECODE_POLICY' |
| '2010','2',280,1,regex:.*,'NOT CACHED','NOT CACHED','AVRO','false',regex:.*,'$ERASURECODE_POLICY' |
| '2010','3',310,1,regex:.*,'NOT CACHED','NOT CACHED','AVRO','false',regex:.*,'$ERASURECODE_POLICY' |
| '2010','4',300,1,regex:.*,'NOT CACHED','NOT CACHED','AVRO','false',regex:.*,'$ERASURECODE_POLICY' |
| '2010','5',310,1,regex:.*,'NOT CACHED','NOT CACHED','AVRO','false',regex:.*,'$ERASURECODE_POLICY' |
| '2010','6',300,1,regex:.*,'NOT CACHED','NOT CACHED','AVRO','false',regex:.*,'$ERASURECODE_POLICY' |
| '2010','7',310,1,regex:.*,'NOT CACHED','NOT CACHED','AVRO','false',regex:.*,'$ERASURECODE_POLICY' |
| '2010','8',310,1,regex:.*,'NOT CACHED','NOT CACHED','AVRO','false',regex:.*,'$ERASURECODE_POLICY' |
| '2010','9',300,1,regex:.*,'NOT CACHED','NOT CACHED','AVRO','false',regex:.*,'$ERASURECODE_POLICY' |
| '2010','10',310,1,regex:.*,'NOT CACHED','NOT CACHED','AVRO','false',regex:.*,'$ERASURECODE_POLICY' |
| '2010','11',300,1,regex:.*,'NOT CACHED','NOT CACHED','AVRO','false',regex:.*,'$ERASURECODE_POLICY' |
| '2010','12',310,1,regex:.*,'NOT CACHED','NOT CACHED','AVRO','false',regex:.*,'$ERASURECODE_POLICY' |
| 'Total','',7300,24,regex:.*,'0B','','','','','' |
| ---- TYPES |
| STRING, STRING, BIGINT, BIGINT, STRING, STRING, STRING, STRING, STRING, STRING, STRING |
| ==== |
| ---- QUERY |
| show column stats avro_hive_alltypes_str_part |
| ---- LABELS |
| COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE, #TRUES, #FALSES |
| ---- RESULTS |
| 'id','INT',7300,0,4,4,-1,-1 |
| 'bool_col','BOOLEAN',2,0,1,1,3650,3650 |
| 'tinyint_col','INT',10,0,4,4,-1,-1 |
| 'smallint_col','INT',10,0,4,4,-1,-1 |
| 'int_col','INT',10,0,4,4,-1,-1 |
| 'bigint_col','BIGINT',10,0,8,8,-1,-1 |
| 'float_col','FLOAT',10,0,4,4,-1,-1 |
| 'double_col','DOUBLE',10,0,8,8,-1,-1 |
| 'date_string_col','STRING',736,0,8,8,-1,-1 |
| 'string_col','STRING',10,0,1,1,-1,-1 |
| 'timestamp_col','STRING',7224,0,22,21.66438293457031,-1,-1 |
| 'year','STRING',2,0,-1,-1,-1,-1 |
| 'month','STRING',12,0,-1,-1,-1,-1 |
| ---- TYPES |
| STRING, STRING, BIGINT, BIGINT, BIGINT, DOUBLE, BIGINT, BIGINT |
| ==== |
| ---- QUERY |
| # Avro table with an extra column definition. |
| compute stats avro_hive_alltypes_extra_coldef |
| ---- RESULTS |
| 'Updated 0 partition(s) and 12 column(s).' |
| ---- TYPES |
| STRING |
| ==== |
| ---- QUERY |
| show table stats avro_hive_alltypes_extra_coldef |
| ---- LABELS |
| YEAR, MONTH, #ROWS, #FILES, SIZE, BYTES CACHED, CACHE REPLICATION, FORMAT, INCREMENTAL STATS, LOCATION, EC POLICY |
| ---- RESULTS |
| 'Total','',0,0,'0B','0B','','','','','' |
| ---- TYPES |
| STRING, STRING, BIGINT, BIGINT, STRING, STRING, STRING, STRING, STRING, STRING, STRING |
| ==== |
| ---- QUERY |
| show column stats avro_hive_alltypes_extra_coldef |
| ---- LABELS |
| COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE, #TRUES, #FALSES |
| ---- RESULTS |
| 'id','INT',0,0,4,4,-1,-1 |
| 'bool_col','BOOLEAN',2,0,1,1,0,0 |
| 'tinyint_col','TINYINT',0,0,1,1,-1,-1 |
| 'smallint_col','SMALLINT',0,0,2,2,-1,-1 |
| 'int_col','INT',0,0,4,4,-1,-1 |
| 'bigint_col','BIGINT',0,0,8,8,-1,-1 |
| 'float_col','FLOAT',0,0,4,4,-1,-1 |
| 'double_col','DOUBLE',0,0,8,8,-1,-1 |
| 'date_string_col','STRING',0,0,0,0,-1,-1 |
| 'string_col','STRING',0,0,0,0,-1,-1 |
| 'timestamp_col','TIMESTAMP',0,0,16,16,-1,-1 |
| 'extra_col','STRING',0,0,0,0,-1,-1 |
| 'year','INT',0,0,4,4,-1,-1 |
| 'month','INT',0,0,4,4,-1,-1 |
| ---- TYPES |
| STRING, STRING, BIGINT, BIGINT, BIGINT, DOUBLE, BIGINT, BIGINT |
| ==== |
| ---- QUERY |
| # Avro table with missing two column definitions. |
| compute stats avro_hive_alltypes_missing_coldef |
| ---- RESULTS |
| 'Updated 0 partition(s) and 9 column(s).' |
| ---- TYPES |
| STRING |
| ==== |
| ---- QUERY |
| show table stats avro_hive_alltypes_missing_coldef |
| ---- LABELS |
| YEAR, MONTH, #ROWS, #FILES, SIZE, BYTES CACHED, CACHE REPLICATION, FORMAT, INCREMENTAL STATS, LOCATION, EC POLICY |
| ---- RESULTS |
| 'Total','',0,0,'0B','0B','','','','','' |
| ---- TYPES |
| STRING, STRING, BIGINT, BIGINT, STRING, STRING, STRING, STRING, STRING, STRING, STRING |
| ==== |
| ---- QUERY |
| show column stats avro_hive_alltypes_missing_coldef |
| ---- LABELS |
| COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE, #TRUES, #FALSES |
| ---- RESULTS |
| 'id','INT',0,0,4,4,-1,-1 |
| 'bool_col','BOOLEAN',2,0,1,1,0,0 |
| 'smallint_col','SMALLINT',0,0,2,2,-1,-1 |
| 'int_col','INT',0,0,4,4,-1,-1 |
| 'bigint_col','BIGINT',0,0,8,8,-1,-1 |
| 'float_col','FLOAT',0,0,4,4,-1,-1 |
| 'double_col','DOUBLE',0,0,8,8,-1,-1 |
| 'date_string_col','STRING',0,0,0,0,-1,-1 |
| 'string_col','STRING',0,0,0,0,-1,-1 |
| 'year','INT',0,0,4,4,-1,-1 |
| 'month','INT',0,0,4,4,-1,-1 |
| ---- TYPES |
| STRING, STRING, BIGINT, BIGINT, BIGINT, DOUBLE, BIGINT, BIGINT |
| ==== |
| ---- QUERY |
| # Avro table with one column definition having a different |
| # type than the Avro schema (bigint_col is a string). |
| compute stats avro_hive_alltypes_type_mismatch |
| ---- RESULTS |
| 'Updated 0 partition(s) and 11 column(s).' |
| ---- TYPES |
| STRING |
| ==== |
| ---- QUERY |
| show table stats avro_hive_alltypes_type_mismatch |
| ---- LABELS |
| YEAR, MONTH, #ROWS, #FILES, SIZE, BYTES CACHED, CACHE REPLICATION, FORMAT, INCREMENTAL STATS, LOCATION, EC POLICY |
| ---- RESULTS |
| 'Total','',0,0,'0B','0B','','','','','' |
| ---- TYPES |
| STRING, STRING, BIGINT, BIGINT, STRING, STRING, STRING, STRING, STRING, STRING, STRING |
| ==== |
| ---- QUERY |
| show column stats avro_hive_alltypes_type_mismatch |
| ---- LABELS |
| COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE, #TRUES, #FALSES |
| ---- RESULTS |
| 'id','INT',0,0,4,4,-1,-1 |
| 'bool_col','BOOLEAN',2,0,1,1,0,0 |
| 'tinyint_col','TINYINT',0,0,1,1,-1,-1 |
| 'smallint_col','SMALLINT',0,0,2,2,-1,-1 |
| 'int_col','INT',0,0,4,4,-1,-1 |
| 'bigint_col','STRING',0,0,0,0,-1,-1 |
| 'float_col','FLOAT',0,0,4,4,-1,-1 |
| 'double_col','DOUBLE',0,0,8,8,-1,-1 |
| 'date_string_col','STRING',0,0,0,0,-1,-1 |
| 'string_col','STRING',0,0,0,0,-1,-1 |
| 'timestamp_col','TIMESTAMP',0,0,16,16,-1,-1 |
| 'year','INT',0,0,4,4,-1,-1 |
| 'month','INT',0,0,4,4,-1,-1 |
| ---- TYPES |
| STRING, STRING, BIGINT, BIGINT, BIGINT, DOUBLE, BIGINT, BIGINT |
| ==== |
| ---- QUERY |
| # Avro table without an Avro schema created by Hive. |
| # The Avro schema is inferred from the column definitions, |
| compute stats avro_hive_no_avro_schema |
| ---- RESULTS |
| 'Updated 0 partition(s) and 11 column(s).' |
| ---- TYPES |
| STRING |
| ==== |
| ---- QUERY |
| show table stats avro_hive_no_avro_schema |
| ---- LABELS |
| YEAR, MONTH, #ROWS, #FILES, SIZE, BYTES CACHED, CACHE REPLICATION, FORMAT, INCREMENTAL STATS, LOCATION, EC POLICY |
| ---- RESULTS |
| 'Total','',0,0,'0B','0B','','','','','' |
| ---- TYPES |
| STRING, STRING, BIGINT, BIGINT, STRING, STRING, STRING, STRING, STRING, STRING, STRING |
| ==== |
| ---- QUERY |
| show column stats avro_hive_no_avro_schema |
| ---- LABELS |
| COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE, #TRUES, #FALSES |
| ---- RESULTS |
| 'id','INT',0,0,4,4,-1,-1 |
| 'bool_col','BOOLEAN',2,0,1,1,0,0 |
| 'tinyint_col','INT',0,0,4,4,-1,-1 |
| 'smallint_col','INT',0,0,4,4,-1,-1 |
| 'int_col','INT',0,0,4,4,-1,-1 |
| 'bigint_col','BIGINT',0,0,8,8,-1,-1 |
| 'float_col','FLOAT',0,0,4,4,-1,-1 |
| 'double_col','DOUBLE',0,0,8,8,-1,-1 |
| 'date_string_col','STRING',0,0,0,0,-1,-1 |
| 'string_col','STRING',0,0,0,0,-1,-1 |
| 'timestamp_col','STRING',0,0,0,0,-1,-1 |
| 'year','INT',0,0,4,4,-1,-1 |
| 'month','INT',0,0,4,4,-1,-1 |
| ---- TYPES |
| STRING, STRING, BIGINT, BIGINT, BIGINT, DOUBLE, BIGINT, BIGINT |
| ==== |
| ---- QUERY |
| # Test Avro table created without any column definitions. |
| create table avro_impala_alltypes_no_coldefs |
| partitioned by (year int, month int) |
| with serdeproperties |
| ('avro.schema.url'='$FILESYSTEM_PREFIX/test-warehouse/avro_schemas/functional/alltypes.json') |
| stored as avro; |
| ==== |
| ---- QUERY |
| compute stats avro_impala_alltypes_no_coldefs |
| ---- RESULTS |
| 'Updated 0 partition(s) and 11 column(s).' |
| ---- TYPES |
| STRING |
| ==== |
| ---- QUERY |
| show table stats avro_impala_alltypes_no_coldefs |
| ---- LABELS |
| YEAR, MONTH, #ROWS, #FILES, SIZE, BYTES CACHED, CACHE REPLICATION, FORMAT, INCREMENTAL STATS, LOCATION, EC POLICY |
| ---- RESULTS |
| 'Total','',0,0,'0B','0B','','','','','' |
| ---- TYPES |
| STRING, STRING, BIGINT, BIGINT, STRING, STRING, STRING, STRING, STRING, STRING, STRING |
| ==== |
| ---- QUERY |
| show column stats avro_impala_alltypes_no_coldefs |
| ---- LABELS |
| COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE, #TRUES, #FALSES |
| ---- RESULTS |
| 'id','INT',0,0,4,4,-1,-1 |
| 'bool_col','BOOLEAN',2,0,1,1,0,0 |
| 'tinyint_col','INT',0,0,4,4,-1,-1 |
| 'smallint_col','INT',0,0,4,4,-1,-1 |
| 'int_col','INT',0,0,4,4,-1,-1 |
| 'bigint_col','BIGINT',0,0,8,8,-1,-1 |
| 'float_col','FLOAT',0,0,4,4,-1,-1 |
| 'double_col','DOUBLE',0,0,8,8,-1,-1 |
| 'date_string_col','STRING',0,0,0,0,-1,-1 |
| 'string_col','STRING',0,0,0,0,-1,-1 |
| 'timestamp_col','STRING',0,0,0,0,-1,-1 |
| 'year','INT',0,0,4,4,-1,-1 |
| 'month','INT',0,0,4,4,-1,-1 |
| ---- TYPES |
| STRING, STRING, BIGINT, BIGINT, BIGINT, DOUBLE, BIGINT, BIGINT |
| ==== |
| ---- QUERY |
| # IMPALA-1104: Test computing stats on Avro tables created by Impala |
| # with mismatched column definitions and Avro schema. Mismatched column name. |
| create table avro_impala_alltypes_bad_colname |
| (id int, bool_col boolean, tinyint_col int, smallint_col int, bad_int_col int, |
| bigint_col bigint, float_col float, double_col double, date_string_col string, |
| string_col string, timestamp_col timestamp) |
| partitioned by (year int, month int) |
| with serdeproperties |
| ('avro.schema.url'='$FILESYSTEM_PREFIX/test-warehouse/avro_schemas/functional/alltypes.json') |
| stored as avro; |
| ==== |
| ---- QUERY |
| compute stats avro_impala_alltypes_bad_colname |
| ---- RESULTS |
| 'Updated 0 partition(s) and 11 column(s).' |
| ---- TYPES |
| STRING |
| ==== |
| ---- QUERY |
| show table stats avro_impala_alltypes_bad_colname |
| ---- LABELS |
| YEAR, MONTH, #ROWS, #FILES, SIZE, BYTES CACHED, CACHE REPLICATION, FORMAT, INCREMENTAL STATS, LOCATION, EC POLICY |
| ---- RESULTS |
| 'Total','',0,0,'0B','0B','','','','','' |
| ---- TYPES |
| STRING, STRING, BIGINT, BIGINT, STRING, STRING, STRING, STRING, STRING, STRING, STRING |
| ==== |
| ---- QUERY |
| show column stats avro_impala_alltypes_bad_colname |
| ---- LABELS |
| COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE, #TRUES, #FALSES |
| ---- RESULTS |
| 'id','INT',0,0,4,4,-1,-1 |
| 'bool_col','BOOLEAN',2,0,1,1,0,0 |
| 'tinyint_col','INT',0,0,4,4,-1,-1 |
| 'smallint_col','INT',0,0,4,4,-1,-1 |
| 'int_col','INT',0,0,4,4,-1,-1 |
| 'bigint_col','BIGINT',0,0,8,8,-1,-1 |
| 'float_col','FLOAT',0,0,4,4,-1,-1 |
| 'double_col','DOUBLE',0,0,8,8,-1,-1 |
| 'date_string_col','STRING',0,0,0,0,-1,-1 |
| 'string_col','STRING',0,0,0,0,-1,-1 |
| 'timestamp_col','STRING',0,0,0,0,-1,-1 |
| 'year','INT',0,0,4,4,-1,-1 |
| 'month','INT',0,0,4,4,-1,-1 |
| ---- TYPES |
| STRING, STRING, BIGINT, BIGINT, BIGINT, DOUBLE, BIGINT, BIGINT |
| ==== |
| ---- QUERY |
| # IMPALA-1104: Test computing stats on Avro tables created by Impala |
| # with mismatched column definitions and Avro schema. Mismatched column type. |
| create table avro_impala_alltypes_bad_coltype |
| (id int, bool_col boolean, tinyint_col int, smallint_col int, int_col int, |
| bigint_col bigint, float_col float, double_col bigint, date_string_col string, |
| string_col string, timestamp_col timestamp) |
| partitioned by (year int, month int) |
| with serdeproperties |
| ('avro.schema.url'='$FILESYSTEM_PREFIX/test-warehouse/avro_schemas/functional/alltypes.json') |
| stored as avro; |
| ==== |
| ---- QUERY |
| compute stats avro_impala_alltypes_bad_coltype |
| ---- RESULTS |
| 'Updated 0 partition(s) and 11 column(s).' |
| ---- TYPES |
| STRING |
| ==== |
| ---- QUERY |
| show table stats avro_impala_alltypes_bad_coltype |
| ---- LABELS |
| YEAR, MONTH, #ROWS, #FILES, SIZE, BYTES CACHED, CACHE REPLICATION, FORMAT, INCREMENTAL STATS, LOCATION, EC POLICY |
| ---- RESULTS |
| 'Total','',0,0,'0B','0B','','','','','' |
| ---- TYPES |
| STRING, STRING, BIGINT, BIGINT, STRING, STRING, STRING, STRING, STRING, STRING, STRING |
| ==== |
| ---- QUERY |
| show column stats avro_impala_alltypes_bad_coltype |
| ---- LABELS |
| COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE, #TRUES, #FALSES |
| ---- RESULTS |
| 'id','INT',0,0,4,4,-1,-1 |
| 'bool_col','BOOLEAN',2,0,1,1,0,0 |
| 'tinyint_col','INT',0,0,4,4,-1,-1 |
| 'smallint_col','INT',0,0,4,4,-1,-1 |
| 'int_col','INT',0,0,4,4,-1,-1 |
| 'bigint_col','BIGINT',0,0,8,8,-1,-1 |
| 'float_col','FLOAT',0,0,4,4,-1,-1 |
| 'double_col','DOUBLE',0,0,8,8,-1,-1 |
| 'date_string_col','STRING',0,0,0,0,-1,-1 |
| 'string_col','STRING',0,0,0,0,-1,-1 |
| 'timestamp_col','STRING',0,0,0,0,-1,-1 |
| 'year','INT',0,0,4,4,-1,-1 |
| 'month','INT',0,0,4,4,-1,-1 |
| ---- TYPES |
| STRING, STRING, BIGINT, BIGINT, BIGINT, DOUBLE, BIGINT, BIGINT |
| ==== |
| ---- QUERY |
| # Test that compute stats on a Hive-created Avro table without column defs |
| # works (HIVE-6308, IMPALA-867). |
| create table alltypes_no_coldef like functional_avro_snap.alltypes_no_coldef; |
| compute stats alltypes_no_coldef |
| ---- RESULTS |
| 'Updated 1 partition(s) and 11 column(s).' |
| ---- TYPES |
| STRING |
| ==== |
| ---- QUERY |
| show column stats alltypes_no_coldef |
| ---- RESULTS |
| 'id','INT',0,0,4,4,-1,-1 |
| 'bool_col','BOOLEAN',2,0,1,1,0,0 |
| 'tinyint_col','INT',0,0,4,4,-1,-1 |
| 'smallint_col','INT',0,0,4,4,-1,-1 |
| 'int_col','INT',0,0,4,4,-1,-1 |
| 'bigint_col','BIGINT',0,0,8,8,-1,-1 |
| 'float_col','FLOAT',0,0,4,4,-1,-1 |
| 'double_col','DOUBLE',0,0,8,8,-1,-1 |
| 'date_string_col','STRING',0,0,0,0,-1,-1 |
| 'string_col','STRING',0,0,0,0,-1,-1 |
| 'timestamp_col','STRING',0,0,0,0,-1,-1 |
| ---- TYPES |
| STRING, STRING, BIGINT, BIGINT, BIGINT, DOUBLE, BIGINT, BIGINT |
| ==== |