blob: a4118060ed89d9c72f6ad0e31b6f76ee1eb654c7 [file] [log] [blame]
====
---- QUERY
# IMPALA-867: Test computing stats on Avro tables created by Hive with
# matching/mismatched column definitions and Avro schema.
# Clone the used tables here.
create table avro_hive_alltypes
like functional_avro_snap.alltypes;
create table avro_hive_alltypes_extra_coldef
like functional_avro_snap.alltypes_extra_coldef;
create table avro_hive_alltypes_missing_coldef
like functional_avro_snap.alltypes_missing_coldef;
create table avro_hive_alltypes_type_mismatch
like functional_avro_snap.alltypes_type_mismatch;
create table avro_hive_no_avro_schema
like functional_avro_snap.no_avro_schema;
====
---- QUERY
# Avro table with matching column definitions and Avro schema
compute stats avro_hive_alltypes
---- RESULTS
'Updated 0 partition(s) and 11 column(s).'
---- TYPES
STRING
====
---- QUERY
show table stats avro_hive_alltypes
---- LABELS
YEAR, MONTH, #ROWS, #FILES, SIZE, BYTES CACHED, CACHE REPLICATION, FORMAT, INCREMENTAL STATS, LOCATION
---- RESULTS
'Total','',0,0,'0B','0B','','','',''
---- TYPES
STRING, STRING, BIGINT, BIGINT, STRING, STRING, STRING, STRING, STRING, STRING
====
---- QUERY
show column stats avro_hive_alltypes
---- LABELS
COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE
---- RESULTS
'id','INT',0,0,4,4
'bool_col','BOOLEAN',2,0,1,1
'tinyint_col','INT',0,0,4,4
'smallint_col','INT',0,0,4,4
'int_col','INT',0,0,4,4
'bigint_col','BIGINT',0,0,8,8
'float_col','FLOAT',0,0,4,4
'double_col','DOUBLE',0,0,8,8
'date_string_col','STRING',0,0,0,0
'string_col','STRING',0,0,0,0
'timestamp_col','STRING',0,0,0,0
'year','INT',0,0,4,4
'month','INT',0,0,4,4
---- TYPES
STRING, STRING, BIGINT, BIGINT, BIGINT, DOUBLE
====
---- QUERY
# Avro table with an extra column definition.
compute stats avro_hive_alltypes_extra_coldef
---- CATCH
AnalysisException: Cannot COMPUTE STATS on Avro table 'avro_hive_alltypes_extra_coldef' because its column definitions do not match those in the Avro schema.
Missing Avro-schema column corresponding to column definition 'extra_col' of type 'string' at position '10'.
Please re-create the table with column definitions, e.g., using the result of 'SHOW CREATE TABLE'
====
---- QUERY
show table stats avro_hive_alltypes_extra_coldef
---- LABELS
YEAR, MONTH, #ROWS, #FILES, SIZE, BYTES CACHED, CACHE REPLICATION, FORMAT, INCREMENTAL STATS, LOCATION
---- RESULTS
'Total','',-1,0,'0B','0B','','','',''
---- TYPES
STRING, STRING, BIGINT, BIGINT, STRING, STRING, STRING, STRING, STRING, STRING
====
---- QUERY
show column stats avro_hive_alltypes_extra_coldef
---- LABELS
COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE
---- RESULTS
'id','INT',-1,-1,4,4
'bool_col','BOOLEAN',-1,-1,1,1
'tinyint_col','INT',-1,-1,4,4
'smallint_col','INT',-1,-1,4,4
'int_col','INT',-1,-1,4,4
'bigint_col','BIGINT',-1,-1,8,8
'float_col','FLOAT',-1,-1,4,4
'double_col','DOUBLE',-1,-1,8,8
'date_string_col','STRING',-1,-1,-1,-1
'string_col','STRING',-1,-1,-1,-1
'timestamp_col','STRING',-1,-1,-1,-1
'year','INT',0,0,4,4
'month','INT',0,0,4,4
---- TYPES
STRING, STRING, BIGINT, BIGINT, BIGINT, DOUBLE
====
---- QUERY
# Avro table with missing two column definitions.
compute stats avro_hive_alltypes_missing_coldef
---- CATCH
MESSAGE: AnalysisException: Cannot COMPUTE STATS on Avro table 'avro_hive_alltypes_missing_coldef' because its column definitions do not match those in the Avro schema.
Definition of column 'smallint_col' of type 'smallint' does not match the Avro-schema column 'tinyint_col' of type 'INT' at position '2'.
Please re-create the table with column definitions, e.g., using the result of 'SHOW CREATE TABLE'
====
---- QUERY
show table stats avro_hive_alltypes_missing_coldef
---- LABELS
YEAR, MONTH, #ROWS, #FILES, SIZE, BYTES CACHED, CACHE REPLICATION, FORMAT, INCREMENTAL STATS, LOCATION
---- RESULTS
'Total','',-1,0,'0B','0B','','','',''
---- TYPES
STRING, STRING, BIGINT, BIGINT, STRING, STRING, STRING, STRING, STRING, STRING
====
---- QUERY
show column stats avro_hive_alltypes_missing_coldef
---- LABELS
COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE
---- RESULTS
'bigint_col','BIGINT',-1,-1,8,8
'bool_col','BOOLEAN',-1,-1,1,1
'date_string_col','STRING',-1,-1,-1,-1
'double_col','DOUBLE',-1,-1,8,8
'float_col','FLOAT',-1,-1,4,4
'id','INT',-1,-1,4,4
'int_col','INT',-1,-1,4,4
'month','INT',0,0,4,4
'smallint_col','INT',-1,-1,4,4
'string_col','STRING',-1,-1,-1,-1
'timestamp_col','STRING',-1,-1,-1,-1
'tinyint_col','INT',-1,-1,4,4
'year','INT',0,0,4,4
---- TYPES
STRING, STRING, BIGINT, BIGINT, BIGINT, DOUBLE
====
---- QUERY
# Avro table with one column definition having a different
# type than the Avro schema (bigint_col is a string).
compute stats avro_hive_alltypes_type_mismatch
---- RESULTS
'Updated 0 partition(s) and 11 column(s).'
---- TYPES
STRING
====
---- QUERY
show table stats avro_hive_alltypes_type_mismatch
---- LABELS
YEAR, MONTH, #ROWS, #FILES, SIZE, BYTES CACHED, CACHE REPLICATION, FORMAT, INCREMENTAL STATS, LOCATION
---- RESULTS
'Total','',0,0,'0B','0B','','','',''
---- TYPES
STRING, STRING, BIGINT, BIGINT, STRING, STRING, STRING, STRING, STRING, STRING
====
---- QUERY
show column stats avro_hive_alltypes_type_mismatch
---- LABELS
COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE
---- RESULTS
'id','INT',0,0,4,4
'bool_col','BOOLEAN',2,0,1,1
'tinyint_col','INT',0,0,4,4
'smallint_col','INT',0,0,4,4
'int_col','INT',0,0,4,4
'bigint_col','BIGINT',-1,-1,8,8
'float_col','FLOAT',0,0,4,4
'double_col','DOUBLE',0,0,8,8
'date_string_col','STRING',0,0,0,0
'string_col','STRING',0,0,0,0
'timestamp_col','STRING',-1,-1,-1,-1
'year','INT',0,0,4,4
'month','INT',0,0,4,4
---- TYPES
STRING, STRING, BIGINT, BIGINT, BIGINT, DOUBLE
====
---- QUERY
# Avro table without an Avro schema created by Hive.
# The Avro schema is inferred from the column definitions,
compute stats avro_hive_no_avro_schema
---- RESULTS
'Updated 0 partition(s) and 11 column(s).'
---- TYPES
STRING
====
---- QUERY
show table stats avro_hive_no_avro_schema
---- LABELS
YEAR, MONTH, #ROWS, #FILES, SIZE, BYTES CACHED, CACHE REPLICATION, FORMAT, INCREMENTAL STATS, LOCATION
---- RESULTS
'Total','',0,0,'0B','0B','','','',''
---- TYPES
STRING, STRING, BIGINT, BIGINT, STRING, STRING, STRING, STRING, STRING, STRING
====
---- QUERY
show column stats avro_hive_no_avro_schema
---- LABELS
COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE
---- RESULTS
'id','INT',0,0,4,4
'bool_col','BOOLEAN',2,0,1,1
'tinyint_col','INT',0,0,4,4
'smallint_col','INT',0,0,4,4
'int_col','INT',0,0,4,4
'bigint_col','BIGINT',0,0,8,8
'float_col','FLOAT',0,0,4,4
'double_col','DOUBLE',0,0,8,8
'date_string_col','STRING',0,0,0,0
'string_col','STRING',0,0,0,0
'timestamp_col','STRING',0,0,0,0
'year','INT',0,0,4,4
'month','INT',0,0,4,4
---- TYPES
STRING, STRING, BIGINT, BIGINT, BIGINT, DOUBLE
====
---- QUERY
# Test Avro table created without any column definitions.
create table avro_impala_alltypes_no_coldefs
partitioned by (year int, month int)
with serdeproperties
('avro.schema.url'='$FILESYSTEM_PREFIX/test-warehouse/avro_schemas/functional/alltypes.json')
stored as avro;
====
---- QUERY
compute stats avro_impala_alltypes_no_coldefs
---- RESULTS
'Updated 0 partition(s) and 11 column(s).'
---- TYPES
STRING
====
---- QUERY
show table stats avro_impala_alltypes_no_coldefs
---- LABELS
YEAR, MONTH, #ROWS, #FILES, SIZE, BYTES CACHED, CACHE REPLICATION, FORMAT, INCREMENTAL STATS, LOCATION
---- RESULTS
'Total','',0,0,'0B','0B','','','',''
---- TYPES
STRING, STRING, BIGINT, BIGINT, STRING, STRING, STRING, STRING, STRING, STRING
====
---- QUERY
show column stats avro_impala_alltypes_no_coldefs
---- LABELS
COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE
---- RESULTS
'id','INT',0,0,4,4
'bool_col','BOOLEAN',2,0,1,1
'tinyint_col','INT',0,0,4,4
'smallint_col','INT',0,0,4,4
'int_col','INT',0,0,4,4
'bigint_col','BIGINT',0,0,8,8
'float_col','FLOAT',0,0,4,4
'double_col','DOUBLE',0,0,8,8
'date_string_col','STRING',0,0,0,0
'string_col','STRING',0,0,0,0
'timestamp_col','STRING',0,0,0,0
'year','INT',0,0,4,4
'month','INT',0,0,4,4
---- TYPES
STRING, STRING, BIGINT, BIGINT, BIGINT, DOUBLE
====
---- QUERY
# IMPALA-1104: Test computing stats on Avro tables created by Impala
# with mismatched column definitions and Avro schema. Mismatched column name.
create table avro_impala_alltypes_bad_colname
(id int, bool_col boolean, tinyint_col int, smallint_col int, bad_int_col int,
bigint_col bigint, float_col float, double_col double, date_string_col string,
string_col string, timestamp_col timestamp)
partitioned by (year int, month int)
with serdeproperties
('avro.schema.url'='$FILESYSTEM_PREFIX/test-warehouse/avro_schemas/functional/alltypes.json')
stored as avro;
====
---- QUERY
compute stats avro_impala_alltypes_bad_colname
---- RESULTS
'Updated 0 partition(s) and 11 column(s).'
---- TYPES
STRING
====
---- QUERY
show table stats avro_impala_alltypes_bad_colname
---- LABELS
YEAR, MONTH, #ROWS, #FILES, SIZE, BYTES CACHED, CACHE REPLICATION, FORMAT, INCREMENTAL STATS, LOCATION
---- RESULTS
'Total','',0,0,'0B','0B','','','',''
---- TYPES
STRING, STRING, BIGINT, BIGINT, STRING, STRING, STRING, STRING, STRING, STRING
====
---- QUERY
show column stats avro_impala_alltypes_bad_colname
---- LABELS
COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE
---- RESULTS
'id','INT',0,0,4,4
'bool_col','BOOLEAN',2,0,1,1
'tinyint_col','INT',0,0,4,4
'smallint_col','INT',0,0,4,4
'int_col','INT',0,0,4,4
'bigint_col','BIGINT',0,0,8,8
'float_col','FLOAT',0,0,4,4
'double_col','DOUBLE',0,0,8,8
'date_string_col','STRING',0,0,0,0
'string_col','STRING',0,0,0,0
'timestamp_col','STRING',0,0,0,0
'year','INT',0,0,4,4
'month','INT',0,0,4,4
---- TYPES
STRING, STRING, BIGINT, BIGINT, BIGINT, DOUBLE
====
---- QUERY
# IMPALA-1104: Test computing stats on Avro tables created by Impala
# with mismatched column definitions and Avro schema. Mismatched column type.
create table avro_impala_alltypes_bad_coltype
(id int, bool_col boolean, tinyint_col int, smallint_col int, int_col int,
bigint_col bigint, float_col float, double_col bigint, date_string_col string,
string_col string, timestamp_col timestamp)
partitioned by (year int, month int)
with serdeproperties
('avro.schema.url'='$FILESYSTEM_PREFIX/test-warehouse/avro_schemas/functional/alltypes.json')
stored as avro;
====
---- QUERY
compute stats avro_impala_alltypes_bad_coltype
---- RESULTS
'Updated 0 partition(s) and 11 column(s).'
---- TYPES
STRING
====
---- QUERY
show table stats avro_impala_alltypes_bad_coltype
---- LABELS
YEAR, MONTH, #ROWS, #FILES, SIZE, BYTES CACHED, CACHE REPLICATION, FORMAT, INCREMENTAL STATS, LOCATION
---- RESULTS
'Total','',0,0,'0B','0B','','','',''
---- TYPES
STRING, STRING, BIGINT, BIGINT, STRING, STRING, STRING, STRING, STRING, STRING
====
---- QUERY
show column stats avro_impala_alltypes_bad_coltype
---- LABELS
COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE
---- RESULTS
'id','INT',0,0,4,4
'bool_col','BOOLEAN',2,0,1,1
'tinyint_col','INT',0,0,4,4
'smallint_col','INT',0,0,4,4
'int_col','INT',0,0,4,4
'bigint_col','BIGINT',0,0,8,8
'float_col','FLOAT',0,0,4,4
'double_col','DOUBLE',0,0,8,8
'date_string_col','STRING',0,0,0,0
'string_col','STRING',0,0,0,0
'timestamp_col','STRING',0,0,0,0
'year','INT',0,0,4,4
'month','INT',0,0,4,4
---- TYPES
STRING, STRING, BIGINT, BIGINT, BIGINT, DOUBLE
====
---- QUERY
# Test that compute stats on a Hive-created Avro table without column defs
# works (HIVE-6308, IMPALA-867).
create table alltypes_no_coldef like functional_avro_snap.alltypes_no_coldef;
compute stats alltypes_no_coldef
---- RESULTS
'Updated 1 partition(s) and 11 column(s).'
---- TYPES
STRING
====
---- QUERY
show column stats alltypes_no_coldef
---- RESULTS
'id','INT',0,0,4,4
'bool_col','BOOLEAN',2,0,1,1
'tinyint_col','INT',0,0,4,4
'smallint_col','INT',0,0,4,4
'int_col','INT',0,0,4,4
'bigint_col','BIGINT',0,0,8,8
'float_col','FLOAT',0,0,4,4
'double_col','DOUBLE',0,0,8,8
'date_string_col','STRING',0,0,0,0
'string_col','STRING',0,0,0,0
'timestamp_col','STRING',0,0,0,0
---- TYPES
STRING, STRING, BIGINT, BIGINT, BIGINT, DOUBLE
====