blob: 980c7bb2a2a0b12e6b5dc587be6e9a01ebbfc0aa [file] [log] [blame]
-- all primitive types
-- timestamp_w_tz TIMESTAMP WITH LOCAL TIME ZONE is not supported by hive's parquet implementation
CREATE EXTERNAL TABLE test_all_types(tinyint_type TINYINT, smallint_type SMALLINT, bigint_type BIGINT, int_type INT, float_type FLOAT, double_type double, decimal_type DECIMAL(4,2), timestamp_type TIMESTAMP, date_type DATE, string_type STRING, varchar_type VARCHAR(100), char_type CHAR(34), boolean_type BOOLEAN, binary_type BINARY) STORED AS PARQUET LOCATION '${system:test.tmp.dir}/test_all_types';
-- insert two rows (the other tables only have 1 row)
INSERT INTO test_all_types VALUES (1, 2, 3, 4, 2.2, 2.2, 20.20, '2022-06-30 10:20:30', '2020-04-23', 'str1', 'varchar1', 'char', true, 'binary_maybe'),
(1, 2, 3, 4, 2.2, 2.2, 20.20, '2022-06-30 10:20:30', '2020-04-23', 'str1', 'varchar1', 'char', true, 'binary_maybe');
SELECT * FROM test_all_types;
DESCRIBE test_all_types;
-- CREATE A LIKE table
CREATE TABLE like_test_all_types LIKE FILE PARQUET '${system:test.tmp.dir}/test_all_types/000000_0';
INSERT INTO like_test_all_types VALUES (1, 2, 3, 4, 2.2, 2.2, 20.20, '2022-06-30 10:20:30', '2020-04-23', 'str1', 'varchar1', 'char', true, 'binary_maybe'),
(1, 2, 3, 4, 2.2, 2.2, 20.20, '2022-06-30 10:20:30', '2020-04-23', 'str1', 'varchar1', 'char', true, 'binary_maybe');
SELECT * FROM like_test_all_types;
DESCRIBE like_test_all_types;
DROP TABLE test_all_types;
DROP TABLE like_test_all_types;
-- test hive.parquet.infer.binary.as string
SET hive.parquet.infer.binary.as = String;
CREATE TABLE like_test_all_types LIKE FILE PARQUET '${system:test.tmp.dir}/test_all_types/000000_0';
INSERT INTO like_test_all_types VALUES (1, 2, 3, 4, 2.2, 2.2, 20.20, '2022-06-30 10:20:30', '2020-04-23', 'str1', 'varchar1', 'char', true, 'binary_maybe'),
(1, 2, 3, 4, 2.2, 2.2, 20.20, '2022-06-30 10:20:30', '2020-04-23', 'str1', 'varchar1', 'char', true, 'binary_maybe');
SELECT * FROM like_test_all_types;
DESCRIBE like_test_all_types;
DROP TABLE test_all_types;
DROP TABLE like_test_all_types;
SET hive.parquet.infer.binary.as = binary;
-- complex types (struct, array, map, union)
-- union type is not supported by PARQUET in hive
-- array
CREATE EXTERNAL TABLE test_array(str_array array<string>) STORED AS PARQUET LOCATION '${system:test.tmp.dir}/test_array';
DESCRIBE test_array;
INSERT INTO test_array SELECT array("bob", "sue");
SELECT * FROM test_array;
CREATE TABLE like_test_array LIKE FILE PARQUET '${system:test.tmp.dir}/test_array/000000_0';
DESCRIBE like_test_array;
INSERT INTO like_test_array SELECT array("bob", "sue");
SELECT * FROM like_test_array;
DROP TABLE like_test_array;
-- map
CREATE EXTERNAL TABLE test_map(simple_map map<int, string>, map_to_struct map<string, struct<i : int>>, map_to_map map<date,map<int, string>>, map_to_array map<binary, array<array<int>>>) STORED AS PARQUET LOCATION '${system:test.tmp.dir}/test_map';
DESCRIBE test_map;
INSERT INTO test_map SELECT map(10, "foo"), map("bar", named_struct("i", 99)), map(cast('1984-01-01' as date), map(10, "goodbye")), map(cast("binary" as binary), array(array(1,2,3)));
SELECT * FROM test_map;
CREATE TABLE like_test_map LIKE FILE PARQUET '${system:test.tmp.dir}/test_map/000000_0';
DESCRIBE like_test_map;
INSERT INTO like_test_map SELECT map(10, "foo"), map("bar", named_struct("i", 99)), map(cast('1984-01-01' as date), map(10, "goodbye")), map(cast("binary" as binary), array(array(1,2,3)));
SELECT * FROM like_test_map;
DROP TABLE like_test_map;
-- struct
CREATE EXTERNAL TABLE test_complex_struct(struct_type struct<tinyint_type : tinyint, smallint_type : smallint, bigint_type : bigint, int_type : int, float_type : float, double_type : double, decimal_type : DECIMAL(4,2), timestamp_type : TIMESTAMP, date_type : DATE, string_type : STRING, varchar_type : VARCHAR(100), char_type : CHAR(34), boolean_type : boolean, binary_type : binary>) STORED AS PARQUET LOCATION '${system:test.tmp.dir}/test_complex_struct';
DESCRIBE test_complex_struct;
-- disable CBO due to the fact that type conversion causes CBO failure which causes the test to fail
-- non-CBO path works (HIVE-26398)
SET hive.cbo.enable=false;
INSERT INTO test_complex_struct SELECT named_struct("tinyint_type", cast(1 as tinyint), "smallint_type", cast(2 as smallint), "bigint_type", cast(3 as bigint), "int_type", 4, "float_type", cast(2.2 as float), "double_type", cast(2.2 as double), "decimal_type", cast(20.22 as decimal(4,2)), "timestamp_type", cast('2022-06-30 10:20:30' as timestamp), "date_type", cast('2020-04-23' as date), "string_type", 'str1', "varchar_type", cast('varchar1' as varchar(100)), "char_type", cast('char' as char(34)), "boolean_type", true, "binary_type", cast('binary_maybe' as binary));
SET hive.cbo.enable=true;
SELECT * FROM test_complex_struct;
-- varchar/char get created as string due to the fact that the parquet file has no information to derive this types and they are stored as string
CREATE TABLE like_test_complex_struct LIKE FILE PARQUET '${system:test.tmp.dir}/test_complex_struct/000000_0';
DESCRIBE like_test_complex_struct;
-- disable CBO due to the fact that type conversion causes CBO failure which causes the test to fail
-- non-CBO path works (HIVE-26398)
SET hive.cbo.enable=false;
INSERT INTO like_test_complex_struct SELECT named_struct("tinyint_type", cast(1 as tinyint), "smallint_type", cast(2 as smallint), "bigint_type", cast(3 as bigint), "int_type", 4, "float_type", cast(2.2 as float), "double_type", cast(2.2 as double), "decimal_type", cast(20.22 as decimal(4,2)), "timestamp_type", cast('2022-06-30 10:20:30' as timestamp), "date_type", cast('2020-04-23' as date), "string_type", 'str1', "varchar_type", 'varchar1', "char_type", 'char', "boolean_type", true, "binary_type", cast('binary_maybe' as binary));
SET hive.cbo.enable=true;
SELECT * FROM like_test_complex_struct;
DROP TABLE like_test_complex_struct;
-- test complex types that contain other complex types
CREATE EXTERNAL TABLE test_complex_complex(struct_type struct<i : int, s : string, m : map<string, array<int>>, struct_i : struct<str : string>>) STORED AS PARQUET LOCATION '${system:test.tmp.dir}/test_complex_complex';
DESCRIBE test_complex_complex;
INSERT INTO test_complex_complex SELECT named_struct("i", 10, "s", "hello, world", "m", map("arr", array(1,2,3,4)), "struct_i", named_struct("str", "test_str"));
SELECT * FROM test_complex_complex;
CREATE TABLE like_test_complex_complex LIKE FILE PARQUET '${system:test.tmp.dir}/test_complex_complex/000000_0';
DESCRIBE like_test_complex_complex;
INSERT INTO like_test_complex_complex SELECT named_struct("i", 10, "s", "hello, world", "m", map("arr", array(1,2,3,4)), "struct_i", named_struct("str", "test_str"));
SELECT * FROM like_test_complex_complex;
DROP TABLE like_test_complex_complex;
-- test adding partitioning to the destination table
CREATE TABLE like_test_partitioning LIKE FILE PARQUET '${system:test.tmp.dir}/test_all_types/000000_0' PARTITIONED BY (year STRING, month STRING);
DESCRIBE like_test_partitioning;
INSERT INTO like_test_partitioning PARTITION (year='1984', month='1') VALUES (1, 2, 3, 4, 2.2, 2.2, 20.20, '2022-06-30 10:20:30', '2020-04-23', 'str1', 'varchar1', 'char', true, 'binary_maybe'),
(1, 2, 3, 4, 2.2, 2.2, 20.20, '2022-06-30 10:20:30', '2020-04-23', 'str1', 'varchar1', 'char', true, 'binary_maybe');
SELECT * FROM like_test_partitioning;
DROP TABLE like_test_partitioning;