blob: 2d4c672a643925b1ab21c76eaa767baa7ec23a10 [file] [log] [blame]
====
---- QUERY
# Create a table that is a subset of 'alltypes' table, i.e. it only
# contains the data types supported by Iceberg.
create table iceberg_alltypes(
id INT COMMENT 'Add a comment',
bool_col BOOLEAN,
int_col INT,
bigint_col BIGINT,
float_col FLOAT,
double_col DOUBLE,
date_col DATE,
string_col STRING,
timestamp_col TIMESTAMP
)
stored as iceberg
tblproperties('iceberg.catalog'='hadoop.tables');
---- RESULTS
'Table has been created.'
====
---- QUERY
insert into iceberg_alltypes
select id, bool_col, int_col, bigint_col, float_col, double_col,
CAST(date_string_col as date FORMAT 'MM/DD/YY'), string_col, timestamp_col
from functional.alltypes
order by id
limit 5;
---- RUNTIME_PROFILE
NumModifiedRows: 5
====
---- QUERY
select * from iceberg_alltypes;
---- RESULTS
0,true,0,0,0,0,2009-01-01,'0',2009-01-01 00:00:00
1,false,1,10,1.100000023841858,10.1,2009-01-01,'1',2009-01-01 00:01:00
2,true,2,20,2.200000047683716,20.2,2009-01-01,'2',2009-01-01 00:02:00.100000000
3,false,3,30,3.299999952316284,30.3,2009-01-01,'3',2009-01-01 00:03:00.300000000
4,true,4,40,4.400000095367432,40.4,2009-01-01,'4',2009-01-01 00:04:00.600000000
---- TYPES
INT, BOOLEAN, INT, BIGINT, FLOAT, DOUBLE, DATE, STRING, TIMESTAMP
====
---- QUERY
# Create table with decimal types
CREATE TABLE decimal_tbl (
d1 DECIMAL(9,0),
d2 DECIMAL(10,0),
d3 DECIMAL(20,10),
d4 DECIMAL(38,38),
d5 DECIMAL(10,5),
d6 DECIMAL(9,0)
)
STORED AS iceberg
TBLPROPERTIES('iceberg.catalog'='hadoop.tables');
---- RESULTS
'Table has been created.'
====
---- QUERY
insert into decimal_tbl select * from functional_parquet.decimal_tbl;
select * from decimal_tbl;
---- RESULTS
1234,2222,1.2345678900,0.12345678900000000000000000000000000000,12345.78900,1
2345,111,12.3456789000,0.12345678900000000000000000000000000000,3.14100,1
12345,333,123.4567890000,0.12345678900000000000000000000000000000,11.22000,1
12345,333,1234.5678900000,0.12345678900000000000000000000000000000,0.10000,1
132842,333,12345.6789000000,0.12345678900000000000000000000000000000,0.77889,1
---- TYPES
DECIMAL, DECIMAL, DECIMAL, DECIMAL, DECIMAL, DECIMAL
====
---- QUERY
# Create non-Iceberg table with INT96 nanos.
create table int96_nanos (ts timestamp) stored as parquet;
====
---- QUERY
# Insert edge values as "normal" int96 timestamps that can represent all values.
set parquet_timestamp_type=INT96_NANOS;
insert into int96_nanos values
("1400-01-01"),
("2019-01-18 00:00:00.000000001"),
("2019-01-18 00:00:00.000001"),
("2019-01-18 00:00:00.001"),
("2019-01-18 23:59:59.999"),
("2019-01-18 23:59:59.999999"),
("2019-01-18 23:59:59.999999999")
====
---- QUERY
# Iceberg should write timestamps as INT64 micros.
create table ts_iceberg (ts timestamp) stored as iceberg
tblproperties('iceberg.catalog'='hadoop.tables');
insert into ts_iceberg select * from int96_nanos;
select * from ts_iceberg;
---- RESULTS
1400-01-01 00:00:00
2019-01-18 00:00:00
2019-01-18 00:00:00.000001000
2019-01-18 00:00:00.001000000
2019-01-18 23:59:59.999000000
2019-01-18 23:59:59.999999000
2019-01-18 23:59:59.999999000
====
---- QUERY
# Insert into hadoop catalog.
create table iceberg_hadoop_cat (i int)
stored as iceberg
tblproperties('iceberg.catalog'='hadoop.catalog',
'iceberg.catalog_location'='$WAREHOUSE_LOCATION_PREFIX/test-warehouse/$DATABASE.db/hadoop_catalog_test');
insert into iceberg_hadoop_cat values (1), (2), (3);
---- RUNTIME_PROFILE
NumModifiedRows: 3
====
---- QUERY
select * from iceberg_hadoop_cat;
---- RESULTS
1
2
3
---- TYPES
INT
====
---- QUERY
show files in iceberg_hadoop_cat;
---- RESULTS
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/hadoop_catalog_test/$DATABASE/iceberg_hadoop_cat/data/.*.0.parq','.*','','$ERASURECODE_POLICY'
---- TYPES
STRING, STRING, STRING, STRING
====
---- QUERY
# Insert into hadoop catalog with custom table identifier.
create table iceberg_hadoop_cat_ti (i int)
stored as iceberg
tblproperties('iceberg.catalog'='hadoop.catalog',
'iceberg.catalog_location'='$WAREHOUSE_LOCATION_PREFIX/test-warehouse/$DATABASE.db/hadoop_catalog_test',
'iceberg.table_identifier'='test.custom_db.int_table');
insert into iceberg_hadoop_cat_ti values (1), (2), (3);
---- RUNTIME_PROFILE
NumModifiedRows: 3
====
---- QUERY
select * from iceberg_hadoop_cat_ti;
---- RESULTS
1
2
3
---- TYPES
INT
====
---- QUERY
show files in iceberg_hadoop_cat_ti;
---- RESULTS
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/hadoop_catalog_test/test/custom_db/int_table/data/.*.0.parq','.*','','$ERASURECODE_POLICY'
---- TYPES
STRING, STRING, STRING, STRING
====
---- QUERY
# Insert into table stored in Iceberg's HiveCatalog
create table iceberg_hive_cat (i int)
stored as iceberg
tblproperties('iceberg.catalog'='hive.catalog');
---- RESULTS
'Table has been created.'
====
---- QUERY
insert into iceberg_hive_cat values (7);
select * from iceberg_hive_cat;
---- RESULTS
7
---- TYPES
INT
====
---- QUERY
# DROP the synchronized Iceberg table (data is purged).
drop table iceberg_hive_cat
---- RESULTS
'Table has been dropped.'
====
---- QUERY
# Insert into hive catalog with custom location.
create table iceberg_hive_cat_custom_loc (i int)
stored as iceberg
location '$WAREHOUSE_LOCATION_PREFIX/test-warehouse/$DATABASE.db/custom_hive_cat'
tblproperties('iceberg.catalog'='hive.catalog');
insert into iceberg_hive_cat_custom_loc values (1), (2), (3);
---- RUNTIME_PROFILE
NumModifiedRows: 3
====
---- QUERY
select * from iceberg_hive_cat_custom_loc;
---- RESULTS
1
2
3
---- TYPES
INT
====
---- QUERY
show files in iceberg_hive_cat_custom_loc;
---- RESULTS
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/custom_hive_cat/data/.*.0.parq','.*','','$ERASURECODE_POLICY'
---- TYPES
STRING, STRING, STRING, STRING
====
---- QUERY
# Create a table that is a subset of 'alltypes' table, i.e. it only
# contains the data types supported by Iceberg.
create table iceberg_alltypes_parq_tblprop(
id INT COMMENT 'Add a comment',
bool_col BOOLEAN,
int_col INT,
bigint_col BIGINT,
float_col FLOAT,
double_col DOUBLE,
date_col DATE,
string_col STRING,
timestamp_col TIMESTAMP
)
stored as iceberg
tblproperties('write.format.default'='parquet',
'write.parquet.row-group-size-bytes'='8388608',
'write.parquet.compression-codec'='gzip',
'write.parquet.page-size-bytes'='65536',
'write.parquet.dict-size-bytes'='1073741824'
);
---- RESULTS
'Table has been created.'
====
---- QUERY
insert into iceberg_alltypes_parq_tblprop
select id, bool_col, int_col, bigint_col, float_col, double_col,
CAST(date_string_col as date FORMAT 'MM/DD/YY'), string_col, timestamp_col
from functional.alltypes;
---- RUNTIME_PROFILE
NumModifiedRows: 7300
====
---- QUERY
alter table iceberg_alltypes_parq_tblprop set tblproperties (
'write.parquet.row-group-size-bytes'='536870912',
'write.parquet.compression-codec'='none',
'write.parquet.page-size-bytes'='131072',
'write.parquet.dict-size-bytes'='805306368');
====
---- QUERY
insert into iceberg_alltypes_parq_tblprop
select id, bool_col, int_col, bigint_col, float_col, double_col,
CAST(date_string_col as date FORMAT 'MM/DD/YY'), string_col, timestamp_col
from functional.alltypes;
---- RUNTIME_PROFILE
NumModifiedRows: 7300
====
---- QUERY
alter table iceberg_alltypes_parq_tblprop set tblproperties (
'write.parquet.row-group-size-bytes'='1073741824',
'write.parquet.compression-codec'='zstd',
'write.parquet.compression-level'='1',
'write.parquet.page-size-bytes'='196608',
'write.parquet.dict-size-bytes'='536870912');
====
---- QUERY
insert into iceberg_alltypes_parq_tblprop
select id, bool_col, int_col, bigint_col, float_col, double_col,
CAST(date_string_col as date FORMAT 'MM/DD/YY'), string_col, timestamp_col
from functional.alltypes;
---- RUNTIME_PROFILE
NumModifiedRows: 7300
====
---- QUERY
alter table iceberg_alltypes_parq_tblprop set tblproperties (
'write.parquet.row-group-size-bytes'='1610612736',
'write.parquet.compression-codec'='zstd',
'write.parquet.compression-level'='13',
'write.parquet.page-size-bytes'='262144',
'write.parquet.dict-size-bytes'='402653184');
====
---- QUERY
insert into iceberg_alltypes_parq_tblprop
select id, bool_col, int_col, bigint_col, float_col, double_col,
CAST(date_string_col as date FORMAT 'MM/DD/YY'), string_col, timestamp_col
from functional.alltypes;
---- RUNTIME_PROFILE
NumModifiedRows: 7300
====
---- QUERY
alter table iceberg_alltypes_parq_tblprop set tblproperties (
'write.parquet.row-group-size-bytes'='1879048192',
'write.parquet.compression-codec'='zstd',
'write.parquet.compression-level'='18',
'write.parquet.page-size-bytes'='327680',
'write.parquet.dict-size-bytes'='134217728');
====
---- QUERY
insert into iceberg_alltypes_parq_tblprop
select id, bool_col, int_col, bigint_col, float_col, double_col,
CAST(date_string_col as date FORMAT 'MM/DD/YY'), string_col, timestamp_col
from functional.alltypes;
---- RUNTIME_PROFILE
NumModifiedRows: 7300
====
---- QUERY
alter table iceberg_alltypes_parq_tblprop set tblproperties (
'write.parquet.row-group-size-bytes'='2146435072',
'write.parquet.compression-codec'='zstd',
'write.parquet.compression-level'='22',
'write.parquet.page-size-bytes'='1048576',
'write.parquet.dict-size-bytes'='65536');
====
---- QUERY
insert into iceberg_alltypes_parq_tblprop
select id, bool_col, int_col, bigint_col, float_col, double_col,
CAST(date_string_col as date FORMAT 'MM/DD/YY'), string_col, timestamp_col
from functional.alltypes;
---- RUNTIME_PROFILE
NumModifiedRows: 7300
====
---- QUERY
alter table iceberg_alltypes_parq_tblprop unset tblproperties (
'write.parquet.row-group-size-bytes',
'write.parquet.compression-codec',
'write.parquet.compression-level',
'write.parquet.page-size-bytes',
'write.parquet.dict-size-bytes');
====
---- QUERY
insert into iceberg_alltypes_parq_tblprop
select id, bool_col, int_col, bigint_col, float_col, double_col,
CAST(date_string_col as date FORMAT 'MM/DD/YY'), string_col, timestamp_col
from functional.alltypes;
---- RUNTIME_PROFILE
NumModifiedRows: 7300
====
---- QUERY
select count(*) from iceberg_alltypes_parq_tblprop;
---- RESULTS
51100
---- TYPES
BIGINT
====
---- QUERY
set MT_DOP=1;
create table lineitem_iceberg_comment
stored as iceberg
tblproperties("write.parquet.page-size-bytes"="1048576")
as select l_comment from tpch_parquet.lineitem;
---- RESULTS
row_regex:.*6001215.*
====