blob: fd9993c9dacfcf32682177ee4a75ad90cb8f466d [file] [log] [blame]
====
---- QUERY
CREATE TABLE ice_optimize_part (int_col int, string_col string, bool_col boolean)
PARTITIONED BY SPEC(int_col)
STORED BY ICEBERG
TBLPROPERTIES ('format-version'='2');
====
---- QUERY
# Insert a value then delete everything from the table.
INSERT INTO ice_optimize_part VALUES(1, 'one', true);
DELETE FROM ice_optimize_part WHERE bool_col=true;
SHOW FILES IN ice_optimize_part;
---- LABELS
Path,Size,Partition,EC Policy
---- RESULTS: VERIFY_IS_SUBSET
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/ice_optimize_part/data/int_col=1/.*.0.parq','.*','','$ERASURECODE_POLICY'
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/ice_optimize_part/data/int_col=1/delete-.*parq','.*','','$ERASURECODE_POLICY'
---- TYPES
STRING, STRING, STRING, STRING
====
---- QUERY
# Testing that OPTIMIZE merged the data and delete files and the result is an empty snapshot.
OPTIMIZE TABLE ice_optimize_part;
SHOW FILES IN ice_optimize_part;
---- LABELS
Path,Size,Partition,EC Policy
---- RESULTS: VERIFY_IS_EQUAL
---- TYPES
STRING, STRING, STRING, STRING
====
---- QUERY
# Testing that OPTIMIZE empty table is no-op.
DESCRIBE HISTORY ice_optimize_part;
---- LABELS
creation_time,snapshot_id,parent_id,is_current_ancestor
---- RESULTS: VERIFY_IS_EQUAL
row_regex:'.*','.*','NULL','TRUE'
row_regex:'.*','.*','.*','TRUE'
row_regex:'.*','.*','.*','TRUE'
---- TYPES
STRING, STRING, STRING, STRING
====
---- QUERY
OPTIMIZE TABLE ice_optimize_part;
DESCRIBE HISTORY ice_optimize_part;
---- LABELS
creation_time,snapshot_id,parent_id,is_current_ancestor
---- RESULTS: VERIFY_IS_EQUAL
row_regex:'.*','.*','NULL','TRUE'
row_regex:'.*','.*','.*','TRUE'
row_regex:'.*','.*','.*','TRUE'
---- TYPES
STRING, STRING, STRING, STRING
====
---- QUERY
CREATE TABLE ice_optimize (int_col int, string_col string, bool_col boolean)
STORED BY ICEBERG
TBLPROPERTIES ('format-version'='2');
====
---- QUERY
# Insert a value then delete everything from the table.
INSERT INTO ice_optimize VALUES(1, 'one', true);
DELETE FROM ice_optimize WHERE bool_col=true;
SHOW FILES IN ice_optimize;
---- LABELS
Path,Size,Partition,EC Policy
---- RESULTS: VERIFY_IS_SUBSET
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/ice_optimize/data/.*.0.parq','.*','','$ERASURECODE_POLICY'
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/ice_optimize/data/delete-.*parq','.*','','$ERASURECODE_POLICY'
---- TYPES
STRING, STRING, STRING, STRING
====
---- QUERY
# Testing that OPTIMIZE merged the data and delete files and the result is an empty snapshot.
OPTIMIZE TABLE ice_optimize;
SHOW FILES IN ice_optimize;
---- LABELS
Path,Size,Partition,EC Policy
---- RESULTS: VERIFY_IS_EQUAL
---- TYPES
STRING, STRING, STRING, STRING
====
---- QUERY
# Testing that OPTIMIZE empty table is no-op.
DESCRIBE HISTORY ice_optimize;
---- LABELS
creation_time,snapshot_id,parent_id,is_current_ancestor
---- RESULTS: VERIFY_IS_EQUAL
row_regex:'.*','.*','NULL','TRUE'
row_regex:'.*','.*','.*','TRUE'
row_regex:'.*','.*','.*','TRUE'
---- TYPES
STRING, STRING, STRING, STRING
====
---- QUERY
OPTIMIZE TABLE ice_optimize;
DESCRIBE HISTORY ice_optimize;
---- LABELS
creation_time,snapshot_id,parent_id,is_current_ancestor
---- RESULTS: VERIFY_IS_EQUAL
row_regex:'.*','.*','NULL','TRUE'
row_regex:'.*','.*','.*','TRUE'
row_regex:'.*','.*','.*','TRUE'
---- TYPES
STRING, STRING, STRING, STRING
====
---- QUERY
# Insert rows one by one to write multiple small files.
INSERT INTO ice_optimize VALUES(1, 'one', true);
INSERT INTO ice_optimize VALUES(2, 'two', false);
INSERT INTO ice_optimize VALUES(2, 'two', true);
INSERT INTO ice_optimize VALUES(3, 'three', true);
SHOW FILES IN ice_optimize;
---- LABELS
Path,Size,Partition,EC Policy
---- RESULTS: VERIFY_IS_EQUAL
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/ice_optimize/data/.*.0.parq','.*','','$ERASURECODE_POLICY'
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/ice_optimize/data/.*.0.parq','.*','','$ERASURECODE_POLICY'
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/ice_optimize/data/.*.0.parq','.*','','$ERASURECODE_POLICY'
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/ice_optimize/data/.*.0.parq','.*','','$ERASURECODE_POLICY'
---- TYPES
STRING, STRING, STRING, STRING
====
---- QUERY
# OPTIMIZE TABLE should create 1 data file per executor, resulting in a total of 3 files.
OPTIMIZE TABLE ice_optimize;
SHOW FILES IN ice_optimize;
---- LABELS
Path,Size,Partition,EC Policy
---- RESULTS: VERIFY_IS_EQUAL
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/ice_optimize/data/.*.0.parq','.*','','$ERASURECODE_POLICY'
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/ice_optimize/data/.*.0.parq','.*','','$ERASURECODE_POLICY'
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/ice_optimize/data/.*.0.parq','.*','','$ERASURECODE_POLICY'
---- TYPES
STRING, STRING, STRING, STRING
====
---- QUERY
SELECT * FROM ice_optimize;
---- RESULTS
1,'one',true
2,'two',false
2,'two',true
3,'three',true
---- TYPES
INT,STRING,BOOLEAN
====
---- QUERY
DELETE FROM ice_optimize WHERE int_col = 2;
SHOW FILES IN ice_optimize;
---- LABELS
Path,Size,Partition,EC Policy
---- RESULTS: VERIFY_IS_SUBSET
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/ice_optimize/data/.*.0.parq','.*','','$ERASURECODE_POLICY'
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/ice_optimize/data/.*.0.parq','.*','','$ERASURECODE_POLICY'
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/ice_optimize/data/.*.0.parq','.*','','$ERASURECODE_POLICY'
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/ice_optimize/data/delete-.*parq','.*','','$ERASURECODE_POLICY'
---- TYPES
STRING, STRING, STRING, STRING
====
---- QUERY
# Checking that MAX_FS_WRITERS has an effect on the number of files written.
# Also checking that the delete file was merged and there is no delete file in the table.
SET MAX_FS_WRITERS=1;
OPTIMIZE TABLE ice_optimize;
SET MAX_FS_WRITERS=0;
SHOW FILES IN ice_optimize;
---- LABELS
Path,Size,Partition,EC Policy
---- RESULTS: VERIFY_IS_EQUAL
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/ice_optimize/data/.*.0.parq','.*','','$ERASURECODE_POLICY'
---- RESULTS: VERIFY_IS_NOT_IN
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/ice_optimize/data/delete-.*parq','.*','','$ERASURECODE_POLICY'
---- TYPES
STRING, STRING, STRING, STRING
====
---- QUERY
SELECT * FROM ice_optimize;
---- RESULTS
1,'one',true
3,'three',true
---- TYPES
INT,STRING,BOOLEAN
====
---- QUERY
# Schema evolution should work and return correct results according to the latest schema.
ALTER TABLE ice_optimize DROP COLUMN string_col;
ALTER TABLE ice_optimize ADD COLUMN date_col DATE;
ALTER TABLE ice_optimize ADD COLUMN double_col DOUBLE;
INSERT INTO ice_optimize VALUES((4, false, '2024-01-22', 4.444), (1, false, '2024-02-02', 1.101));
OPTIMIZE TABLE ice_optimize;
SELECT * FROM ice_optimize;
---- RESULTS
1,true,NULL,NULL
3,true,NULL,NULL
4,false,2024-01-22,4.444
1,false,2024-02-02,1.101
---- TYPES
INT,BOOLEAN,DATE,DOUBLE
====
---- QUERY
# OPTIMIZE TABLE should use the latest partition spec and create 1 file per partition.
ALTER TABLE ice_optimize SET PARTITION SPEC(int_col);
INSERT INTO ice_optimize VALUES((2, false, '2024-01-22', 2.2), (3, false, '2024-02-22', 3.3));
OPTIMIZE TABLE ice_optimize;
SHOW FILES IN ice_optimize;
---- LABELS
Path,Size,Partition,EC Policy
---- RESULTS: VERIFY_IS_EQUAL
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/ice_optimize/data/int_col=1.*.0.parq','.*','','$ERASURECODE_POLICY'
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/ice_optimize/data/int_col=2.*.0.parq','.*','','$ERASURECODE_POLICY'
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/ice_optimize/data/int_col=3.*.0.parq','.*','','$ERASURECODE_POLICY'
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/ice_optimize/data/int_col=4.*.0.parq','.*','','$ERASURECODE_POLICY'
---- TYPES
STRING, STRING, STRING, STRING
====
---- QUERY
SELECT * FROM ice_optimize;
---- RESULTS
1,true,NULL,NULL
3,true,NULL,NULL
4,false,2024-01-22,4.444
1,false,2024-02-02,1.101
2,false,2024-01-22,2.2
3,false,2024-02-22,3.3
---- TYPES
INT,BOOLEAN,DATE,DOUBLE
====
---- QUERY
# OPTIMIZE TABLE should handle partition evolution and merge update deltas as well.
ALTER TABLE ice_optimize SET PARTITION SPEC(month(date_col));
UPDATE ice_optimize SET date_col='2023-01-07' WHERE bool_col=true;
OPTIMIZE TABLE ice_optimize;
SHOW FILES IN ice_optimize;
---- LABELS
Path,Size,Partition,EC Policy
---- RESULTS: VERIFY_IS_EQUAL
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/ice_optimize/data/date_col_month=2023-01/.*.0.parq','.*','','$ERASURECODE_POLICY'
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/ice_optimize/data/date_col_month=2024-01/.*.0.parq','.*','','$ERASURECODE_POLICY'
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/ice_optimize/data/date_col_month=2024-02/.*.0.parq','.*','','$ERASURECODE_POLICY'
---- TYPES
STRING, STRING, STRING, STRING
====
---- QUERY
ALTER TABLE ice_optimize SET PARTITION SPEC(bucket(2,int_col));
OPTIMIZE TABLE ice_optimize;
SHOW FILES IN ice_optimize;
---- LABELS
Path,Size,Partition,EC Policy
---- RESULTS: VERIFY_IS_EQUAL
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/ice_optimize/data/int_col_bucket_2=0/.*.0.parq','.*','','$ERASURECODE_POLICY'
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/ice_optimize/data/int_col_bucket_2=1/.*.0.parq','.*','','$ERASURECODE_POLICY'
---- TYPES
STRING, STRING, STRING, STRING
====
---- QUERY
ALTER TABLE ice_optimize ADD COLUMN string_col STRING;
INSERT INTO ice_optimize VALUES((10, true, '2024-01-01', 10.1010, 'impala'), (11, true, '2024-01-01', 11.11, 'iceberg'));
ALTER TABLE ice_optimize SET PARTITION SPEC(truncate(2, string_col));
OPTIMIZE TABLE ice_optimize;
SHOW FILES IN ice_optimize;
---- LABELS
Path,Size,Partition,EC Policy
---- RESULTS: VERIFY_IS_EQUAL
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/ice_optimize/data/string_col_trunc_2=__HIVE_DEFAULT_PARTITION__/.*.0.parq','.*','','$ERASURECODE_POLICY'
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/ice_optimize/data/string_col_trunc_2=ic/.*.0.parq','.*','','$ERASURECODE_POLICY'
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/ice_optimize/data/string_col_trunc_2=im/.*.0.parq','.*','','$ERASURECODE_POLICY'
---- TYPES
STRING, STRING, STRING, STRING
====
---- QUERY
# Make sure that the table content is not modified by OPTIMIZE TABLE.
SELECT * FROM ice_optimize;
---- RESULTS
1,true,2023-01-07,NULL,'NULL'
3,true,2023-01-07,NULL,'NULL'
4,false,2024-01-22,4.444,'NULL'
1,false,2024-02-02,1.101,'NULL'
2,false,2024-01-22,2.2,'NULL'
3,false,2024-02-22,3.3,'NULL'
10,true,2024-01-01,10.1010,'impala'
11,true,2024-01-01,11.11,'iceberg'
---- TYPES
INT,BOOLEAN,DATE,DOUBLE,STRING
====
---- QUERY
# Regression test for IMPALA-13598.
# OPTIMIZE table with many partitions (with low mem_limit).
# Then we also check the number of files to verify that
# the inputs of the writers are actually sorted(/clustered).
CREATE TABLE ice_tpch_many_parts
PARTITIONED BY SPEC(truncate(1000, l_orderkey))
STORED BY ICEBERG
AS SELECT * FROM tpch_parquet.lineitem
WHERE l_orderkey % 2 = 0;
INSERT INTO ice_tpch_many_parts
SELECT * FROM tpch_parquet.lineitem WHERE l_orderkey % 2 = 1;
SET MEM_LIMIT=600m;
OPTIMIZE TABLE ice_tpch_many_parts;
SELECT count(*) FROM $DATABASE.ice_tpch_many_parts.`files`;
---- RESULTS
6001
---- TYPES
BIGINT
====