| ==== |
| ---- QUERY |
| CREATE TABLE ice_optimize_part (int_col int, string_col string, bool_col boolean) |
| PARTITIONED BY SPEC(int_col) |
| STORED BY ICEBERG |
| TBLPROPERTIES ('format-version'='2'); |
| ==== |
| ---- QUERY |
| # Insert a value then delete everything from the table. |
| INSERT INTO ice_optimize_part VALUES(1, 'one', true); |
| DELETE FROM ice_optimize_part WHERE bool_col=true; |
| SHOW FILES IN ice_optimize_part; |
| ---- LABELS |
| Path,Size,Partition,EC Policy |
| ---- RESULTS: VERIFY_IS_SUBSET |
| row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/ice_optimize_part/data/int_col=1/.*.0.parq','.*','','$ERASURECODE_POLICY' |
| row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/ice_optimize_part/data/int_col=1/delete-.*parq','.*','','$ERASURECODE_POLICY' |
| ---- TYPES |
| STRING, STRING, STRING, STRING |
| ==== |
| ---- QUERY |
| # Testing that OPTIMIZE merged the data and delete files and the result is an empty snapshot. |
| OPTIMIZE TABLE ice_optimize_part; |
| SHOW FILES IN ice_optimize_part; |
| ---- LABELS |
| Path,Size,Partition,EC Policy |
| ---- RESULTS: VERIFY_IS_EQUAL |
| ---- TYPES |
| STRING, STRING, STRING, STRING |
| ==== |
| ---- QUERY |
| # Testing that OPTIMIZE empty table is no-op. |
| DESCRIBE HISTORY ice_optimize_part; |
| ---- LABELS |
| creation_time,snapshot_id,parent_id,is_current_ancestor |
| ---- RESULTS: VERIFY_IS_EQUAL |
| row_regex:'.*','.*','NULL','TRUE' |
| row_regex:'.*','.*','.*','TRUE' |
| row_regex:'.*','.*','.*','TRUE' |
| ---- TYPES |
| STRING, STRING, STRING, STRING |
| ==== |
| ---- QUERY |
| OPTIMIZE TABLE ice_optimize_part; |
| DESCRIBE HISTORY ice_optimize_part; |
| ---- LABELS |
| creation_time,snapshot_id,parent_id,is_current_ancestor |
| ---- RESULTS: VERIFY_IS_EQUAL |
| row_regex:'.*','.*','NULL','TRUE' |
| row_regex:'.*','.*','.*','TRUE' |
| row_regex:'.*','.*','.*','TRUE' |
| ---- TYPES |
| STRING, STRING, STRING, STRING |
| ==== |
| ---- QUERY |
| CREATE TABLE ice_optimize (int_col int, string_col string, bool_col boolean) |
| STORED BY ICEBERG |
| TBLPROPERTIES ('format-version'='2'); |
| ==== |
| ---- QUERY |
| # Insert a value then delete everything from the table. |
| INSERT INTO ice_optimize VALUES(1, 'one', true); |
| DELETE FROM ice_optimize WHERE bool_col=true; |
| SHOW FILES IN ice_optimize; |
| ---- LABELS |
| Path,Size,Partition,EC Policy |
| ---- RESULTS: VERIFY_IS_SUBSET |
| row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/ice_optimize/data/.*.0.parq','.*','','$ERASURECODE_POLICY' |
| row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/ice_optimize/data/delete-.*parq','.*','','$ERASURECODE_POLICY' |
| ---- TYPES |
| STRING, STRING, STRING, STRING |
| ==== |
| ---- QUERY |
| # Testing that OPTIMIZE merged the data and delete files and the result is an empty snapshot. |
| OPTIMIZE TABLE ice_optimize; |
| SHOW FILES IN ice_optimize; |
| ---- LABELS |
| Path,Size,Partition,EC Policy |
| ---- RESULTS: VERIFY_IS_EQUAL |
| ---- TYPES |
| STRING, STRING, STRING, STRING |
| ==== |
| ---- QUERY |
| # Testing that OPTIMIZE empty table is no-op. |
| DESCRIBE HISTORY ice_optimize; |
| ---- LABELS |
| creation_time,snapshot_id,parent_id,is_current_ancestor |
| ---- RESULTS: VERIFY_IS_EQUAL |
| row_regex:'.*','.*','NULL','TRUE' |
| row_regex:'.*','.*','.*','TRUE' |
| row_regex:'.*','.*','.*','TRUE' |
| ---- TYPES |
| STRING, STRING, STRING, STRING |
| ==== |
| ---- QUERY |
| OPTIMIZE TABLE ice_optimize; |
| DESCRIBE HISTORY ice_optimize; |
| ---- LABELS |
| creation_time,snapshot_id,parent_id,is_current_ancestor |
| ---- RESULTS: VERIFY_IS_EQUAL |
| row_regex:'.*','.*','NULL','TRUE' |
| row_regex:'.*','.*','.*','TRUE' |
| row_regex:'.*','.*','.*','TRUE' |
| ---- TYPES |
| STRING, STRING, STRING, STRING |
| ==== |
| ---- QUERY |
| # Insert rows one by one to write multiple small files. |
| INSERT INTO ice_optimize VALUES(1, 'one', true); |
| INSERT INTO ice_optimize VALUES(2, 'two', false); |
| INSERT INTO ice_optimize VALUES(2, 'two', true); |
| INSERT INTO ice_optimize VALUES(3, 'three', true); |
| SHOW FILES IN ice_optimize; |
| ---- LABELS |
| Path,Size,Partition,EC Policy |
| ---- RESULTS: VERIFY_IS_EQUAL |
| row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/ice_optimize/data/.*.0.parq','.*','','$ERASURECODE_POLICY' |
| row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/ice_optimize/data/.*.0.parq','.*','','$ERASURECODE_POLICY' |
| row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/ice_optimize/data/.*.0.parq','.*','','$ERASURECODE_POLICY' |
| row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/ice_optimize/data/.*.0.parq','.*','','$ERASURECODE_POLICY' |
| ---- TYPES |
| STRING, STRING, STRING, STRING |
| ==== |
| ---- QUERY |
| # OPTIMIZE TABLE should create 1 data file per executor, resulting in a total of 3 files. |
| OPTIMIZE TABLE ice_optimize; |
| SHOW FILES IN ice_optimize; |
| ---- LABELS |
| Path,Size,Partition,EC Policy |
| ---- RESULTS: VERIFY_IS_EQUAL |
| row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/ice_optimize/data/.*.0.parq','.*','','$ERASURECODE_POLICY' |
| row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/ice_optimize/data/.*.0.parq','.*','','$ERASURECODE_POLICY' |
| row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/ice_optimize/data/.*.0.parq','.*','','$ERASURECODE_POLICY' |
| ---- TYPES |
| STRING, STRING, STRING, STRING |
| ==== |
| ---- QUERY |
| SELECT * FROM ice_optimize; |
| ---- RESULTS |
| 1,'one',true |
| 2,'two',false |
| 2,'two',true |
| 3,'three',true |
| ---- TYPES |
| INT,STRING,BOOLEAN |
| ==== |
| ---- QUERY |
| DELETE FROM ice_optimize WHERE int_col = 2; |
| SHOW FILES IN ice_optimize; |
| ---- LABELS |
| Path,Size,Partition,EC Policy |
| ---- RESULTS: VERIFY_IS_SUBSET |
| row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/ice_optimize/data/.*.0.parq','.*','','$ERASURECODE_POLICY' |
| row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/ice_optimize/data/.*.0.parq','.*','','$ERASURECODE_POLICY' |
| row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/ice_optimize/data/.*.0.parq','.*','','$ERASURECODE_POLICY' |
| row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/ice_optimize/data/delete-.*parq','.*','','$ERASURECODE_POLICY' |
| ---- TYPES |
| STRING, STRING, STRING, STRING |
| ==== |
| ---- QUERY |
| # Checking that MAX_FS_WRITERS has an effect on the number of files written. |
| # Also checking that the delete file was merged and there is no delete file in the table. |
| SET MAX_FS_WRITERS=1; |
| OPTIMIZE TABLE ice_optimize; |
| SET MAX_FS_WRITERS=0; |
| SHOW FILES IN ice_optimize; |
| ---- LABELS |
| Path,Size,Partition,EC Policy |
| ---- RESULTS: VERIFY_IS_EQUAL |
| row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/ice_optimize/data/.*.0.parq','.*','','$ERASURECODE_POLICY' |
| ---- RESULTS: VERIFY_IS_NOT_IN |
| row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/ice_optimize/data/delete-.*parq','.*','','$ERASURECODE_POLICY' |
| ---- TYPES |
| STRING, STRING, STRING, STRING |
| ==== |
| ---- QUERY |
| SELECT * FROM ice_optimize; |
| ---- RESULTS |
| 1,'one',true |
| 3,'three',true |
| ---- TYPES |
| INT,STRING,BOOLEAN |
| ==== |
| ---- QUERY |
| # Schema evolution should work and return correct results according to the latest schema. |
| ALTER TABLE ice_optimize DROP COLUMN string_col; |
| ALTER TABLE ice_optimize ADD COLUMN date_col DATE; |
| ALTER TABLE ice_optimize ADD COLUMN double_col DOUBLE; |
| INSERT INTO ice_optimize VALUES((4, false, '2024-01-22', 4.444), (1, false, '2024-02-02', 1.101)); |
| OPTIMIZE TABLE ice_optimize; |
| SELECT * FROM ice_optimize; |
| ---- RESULTS |
| 1,true,NULL,NULL |
| 3,true,NULL,NULL |
| 4,false,2024-01-22,4.444 |
| 1,false,2024-02-02,1.101 |
| ---- TYPES |
| INT,BOOLEAN,DATE,DOUBLE |
| ==== |
| ---- QUERY |
| # OPTIMIZE TABLE should use the latest partition spec and create 1 file per partition. |
| ALTER TABLE ice_optimize SET PARTITION SPEC(int_col); |
| INSERT INTO ice_optimize VALUES((2, false, '2024-01-22', 2.2), (3, false, '2024-02-22', 3.3)); |
| OPTIMIZE TABLE ice_optimize; |
| SHOW FILES IN ice_optimize; |
| ---- LABELS |
| Path,Size,Partition,EC Policy |
| ---- RESULTS: VERIFY_IS_EQUAL |
| row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/ice_optimize/data/int_col=1.*.0.parq','.*','','$ERASURECODE_POLICY' |
| row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/ice_optimize/data/int_col=2.*.0.parq','.*','','$ERASURECODE_POLICY' |
| row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/ice_optimize/data/int_col=3.*.0.parq','.*','','$ERASURECODE_POLICY' |
| row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/ice_optimize/data/int_col=4.*.0.parq','.*','','$ERASURECODE_POLICY' |
| ---- TYPES |
| STRING, STRING, STRING, STRING |
| ==== |
| ---- QUERY |
| SELECT * FROM ice_optimize; |
| ---- RESULTS |
| 1,true,NULL,NULL |
| 3,true,NULL,NULL |
| 4,false,2024-01-22,4.444 |
| 1,false,2024-02-02,1.101 |
| 2,false,2024-01-22,2.2 |
| 3,false,2024-02-22,3.3 |
| ---- TYPES |
| INT,BOOLEAN,DATE,DOUBLE |
| ==== |
| ---- QUERY |
| # OPTIMIZE TABLE should handle partition evolution and merge update deltas as well. |
| ALTER TABLE ice_optimize SET PARTITION SPEC(month(date_col)); |
| UPDATE ice_optimize SET date_col='2023-01-07' WHERE bool_col=true; |
| OPTIMIZE TABLE ice_optimize; |
| SHOW FILES IN ice_optimize; |
| ---- LABELS |
| Path,Size,Partition,EC Policy |
| ---- RESULTS: VERIFY_IS_EQUAL |
| row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/ice_optimize/data/date_col_month=2023-01/.*.0.parq','.*','','$ERASURECODE_POLICY' |
| row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/ice_optimize/data/date_col_month=2024-01/.*.0.parq','.*','','$ERASURECODE_POLICY' |
| row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/ice_optimize/data/date_col_month=2024-02/.*.0.parq','.*','','$ERASURECODE_POLICY' |
| ---- TYPES |
| STRING, STRING, STRING, STRING |
| ==== |
| ---- QUERY |
| ALTER TABLE ice_optimize SET PARTITION SPEC(bucket(2,int_col)); |
| OPTIMIZE TABLE ice_optimize; |
| SHOW FILES IN ice_optimize; |
| ---- LABELS |
| Path,Size,Partition,EC Policy |
| ---- RESULTS: VERIFY_IS_EQUAL |
| row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/ice_optimize/data/int_col_bucket_2=0/.*.0.parq','.*','','$ERASURECODE_POLICY' |
| row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/ice_optimize/data/int_col_bucket_2=1/.*.0.parq','.*','','$ERASURECODE_POLICY' |
| ---- TYPES |
| STRING, STRING, STRING, STRING |
| ==== |
| ---- QUERY |
| ALTER TABLE ice_optimize ADD COLUMN string_col STRING; |
| INSERT INTO ice_optimize VALUES((10, true, '2024-01-01', 10.1010, 'impala'), (11, true, '2024-01-01', 11.11, 'iceberg')); |
| ALTER TABLE ice_optimize SET PARTITION SPEC(truncate(2, string_col)); |
| OPTIMIZE TABLE ice_optimize; |
| SHOW FILES IN ice_optimize; |
| ---- LABELS |
| Path,Size,Partition,EC Policy |
| ---- RESULTS: VERIFY_IS_EQUAL |
| row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/ice_optimize/data/string_col_trunc_2=__HIVE_DEFAULT_PARTITION__/.*.0.parq','.*','','$ERASURECODE_POLICY' |
| row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/ice_optimize/data/string_col_trunc_2=ic/.*.0.parq','.*','','$ERASURECODE_POLICY' |
| row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/ice_optimize/data/string_col_trunc_2=im/.*.0.parq','.*','','$ERASURECODE_POLICY' |
| ---- TYPES |
| STRING, STRING, STRING, STRING |
| ==== |
| ---- QUERY |
| # Make sure that the table content is not modified by OPTIMIZE TABLE. |
| SELECT * FROM ice_optimize; |
| ---- RESULTS |
| 1,true,2023-01-07,NULL,'NULL' |
| 3,true,2023-01-07,NULL,'NULL' |
| 4,false,2024-01-22,4.444,'NULL' |
| 1,false,2024-02-02,1.101,'NULL' |
| 2,false,2024-01-22,2.2,'NULL' |
| 3,false,2024-02-22,3.3,'NULL' |
| 10,true,2024-01-01,10.1010,'impala' |
| 11,true,2024-01-01,11.11,'iceberg' |
| ---- TYPES |
| INT,BOOLEAN,DATE,DOUBLE,STRING |
| ==== |
| ---- QUERY |
| # Regression test for IMPALA-13598. |
| # OPTIMIZE table with many partitions (with low mem_limit). |
| # Then we also check the number of files to verify that |
| # the inputs of the writers are actually sorted(/clustered). |
| CREATE TABLE ice_tpch_many_parts |
| PARTITIONED BY SPEC(truncate(1000, l_orderkey)) |
| STORED BY ICEBERG |
| AS SELECT * FROM tpch_parquet.lineitem |
| WHERE l_orderkey % 2 = 0; |
| |
| INSERT INTO ice_tpch_many_parts |
| SELECT * FROM tpch_parquet.lineitem WHERE l_orderkey % 2 = 1; |
| |
| SET MEM_LIMIT=600m; |
| OPTIMIZE TABLE ice_tpch_many_parts; |
| SELECT count(*) FROM $DATABASE.ice_tpch_many_parts.`files`; |
| ---- RESULTS |
| 6001 |
| ---- TYPES |
| BIGINT |
| ==== |