| ==== |
| ---- QUERY |
| # Table creation and initial data loading |
| create table target(id int, bool_col boolean, int_col int, float_col float, |
| decimal_col decimal(20,0), date_col date, string_col string) |
| stored as iceberg tblproperties("format-version"="2"); |
| |
| create table source(id int, bool_col boolean, int_col int, float_col float, |
| decimal_col decimal(20,0), date_col date, string_col string) |
| stored as iceberg tblproperties("format-version"="2"); |
| |
| insert into source select id, bool_col, int_col, |
| float_col, cast(bigint_col as decimal(20,0)), to_date(timestamp_col), |
| string_col from functional.alltypes order by id limit 7; |
| ==== |
| ---- QUERY |
| # Merge into unpartitioned target table from the source table |
| # using when not matched insert case as the target table is empty now |
| merge into target using source on target.id = source.id |
| when not matched then insert values( |
| source.id, source.bool_col, source.int_col, |
| source.float_col, source.decimal_col, |
| source.date_col, source.string_col) |
| ---- DML_RESULTS: target |
| 3,false,3,3.29999995232,30,2009-01-01,'3' |
| 5,false,5,5.5,50,2009-01-01,'5' |
| 1,false,1,1.10000002384,10,2009-01-01,'1' |
| 4,true,4,4.40000009537,40,2009-01-01,'4' |
| 0,true,0,0.0,0,2009-01-01,'0' |
| 6,true,6,6.59999990463,60,2009-01-01,'6' |
| 2,true,2,2.20000004768,20,2009-01-01,'2' |
| ---- TYPES |
| INT,BOOLEAN,INT,FLOAT,DECIMAL,DATE,STRING |
| ---- RUNTIME_PROFILE |
| NumModifiedRows: 7 |
| NumDeletedRows: 0 |
| ==== |
| ---- QUERY |
| # Merge into unpartitioned target table from the source table |
| # using update case to update int_col to a constant values |
| merge into target using source on target.id = source.id |
| when matched and source.id % 2 = 1 then update set int_col = 555 |
| when matched and source.id % 2 = 0 then update set int_col = 222 |
| ---- DML_RESULTS: target |
| 3,false,555,3.29999995232,30,2009-01-01,'3' |
| 5,false,555,5.5,50,2009-01-01,'5' |
| 1,false,555,1.10000002384,10,2009-01-01,'1' |
| 4,true,222,4.40000009537,40,2009-01-01,'4' |
| 0,true,222,0.0,0,2009-01-01,'0' |
| 6,true,222,6.59999990463,60,2009-01-01,'6' |
| 2,true,222,2.20000004768,20,2009-01-01,'2' |
| ---- TYPES |
| INT,BOOLEAN,INT,FLOAT,DECIMAL,DATE,STRING |
| ---- RUNTIME_PROFILE |
| NumModifiedRows: 7 |
| NumDeletedRows: 7 |
| ==== |
| ---- QUERY |
| # Merge into unpartitioned target table from the source table |
| # Using when matched delete case to delete some values from the target table |
| merge into target using source on target.id = source.id |
| when matched and source.id % 2 = 1 then delete |
| ---- DML_RESULTS: target |
| 4,true,222,4.40000009537,40,2009-01-01,'4' |
| 0,true,222,0.0,0,2009-01-01,'0' |
| 6,true,222,6.59999990463,60,2009-01-01,'6' |
| 2,true,222,2.20000004768,20,2009-01-01,'2' |
| ---- TYPES |
| INT,BOOLEAN,INT,FLOAT,DECIMAL,DATE,STRING |
| ---- RUNTIME_PROFILE |
| NumModifiedRows: 0 |
| NumDeletedRows: 3 |
| ==== |
| ---- QUERY |
| # Merge into unpartitioned target table from the source table |
| # using multiple cases to insert/update/delete specific rows |
| merge into target using source on target.id = source.id |
| when matched and source.id = 6 then delete |
| when matched and target.id % 2 = 0 then update set string_col = concat(source.string_col, " case 2") |
| when not matched and source.id = 5 then insert (id, int_col) values (source.id, source.int_col) |
| ---- DML_RESULTS: target |
| 4,true,222,4.40000009537,40,2009-01-01,'4 case 2' |
| 0,true,222,0.0,0,2009-01-01,'0 case 2' |
| 2,true,222,2.20000004768,20,2009-01-01,'2 case 2' |
| 5,NULL,5,NULL,NULL,NULL,'NULL' |
| ---- TYPES |
| INT,BOOLEAN,INT,FLOAT,DECIMAL,DATE,STRING |
| ---- RUNTIME_PROFILE |
| NumModifiedRows: 4 |
| NumDeletedRows: 4 |
| ==== |
| ---- QUERY |
| # Merge into unpartitioned target table from the source table |
| # using not matched by source clause |
| merge into target using source on target.id = cast(source.id + 1 as int) |
| when not matched by source then update set date_col = '2022-12-12' |
| ---- DML_RESULTS: target |
| 4,true,222,4.40000009537,40,2009-01-01,'4 case 2' |
| 0,true,222,0.0,0,2022-12-12,'0 case 2' |
| 2,true,222,2.20000004768,20,2009-01-01,'2 case 2' |
| 5,NULL,5,NULL,NULL,NULL,'NULL' |
| ---- TYPES |
| INT,BOOLEAN,INT,FLOAT,DECIMAL,DATE,STRING |
| ==== |
| ---- QUERY |
| # Validate the number of snapshots written to target |
| select count(1) snapshots from $DATABASE.target.snapshots |
| ---- RESULTS |
| 5 |
| ---- TYPES |
| BIGINT |
| ==== |
| ---- QUERY |
| # Validate the files written for target |
| show files in target |
| ---- RESULTS: VERIFY_IS_SUBSET |
| row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/target/data/(?!delete-).*.parq','.*','','$ERASURECODE_POLICY' |
| row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/target/data/(?!delete-).*.parq','.*','','$ERASURECODE_POLICY' |
| row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/target/data/(?!delete-).*.parq','.*','','$ERASURECODE_POLICY' |
| row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/target/data/delete-.*.parq','.*','','$ERASURECODE_POLICY' |
| row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/target/data/delete-.*.parq','.*','','$ERASURECODE_POLICY' |
| row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/target/data/delete-.*.parq','.*','','$ERASURECODE_POLICY' |
| ---- TYPES |
| STRING,STRING,STRING,STRING |
| ==== |
| ---- QUERY |
| # Merge into unpartitioned target table from the source table |
| # using not matched and insert * clause |
| merge into target using source on target.id = source.id |
| when not matched then insert * |
| ---- DML_RESULTS: target |
| 0,true,222,0,0,2022-12-12,'0 case 2' |
| 1,false,1,1.100000023841858,10,2009-01-01,'1' |
| 2,true,222,2.200000047683716,20,2009-01-01,'2 case 2' |
| 3,false,3,3.299999952316284,30,2009-01-01,'3' |
| 4,true,222,4.400000095367432,40,2009-01-01,'4 case 2' |
| 5,NULL,5,NULL,NULL,NULL,'NULL' |
| 6,true,6,6.599999904632568,60,2009-01-01,'6' |
| ---- TYPES |
| INT,BOOLEAN,INT,FLOAT,DECIMAL,DATE,STRING |
| ==== |
| |
| ---- QUERY |
| # Regression test for IMPALA-13770: using a native UDF as a filter predicate and target |
| # value of a MERGE statement. |
| # Note: even with the incorrect behaviour before IMPALA-13770 the test may pass and the |
| # values can also be updated successfully. This is because the crash happens after the |
| # query is closed, when releasing resources. This test is a valid regression test because |
| # even if it passes, there will be a crash: some later queries are likely to fail and |
| # there will be a minidump in the build artifacts, so the build will be marked FAILED. |
| create function if not exists identity(int) returns int location 'UDF_LOCATION' symbol='Identity'; |
| create table merge_with_udf(int_col INT) |
| stored by iceberg |
| tblproperties ('format-version'='2'); |
| insert into merge_with_udf values (1), (2); |
| ==== |
| ---- QUERY |
| merge into merge_with_udf t |
| using merge_with_udf s on s.int_col = t.int_col |
| when matched and s.int_col != identity(t.int_col) + 1 |
| then update set int_col = identity(s.int_col) |
| ---- DML_RESULTS: merge_with_udf |
| 1 |
| 2 |
| ---- TYPES |
| INT |
| ---- RUNTIME_PROFILE |
| NumModifiedRows: 2 |
| NumDeletedRows: 2 |
| ==== |