blob: cd1a4f390d188131908fdcf1bea7e72eb369872f [file] [log] [blame]
====
---- QUERY
# Table creation and initial data loading
create table target(id int, bool_col boolean, int_col int, float_col float,
decimal_col decimal(20,0), date_col date, string_col string)
stored as iceberg tblproperties("format-version"="2");
create table source(id int, bool_col boolean, int_col int, float_col float,
decimal_col decimal(20,0), date_col date, string_col string)
stored as iceberg tblproperties("format-version"="2");
insert into source select id, bool_col, int_col,
float_col, cast(bigint_col as decimal(20,0)), to_date(timestamp_col),
string_col from functional.alltypes order by id limit 7;
====
---- QUERY
# Merge into unpartitioned target table from the source table
# using when not matched insert case as the target table is empty now
merge into target using source on target.id = source.id
when not matched then insert values(
source.id, source.bool_col, source.int_col,
source.float_col, source.decimal_col,
source.date_col, source.string_col)
---- DML_RESULTS: target
3,false,3,3.29999995232,30,2009-01-01,'3'
5,false,5,5.5,50,2009-01-01,'5'
1,false,1,1.10000002384,10,2009-01-01,'1'
4,true,4,4.40000009537,40,2009-01-01,'4'
0,true,0,0.0,0,2009-01-01,'0'
6,true,6,6.59999990463,60,2009-01-01,'6'
2,true,2,2.20000004768,20,2009-01-01,'2'
---- TYPES
INT,BOOLEAN,INT,FLOAT,DECIMAL,DATE,STRING
---- RUNTIME_PROFILE
NumModifiedRows: 7
NumDeletedRows: 0
====
---- QUERY
# Merge into unpartitioned target table from the source table
# using update case to update int_col to a constant values
merge into target using source on target.id = source.id
when matched and source.id % 2 = 1 then update set int_col = 555
when matched and source.id % 2 = 0 then update set int_col = 222
---- DML_RESULTS: target
3,false,555,3.29999995232,30,2009-01-01,'3'
5,false,555,5.5,50,2009-01-01,'5'
1,false,555,1.10000002384,10,2009-01-01,'1'
4,true,222,4.40000009537,40,2009-01-01,'4'
0,true,222,0.0,0,2009-01-01,'0'
6,true,222,6.59999990463,60,2009-01-01,'6'
2,true,222,2.20000004768,20,2009-01-01,'2'
---- TYPES
INT,BOOLEAN,INT,FLOAT,DECIMAL,DATE,STRING
---- RUNTIME_PROFILE
NumModifiedRows: 7
NumDeletedRows: 7
====
---- QUERY
# Merge into unpartitioned target table from the source table
# Using when matched delete case to delete some values from the target table
merge into target using source on target.id = source.id
when matched and source.id % 2 = 1 then delete
---- DML_RESULTS: target
4,true,222,4.40000009537,40,2009-01-01,'4'
0,true,222,0.0,0,2009-01-01,'0'
6,true,222,6.59999990463,60,2009-01-01,'6'
2,true,222,2.20000004768,20,2009-01-01,'2'
---- TYPES
INT,BOOLEAN,INT,FLOAT,DECIMAL,DATE,STRING
---- RUNTIME_PROFILE
NumModifiedRows: 0
NumDeletedRows: 3
====
---- QUERY
# Merge into unpartitioned target table from the source table
# using multiple cases to insert/update/delete specific rows
merge into target using source on target.id = source.id
when matched and source.id = 6 then delete
when matched and target.id % 2 = 0 then update set string_col = concat(source.string_col, " case 2")
when not matched and source.id = 5 then insert (id, int_col) values (source.id, source.int_col)
---- DML_RESULTS: target
4,true,222,4.40000009537,40,2009-01-01,'4 case 2'
0,true,222,0.0,0,2009-01-01,'0 case 2'
2,true,222,2.20000004768,20,2009-01-01,'2 case 2'
5,NULL,5,NULL,NULL,NULL,'NULL'
---- TYPES
INT,BOOLEAN,INT,FLOAT,DECIMAL,DATE,STRING
---- RUNTIME_PROFILE
NumModifiedRows: 4
NumDeletedRows: 4
====
---- QUERY
# Merge into unpartitioned target table from the source table
# using not matched by source clause
merge into target using source on target.id = cast(source.id + 1 as int)
when not matched by source then update set date_col = '2022-12-12'
---- DML_RESULTS: target
4,true,222,4.40000009537,40,2009-01-01,'4 case 2'
0,true,222,0.0,0,2022-12-12,'0 case 2'
2,true,222,2.20000004768,20,2009-01-01,'2 case 2'
5,NULL,5,NULL,NULL,NULL,'NULL'
---- TYPES
INT,BOOLEAN,INT,FLOAT,DECIMAL,DATE,STRING
====
---- QUERY
# Validate the number of snapshots written to target
select count(1) snapshots from $DATABASE.target.snapshots
---- RESULTS
5
---- TYPES
BIGINT
====
---- QUERY
# Validate the files written for target
show files in target
---- RESULTS: VERIFY_IS_SUBSET
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/target/data/(?!delete-).*.parq','.*','','$ERASURECODE_POLICY'
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/target/data/(?!delete-).*.parq','.*','','$ERASURECODE_POLICY'
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/target/data/(?!delete-).*.parq','.*','','$ERASURECODE_POLICY'
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/target/data/delete-.*.parq','.*','','$ERASURECODE_POLICY'
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/target/data/delete-.*.parq','.*','','$ERASURECODE_POLICY'
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/target/data/delete-.*.parq','.*','','$ERASURECODE_POLICY'
---- TYPES
STRING,STRING,STRING,STRING
====
---- QUERY
# Merge into unpartitioned target table from the source table
# using not matched and insert * clause
merge into target using source on target.id = source.id
when not matched then insert *
---- DML_RESULTS: target
0,true,222,0,0,2022-12-12,'0 case 2'
1,false,1,1.100000023841858,10,2009-01-01,'1'
2,true,222,2.200000047683716,20,2009-01-01,'2 case 2'
3,false,3,3.299999952316284,30,2009-01-01,'3'
4,true,222,4.400000095367432,40,2009-01-01,'4 case 2'
5,NULL,5,NULL,NULL,NULL,'NULL'
6,true,6,6.599999904632568,60,2009-01-01,'6'
---- TYPES
INT,BOOLEAN,INT,FLOAT,DECIMAL,DATE,STRING
====
---- QUERY
# Regression test for IMPALA-13770: using a native UDF as a filter predicate and target
# value of a MERGE statement.
# Note: even with the incorrect behaviour before IMPALA-13770 the test may pass and the
# values can also be updated successfully. This is because the crash happens after the
# query is closed, when releasing resources. This test is a valid regression test because
# even if it passes, there will be a crash: some later queries are likely to fail and
# there will be a minidump in the build artifacts, so the build will be marked FAILED.
create function if not exists identity(int) returns int location 'UDF_LOCATION' symbol='Identity';
create table merge_with_udf(int_col INT)
stored by iceberg
tblproperties ('format-version'='2');
insert into merge_with_udf values (1), (2);
====
---- QUERY
merge into merge_with_udf t
using merge_with_udf s on s.int_col = t.int_col
when matched and s.int_col != identity(t.int_col) + 1
then update set int_col = identity(s.int_col)
---- DML_RESULTS: merge_with_udf
1
2
---- TYPES
INT
---- RUNTIME_PROFILE
NumModifiedRows: 2
NumDeletedRows: 2
====