testdata/workloads/functional-query/queries/QueryTest/iceberg-merge.test - impala - Git at Google

 ====
 ---- QUERY
 # Table creation and initial data loading
 create table target(id int, bool_col boolean, int_col int, float_col float,
     decimal_col decimal(20,0), date_col date, string_col string)
   stored as iceberg tblproperties("format-version"="2");

 create table source(id int, bool_col boolean, int_col int, float_col float,
     decimal_col decimal(20,0), date_col date, string_col string)
   stored as iceberg tblproperties("format-version"="2");

 insert into source select id, bool_col, int_col,
   float_col, cast(bigint_col as decimal(20,0)), to_date(timestamp_col),
   string_col from functional.alltypes order by id limit 7;
 ====
 ---- QUERY
 # Merge into unpartitioned target table from the source table
 # using when not matched insert case as the target table is empty now
 merge into target using source on target.id = source.id
 when not matched then insert values(
     source.id, source.bool_col, source.int_col,
     source.float_col, source.decimal_col,
     source.date_col, source.string_col)
 ---- DML_RESULTS: target
 3,false,3,3.29999995232,30,2009-01-01,'3'
 5,false,5,5.5,50,2009-01-01,'5'
 1,false,1,1.10000002384,10,2009-01-01,'1'
 4,true,4,4.40000009537,40,2009-01-01,'4'
 0,true,0,0.0,0,2009-01-01,'0'
 6,true,6,6.59999990463,60,2009-01-01,'6'
 2,true,2,2.20000004768,20,2009-01-01,'2'
 ---- TYPES
 INT,BOOLEAN,INT,FLOAT,DECIMAL,DATE,STRING
 ---- RUNTIME_PROFILE
 NumModifiedRows: 7
 NumDeletedRows: 0
 ====
 ---- QUERY
 # Merge into unpartitioned target table from the source table
 # using update case to update int_col to a constant values
 merge into target using source on target.id = source.id
 when matched and source.id % 2 = 1 then update set int_col = 555
 when matched and source.id % 2 = 0 then update set int_col = 222
 ---- DML_RESULTS: target
 3,false,555,3.29999995232,30,2009-01-01,'3'
 5,false,555,5.5,50,2009-01-01,'5'
 1,false,555,1.10000002384,10,2009-01-01,'1'
 4,true,222,4.40000009537,40,2009-01-01,'4'
 0,true,222,0.0,0,2009-01-01,'0'
 6,true,222,6.59999990463,60,2009-01-01,'6'
 2,true,222,2.20000004768,20,2009-01-01,'2'
 ---- TYPES
 INT,BOOLEAN,INT,FLOAT,DECIMAL,DATE,STRING
 ---- RUNTIME_PROFILE
 NumModifiedRows: 7
 NumDeletedRows: 7
 ====
 ---- QUERY
 # Merge into unpartitioned target table from the source table
 # Using when matched delete case to delete some values from the target table
 merge into target using source on target.id = source.id
 when matched and source.id % 2 = 1 then delete
 ---- DML_RESULTS: target
 4,true,222,4.40000009537,40,2009-01-01,'4'
 0,true,222,0.0,0,2009-01-01,'0'
 6,true,222,6.59999990463,60,2009-01-01,'6'
 2,true,222,2.20000004768,20,2009-01-01,'2'
 ---- TYPES
 INT,BOOLEAN,INT,FLOAT,DECIMAL,DATE,STRING
 ---- RUNTIME_PROFILE
 NumModifiedRows: 0
 NumDeletedRows: 3
 ====
 ---- QUERY
 # Merge into unpartitioned target table from the source table
 # using multiple cases to insert/update/delete specific rows
 merge into target using source on target.id = source.id
 when matched and source.id = 6 then delete
 when matched and target.id % 2 = 0 then update set string_col = concat(source.string_col, " case 2")
 when not matched and source.id = 5 then insert (id, int_col) values (source.id, source.int_col)
 ---- DML_RESULTS: target
 4,true,222,4.40000009537,40,2009-01-01,'4 case 2'
 0,true,222,0.0,0,2009-01-01,'0 case 2'
 2,true,222,2.20000004768,20,2009-01-01,'2 case 2'
 5,NULL,5,NULL,NULL,NULL,'NULL'
 ---- TYPES
 INT,BOOLEAN,INT,FLOAT,DECIMAL,DATE,STRING
 ---- RUNTIME_PROFILE
 NumModifiedRows: 4
 NumDeletedRows: 4
 ====
 ---- QUERY
 # Merge into unpartitioned target table from the source table
 # using not matched by source clause
 merge into target using source on target.id = cast(source.id + 1 as int)
 when not matched by source then update set date_col = '2022-12-12'
 ---- DML_RESULTS: target
 4,true,222,4.40000009537,40,2009-01-01,'4 case 2'
 0,true,222,0.0,0,2022-12-12,'0 case 2'
 2,true,222,2.20000004768,20,2009-01-01,'2 case 2'
 5,NULL,5,NULL,NULL,NULL,'NULL'
 ---- TYPES
 INT,BOOLEAN,INT,FLOAT,DECIMAL,DATE,STRING
 ====
 ---- QUERY
 # Validate the number of snapshots written to target
 select count(1) snapshots from $DATABASE.target.snapshots
 ---- RESULTS
 5
 ---- TYPES
 BIGINT
 ====
 ---- QUERY
 # Validate the files written for target
 show files in target
 ---- RESULTS: VERIFY_IS_SUBSET
 row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/target/data/(?!delete-).*.parq','.*','','$ERASURECODE_POLICY'
 row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/target/data/(?!delete-).*.parq','.*','','$ERASURECODE_POLICY'
 row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/target/data/(?!delete-).*.parq','.*','','$ERASURECODE_POLICY'
 row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/target/data/delete-.*.parq','.*','','$ERASURECODE_POLICY'
 row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/target/data/delete-.*.parq','.*','','$ERASURECODE_POLICY'
 row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/target/data/delete-.*.parq','.*','','$ERASURECODE_POLICY'
 ---- TYPES
 STRING,STRING,STRING,STRING
 ====
 ---- QUERY
 # Merge into unpartitioned target table from the source table
 # using not matched and insert * clause
 merge into target using source on target.id = source.id
 when not matched then insert *
 ---- DML_RESULTS: target
 0,true,222,0,0,2022-12-12,'0 case 2'
 1,false,1,1.100000023841858,10,2009-01-01,'1'
 2,true,222,2.200000047683716,20,2009-01-01,'2 case 2'
 3,false,3,3.299999952316284,30,2009-01-01,'3'
 4,true,222,4.400000095367432,40,2009-01-01,'4 case 2'
 5,NULL,5,NULL,NULL,NULL,'NULL'
 6,true,6,6.599999904632568,60,2009-01-01,'6'
 ---- TYPES
 INT,BOOLEAN,INT,FLOAT,DECIMAL,DATE,STRING
 ====

 ---- QUERY
 # Regression test for IMPALA-13770: using a native UDF as a filter predicate and target
 # value of a MERGE statement.
 # Note: even with the incorrect behaviour before IMPALA-13770 the test may pass and the
 # values can also be updated successfully. This is because the crash happens after the
 # query is closed, when releasing resources. This test is a valid regression test because
 # even if it passes, there will be a crash: some later queries are likely to fail and
 # there will be a minidump in the build artifacts, so the build will be marked FAILED.
 create function if not exists identity(int) returns int location 'UDF_LOCATION' symbol='Identity';
 create table merge_with_udf(int_col INT)
 stored by iceberg
 tblproperties ('format-version'='2');
 insert into merge_with_udf values (1), (2);
 ====
 ---- QUERY
 merge into merge_with_udf t
 using merge_with_udf s on s.int_col = t.int_col
 when matched and s.int_col != identity(t.int_col) + 1
   then update set int_col = identity(s.int_col)
 ---- DML_RESULTS: merge_with_udf
 1
 2
 ---- TYPES
 INT
 ---- RUNTIME_PROFILE
 NumModifiedRows: 2
 NumDeletedRows: 2
 ====
	====
	---- QUERY
	# Table creation and initial data loading
	create table target(id int, bool_col boolean, int_col int, float_col float,
	decimal_col decimal(20,0), date_col date, string_col string)
	stored as iceberg tblproperties("format-version"="2");

	create table source(id int, bool_col boolean, int_col int, float_col float,
	decimal_col decimal(20,0), date_col date, string_col string)
	stored as iceberg tblproperties("format-version"="2");

	insert into source select id, bool_col, int_col,
	float_col, cast(bigint_col as decimal(20,0)), to_date(timestamp_col),
	string_col from functional.alltypes order by id limit 7;
	====
	---- QUERY
	# Merge into unpartitioned target table from the source table
	# using when not matched insert case as the target table is empty now
	merge into target using source on target.id = source.id
	when not matched then insert values(
	source.id, source.bool_col, source.int_col,
	source.float_col, source.decimal_col,
	source.date_col, source.string_col)
	---- DML_RESULTS: target
	3,false,3,3.29999995232,30,2009-01-01,'3'
	5,false,5,5.5,50,2009-01-01,'5'
	1,false,1,1.10000002384,10,2009-01-01,'1'
	4,true,4,4.40000009537,40,2009-01-01,'4'
	0,true,0,0.0,0,2009-01-01,'0'
	6,true,6,6.59999990463,60,2009-01-01,'6'
	2,true,2,2.20000004768,20,2009-01-01,'2'
	---- TYPES
	INT,BOOLEAN,INT,FLOAT,DECIMAL,DATE,STRING
	---- RUNTIME_PROFILE
	NumModifiedRows: 7
	NumDeletedRows: 0
	====
	---- QUERY
	# Merge into unpartitioned target table from the source table
	# using update case to update int_col to a constant values
	merge into target using source on target.id = source.id
	when matched and source.id % 2 = 1 then update set int_col = 555
	when matched and source.id % 2 = 0 then update set int_col = 222
	---- DML_RESULTS: target
	3,false,555,3.29999995232,30,2009-01-01,'3'
	5,false,555,5.5,50,2009-01-01,'5'
	1,false,555,1.10000002384,10,2009-01-01,'1'
	4,true,222,4.40000009537,40,2009-01-01,'4'
	0,true,222,0.0,0,2009-01-01,'0'
	6,true,222,6.59999990463,60,2009-01-01,'6'
	2,true,222,2.20000004768,20,2009-01-01,'2'
	---- TYPES
	INT,BOOLEAN,INT,FLOAT,DECIMAL,DATE,STRING
	---- RUNTIME_PROFILE
	NumModifiedRows: 7
	NumDeletedRows: 7
	====
	---- QUERY
	# Merge into unpartitioned target table from the source table
	# Using when matched delete case to delete some values from the target table
	merge into target using source on target.id = source.id
	when matched and source.id % 2 = 1 then delete
	---- DML_RESULTS: target
	4,true,222,4.40000009537,40,2009-01-01,'4'
	0,true,222,0.0,0,2009-01-01,'0'
	6,true,222,6.59999990463,60,2009-01-01,'6'
	2,true,222,2.20000004768,20,2009-01-01,'2'
	---- TYPES
	INT,BOOLEAN,INT,FLOAT,DECIMAL,DATE,STRING
	---- RUNTIME_PROFILE
	NumModifiedRows: 0
	NumDeletedRows: 3
	====
	---- QUERY
	# Merge into unpartitioned target table from the source table
	# using multiple cases to insert/update/delete specific rows
	merge into target using source on target.id = source.id
	when matched and source.id = 6 then delete
	when matched and target.id % 2 = 0 then update set string_col = concat(source.string_col, " case 2")
	when not matched and source.id = 5 then insert (id, int_col) values (source.id, source.int_col)
	---- DML_RESULTS: target
	4,true,222,4.40000009537,40,2009-01-01,'4 case 2'
	0,true,222,0.0,0,2009-01-01,'0 case 2'
	2,true,222,2.20000004768,20,2009-01-01,'2 case 2'
	5,NULL,5,NULL,NULL,NULL,'NULL'
	---- TYPES
	INT,BOOLEAN,INT,FLOAT,DECIMAL,DATE,STRING
	---- RUNTIME_PROFILE
	NumModifiedRows: 4
	NumDeletedRows: 4
	====
	---- QUERY
	# Merge into unpartitioned target table from the source table
	# using not matched by source clause
	merge into target using source on target.id = cast(source.id + 1 as int)
	when not matched by source then update set date_col = '2022-12-12'
	---- DML_RESULTS: target
	4,true,222,4.40000009537,40,2009-01-01,'4 case 2'
	0,true,222,0.0,0,2022-12-12,'0 case 2'
	2,true,222,2.20000004768,20,2009-01-01,'2 case 2'
	5,NULL,5,NULL,NULL,NULL,'NULL'
	---- TYPES
	INT,BOOLEAN,INT,FLOAT,DECIMAL,DATE,STRING
	====
	---- QUERY
	# Validate the number of snapshots written to target
	select count(1) snapshots from $DATABASE.target.snapshots
	---- RESULTS
	5
	---- TYPES
	BIGINT
	====
	---- QUERY
	# Validate the files written for target
	show files in target
	---- RESULTS: VERIFY_IS_SUBSET
	row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/target/data/(?!delete-)..parq','.','','$ERASURECODE_POLICY'
	row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/target/data/(?!delete-)..parq','.','','$ERASURECODE_POLICY'
	row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/target/data/(?!delete-)..parq','.','','$ERASURECODE_POLICY'
	row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/target/data/delete-..parq','.','','$ERASURECODE_POLICY'
	row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/target/data/delete-..parq','.','','$ERASURECODE_POLICY'
	row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/target/data/delete-..parq','.','','$ERASURECODE_POLICY'
	---- TYPES
	STRING,STRING,STRING,STRING
	====
	---- QUERY
	# Merge into unpartitioned target table from the source table
	# using not matched and insert * clause
	merge into target using source on target.id = source.id
	when not matched then insert *
	---- DML_RESULTS: target
	0,true,222,0,0,2022-12-12,'0 case 2'
	1,false,1,1.100000023841858,10,2009-01-01,'1'
	2,true,222,2.200000047683716,20,2009-01-01,'2 case 2'
	3,false,3,3.299999952316284,30,2009-01-01,'3'
	4,true,222,4.400000095367432,40,2009-01-01,'4 case 2'
	5,NULL,5,NULL,NULL,NULL,'NULL'
	6,true,6,6.599999904632568,60,2009-01-01,'6'
	---- TYPES
	INT,BOOLEAN,INT,FLOAT,DECIMAL,DATE,STRING
	====

	---- QUERY
	# Regression test for IMPALA-13770: using a native UDF as a filter predicate and target
	# value of a MERGE statement.
	# Note: even with the incorrect behaviour before IMPALA-13770 the test may pass and the
	# values can also be updated successfully. This is because the crash happens after the
	# query is closed, when releasing resources. This test is a valid regression test because
	# even if it passes, there will be a crash: some later queries are likely to fail and
	# there will be a minidump in the build artifacts, so the build will be marked FAILED.
	create function if not exists identity(int) returns int location 'UDF_LOCATION' symbol='Identity';
	create table merge_with_udf(int_col INT)
	stored by iceberg
	tblproperties ('format-version'='2');
	insert into merge_with_udf values (1), (2);
	====
	---- QUERY
	merge into merge_with_udf t
	using merge_with_udf s on s.int_col = t.int_col
	when matched and s.int_col != identity(t.int_col) + 1
	then update set int_col = identity(s.int_col)
	---- DML_RESULTS: merge_with_udf
	1
	2
	---- TYPES
	INT
	---- RUNTIME_PROFILE
	NumModifiedRows: 2
	NumDeletedRows: 2
	====