blob: 15ab6f2d14935071e87ca72967cedaf0c2d6b9db [file]
====
---- QUERY
# Identity partition - deletion vector created in the correct partition directory
CREATE TABLE ice_v3_dv_id_part (id int, cat string, val int)
PARTITIONED BY SPEC (cat)
STORED BY ICEBERG
TBLPROPERTIES ('format-version'='3');
INSERT INTO ice_v3_dv_id_part VALUES
(1, 'A', 10), (2, 'A', 20), (3, 'B', 30), (4, 'B', 40), (5, 'C', 50);
DELETE FROM ice_v3_dv_id_part WHERE cat = 'A' AND id = 1;
SELECT * FROM ice_v3_dv_id_part ORDER BY id;
---- RESULTS
2,'A',20
3,'B',30
4,'B',40
5,'C',50
---- TYPES
INT,STRING,INT
====
---- QUERY
# After the first DELETE (only partition 'A' touched), we have 3 data files (one per
# identity partition A/B/C) and 1 DV file for the affected partition.
SELECT content, file_format, referenced_data_file IS NOT NULL, content_offset IS NOT NULL
FROM $DATABASE.ice_v3_dv_id_part.`files`
ORDER BY content, file_format;
---- RESULTS
0,'PARQUET',false,false
0,'PARQUET',false,false
0,'PARQUET',false,false
1,'PUFFIN',true,true
---- TYPES
INT,STRING,BOOLEAN,BOOLEAN
====
---- QUERY
# Second delete in the same partition - DV merge within a single partition
DELETE FROM ice_v3_dv_id_part WHERE cat = 'A' AND id = 2;
SELECT * FROM ice_v3_dv_id_part ORDER BY id;
---- RESULTS
3,'B',30
4,'B',40
5,'C',50
---- TYPES
INT,STRING,INT
====
---- QUERY
# Cross-partition delete - deletion vectors created per partition
DELETE FROM ice_v3_dv_id_part WHERE id IN (3, 5);
SELECT * FROM ice_v3_dv_id_part ORDER BY id;
---- RESULTS
4,'B',40
---- TYPES
INT,STRING,INT
====
---- QUERY
# Delete remaining partition to empty the table, verify count is zero
DELETE FROM ice_v3_dv_id_part WHERE cat = 'B';
SELECT count(*) FROM ice_v3_dv_id_part;
---- RESULTS
0
---- TYPES
BIGINT
====
---- QUERY
# Truncate partition transform - rows sharing a truncated key go to same partition
CREATE TABLE ice_v3_dv_trunc_part (id int, name string, val int)
PARTITIONED BY SPEC (truncate(2, name))
STORED BY ICEBERG
TBLPROPERTIES ('format-version'='3');
INSERT INTO ice_v3_dv_trunc_part VALUES
(1, 'apple', 10), (2, 'apricot', 20), (3, 'banana', 30), (4, 'berry', 40), (5, 'cherry', 50);
# 'apple' and 'apricot' share truncated key 'ap'; deleting by LIKE hits the same partition
DELETE FROM ice_v3_dv_trunc_part WHERE name LIKE 'ap%';
SELECT * FROM ice_v3_dv_trunc_part ORDER BY id;
---- RESULTS
3,'banana',30
4,'berry',40
5,'cherry',50
---- TYPES
INT,STRING,INT
====
---- QUERY
# Second delete in the truncated partition - DV merge on truncated partition
DELETE FROM ice_v3_dv_trunc_part WHERE name LIKE 'ba%';
SELECT * FROM ice_v3_dv_trunc_part ORDER BY id;
---- RESULTS
4,'berry',40
5,'cherry',50
---- TYPES
INT,STRING,INT
====
---- QUERY
# Bucket partition - deletes distribute across buckets
CREATE TABLE ice_v3_dv_bucket_part (id int, hash_col int, string_data string)
PARTITIONED BY SPEC (bucket(5, hash_col))
STORED BY ICEBERG
TBLPROPERTIES ('format-version'='3');
INSERT INTO ice_v3_dv_bucket_part VALUES
(1, 10, 'a'), (2, 20, 'b'), (3, 30, 'c'), (4, 40, 'd'), (5, 50, 'e');
DELETE FROM ice_v3_dv_bucket_part WHERE hash_col IN (20, 40);
SELECT * FROM ice_v3_dv_bucket_part ORDER BY id;
---- RESULTS
1,10,'a'
3,30,'c'
5,50,'e'
---- TYPES
INT,INT,STRING
====
---- QUERY
# Second delete - cumulative DV merge across buckets
DELETE FROM ice_v3_dv_bucket_part WHERE id = 3;
SELECT * FROM ice_v3_dv_bucket_part ORDER BY id;
---- RESULTS
1,10,'a'
5,50,'e'
---- TYPES
INT,INT,STRING
====
---- QUERY
# Multi-column partition spec - deletion vectors per (col1, col2) partition cell
CREATE TABLE ice_v3_dv_multi_part (id int, region string, yr int, val int)
PARTITIONED BY SPEC (region, bucket(3, yr))
STORED BY ICEBERG
TBLPROPERTIES ('format-version'='3');
INSERT INTO ice_v3_dv_multi_part VALUES
(1, 'US', 2022, 100), (2, 'US', 2023, 200),
(3, 'EU', 2022, 300), (4, 'EU', 2023, 400),
(5, 'AP', 2022, 500);
# Partition-pruning delete: only touches EU partitions
DELETE FROM ice_v3_dv_multi_part WHERE region = 'EU';
SELECT * FROM ice_v3_dv_multi_part ORDER BY id;
---- RESULTS
1,'US',2022,100
2,'US',2023,200
5,'AP',2022,500
---- TYPES
INT,STRING,INT,INT
====
---- QUERY
# Cross-partition delete on non-partition column
DELETE FROM ice_v3_dv_multi_part WHERE yr = 2022;
SELECT * FROM ice_v3_dv_multi_part ORDER BY id;
---- RESULTS
2,'US',2023,200
---- TYPES
INT,STRING,INT,INT
====
---- QUERY
# Temporal partition (DAY transform) - delete within and across day partitions
CREATE TABLE ice_v3_dv_day_part (id int, ts timestamp, payload string)
PARTITIONED BY SPEC (day(ts))
STORED BY ICEBERG
TBLPROPERTIES ('format-version'='3');
INSERT INTO ice_v3_dv_day_part VALUES
(1, '2024-01-01 08:00:00', 'a'),
(2, '2024-01-01 20:00:00', 'b'),
(3, '2024-01-02 10:00:00', 'c'),
(4, '2024-01-03 12:00:00', 'd');
DELETE FROM ice_v3_dv_day_part WHERE id IN (1, 3);
SELECT * FROM ice_v3_dv_day_part ORDER BY id;
---- RESULTS
2,2024-01-01 20:00:00,'b'
4,2024-01-03 12:00:00,'d'
---- TYPES
INT,TIMESTAMP,STRING
====
---- QUERY
# YEAR partition transform - multiple rows per year, DV merge across years
CREATE TABLE ice_v3_dv_year_part (id int, event_date date, amount decimal(10,2))
PARTITIONED BY SPEC (year(event_date))
STORED BY ICEBERG
TBLPROPERTIES ('format-version'='3');
INSERT INTO ice_v3_dv_year_part VALUES
(1, '2022-03-15', 100.00),
(2, '2022-09-20', 200.00),
(3, '2023-01-10', 300.00),
(4, '2023-11-05', 400.00),
(5, '2024-06-01', 500.00);
DELETE FROM ice_v3_dv_year_part WHERE year(event_date) = 2022;
SELECT * FROM ice_v3_dv_year_part ORDER BY id;
---- RESULTS
3,2023-01-10,300.00
4,2023-11-05,400.00
5,2024-06-01,500.00
---- TYPES
INT,DATE,DECIMAL
====
---- QUERY
# Second delete - merges DV with prior vector in 2023 partition
DELETE FROM ice_v3_dv_year_part WHERE id = 4;
SELECT count(*) FROM ice_v3_dv_year_part;
---- RESULTS
2
---- TYPES
BIGINT
====
---- QUERY
# NULL partition values with deletion vectors
CREATE TABLE ice_v3_dv_null_part (id int, part_key string, val int)
PARTITIONED BY SPEC (part_key)
STORED BY ICEBERG
TBLPROPERTIES ('format-version'='3');
INSERT INTO ice_v3_dv_null_part VALUES
(1, 'X', 10), (2, NULL, 20), (3, 'X', 30), (4, NULL, 40);
# Delete from NULL partition
DELETE FROM ice_v3_dv_null_part WHERE part_key IS NULL;
SELECT * FROM ice_v3_dv_null_part ORDER BY id;
---- RESULTS
1,'X',10
3,'X',30
---- TYPES
INT,STRING,INT
====
---- QUERY
# NULL-partition delete: 2 data files (partition 'X' and NULL partition) + 1 DV for the NULL partition
SELECT content, file_format, referenced_data_file IS NOT NULL, content_offset IS NOT NULL
FROM $DATABASE.ice_v3_dv_null_part.`files`
ORDER BY content, file_format;
---- RESULTS
0,'PARQUET',false,false
0,'PARQUET',false,false
1,'PUFFIN',true,true
---- TYPES
INT,STRING,BOOLEAN,BOOLEAN
====
---- QUERY
# Aggregate correctness after partition-specific deletes
CREATE TABLE ice_v3_dv_agg_part (region string, category string, sales int)
PARTITIONED BY SPEC (region)
STORED BY ICEBERG
TBLPROPERTIES ('format-version'='3');
INSERT INTO ice_v3_dv_agg_part VALUES
('US', 'A', 100), ('US', 'A', 200), ('US', 'B', 150),
('EU', 'A', 300), ('EU', 'B', 50), ('EU', 'B', 75);
DELETE FROM ice_v3_dv_agg_part WHERE region = 'US' AND sales < 150;
SELECT region, category, sum(sales), count(*)
FROM ice_v3_dv_agg_part
GROUP BY region, category
ORDER BY region, category;
---- RESULTS
'EU','A',300,1
'EU','B',125,2
'US','A',200,1
'US','B',150,1
---- TYPES
STRING,STRING,BIGINT,BIGINT
====
---- QUERY
# Large-scale partitioned delete - stress RoaringBitmap per-partition
CREATE TABLE ice_v3_dv_large_part (id bigint, part_key int)
PARTITIONED BY SPEC (bucket(10, part_key))
STORED BY ICEBERG
TBLPROPERTIES ('format-version'='3');
INSERT INTO ice_v3_dv_large_part
SELECT id, id % 100
FROM functional.alltypes
ORDER BY id LIMIT 5000;
# Delete roughly half the rows spread across all 10 buckets
DELETE FROM ice_v3_dv_large_part WHERE id % 2 = 0;
SELECT count(*) FROM ice_v3_dv_large_part;
---- RESULTS
2500
---- TYPES
BIGINT
====
---- QUERY
# Second large delete - DV merge per partition bucket
DELETE FROM ice_v3_dv_large_part WHERE id % 3 = 0;
SELECT count(*) FROM ice_v3_dv_large_part;
---- RESULTS
1667
---- TYPES
BIGINT
====
---- QUERY
# Basic SORT BY (linear sort) - deletion vectors on a sorted table
CREATE TABLE ice_v3_dv_sorted (id int, score int, label string)
SORT BY (score)
STORED BY ICEBERG
TBLPROPERTIES ('format-version'='3');
INSERT INTO ice_v3_dv_sorted VALUES
(1, 85, 'good'), (2, 90, 'great'), (3, 75, 'ok'), (4, 95, 'excellent'), (5, 60, 'poor');
DELETE FROM ice_v3_dv_sorted WHERE score < 80;
SELECT * FROM ice_v3_dv_sorted ORDER BY id;
---- RESULTS
1,85,'good'
2,90,'great'
4,95,'excellent'
---- TYPES
INT,INT,STRING
====
---- QUERY
# Unpartitioned sorted table: 1 data file + 1 DV file after first delete
SELECT content, file_format, referenced_data_file IS NOT NULL, content_offset IS NOT NULL
FROM $DATABASE.ice_v3_dv_sorted.`files`
ORDER BY content;
---- RESULTS
0,'PARQUET',false,false
1,'PUFFIN',true,true
---- TYPES
INT,STRING,BOOLEAN,BOOLEAN
====
---- QUERY
# Second delete - DV merge on sorted file
DELETE FROM ice_v3_dv_sorted WHERE id = 2;
SELECT * FROM ice_v3_dv_sorted ORDER BY id;
---- RESULTS
1,85,'good'
4,95,'excellent'
---- TYPES
INT,INT,STRING
====
---- QUERY
# Multi-column SORT BY - deletion vector covers rows sorted by (cat, score)
CREATE TABLE ice_v3_dv_multi_sort (id int, cat string, score int, ts timestamp)
SORT BY (cat, score)
STORED BY ICEBERG
TBLPROPERTIES ('format-version'='3');
INSERT INTO ice_v3_dv_multi_sort VALUES
(1, 'A', 85, '2024-01-01 10:00:00'),
(2, 'A', 90, '2024-01-01 11:00:00'),
(3, 'A', 70, '2024-01-01 12:00:00'),
(4, 'B', 80, '2024-01-02 09:00:00'),
(5, 'B', 95, '2024-01-02 10:00:00');
# Delete rows from the middle of the sort order
DELETE FROM ice_v3_dv_multi_sort WHERE cat = 'A' AND score < 90;
SELECT * FROM ice_v3_dv_multi_sort ORDER BY id;
---- RESULTS
2,'A',90,2024-01-01 11:00:00
4,'B',80,2024-01-02 09:00:00
5,'B',95,2024-01-02 10:00:00
---- TYPES
INT,STRING,INT,TIMESTAMP
====
---- QUERY
# Second delete across both sort-key groups
DELETE FROM ice_v3_dv_multi_sort WHERE score > 85;
SELECT * FROM ice_v3_dv_multi_sort ORDER BY id;
---- RESULTS
4,'B',80,2024-01-02 09:00:00
---- TYPES
INT,STRING,INT,TIMESTAMP
====
---- QUERY
# SORT BY ZORDER - deletion vectors on a Z-order-sorted table
CREATE TABLE ice_v3_dv_zorder (id int, x int, y int, string_data string)
SORT BY ZORDER (x, y)
STORED BY ICEBERG
TBLPROPERTIES ('format-version'='3');
INSERT INTO ice_v3_dv_zorder VALUES
(1, 10, 20, 'a'), (2, 15, 25, 'b'), (3, 10, 30, 'c'),
(4, 20, 20, 'd'), (5, 20, 30, 'e');
DELETE FROM ice_v3_dv_zorder WHERE x <= 10;
SELECT * FROM ice_v3_dv_zorder ORDER BY id;
---- RESULTS
2,15,25,'b'
4,20,20,'d'
5,20,30,'e'
---- TYPES
INT,INT,INT,STRING
====
---- QUERY
# Z-order sorted unpartitioned table: 1 data file + 1 DV file after first delete
SELECT content, file_format, referenced_data_file IS NOT NULL, content_offset IS NOT NULL
FROM $DATABASE.ice_v3_dv_zorder.`files`
ORDER BY content;
---- RESULTS
0,'PARQUET',false,false
1,'PUFFIN',true,true
---- TYPES
INT,STRING,BOOLEAN,BOOLEAN
====
---- QUERY
# Second ZORDER delete - DV merge with previous vector
DELETE FROM ice_v3_dv_zorder WHERE y > 25;
SELECT * FROM ice_v3_dv_zorder ORDER BY id;
---- RESULTS
2,15,25,'b'
4,20,20,'d'
---- TYPES
INT,INT,INT,STRING
====
---- QUERY
# SORT BY ZORDER with more dimensions and cumulative deletes
CREATE TABLE ice_v3_dv_zorder3d (id int, a int, b int, c int)
SORT BY ZORDER (a, b, c)
STORED BY ICEBERG
TBLPROPERTIES ('format-version'='3');
INSERT INTO ice_v3_dv_zorder3d VALUES
(1, 1, 1, 1), (2, 1, 2, 3), (3, 2, 1, 4),
(4, 2, 3, 1), (5, 3, 2, 2), (6, 3, 3, 3);
DELETE FROM ice_v3_dv_zorder3d WHERE a = 1;
DELETE FROM ice_v3_dv_zorder3d WHERE b = 3;
SELECT * FROM ice_v3_dv_zorder3d ORDER BY id;
---- RESULTS
3,2,1,4
5,3,2,2
---- TYPES
INT,INT,INT,INT
====
---- QUERY
# SORT BY on a timestamp column - verify correctness after deletes
CREATE TABLE ice_v3_dv_ts_sort (id int, created timestamp, status string)
SORT BY (created)
STORED BY ICEBERG
TBLPROPERTIES ('format-version'='3');
INSERT INTO ice_v3_dv_ts_sort VALUES
(1, '2024-01-01 08:00:00', 'active'),
(2, '2024-01-02 09:00:00', 'inactive'),
(3, '2024-01-03 10:00:00', 'active'),
(4, '2024-01-04 11:00:00', 'inactive');
DELETE FROM ice_v3_dv_ts_sort WHERE status = 'inactive';
SELECT * FROM ice_v3_dv_ts_sort ORDER BY id;
---- RESULTS
1,2024-01-01 08:00:00,'active'
3,2024-01-03 10:00:00,'active'
---- TYPES
INT,TIMESTAMP,STRING
====
---- QUERY
# Sequential inserts and deletes on sorted table - multiple data files with DVs
CREATE TABLE ice_v3_dv_sort_seq (id int, batch int, val int)
SORT BY (val)
STORED BY ICEBERG
TBLPROPERTIES ('format-version'='3');
INSERT INTO ice_v3_dv_sort_seq VALUES (1, 1, 100), (2, 1, 200), (3, 1, 300);
DELETE FROM ice_v3_dv_sort_seq WHERE val = 200;
INSERT INTO ice_v3_dv_sort_seq VALUES (4, 2, 150), (5, 2, 250), (6, 2, 350);
DELETE FROM ice_v3_dv_sort_seq WHERE val > 300;
SELECT * FROM ice_v3_dv_sort_seq ORDER BY id;
---- RESULTS
1,1,100
3,1,300
4,2,150
5,2,250
---- TYPES
INT,INT,INT
====
---- QUERY
# Identity partition + single-column sort - per-partition DVs on sorted data
CREATE TABLE ice_v3_dv_part_sort (id int, region string, score int, label string)
PARTITIONED BY SPEC (region)
SORT BY (score)
STORED BY ICEBERG
TBLPROPERTIES ('format-version'='3');
INSERT INTO ice_v3_dv_part_sort VALUES
(1, 'US', 85, 'a'), (2, 'US', 90, 'b'), (3, 'US', 70, 'c'),
(4, 'EU', 80, 'd'), (5, 'EU', 95, 'e'), (6, 'EU', 60, 'f');
# Delete within a single partition, from a non-boundary position in sort order
DELETE FROM ice_v3_dv_part_sort WHERE region = 'US' AND score < 85;
SELECT * FROM ice_v3_dv_part_sort ORDER BY id;
---- RESULTS
1,'US',85,'a'
2,'US',90,'b'
4,'EU',80,'d'
5,'EU',95,'e'
6,'EU',60,'f'
---- TYPES
INT,STRING,INT,STRING
====
---- QUERY
# Identity-partitioned sorted table: 2 data files (US and EU) + 1 DV for the US partition
SELECT content, file_format, referenced_data_file IS NOT NULL, content_offset IS NOT NULL
FROM $DATABASE.ice_v3_dv_part_sort.`files`
ORDER BY content, file_format;
---- RESULTS
0,'PARQUET',false,false
0,'PARQUET',false,false
1,'PUFFIN',true,true
---- TYPES
INT,STRING,BOOLEAN,BOOLEAN
====
---- QUERY
# Cross-partition delete - DVs created in both partition directories
DELETE FROM ice_v3_dv_part_sort WHERE score > 88;
SELECT * FROM ice_v3_dv_part_sort ORDER BY id;
---- RESULTS
1,'US',85,'a'
4,'EU',80,'d'
6,'EU',60,'f'
---- TYPES
INT,STRING,INT,STRING
====
---- QUERY
# Aggregate correctness after partition+sort deletes
SELECT region, avg(score), count(*)
FROM ice_v3_dv_part_sort
GROUP BY region
ORDER BY region;
---- RESULTS
'EU',70.0,2
'US',85.0,1
---- TYPES
STRING,DOUBLE,BIGINT
====
---- QUERY
# Bucket partition + multi-column sort - DV interacts with both dimensions
CREATE TABLE ice_v3_dv_bucket_msort (id int, user_id int, category string, amount double)
PARTITIONED BY SPEC (bucket(4, user_id))
SORT BY (category, amount)
STORED BY ICEBERG
TBLPROPERTIES ('format-version'='3');
INSERT INTO ice_v3_dv_bucket_msort VALUES
(1, 100, 'A', 1.5), (2, 100, 'B', 2.5), (3, 200, 'A', 3.5),
(4, 200, 'B', 4.5), (5, 300, 'A', 5.5), (6, 300, 'B', 6.5);
DELETE FROM ice_v3_dv_bucket_msort WHERE category = 'A' AND amount < 4.0;
SELECT * FROM ice_v3_dv_bucket_msort ORDER BY id;
---- RESULTS
2,100,'B',2.5
4,200,'B',4.5
5,300,'A',5.5
6,300,'B',6.5
---- TYPES
INT,INT,STRING,DOUBLE
====
---- QUERY
# Second delete - DV merge for buckets already written in previous delete
DELETE FROM ice_v3_dv_bucket_msort WHERE user_id = 200;
SELECT * FROM ice_v3_dv_bucket_msort ORDER BY id;
---- RESULTS
2,100,'B',2.5
5,300,'A',5.5
6,300,'B',6.5
---- TYPES
INT,INT,STRING,DOUBLE
====
---- QUERY
# Truncate partition + SORT BY - truncated keys route rows; sort order is
# per-file within each truncated partition
CREATE TABLE ice_v3_dv_trunc_sort (id int, name string, val int)
PARTITIONED BY SPEC (truncate(3, name))
SORT BY (val)
STORED BY ICEBERG
TBLPROPERTIES ('format-version'='3');
INSERT INTO ice_v3_dv_trunc_sort VALUES
(1, 'alpha', 30), (2, 'almond', 10), (3, 'beta', 50),
(4, 'belong', 20), (5, 'gamma', 40);
# 'alpha' and 'almond' share truncated key 'alm'/'alp'... truncate(3,'alpha')='alp', truncate(3,'almond')='alm'
# delete within two different truncated partitions
DELETE FROM ice_v3_dv_trunc_sort WHERE val IN (10, 50);
SELECT * FROM ice_v3_dv_trunc_sort ORDER BY id;
---- RESULTS
1,'alpha',30
4,'belong',20
5,'gamma',40
---- TYPES
INT,STRING,INT
====
---- QUERY
# Year partition + SORT BY - time-series pattern
CREATE TABLE ice_v3_dv_year_sort (id int, event_date date, metric string, value int)
PARTITIONED BY SPEC (year(event_date))
SORT BY (event_date, metric)
STORED BY ICEBERG
TBLPROPERTIES ('format-version'='3');
INSERT INTO ice_v3_dv_year_sort VALUES
(1, '2022-03-01', 'cpu', 80),
(2, '2022-07-15', 'mem', 60),
(3, '2023-02-10', 'cpu', 70),
(4, '2023-08-20', 'mem', 90),
(5, '2024-01-05', 'cpu', 50);
DELETE FROM ice_v3_dv_year_sort WHERE year(event_date) = 2022 AND metric = 'cpu';
SELECT * FROM ice_v3_dv_year_sort ORDER BY id;
---- RESULTS
2,2022-07-15,'mem',60
3,2023-02-10,'cpu',70
4,2023-08-20,'mem',90
5,2024-01-05,'cpu',50
---- TYPES
INT,DATE,STRING,INT
====
---- QUERY
# Cross-year delete - multiple partition DVs created in one statement
DELETE FROM ice_v3_dv_year_sort WHERE metric = 'mem';
SELECT * FROM ice_v3_dv_year_sort ORDER BY id;
---- RESULTS
3,2023-02-10,'cpu',70
5,2024-01-05,'cpu',50
---- TYPES
INT,DATE,STRING,INT
====
---- QUERY
# Multi-column partition + ZORDER sort - most complex structural combination
CREATE TABLE ice_v3_dv_mc_part_zorder (
id int, country string, yr int, x int, y int, val double)
PARTITIONED BY SPEC (country, bucket(3, yr))
SORT BY ZORDER (x, y)
STORED BY ICEBERG
TBLPROPERTIES ('format-version'='3');
INSERT INTO ice_v3_dv_mc_part_zorder VALUES
(1, 'US', 2023, 10, 10, 1.1),
(2, 'US', 2023, 20, 20, 2.2),
(3, 'US', 2024, 10, 30, 3.3),
(4, 'EU', 2023, 15, 15, 4.4),
(5, 'EU', 2024, 25, 25, 5.5);
DELETE FROM ice_v3_dv_mc_part_zorder WHERE country = 'US' AND x <= 10;
SELECT * FROM ice_v3_dv_mc_part_zorder ORDER BY id;
---- RESULTS
2,'US',2023,20,20,2.2
4,'EU',2023,15,15,4.4
5,'EU',2024,25,25,5.5
---- TYPES
INT,STRING,INT,INT,INT,DOUBLE
====
---- QUERY
# Cross-partition ZORDER delete
DELETE FROM ice_v3_dv_mc_part_zorder WHERE y > 20;
SELECT * FROM ice_v3_dv_mc_part_zorder ORDER BY id;
---- RESULTS
2,'US',2023,20,20,2.2
4,'EU',2023,15,15,4.4
---- TYPES
INT,STRING,INT,INT,INT,DOUBLE
====
---- QUERY
# Cumulative DV merges - partition+sort table with many sequential deletes
CREATE TABLE ice_v3_dv_cumulative (id int, grp string, sort_key int, string_data string)
PARTITIONED BY SPEC (grp)
SORT BY (sort_key)
STORED BY ICEBERG
TBLPROPERTIES ('format-version'='3');
INSERT INTO ice_v3_dv_cumulative VALUES
(1, 'X', 10, 'a'), (2, 'X', 20, 'b'), (3, 'X', 30, 'c'),
(4, 'Y', 10, 'd'), (5, 'Y', 20, 'e'), (6, 'Y', 30, 'f');
# Delete 1
DELETE FROM ice_v3_dv_cumulative WHERE sort_key = 10;
SELECT count(*) FROM ice_v3_dv_cumulative;
---- RESULTS
4
---- TYPES
BIGINT
====
---- QUERY
# First cumulative delete hits both partitions (X and Y): 2 data files + 2 DV files
SELECT content, file_format, referenced_data_file IS NOT NULL, content_offset IS NOT NULL
FROM $DATABASE.ice_v3_dv_cumulative.`files`
ORDER BY content, file_format;
---- RESULTS
0,'PARQUET',false,false
0,'PARQUET',false,false
1,'PUFFIN',true,true
1,'PUFFIN',true,true
---- TYPES
INT,STRING,BOOLEAN,BOOLEAN
====
---- QUERY
# Delete 2 - merges with existing DV per partition
DELETE FROM ice_v3_dv_cumulative WHERE sort_key = 30;
SELECT count(*) FROM ice_v3_dv_cumulative;
---- RESULTS
2
---- TYPES
BIGINT
====
---- QUERY
# Delete 3 - removes the remaining row in one partition entirely
DELETE FROM ice_v3_dv_cumulative WHERE grp = 'X';
SELECT * FROM ice_v3_dv_cumulative ORDER BY id;
---- RESULTS
5,'Y',20,'e'
---- TYPES
INT,STRING,INT,STRING
====
---- QUERY
# Sequential inserts between deletes on partitioned+sorted table
CREATE TABLE ice_v3_dv_ps_seq (id int, part_key string, sort_key int)
PARTITIONED BY SPEC (part_key)
SORT BY (sort_key)
STORED BY ICEBERG
TBLPROPERTIES ('format-version'='3');
INSERT INTO ice_v3_dv_ps_seq VALUES (1, 'A', 100), (2, 'A', 200), (3, 'B', 100);
DELETE FROM ice_v3_dv_ps_seq WHERE sort_key = 200;
INSERT INTO ice_v3_dv_ps_seq VALUES (4, 'A', 300), (5, 'B', 200);
DELETE FROM ice_v3_dv_ps_seq WHERE sort_key = 100;
SELECT * FROM ice_v3_dv_ps_seq ORDER BY id;
---- RESULTS
4,'A',300
5,'B',200
---- TYPES
INT,STRING,INT
====
---- QUERY
# DELETE with subquery on partitioned+sorted table
CREATE TABLE ice_v3_dv_ps_sub (id int, part_key string, value int)
PARTITIONED BY SPEC (part_key)
SORT BY (value)
STORED BY ICEBERG
TBLPROPERTIES ('format-version'='3');
INSERT INTO ice_v3_dv_ps_sub VALUES
(1, 'A', 10), (2, 'A', 50), (3, 'B', 20), (4, 'B', 80);
DELETE FROM ice_v3_dv_ps_sub
WHERE id IN (SELECT id FROM ice_v3_dv_ps_sub WHERE value > 40);
SELECT * FROM ice_v3_dv_ps_sub ORDER BY id;
---- RESULTS
1,'A',10
3,'B',20
---- TYPES
INT,STRING,INT
====
---- QUERY
# FILE__POSITION delete on partitioned+sorted table
CREATE TABLE ice_v3_dv_ps_filepos (id int, part_key string, sort_key int)
PARTITIONED BY SPEC (part_key)
SORT BY (sort_key)
STORED BY ICEBERG
TBLPROPERTIES ('format-version'='3');
INSERT INTO ice_v3_dv_ps_filepos VALUES (10, 'A', 300), (20, 'A', 100), (30, 'B', 200);
# FILE__POSITION=0 refers to the first record in a file; on sorted data that is the
# row with the lowest sort_key written by the INSERT above ('A',100)
DELETE FROM ice_v3_dv_ps_filepos WHERE part_key = 'A' AND FILE__POSITION = 0;
SELECT * FROM ice_v3_dv_ps_filepos ORDER BY id;
---- RESULTS: VERIFY_IS_SUBSET
10,'A',300
30,'B',200
---- TYPES
INT,STRING,INT
====
---- QUERY
# Large-scale delete on partitioned+sorted table - RoaringBitmap at scale
CREATE TABLE ice_v3_dv_ps_large (id bigint, bucket_key int, sort_key int)
PARTITIONED BY SPEC (bucket(8, bucket_key))
SORT BY (sort_key)
STORED BY ICEBERG
TBLPROPERTIES ('format-version'='3');
INSERT INTO ice_v3_dv_ps_large
SELECT id, id % 80, id % 500
FROM functional.alltypes
ORDER BY id LIMIT 8000;
# Delete about half the rows across all 8 buckets
DELETE FROM ice_v3_dv_ps_large WHERE id % 2 = 0;
SELECT count(*) FROM ice_v3_dv_ps_large;
---- RESULTS
3650
---- TYPES
BIGINT
====
---- QUERY
# Second large batch delete - DV merge per bucket partition
DELETE FROM ice_v3_dv_ps_large WHERE sort_key < 100;
SELECT count(*) FROM ice_v3_dv_ps_large;
---- RESULTS
2900
---- TYPES
BIGINT
====
---- QUERY
# Partition evolution from unpartitioned to partitioned+sorted
CREATE TABLE ice_v3_dv_evolve (id int, cat string, val int)
STORED BY ICEBERG
TBLPROPERTIES ('format-version'='3');
# Phase 1: unpartitioned, no sort
INSERT INTO ice_v3_dv_evolve VALUES (1, 'A', 10), (2, 'B', 20), (3, 'A', 30);
DELETE FROM ice_v3_dv_evolve WHERE id = 2;
SELECT * FROM ice_v3_dv_evolve ORDER BY id;
---- RESULTS
1,'A',10
3,'A',30
---- TYPES
INT,STRING,INT
====
---- QUERY
ALTER TABLE ice_v3_dv_evolve SET PARTITION SPEC (cat);
# Phase 2: new inserts go into identity partition; old unpartitioned data still readable
INSERT INTO ice_v3_dv_evolve VALUES (4, 'A', 40), (5, 'B', 50);
DELETE FROM ice_v3_dv_evolve WHERE val = 40;
SELECT * FROM ice_v3_dv_evolve ORDER BY id;
---- RESULTS
1,'A',10
3,'A',30
5,'B',50
---- TYPES
INT,STRING,INT
====
---- QUERY
# JOIN between two partitioned+sorted v3 tables - DVs applied on both sides
CREATE TABLE ice_v3_dv_ps_join1 (id int, grp string, score int)
PARTITIONED BY SPEC (grp)
SORT BY (score)
STORED BY ICEBERG
TBLPROPERTIES ('format-version'='3');
CREATE TABLE ice_v3_dv_ps_join2 (id int, grp string, detail string)
PARTITIONED BY SPEC (grp)
SORT BY (id)
STORED BY ICEBERG
TBLPROPERTIES ('format-version'='3');
INSERT INTO ice_v3_dv_ps_join1 VALUES
(1, 'X', 80), (2, 'X', 90), (3, 'Y', 70), (4, 'Y', 95);
INSERT INTO ice_v3_dv_ps_join2 VALUES
(1, 'X', 'detail_1'), (2, 'X', 'detail_2'), (3, 'Y', 'detail_3'), (4, 'Y', 'detail_4');
DELETE FROM ice_v3_dv_ps_join1 WHERE score < 80;
DELETE FROM ice_v3_dv_ps_join2 WHERE id IN (2, 4);
SELECT j1.id, j1.grp, j1.score, j2.detail
FROM ice_v3_dv_ps_join1 j1
JOIN ice_v3_dv_ps_join2 j2 ON j1.id = j2.id
ORDER BY j1.id;
---- RESULTS
1,'X',80,'detail_1'
---- TYPES
INT,STRING,INT,STRING
====
---- QUERY
# Window function correctness after partition+sort deletes
CREATE TABLE ice_v3_dv_ps_window (id int, grp string, sort_key int, val int)
PARTITIONED BY SPEC (grp)
SORT BY (sort_key)
STORED BY ICEBERG
TBLPROPERTIES ('format-version'='3');
INSERT INTO ice_v3_dv_ps_window VALUES
(1, 'A', 1, 10), (2, 'A', 2, 20), (3, 'A', 3, 30),
(4, 'B', 1, 40), (5, 'B', 2, 50);
DELETE FROM ice_v3_dv_ps_window WHERE id IN (1, 4);
SELECT id, grp, val,
row_number() OVER (PARTITION BY grp ORDER BY sort_key) AS rn
FROM ice_v3_dv_ps_window
ORDER BY id;
---- RESULTS
2,'A',20,1
3,'A',30,2
5,'B',50,1
---- TYPES
INT,STRING,INT,BIGINT
====