blob: 8f45cb375f46131b81e2065adf2b6c964ee281cb [file] [log] [blame]
====
---- QUERY
# Unpartitioned Iceberg table used by the dictionary runtime filter tests.
CREATE TABLE iceberg_dict_runtime_filter (
  col_1 INT,
  col_2 STRING
)
STORED AS ICEBERG;
====
---- QUERY
# Single-row insert issued as its own statement; presumably this gives the row its own
# data file, which is what the file (row group) counts in the join tests rely on.
INSERT INTO iceberg_dict_runtime_filter
VALUES (1, "a");
====
---- QUERY
# Second single-row insert, again issued as a separate statement.
INSERT INTO iceberg_dict_runtime_filter
VALUES (2, "b");
====
---- QUERY
# NULL is not present in the parquet dictionary, so the new file written by this insert
# has a PLAIN encoded col_2. Its row group is skipped before dictionary filtering.
INSERT INTO iceberg_dict_runtime_filter
VALUES (3, NULL);
====
---- QUERY
# Runtime filters must be applied at row group level.
# Right side scan: Iceberg filters out 2 files (row groups), the scanner itself does
# not filter.
# Left-hand side scan: the runtime filter eliminates 1 row group.
SET RUNTIME_FILTER_WAIT_TIME_MS=$RUNTIME_FILTER_WAIT_TIME_MS;
SELECT *
FROM iceberg_dict_runtime_filter AS a
INNER JOIN iceberg_dict_runtime_filter AS b
  ON a.col_2 = b.col_2
  AND b.col_1 = 1;
---- RUNTIME_PROFILE
aggregation(SUM, NumDictFilteredRowGroups): 1
aggregation(SUM, RowGroups total): 2
====
---- QUERY
# Several runtime filters target the same slot id.
# Rightmost scan: Iceberg filters out 2 files (row groups), the scanner does not filter.
# Middle scan: Iceberg filters out 2 files (row groups), the scanner does not filter.
# Leftmost scan: the runtime filter eliminates 1 row group.
SET RUNTIME_FILTER_WAIT_TIME_MS=$RUNTIME_FILTER_WAIT_TIME_MS;
SELECT *
FROM iceberg_dict_runtime_filter AS a
INNER JOIN iceberg_dict_runtime_filter AS b
  ON a.col_2 = b.col_2
  AND b.col_1 = 1
INNER JOIN iceberg_dict_runtime_filter AS c
  ON a.col_2 = c.col_2
  AND c.col_1 = 1;
---- RUNTIME_PROFILE
aggregation(SUM, NumDictFilteredRowGroups): 1
aggregation(SUM, RowGroups total): 4
====
---- QUERY
# The left side of the join predicate references two columns. Only single slot
# predicates are supported, so the runtime filter should be skipped.
# Right side scan: Iceberg filters out 2 files (row groups), the scanner does not filter.
SET RUNTIME_FILTER_WAIT_TIME_MS=$RUNTIME_FILTER_WAIT_TIME_MS;
SELECT *
FROM iceberg_dict_runtime_filter AS a
INNER JOIN iceberg_dict_runtime_filter AS b
  ON CONCAT(a.col_2, CAST(a.col_1 AS STRING)) = b.col_2
  AND b.col_1 = 1;
---- RUNTIME_PROFILE
aggregation(SUM, NumDictFilteredRowGroups): 0
aggregation(SUM, RowGroups total): 0
====
---- QUERY
# The join predicate wraps a single column in an expression; it should be evaluated
# normally.
# Right side scan: Iceberg filters out 2 files (row groups), the scanner does not filter.
# Left side scan: the runtime filter eliminates 2 row groups.
SET RUNTIME_FILTER_WAIT_TIME_MS=$RUNTIME_FILTER_WAIT_TIME_MS;
SELECT *
FROM iceberg_dict_runtime_filter AS a
INNER JOIN iceberg_dict_runtime_filter AS b
  ON CONCAT(a.col_2, "1") = b.col_2
  AND b.col_1 = 1;
---- RUNTIME_PROFILE
aggregation(SUM, NumDictFilteredRowGroups): 2
aggregation(SUM, RowGroups total): 2
====
---- QUERY
# Same columns as iceberg_dict_runtime_filter, but with col_1 as the partition column.
CREATE TABLE iceberg_dict_runtime_filter_partitioned (
  col_2 STRING
)
PARTITIONED BY (col_1 INT)
STORED AS ICEBERG;
====
---- QUERY
# NULL is not present in the parquet dictionary, so the (NULL, 3) row creates a new
# file with PLAIN encoded col_2. That row group is skipped before dictionary filtering.
INSERT INTO iceberg_dict_runtime_filter_partitioned PARTITION(col_1)
VALUES
  ("a", 1),
  ("b", 2),
  (NULL, 3);
====
---- QUERY
# Runtime filters must be applied at row group level (partitioned table).
# Right side scan: Iceberg filters out 2 files (row groups), the scanner itself does
# not filter.
# Left-hand side scan: the runtime filter eliminates 1 row group.
SET RUNTIME_FILTER_WAIT_TIME_MS=$RUNTIME_FILTER_WAIT_TIME_MS;
SELECT *
FROM iceberg_dict_runtime_filter_partitioned AS a
INNER JOIN iceberg_dict_runtime_filter_partitioned AS b
  ON a.col_2 = b.col_2
  AND b.col_1 = 1;
---- RUNTIME_PROFILE
aggregation(SUM, NumDictFilteredRowGroups): 1
aggregation(SUM, RowGroups total): 2
====
---- QUERY
# Several runtime filters target the same slot id (partitioned table).
# Rightmost scan: Iceberg filters out 2 files (row groups), the scanner does not filter.
# Middle scan: Iceberg filters out 2 files (row groups), the scanner does not filter.
# Leftmost scan: the runtime filter eliminates 1 row group.
SET RUNTIME_FILTER_WAIT_TIME_MS=$RUNTIME_FILTER_WAIT_TIME_MS;
SELECT *
FROM iceberg_dict_runtime_filter_partitioned AS a
INNER JOIN iceberg_dict_runtime_filter_partitioned AS b
  ON a.col_2 = b.col_2
  AND b.col_1 = 1
INNER JOIN iceberg_dict_runtime_filter_partitioned AS c
  ON a.col_2 = c.col_2
  AND c.col_1 = 1;
---- RUNTIME_PROFILE
aggregation(SUM, NumDictFilteredRowGroups): 1
aggregation(SUM, RowGroups total): 4
====
---- QUERY
# The left side of the join predicate references two columns. Only single slot
# predicates are supported, so the runtime filter should be skipped.
# Right side scan: Iceberg filters out 2 files (row groups), the scanner does not filter.
SET RUNTIME_FILTER_WAIT_TIME_MS=$RUNTIME_FILTER_WAIT_TIME_MS;
SELECT *
FROM iceberg_dict_runtime_filter_partitioned AS a
INNER JOIN iceberg_dict_runtime_filter_partitioned AS b
  ON CONCAT(a.col_2, CAST(a.col_1 AS STRING)) = b.col_2
  AND b.col_1 = 1;
---- RUNTIME_PROFILE
aggregation(SUM, NumDictFilteredRowGroups): 0
aggregation(SUM, RowGroups total): 0
====
---- QUERY
# The join predicate wraps a single column in an expression; it should be evaluated
# normally.
# Right side scan: Iceberg filters out 2 files (row groups), the scanner does not filter.
# Left side scan: the runtime filter eliminates 2 row groups.
SET RUNTIME_FILTER_WAIT_TIME_MS=$RUNTIME_FILTER_WAIT_TIME_MS;
SELECT *
FROM iceberg_dict_runtime_filter_partitioned AS a
INNER JOIN iceberg_dict_runtime_filter_partitioned AS b
  ON CONCAT(a.col_2, "1") = b.col_2
  AND b.col_1 = 1;
---- RUNTIME_PROFILE
aggregation(SUM, NumDictFilteredRowGroups): 2
aggregation(SUM, RowGroups total): 2
====