| ==== |
| ---- QUERY |
| # Test Iceberg V3 default values across multiple file formats |
| # |
| # This test covers: |
| # - Parquet: Full read (initial-default) |
| # - ORC: Read (initial-default) support only |
| # - Avro: Read (initial-default) support only |
| # |
| # Note: Impala only supports writing to Parquet format Iceberg tables. |
| # ORC and Avro formats are read-only for Iceberg in Impala. |
| |
| # Test initial-default values for Parquet format |
| # Column 'j' has initial-default = -1 and write-default = -999 |
| # First row: i=1, j is missing (should use initial-default -1) |
| # Second row: i=2, j=2 (explicit value) |
| SELECT * FROM iceberg_v3_default_value ORDER BY i; |
| ---- RESULTS |
| 1,-1 |
| 2,2 |
| ---- TYPES |
| INT,INT |
| ==== |
| ---- QUERY |
| # Test initial-default values for ORC format |
| SELECT * FROM iceberg_v3_default_value_orc ORDER BY i; |
| ---- RESULTS |
| 1,-1 |
| 2,2 |
| ---- TYPES |
| INT,INT |
| ==== |
| ---- QUERY |
| # Test initial-default values for Avro format |
| SELECT * FROM iceberg_v3_default_value_avro ORDER BY i; |
| ---- RESULTS |
| 1,-1 |
| 2,2 |
| ---- TYPES |
| INT,INT |
| ==== |
| ---- QUERY |
| # Parquet: Filter exactly on the initial-default value |
| SELECT i, j FROM iceberg_v3_default_value WHERE j = -1 ORDER BY i; |
| ---- RESULTS |
| 1,-1 |
| ---- TYPES |
| INT,INT |
| ==== |
| ---- QUERY |
| # ORC: Filter exactly on the initial-default value |
| SELECT i, j FROM iceberg_v3_default_value_orc WHERE j = -1 ORDER BY i; |
| ---- RESULTS |
| 1,-1 |
| ---- TYPES |
| INT,INT |
| ==== |
| ---- QUERY |
| # Avro: Filter exactly on the initial-default value |
| SELECT i, j FROM iceberg_v3_default_value_avro WHERE j = -1 ORDER BY i; |
| ---- RESULTS |
| 1,-1 |
| ---- TYPES |
| INT,INT |
| ==== |
| ---- QUERY |
| # Parquet: Filter exactly on explicit value |
| SELECT i, j FROM iceberg_v3_default_value WHERE j > 0 ORDER BY i; |
| ---- RESULTS |
| 2,2 |
| ---- TYPES |
| INT,INT |
| ==== |
| ---- QUERY |
| # ORC: Filter exactly on explicit value |
| SELECT i, j FROM iceberg_v3_default_value_orc WHERE j > 0 ORDER BY i; |
| ---- RESULTS |
| 2,2 |
| ---- TYPES |
| INT,INT |
| ==== |
| ---- QUERY |
| # Avro: Filter exactly on explicit value |
| SELECT i, j FROM iceberg_v3_default_value_avro WHERE j > 0 ORDER BY i; |
| ---- RESULTS |
| 2,2 |
| ---- TYPES |
| INT,INT |
| ==== |
| ---- QUERY |
| # Parquet: initial-default is a real INT, not SQL NULL — no rows match j IS NULL |
| SELECT i, j FROM iceberg_v3_default_value WHERE j IS NULL ORDER BY i; |
| ---- RESULTS |
| ---- TYPES |
| INT,INT |
| ==== |
| ---- QUERY |
| # ORC: no NULL j after default fill |
| SELECT i, j FROM iceberg_v3_default_value_orc WHERE j IS NULL ORDER BY i; |
| ---- RESULTS |
| ---- TYPES |
| INT,INT |
| ==== |
| ---- QUERY |
| # Avro: no NULL j after default fill |
| SELECT i, j FROM iceberg_v3_default_value_avro WHERE j IS NULL ORDER BY i; |
| ---- RESULTS |
| ---- TYPES |
| INT,INT |
| ==== |
| ---- QUERY |
| # Parquet: verify all materialized defaults and explicitly stored values evaluate to NOT NULL |
| SELECT i, j FROM iceberg_v3_default_value WHERE j IS NOT NULL ORDER BY i; |
| ---- RESULTS |
| 1,-1 |
| 2,2 |
| ---- TYPES |
| INT,INT |
| ==== |
| ---- QUERY |
| # ORC: verify NOT NULL evaluation |
| SELECT i, j FROM iceberg_v3_default_value_orc WHERE j IS NOT NULL ORDER BY i; |
| ---- RESULTS |
| 1,-1 |
| 2,2 |
| ---- TYPES |
| INT,INT |
| ==== |
| ---- QUERY |
| # Avro: verify NOT NULL evaluation |
| SELECT i, j FROM iceberg_v3_default_value_avro WHERE j IS NOT NULL ORDER BY i; |
| ---- RESULTS |
| 1,-1 |
| 2,2 |
| ---- TYPES |
| INT,INT |
| ==== |
| ---- QUERY |
| # Parquet: Compound predicate pushdown testing |
| SELECT i, j FROM iceberg_v3_default_value WHERE i = 1 AND j = -1; |
| ---- RESULTS |
| 1,-1 |
| ---- TYPES |
| INT,INT |
| ==== |
| ---- QUERY |
| # ORC: Compound predicate pushdown testing |
| SELECT i, j FROM iceberg_v3_default_value_orc WHERE i = 1 AND j = -1; |
| ---- RESULTS |
| 1,-1 |
| ---- TYPES |
| INT,INT |
| ==== |
| ---- QUERY |
| # Avro: Compound predicate pushdown testing |
| SELECT i, j FROM iceberg_v3_default_value_avro WHERE i = 1 AND j = -1; |
| ---- RESULTS |
| 1,-1 |
| ---- TYPES |
| INT,INT |
| ==== |
| ---- QUERY |
| # Time travel — first snapshot: schema had only i |
| SELECT * FROM iceberg_v3_default_value FOR SYSTEM_VERSION AS OF 7752776210164022855; |
| ---- RESULTS |
| 1 |
| ---- TYPES |
| INT |
| ==== |
| ---- QUERY |
| # Time travel — ORC first snapshot (schema without j) |
| SELECT * FROM iceberg_v3_default_value_orc FOR SYSTEM_VERSION AS OF 1536392834658079256; |
| ---- RESULTS |
| 1 |
| ---- TYPES |
| INT |
| ==== |
| ---- QUERY |
| # Time travel — Avro first snapshot (schema without j) |
| SELECT * FROM iceberg_v3_default_value_avro FOR SYSTEM_VERSION AS OF 2819462914350205316; |
| ---- RESULTS |
| 1 |
| ---- TYPES |
| INT |
| ==== |
| ---- QUERY |
| # Tests Iceberg V3 `initial-default` read projection against partition evolution. |
| # Validates that the scanner correctly projects the read default value for older unpartitioned files, |
| # while successfully overriding it with partition key values (or explicit NULLs) for newly partitioned data files. |
| |
| # Table was created initially with column 'i' and records (1), (2) were inserted. |
| # It was then partitioned on the newly added column 'j'. Has initial-default = -1 and write-default = -999 |
| # Three new records (3,5), (4,10) & (5,NULL) were inserted. |
| # For row: i=1 & i=2, j is missing (should use initial-default -1) |
| # For others, it must use the partition key value, explicit NULL |
| SELECT * FROM test_default_part ORDER BY i; |
| ---- RESULTS |
| 1,-1 |
| 2,-1 |
| 3,5 |
| 4,10 |
| 5,NULL |
| ---- TYPES |
| INT,INT |
| ==== |
| ---- QUERY |
| # Test Case 2: Filter on the Default Value |
| # This validates that partition pruning doesn't accidentally skip old files |
| # that lack partition metadata for j. |
| SELECT i FROM test_default_part WHERE j = -1 ORDER BY i; |
| ---- RESULTS |
| 1 |
| 2 |
| ---- TYPES |
| INT |
| ==== |
| ---- QUERY |
| # Test Case 3: Filter on the Partition Key |
| # This tests that expression evaluation works correctly on the newly partitioned |
| # data and doesn't get confused by the initial default. |
| SELECT i FROM test_default_part WHERE j = 5; |
| ---- RESULTS |
| 3 |
| ---- TYPES |
| INT |
| ==== |
| ---- QUERY |
| # Test Case 4: Aggregation and Grouping |
| SELECT j, COUNT(i) as row_count |
| FROM test_default_part |
| GROUP BY j |
| ORDER BY j; |
| ---- RESULTS |
| -1,2 |
| 5,1 |
| 10,1 |
| NULL,1 |
| ---- TYPES |
| INT,BIGINT |
| ==== |
| ---- QUERY |
| # Table test_complex_default has three columns: |
| # i (int), j STRUCT<f1: INT, f2: STRING>, ts_tz TIMESTAMPTZ |
| # Nested field j.f1 with an int initial-default in metadata. |
| # Selecting only column i should succeed because j is not materialized. |
| SELECT i FROM test_complex_default ORDER BY i; |
| ---- RESULTS |
| 1 |
| 2 |
| ---- TYPES |
| INT |
| ==== |
| ---- QUERY |
| # Nested field j.f2 does not have any initial-default in metadata. |
| # Selecting the nested field j.f2 specifically should succeed. |
| SELECT j.f2 FROM test_complex_default; |
| ---- RESULTS |
| 'hello' |
| 'NULL' |
| ---- TYPES |
| STRING |
| ==== |
| ---- QUERY |
| SELECT f2 FROM (SELECT j.f1 as f1, j.f2 as f2 FROM test_complex_default) v; |
| ---- RESULTS |
| 'hello' |
| 'NULL' |
| ---- TYPES |
| STRING |
| ==== |
| ---- QUERY |
| # Negative-epoch TIMESTAMP initial-default (microseconds before Unix epoch) for id=1. |
| SELECT id, col_timestamp FROM iceberg_v3_all_types WHERE id = 1; |
| ---- RESULTS |
| 1,1969-12-31 23:59:59.123456000 |
| ---- TYPES |
| INT,TIMESTAMP |
| ==== |
| ---- QUERY |
| # Test all supported primitive types with initial-default values |
| # Row 1: Uses initial-default for all columns (id=1 inserted before schema evolution) |
| # Row 2: Explicit values override defaults (id=2 inserted after adding columns) |
| # |
| # Tests initial-default for: BOOLEAN, INT, BIGINT, FLOAT, DOUBLE, DECIMAL, |
| # STRING, DATE, TIMESTAMP (pre-epoch, epoch, ISO) |
| # col_timestamp_epoch / col_timestamp_iso: not present in older data files, |
| # so both rows use their initial-default from metadata. |
| SELECT id, col_boolean, col_int, col_bigint, col_float, col_double, col_decimal, |
| col_string, col_date, col_timestamp, col_timestamp_epoch, col_timestamp_iso |
| FROM iceberg_v3_all_types ORDER BY id; |
| ---- RESULTS |
| 1,false,-1,-1,1.5,2.5,123.45,'initial_default',2024-01-01,1969-12-31 23:59:59.123456000,1970-01-01 00:00:00,2024-01-15 10:30:45.123456000 |
| 2,true,42,9999999999,3.140000104904175,2.71828,999.99,'explicit',2024-12-25,2024-12-25 10:30:45.123456000,1970-01-01 00:00:00,2024-01-15 10:30:45.123456000 |
| ---- TYPES |
| INT,BOOLEAN,INT,BIGINT,FLOAT,DOUBLE,DECIMAL,STRING,DATE,TIMESTAMP,TIMESTAMP,TIMESTAMP |
| ==== |
| ---- QUERY |
| # Test write-default: INSERT omitting column j should use write-default value -999 |
| # Column j has initial-default=-1 and write-default=-999 in metadata |
| INSERT INTO iceberg_v3_default_value (i) VALUES (100); |
| SELECT i, j FROM iceberg_v3_default_value WHERE i = 100; |
| ---- RESULTS |
| 100,-999 |
| ---- TYPES |
| INT,INT |
| ==== |
| ---- QUERY |
| # Test write-default with explicit value |
| # Explicitly providing a value should override write-default |
| INSERT INTO iceberg_v3_default_value VALUES (101, 50); |
| SELECT i, j FROM iceberg_v3_default_value WHERE i = 101; |
| ---- RESULTS |
| 101,50 |
| ---- TYPES |
| INT,INT |
| ==== |
| ---- QUERY |
| # Test write-default with explicit NULL |
| # Explicitly specifying NULL should write NULL, not the write-default |
| INSERT INTO iceberg_v3_default_value VALUES (102, NULL); |
| SELECT i, j FROM iceberg_v3_default_value WHERE i = 102; |
| ---- RESULTS |
| 102,NULL |
| ---- TYPES |
| INT,INT |
| ==== |
| ---- QUERY |
| # Verify all inserted rows with defaults |
| SELECT i, j FROM iceberg_v3_default_value WHERE i >= 100 ORDER BY i; |
| ---- RESULTS |
| 100,-999 |
| 101,50 |
| 102,NULL |
| ---- TYPES |
| INT,INT |
| ==== |
| ---- QUERY |
| # Test write-default on test_default_part: INSERT omitting partitioned column j should use write-default value -999 |
| # Column j has initial-default=-1 and write-default=-999 in metadata |
| # Since j is a partition column, the write-default also determines the partition |
| INSERT INTO test_default_part (i) VALUES (100); |
| SELECT i, j FROM test_default_part WHERE i = 100; |
| ---- RESULTS |
| 100,-999 |
| ---- TYPES |
| INT,INT |
| ==== |
| ---- QUERY |
| # Test write-default on test_default_part with explicit partition value |
| # Explicitly providing partition value should override write-default |
| INSERT INTO test_default_part VALUES (101, 50); |
| SELECT i, j FROM test_default_part WHERE i = 101; |
| ---- RESULTS |
| 101,50 |
| ---- TYPES |
| INT,INT |
| ==== |
| ---- QUERY |
| # Test write-default on test_default_part with explicit NULL partition |
| # Explicitly specifying NULL for partition column should write NULL, not the write-default |
| INSERT INTO test_default_part VALUES (102, NULL); |
| SELECT i, j FROM test_default_part WHERE i = 102; |
| ---- RESULTS |
| 102,NULL |
| ---- TYPES |
| INT,INT |
| ==== |
| ---- QUERY |
| # Verify all inserted rows with write-default on partitioned table |
| SELECT i, j FROM test_default_part WHERE i >= 100 ORDER BY i; |
| ---- RESULTS |
| 100,-999 |
| 101,50 |
| 102,NULL |
| ---- TYPES |
| INT,INT |
| ==== |
| ---- QUERY |
| # Supported write-default types: BOOLEAN, INT, BIGINT, FLOAT, DOUBLE, DECIMAL, STRING, DATE |
| # NOT supported: TIMESTAMP, BINARY |
| # |
| # write-default values: |
| # - col_boolean: true, col_int: -999, col_bigint: -999999999 |
| # - col_float: 9.99, col_double: 8.88, col_decimal: 999.99 |
| # - col_string: 'write_default', col_date: '2024-12-31' |
| INSERT INTO iceberg_v3_all_types (id, col_timestamp, col_binary) |
| VALUES (100, CAST('2024-01-01 00:00:00' AS TIMESTAMP), CAST(unhex('CAFEBABE') AS BINARY)); |
| SELECT id, col_boolean, col_int, col_bigint, col_float, col_double, col_decimal, col_string, col_date |
| FROM iceberg_v3_all_types WHERE id = 100; |
| ---- RESULTS |
| 100,true,-999,-999999999,9.989999771118164,8.88,999.99,'write_default',2024-12-31 |
| ---- TYPES |
| INT,BOOLEAN,INT,BIGINT,FLOAT,DOUBLE,DECIMAL,STRING,DATE |
| ==== |
| ---- QUERY |
| # Test write-default with explicit values - explicit values should override write-default |
| # Column list omits col_timestamp_epoch / col_timestamp_iso (initial-default only); those use NULL. |
| INSERT INTO iceberg_v3_all_types ( |
| id, col_boolean, col_int, col_bigint, col_float, col_double, col_decimal, col_string, col_date, |
| col_timestamp, col_binary) |
| VALUES |
| (101, true, 123, 456789, 1.23, 4.56, 78.90, 'custom', |
| CAST('2025-06-15' AS DATE), CAST('2025-06-15 12:34:56.123456' AS TIMESTAMP), CAST(unhex('ABCD1234') AS BINARY)); |
| SELECT id, col_boolean, col_int, col_bigint, col_float, col_double, col_decimal, col_string, col_date |
| FROM iceberg_v3_all_types WHERE id = 101; |
| ---- RESULTS |
| 101,true,123,456789,1.2300000190734863,4.56,78.90,'custom',2025-06-15 |
| ---- TYPES |
| INT,BOOLEAN,INT,BIGINT,FLOAT,DOUBLE,DECIMAL,STRING,DATE |
| ==== |
| ---- QUERY |
| # Test write-default with explicit NULL - NULL should override write-default |
| INSERT INTO iceberg_v3_all_types ( |
| id, col_boolean, col_int, col_bigint, col_float, col_double, col_decimal, col_string, col_date, |
| col_timestamp, col_binary) |
| VALUES |
| (102, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, |
| CAST('2024-01-01 00:00:00' AS TIMESTAMP), CAST(unhex('DEADBEEF') AS BINARY)); |
| SELECT id, col_boolean, col_int, col_bigint, col_float, col_double, col_decimal, col_string, col_date |
| FROM iceberg_v3_all_types WHERE id = 102; |
| ---- RESULTS |
| 102,NULL,NULL,NULL,NULL,NULL,NULL,'NULL',NULL |
| ---- TYPES |
| INT,BOOLEAN,INT,BIGINT,FLOAT,DOUBLE,DECIMAL,STRING,DATE |
| ==== |
| ---- QUERY |
| # Verify all rows with write-default insertions |
| SELECT id, col_boolean, col_int, col_bigint, col_float, col_double, col_decimal, col_string, col_date |
| FROM iceberg_v3_all_types |
| WHERE id >= 100 |
| ORDER BY id; |
| ---- RESULTS |
| 100,true,-999,-999999999,9.989999771118164,8.88,999.99,'write_default',2024-12-31 |
| 101,true,123,456789,1.2300000190734863,4.56,78.90,'custom',2025-06-15 |
| 102,NULL,NULL,NULL,NULL,NULL,NULL,'NULL',NULL |
| ---- TYPES |
| INT,BOOLEAN,INT,BIGINT,FLOAT,DOUBLE,DECIMAL,STRING,DATE |
| ==== |