blob: 1c0fc76534a2b1acfae4f70f983f7067c6e50e8a [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.iceberg;
import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList;
import org.apache.iceberg.transforms.Transforms;
import org.apache.iceberg.types.Types;
import org.junit.Assert;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;
import static org.apache.iceberg.expressions.Expressions.bucket;
import static org.apache.iceberg.expressions.Expressions.day;
import static org.apache.iceberg.expressions.Expressions.hour;
import static org.apache.iceberg.expressions.Expressions.month;
import static org.apache.iceberg.expressions.Expressions.ref;
import static org.apache.iceberg.expressions.Expressions.truncate;
import static org.apache.iceberg.expressions.Expressions.year;
/**
 * Tests for {@link BaseUpdatePartitionSpec}: adding, removing and renaming partition fields.
 *
 * <p>Every test runs once per table format version (1 and 2). Tests whose expected result differs by format
 * version use the {@code V1Assert} / {@code V2Assert} helpers, which are not defined in this file (presumably
 * inherited from {@link TableTestBase} and only active for the matching format version -- confirm there).
 */
@RunWith(Parameterized.class)
public class TestUpdatePartitionSpec extends TableTestBase {
  private static final Schema SCHEMA = new Schema(
      Types.NestedField.required(1, "id", Types.LongType.get()),
      Types.NestedField.required(2, "ts", Types.TimestampType.withZone()),
      Types.NestedField.required(3, "category", Types.StringType.get()),
      Types.NestedField.optional(4, "data", Types.StringType.get())
  );

  // starting point for tests that only add fields
  private static final PartitionSpec UNPARTITIONED = PartitionSpec.builderFor(SCHEMA).build();

  // starting point for tests that modify an existing spec: identity(category), day(ts), bucket(id, 16) as "shard"
  private static final PartitionSpec PARTITIONED = PartitionSpec.builderFor(SCHEMA)
      .identity("category")
      .day("ts")
      .bucket("id", 16, "shard")
      .build();

  /**
   * Supplies the table format versions that every test in this class is run against.
   *
   * @return the format versions, one per parameterized run
   */
  @Parameterized.Parameters(name = "formatVersion = {0}")
  public static Object[] parameters() {
    return new Object[] { 1, 2 };
  }

  public TestUpdatePartitionSpec(int formatVersion) {
    super(formatVersion);
  }

  // ---------------------------------------------------------------------------------------------
  // Adding fields
  // ---------------------------------------------------------------------------------------------

  @Test
  public void testAddIdentityByName() {
    PartitionSpec updated = new BaseUpdatePartitionSpec(formatVersion, UNPARTITIONED)
        .addField("category")
        .apply();

    PartitionSpec expected = PartitionSpec.builderFor(SCHEMA)
        .identity("category")
        .build();

    Assert.assertEquals("Should match expected spec", expected, updated);
  }

  @Test
  public void testAddIdentityByTerm() {
    PartitionSpec updated = new BaseUpdatePartitionSpec(formatVersion, UNPARTITIONED)
        .addField(ref("category"))
        .apply();

    PartitionSpec expected = PartitionSpec.builderFor(SCHEMA)
        .identity("category")
        .build();

    Assert.assertEquals("Should match expected spec", expected, updated);
  }

  @Test
  public void testAddYear() {
    PartitionSpec updated = new BaseUpdatePartitionSpec(formatVersion, UNPARTITIONED)
        .addField(year("ts"))
        .apply();

    PartitionSpec expected = PartitionSpec.builderFor(SCHEMA)
        .year("ts")
        .build();

    Assert.assertEquals("Should match expected spec", expected, updated);
  }

  @Test
  public void testAddMonth() {
    PartitionSpec updated = new BaseUpdatePartitionSpec(formatVersion, UNPARTITIONED)
        .addField(month("ts"))
        .apply();

    PartitionSpec expected = PartitionSpec.builderFor(SCHEMA)
        .month("ts")
        .build();

    Assert.assertEquals("Should match expected spec", expected, updated);
  }

  @Test
  public void testAddDay() {
    PartitionSpec updated = new BaseUpdatePartitionSpec(formatVersion, UNPARTITIONED)
        .addField(day("ts"))
        .apply();

    PartitionSpec expected = PartitionSpec.builderFor(SCHEMA)
        .day("ts")
        .build();

    Assert.assertEquals("Should match expected spec", expected, updated);
  }

  @Test
  public void testAddHour() {
    PartitionSpec updated = new BaseUpdatePartitionSpec(formatVersion, UNPARTITIONED)
        .addField(hour("ts"))
        .apply();

    PartitionSpec expected = PartitionSpec.builderFor(SCHEMA)
        .hour("ts")
        .build();

    Assert.assertEquals("Should match expected spec", expected, updated);
  }

  @Test
  public void testAddBucket() {
    PartitionSpec updated = new BaseUpdatePartitionSpec(formatVersion, UNPARTITIONED)
        .addField(bucket("id", 16))
        .apply();

    // added fields have different default names to avoid conflicts
    PartitionSpec expected = PartitionSpec.builderFor(SCHEMA)
        .bucket("id", 16, "id_bucket_16")
        .build();

    Assert.assertEquals("Should match expected spec", expected, updated);
  }

  @Test
  public void testAddTruncate() {
    PartitionSpec updated = new BaseUpdatePartitionSpec(formatVersion, UNPARTITIONED)
        .addField(truncate("data", 4))
        .apply();

    // added fields have different default names to avoid conflicts
    PartitionSpec expected = PartitionSpec.builderFor(SCHEMA)
        .truncate("data", 4, "data_trunc_4")
        .build();

    Assert.assertEquals("Should match expected spec", expected, updated);
  }

  @Test
  public void testAddNamedPartition() {
    PartitionSpec updated = new BaseUpdatePartitionSpec(formatVersion, UNPARTITIONED)
        .addField("shard", bucket("id", 16))
        .apply();

    PartitionSpec expected = PartitionSpec.builderFor(SCHEMA)
        .bucket("id", 16, "shard")
        .build();

    Assert.assertEquals("Should match expected spec", expected, updated);
  }

  @Test
  public void testAddToExisting() {
    PartitionSpec updated = new BaseUpdatePartitionSpec(formatVersion, PARTITIONED)
        .addField(truncate("data", 4))
        .apply();

    PartitionSpec expected = PartitionSpec.builderFor(SCHEMA)
        .identity("category")
        .day("ts")
        .bucket("id", 16, "shard")
        .truncate("data", 4, "data_trunc_4")
        .build();

    Assert.assertEquals("Should match expected spec", expected, updated);
  }

  @Test
  public void testMultipleAdds() {
    PartitionSpec updated = new BaseUpdatePartitionSpec(formatVersion, UNPARTITIONED)
        .addField("category")
        .addField(day("ts"))
        .addField("shard", bucket("id", 16))
        .addField("prefix", truncate("data", 4))
        .apply();

    PartitionSpec expected = PartitionSpec.builderFor(SCHEMA)
        .identity("category")
        .day("ts")
        .bucket("id", 16, "shard")
        .truncate("data", 4, "prefix")
        .build();

    Assert.assertEquals("Should match expected spec", expected, updated);
  }

  @Test
  public void testAddHourToDay() {
    // multiple partitions for the same source with different time granularity is not allowed by the builder, but is
    // allowed when updating a spec so that existing columns in metadata continue to work.
    PartitionSpec byDay = new BaseUpdatePartitionSpec(formatVersion, UNPARTITIONED)
        .addField(day("ts"))
        .apply();

    PartitionSpec byHour = new BaseUpdatePartitionSpec(formatVersion, byDay)
        .addField(hour("ts"))
        .apply();

    // the builder cannot produce this spec, so compare the field list directly
    Assert.assertEquals("Should have a day and an hour time field",
        ImmutableList.of(
            new PartitionField(2, 1000, "ts_day", Transforms.day(Types.TimestampType.withZone())),
            new PartitionField(2, 1001, "ts_hour", Transforms.hour(Types.TimestampType.withZone()))),
        byHour.fields());
  }

  @Test
  public void testAddMultipleBuckets() {
    PartitionSpec bucket16 = new BaseUpdatePartitionSpec(formatVersion, UNPARTITIONED)
        .addField(bucket("id", 16))
        .apply();

    PartitionSpec bucket8 = new BaseUpdatePartitionSpec(formatVersion, bucket16)
        .addField(bucket("id", 8))
        .apply();

    PartitionSpec expected = PartitionSpec.builderFor(SCHEMA)
        .bucket("id", 16, "id_bucket_16")
        .bucket("id", 8, "id_bucket_8")
        .build();

    Assert.assertEquals("Should have a 16-bucket and an 8-bucket field", expected, bucket8);
  }

  // ---------------------------------------------------------------------------------------------
  // Removing fields: the expected v1 specs keep the removed field's position with an alwaysNull
  // placeholder, while the expected v2 specs omit the field and keep the remaining field IDs.
  // ---------------------------------------------------------------------------------------------

  @Test
  public void testRemoveIdentityByName() {
    PartitionSpec updated = new BaseUpdatePartitionSpec(formatVersion, PARTITIONED)
        .removeField("category")
        .apply();

    PartitionSpec v1Expected = PartitionSpec.builderFor(SCHEMA)
        .alwaysNull("category", "category")
        .day("ts")
        .bucket("id", 16, "shard")
        .build();

    V1Assert.assertEquals("Should match expected spec", v1Expected, updated);

    PartitionSpec v2Expected = PartitionSpec.builderFor(SCHEMA)
        .add(id("ts"), 1001, "ts_day", Transforms.day(Types.TimestampType.withZone()))
        .add(id("id"), 1002, "shard", Transforms.bucket(Types.LongType.get(), 16))
        .build();

    V2Assert.assertEquals("Should match expected spec", v2Expected, updated);
  }

  @Test
  public void testRemoveBucketByName() {
    PartitionSpec updated = new BaseUpdatePartitionSpec(formatVersion, PARTITIONED)
        .removeField("shard")
        .apply();

    PartitionSpec v1Expected = PartitionSpec.builderFor(SCHEMA)
        .identity("category")
        .day("ts")
        .alwaysNull("id", "shard")
        .build();

    V1Assert.assertEquals("Should match expected spec", v1Expected, updated);

    PartitionSpec v2Expected = PartitionSpec.builderFor(SCHEMA)
        .add(id("category"), 1000, "category", Transforms.identity(Types.StringType.get()))
        .add(id("ts"), 1001, "ts_day", Transforms.day(Types.TimestampType.withZone()))
        .build();

    V2Assert.assertEquals("Should match expected spec", v2Expected, updated);
  }

  @Test
  public void testRemoveIdentityByEquivalent() {
    PartitionSpec updated = new BaseUpdatePartitionSpec(formatVersion, PARTITIONED)
        .removeField(ref("category"))
        .apply();

    PartitionSpec v1Expected = PartitionSpec.builderFor(SCHEMA)
        .alwaysNull("category", "category")
        .day("ts")
        .bucket("id", 16, "shard")
        .build();

    V1Assert.assertEquals("Should match expected spec", v1Expected, updated);

    PartitionSpec v2Expected = PartitionSpec.builderFor(SCHEMA)
        .add(id("ts"), 1001, "ts_day", Transforms.day(Types.TimestampType.withZone()))
        .add(id("id"), 1002, "shard", Transforms.bucket(Types.LongType.get(), 16))
        .build();

    V2Assert.assertEquals("Should match expected spec", v2Expected, updated);
  }

  @Test
  public void testRemoveDayByEquivalent() {
    PartitionSpec updated = new BaseUpdatePartitionSpec(formatVersion, PARTITIONED)
        .removeField(day("ts"))
        .apply();

    PartitionSpec v1Expected = PartitionSpec.builderFor(SCHEMA)
        .identity("category")
        .alwaysNull("ts", "ts_day")
        .bucket("id", 16, "shard")
        .build();

    V1Assert.assertEquals("Should match expected spec", v1Expected, updated);

    PartitionSpec v2Expected = PartitionSpec.builderFor(SCHEMA)
        .add(id("category"), 1000, "category", Transforms.identity(Types.StringType.get()))
        .add(id("id"), 1002, "shard", Transforms.bucket(Types.LongType.get(), 16))
        .build();

    V2Assert.assertEquals("Should match expected spec", v2Expected, updated);
  }

  @Test
  public void testRemoveBucketByEquivalent() {
    PartitionSpec updated = new BaseUpdatePartitionSpec(formatVersion, PARTITIONED)
        .removeField(bucket("id", 16))
        .apply();

    PartitionSpec v1Expected = PartitionSpec.builderFor(SCHEMA)
        .identity("category")
        .day("ts")
        .alwaysNull("id", "shard")
        .build();

    V1Assert.assertEquals("Should match expected spec", v1Expected, updated);

    PartitionSpec v2Expected = PartitionSpec.builderFor(SCHEMA)
        .identity("category")
        .day("ts")
        .build();

    V2Assert.assertEquals("Should match expected spec", v2Expected, updated);
  }

  // ---------------------------------------------------------------------------------------------
  // Renaming fields and combined changes
  // ---------------------------------------------------------------------------------------------

  @Test
  public void testRename() {
    PartitionSpec updated = new BaseUpdatePartitionSpec(formatVersion, PARTITIONED)
        .renameField("shard", "id_bucket") // rename back to default
        .apply();

    PartitionSpec expected = PartitionSpec.builderFor(SCHEMA)
        .identity("category")
        .day("ts")
        .bucket("id", 16)
        .build();

    Assert.assertEquals("Should match expected spec", expected, updated);
  }

  @Test
  public void testMultipleChanges() {
    PartitionSpec updated = new BaseUpdatePartitionSpec(formatVersion, PARTITIONED)
        .renameField("shard", "id_bucket") // rename back to default
        .removeField(day("ts"))
        .addField("prefix", truncate("data", 4))
        .apply();

    PartitionSpec v1Expected = PartitionSpec.builderFor(SCHEMA)
        .identity("category")
        .alwaysNull("ts", "ts_day")
        .bucket("id", 16)
        .truncate("data", 4, "prefix")
        .build();

    V1Assert.assertEquals("Should match expected spec", v1Expected, updated);

    PartitionSpec v2Expected = PartitionSpec.builderFor(SCHEMA)
        .add(id("category"), 1000, "category", Transforms.identity(Types.StringType.get()))
        .add(id("id"), 1002, "id_bucket", Transforms.bucket(Types.LongType.get(), 16))
        .add(id("data"), 1003, "prefix", Transforms.truncate(Types.StringType.get(), 4))
        .build();

    V2Assert.assertEquals("Should match expected spec", v2Expected, updated);
  }

  @Test
  public void testAddDeletedName() {
    // NOTE(review): despite its name, this test never adds a field; its body is identical to
    // testRemoveBucketByEquivalent. It presumably was meant to re-add a field using the deleted name "shard" --
    // confirm the intended scenario and expected result before extending it.
    PartitionSpec updated = new BaseUpdatePartitionSpec(formatVersion, PARTITIONED)
        .removeField(bucket("id", 16))
        .apply();

    PartitionSpec v1Expected = PartitionSpec.builderFor(SCHEMA)
        .identity("category")
        .day("ts")
        .alwaysNull("id", "shard")
        .build();

    V1Assert.assertEquals("Should match expected spec", v1Expected, updated);

    PartitionSpec v2Expected = PartitionSpec.builderFor(SCHEMA)
        .identity("category")
        .day("ts")
        .build();

    V2Assert.assertEquals("Should match expected spec", v2Expected, updated);
  }

  // ---------------------------------------------------------------------------------------------
  // Rejected changes: each test expects an IllegalArgumentException whose message contains the
  // given text. None of these call apply(); the failure is expected from the builder call itself.
  // ---------------------------------------------------------------------------------------------

  @Test
  public void testRemoveNewlyAddedFieldByName() {
    AssertHelpers.assertThrows("Should fail trying to remove a newly added field",
        IllegalArgumentException.class, "Cannot delete newly added field",
        () -> new BaseUpdatePartitionSpec(formatVersion, PARTITIONED)
            .addField("prefix", truncate("data", 4))
            .removeField("prefix")
    );
  }

  @Test
  public void testRemoveNewlyAddedFieldByTransform() {
    AssertHelpers.assertThrows("Should fail trying to remove a newly added field",
        IllegalArgumentException.class, "Cannot delete newly added field",
        () -> new BaseUpdatePartitionSpec(formatVersion, PARTITIONED)
            .addField("prefix", truncate("data", 4))
            .removeField(truncate("data", 4)));
  }

  @Test
  public void testAddAlreadyAddedFieldByTransform() {
    AssertHelpers.assertThrows("Should fail adding a duplicate field",
        IllegalArgumentException.class, "Cannot add duplicate partition field",
        () -> new BaseUpdatePartitionSpec(formatVersion, PARTITIONED)
            .addField("prefix", truncate("data", 4))
            .addField(truncate("data", 4)));
  }

  @Test
  public void testAddAlreadyAddedFieldByName() {
    AssertHelpers.assertThrows("Should fail adding a duplicate field",
        IllegalArgumentException.class, "Cannot add duplicate partition field",
        () -> new BaseUpdatePartitionSpec(formatVersion, PARTITIONED)
            .addField("prefix", truncate("data", 4))
            .addField("prefix", truncate("data", 6)));
  }

  @Test
  public void testAddRedundantTimePartition() {
    AssertHelpers.assertThrows("Should fail adding a redundant time field",
        IllegalArgumentException.class, "Cannot add redundant partition field",
        () -> new BaseUpdatePartitionSpec(formatVersion, UNPARTITIONED)
            .addField(day("ts"))
            .addField(hour("ts"))); // conflicts with day

    AssertHelpers.assertThrows("Should fail adding a redundant time field",
        IllegalArgumentException.class, "Cannot add redundant partition",
        () -> new BaseUpdatePartitionSpec(formatVersion, PARTITIONED)
            .addField(hour("ts")) // does not conflict with day because day already exists
            .addField(month("ts"))); // conflicts with hour
  }

  @Test
  public void testAddDeletedField() {
    AssertHelpers.assertThrows("Should fail adding a duplicate field",
        IllegalArgumentException.class, "Cannot add duplicate partition field",
        () -> new BaseUpdatePartitionSpec(formatVersion, PARTITIONED)
            .removeField("shard")
            .addField(bucket("id", 16))); // duplicates shard
  }

  @Test
  public void testAddDuplicateByName() {
    AssertHelpers.assertThrows("Should fail adding a duplicate field",
        IllegalArgumentException.class, "Cannot add duplicate partition field",
        () -> new BaseUpdatePartitionSpec(formatVersion, PARTITIONED)
            .addField("category"));
  }

  @Test
  public void testAddDuplicateByRef() {
    AssertHelpers.assertThrows("Should fail adding a duplicate field",
        IllegalArgumentException.class, "Cannot add duplicate partition field",
        () -> new BaseUpdatePartitionSpec(formatVersion, PARTITIONED)
            .addField(ref("category")));
  }

  @Test
  public void testAddDuplicateTransform() {
    AssertHelpers.assertThrows("Should fail adding a duplicate field",
        IllegalArgumentException.class, "Cannot add duplicate partition field",
        () -> new BaseUpdatePartitionSpec(formatVersion, PARTITIONED)
            .addField(bucket("id", 16)));
  }

  @Test
  public void testAddNamedDuplicate() {
    AssertHelpers.assertThrows("Should fail adding a duplicate field",
        IllegalArgumentException.class, "Cannot add duplicate partition field",
        () -> new BaseUpdatePartitionSpec(formatVersion, PARTITIONED)
            .addField("b16", bucket("id", 16)));
  }

  @Test
  public void testRemoveUnknownFieldByName() {
    AssertHelpers.assertThrows("Should fail trying to remove unknown field",
        IllegalArgumentException.class, "Cannot find partition field to remove",
        () -> new BaseUpdatePartitionSpec(formatVersion, PARTITIONED).removeField("moon")
    );
  }

  @Test
  public void testRemoveUnknownFieldByEquivalent() {
    AssertHelpers.assertThrows("Should fail trying to remove unknown field",
        IllegalArgumentException.class, "Cannot find partition field to remove",
        () -> new BaseUpdatePartitionSpec(formatVersion, PARTITIONED).removeField(hour("ts")) // day(ts) exists
    );
  }

  @Test
  public void testRenameUnknownField() {
    AssertHelpers.assertThrows("Should fail trying to rename an unknown field",
        IllegalArgumentException.class, "Cannot find partition field to rename",
        () -> new BaseUpdatePartitionSpec(formatVersion, PARTITIONED).renameField("shake", "seal")
    );
  }

  @Test
  public void testRenameAfterAdd() {
    AssertHelpers.assertThrows("Should fail trying to rename an added field",
        IllegalArgumentException.class, "Cannot rename newly added partition field",
        () -> new BaseUpdatePartitionSpec(formatVersion, PARTITIONED)
            .addField("data_trunc", truncate("data", 4))
            .renameField("data_trunc", "prefix")
    );
  }

  @Test
  public void testDeleteAndRename() {
    // the rename is issued first, so the call expected to fail is the removal of the renamed field
    AssertHelpers.assertThrows("Should fail trying to delete a renamed field",
        IllegalArgumentException.class, "Cannot rename and delete partition field",
        () -> new BaseUpdatePartitionSpec(formatVersion, PARTITIONED)
            .renameField("shard", "id_bucket")
            .removeField(bucket("id", 16)));
  }

  @Test
  public void testRenameAndDelete() {
    // the removal is issued first, so the call expected to fail is the rename of the deleted field
    AssertHelpers.assertThrows("Should fail trying to rename a deleted field",
        IllegalArgumentException.class, "Cannot delete and rename partition field",
        () -> new BaseUpdatePartitionSpec(formatVersion, PARTITIONED)
            .removeField(bucket("id", 16))
            .renameField("shard", "id_bucket"));
  }

  /**
   * Looks up the field ID of a column in {@link #SCHEMA}.
   *
   * @param name a column name that exists in the test schema
   * @return the ID of the schema field with that name
   */
  private static int id(String name) {
    return SCHEMA.findField(name).fieldId();
  }
}