blob: 6512ea114f2349972398b3d53cbcc315f1bbe9ef [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.iceberg;
import java.io.File;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import org.apache.iceberg.ManifestEntry.Status;
import org.apache.iceberg.exceptions.ValidationException;
import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap;
import org.apache.iceberg.types.Types;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;
import static org.apache.iceberg.expressions.Expressions.and;
import static org.apache.iceberg.expressions.Expressions.equal;
import static org.apache.iceberg.expressions.Expressions.lessThan;
import static org.apache.iceberg.types.Types.NestedField.optional;
import static org.apache.iceberg.types.Types.NestedField.required;
/**
 * Tests for overwrite operations created with {@code table.newOverwrite()}.
 *
 * <p>The suite is parameterized and runs once for each table format version returned by
 * {@link #parameters()}. Every test starts from a table partitioned by {@code date} that already
 * contains {@link #FILE_0_TO_4} and {@link #FILE_5_TO_9} (see {@link #createTestTable()}).
 */
@RunWith(Parameterized.class)
public class TestOverwrite extends TableTestBase {
  /** Schema with a required long {@code id}, optional string {@code data}, required string {@code date}. */
  private static final Schema DATE_SCHEMA = new Schema(
      required(1, "id", Types.LongType.get()),
      optional(2, "data", Types.StringType.get()),
      required(3, "date", Types.StringType.get()));

  /** Identity partitioning on the {@code date} column of {@link #DATE_SCHEMA}. */
  private static final PartitionSpec PARTITION_BY_DATE = PartitionSpec
      .builderFor(DATE_SCHEMA)
      .identity("date")
      .build();

  private static final String TABLE_NAME = "overwrite_table";

  /** File in partition {@code date=2018-06-08} whose {@code id} bounds are 0 and 4. */
  private static final DataFile FILE_0_TO_4 = DataFiles.builder(PARTITION_BY_DATE)
      .withPath("/path/to/data-1.parquet")
      .withFileSizeInBytes(0)
      .withPartitionPath("date=2018-06-08")
      .withMetrics(new Metrics(5L,
          null, // no column sizes
          ImmutableMap.of(1, 5L, 2, 3L), // value count
          ImmutableMap.of(1, 0L, 2, 2L), // null count
          ImmutableMap.of(1, longToBuffer(0L)), // lower bounds
          ImmutableMap.of(1, longToBuffer(4L)) // upper bounds
      ))
      .build();

  /** File in partition {@code date=2018-06-09} whose {@code id} bounds are 5 and 9. */
  private static final DataFile FILE_5_TO_9 = DataFiles.builder(PARTITION_BY_DATE)
      .withPath("/path/to/data-2.parquet")
      .withFileSizeInBytes(0)
      .withPartitionPath("date=2018-06-09")
      .withMetrics(new Metrics(5L,
          null, // no column sizes
          ImmutableMap.of(1, 5L, 2, 3L), // value count
          ImmutableMap.of(1, 0L, 2, 2L), // null count
          ImmutableMap.of(1, longToBuffer(5L)), // lower bounds
          ImmutableMap.of(1, longToBuffer(9L)) // upper bounds
      ))
      .build();

  /**
   * File in partition {@code date=2018-06-09} whose {@code id} bounds are 10 and 14.
   *
   * <p>Uses its own path and id range so that it can be told apart from {@link #FILE_5_TO_9},
   * which lives in the same partition.
   */
  private static final DataFile FILE_10_TO_14 = DataFiles.builder(PARTITION_BY_DATE)
      .withPath("/path/to/data-3.parquet")
      .withFileSizeInBytes(0)
      .withPartitionPath("date=2018-06-09")
      .withMetrics(new Metrics(5L,
          null, // no column sizes
          ImmutableMap.of(1, 5L, 2, 3L), // value count
          ImmutableMap.of(1, 0L, 2, 2L), // null count
          ImmutableMap.of(1, longToBuffer(10L)), // lower bounds
          ImmutableMap.of(1, longToBuffer(14L)) // upper bounds
      ))
      .build();

  /** Table under test; re-created before every test by {@link #createTestTable()}. */
  private Table table;

  /**
   * Supplies the table format versions the suite is run against.
   *
   * @return the format versions, one per parameterized run
   */
  @Parameterized.Parameters(name = "formatVersion = {0}")
  public static Object[] parameters() {
    return new Object[] { 1, 2 };
  }

  /**
   * @param formatVersion table format version for this run, passed on to the base class
   */
  public TestOverwrite(int formatVersion) {
    super(formatVersion);
  }

  /**
   * Encodes a long as an 8-byte little-endian buffer for use as a column bound.
   *
   * @param value the value to encode
   * @return a new buffer holding {@code value}
   */
  private static ByteBuffer longToBuffer(long value) {
    // absolute put: the buffer's position is not advanced, so all 8 bytes remain readable
    return ByteBuffer.allocate(8).order(ByteOrder.LITTLE_ENDIAN).putLong(0, value);
  }

  /**
   * Creates a fresh table partitioned by date and appends {@link #FILE_0_TO_4} and
   * {@link #FILE_5_TO_9} in a single commit.
   *
   * @throws IOException if the temporary folder cannot be created
   */
  @Before
  public void createTestTable() throws IOException {
    File tableDir = temp.newFolder();
    // only the unique location is needed; presumably TestTables.create expects the directory
    // not to exist yet -- TODO confirm
    Assert.assertTrue(tableDir.delete());

    this.table = TestTables.create(tableDir, TABLE_NAME, DATE_SCHEMA, PARTITION_BY_DATE, formatVersion);

    table.newAppend()
        .appendFile(FILE_0_TO_4)
        .appendFile(FILE_5_TO_9)
        .commit();
  }

  /** Overwriting one whole partition without adding files marks that partition's file deleted. */
  @Test
  public void testOverwriteWithoutAppend() {
    TableMetadata base = TestTables.readMetadata(TABLE_NAME);
    long baseId = base.currentSnapshot().snapshotId();

    table.newOverwrite()
        .overwriteByRowFilter(equal("date", "2018-06-08"))
        .commit();

    long overwriteId = table.currentSnapshot().snapshotId();

    Assert.assertNotEquals("Should create a new snapshot", baseId, overwriteId);
    Assert.assertEquals("Table should have one manifest",
        1, table.currentSnapshot().allManifests().size());

    validateManifestEntries(table.currentSnapshot().allManifests().get(0),
        ids(overwriteId, baseId),
        files(FILE_0_TO_4, FILE_5_TO_9),
        statuses(Status.DELETED, Status.EXISTING));
  }

  /** A row filter that matches only part of an existing file must be rejected at commit. */
  @Test
  public void testOverwriteFailsDelete() {
    TableMetadata base = TestTables.readMetadata(TABLE_NAME);
    long baseId = base.currentSnapshot().snapshotId();

    // FILE_5_TO_9 has an upper id bound of 9, so "id < 9" cannot cover the whole file
    OverwriteFiles overwrite = table.newOverwrite()
        .overwriteByRowFilter(and(equal("date", "2018-06-09"), lessThan("id", 9)));

    AssertHelpers.assertThrows("Should reject commit with file not matching delete expression",
        ValidationException.class, "Cannot delete file where some, but not all, rows match filter",
        overwrite::commit);

    Assert.assertEquals("Should not create a new snapshot",
        baseId, table.currentSnapshot().snapshotId());
  }

  /** Without validation, a file outside the overwritten partition may be added in the same commit. */
  @Test
  public void testOverwriteWithAppendOutsideOfDelete() {
    TableMetadata base = TestTables.readMetadata(TABLE_NAME);
    long baseId = base.currentSnapshot().snapshotId();

    table.newOverwrite()
        .overwriteByRowFilter(equal("date", "2018-06-08"))
        .addFile(FILE_10_TO_14) // in 2018-06-09, NOT in 2018-06-08
        .commit();

    long overwriteId = table.currentSnapshot().snapshotId();

    Assert.assertNotEquals("Should create a new snapshot", baseId, overwriteId);
    Assert.assertEquals("Table should have 2 manifests",
        2, table.currentSnapshot().allManifests().size());

    // manifest is not merged because it is less than the minimum
    validateManifestEntries(table.currentSnapshot().allManifests().get(0),
        ids(overwriteId),
        files(FILE_10_TO_14),
        statuses(Status.ADDED));

    validateManifestEntries(table.currentSnapshot().allManifests().get(1),
        ids(overwriteId, baseId),
        files(FILE_0_TO_4, FILE_5_TO_9),
        statuses(Status.DELETED, Status.EXISTING));
  }

  /** Same as the unmerged case, but with the merge threshold lowered so one manifest results. */
  @Test
  public void testOverwriteWithMergedAppendOutsideOfDelete() {
    // ensure the overwrite results in a merge
    table.updateProperties().set(TableProperties.MANIFEST_MIN_MERGE_COUNT, "1").commit();

    TableMetadata base = TestTables.readMetadata(TABLE_NAME);
    long baseId = base.currentSnapshot().snapshotId();

    table.newOverwrite()
        .overwriteByRowFilter(equal("date", "2018-06-08"))
        .addFile(FILE_10_TO_14) // in 2018-06-09, NOT in 2018-06-08
        .commit();

    long overwriteId = table.currentSnapshot().snapshotId();

    Assert.assertNotEquals("Should create a new snapshot", baseId, overwriteId);
    Assert.assertEquals("Table should have one merged manifest",
        1, table.currentSnapshot().allManifests().size());

    validateManifestEntries(table.currentSnapshot().allManifests().get(0),
        ids(overwriteId, overwriteId, baseId),
        files(FILE_10_TO_14, FILE_0_TO_4, FILE_5_TO_9),
        statuses(Status.ADDED, Status.DELETED, Status.EXISTING));
  }

  /** With validation enabled, adding a file from a partition outside the filter is rejected. */
  @Test
  public void testValidatedOverwriteWithAppendOutsideOfDelete() {
    // ensure the overwrite results in a merge
    table.updateProperties().set(TableProperties.MANIFEST_MIN_MERGE_COUNT, "1").commit();

    TableMetadata base = TestTables.readMetadata(TABLE_NAME);
    long baseId = base.currentSnapshot().snapshotId();

    OverwriteFiles overwrite = table.newOverwrite()
        .overwriteByRowFilter(equal("date", "2018-06-08"))
        .addFile(FILE_10_TO_14) // in 2018-06-09, NOT in 2018-06-08
        .validateAddedFilesMatchOverwriteFilter();

    AssertHelpers.assertThrows("Should reject commit with file not matching delete expression",
        ValidationException.class, "Cannot append file with rows that do not match filter",
        overwrite::commit);

    Assert.assertEquals("Should not create a new snapshot",
        baseId, table.currentSnapshot().snapshotId());
  }

  /**
   * With validation enabled, adding a file in the right partition but with id bounds outside the
   * filter's id range is rejected.
   */
  @Test
  public void testValidatedOverwriteWithAppendOutsideOfDeleteMetrics() {
    TableMetadata base = TestTables.readMetadata(TABLE_NAME);
    long baseId = base.currentSnapshot().snapshotId();

    OverwriteFiles overwrite = table.newOverwrite()
        .overwriteByRowFilter(and(equal("date", "2018-06-09"), lessThan("id", 10)))
        .addFile(FILE_10_TO_14) // in 2018-06-09 matches, but IDs are outside range
        .validateAddedFilesMatchOverwriteFilter();

    AssertHelpers.assertThrows("Should reject commit with file not matching delete expression",
        ValidationException.class, "Cannot append file with rows that do not match filter",
        overwrite::commit);

    Assert.assertEquals("Should not create a new snapshot",
        baseId, table.currentSnapshot().snapshotId());
  }

  /**
   * Validated overwrite where the added file is in the filtered partition and its id bounds fall
   * inside the filter's id range.
   *
   * <p>NOTE(review): despite the "Success" name, the assertions below expect the commit to be
   * rejected and no snapshot to be created. The fixture carries bounds only for {@code id}
   * (field 1) and none for {@code date} (field 3); presumably that is why validation cannot prove
   * the file matches the filter -- confirm against the overwrite validation logic, then either
   * rename the test or change the expectation.
   */
  @Test
  public void testValidatedOverwriteWithAppendSuccess() {
    TableMetadata base = TestTables.readMetadata(TABLE_NAME);
    long baseId = base.currentSnapshot().snapshotId();

    OverwriteFiles overwrite = table.newOverwrite()
        .overwriteByRowFilter(and(equal("date", "2018-06-09"), lessThan("id", 20)))
        .addFile(FILE_10_TO_14) // in 2018-06-09 matches and IDs are inside range
        .validateAddedFilesMatchOverwriteFilter();

    AssertHelpers.assertThrows("Should reject commit with file not matching delete expression",
        ValidationException.class, "Cannot append file with rows that do not match filter",
        overwrite::commit);

    Assert.assertEquals("Should not create a new snapshot",
        baseId, table.currentSnapshot().snapshotId());
  }
}