/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.iceberg;
import org.apache.iceberg.ManifestEntry.Status;
import org.apache.iceberg.expressions.Expressions;
import org.junit.Assert;
import org.junit.Test;
/**
 * Tests for row-delta commits and for how later operations treat the delete files those commits add.
 *
 * <p>Every test first commits through {@code table.newRowDelta()} and then checks the sequence numbers,
 * snapshot ids and entry statuses recorded in the resulting data and delete manifests. The {@code table}
 * and the {@code FILE_*} fixtures, as well as the {@code validate*} helpers, are inherited through
 * {@link V2TableTestBase}.
 */
public class TestRowDelta extends V2TableTestBase {

  /**
   * A row delta adding one data file and two delete files is expected to commit as sequence number 1 with
   * operation {@code overwrite}, producing one data manifest and one delete manifest whose entries are all
   * {@code ADDED} by that snapshot.
   */
  @Test
  public void testAddDeleteFile() {
    table.newRowDelta()
        .addRows(FILE_A)
        .addDeletes(FILE_A_DELETES)
        .addDeletes(FILE_B_DELETES)
        .commit();

    Snapshot snap = table.currentSnapshot();
    Assert.assertEquals("Commit should produce sequence number 1", 1, snap.sequenceNumber());
    Assert.assertEquals("Last sequence number should be 1", 1, table.ops().current().lastSequenceNumber());
    Assert.assertEquals("Delta commit should use operation 'overwrite'", DataOperations.OVERWRITE, snap.operation());

    Assert.assertEquals("Should produce 1 data manifest", 1, snap.dataManifests().size());
    validateManifest(
        snap.dataManifests().get(0),
        seqs(1),
        ids(snap.snapshotId()),
        files(FILE_A),
        statuses(Status.ADDED));

    Assert.assertEquals("Should produce 1 delete manifest", 1, snap.deleteManifests().size());
    validateDeleteManifest(
        snap.deleteManifests().get(0),
        seqs(1, 1),
        ids(snap.snapshotId(), snap.snapshotId()),
        files(FILE_A_DELETES, FILE_B_DELETES),
        statuses(Status.ADDED, Status.ADDED));
  }

  /**
   * After a row delta, an overwrite by row filter is expected to mark {@code FILE_A} and
   * {@code FILE_A_DELETES} as {@code DELETED} in the new snapshot, while {@code FILE_B_DELETES} stays
   * {@code EXISTING} with the sequence number and snapshot id of the original delta commit.
   */
  @Test
  public void testOverwriteWithDeleteFile() {
    table.newRowDelta()
        .addRows(FILE_A)
        .addDeletes(FILE_A_DELETES)
        .addDeletes(FILE_B_DELETES)
        .commit();

    long deltaSnapshotId = table.currentSnapshot().snapshotId();
    Assert.assertEquals("Commit should produce sequence number 1", 1, table.currentSnapshot().sequenceNumber());
    Assert.assertEquals("Last sequence number should be 1", 1, table.ops().current().lastSequenceNumber());

    // overwriting by a filter will also remove delete files that match because all matching data files are removed.
    table.newOverwrite()
        .overwriteByRowFilter(Expressions.equal(Expressions.bucket("data", 16), 0))
        .commit();

    Snapshot snap = table.currentSnapshot();
    Assert.assertEquals("Commit should produce sequence number 2", 2, snap.sequenceNumber());
    Assert.assertEquals("Last sequence number should be 2", 2, table.ops().current().lastSequenceNumber());

    Assert.assertEquals("Should produce 1 data manifest", 1, snap.dataManifests().size());
    validateManifest(
        snap.dataManifests().get(0),
        seqs(2),
        ids(snap.snapshotId()),
        files(FILE_A),
        statuses(Status.DELETED));

    Assert.assertEquals("Should produce 1 delete manifest", 1, snap.deleteManifests().size());
    validateDeleteManifest(
        snap.deleteManifests().get(0),
        seqs(2, 1),
        ids(snap.snapshotId(), deltaSnapshotId),
        files(FILE_A_DELETES, FILE_B_DELETES),
        statuses(Status.DELETED, Status.EXISTING));
  }

  /**
   * After a row delta, replacing partitions with {@code FILE_A2} is expected to produce two data manifests
   * (one deleting {@code FILE_A}, one adding {@code FILE_A2}) and a single delete manifest in which
   * {@code FILE_A_DELETES} is {@code DELETED} and {@code FILE_B_DELETES} remains {@code EXISTING}.
   */
  @Test
  public void testReplacePartitionsWithDeleteFile() {
    table.newRowDelta()
        .addRows(FILE_A)
        .addDeletes(FILE_A_DELETES)
        .addDeletes(FILE_B_DELETES)
        .commit();

    long deltaSnapshotId = table.currentSnapshot().snapshotId();
    Assert.assertEquals("Commit should produce sequence number 1", 1, table.currentSnapshot().sequenceNumber());
    Assert.assertEquals("Last sequence number should be 1", 1, table.ops().current().lastSequenceNumber());

    // overwriting the partition will also remove delete files that match because all matching data files are removed.
    table.newReplacePartitions()
        .addFile(FILE_A2)
        .commit();

    Snapshot snap = table.currentSnapshot();
    Assert.assertEquals("Commit should produce sequence number 2", 2, snap.sequenceNumber());
    Assert.assertEquals("Last sequence number should be 2", 2, table.ops().current().lastSequenceNumber());

    Assert.assertEquals("Should produce 2 data manifests", 2, snap.dataManifests().size());

    // the order of the two data manifests is not asserted; locate the one that records the deleted entry
    int deleteManifestPos = snap.dataManifests().get(0).deletedFilesCount() > 0 ? 0 : 1;
    validateManifest(
        snap.dataManifests().get(deleteManifestPos),
        seqs(2),
        ids(snap.snapshotId()),
        files(FILE_A),
        statuses(Status.DELETED));

    // the remaining data manifest is the one written for the appended file
    int appendManifestPos = deleteManifestPos == 0 ? 1 : 0;
    validateManifest(
        snap.dataManifests().get(appendManifestPos),
        seqs(2),
        ids(snap.snapshotId()),
        files(FILE_A2),
        statuses(Status.ADDED));

    Assert.assertEquals("Should produce 1 delete manifest", 1, snap.deleteManifests().size());
    validateDeleteManifest(
        snap.deleteManifests().get(0),
        seqs(2, 1),
        ids(snap.snapshotId(), deltaSnapshotId),
        files(FILE_A_DELETES, FILE_B_DELETES),
        statuses(Status.DELETED, Status.EXISTING));
  }

  /**
   * After a row delta, a delete with an always-true row filter is expected to mark the data file and both
   * delete files as {@code DELETED} by the new snapshot at sequence number 2.
   */
  @Test
  public void testDeleteByExpressionWithDeleteFile() {
    table.newRowDelta()
        .addRows(FILE_A)
        .addDeletes(FILE_A_DELETES)
        .addDeletes(FILE_B_DELETES)
        .commit();

    Assert.assertEquals("Commit should produce sequence number 1", 1, table.currentSnapshot().sequenceNumber());
    Assert.assertEquals("Last sequence number should be 1", 1, table.ops().current().lastSequenceNumber());

    // deleting with a filter will also remove delete files that match because all matching data files are removed.
    table.newDelete()
        .deleteFromRowFilter(Expressions.alwaysTrue())
        .commit();

    Snapshot snap = table.currentSnapshot();
    Assert.assertEquals("Commit should produce sequence number 2", 2, snap.sequenceNumber());
    Assert.assertEquals("Last sequence number should be 2", 2, table.ops().current().lastSequenceNumber());

    Assert.assertEquals("Should produce 1 data manifest", 1, snap.dataManifests().size());
    validateManifest(
        snap.dataManifests().get(0),
        seqs(2),
        ids(snap.snapshotId()),
        files(FILE_A),
        statuses(Status.DELETED));

    // both delete entries are rewritten by this snapshot, so neither keeps the delta commit's snapshot id
    Assert.assertEquals("Should produce 1 delete manifest", 1, snap.deleteManifests().size());
    validateDeleteManifest(
        snap.deleteManifests().get(0),
        seqs(2, 2),
        ids(snap.snapshotId(), snap.snapshotId()),
        files(FILE_A_DELETES, FILE_B_DELETES),
        statuses(Status.DELETED, Status.DELETED));
  }

  /**
   * Deleting {@code FILE_A} by file reference is expected to leave the delete manifest from the row delta
   * unchanged; the following delete commit is then expected to leave no data manifests and to mark
   * {@code FILE_A_DELETES} as {@code DELETED}.
   */
  @Test
  public void testDeleteDataFileWithDeleteFile() {
    table.newRowDelta()
        .addRows(FILE_A)
        .addDeletes(FILE_A_DELETES)
        .commit();

    long deltaSnapshotId = table.currentSnapshot().snapshotId();
    Assert.assertEquals("Commit should produce sequence number 1", 1, table.currentSnapshot().sequenceNumber());
    Assert.assertEquals("Last sequence number should be 1", 1, table.ops().current().lastSequenceNumber());

    // deleting a specific data file will not affect a delete file
    table.newDelete()
        .deleteFile(FILE_A)
        .commit();

    Snapshot deleteSnap = table.currentSnapshot();
    Assert.assertEquals("Commit should produce sequence number 2", 2, deleteSnap.sequenceNumber());
    Assert.assertEquals("Last sequence number should be 2", 2, table.ops().current().lastSequenceNumber());

    Assert.assertEquals("Should produce 1 data manifest", 1, deleteSnap.dataManifests().size());
    validateManifest(
        deleteSnap.dataManifests().get(0),
        seqs(2),
        ids(deleteSnap.snapshotId()),
        files(FILE_A),
        statuses(Status.DELETED));

    Assert.assertEquals("Should produce 1 delete manifest", 1, deleteSnap.deleteManifests().size());
    validateDeleteManifest(
        deleteSnap.deleteManifests().get(0),
        seqs(1),
        ids(deltaSnapshotId),
        files(FILE_A_DELETES),
        statuses(Status.ADDED));

    // the manifest that removed FILE_A will be dropped next commit, causing the min sequence number of all data files
    // to be 2, the largest known sequence number. this will cause FILE_A_DELETES to be removed because it is too old
    // to apply to any data files.
    table.newDelete()
        .deleteFile("no-such-file")
        .commit();

    Snapshot nextSnap = table.currentSnapshot();
    // this commit is a delete, so the failure message names that operation
    Assert.assertEquals("Delete should produce sequence number 3", 3, nextSnap.sequenceNumber());
    Assert.assertEquals("Last sequence number should be 3", 3, table.ops().current().lastSequenceNumber());

    Assert.assertEquals("Should have 0 data manifests", 0, nextSnap.dataManifests().size());

    Assert.assertEquals("Should produce 1 delete manifest", 1, nextSnap.deleteManifests().size());
    validateDeleteManifest(
        nextSnap.deleteManifests().get(0),
        seqs(3),
        ids(nextSnap.snapshotId()),
        files(FILE_A_DELETES),
        statuses(Status.DELETED));
  }

  /**
   * After a row delta and a delete of {@code FILE_A}, a fast append of {@code FILE_B} is expected to keep
   * both the data manifest that recorded the deletion and the delete manifest from the row delta unchanged,
   * while adding a new data manifest for {@code FILE_B}.
   */
  @Test
  public void testFastAppendDoesNotRemoveStaleDeleteFiles() {
    table.newRowDelta()
        .addRows(FILE_A)
        .addDeletes(FILE_A_DELETES)
        .commit();

    long deltaSnapshotId = table.currentSnapshot().snapshotId();
    Assert.assertEquals("Commit should produce sequence number 1", 1, table.currentSnapshot().sequenceNumber());
    Assert.assertEquals("Last sequence number should be 1", 1, table.ops().current().lastSequenceNumber());

    // deleting a specific data file will not affect a delete file
    table.newDelete()
        .deleteFile(FILE_A)
        .commit();

    Snapshot deleteSnap = table.currentSnapshot();
    Assert.assertEquals("Commit should produce sequence number 2", 2, deleteSnap.sequenceNumber());
    Assert.assertEquals("Last sequence number should be 2", 2, table.ops().current().lastSequenceNumber());

    Assert.assertEquals("Should produce 1 data manifest", 1, deleteSnap.dataManifests().size());
    validateManifest(
        deleteSnap.dataManifests().get(0),
        seqs(2),
        ids(deleteSnap.snapshotId()),
        files(FILE_A),
        statuses(Status.DELETED));

    Assert.assertEquals("Should produce 1 delete manifest", 1, deleteSnap.deleteManifests().size());
    validateDeleteManifest(
        deleteSnap.deleteManifests().get(0),
        seqs(1),
        ids(deltaSnapshotId),
        files(FILE_A_DELETES),
        statuses(Status.ADDED));

    // the manifest that removed FILE_A will be dropped next merging commit, but FastAppend will not remove it
    table.newFastAppend()
        .appendFile(FILE_B)
        .commit();

    Snapshot nextSnap = table.currentSnapshot();
    Assert.assertEquals("Append should produce sequence number 3", 3, nextSnap.sequenceNumber());
    Assert.assertEquals("Last sequence number should be 3", 3, table.ops().current().lastSequenceNumber());

    Assert.assertEquals("Should have 2 data manifests", 2, nextSnap.dataManifests().size());

    // the order of the two data manifests is not asserted; locate the one that records the deleted entry
    int deleteManifestPos = nextSnap.dataManifests().get(0).deletedFilesCount() > 0 ? 0 : 1;
    validateManifest(
        nextSnap.dataManifests().get(deleteManifestPos),
        seqs(2),
        ids(deleteSnap.snapshotId()),
        files(FILE_A),
        statuses(Status.DELETED));

    // the remaining data manifest is the one written by the fast append
    int appendManifestPos = deleteManifestPos == 0 ? 1 : 0;
    validateManifest(
        nextSnap.dataManifests().get(appendManifestPos),
        seqs(3),
        ids(nextSnap.snapshotId()),
        files(FILE_B),
        statuses(Status.ADDED));

    Assert.assertEquals("Should produce 1 delete manifest", 1, nextSnap.deleteManifests().size());
    validateDeleteManifest(
        nextSnap.deleteManifests().get(0),
        seqs(1),
        ids(deltaSnapshotId),
        files(FILE_A_DELETES),
        statuses(Status.ADDED));
  }
}