blob: cd8ebfac8b69e09b1763eb90c5501ce83bfdd0e2 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.iceberg;
import java.util.List;
/**
 * A changelog scan task that describes rows of a data file removed by delete files that were added
 * to the table.
 *
 * <p>Worked example: snapshot S1 contains data files F1, F2 and F3. Snapshot S2 then adds a
 * position delete file D1 that deletes records from F2, and snapshot S3 adds an equality delete
 * file D2 that removes records from F1, F2 and F3. A scan for changes from S2 to S3 (inclusive)
 * should include the following tasks:
 *
 * <ul>
 *   <li>DeletedRowsScanTask(file=F2, added-deletes=[D1], existing-deletes=[], snapshot=S2)</li>
 *   <li>DeletedRowsScanTask(file=F1, added-deletes=[D2], existing-deletes=[], snapshot=S3)</li>
 *   <li>DeletedRowsScanTask(file=F2, added-deletes=[D2], existing-deletes=[D1], snapshot=S3)</li>
 *   <li>DeletedRowsScanTask(file=F3, added-deletes=[D2], existing-deletes=[], snapshot=S3)</li>
 * </ul>
 *
 * <p>Readers that consume these tasks should emit the deleted records together with metadata such
 * as the change ordinal and the commit snapshot ID.
 */
public interface DeletedRowsScanTask extends ChangelogScanTask, ContentScanTask<DataFile> {

  /**
   * Identifies the kind of change this task represents.
   *
   * @return always {@link ChangelogOperation#DELETE}
   */
  @Override
  default ChangelogOperation operation() {
    return ChangelogOperation.DELETE;
  }

  /**
   * Returns the newly added {@link DeleteFile delete files} that apply to this task's data file.
   *
   * <p>Records removed by these delete files should be reported as deletes in the changelog.
   *
   * @return a list of added delete files
   */
  List<DeleteFile> addedDeletes();

  /**
   * Returns the {@link DeleteFile delete files} that already existed before the change.
   *
   * <p>These must be applied first, before working out which records are deleted by the delete
   * files in {@link #addedDeletes()}. Records removed by these existing delete files should not
   * appear in the changelog.
   *
   * @return a list of existing delete files
   */
  List<DeleteFile> existingDeletes();
}