blob: 0ef5f0946b84e88a21ec546e63fd985e6918f3b9 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.iceberg;
import java.util.List;
/**
* A scan task for deletes generated by removing a data file from the table.
* <p>
* Note that all historical delete files added earlier must be applied while reading the data file.
* This is required to output only those data records that were live when the data file was removed.
* <p>
* Suppose snapshot S1 contains data files F1, F2, F3. Then snapshot S2 adds a position delete file, D1,
* that deletes records from F2 and snapshot S3 removes F2 entirely. A scan for changes generated by S3
* should include the following task:
* <ul>
* <li>DeletedDataFileScanTask(file=F2, existing-deletes=[D1], snapshot=S3)</li>
* </ul>
* <p>
* Readers consuming these tasks should produce deleted records with metadata like change ordinal and
* commit snapshot ID.
*/
public interface DeletedDataFileScanTask
    extends ChangelogScanTask, ContentScanTask<DataFile> {

  /**
   * Identifies the kind of change this task represents.
   * <p>
   * The default implementation reports {@link ChangelogOperation#DELETE}.
   *
   * @return {@link ChangelogOperation#DELETE} unless overridden by an implementation
   */
  @Override
  default ChangelogOperation operation() {
    return ChangelogOperation.DELETE;
  }

  /**
   * Returns the previously added {@link DeleteFile delete files} that have to be applied while the data file
   * of this task is being read.
   *
   * @return a list of delete files to apply
   */
  List<DeleteFile> existingDeletes();
}