blob: 60e79aad91cec50369fc3edf3b16fc44dc8660dc [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.iceberg;
import static org.assertj.core.api.Assertions.assertThat;
import static org.assertj.core.api.Assumptions.assumeThat;
import java.util.Arrays;
import java.util.List;
import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList;
import org.apache.iceberg.relocated.com.google.common.collect.Iterables;
import org.apache.iceberg.types.TypeUtil;
import org.junit.jupiter.api.TestTemplate;
import org.junit.jupiter.api.extension.ExtendWith;
@ExtendWith(ParameterizedTestExtension.class)
public class TestEntriesMetadataTable extends TestBase {
@Parameters(name = "formatVersion = {0}")
protected static List<Object> parameters() {
return Arrays.asList(1, 2);
}
@TestTemplate
public void testEntriesTable() {
table.newAppend().appendFile(FILE_A).appendFile(FILE_B).commit();
Table entriesTable = new ManifestEntriesTable(table);
Schema readSchema = ManifestEntry.getSchema(table.spec().partitionType());
Schema expectedSchema =
TypeUtil.join(readSchema, MetricsUtil.readableMetricsSchema(table.schema(), readSchema));
assertThat(entriesTable.schema().asStruct())
.as("A tableScan.select() should prune the schema")
.isEqualTo(expectedSchema.asStruct());
}
@TestTemplate
public void testEntriesTableScan() {
table.newAppend().appendFile(FILE_A).appendFile(FILE_B).commit();
Table entriesTable = new ManifestEntriesTable(table);
TableScan scan = entriesTable.newScan();
Schema readSchema = ManifestEntry.getSchema(table.spec().partitionType());
Schema expectedSchema =
TypeUtil.join(readSchema, MetricsUtil.readableMetricsSchema(table.schema(), readSchema));
assertThat(scan.schema().asStruct())
.as("A tableScan.select() should prune the schema")
.isEqualTo(expectedSchema.asStruct());
FileScanTask file = Iterables.getOnlyElement(scan.planFiles());
assertThat(file.file().path())
.as("Data file should be the table's manifest")
.isEqualTo(table.currentSnapshot().allManifests(table.io()).get(0).path());
assertThat(file.file().recordCount()).as("Should contain 2 data file records").isEqualTo(2);
}
@TestTemplate
public void testSplitPlanningWithMetadataSplitSizeProperty() {
table.newAppend().appendFile(FILE_A).appendFile(FILE_B).commit();
table.newAppend().appendFile(FILE_C).appendFile(FILE_D).commit();
// set the split size to a large value so that both manifests are in 1 split
table
.updateProperties()
.set(TableProperties.METADATA_SPLIT_SIZE, String.valueOf(128 * 1024 * 1024))
.commit();
Table entriesTable = new ManifestEntriesTable(table);
assertThat(entriesTable.newScan().planTasks()).hasSize(1);
// set the split size to a small value so that manifests end up in different splits
table.updateProperties().set(TableProperties.METADATA_SPLIT_SIZE, String.valueOf(1)).commit();
assertThat(entriesTable.newScan().planTasks()).hasSize(2);
// override the table property with a large value so that both manifests are in 1 split
TableScan scan =
entriesTable
.newScan()
.option(TableProperties.SPLIT_SIZE, String.valueOf(128 * 1024 * 1024));
assertThat(scan.planTasks()).hasSize(1);
}
@TestTemplate
public void testSplitPlanningWithDefaultMetadataSplitSize() {
table.newAppend().appendFile(FILE_A).appendFile(FILE_B).commit();
int splitSize =
(int) TableProperties.METADATA_SPLIT_SIZE_DEFAULT; // default split size is 32 MB
Table entriesTable = new ManifestEntriesTable(table);
assertThat(entriesTable.currentSnapshot().allManifests(table.io())).hasSize(1);
int expectedSplits =
((int) entriesTable.currentSnapshot().allManifests(table.io()).get(0).length()
+ splitSize
- 1)
/ splitSize;
TableScan scan = entriesTable.newScan();
assertThat(scan.planTasks()).hasSize(expectedSplits);
}
@TestTemplate
public void testEntriesTableWithDeleteManifests() {
assumeThat(formatVersion).as("Only V2 Tables Support Deletes").isGreaterThanOrEqualTo(2);
table.newAppend().appendFile(FILE_A).appendFile(FILE_B).commit();
table.newRowDelta().addDeletes(FILE_A_DELETES).commit();
Table entriesTable = new ManifestEntriesTable(table);
TableScan scan = entriesTable.newScan();
Schema readSchema = ManifestEntry.getSchema(table.spec().partitionType());
Schema expectedSchema =
TypeUtil.join(readSchema, MetricsUtil.readableMetricsSchema(table.schema(), readSchema));
assertThat(scan.schema().asStruct())
.as("A tableScan.select() should prune the schema")
.isEqualTo(expectedSchema.asStruct());
List<FileScanTask> files = ImmutableList.copyOf(scan.planFiles());
assertThat(files.get(0).file().path())
.as("Data file should be the table's manifest")
.isEqualTo(table.currentSnapshot().dataManifests(table.io()).get(0).path());
assertThat(files.get(0).file().recordCount())
.as("Should contain 2 data file records")
.isEqualTo(2);
assertThat(files.get(1).file().path())
.as("Delete file should be in the table manifest")
.isEqualTo(table.currentSnapshot().deleteManifests(table.io()).get(0).path());
assertThat(files.get(1).file().recordCount())
.as("Should contain 1 delete file record")
.isEqualTo(1);
}
}