core/src/test/java/org/apache/iceberg/TestEntriesMetadataTable.java - iceberg - Git at Google

 /*
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information
  * regarding copyright ownership.  The ASF licenses this file
  * to you under the Apache License, Version 2.0 (the
  * "License"); you may not use this file except in compliance
  * with the License.  You may obtain a copy of the License at
  *
  *   http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing,
  * software distributed under the License is distributed on an
  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
  * KIND, either express or implied.  See the License for the
  * specific language governing permissions and limitations
  * under the License.
  */
 package org.apache.iceberg;

 import static org.assertj.core.api.Assertions.assertThat;
 import static org.assertj.core.api.Assumptions.assumeThat;

 import java.util.Arrays;
 import java.util.List;
 import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList;
 import org.apache.iceberg.relocated.com.google.common.collect.Iterables;
 import org.apache.iceberg.types.TypeUtil;
 import org.junit.jupiter.api.TestTemplate;
 import org.junit.jupiter.api.extension.ExtendWith;

 @ExtendWith(ParameterizedTestExtension.class)
 public class TestEntriesMetadataTable extends TestBase {

   @Parameters(name = "formatVersion = {0}")
   protected static List<Object> parameters() {
     return Arrays.asList(1, 2);
   }

   @TestTemplate
   public void testEntriesTable() {
     table.newAppend().appendFile(FILE_A).appendFile(FILE_B).commit();

     Table entriesTable = new ManifestEntriesTable(table);

     Schema readSchema = ManifestEntry.getSchema(table.spec().partitionType());
     Schema expectedSchema =
         TypeUtil.join(readSchema, MetricsUtil.readableMetricsSchema(table.schema(), readSchema));

     assertThat(entriesTable.schema().asStruct())
         .as("A tableScan.select() should prune the schema")
         .isEqualTo(expectedSchema.asStruct());
   }

   @TestTemplate
   public void testEntriesTableScan() {
     table.newAppend().appendFile(FILE_A).appendFile(FILE_B).commit();

     Table entriesTable = new ManifestEntriesTable(table);
     TableScan scan = entriesTable.newScan();

     Schema readSchema = ManifestEntry.getSchema(table.spec().partitionType());
     Schema expectedSchema =
         TypeUtil.join(readSchema, MetricsUtil.readableMetricsSchema(table.schema(), readSchema));

     assertThat(scan.schema().asStruct())
         .as("A tableScan.select() should prune the schema")
         .isEqualTo(expectedSchema.asStruct());

     FileScanTask file = Iterables.getOnlyElement(scan.planFiles());
     assertThat(file.file().path())
         .as("Data file should be the table's manifest")
         .isEqualTo(table.currentSnapshot().allManifests(table.io()).get(0).path());

     assertThat(file.file().recordCount()).as("Should contain 2 data file records").isEqualTo(2);
   }

   @TestTemplate
   public void testSplitPlanningWithMetadataSplitSizeProperty() {
     table.newAppend().appendFile(FILE_A).appendFile(FILE_B).commit();

     table.newAppend().appendFile(FILE_C).appendFile(FILE_D).commit();

     // set the split size to a large value so that both manifests are in 1 split
     table
         .updateProperties()
         .set(TableProperties.METADATA_SPLIT_SIZE, String.valueOf(128 * 1024 * 1024))
         .commit();

     Table entriesTable = new ManifestEntriesTable(table);

     assertThat(entriesTable.newScan().planTasks()).hasSize(1);

     // set the split size to a small value so that manifests end up in different splits
     table.updateProperties().set(TableProperties.METADATA_SPLIT_SIZE, String.valueOf(1)).commit();

     assertThat(entriesTable.newScan().planTasks()).hasSize(2);

     // override the table property with a large value so that both manifests are in 1 split
     TableScan scan =
         entriesTable
             .newScan()
             .option(TableProperties.SPLIT_SIZE, String.valueOf(128 * 1024 * 1024));

     assertThat(scan.planTasks()).hasSize(1);
   }

   @TestTemplate
   public void testSplitPlanningWithDefaultMetadataSplitSize() {
     table.newAppend().appendFile(FILE_A).appendFile(FILE_B).commit();

     int splitSize =
         (int) TableProperties.METADATA_SPLIT_SIZE_DEFAULT; // default split size is 32 MB

     Table entriesTable = new ManifestEntriesTable(table);
     assertThat(entriesTable.currentSnapshot().allManifests(table.io())).hasSize(1);

     int expectedSplits =
         ((int) entriesTable.currentSnapshot().allManifests(table.io()).get(0).length()
                 + splitSize
                 - 1)
             / splitSize;

     TableScan scan = entriesTable.newScan();

     assertThat(scan.planTasks()).hasSize(expectedSplits);
   }

   @TestTemplate
   public void testEntriesTableWithDeleteManifests() {
     assumeThat(formatVersion).as("Only V2 Tables Support Deletes").isGreaterThanOrEqualTo(2);
     table.newAppend().appendFile(FILE_A).appendFile(FILE_B).commit();

     table.newRowDelta().addDeletes(FILE_A_DELETES).commit();

     Table entriesTable = new ManifestEntriesTable(table);
     TableScan scan = entriesTable.newScan();

     Schema readSchema = ManifestEntry.getSchema(table.spec().partitionType());
     Schema expectedSchema =
         TypeUtil.join(readSchema, MetricsUtil.readableMetricsSchema(table.schema(), readSchema));

     assertThat(scan.schema().asStruct())
         .as("A tableScan.select() should prune the schema")
         .isEqualTo(expectedSchema.asStruct());

     List<FileScanTask> files = ImmutableList.copyOf(scan.planFiles());
     assertThat(files.get(0).file().path())
         .as("Data file should be the table's manifest")
         .isEqualTo(table.currentSnapshot().dataManifests(table.io()).get(0).path());
     assertThat(files.get(0).file().recordCount())
         .as("Should contain 2 data file records")
         .isEqualTo(2);
     assertThat(files.get(1).file().path())
         .as("Delete file should be in the table manifest")
         .isEqualTo(table.currentSnapshot().deleteManifests(table.io()).get(0).path());
     assertThat(files.get(1).file().recordCount())
         .as("Should contain 1 delete file record")
         .isEqualTo(1);
   }
 }
	/*
	* Licensed to the Apache Software Foundation (ASF) under one
	* or more contributor license agreements. See the NOTICE file
	* distributed with this work for additional information
	* regarding copyright ownership. The ASF licenses this file
	* to you under the Apache License, Version 2.0 (the
	* "License"); you may not use this file except in compliance
	* with the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing,
	* software distributed under the License is distributed on an
	* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
	* KIND, either express or implied. See the License for the
	* specific language governing permissions and limitations
	* under the License.
	*/
	package org.apache.iceberg;

	import static org.assertj.core.api.Assertions.assertThat;
	import static org.assertj.core.api.Assumptions.assumeThat;

	import java.util.Arrays;
	import java.util.List;
	import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList;
	import org.apache.iceberg.relocated.com.google.common.collect.Iterables;
	import org.apache.iceberg.types.TypeUtil;
	import org.junit.jupiter.api.TestTemplate;
	import org.junit.jupiter.api.extension.ExtendWith;

	@ExtendWith(ParameterizedTestExtension.class)
	public class TestEntriesMetadataTable extends TestBase {

	@Parameters(name = "formatVersion = {0}")
	protected static List<Object> parameters() {
	return Arrays.asList(1, 2);
	}

	@TestTemplate
	public void testEntriesTable() {
	table.newAppend().appendFile(FILE_A).appendFile(FILE_B).commit();

	Table entriesTable = new ManifestEntriesTable(table);

	Schema readSchema = ManifestEntry.getSchema(table.spec().partitionType());
	Schema expectedSchema =
	TypeUtil.join(readSchema, MetricsUtil.readableMetricsSchema(table.schema(), readSchema));

	assertThat(entriesTable.schema().asStruct())
	.as("A tableScan.select() should prune the schema")
	.isEqualTo(expectedSchema.asStruct());
	}

	@TestTemplate
	public void testEntriesTableScan() {
	table.newAppend().appendFile(FILE_A).appendFile(FILE_B).commit();

	Table entriesTable = new ManifestEntriesTable(table);
	TableScan scan = entriesTable.newScan();

	Schema readSchema = ManifestEntry.getSchema(table.spec().partitionType());
	Schema expectedSchema =
	TypeUtil.join(readSchema, MetricsUtil.readableMetricsSchema(table.schema(), readSchema));

	assertThat(scan.schema().asStruct())
	.as("A tableScan.select() should prune the schema")
	.isEqualTo(expectedSchema.asStruct());

	FileScanTask file = Iterables.getOnlyElement(scan.planFiles());
	assertThat(file.file().path())
	.as("Data file should be the table's manifest")
	.isEqualTo(table.currentSnapshot().allManifests(table.io()).get(0).path());

	assertThat(file.file().recordCount()).as("Should contain 2 data file records").isEqualTo(2);
	}

	@TestTemplate
	public void testSplitPlanningWithMetadataSplitSizeProperty() {
	table.newAppend().appendFile(FILE_A).appendFile(FILE_B).commit();

	table.newAppend().appendFile(FILE_C).appendFile(FILE_D).commit();

	// set the split size to a large value so that both manifests are in 1 split
	table
	.updateProperties()
	.set(TableProperties.METADATA_SPLIT_SIZE, String.valueOf(128 * 1024 * 1024))
	.commit();

	Table entriesTable = new ManifestEntriesTable(table);

	assertThat(entriesTable.newScan().planTasks()).hasSize(1);

	// set the split size to a small value so that manifests end up in different splits
	table.updateProperties().set(TableProperties.METADATA_SPLIT_SIZE, String.valueOf(1)).commit();

	assertThat(entriesTable.newScan().planTasks()).hasSize(2);

	// override the table property with a large value so that both manifests are in 1 split
	TableScan scan =
	entriesTable
	.newScan()
	.option(TableProperties.SPLIT_SIZE, String.valueOf(128 * 1024 * 1024));

	assertThat(scan.planTasks()).hasSize(1);
	}

	@TestTemplate
	public void testSplitPlanningWithDefaultMetadataSplitSize() {
	table.newAppend().appendFile(FILE_A).appendFile(FILE_B).commit();

	int splitSize =
	(int) TableProperties.METADATA_SPLIT_SIZE_DEFAULT; // default split size is 32 MB

	Table entriesTable = new ManifestEntriesTable(table);
	assertThat(entriesTable.currentSnapshot().allManifests(table.io())).hasSize(1);

	int expectedSplits =
	((int) entriesTable.currentSnapshot().allManifests(table.io()).get(0).length()
	+ splitSize
	- 1)
	/ splitSize;

	TableScan scan = entriesTable.newScan();

	assertThat(scan.planTasks()).hasSize(expectedSplits);
	}

	@TestTemplate
	public void testEntriesTableWithDeleteManifests() {
	assumeThat(formatVersion).as("Only V2 Tables Support Deletes").isGreaterThanOrEqualTo(2);
	table.newAppend().appendFile(FILE_A).appendFile(FILE_B).commit();

	table.newRowDelta().addDeletes(FILE_A_DELETES).commit();

	Table entriesTable = new ManifestEntriesTable(table);
	TableScan scan = entriesTable.newScan();

	Schema readSchema = ManifestEntry.getSchema(table.spec().partitionType());
	Schema expectedSchema =
	TypeUtil.join(readSchema, MetricsUtil.readableMetricsSchema(table.schema(), readSchema));

	assertThat(scan.schema().asStruct())
	.as("A tableScan.select() should prune the schema")
	.isEqualTo(expectedSchema.asStruct());

	List<FileScanTask> files = ImmutableList.copyOf(scan.planFiles());
	assertThat(files.get(0).file().path())
	.as("Data file should be the table's manifest")
	.isEqualTo(table.currentSnapshot().dataManifests(table.io()).get(0).path());
	assertThat(files.get(0).file().recordCount())
	.as("Should contain 2 data file records")
	.isEqualTo(2);
	assertThat(files.get(1).file().path())
	.as("Delete file should be in the table manifest")
	.isEqualTo(table.currentSnapshot().deleteManifests(table.io()).get(0).path());
	assertThat(files.get(1).file().recordCount())
	.as("Should contain 1 delete file record")
	.isEqualTo(1);
	}
	}