// blob: 1163b3931252f6d1a03fe70c4362a51c48848ea8 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.iceberg;
import java.io.IOException;
import java.util.List;
import java.util.stream.Collectors;
import org.apache.iceberg.ManifestEntry.Status;
import org.apache.iceberg.expressions.Expressions;
import org.apache.iceberg.relocated.com.google.common.collect.Iterables;
import org.apache.iceberg.relocated.com.google.common.collect.Lists;
import org.apache.iceberg.relocated.com.google.common.collect.Streams;
import org.apache.iceberg.types.Types;
import org.junit.Assert;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;
/**
 * Tests for reading manifests back through {@code ManifestFiles.read}.
 *
 * <p>The class is parameterized, so every test runs once per table format version listed in
 * {@link #parameters()} (1 and 2).
 */
@RunWith(Parameterized.class)
public class TestManifestReader extends TableTestBase {
  /**
   * Supplies the table format versions the tests are run against.
   *
   * @return the format versions, one test run per element
   */
  @Parameterized.Parameters(name = "formatVersion = {0}")
  public static Object[] parameters() {
    return new Object[] { 1, 2 };
  }

  /**
   * Creates the test instance for a single format version.
   *
   * @param formatVersion table format version passed on to the base class
   */
  public TestManifestReader(int formatVersion) {
    super(formatVersion);
  }

  /**
   * Writes a manifest holding one EXISTING entry for FILE_A with snapshot ID 1000 and checks that
   * status, file path and snapshot ID are read back unchanged.
   */
  @Test
  public void testManifestReaderWithEmptyInheritableMetadata() throws IOException {
    ManifestFile manifest = writeManifest(1000L, manifestEntry(Status.EXISTING, 1000L, FILE_A));
    try (ManifestReader<DataFile> reader = ManifestFiles.read(manifest, FILE_IO)) {
      ManifestEntry<DataFile> entry = Iterables.getOnlyElement(reader.entries());
      Assert.assertEquals(Status.EXISTING, entry.status());
      Assert.assertEquals(FILE_A.path(), entry.file().path());
      Assert.assertEquals(1000L, (long) entry.snapshotId());
    }
  }

  /**
   * Applies a row filter ({@code id == 0}) without a column selection and checks which file paths
   * the reader yields, in order.
   */
  @Test
  public void testReaderWithFilterWithoutSelect() throws IOException {
    ManifestFile manifest = writeManifest(1000L, FILE_A, FILE_B, FILE_C);
    try (ManifestReader<DataFile> reader = ManifestFiles.read(manifest, FILE_IO)
        .filterRows(Expressions.equal("id", 0))) {
      List<String> files = Streams.stream(reader)
          .map(file -> file.path().toString())
          .collect(Collectors.toList());

      // note that all files are returned because the reader returns data files that may match, and the partition is
      // bucketing by data, which doesn't help filter files
      Assert.assertEquals("Should read the expected files",
          Lists.newArrayList(FILE_A.path(), FILE_B.path(), FILE_C.path()), files);
    }
  }

  /**
   * Checks that reading a manifest that was written without a snapshot ID is rejected with an
   * {@link IllegalArgumentException}.
   */
  @Test
  public void testInvalidUsage() throws IOException {
    // the writeManifest overload used here takes no snapshot ID argument
    ManifestFile manifest = writeManifest(FILE_A, FILE_B);
    AssertHelpers.assertThrows(
        "Should not be possible to read manifest without explicit snapshot ids and inheritable metadata",
        IllegalArgumentException.class, "Cannot read from ManifestFile with null (unassigned) snapshot ID",
        () -> ManifestFiles.read(manifest, FILE_IO));
  }

  /**
   * Checks the partition type attached to a data file that is read from a manifest: a single
   * integer field named {@code data_bucket} with field ID 1000. Also checks that the entry keeps
   * its own snapshot ID (123) rather than the one the manifest was written with (1000).
   */
  @Test
  public void testManifestReaderWithPartitionMetadata() throws IOException {
    ManifestFile manifest = writeManifest(1000L, manifestEntry(Status.EXISTING, 123L, FILE_A));
    try (ManifestReader<DataFile> reader = ManifestFiles.read(manifest, FILE_IO)) {
      ManifestEntry<DataFile> entry = Iterables.getOnlyElement(reader.entries());
      Assert.assertEquals(123L, (long) entry.snapshotId());

      List<Types.NestedField> fields = ((PartitionData) entry.file().partition()).getPartitionType().fields();
      Assert.assertEquals(1, fields.size());
      Assert.assertEquals(1000, fields.get(0).fieldId());
      Assert.assertEquals("data_bucket", fields.get(0).name());
      Assert.assertEquals(Types.IntegerType.get(), fields.get(0).type());
    }
  }

  /**
   * Commits an updated partition spec (bucket on {@code id}, then bucket on {@code data}) before
   * writing the manifest, and checks that the partition type read back has two integer fields with
   * IDs 1000 and 1001 in that order.
   *
   * <p>NOTE(review): the method name mentions V1 tables, but nothing here restricts the run to
   * format version 1, so it executes for every parameterized version - confirm this is intended.
   */
  @Test
  public void testManifestReaderWithUpdatedPartitionMetadataForV1Table() throws IOException {
    PartitionSpec spec = PartitionSpec.builderFor(table.schema())
        .bucket("id", 8)
        .bucket("data", 16)
        .build();
    table.ops().commit(table.ops().current(), table.ops().current().updatePartitionSpec(spec));

    ManifestFile manifest = writeManifest(1000L, manifestEntry(Status.EXISTING, 123L, FILE_A));
    try (ManifestReader<DataFile> reader = ManifestFiles.read(manifest, FILE_IO)) {
      ManifestEntry<DataFile> entry = Iterables.getOnlyElement(reader.entries());
      Assert.assertEquals(123L, (long) entry.snapshotId());

      List<Types.NestedField> fields = ((PartitionData) entry.file().partition()).getPartitionType().fields();
      Assert.assertEquals(2, fields.size());
      Assert.assertEquals(1000, fields.get(0).fieldId());
      Assert.assertEquals("id_bucket", fields.get(0).name());
      Assert.assertEquals(Types.IntegerType.get(), fields.get(0).type());

      Assert.assertEquals(1001, fields.get(1).fieldId());
      Assert.assertEquals("data_bucket", fields.get(1).name());
      Assert.assertEquals(Types.IntegerType.get(), fields.get(1).type());
    }
  }

  /**
   * Checks that the data files read from a manifest report consecutive positions starting at 0, and
   * that all three written files are actually returned.
   */
  @Test
  public void testDataFilePositions() throws IOException {
    ManifestFile manifest = writeManifest(1000L, FILE_A, FILE_B, FILE_C);
    try (ManifestReader<DataFile> reader = ManifestFiles.read(manifest, FILE_IO)) {
      long expectedPos = 0L;
      for (DataFile file : reader) {
        Assert.assertEquals("Position should match", (Long) expectedPos, file.pos());
        expectedPos += 1;
      }

      // without this check the loop above passes vacuously when the reader yields fewer files than were written
      Assert.assertEquals("Should read all data files", 3L, expectedPos);
    }
  }
}