blob: 8d5f8b63046d6eb2a0a686b97c4c4d7ea1cdd5a3 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.iceberg;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.Map;
import org.apache.iceberg.expressions.Expressions;
import org.apache.iceberg.io.CloseableIterable;
import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap;
import org.apache.iceberg.relocated.com.google.common.collect.ImmutableSet;
import org.apache.iceberg.types.Conversions;
import org.apache.iceberg.types.Types;
import org.junit.Assert;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;
/**
 * Checks which column stats a {@link ManifestReader} exposes on the data file it returns,
 * depending on the selected columns, the row filter, and whether the file is obtained through
 * {@code entries()} or {@code iterator()}.
 *
 * <p>The suite is parameterized and runs once for each of the format versions 1 and 2.
 */
@RunWith(Parameterized.class)
public class TestManifestReaderStats extends TableTestBase {
  // Every stats map below has the single key 3 (presumably a field id -- confirm against the
  // schema declared in TableTestBase).
  private static final Map<Integer, Long> VALUE_COUNT = ImmutableMap.of(3, 3L);
  private static final Map<Integer, Long> NULL_VALUE_COUNTS = ImmutableMap.of(3, 0L);
  private static final Map<Integer, Long> NAN_VALUE_COUNTS = ImmutableMap.of(3, 1L);

  // Bounds are the integers 2 (lower) and 4 (upper), serialized as Iceberg integer values.
  private static final Map<Integer, ByteBuffer> LOWER_BOUNDS =
      ImmutableMap.of(3, Conversions.toByteBuffer(Types.IntegerType.get(), 2));
  private static final Map<Integer, ByteBuffer> UPPER_BOUNDS =
      ImmutableMap.of(3, Conversions.toByteBuffer(Types.IntegerType.get(), 4));

  // The second constructor argument is null; the assertions in this class accordingly expect
  // columnSizes() to be null in every scenario.
  private static final Metrics METRICS = new Metrics(
      3L,
      null,
      VALUE_COUNT,
      NULL_VALUE_COUNTS,
      NAN_VALUE_COUNTS,
      LOWER_BOUNDS,
      UPPER_BOUNDS);

  // The single data file written into the manifest by each test.
  private static final DataFile FILE = DataFiles.builder(SPEC)
      .withPath("/path/to/data-a.parquet")
      .withFileSizeInBytes(10)
      .withPartitionPath("data_bucket=0") // easy way to set partition data for now
      .withRecordCount(3)
      .withMetrics(METRICS)
      .build();

  /** Supplies the format versions the suite is run with. */
  @Parameterized.Parameters(name = "formatVersion = {0}")
  public static Object[] parameters() {
    return new Object[] {1, 2};
  }

  /**
   * Creates the test instance for one format version.
   *
   * @param formatVersion format version handed to {@link TableTestBase}
   */
  public TestManifestReaderStats(int formatVersion) {
    super(formatVersion);
  }

  /** Reads entries with neither a selection nor a filter and expects every stat to be present. */
  @Test
  public void testReadIncludesFullStats() throws IOException {
    ManifestFile written = writeManifest(1000L, FILE);
    try (ManifestReader<DataFile> reader = ManifestFiles.read(written, FILE_IO)) {
      assertFullStats(firstEntryFile(reader));
    }
  }

  /** Reads entries with only a row filter and expects every stat to be present. */
  @Test
  public void testReadWithFilterIncludesFullStats() throws IOException {
    ManifestFile written = writeManifest(1000L, FILE);
    try (ManifestReader<DataFile> reader = ManifestFiles.read(written, FILE_IO)
        .filterRows(Expressions.equal("id", 3))) {
      assertFullStats(firstEntryFile(reader));
    }
  }

  /**
   * Reads entries with a row filter while selecting only {@code record_count}, and expects every
   * stat to be present on the entry's file.
   */
  @Test
  public void testReadEntriesWithFilterAndSelectIncludesFullStats() throws IOException {
    ManifestFile written = writeManifest(1000L, FILE);
    try (ManifestReader<DataFile> reader = ManifestFiles.read(written, FILE_IO)
        .select(ImmutableSet.of("record_count"))
        .filterRows(Expressions.equal("id", 3))) {
      assertFullStats(firstEntryFile(reader));
    }
  }

  /**
   * Iterates files with a row filter while selecting only {@code record_count}, and expects the
   * stats to be absent from the returned file.
   */
  @Test
  public void testReadIteratorWithFilterAndSelectDropsStats() throws IOException {
    ManifestFile written = writeManifest(1000L, FILE);
    try (ManifestReader<DataFile> reader = ManifestFiles.read(written, FILE_IO)
        .select(ImmutableSet.of("record_count"))
        .filterRows(Expressions.equal("id", 3))) {
      assertStatsDropped(firstIteratedFile(reader));
    }
  }

  /**
   * Iterates files with a row filter while selecting {@code record_count} and
   * {@code value_counts}, and expects every stat to be present on the returned file.
   */
  @Test
  public void testReadIteratorWithFilterAndSelectStatsIncludesFullStats() throws IOException {
    ManifestFile written = writeManifest(1000L, FILE);
    try (ManifestReader<DataFile> reader = ManifestFiles.read(written, FILE_IO)
        .select(ImmutableSet.of("record_count", "value_counts"))
        .filterRows(Expressions.equal("id", 3))) {
      assertFullStats(firstIteratedFile(reader));
    }
  }

  /**
   * Reads entries selecting only {@code record_count}, without a filter, and expects the stats
   * to be absent from the entry's file.
   */
  @Test
  public void testReadEntriesWithSelectNotIncludeFullStats() throws IOException {
    ManifestFile written = writeManifest(1000L, FILE);
    try (ManifestReader<DataFile> reader = ManifestFiles.read(written, FILE_IO)
        .select(ImmutableSet.of("record_count"))) {
      assertStatsDropped(firstEntryFile(reader));
    }
  }

  /**
   * Selects {@code record_count} and {@code value_counts}, without a filter, and expects only
   * the value counts to be present among the stats.
   *
   * <p>Note that, despite the method name, the file is obtained through {@code iterator()}.
   */
  @Test
  public void testReadEntriesWithSelectCertainStatNotIncludeFullStats() throws IOException {
    ManifestFile written = writeManifest(1000L, FILE);
    try (ManifestReader<DataFile> reader = ManifestFiles.read(written, FILE_IO)
        .select(ImmutableSet.of("record_count", "value_counts"))) {
      DataFile file = firstIteratedFile(reader);
      Assert.assertEquals(3L, file.recordCount());
      Assert.assertNull(file.columnSizes());
      Assert.assertEquals(VALUE_COUNT, file.valueCounts());
      Assert.assertNull(file.nullValueCounts());
      Assert.assertNull(file.lowerBounds());
      Assert.assertNull(file.upperBounds());
      Assert.assertNull(file.nanValueCounts());
    }
  }

  /** Returns the file carried by the first entry of {@code reader.entries()}. */
  private static DataFile firstEntryFile(ManifestReader<DataFile> reader) {
    CloseableIterable<ManifestEntry<DataFile>> entries = reader.entries();
    return entries.iterator().next().file();
  }

  /** Returns the first file produced by {@code reader.iterator()}. */
  private static DataFile firstIteratedFile(ManifestReader<DataFile> reader) {
    return reader.iterator().next();
  }

  /**
   * Asserts that the file reports the record count and every stat configured in {@code METRICS};
   * column sizes are expected to be null.
   */
  private void assertFullStats(DataFile file) {
    Assert.assertEquals(3L, file.recordCount());
    Assert.assertNull(file.columnSizes());
    Assert.assertEquals(VALUE_COUNT, file.valueCounts());
    Assert.assertEquals(NULL_VALUE_COUNTS, file.nullValueCounts());
    Assert.assertEquals(LOWER_BOUNDS, file.lowerBounds());
    Assert.assertEquals(UPPER_BOUNDS, file.upperBounds());
    Assert.assertEquals(NAN_VALUE_COUNTS, file.nanValueCounts());
  }

  /** Asserts that the file reports the record count but none of the stats maps. */
  private void assertStatsDropped(DataFile file) {
    // record_count is part of the selection in every test case, so it must always be present
    Assert.assertEquals(3L, file.recordCount());
    Assert.assertNull(file.columnSizes());
    Assert.assertNull(file.valueCounts());
    Assert.assertNull(file.nullValueCounts());
    Assert.assertNull(file.lowerBounds());
    Assert.assertNull(file.upperBounds());
    Assert.assertNull(file.nanValueCounts());
  }
}