blob: 668eca139494d488cb114ca5810c8d311ba09798 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.iceberg;
import java.io.Serializable;
import java.util.Collections;
import java.util.List;
import java.util.function.Function;
import org.apache.avro.Schema;
import org.apache.avro.generic.IndexedRecord;
import org.apache.avro.specific.SpecificData.SchemaConstructable;
import org.apache.iceberg.avro.AvroSchemaUtil;
import org.apache.iceberg.io.InputFile;
import org.apache.iceberg.relocated.com.google.common.base.MoreObjects;
import org.apache.iceberg.relocated.com.google.common.base.Objects;
import org.apache.iceberg.relocated.com.google.common.collect.Lists;
import org.apache.iceberg.types.Types;
public class GenericManifestFile
implements ManifestFile, StructLike, IndexedRecord, SchemaConstructable, Serializable {
private static final Schema AVRO_SCHEMA = AvroSchemaUtil.convert(
ManifestFile.schema(), "manifest_file");
private transient Schema avroSchema; // not final for Java serialization
private int[] fromProjectionPos;
// data fields
private InputFile file = null;
private String manifestPath = null;
private Long length = null;
private int specId = -1;
private ManifestContent content = ManifestContent.DATA;
private long sequenceNumber = 0;
private long minSequenceNumber = 0;
private Long snapshotId = null;
private Integer addedFilesCount = null;
private Integer existingFilesCount = null;
private Integer deletedFilesCount = null;
private Long addedRowsCount = null;
private Long existingRowsCount = null;
private Long deletedRowsCount = null;
private List<PartitionFieldSummary> partitions = null;
/**
* Used by Avro reflection to instantiate this class when reading manifest files.
*/
public GenericManifestFile(org.apache.avro.Schema avroSchema) {
this.avroSchema = avroSchema;
List<Types.NestedField> fields = AvroSchemaUtil.convert(avroSchema).asStructType().fields();
List<Types.NestedField> allFields = ManifestFile.schema().asStruct().fields();
this.fromProjectionPos = new int[fields.size()];
for (int i = 0; i < fromProjectionPos.length; i += 1) {
boolean found = false;
for (int j = 0; j < allFields.size(); j += 1) {
if (fields.get(i).fieldId() == allFields.get(j).fieldId()) {
found = true;
fromProjectionPos[i] = j;
}
}
if (!found) {
throw new IllegalArgumentException("Cannot find projected field: " + fields.get(i));
}
}
}
GenericManifestFile(InputFile file, int specId) {
this.avroSchema = AVRO_SCHEMA;
this.file = file;
this.manifestPath = file.location();
this.length = null; // lazily loaded from file
this.specId = specId;
this.sequenceNumber = 0;
this.minSequenceNumber = 0;
this.snapshotId = null;
this.addedFilesCount = null;
this.addedRowsCount = null;
this.existingFilesCount = null;
this.existingRowsCount = null;
this.deletedFilesCount = null;
this.deletedRowsCount = null;
this.partitions = null;
this.fromProjectionPos = null;
}
public GenericManifestFile(String path, long length, int specId, ManifestContent content,
long sequenceNumber, long minSequenceNumber, Long snapshotId,
int addedFilesCount, long addedRowsCount, int existingFilesCount,
long existingRowsCount, int deletedFilesCount, long deletedRowsCount,
List<PartitionFieldSummary> partitions) {
this.avroSchema = AVRO_SCHEMA;
this.manifestPath = path;
this.length = length;
this.specId = specId;
this.content = content;
this.sequenceNumber = sequenceNumber;
this.minSequenceNumber = minSequenceNumber;
this.snapshotId = snapshotId;
this.addedFilesCount = addedFilesCount;
this.addedRowsCount = addedRowsCount;
this.existingFilesCount = existingFilesCount;
this.existingRowsCount = existingRowsCount;
this.deletedFilesCount = deletedFilesCount;
this.deletedRowsCount = deletedRowsCount;
this.partitions = partitions;
this.fromProjectionPos = null;
}
/**
* Copy constructor.
*
* @param toCopy a generic manifest file to copy.
*/
private GenericManifestFile(GenericManifestFile toCopy) {
this.avroSchema = toCopy.avroSchema;
this.manifestPath = toCopy.manifestPath;
this.length = toCopy.length;
this.specId = toCopy.specId;
this.content = toCopy.content;
this.sequenceNumber = toCopy.sequenceNumber;
this.minSequenceNumber = toCopy.minSequenceNumber;
this.snapshotId = toCopy.snapshotId;
this.addedFilesCount = toCopy.addedFilesCount;
this.addedRowsCount = toCopy.addedRowsCount;
this.existingFilesCount = toCopy.existingFilesCount;
this.existingRowsCount = toCopy.existingRowsCount;
this.deletedFilesCount = toCopy.deletedFilesCount;
this.deletedRowsCount = toCopy.deletedRowsCount;
this.partitions = copyList(toCopy.partitions, PartitionFieldSummary::copy);
this.fromProjectionPos = toCopy.fromProjectionPos;
}
/**
* Constructor for Java serialization.
*/
GenericManifestFile() {
}
@Override
public String path() {
return manifestPath;
}
public Long lazyLength() {
if (length == null) {
if (file != null) {
// this was created from an input file and length is lazily loaded
this.length = file.getLength();
} else {
// this was loaded from a file without projecting length, throw an exception
return null;
}
}
return length;
}
@Override
public long length() {
return lazyLength();
}
@Override
public int partitionSpecId() {
return specId;
}
@Override
public ManifestContent content() {
return content;
}
@Override
public long sequenceNumber() {
return sequenceNumber;
}
@Override
public long minSequenceNumber() {
return minSequenceNumber;
}
@Override
public Long snapshotId() {
return snapshotId;
}
@Override
public Integer addedFilesCount() {
return addedFilesCount;
}
@Override
public Long addedRowsCount() {
return addedRowsCount;
}
@Override
public Integer existingFilesCount() {
return existingFilesCount;
}
@Override
public Long existingRowsCount() {
return existingRowsCount;
}
@Override
public Integer deletedFilesCount() {
return deletedFilesCount;
}
@Override
public Long deletedRowsCount() {
return deletedRowsCount;
}
@Override
public List<PartitionFieldSummary> partitions() {
return partitions;
}
@Override
public int size() {
return ManifestFile.schema().columns().size();
}
@Override
public <T> T get(int pos, Class<T> javaClass) {
return javaClass.cast(get(pos));
}
@Override
public Object get(int i) {
int pos = i;
// if the schema was projected, map the incoming ordinal to the expected one
if (fromProjectionPos != null) {
pos = fromProjectionPos[i];
}
switch (pos) {
case 0:
return manifestPath;
case 1:
return lazyLength();
case 2:
return specId;
case 3:
return content.id();
case 4:
return sequenceNumber;
case 5:
return minSequenceNumber;
case 6:
return snapshotId;
case 7:
return addedFilesCount;
case 8:
return existingFilesCount;
case 9:
return deletedFilesCount;
case 10:
return addedRowsCount;
case 11:
return existingRowsCount;
case 12:
return deletedRowsCount;
case 13:
return partitions;
default:
throw new UnsupportedOperationException("Unknown field ordinal: " + pos);
}
}
@Override
@SuppressWarnings("unchecked")
public <T> void set(int i, T value) {
int pos = i;
// if the schema was projected, map the incoming ordinal to the expected one
if (fromProjectionPos != null) {
pos = fromProjectionPos[i];
}
switch (pos) {
case 0:
// always coerce to String for Serializable
this.manifestPath = value.toString();
return;
case 1:
this.length = (Long) value;
return;
case 2:
this.specId = (Integer) value;
return;
case 3:
this.content = value != null ? ManifestContent.values()[(Integer) value] : ManifestContent.DATA;
return;
case 4:
this.sequenceNumber = value != null ? (Long) value : 0;
return;
case 5:
this.minSequenceNumber = value != null ? (Long) value : 0;
return;
case 6:
this.snapshotId = (Long) value;
return;
case 7:
this.addedFilesCount = (Integer) value;
return;
case 8:
this.existingFilesCount = (Integer) value;
return;
case 9:
this.deletedFilesCount = (Integer) value;
return;
case 10:
this.addedRowsCount = (Long) value;
return;
case 11:
this.existingRowsCount = (Long) value;
return;
case 12:
this.deletedRowsCount = (Long) value;
return;
case 13:
this.partitions = (List<PartitionFieldSummary>) value;
return;
default:
// ignore the object, it must be from a newer version of the format
}
}
@Override
public void put(int i, Object v) {
set(i, v);
}
@Override
public ManifestFile copy() {
return new GenericManifestFile(this);
}
@Override
public Schema getSchema() {
return avroSchema;
}
@Override
public boolean equals(Object other) {
if (this == other) {
return true;
} else if (!(other instanceof GenericManifestFile)) {
return false;
}
GenericManifestFile that = (GenericManifestFile) other;
return Objects.equal(manifestPath, that.manifestPath);
}
@Override
public int hashCode() {
return Objects.hashCode(manifestPath);
}
@Override
public String toString() {
return MoreObjects.toStringHelper(this)
.add("path", manifestPath)
.add("length", length)
.add("partition_spec_id", specId)
.add("added_snapshot_id", snapshotId)
.add("added_data_files_count", addedFilesCount)
.add("added_rows_count", addedRowsCount)
.add("existing_data_files_count", existingFilesCount)
.add("existing_rows_count", existingRowsCount)
.add("deleted_data_files_count", deletedFilesCount)
.add("deleted_rows_count", deletedRowsCount)
.add("partitions", partitions)
.toString();
}
public static CopyBuilder copyOf(ManifestFile manifestFile) {
return new CopyBuilder(manifestFile);
}
public static class CopyBuilder {
private final GenericManifestFile manifestFile;
private CopyBuilder(ManifestFile toCopy) {
if (toCopy instanceof GenericManifestFile) {
this.manifestFile = new GenericManifestFile((GenericManifestFile) toCopy);
} else {
this.manifestFile = new GenericManifestFile(
toCopy.path(), toCopy.length(), toCopy.partitionSpecId(), toCopy.content(),
toCopy.sequenceNumber(), toCopy.minSequenceNumber(), toCopy.snapshotId(),
toCopy.addedFilesCount(), toCopy.addedRowsCount(), toCopy.existingFilesCount(),
toCopy.existingRowsCount(), toCopy.deletedFilesCount(), toCopy.deletedRowsCount(),
copyList(toCopy.partitions(), PartitionFieldSummary::copy));
}
}
public CopyBuilder withSnapshotId(Long newSnapshotId) {
manifestFile.snapshotId = newSnapshotId;
return this;
}
public ManifestFile build() {
return manifestFile;
}
}
private static <E, R> List<R> copyList(List<E> list, Function<E, R> transform) {
if (list != null) {
List<R> copy = Lists.newArrayListWithExpectedSize(list.size());
for (E element : list) {
copy.add(transform.apply(element));
}
return Collections.unmodifiableList(copy);
}
return null;
}
}