blob: 7c8cabc9c1fbe3a9ba0ffc155e60f4675ea99979 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.iceberg.data.avro;
import java.io.IOException;
import java.util.List;
import java.util.Map;
import java.util.function.Supplier;
import org.apache.avro.LogicalType;
import org.apache.avro.LogicalTypes;
import org.apache.avro.Schema;
import org.apache.avro.io.DatumReader;
import org.apache.avro.io.Decoder;
import org.apache.iceberg.avro.AvroSchemaUtil;
import org.apache.iceberg.avro.AvroSchemaWithTypeVisitor;
import org.apache.iceberg.avro.SupportsRowPosition;
import org.apache.iceberg.avro.ValueReader;
import org.apache.iceberg.avro.ValueReaders;
import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap;
import org.apache.iceberg.types.Type;
import org.apache.iceberg.types.Types;
public class DataReader<T> implements DatumReader<T>, SupportsRowPosition {
public static <D> DataReader<D> create(org.apache.iceberg.Schema expectedSchema, Schema readSchema) {
return create(expectedSchema, readSchema, ImmutableMap.of());
}
public static <D> DataReader<D> create(org.apache.iceberg.Schema expectedSchema, Schema readSchema,
Map<Integer, ?> idToConstant) {
return new DataReader<>(expectedSchema, readSchema, idToConstant);
}
private final Schema readSchema;
private final ValueReader<T> reader;
private Schema fileSchema = null;
@SuppressWarnings("unchecked")
protected DataReader(org.apache.iceberg.Schema expectedSchema, Schema readSchema, Map<Integer, ?> idToConstant) {
this.readSchema = readSchema;
this.reader = (ValueReader<T>) AvroSchemaWithTypeVisitor
.visit(expectedSchema, readSchema, new ReadBuilder(idToConstant));
}
@Override
public void setSchema(Schema newFileSchema) {
this.fileSchema = Schema.applyAliases(newFileSchema, readSchema);
}
@Override
public T read(T reuse, Decoder decoder) throws IOException {
return DecoderResolver.resolveAndRead(decoder, readSchema, fileSchema, reader, reuse);
}
@Override
public void setRowPositionSupplier(Supplier<Long> posSupplier) {
if (reader instanceof SupportsRowPosition) {
((SupportsRowPosition) reader).setRowPositionSupplier(posSupplier);
}
}
protected ValueReader<?> createStructReader(Types.StructType struct,
List<ValueReader<?>> fields, Map<Integer, ?> idToConstant) {
return GenericReaders.struct(struct, fields, idToConstant);
}
private class ReadBuilder extends AvroSchemaWithTypeVisitor<ValueReader<?>> {
private final Map<Integer, ?> idToConstant;
private ReadBuilder(Map<Integer, ?> idToConstant) {
this.idToConstant = idToConstant;
}
@Override
public ValueReader<?> record(Types.StructType struct, Schema record,
List<String> names, List<ValueReader<?>> fields) {
return createStructReader(struct, fields, idToConstant);
}
@Override
public ValueReader<?> union(Type ignored, Schema union, List<ValueReader<?>> options) {
return ValueReaders.union(options);
}
@Override
public ValueReader<?> array(Types.ListType ignored, Schema array, ValueReader<?> elementReader) {
return ValueReaders.array(elementReader);
}
@Override
public ValueReader<?> map(Types.MapType iMap, Schema map, ValueReader<?> keyReader, ValueReader<?> valueReader) {
return ValueReaders.arrayMap(keyReader, valueReader);
}
@Override
public ValueReader<?> map(Types.MapType ignored, Schema map, ValueReader<?> valueReader) {
return ValueReaders.map(ValueReaders.strings(), valueReader);
}
@Override
public ValueReader<?> primitive(Type.PrimitiveType ignored, Schema primitive) {
LogicalType logicalType = primitive.getLogicalType();
if (logicalType != null) {
switch (logicalType.getName()) {
case "date":
return GenericReaders.dates();
case "time-micros":
return GenericReaders.times();
case "timestamp-micros":
if (AvroSchemaUtil.isTimestamptz(primitive)) {
return GenericReaders.timestamptz();
}
return GenericReaders.timestamps();
case "decimal":
return ValueReaders.decimal(
ValueReaders.decimalBytesReader(primitive),
((LogicalTypes.Decimal) logicalType).getScale());
case "uuid":
return ValueReaders.uuids();
default:
throw new IllegalArgumentException("Unknown logical type: " + logicalType);
}
}
switch (primitive.getType()) {
case NULL:
return ValueReaders.nulls();
case BOOLEAN:
return ValueReaders.booleans();
case INT:
return ValueReaders.ints();
case LONG:
return ValueReaders.longs();
case FLOAT:
return ValueReaders.floats();
case DOUBLE:
return ValueReaders.doubles();
case STRING:
// might want to use a binary-backed container like Utf8
return ValueReaders.strings();
case FIXED:
return ValueReaders.fixed(primitive.getFixedSize());
case BYTES:
return ValueReaders.byteBuffers();
default:
throw new IllegalArgumentException("Unsupported type: " + primitive);
}
}
}
}