// blob: 8dc75c1a2db3b9e683f7355334a4602233d29877 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.iceberg.arrow;
import static org.assertj.core.api.Assertions.assertThat;

import org.apache.arrow.vector.types.FloatingPointPrecision;
import org.apache.arrow.vector.types.pojo.ArrowType;
import org.apache.arrow.vector.types.pojo.Field;
import org.apache.iceberg.Schema;
import org.apache.iceberg.types.Type;
import org.apache.iceberg.types.Types;
import org.apache.iceberg.types.Types.BooleanType;
import org.apache.iceberg.types.Types.DateType;
import org.apache.iceberg.types.Types.DoubleType;
import org.apache.iceberg.types.Types.FloatType;
import org.apache.iceberg.types.Types.IntegerType;
import org.apache.iceberg.types.Types.ListType;
import org.apache.iceberg.types.Types.LongType;
import org.apache.iceberg.types.Types.MapType;
import org.apache.iceberg.types.Types.StringType;
import org.apache.iceberg.types.Types.TimeType;
import org.apache.iceberg.types.Types.TimestampType;
import org.junit.jupiter.api.Test;
public class ArrowSchemaUtilTest {
private static final String INTEGER_FIELD = "i";
private static final String BOOLEAN_FIELD = "b";
private static final String DOUBLE_FIELD = "d";
private static final String STRING_FIELD = "s";
private static final String DATE_FIELD = "d2";
private static final String TIMESTAMP_FIELD = "ts";
private static final String LONG_FIELD = "l";
private static final String FLOAT_FIELD = "f";
private static final String TIME_FIELD = "tt";
private static final String FIXED_WIDTH_BINARY_FIELD = "fbt";
private static final String BINARY_FIELD = "bt";
private static final String DECIMAL_FIELD = "dt";
private static final String STRUCT_FIELD = "st";
private static final String LIST_FIELD = "lt";
private static final String MAP_FIELD = "mt";
private static final String UUID_FIELD = "uu";
@Test
public void convertPrimitive() {
Schema iceberg =
new Schema(
Types.NestedField.optional(0, INTEGER_FIELD, IntegerType.get()),
Types.NestedField.optional(1, BOOLEAN_FIELD, BooleanType.get()),
Types.NestedField.required(2, DOUBLE_FIELD, DoubleType.get()),
Types.NestedField.required(3, STRING_FIELD, StringType.get()),
Types.NestedField.optional(4, DATE_FIELD, DateType.get()),
Types.NestedField.optional(5, TIMESTAMP_FIELD, TimestampType.withZone()),
Types.NestedField.optional(6, LONG_FIELD, LongType.get()),
Types.NestedField.optional(7, FLOAT_FIELD, FloatType.get()),
Types.NestedField.optional(8, TIME_FIELD, TimeType.get()),
Types.NestedField.optional(9, BINARY_FIELD, Types.BinaryType.get()),
Types.NestedField.optional(10, DECIMAL_FIELD, Types.DecimalType.of(1, 1)),
Types.NestedField.optional(
12, LIST_FIELD, Types.ListType.ofOptional(13, Types.IntegerType.get())),
Types.NestedField.required(
14,
MAP_FIELD,
Types.MapType.ofOptional(15, 16, StringType.get(), IntegerType.get())),
Types.NestedField.optional(17, FIXED_WIDTH_BINARY_FIELD, Types.FixedType.ofLength(10)),
Types.NestedField.optional(18, UUID_FIELD, Types.UUIDType.get()));
org.apache.arrow.vector.types.pojo.Schema arrow = ArrowSchemaUtil.convert(iceberg);
validate(iceberg, arrow);
}
@Test
public void convertComplex() {
Schema iceberg =
new Schema(
Types.NestedField.optional(
0, "m", MapType.ofOptional(1, 2, StringType.get(), LongType.get())),
Types.NestedField.required(
3,
"m2",
MapType.ofOptional(
4, 5, StringType.get(), ListType.ofOptional(6, TimestampType.withoutZone()))));
org.apache.arrow.vector.types.pojo.Schema arrow = ArrowSchemaUtil.convert(iceberg);
assertThat(arrow.getFields()).hasSameSizeAs(iceberg.columns());
}
private void validate(Schema iceberg, org.apache.arrow.vector.types.pojo.Schema arrow) {
assertThat(arrow.getFields()).hasSameSizeAs(iceberg.columns());
for (Types.NestedField nf : iceberg.columns()) {
Field field = arrow.findField(nf.name());
assertThat(field).as("Missing field: " + nf).isNotNull();
validate(nf.type(), field, nf.isOptional());
}
}
private void validate(Type iceberg, Field field, boolean optional) {
ArrowType arrowType = field.getType();
assertThat(field.isNullable()).isEqualTo(optional);
switch (iceberg.typeId()) {
case BOOLEAN:
assertThat(field.getName()).isEqualTo(BOOLEAN_FIELD);
assertThat(arrowType.getTypeID()).isEqualTo(ArrowType.ArrowTypeID.Bool);
break;
case INTEGER:
assertThat(field.getName()).isEqualTo(INTEGER_FIELD);
assertThat(arrowType.getTypeID()).isEqualTo(ArrowType.ArrowTypeID.Int);
break;
case LONG:
assertThat(field.getName()).isEqualTo(LONG_FIELD);
assertThat(arrowType.getTypeID()).isEqualTo(ArrowType.ArrowTypeID.Int);
break;
case FLOAT:
assertThat(field.getName()).isEqualTo(FLOAT_FIELD);
assertThat(arrowType.getTypeID()).isEqualTo(ArrowType.ArrowTypeID.FloatingPoint);
break;
case DOUBLE:
assertThat(field.getName()).isEqualTo(DOUBLE_FIELD);
assertThat(arrowType.getTypeID()).isEqualTo(ArrowType.ArrowTypeID.FloatingPoint);
break;
case DATE:
assertThat(field.getName()).isEqualTo(DATE_FIELD);
assertThat(arrowType.getTypeID()).isEqualTo(ArrowType.ArrowTypeID.Date);
break;
case TIME:
assertThat(field.getName()).isEqualTo(TIME_FIELD);
assertThat(arrowType.getTypeID()).isEqualTo(ArrowType.ArrowTypeID.Time);
break;
case TIMESTAMP:
assertThat(field.getName()).isEqualTo(TIMESTAMP_FIELD);
assertThat(arrowType.getTypeID()).isEqualTo(ArrowType.ArrowTypeID.Timestamp);
break;
case STRING:
assertThat(field.getName()).isEqualTo(STRING_FIELD);
assertThat(arrowType.getTypeID()).isEqualTo(ArrowType.ArrowTypeID.Utf8);
break;
case FIXED:
assertThat(field.getName()).isEqualTo(FIXED_WIDTH_BINARY_FIELD);
assertThat(arrowType.getTypeID()).isEqualTo(ArrowType.FixedSizeBinary.TYPE_TYPE);
break;
case BINARY:
assertThat(field.getName()).isEqualTo(BINARY_FIELD);
assertThat(arrowType.getTypeID()).isEqualTo(ArrowType.Binary.TYPE_TYPE);
break;
case DECIMAL:
assertThat(field.getName()).isEqualTo(DECIMAL_FIELD);
assertThat(arrowType.getTypeID()).isEqualTo(ArrowType.Decimal.TYPE_TYPE);
break;
case STRUCT:
assertThat(field.getName()).isEqualTo(STRUCT_FIELD);
assertThat(arrowType.getTypeID()).isEqualTo(ArrowType.Struct.TYPE_TYPE);
break;
case LIST:
assertThat(field.getName()).isEqualTo(LIST_FIELD);
assertThat(arrowType.getTypeID()).isEqualTo(ArrowType.List.TYPE_TYPE);
break;
case MAP:
assertThat(field.getName()).isEqualTo(MAP_FIELD);
assertThat(arrowType.getTypeID()).isEqualTo(ArrowType.ArrowTypeID.Map);
break;
case UUID:
assertThat(field.getName()).isEqualTo(UUID_FIELD);
assertThat(arrowType.getTypeID()).isEqualTo(ArrowType.FixedSizeBinary.TYPE_TYPE);
break;
default:
throw new UnsupportedOperationException("Check not implemented for type: " + iceberg);
}
}
}