blob: 0137adf4b6f17d2696997a472be405bb404c2f5b [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.iceberg.hive;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.stream.Collectors;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.hadoop.hive.serde.serdeConstants;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
import org.apache.iceberg.AssertHelpers;
import org.apache.iceberg.Schema;
import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList;
import org.apache.iceberg.types.Type;
import org.apache.iceberg.types.Types;
import org.junit.Assert;
import org.junit.Test;
import static org.apache.iceberg.types.Types.NestedField.optional;
public class TestHiveSchemaUtil {
private static final Schema SIMPLE_ICEBERG_SCHEMA = new Schema(
optional(0, "customer_id", Types.LongType.get(), "customer comment"),
optional(1, "first_name", Types.StringType.get(), "first name comment")
);
private static final Schema COMPLEX_ICEBERG_SCHEMA = new Schema(
optional(0, "id", Types.LongType.get(), ""),
optional(1, "name", Types.StringType.get(), ""),
optional(2, "employee_info", Types.StructType.of(
optional(3, "employer", Types.StringType.get()),
optional(4, "id", Types.LongType.get()),
optional(5, "address", Types.StringType.get())
), ""),
optional(6, "places_lived", Types.ListType.ofOptional(10, Types.StructType.of(
optional(7, "street", Types.StringType.get()),
optional(8, "city", Types.StringType.get()),
optional(9, "country", Types.StringType.get())
)), ""),
optional(11, "memorable_moments", Types.MapType.ofOptional(15, 16,
Types.StringType.get(),
Types.StructType.of(
optional(12, "year", Types.IntegerType.get()),
optional(13, "place", Types.StringType.get()),
optional(14, "details", Types.StringType.get())
)), ""),
optional(17, "current_address", Types.StructType.of(
optional(18, "street_address", Types.StructType.of(
optional(19, "street_number", Types.IntegerType.get()),
optional(20, "street_name", Types.StringType.get()),
optional(21, "street_type", Types.StringType.get())
)),
optional(22, "country", Types.StringType.get()),
optional(23, "postal_code", Types.StringType.get())
), "")
);
private static final List<FieldSchema> SIMPLE_HIVE_SCHEMA = ImmutableList.of(
new FieldSchema("customer_id", serdeConstants.BIGINT_TYPE_NAME, "customer comment"),
new FieldSchema("first_name", serdeConstants.STRING_TYPE_NAME, "first name comment")
);
private static final List<FieldSchema> COMPLEX_HIVE_SCHEMA = ImmutableList.of(
new FieldSchema("id", "bigint", ""),
new FieldSchema("name", "string", ""),
new FieldSchema("employee_info", "struct<employer:string,id:bigint,address:string>", ""),
new FieldSchema("places_lived", "array<struct<street:string,city:string,country:string>>", ""),
new FieldSchema("memorable_moments", "map<string,struct<year:int,place:string,details:string>>", ""),
new FieldSchema("current_address", "struct<street_address:struct<street_number:int,street_name:string," +
"street_type:string>,country:string,postal_code:string>", "")
);
@Test
public void testSimpleSchemaConvertToIcebergSchema() {
Assert.assertEquals(SIMPLE_ICEBERG_SCHEMA.asStruct(), HiveSchemaUtil.convert(SIMPLE_HIVE_SCHEMA).asStruct());
}
@Test
public void testSimpleSchemaConvertToIcebergSchemaFromNameAndTypeLists() {
List<String> names = SIMPLE_HIVE_SCHEMA.stream().map(field -> field.getName()).collect(Collectors.toList());
List<TypeInfo> types = SIMPLE_HIVE_SCHEMA.stream()
.map(field -> TypeInfoUtils.getTypeInfoFromTypeString(field.getType()))
.collect(Collectors.toList());
List<String> comments = SIMPLE_HIVE_SCHEMA.stream().map(FieldSchema::getComment).collect(Collectors.toList());
Assert.assertEquals(SIMPLE_ICEBERG_SCHEMA.asStruct(), HiveSchemaUtil.convert(names, types, comments).asStruct());
}
@Test
public void testComplexSchemaConvertToIcebergSchema() {
Assert.assertEquals(COMPLEX_ICEBERG_SCHEMA.asStruct(), HiveSchemaUtil.convert(COMPLEX_HIVE_SCHEMA).asStruct());
}
@Test
public void testSchemaConvertToIcebergSchemaForEveryPrimitiveType() {
Schema schemaWithEveryType = HiveSchemaUtil.convert(getSupportedFieldSchemas());
Assert.assertEquals(getSchemaWithSupportedTypes().asStruct(), schemaWithEveryType.asStruct());
}
@Test
public void testNotSupportedTypes() {
for (FieldSchema notSupportedField : getNotSupportedFieldSchemas()) {
AssertHelpers.assertThrows("should throw exception", IllegalArgumentException.class,
"Unsupported Hive type", () -> {
HiveSchemaUtil.convert(new ArrayList<>(Arrays.asList(notSupportedField)));
}
);
}
}
@Test
public void testSimpleSchemaConvertToHiveSchema() {
Assert.assertEquals(SIMPLE_HIVE_SCHEMA, HiveSchemaUtil.convert(SIMPLE_ICEBERG_SCHEMA));
}
@Test
public void testComplexSchemaConvertToHiveSchema() {
Assert.assertEquals(COMPLEX_HIVE_SCHEMA, HiveSchemaUtil.convert(COMPLEX_ICEBERG_SCHEMA));
}
@Test
public void testSimpleTypeAndTypeInfoConvert() {
// Test for every supported type
List<FieldSchema> fieldSchemas = getSupportedFieldSchemas();
List<Types.NestedField> nestedFields = getSchemaWithSupportedTypes().columns();
for (int i = 0; i < fieldSchemas.size(); ++i) {
checkConvert(TypeInfoUtils.getTypeInfoFromTypeString(fieldSchemas.get(i).getType()), nestedFields.get(i).type());
}
}
@Test
public void testComplexTypeAndTypeInfoConvert() {
for (int i = 0; i < COMPLEX_HIVE_SCHEMA.size(); ++i) {
checkConvert(TypeInfoUtils.getTypeInfoFromTypeString(COMPLEX_HIVE_SCHEMA.get(i).getType()),
COMPLEX_ICEBERG_SCHEMA.columns().get(i).type());
}
}
protected List<FieldSchema> getSupportedFieldSchemas() {
List<FieldSchema> fields = new ArrayList<>();
fields.add(new FieldSchema("c_float", serdeConstants.FLOAT_TYPE_NAME, "float comment"));
fields.add(new FieldSchema("c_double", serdeConstants.DOUBLE_TYPE_NAME, "double comment"));
fields.add(new FieldSchema("c_boolean", serdeConstants.BOOLEAN_TYPE_NAME, "boolean comment"));
fields.add(new FieldSchema("c_int", serdeConstants.INT_TYPE_NAME, "int comment"));
fields.add(new FieldSchema("c_long", serdeConstants.BIGINT_TYPE_NAME, "long comment"));
fields.add(new FieldSchema("c_binary", serdeConstants.BINARY_TYPE_NAME, null));
fields.add(new FieldSchema("c_string", serdeConstants.STRING_TYPE_NAME, null));
fields.add(new FieldSchema("c_timestamp", serdeConstants.TIMESTAMP_TYPE_NAME, null));
fields.add(new FieldSchema("c_date", serdeConstants.DATE_TYPE_NAME, null));
fields.add(new FieldSchema("c_decimal", serdeConstants.DECIMAL_TYPE_NAME + "(38,10)", null));
return fields;
}
protected List<FieldSchema> getNotSupportedFieldSchemas() {
List<FieldSchema> fields = new ArrayList<>();
fields.add(new FieldSchema("c_byte", serdeConstants.TINYINT_TYPE_NAME, ""));
fields.add(new FieldSchema("c_short", serdeConstants.SMALLINT_TYPE_NAME, ""));
fields.add(new FieldSchema("c_char", serdeConstants.CHAR_TYPE_NAME + "(5)", ""));
fields.add(new FieldSchema("c_varchar", serdeConstants.VARCHAR_TYPE_NAME + "(5)", ""));
fields.add(new FieldSchema("c_interval_date", serdeConstants.INTERVAL_YEAR_MONTH_TYPE_NAME, ""));
fields.add(new FieldSchema("c_interval_time", serdeConstants.INTERVAL_DAY_TIME_TYPE_NAME, ""));
return fields;
}
protected Schema getSchemaWithSupportedTypes() {
return new Schema(
optional(0, "c_float", Types.FloatType.get(), "float comment"),
optional(1, "c_double", Types.DoubleType.get(), "double comment"),
optional(2, "c_boolean", Types.BooleanType.get(), "boolean comment"),
optional(3, "c_int", Types.IntegerType.get(), "int comment"),
optional(4, "c_long", Types.LongType.get(), "long comment"),
optional(5, "c_binary", Types.BinaryType.get()),
optional(6, "c_string", Types.StringType.get()),
optional(7, "c_timestamp", Types.TimestampType.withoutZone()),
optional(8, "c_date", Types.DateType.get()),
optional(9, "c_decimal", Types.DecimalType.of(38, 10)));
}
/**
* Check conversion for 1-on-1 mappings
* @param typeInfo Hive type
* @param type Iceberg type
*/
private void checkConvert(TypeInfo typeInfo, Type type) {
// Convert to TypeInfo
Assert.assertEquals(typeInfo, HiveSchemaUtil.convert(type));
// Convert to Type
assertEquals(type, HiveSchemaUtil.convert(typeInfo));
}
/**
* Compares the nested types without checking the ids.
* @param expected The expected types to compare
* @param actual The actual types to compare
*/
private void assertEquals(Type expected, Type actual) {
if (actual.isPrimitiveType()) {
Assert.assertEquals(expected, actual);
} else {
List<Types.NestedField> expectedFields = ((Type.NestedType) expected).fields();
List<Types.NestedField> actualFields = ((Type.NestedType) actual).fields();
for (int i = 0; i < expectedFields.size(); ++i) {
assertEquals(expectedFields.get(i).type(), actualFields.get(i).type());
Assert.assertEquals(expectedFields.get(i).name(), actualFields.get(i).name());
}
}
}
}