/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.iceberg.hive;

import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.iceberg.Schema;
import org.apache.iceberg.relocated.com.google.common.base.Preconditions;
import org.apache.iceberg.types.Type;
import org.apache.iceberg.types.Types;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Package private class for converting a Hive schema to an Iceberg schema. Not intended to be used
 * directly; use {@link HiveSchemaUtil} for conversions.
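 *
 * <p>A minimal usage sketch, for illustration only (the {@code TypeInfoFactory} constants come from
 * hive-serde; production code should go through {@link HiveSchemaUtil} rather than calling this
 * class directly):
 * <pre>{@code
 *   Schema schema = HiveSchemaConverter.convert(
 *       Arrays.asList("id", "data"),
 *       Arrays.asList(TypeInfoFactory.longTypeInfo, TypeInfoFactory.stringTypeInfo),
 *       Collections.emptyList(),  // no column comments
 *       false);                   // autoConvert disabled: tinyint/smallint/char/varchar would be rejected
 * }</pre>
 * The resulting schema has two optional fields, {@code id: long} and {@code data: string}, with
 * field ids 0 and 1.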
 */
class HiveSchemaConverter {
  private static final Logger LOG = LoggerFactory.getLogger(HiveSchemaConverter.class);

  // Running counter used to assign unique, increasing field ids to the converted Iceberg fields.
  private int id;
  // When true, Hive types without an exact Iceberg equivalent (tinyint, smallint, char, varchar)
  // are converted to the closest Iceberg type instead of being rejected.
  private final boolean autoConvert;

  private HiveSchemaConverter(boolean autoConvert) {
    this.autoConvert = autoConvert;
    this.id = 0;
  }
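
  /**
   * Converts parallel lists of Hive column names, types and comments into an Iceberg {@link Schema}.
   * Every column becomes an optional Iceberg field with a freshly assigned, increasing field id.
   */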
  static Schema convert(List<String> names, List<TypeInfo> typeInfos, List<String> comments, boolean autoConvert) {
    HiveSchemaConverter converter = new HiveSchemaConverter(autoConvert);
    return new Schema(converter.convertInternal(names, typeInfos, comments));
  }
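
  /**
   * Converts a single Hive {@link TypeInfo} into the corresponding Iceberg {@link Type}.
   */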
  static Type convert(TypeInfo typeInfo, boolean autoConvert) {
    HiveSchemaConverter converter = new HiveSchemaConverter(autoConvert);
    return converter.convertType(typeInfo);
  }
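
  /**
   * Converts the given names and types into Iceberg fields, assigning each an id from the running
   * counter. Also used for nested struct fields, which are converted without comments.
   */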
  List<Types.NestedField> convertInternal(List<String> names, List<TypeInfo> typeInfos, List<String> comments) {
    List<Types.NestedField> result = new ArrayList<>(names.size());
    for (int i = 0; i < names.size(); ++i) {
      result.add(Types.NestedField.optional(id++, names.get(i), convertType(typeInfos.get(i)),
          comments.isEmpty() ? null : comments.get(i)));
    }
    return result;
  }
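
  /**
   * Maps a Hive {@link TypeInfo} to an Iceberg {@link Type}. Primitives without an exact Iceberg
   * equivalent are rejected unless auto conversion is enabled (byte/short become int, char/varchar
   * become string); complex types (struct, map, list) are converted recursively with newly assigned
   * field ids.
   */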
  Type convertType(TypeInfo typeInfo) {
    switch (typeInfo.getCategory()) {
      case PRIMITIVE:
        switch (((PrimitiveTypeInfo) typeInfo).getPrimitiveCategory()) {
          case FLOAT:
            return Types.FloatType.get();
          case DOUBLE:
            return Types.DoubleType.get();
          case BOOLEAN:
            return Types.BooleanType.get();
          case BYTE:
          case SHORT:
            Preconditions.checkArgument(autoConvert, "Unsupported Hive type: %s, use integer instead",
                ((PrimitiveTypeInfo) typeInfo).getPrimitiveCategory());
            LOG.debug("Using auto conversion from SHORT/BYTE to INTEGER");
            return Types.IntegerType.get();
          case INT:
            return Types.IntegerType.get();
          case LONG:
            return Types.LongType.get();
          case BINARY:
            return Types.BinaryType.get();
          case CHAR:
          case VARCHAR:
            Preconditions.checkArgument(autoConvert, "Unsupported Hive type: %s, use string instead",
                ((PrimitiveTypeInfo) typeInfo).getPrimitiveCategory());
            LOG.debug("Using auto conversion from CHAR/VARCHAR to STRING");
            return Types.StringType.get();
          case STRING:
            return Types.StringType.get();
          case TIMESTAMP:
            return Types.TimestampType.withoutZone();
          case DATE:
            return Types.DateType.get();
          case DECIMAL:
            DecimalTypeInfo decimalTypeInfo = (DecimalTypeInfo) typeInfo;
            return Types.DecimalType.of(decimalTypeInfo.precision(), decimalTypeInfo.scale());
          case INTERVAL_YEAR_MONTH:
          case INTERVAL_DAY_TIME:
          default:
            // special case for Timestamp with Local TZ which is only available in Hive3
            if ("TIMESTAMPLOCALTZ".equalsIgnoreCase(((PrimitiveTypeInfo) typeInfo).getPrimitiveCategory().name())) {
              return Types.TimestampType.withZone();
            }
            throw new IllegalArgumentException("Unsupported Hive type (" +
                ((PrimitiveTypeInfo) typeInfo).getPrimitiveCategory() +
                ") for Iceberg tables.");
        }
      case STRUCT:
        StructTypeInfo structTypeInfo = (StructTypeInfo) typeInfo;
        // struct fields are converted through convertInternal so that they also get fresh field ids
        List<Types.NestedField> fields =
            convertInternal(structTypeInfo.getAllStructFieldNames(), structTypeInfo.getAllStructFieldTypeInfos(),
                Collections.emptyList());
        return Types.StructType.of(fields);
      case MAP:
        MapTypeInfo mapTypeInfo = (MapTypeInfo) typeInfo;
        // convert the nested key/value types first, then assign ids to the map's key and value fields
        Type keyType = convertType(mapTypeInfo.getMapKeyTypeInfo());
        Type valueType = convertType(mapTypeInfo.getMapValueTypeInfo());
        int keyId = id++;
        int valueId = id++;
        return Types.MapType.ofOptional(keyId, valueId, keyType, valueType);
      case LIST:
        ListTypeInfo listTypeInfo = (ListTypeInfo) typeInfo;
        Type listType = convertType(listTypeInfo.getListElementTypeInfo());
        return Types.ListType.ofOptional(id++, listType);
      case UNION:
      default:
        // Hive UNION types have no Iceberg equivalent
        throw new IllegalArgumentException("Unknown type " + typeInfo.getCategory());
    }
  }
}