/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.iceberg.hive;

import java.util.ArrayList;
import java.util.List;
import java.util.stream.Collectors;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
import org.apache.iceberg.PartitionSpec;
import org.apache.iceberg.Schema;
import org.apache.iceberg.types.Type;
import org.apache.iceberg.types.Types;

public final class HiveSchemaUtil {
private HiveSchemaUtil() {
}
/**
* Converts the Iceberg schema to a Hive schema (list of FieldSchema objects).
* @param schema The original Iceberg schema to convert
* @return The Hive column list generated from the Iceberg schema
*/
public static List<FieldSchema> convert(Schema schema) {
return schema.columns().stream()
.map(col -> new FieldSchema(col.name(), convertToTypeString(col.type()), col.doc()))
.collect(Collectors.toList());
}
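// Example (illustrative; the column names, field ids and doc string below are hypothetical):
//
//   Schema icebergSchema = new Schema(
//       Types.NestedField.required(1, "id", Types.LongType.get()),
//       Types.NestedField.optional(2, "data", Types.StringType.get(), "payload"));
//   List<FieldSchema> hiveColumns = HiveSchemaUtil.convert(icebergSchema);
//   // hiveColumns: [FieldSchema(id, bigint, null), FieldSchema(data, string, payload)]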
/**
* Converts a Hive schema (list of FieldSchema objects) to an Iceberg schema. If any of the types is not convertible,
* an exception is thrown.
* @param fieldSchemas The list of the columns
* @return An equivalent Iceberg Schema
*/
public static Schema convert(List<FieldSchema> fieldSchemas) {
return convert(fieldSchemas, false);
}
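// Example (illustrative; hypothetical columns). Without autoConvert, only directly convertible Hive types are
// accepted, so the TINYINT column below causes the conversion to fail:
//
//   List<FieldSchema> cols = java.util.Arrays.asList(
//       new FieldSchema("id", "bigint", null),
//       new FieldSchema("flag", "tinyint", null));
//   Schema schema = HiveSchemaUtil.convert(cols); // throws: TINYINT is not convertible without autoConvert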
/**
* Converts a Hive schema (list of FieldSchema objects) to an Iceberg schema.
* @param fieldSchemas The list of the columns
* @param autoConvert If <code>true</code>, then TINYINT and SMALLINT are converted to INTEGER, and VARCHAR and CHAR are
* converted to STRING. Otherwise, if any of these types is used in the Hive schema, an exception is
* thrown.
* @return An equivalent Iceberg Schema
*/
public static Schema convert(List<FieldSchema> fieldSchemas, boolean autoConvert) {
List<String> names = new ArrayList<>(fieldSchemas.size());
List<TypeInfo> typeInfos = new ArrayList<>(fieldSchemas.size());
List<String> comments = new ArrayList<>(fieldSchemas.size());
for (FieldSchema col : fieldSchemas) {
names.add(col.getName());
typeInfos.add(TypeInfoUtils.getTypeInfoFromTypeString(col.getType()));
comments.add(col.getComment());
}
return HiveSchemaConverter.convert(names, typeInfos, comments, autoConvert);
}
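// Example (illustrative; hypothetical columns). With autoConvert enabled, the narrow and bounded Hive types are
// widened instead of rejected (TINYINT/SMALLINT -> int, VARCHAR/CHAR -> string):
//
//   List<FieldSchema> cols = java.util.Arrays.asList(
//       new FieldSchema("flag", "tinyint", null),
//       new FieldSchema("name", "varchar(64)", null));
//   Schema schema = HiveSchemaUtil.convert(cols, true);
//   // schema columns: flag -> int, name -> string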
/**
* Converts the Hive partition columns to an Iceberg identity partition specification.
* @param schema The Iceberg schema
* @param fieldSchemas The partition column specification
* @return The Iceberg partition specification
*/
public static PartitionSpec spec(Schema schema, List<FieldSchema> fieldSchemas) {
PartitionSpec.Builder builder = PartitionSpec.builderFor(schema);
fieldSchemas.forEach(fieldSchema -> builder.identity(fieldSchema.getName()));
return builder.build();
}
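// Example (illustrative; hypothetical table layout). Each listed partition column becomes an identity partition
// field, so every name must resolve to a column of the given Iceberg schema:
//
//   Schema schema = new Schema(
//       Types.NestedField.required(1, "id", Types.LongType.get()),
//       Types.NestedField.optional(2, "level", Types.StringType.get()));
//   PartitionSpec spec = HiveSchemaUtil.spec(schema,
//       java.util.Arrays.asList(new FieldSchema("level", "string", null)));
//   // spec: identity partitioning on "level"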
/**
* Converts the Hive list of column names and column types to an Iceberg schema. If any of the types is not
* convertible, an exception is thrown.
* @param names The list of the Hive column names
* @param types The list of the Hive column types
* @param comments The list of the Hive column comments
* @return The Iceberg schema
*/
public static Schema convert(List<String> names, List<TypeInfo> types, List<String> comments) {
return HiveSchemaConverter.convert(names, types, comments, false);
}
/**
* Converts the Hive list of column names and column types to an Iceberg schema.
* @param names The list of the Hive column names
* @param types The list of the Hive column types
* @param comments The list of the Hive column comments, which can be null
* @param autoConvert If <code>true</code>, then TINYINT and SMALLINT are converted to INTEGER, and VARCHAR and CHAR are
* converted to STRING. Otherwise, if any of these types is used in the Hive schema, an exception is
* thrown.
* @return The Iceberg schema
*/
public static Schema convert(List<String> names, List<TypeInfo> types, List<String> comments, boolean autoConvert) {
return HiveSchemaConverter.convert(names, types, comments, autoConvert);
}
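// Example (illustrative; hypothetical columns). The three lists are positional, so the i-th name, type and comment
// describe the same column:
//
//   List<String> names = java.util.Arrays.asList("id", "name");
//   List<TypeInfo> types = java.util.Arrays.asList(
//       TypeInfoUtils.getTypeInfoFromTypeString("bigint"),
//       TypeInfoUtils.getTypeInfoFromTypeString("char(10)"));
//   List<String> comments = java.util.Arrays.asList("row id", null);
//   Schema schema = HiveSchemaUtil.convert(names, types, comments, true);
//   // schema columns: id -> long ("row id"), name -> string (CHAR widened because autoConvert is true)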
/**
* Converts an Iceberg type to a Hive TypeInfo object.
* @param type The Iceberg type
* @return The Hive type
*/
public static TypeInfo convert(Type type) {
return TypeInfoUtils.getTypeInfoFromTypeString(convertToTypeString(type));
}
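// Example (illustrative): the Iceberg type is rendered as a Hive type string and parsed back into a TypeInfo.
//
//   TypeInfo hiveType = HiveSchemaUtil.convert(Types.DecimalType.of(10, 2));
//   // hiveType.getTypeName() -> "decimal(10,2)"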
/**
* Converts a Hive typeInfo object to an Iceberg type.
* @param typeInfo The Hive type
* @return The Iceberg type
*/
public static Type convert(TypeInfo typeInfo) {
return HiveSchemaConverter.convert(typeInfo, false);
}
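// Example (illustrative): nested Hive types convert to the corresponding nested Iceberg types.
//
//   Type icebergType = HiveSchemaUtil.convert(
//       TypeInfoUtils.getTypeInfoFromTypeString("map<string,array<int>>"));
//   // icebergType: map<string, list<int>>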
private static String convertToTypeString(Type type) {
switch (type.typeId()) {
case BOOLEAN:
return "boolean";
case INTEGER:
return "int";
case LONG:
return "bigint";
case FLOAT:
return "float";
case DOUBLE:
return "double";
case DATE:
return "date";
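// Hive has no TIME type, so Iceberg time values are stored as Hive strings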
case TIME:
return "string";
case TIMESTAMP:
Types.TimestampType timestampType = (Types.TimestampType) type;
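// timestamptz maps to TIMESTAMP WITH LOCAL TIME ZONE, which is only available when Hive 3 is on the classpath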
if (MetastoreUtil.hive3PresentOnClasspath() && timestampType.shouldAdjustToUTC()) {
return "timestamp with local time zone";
}
return "timestamp";
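// Hive has no UUID type, so both strings and UUIDs map to Hive string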
case STRING:
case UUID:
return "string";
case FIXED:
case BINARY:
// Hive has no fixed-length binary type, so both FIXED and BINARY map to Hive binary
return "binary";
case DECIMAL:
final Types.DecimalType decimalType = (Types.DecimalType) type;
return String.format("decimal(%s,%s)", decimalType.precision(), decimalType.scale());
case STRUCT:
final Types.StructType structType = type.asStructType();
final String nameToType = structType.fields().stream()
.map(f -> String.format("%s:%s", f.name(), convert(f.type())))
.collect(Collectors.joining(","));
return String.format("struct<%s>", nameToType);
case LIST:
final Types.ListType listType = type.asListType();
return String.format("array<%s>", convert(listType.elementType()));
case MAP:
final Types.MapType mapType = type.asMapType();
return String.format("map<%s,%s>", convert(mapType.keyType()), convert(mapType.valueType()));
default:
throw new UnsupportedOperationException(type + " is not supported");
}
}
}