blob: 38a5fa9a5a0dc5cf224cfbc0f974cadf11152ebf [file] [log] [blame]
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hcatalog.data.schema;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.hadoop.hive.metastore.api.Schema;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
import org.apache.hcatalog.common.HCatConstants;
import org.apache.hcatalog.common.HCatContext;
import org.apache.hcatalog.common.HCatException;
import org.apache.hcatalog.data.schema.HCatFieldSchema.Type;
public class HCatSchemaUtils {
public static CollectionBuilder getStructSchemaBuilder() {
return new CollectionBuilder();
}
public static CollectionBuilder getListSchemaBuilder() {
return new CollectionBuilder();
}
public static MapBuilder getMapSchemaBuilder() {
return new MapBuilder();
}
public static abstract class HCatSchemaBuilder {
public abstract HCatSchema build() throws HCatException;
}
public static class CollectionBuilder extends HCatSchemaBuilder { // for STRUCTS(multiple-add-calls) and LISTS(single-add-call)
List<HCatFieldSchema> fieldSchemas = null;
CollectionBuilder() {
fieldSchemas = new ArrayList<HCatFieldSchema>();
}
public CollectionBuilder addField(FieldSchema fieldSchema) throws HCatException {
return this.addField(getHCatFieldSchema(fieldSchema));
}
public CollectionBuilder addField(HCatFieldSchema fieldColumnSchema) {
fieldSchemas.add(fieldColumnSchema);
return this;
}
@Override
public HCatSchema build() throws HCatException {
return new HCatSchema(fieldSchemas);
}
}
public static class MapBuilder extends HCatSchemaBuilder {
Type keyType = null;
HCatSchema valueSchema = null;
@Override
public HCatSchema build() throws HCatException {
List<HCatFieldSchema> fslist = new ArrayList<HCatFieldSchema>();
fslist.add(new HCatFieldSchema(null, Type.MAP, keyType, valueSchema, null));
return new HCatSchema(fslist);
}
public MapBuilder withValueSchema(HCatSchema valueSchema) {
this.valueSchema = valueSchema;
return this;
}
public MapBuilder withKeyType(Type keyType) {
this.keyType = keyType;
return this;
}
}
/**
* Convert a HCatFieldSchema to a FieldSchema
* @param fs FieldSchema to convert
* @return HCatFieldSchema representation of FieldSchema
* @throws HCatException
*/
public static HCatFieldSchema getHCatFieldSchema(FieldSchema fs) throws HCatException {
String fieldName = fs.getName();
TypeInfo baseTypeInfo = TypeInfoUtils.getTypeInfoFromTypeString(fs.getType());
return getHCatFieldSchema(fieldName, baseTypeInfo);
}
private static HCatFieldSchema getHCatFieldSchema(String fieldName, TypeInfo fieldTypeInfo) throws HCatException {
Category typeCategory = fieldTypeInfo.getCategory();
HCatFieldSchema hCatFieldSchema;
if (Category.PRIMITIVE == typeCategory) {
hCatFieldSchema = new HCatFieldSchema(fieldName, getPrimitiveHType(fieldTypeInfo), null);
} else if (Category.STRUCT == typeCategory) {
HCatSchema subSchema = constructHCatSchema((StructTypeInfo) fieldTypeInfo);
hCatFieldSchema = new HCatFieldSchema(fieldName, HCatFieldSchema.Type.STRUCT, subSchema, null);
} else if (Category.LIST == typeCategory) {
HCatSchema subSchema = getHCatSchema(((ListTypeInfo) fieldTypeInfo).getListElementTypeInfo());
hCatFieldSchema = new HCatFieldSchema(fieldName, HCatFieldSchema.Type.ARRAY, subSchema, null);
} else if (Category.MAP == typeCategory) {
HCatFieldSchema.Type mapKeyType = getPrimitiveHType(((MapTypeInfo) fieldTypeInfo).getMapKeyTypeInfo());
HCatSchema subSchema = getHCatSchema(((MapTypeInfo) fieldTypeInfo).getMapValueTypeInfo());
hCatFieldSchema = new HCatFieldSchema(fieldName, HCatFieldSchema.Type.MAP, mapKeyType, subSchema, null);
} else {
throw new TypeNotPresentException(fieldTypeInfo.getTypeName(), null);
}
return hCatFieldSchema;
}
private static Type getPrimitiveHType(TypeInfo basePrimitiveTypeInfo) {
switch (((PrimitiveTypeInfo) basePrimitiveTypeInfo).getPrimitiveCategory()) {
case BOOLEAN:
return HCatContext.getInstance().getConf().getBoolean(
HCatConstants.HCAT_DATA_CONVERT_BOOLEAN_TO_INTEGER,
HCatConstants.HCAT_DATA_CONVERT_BOOLEAN_TO_INTEGER_DEFAULT) ?
Type.INT : Type.BOOLEAN;
case BYTE:
return HCatContext.getInstance().getConf().getBoolean(
HCatConstants.HCAT_DATA_TINY_SMALL_INT_PROMOTION,
HCatConstants.HCAT_DATA_TINY_SMALL_INT_PROMOTION_DEFAULT) ? Type.INT : Type.TINYINT;
case DOUBLE:
return Type.DOUBLE;
case FLOAT:
return Type.FLOAT;
case INT:
return Type.INT;
case LONG:
return Type.BIGINT;
case SHORT:
return HCatContext.getInstance().getConf().getBoolean(
HCatConstants.HCAT_DATA_TINY_SMALL_INT_PROMOTION,
HCatConstants.HCAT_DATA_TINY_SMALL_INT_PROMOTION_DEFAULT) ?
Type.INT : Type.SMALLINT;
case STRING:
return Type.STRING;
case BINARY:
return Type.BINARY;
default:
throw new TypeNotPresentException(((PrimitiveTypeInfo) basePrimitiveTypeInfo).getTypeName(), null);
}
}
public static HCatSchema getHCatSchema(Schema schema) throws HCatException {
return getHCatSchema(schema.getFieldSchemas());
}
public static HCatSchema getHCatSchema(List<? extends FieldSchema> fslist) throws HCatException {
CollectionBuilder builder = getStructSchemaBuilder();
for (FieldSchema fieldSchema : fslist) {
builder.addField(fieldSchema);
}
return builder.build();
}
private static HCatSchema constructHCatSchema(StructTypeInfo stypeInfo) throws HCatException {
CollectionBuilder builder = getStructSchemaBuilder();
for (String fieldName : ((StructTypeInfo) stypeInfo).getAllStructFieldNames()) {
builder.addField(getHCatFieldSchema(fieldName, ((StructTypeInfo) stypeInfo).getStructFieldTypeInfo(fieldName)));
}
return builder.build();
}
public static HCatSchema getHCatSchema(TypeInfo typeInfo) throws HCatException {
Category typeCategory = typeInfo.getCategory();
HCatSchema hCatSchema;
if (Category.PRIMITIVE == typeCategory) {
hCatSchema = getStructSchemaBuilder().addField(new HCatFieldSchema(null, getPrimitiveHType(typeInfo), null)).build();
} else if (Category.STRUCT == typeCategory) {
HCatSchema subSchema = constructHCatSchema((StructTypeInfo) typeInfo);
hCatSchema = getStructSchemaBuilder().addField(new HCatFieldSchema(null, Type.STRUCT, subSchema, null)).build();
} else if (Category.LIST == typeCategory) {
CollectionBuilder builder = getListSchemaBuilder();
builder.addField(getHCatFieldSchema(null, ((ListTypeInfo) typeInfo).getListElementTypeInfo()));
hCatSchema = new HCatSchema(Arrays.asList(new HCatFieldSchema("", Type.ARRAY, builder.build(), "")));
} else if (Category.MAP == typeCategory) {
HCatFieldSchema.Type mapKeyType = getPrimitiveHType(((MapTypeInfo) typeInfo).getMapKeyTypeInfo());
HCatSchema subSchema = getHCatSchema(((MapTypeInfo) typeInfo).getMapValueTypeInfo());
MapBuilder builder = getMapSchemaBuilder();
hCatSchema = builder.withKeyType(mapKeyType).withValueSchema(subSchema).build();
} else {
throw new TypeNotPresentException(typeInfo.getTypeName(), null);
}
return hCatSchema;
}
public static HCatSchema getHCatSchemaFromTypeString(String typeString) throws HCatException {
return getHCatSchema(TypeInfoUtils.getTypeInfoFromTypeString(typeString));
}
public static HCatSchema getHCatSchema(String schemaString) throws HCatException {
if ((schemaString == null) || (schemaString.trim().isEmpty())) {
return new HCatSchema(new ArrayList<HCatFieldSchema>()); // empty HSchema construct
}
HCatSchema outerSchema = getHCatSchemaFromTypeString("struct<" + schemaString + ">");
return outerSchema.get(0).getStructSubSchema();
}
public static FieldSchema getFieldSchema(HCatFieldSchema hcatFieldSchema) {
return new FieldSchema(hcatFieldSchema.getName(), hcatFieldSchema.getTypeString(), hcatFieldSchema.getComment());
}
public static List<FieldSchema> getFieldSchemas(List<HCatFieldSchema> hcatFieldSchemas) {
List<FieldSchema> lfs = new ArrayList<FieldSchema>();
for (HCatFieldSchema hfs : hcatFieldSchemas) {
lfs.add(getFieldSchema(hfs));
}
return lfs;
}
}