blob: a0b4a0ebba8cefa23f3ed39b4e4ed77ac0dd7cdf [file] [log] [blame]
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.crunch.types.orc;
import java.sql.Date;
import java.sql.Timestamp;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import org.apache.crunch.CrunchRuntimeException;
import org.apache.hadoop.hive.common.type.HiveChar;
import org.apache.hadoop.hive.common.type.HiveDecimal;
import org.apache.hadoop.hive.common.type.HiveVarchar;
import org.apache.hadoop.hive.ql.io.orc.OrcStruct;
import org.apache.hadoop.hive.serde.serdeConstants;
import org.apache.hadoop.hive.serde2.SerDeException;
import org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe;
import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.SettableListObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.SettableMapObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.SettableStructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.ObjectInspectorOptions;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.SettableBinaryObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.SettableBooleanObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.SettableByteObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.SettableDateObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.SettableDoubleObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.SettableFloatObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.SettableHiveCharObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.SettableHiveDecimalObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.SettableHiveVarcharObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.SettableIntObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.SettableLongObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.SettableShortObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.SettableStringObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.SettableTimestampObjectInspector;
import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
public class OrcUtils {
/**
* Generate TypeInfo for a given java class based on reflection
*
* @param typeClass
* @return
*/
public static TypeInfo getTypeInfo(Class<?> typeClass) {
ObjectInspector oi = ObjectInspectorFactory
.getReflectionObjectInspector(typeClass, ObjectInspectorOptions.JAVA);
return TypeInfoUtils.getTypeInfoFromObjectInspector(oi);
}
/**
* Create an object of OrcStruct given a type string and a list of objects
*
* @param typeInfo
* @param objs
* @return
*/
public static OrcStruct createOrcStruct(TypeInfo typeInfo, Object... objs) {
SettableStructObjectInspector oi = (SettableStructObjectInspector) OrcStruct
.createObjectInspector(typeInfo);
List<StructField> fields = (List<StructField>) oi.getAllStructFieldRefs();
OrcStruct result = (OrcStruct) oi.create();
result.setNumFields(fields.size());
for (int i = 0; i < fields.size(); i++) {
oi.setStructFieldData(result, fields.get(i), objs[i]);
}
return result;
}
/**
* Create a binary serde for OrcStruct serialization/deserialization
*
* @param typeInfo
* @return
*/
public static BinarySortableSerDe createBinarySerde(TypeInfo typeInfo){
BinarySortableSerDe serde = new BinarySortableSerDe();
StringBuffer nameSb = new StringBuffer();
StringBuffer typeSb = new StringBuffer();
StructTypeInfo sti = (StructTypeInfo) typeInfo;
for (String name : sti.getAllStructFieldNames()) {
nameSb.append(name);
nameSb.append(',');
}
for (TypeInfo info : sti.getAllStructFieldTypeInfos()) {
typeSb.append(info.toString());
typeSb.append(',');
}
Properties tbl = new Properties();
String names = nameSb.length() > 0 ? nameSb.substring(0,
nameSb.length() - 1) : "";
String types = typeSb.length() > 0 ? typeSb.substring(0,
typeSb.length() - 1) : "";
tbl.setProperty(serdeConstants.LIST_COLUMNS, names);
tbl.setProperty(serdeConstants.LIST_COLUMN_TYPES, types);
try {
serde.initialize(null, tbl);
} catch (SerDeException e) {
throw new CrunchRuntimeException("Unable to initialize binary serde");
}
return serde;
}
/**
* Convert an object from / to OrcStruct
*
* @param from
* @param fromOi
* @param toOi
* @return
*/
public static Object convert(Object from, ObjectInspector fromOi, ObjectInspector toOi) {
if (from == null) {
return null;
}
Object to;
switch (fromOi.getCategory()) {
case PRIMITIVE:
PrimitiveObjectInspector fromPoi = (PrimitiveObjectInspector) fromOi;
switch (fromPoi.getPrimitiveCategory()) {
case FLOAT:
SettableFloatObjectInspector floatOi = (SettableFloatObjectInspector) toOi;
return floatOi.create((Float) fromPoi.getPrimitiveJavaObject(from));
case DOUBLE:
SettableDoubleObjectInspector doubleOi = (SettableDoubleObjectInspector) toOi;
return doubleOi.create((Double) fromPoi.getPrimitiveJavaObject(from));
case BOOLEAN:
SettableBooleanObjectInspector boolOi = (SettableBooleanObjectInspector) toOi;
return boolOi.create((Boolean) fromPoi.getPrimitiveJavaObject(from));
case INT:
SettableIntObjectInspector intOi = (SettableIntObjectInspector) toOi;
return intOi.create((Integer) fromPoi.getPrimitiveJavaObject(from));
case LONG:
SettableLongObjectInspector longOi = (SettableLongObjectInspector) toOi;
return longOi.create((Long) fromPoi.getPrimitiveJavaObject(from));
case STRING:
SettableStringObjectInspector strOi = (SettableStringObjectInspector) toOi;
return strOi.create((String) fromPoi.getPrimitiveJavaObject(from));
case BYTE:
SettableByteObjectInspector byteOi = (SettableByteObjectInspector) toOi;
return byteOi.create((Byte) fromPoi.getPrimitiveJavaObject(from));
case SHORT:
SettableShortObjectInspector shortOi = (SettableShortObjectInspector) toOi;
return shortOi.create((Short) fromPoi.getPrimitiveJavaObject(from));
case BINARY:
SettableBinaryObjectInspector binOi = (SettableBinaryObjectInspector) toOi;
return binOi.create((byte[]) fromPoi.getPrimitiveJavaObject(from));
case TIMESTAMP:
SettableTimestampObjectInspector timeOi = (SettableTimestampObjectInspector) toOi;
return timeOi.create((Timestamp) fromPoi.getPrimitiveJavaObject(from));
case DATE:
SettableDateObjectInspector dateOi = (SettableDateObjectInspector) toOi;
return dateOi.create((Date) fromPoi.getPrimitiveJavaObject(from));
case DECIMAL:
SettableHiveDecimalObjectInspector decimalOi = (SettableHiveDecimalObjectInspector) toOi;
return decimalOi.create((HiveDecimal) fromPoi.getPrimitiveJavaObject(from));
case CHAR:
SettableHiveCharObjectInspector charOi = (SettableHiveCharObjectInspector) toOi;
return charOi.create((HiveChar) fromPoi.getPrimitiveJavaObject(from));
case VARCHAR:
SettableHiveVarcharObjectInspector varcharOi = (SettableHiveVarcharObjectInspector) toOi;
return varcharOi.create((HiveVarchar) fromPoi.getPrimitiveJavaObject(from));
case VOID:
throw new IllegalArgumentException("Void type is not supported yet");
default:
throw new IllegalArgumentException("Unknown primitive type "
+ (fromPoi).getPrimitiveCategory());
}
case STRUCT:
StructObjectInspector fromSoi = (StructObjectInspector) fromOi;
List<StructField> fromFields = (List<StructField>) fromSoi.getAllStructFieldRefs();
List<Object> fromItems = fromSoi.getStructFieldsDataAsList(from);
// this is a tuple. use TupleObjectInspector to construct the result
if (toOi instanceof TupleObjectInspector) {
TupleObjectInspector toToi = (TupleObjectInspector) toOi;
List<StructField> toFields = (List<StructField>) toToi.getAllStructFieldRefs();
Object[] values = new Object[fromItems.size()];
for (int i = 0; i < fromItems.size(); i++) {
values[i] = convert(fromItems.get(i),
fromFields.get(i).getFieldObjectInspector(),
toFields.get(i).getFieldObjectInspector());
}
return toToi.create(values);
}
SettableStructObjectInspector toSoi = (SettableStructObjectInspector) toOi;
List<StructField> toFields = (List<StructField>) toSoi.getAllStructFieldRefs();
to = toSoi.create();
for (int i = 0; i < fromItems.size(); i++) {
Object converted = convert(fromItems.get(i),
fromFields.get(i).getFieldObjectInspector(),
toFields.get(i).getFieldObjectInspector());
toSoi.setStructFieldData(to, toFields.get(i), converted);
}
return to;
case MAP:
MapObjectInspector fromMoi = (MapObjectInspector) fromOi;
SettableMapObjectInspector toMoi = (SettableMapObjectInspector) toOi;
to = toMoi.create(); // do not reuse
for (Map.Entry<?, ?> entry : fromMoi.getMap(from).entrySet()) {
Object convertedKey = convert(entry.getKey(),
fromMoi.getMapKeyObjectInspector(),
toMoi.getMapKeyObjectInspector());
Object convertedValue = convert(entry.getValue(),
fromMoi.getMapValueObjectInspector(),
toMoi.getMapValueObjectInspector());
toMoi.put(to, convertedKey, convertedValue);
}
return to;
case LIST:
ListObjectInspector fromLoi = (ListObjectInspector) fromOi;
List<?> fromList = fromLoi.getList(from);
SettableListObjectInspector toLoi = (SettableListObjectInspector) toOi;
to = toLoi.create(fromList.size()); // do not reuse
for (int i = 0; i < fromList.size(); i++) {
Object converted = convert(fromList.get(i),
fromLoi.getListElementObjectInspector(),
toLoi.getListElementObjectInspector());
toLoi.set(to, i, converted);
}
return to;
case UNION:
throw new IllegalArgumentException("Union type is not supported yet");
default:
throw new IllegalArgumentException("Unknown type " + fromOi.getCategory());
}
}
}