| /** |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| package org.apache.hcatalog.utils; |
| |
| import java.io.IOException; |
| import java.util.ArrayList; |
| import java.util.HashMap; |
| import java.util.List; |
| import java.util.Map; |
| import java.util.Map.Entry; |
| |
| import org.apache.hadoop.hive.ql.exec.UDFArgumentException; |
| import org.apache.hadoop.hive.ql.metadata.HiveException; |
| import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; |
| import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector; |
| import org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector; |
| import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; |
| import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; |
| import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; |
| import org.apache.hadoop.hive.serde2.objectinspector.StructField; |
| import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; |
| import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category; |
| import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.ObjectInspectorOptions; |
| |
| /** |
| * A hive udf to check types of the fields read from hcat. A sample hive query which can use this is: |
| * |
| * create temporary function typecheck as 'org.apache.hcatalog.utils.HCatTypeCheckHive'; |
| * select typecheck('map<string,string>+struct<num:int,str:string,dbl:double>+array<map<string,string>>+int', |
| * mymap, mytuple, bagofmap, rownum) from complex; |
| * |
| * |
| * The first argument to the UDF is a string representing the schema of the columns in the table. |
| * The columns in the tables are the remaining args to it. |
| * The schema specification consists of the types as given by "describe <table>" |
| * with each column's type separated from the next column's type by a '+' |
| * |
| * The UDF will throw an exception (and cause the query to fail) if it does not |
| * encounter the correct types. |
| * |
 | * The output is a string representation of the data, type and Hive category. |
 | * It is not advisable to use this against a large dataset since the output would also |
| * be large. |
| * |
| */ |
| public final class HCatTypeCheckHive extends GenericUDF { |
| |
| ObjectInspector[] argOIs; |
| @Override |
| public Object evaluate(DeferredObject[] args) throws HiveException { |
| List<Object> row = new ArrayList<Object>(); |
| String typesStr = (String) getJavaObject(args[0].get(), argOIs[0], new ArrayList<Category>()); |
| String[] types = typesStr.split("\\+"); |
| for(int i = 0; i < types.length; i++) { |
| types[i] = types[i].toLowerCase(); |
| } |
| for(int i = 1; i < args.length; i++) { |
| ObjectInspector oi = argOIs[i]; |
| List<ObjectInspector.Category> categories = new ArrayList<ObjectInspector.Category>(); |
| Object o = getJavaObject(args[i].get(),oi, categories); |
| try { |
| if(o != null) { |
| Util.check(types[i-1], o); |
| } |
| } catch (IOException e) { |
| throw new HiveException(e); |
| } |
| row.add(o == null ? "null" : o); |
| row.add(":" + (o == null ? "null" : o.getClass()) + ":" + categories); |
| } |
| return row.toString(); |
| } |
| |
| private Object getJavaObject(Object o, ObjectInspector oi, List<Category> categories) { |
| if(categories != null) { |
| categories.add(oi.getCategory()); |
| } |
| if(oi.getCategory() == ObjectInspector.Category.LIST) { |
| List<?> l = ((ListObjectInspector)oi).getList(o); |
| List<Object> result = new ArrayList<Object>(); |
| ObjectInspector elemOI = ((ListObjectInspector)oi).getListElementObjectInspector(); |
| for(Object lo : l) { |
| result.add(getJavaObject(lo, elemOI, categories)); |
| } |
| return result; |
| } else if (oi.getCategory() == ObjectInspector.Category.MAP) { |
| Map<?,?> m = ((MapObjectInspector)oi).getMap(o); |
| Map<String, String> result = new HashMap<String, String>(); |
| ObjectInspector koi = ((MapObjectInspector)oi).getMapKeyObjectInspector(); |
| ObjectInspector voi = ((MapObjectInspector)oi).getMapValueObjectInspector(); |
| for(Entry<?,?> e: m.entrySet()) { |
| result.put((String)getJavaObject(e.getKey(), koi, null), |
| (String)getJavaObject(e.getValue(), voi, null)); |
| } |
| return result; |
| |
| } else if (oi.getCategory() == ObjectInspector.Category.STRUCT) { |
| List<Object> s = ((StructObjectInspector)oi).getStructFieldsDataAsList(o); |
| List<? extends StructField> sf = ((StructObjectInspector)oi).getAllStructFieldRefs(); |
| List<Object> result = new ArrayList<Object>(); |
| for(int i = 0; i < s.size(); i++) { |
| result.add(getJavaObject(s.get(i), sf.get(i).getFieldObjectInspector(), categories)); |
| } |
| return result; |
| } else if(oi.getCategory() == ObjectInspector.Category.PRIMITIVE) { |
| return ((PrimitiveObjectInspector)oi).getPrimitiveJavaObject(o); |
| } |
| throw new RuntimeException("Unexpected error!"); |
| } |
| |
| @Override |
| public String getDisplayString(String[] arg0) { |
| return null; |
| } |
| |
| @Override |
| public ObjectInspector initialize(ObjectInspector[] argOIs) |
| throws UDFArgumentException { |
| this.argOIs = argOIs; |
| return ObjectInspectorFactory.getReflectionObjectInspector(String.class, |
| ObjectInspectorOptions.JAVA); |
| } |
| |
| } |