| /* |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| |
| package org.apache.pig.impl.util.avro; |
| |
| import java.io.IOException; |
| import java.nio.ByteBuffer; |
| import java.util.HashMap; |
| import java.util.Map; |
| |
| import org.apache.avro.Schema; |
| import org.apache.avro.Schema.Field; |
| import org.apache.avro.Schema.Type; |
| import org.apache.avro.generic.GenericData; |
| import org.apache.avro.util.Utf8; |
| import org.apache.pig.data.DataBag; |
| import org.apache.pig.data.DataByteArray; |
| import org.apache.pig.data.DataType; |
| import org.apache.pig.data.Tuple; |
| import org.joda.time.DateTime; |
| |
| /** |
| * Utility classes for AvroStorage; contains static methods |
| * for converting between Avro and Pig objects. |
| * |
| */ |
| public class AvroStorageDataConversionUtilities { |
| |
| /** |
| * Packs a Pig Tuple into an Avro record. |
| * @param t the Pig tuple to pack into the avro object |
| * @param s The avro schema for which to determine the type |
| * @return the avro record corresponding to the input tuple |
| * @throws IOException |
| */ |
| public static GenericData.Record packIntoAvro(final Tuple t, final Schema s) |
| throws IOException { |
| |
| try { |
| GenericData.Record record = new GenericData.Record(s); |
| for (Field f : s.getFields()) { |
| Object o = t.get(f.pos()); |
| Schema innerSchema = f.schema(); |
| record.put(f.pos(), packIntoAvro(o, innerSchema)); |
| } |
| return record; |
| } catch (Exception e) { |
| throw new IOException( |
| "exception in AvroStorageDataConversionUtilities.packIntoAvro", e); |
| } |
| } |
| |
| /** |
| * Packs a Pig DataBag into an Avro array. |
| * @param db the Pig databad to pack into the avro array |
| * @param s The avro schema for which to determine the type |
| * @return the avro array corresponding to the input bag |
| * @throws IOException |
| */ |
| public static GenericData.Array<Object> packIntoAvro( |
| final DataBag db, final Schema s) throws IOException { |
| |
| try { |
| GenericData.Array<Object> array |
| = new GenericData.Array<Object>(new Long(db.size()).intValue(), s); |
| for (Tuple t : db) { |
| if (s.getElementType() != null |
| && s.getElementType().getType() == Type.RECORD) { |
| array.add(packIntoAvro(t, s.getElementType())); |
| } else if (t.size() == 1) { |
| array.add(t.get(0)); |
| } else { |
| throw new IOException( |
| "AvroStorageDataConversionUtilities.packIntoAvro: Can't pack " |
| + t + " into schema " + s); |
| } |
| } |
| return array; |
| } catch (Exception e) { |
| throw new IOException( |
| "exception in AvroStorageDataConversionUtilities.packIntoAvro", e); |
| } |
| } |
| |
| private static Object packIntoAvro(final Object o, Schema s) |
| throws IOException { |
| if (AvroStorageSchemaConversionUtilities.isNullableUnion(s)) { |
| if (o == null) { |
| return null; |
| } |
| s = AvroStorageSchemaConversionUtilities.removeSimpleUnion(s); |
| } |
| // what if o == null and schema doesn't allow it ? |
| switch (s.getType()) { |
| case RECORD: |
| return packIntoAvro((Tuple) o, s); |
| case ARRAY: |
| return packIntoAvro((DataBag) o, s); |
| case MAP: |
| return packIntoAvro((Map<CharSequence, Object>) o, s); |
| case BYTES: |
| return ByteBuffer.wrap(((DataByteArray) o).get()); |
| case FIXED: |
| return new GenericData.Fixed(s, ((DataByteArray) o).get()); |
| case ENUM: |
| return new GenericData.EnumSymbol(s,o.toString()); |
| default: |
| if (DataType.findType(o) == DataType.DATETIME) { |
| return ((DateTime) o).getMillis(); |
| } else { |
| return o; |
| } |
| } |
| } |
| |
| private static Map<Utf8, Object> packIntoAvro(Map<CharSequence, Object> input, Schema schema) |
| throws IOException { |
| final Map<Utf8, Object> output = new HashMap<Utf8, Object>(); |
| for (Map.Entry<CharSequence, Object> e : input.entrySet()) { |
| final Utf8 k = utf8(e.getKey()); |
| output.put(k, packIntoAvro(e.getValue(), schema.getValueType())); |
| } |
| return output; |
| } |
| |
| private static Utf8 utf8(CharSequence v) { |
| if (v instanceof Utf8) { |
| return (Utf8) v; |
| } else { |
| final StringBuilder sb = new StringBuilder(v.length()); |
| sb.append(v); |
| return new Utf8(sb.toString()); |
| } |
| } |
| } |