blob: acc89004ad4686ab933e3014a34039be0df4d7a1 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.pig.impl.util.avro;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.HashMap;
import java.util.Map;
import org.apache.avro.Schema;
import org.apache.avro.Schema.Field;
import org.apache.avro.Schema.Type;
import org.apache.avro.generic.GenericData;
import org.apache.avro.util.Utf8;
import org.apache.pig.data.DataBag;
import org.apache.pig.data.DataByteArray;
import org.apache.pig.data.DataType;
import org.apache.pig.data.Tuple;
import org.joda.time.DateTime;
/**
* Utility class for AvroStorage; contains static methods
* for converting Pig objects (tuples, bags, maps and scalar values)
* into their Avro representations.
*
*/
public class AvroStorageDataConversionUtilities {

  /**
   * Packs a Pig Tuple into an Avro record.
   * For every field of the schema, the value at the field's position is read
   * from the tuple and converted according to the field's own schema.
   * @param t the Pig tuple to pack into the avro object
   * @param s The avro record schema whose fields drive the conversion
   * @return the avro record corresponding to the input tuple
   * @throws IOException if a field value cannot be converted; IOExceptions
   *     raised while converting a field are propagated unchanged, any other
   *     exception is wrapped in an IOException
   */
  public static GenericData.Record packIntoAvro(final Tuple t, final Schema s)
      throws IOException {
    try {
      final GenericData.Record record = new GenericData.Record(s);
      for (Field f : s.getFields()) {
        final Object fieldValue = t.get(f.pos());
        record.put(f.pos(), packIntoAvro(fieldValue, f.schema()));
      }
      return record;
    } catch (IOException e) {
      // Already the declared exception type and already carries a specific
      // message; re-wrapping at every recursion level would only bury it.
      throw e;
    } catch (Exception e) {
      throw new IOException(
          "exception in AvroStorageDataConversionUtilities.packIntoAvro", e);
    }
  }

  /**
   * Packs a Pig DataBag into an Avro array.
   * When the array's element type is a record, each tuple of the bag is
   * packed as a record. Otherwise each tuple must contain exactly one value,
   * which is converted according to the element schema.
   * @param db the Pig databag to pack into the avro array
   * @param s The avro array schema for which to determine the element type
   * @return the avro array corresponding to the input bag
   * @throws IOException if the bag reports more tuples than an int can
   *     count, if a tuple does not fit the element schema, or if an element
   *     cannot be converted
   */
  public static GenericData.Array<Object> packIntoAvro(
      final DataBag db, final Schema s) throws IOException {
    try {
      final long bagSize = db.size();
      if (bagSize > Integer.MAX_VALUE) {
        // Narrowing the long would silently produce a wrong (possibly
        // negative) capacity, so fail with an explicit message instead.
        throw new IOException(
            "AvroStorageDataConversionUtilities.packIntoAvro: bag with "
                + bagSize + " tuples is too large for an Avro array");
      }
      final GenericData.Array<Object> array
          = new GenericData.Array<Object>((int) bagSize, s);

      // The element schema does not change while iterating; look it up once.
      final Schema elementSchema = s.getElementType();
      final boolean recordElements = elementSchema != null
          && elementSchema.getType() == Type.RECORD;

      for (Tuple t : db) {
        if (recordElements) {
          array.add(packIntoAvro(t, elementSchema));
        } else if (t.size() == 1) {
          final Object element = t.get(0);
          // Convert the single value the same way record fields and map
          // values are converted, so bytearrays, datetimes and nested
          // bags/maps inside a bag get their Avro representation too.
          array.add(elementSchema == null
              ? element
              : packIntoAvro(element, elementSchema));
        } else {
          throw new IOException(
              "AvroStorageDataConversionUtilities.packIntoAvro: Can't pack "
                  + t + " into schema " + s);
        }
      }
      return array;
    } catch (IOException e) {
      // Keep specific messages (such as "Can't pack ...") visible to the
      // caller instead of hiding them behind a generic wrapper.
      throw e;
    } catch (Exception e) {
      throw new IOException(
          "exception in AvroStorageDataConversionUtilities.packIntoAvro", e);
    }
  }

  /**
   * Converts a single Pig value into the Avro object required by a schema.
   * A nullable union accepts null directly; otherwise the union is reduced
   * with removeSimpleUnion and the remaining type selects the conversion.
   * @param o the Pig value to convert, may be null
   * @param s the avro schema the value has to conform to
   * @return the converted value, or the value itself for types that need
   *     no conversion
   * @throws IOException if the value is null but the schema requires a
   *     record, array, map, bytes, fixed or enum value, or if a nested
   *     conversion fails
   */
  private static Object packIntoAvro(final Object o, Schema s)
      throws IOException {
    Schema schema = s;
    if (AvroStorageSchemaConversionUtilities.isNullableUnion(schema)) {
      if (o == null) {
        return null;
      }
      schema = AvroStorageSchemaConversionUtilities.removeSimpleUnion(schema);
    }

    switch (schema.getType()) {
    case RECORD:
      return packIntoAvro((Tuple) requireValue(o, schema), schema);
    case ARRAY:
      return packIntoAvro((DataBag) requireValue(o, schema), schema);
    case MAP: {
      // Generic type arguments are erased at runtime, so this cast cannot be
      // checked; keys are only ever used through the CharSequence interface.
      @SuppressWarnings("unchecked")
      final Map<CharSequence, Object> map
          = (Map<CharSequence, Object>) requireValue(o, schema);
      return packIntoAvro(map, schema);
    }
    case BYTES:
      return ByteBuffer.wrap(((DataByteArray) requireValue(o, schema)).get());
    case FIXED:
      return new GenericData.Fixed(
          schema, ((DataByteArray) requireValue(o, schema)).get());
    case ENUM:
      return new GenericData.EnumSymbol(
          schema, requireValue(o, schema).toString());
    default:
      // No null check for the remaining types: the value is handed to
      // DataType.findType as-is and returned unchanged unless Pig classifies
      // it as DATETIME, in which case its epoch milliseconds are returned.
      if (DataType.findType(o) == DataType.DATETIME) {
        return ((DateTime) o).getMillis();
      }
      return o;
    }
  }

  /**
   * Rejects a null value for a schema type whose conversion needs an object.
   * @param o the value to check
   * @param s the schema the value is being converted for (used in the message)
   * @return the value itself when it is not null
   * @throws IOException if the value is null
   */
  private static Object requireValue(final Object o, final Schema s)
      throws IOException {
    if (o == null) {
      throw new IOException(
          "AvroStorageDataConversionUtilities.packIntoAvro: null value is not"
              + " allowed for non-nullable schema " + s);
    }
    return o;
  }

  /**
   * Packs a Pig map into an Avro map with Utf8 keys.
   * Every value is converted according to the value type of the map schema.
   * @param input the Pig map to convert
   * @param schema the avro map schema
   * @return a new map holding the converted entries
   * @throws IOException if a value cannot be converted
   */
  private static Map<Utf8, Object> packIntoAvro(
      Map<CharSequence, Object> input, Schema schema) throws IOException {
    // Loop-invariant: every value shares the map's value schema.
    final Schema valueSchema = schema.getValueType();
    final Map<Utf8, Object> output = new HashMap<Utf8, Object>();
    for (Map.Entry<CharSequence, Object> e : input.entrySet()) {
      output.put(utf8(e.getKey()), packIntoAvro(e.getValue(), valueSchema));
    }
    return output;
  }

  /**
   * Returns the given character sequence as an Avro Utf8.
   * An argument that already is a Utf8 is returned as the same instance;
   * anything else is copied into a new Utf8 via its string form.
   * @param v the character sequence to convert, must not be null
   * @return the Utf8 view of the argument
   */
  private static Utf8 utf8(CharSequence v) {
    if (v instanceof Utf8) {
      return (Utf8) v;
    }
    return new Utf8(v.toString());
  }
}