/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.pig.scripting.groovy;
import java.math.BigDecimal;
import java.math.BigInteger;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import org.apache.pig.backend.executionengine.ExecException;
public class GroovyUtils {
// Shared factory used by groovyToPig to build the Pig tuples it returns.
private static final TupleFactory tupleFactory = TupleFactory.getInstance();
// Shared factory used by groovyToPig to allocate the Pig bags it returns.
private static final BagFactory bagFactory = BagFactory.getInstance();
* Converts an object created on the Groovy side to its Pig counterpart.
* The conversions are as follow:
* Groovy Pig
* Object[] Tuple
* groovy.lang.Tuple Tuple
* Tuple
* DataBag
* java.util.Map Map
* java.util.List DataBag
* Byte/Short/Integer int
* Long long
* Float float
* Double double
* BigInteger BigInteger
* BigDecimal BigDecimal
* String chararray
* byte[] DataByteArray (copy)
* Boolean boolean
* org.joda.time.DateTime org.joda.time.DateTime
* null null
* anything else raises an exception
* @param groovyObject
* Groovy object to convert
* @return the Pig counterpart of groovyObject
* @throws ExecException
public static Object groovyToPig(Object groovyObject) throws ExecException {
Object pigObject = null;
if (groovyObject instanceof Object[] || groovyObject instanceof groovy.lang.Tuple) {
// Allocate a List<Object> that will be filled with converted
// objects and later passed to newTuple.
List<Object> pigObjects = new ArrayList<Object>();
// Convert each member of groovyObject
if (groovyObject instanceof Object[]) {
for (Object o : (Object[]) groovyObject) {
} else {
for (Object o : (Iterable) groovyObject) {
// Create the result Tuple
pigObject = tupleFactory.newTuple(pigObjects);
} else if (groovyObject instanceof Tuple || groovyObject instanceof DataBag) {
// Copy Pig Tuple/DataBag as is
// This enables the creation of instances of DataBag which do not fit in
// memory
// It is advised to wrap objects into a call to groovyToPig
// prior to adding them to a Tuple or DataBag
pigObject = groovyObject;
} else if (groovyObject instanceof Map) {
// Allocate a Map
Map<String, Object> pigMap = new HashMap<String, Object>();
// Iterate over Groovy Map, putting each entry into pigMap
for (Map.Entry<?, ?> entry : ((Map<?, ?>) groovyObject).entrySet()) {
pigMap.put(groovyToPig(entry.getKey()).toString(), groovyToPig(entry.getValue()));
pigObject = pigMap;
} else if (groovyObject instanceof List) {
// Allocate a DataBag
DataBag bag = bagFactory.newDefaultBag();
// Pig's bags can only contain tuples, so we cast the return value
// of groovyToPig to a Tuple, if it's not a tuple, a ClassCastException
// will
// be thrown.
for (Object o : (List) groovyObject) {
Object p = groovyToPig(o);
if (p instanceof Tuple || null == p) {
bag.add((Tuple) p);
} else {
// Wrap value in a Tuple if it's not already a tuple
pigObject = bag;
} else if (groovyObject instanceof Integer || groovyObject instanceof Long || groovyObject instanceof Float
|| groovyObject instanceof Double) {
// Numeric types which have an equivalent in Pig are passed as is as they
// are immutable
pigObject = groovyObject;
} else if (groovyObject instanceof Byte || groovyObject instanceof Short) {
pigObject = ((Number) groovyObject).intValue();
} else if (groovyObject instanceof BigInteger) {
pigObject = groovyObject;
} else if (groovyObject instanceof BigDecimal) {
pigObject = groovyObject;
} else if (groovyObject instanceof byte[]) {
// Clone the byte array
byte[] b = new byte[((byte[]) groovyObject).length];
System.arraycopy((byte[]) groovyObject, 0, b, 0, b.length);
pigObject = new DataByteArray(b);
} else if (groovyObject instanceof String) {
// String is immutable, so pass it as is
pigObject = groovyObject;
} else if (groovyObject instanceof Boolean) {
pigObject = groovyObject;
} else if (groovyObject instanceof org.joda.time.DateTime) {
// jodatime's DateTime is immutable, so reuse the same instance
pigObject = groovyObject;
} else if (null == groovyObject) {
pigObject = null;
} else {
throw new ExecException("Unable to cast " + groovyObject.getClass().getName() + " to a Pig datatype.");
return pigObject;
* Converts an object created on the Pig side to its Groovy counterpart.
* The conversions are as follow:
* Pig Groovy
* Tuple groovy.lang.tuple
* DataBag groovy.lang.Tuple containing the bag's size and an iterator on its
* content
* Map java.util.Map
* int/long/float/double as is
* chararray String
* bytearray byte[] (copy)
* boolean boolean
* BigInteger BigInteger
* BigDecimal BigDecimal
* org.joda.time.DateTime org.joda.time.DateTime
* null null
* anything else raises an exception
* @param pigObject
* @return Object
* @throws ExecException
public static Object pigToGroovy(Object pigObject) throws ExecException {
Object groovyObject = null;
if (pigObject instanceof Tuple) {
Object[] a = new Object[((Tuple) pigObject).size()];
int i = 0;
for (Object oo : ((Tuple) pigObject).getAll()) {
a[i++] = pigToGroovy(oo);
groovyObject = new groovy.lang.Tuple(a);
} else if (pigObject instanceof DataBag) {
// Return a Groovy Tuple containing the bag's size and an
// iterator on its content (Iterator will return instances of
// groovy.lang.Tuple)
Object[] size_iterator = new Object[2];
size_iterator[0] = ((DataBag) pigObject).size();
size_iterator[1] = new DataBagGroovyIterator(((DataBag) pigObject).iterator());
groovyObject = new groovy.lang.Tuple(size_iterator);
} else if (pigObject instanceof Map) {
Map<String, Object> m = new HashMap<String, Object>();
for (Map.Entry<String, ?> entry : ((Map<String, ?>) pigObject).entrySet()) {
m.put((String) pigToGroovy(entry.getKey()), pigToGroovy(entry.getValue()));
groovyObject = m;
} else if (pigObject instanceof Number || pigObject instanceof String || pigObject instanceof Boolean) {
groovyObject = pigObject;
} else if (pigObject instanceof DataByteArray) {
// Allocate a new byte array so we don't use the original array
byte[] b = new byte[((DataByteArray) pigObject).size()];
System.arraycopy(((DataByteArray) pigObject).get(), 0, b, 0, b.length);
groovyObject = b;
} else if (pigObject instanceof BigInteger) {
groovyObject = pigObject;
} else if (pigObject instanceof BigDecimal) {
groovyObject = pigObject;
}else if (pigObject instanceof org.joda.time.DateTime) {
// jodatime's DateTime is immutable, so reuse the same instance
groovyObject = pigObject;
} else if (null == pigObject) {
groovyObject = null;
} else {
throw new ExecException("Unable to cast pig datatype " + pigObject.getClass().getName() + " to a suitable Groovy Object.");
return groovyObject;
public static class DataBagGroovyIterator implements Iterator<groovy.lang.Tuple> {
// Underlying iterator over Pig tuples that this iterator delegates to.
private final Iterator<Tuple> iter;
/**
 * Creates an iterator that exposes Pig tuples as Groovy tuples.
 *
 * @param iter
 *          iterator over the Pig tuples to expose
 */
public DataBagGroovyIterator(Iterator<Tuple> iter) {
    this.iter = iter;
}
public boolean hasNext() {
return iter.hasNext();
public groovy.lang.Tuple next() {
try {
return (groovy.lang.Tuple) pigToGroovy(;
} catch (ExecException ee) {
throw new RuntimeException(ee);
public void remove() {