// blob: ca2c9e3cd4951cd707741cf68cc8e5c29d0f2aac [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.pig.test;
import static org.apache.pig.builtin.mock.Storage.bag;
import static org.apache.pig.builtin.mock.Storage.resetData;
import static org.apache.pig.builtin.mock.Storage.tuple;
import static org.junit.Assert.assertArrayEquals;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNotSame;
import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertSame;
import static org.junit.Assert.assertTrue;
import java.io.File;
import java.io.FileWriter;
import java.math.BigDecimal;
import java.math.BigInteger;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.apache.pig.PigServer;
import org.apache.pig.builtin.mock.Storage.Data;
import org.apache.pig.data.DataBag;
import org.apache.pig.data.DataByteArray;
import org.apache.pig.data.DefaultDataBag;
import org.apache.pig.data.Tuple;
import org.apache.pig.data.TupleFactory;
import org.apache.pig.scripting.groovy.GroovyUtils;
import org.junit.Test;
/**
 * Tests for Groovy UDF support: the value conversions performed by
 * {@link GroovyUtils#pigToGroovy(Object)} and {@link GroovyUtils#groovyToPig(Object)},
 * and the registration and invocation of Groovy scripts as UDFs through a
 * {@link PigServer} fed with mock storage data.
 */
public class TestUDFGroovy {

    /**
     * Writes the given Groovy statements, one per line, into a new temporary
     * {@code .groovy} file that is scheduled for deletion on JVM exit.
     *
     * @param groovyStatements the lines of the script, without line terminators
     * @return the temporary file holding the script
     * @throws java.io.IOException if the file cannot be created or written
     */
    private static File writeGroovyScript(String[] groovyStatements) throws java.io.IOException {
        File tmpScriptFile = File.createTempFile("temp_groovy_udf", ".groovy");
        tmpScriptFile.deleteOnExit();
        FileWriter writer = new FileWriter(tmpScriptFile);
        try {
            for (String line : groovyStatements) {
                writer.write(line + "\n");
            }
        } finally {
            // Release the file handle even when a write fails.
            writer.close();
        }
        return tmpScriptFile;
    }

    /**
     * Checks the result of {@code GroovyUtils.pigToGroovy} for each kind of
     * input value exercised below, including {@code null}.
     */
    @Test
    public void testPigToGroovy() throws Exception {
        // Boolean
        Object pigObject = Boolean.TRUE;
        Object groovyObject = GroovyUtils.pigToGroovy(pigObject);
        assertTrue(groovyObject instanceof Boolean);
        assertEquals(true, groovyObject);

        // Integer
        pigObject = Integer.valueOf(42);
        groovyObject = GroovyUtils.pigToGroovy(pigObject);
        assertTrue(groovyObject instanceof Integer);
        assertEquals(42, groovyObject);

        // Long (value that does not fit in an int)
        pigObject = Long.valueOf(0x100000000L);
        groovyObject = GroovyUtils.pigToGroovy(pigObject);
        assertTrue(groovyObject instanceof Long);
        assertEquals(0x100000000L, groovyObject);

        // Float
        pigObject = Float.MIN_VALUE;
        groovyObject = GroovyUtils.pigToGroovy(pigObject);
        assertTrue(groovyObject instanceof Float);
        assertEquals(Float.MIN_VALUE, groovyObject);

        // Double
        pigObject = Double.MAX_VALUE;
        groovyObject = GroovyUtils.pigToGroovy(pigObject);
        assertTrue(groovyObject instanceof Double);
        assertEquals(Double.MAX_VALUE, groovyObject);

        // String
        pigObject = "Dans le cochon tout est bon !";
        groovyObject = GroovyUtils.pigToGroovy(pigObject);
        assertTrue(groovyObject instanceof String);
        assertEquals("Dans le cochon tout est bon !", groovyObject);

        // DataByteArray is expected to come back as a byte[] with the same content
        pigObject = new DataByteArray("Surtout le jambon".getBytes("UTF-8"));
        groovyObject = GroovyUtils.pigToGroovy(pigObject);
        assertTrue(groovyObject instanceof byte[]);
        assertNotSame(groovyObject, pigObject);
        assertArrayEquals("Surtout le jambon".getBytes("UTF-8"), (byte[]) groovyObject);

        // DateTime is expected to be passed through as the same instance
        pigObject = new org.joda.time.DateTime();
        groovyObject = GroovyUtils.pigToGroovy(pigObject);
        assertTrue(groovyObject instanceof org.joda.time.DateTime);
        assertSame(groovyObject, pigObject);

        // Pig tuple -> groovy.lang.Tuple with the same elements
        pigObject = tuple("a","b","c");
        groovyObject = GroovyUtils.pigToGroovy(pigObject);
        assertTrue(groovyObject instanceof groovy.lang.Tuple);
        assertEquals(3, ((groovy.lang.Tuple) groovyObject).size());
        assertEquals("a", ((groovy.lang.Tuple) groovyObject).get(0));
        assertEquals("b", ((groovy.lang.Tuple) groovyObject).get(1));
        assertEquals("c", ((groovy.lang.Tuple) groovyObject).get(2));

        // Pig bag -> groovy.lang.Tuple holding (size as long, iterator over converted tuples)
        pigObject = bag(tuple("a"), tuple("b"));
        groovyObject = GroovyUtils.pigToGroovy(pigObject);
        assertTrue(groovyObject instanceof groovy.lang.Tuple);
        assertEquals(2, ((groovy.lang.Tuple) groovyObject).size());
        assertEquals(2L, ((groovy.lang.Tuple) groovyObject).get(0));
        assertTrue(((groovy.lang.Tuple) groovyObject).get(1) instanceof Iterator);
        Iterator<groovy.lang.Tuple> iter = (Iterator) ((groovy.lang.Tuple) groovyObject).get(1);
        groovy.lang.Tuple t = iter.next();
        assertEquals(1, t.size());
        assertEquals("a", t.get(0));
        t = iter.next();
        assertEquals(1, t.size());
        assertEquals("b", t.get(0));

        // Map
        pigObject = new HashMap<String, String>();
        ((Map) pigObject).put("Pate", "Henaff");
        ((Map) pigObject).put("Rillettes", "Bordeau Chesnel");
        groovyObject = GroovyUtils.pigToGroovy(pigObject);
        assertTrue(groovyObject instanceof Map);
        assertEquals(2, ((Map) groovyObject).size());
        assertEquals("Henaff", ((Map) groovyObject).get("Pate"));
        assertEquals("Bordeau Chesnel", ((Map) groovyObject).get("Rillettes"));

        // BigInteger and BigDecimal are expected to be passed through as the same instance
        pigObject = BigInteger.ONE;
        groovyObject = GroovyUtils.pigToGroovy(pigObject);
        assertSame(pigObject, groovyObject);

        pigObject = new BigDecimal("42.42");
        groovyObject = GroovyUtils.pigToGroovy(pigObject);
        assertSame(pigObject, groovyObject);

        // null stays null
        pigObject = null;
        groovyObject = GroovyUtils.pigToGroovy(pigObject);
        assertNull(groovyObject);
    }

    /**
     * Checks the result of {@code GroovyUtils.groovyToPig} for each kind of
     * input value exercised below, including {@code null}.
     */
    @Test
    public void testGroovyToPig() throws Exception {
        // Boolean
        Object groovyObject = Boolean.TRUE;
        Object pigObject = GroovyUtils.groovyToPig(groovyObject);
        assertTrue(pigObject instanceof Boolean);
        assertEquals(true, pigObject);

        // Byte and Short are expected to be widened to Integer
        groovyObject = Byte.MIN_VALUE;
        pigObject = GroovyUtils.groovyToPig(groovyObject);
        assertTrue(pigObject instanceof Integer);
        assertEquals((int) Byte.MIN_VALUE, pigObject);

        groovyObject = Short.MIN_VALUE;
        pigObject = GroovyUtils.groovyToPig(groovyObject);
        assertTrue(pigObject instanceof Integer);
        assertEquals((int) Short.MIN_VALUE, pigObject);

        // Integer
        groovyObject = Integer.MIN_VALUE;
        pigObject = GroovyUtils.groovyToPig(groovyObject);
        assertTrue(pigObject instanceof Integer);
        assertEquals(Integer.MIN_VALUE, pigObject);

        // Long
        groovyObject = Long.MIN_VALUE;
        pigObject = GroovyUtils.groovyToPig(groovyObject);
        assertTrue(pigObject instanceof Long);
        assertEquals(Long.MIN_VALUE, pigObject);

        // BigInteger is expected to be passed through as the same instance
        groovyObject = BigInteger.TEN;
        pigObject = GroovyUtils.groovyToPig(groovyObject);
        assertSame(groovyObject, pigObject);

        // Float
        groovyObject = Float.MIN_NORMAL;
        pigObject = GroovyUtils.groovyToPig(groovyObject);
        assertTrue(pigObject instanceof Float);
        assertEquals(Float.MIN_NORMAL, pigObject);

        // Double
        groovyObject = Double.MIN_VALUE;
        pigObject = GroovyUtils.groovyToPig(groovyObject);
        assertTrue(pigObject instanceof Double);
        assertEquals(Double.MIN_VALUE, pigObject);

        // BigDecimal is expected to be passed through as the same instance
        groovyObject = new BigDecimal("42.42");
        pigObject = GroovyUtils.groovyToPig(groovyObject);
        assertSame(groovyObject, pigObject);

        // String
        groovyObject = "Dans le cochon tout est bon !";
        pigObject = GroovyUtils.groovyToPig(groovyObject);
        assertTrue(pigObject instanceof String);
        assertEquals("Dans le cochon tout est bon !", pigObject);

        // byte[] -> DataByteArray with the same content
        groovyObject = "Surtout le jambon".getBytes("UTF-8");
        pigObject = GroovyUtils.groovyToPig(groovyObject);
        assertTrue(pigObject instanceof DataByteArray);
        assertArrayEquals("Surtout le jambon".getBytes("UTF-8"), ((DataByteArray) pigObject).get());

        // Object[] -> Pig tuple
        groovyObject = new Object[2];
        ((Object[]) groovyObject)[0] = "Pate";
        ((Object[]) groovyObject)[1] = "Henaff";
        pigObject = GroovyUtils.groovyToPig(groovyObject);
        assertTrue(pigObject instanceof Tuple);
        assertEquals(2, ((Tuple) pigObject).size());
        assertEquals("Pate", ((Tuple) pigObject).get(0));
        assertEquals("Henaff", ((Tuple) pigObject).get(1));

        // groovy.lang.Tuple -> Pig tuple
        groovyObject = new Object[2];
        ((Object[]) groovyObject)[0] = "Rillettes";
        ((Object[]) groovyObject)[1] = "Bordeau Chesnel";
        groovyObject = new groovy.lang.Tuple((Object[]) groovyObject);
        pigObject = GroovyUtils.groovyToPig(groovyObject);
        assertTrue(pigObject instanceof Tuple);
        assertEquals(2, ((Tuple) pigObject).size());
        assertEquals("Rillettes", ((Tuple) pigObject).get(0));
        assertEquals("Bordeau Chesnel", ((Tuple) pigObject).get(1));

        // List -> bag of single-field tuples; contents are compared as a set,
        // so the check does not depend on the bag's iteration order
        groovyObject = new ArrayList<Object>();
        ((List<Object>) groovyObject).add("Jaret");
        ((List<Object>) groovyObject).add("Filet Mignon");
        pigObject = GroovyUtils.groovyToPig(groovyObject);
        assertTrue(pigObject instanceof DataBag);
        assertEquals(2, ((DataBag) pigObject).size());
        Iterator<Tuple> iter = ((DataBag) pigObject).iterator();
        Set<String> values = new HashSet<String>();
        while (iter.hasNext()) {
            Tuple t = iter.next();
            assertEquals(1, t.size());
            values.add((String) t.get(0));
        }
        assertEquals(2, values.size());
        assertTrue(values.contains("Jaret"));
        assertTrue(values.contains("Filet Mignon"));

        // Map
        groovyObject = new HashMap<String, String>();
        ((Map) groovyObject).put("Henaff", "a bord");
        ((Map) groovyObject).put("Copains", "comme cochons");
        pigObject = GroovyUtils.groovyToPig(groovyObject);
        assertTrue(pigObject instanceof Map);
        assertEquals(2, ((Map) pigObject).size());
        assertEquals("a bord", ((Map) pigObject).get("Henaff"));
        assertEquals("comme cochons", ((Map) pigObject).get("Copains"));

        // Values that are already Pig types are expected to be passed through as the same instance
        groovyObject = TupleFactory.getInstance().newTuple(2);
        ((Tuple) groovyObject).set(0, "jambon");
        ((Tuple) groovyObject).set(1, "blanc");
        pigObject = GroovyUtils.groovyToPig(groovyObject);
        assertSame(groovyObject, pigObject);

        groovyObject = new DefaultDataBag();
        pigObject = GroovyUtils.groovyToPig(groovyObject);
        assertSame(groovyObject, pigObject);

        groovyObject = new org.joda.time.DateTime();
        pigObject = GroovyUtils.groovyToPig(groovyObject);
        assertTrue(pigObject instanceof org.joda.time.DateTime);
        assertSame(groovyObject, pigObject);

        // null stays null
        groovyObject = null;
        pigObject = GroovyUtils.groovyToPig(groovyObject);
        assertNull(pigObject);
    }

    /**
     * Registers a Groovy class exposing a static {@code square} method and
     * checks that applying it to 1..4 stores 1, 4, 9 and 16 as longs.
     */
    @Test
    public void testEvalFunc_Static() throws Exception {
        String[] groovyStatements = {
            "import org.apache.pig.builtin.OutputSchema;",
            "class GroovyUDF {",
            " @OutputSchema('x:long')",
            " static long square(long x) {",
            " return x*x;",
            " }",
            "}"
        };
        File tmpScriptFile = writeGroovyScript(groovyStatements);

        PigServer pigServer = new PigServer(Util.getLocalTestMode());
        pigServer.registerCode(tmpScriptFile.getCanonicalPath(), "groovy", "groovyudfs");

        Data data = resetData(pigServer);
        data.set("foo0",
            tuple(1),
            tuple(2),
            tuple(3),
            tuple(4)
        );

        pigServer.registerQuery("A = LOAD 'foo0' USING mock.Storage();");
        pigServer.registerQuery("B = FOREACH A GENERATE groovyudfs.square($0);");
        pigServer.registerQuery("STORE B INTO 'bar0' USING mock.Storage();");

        List<Tuple> out = data.get("bar0");
        // Check the count first so a wrong result size fails as an assertion.
        assertEquals(4, out.size());
        assertEquals(tuple(1L), out.get(0));
        assertEquals(tuple(4L), out.get(1));
        assertEquals(tuple(9L), out.get(2));
        assertEquals(tuple(16L), out.get(3));
    }

    /**
     * Registers a Groovy class exposing an instance method {@code mul} that
     * multiplies by a field set in the constructor, and checks that 1 yields 42.
     */
    @Test
    public void testEvalFunc_NonStatic() throws Exception {
        String[] groovyStatements = {
            "import org.apache.pig.builtin.OutputSchema;",
            "class GroovyUDF {",
            " private final long multiplicator;",
            " public GroovyUDF() {",
            " this.multiplicator = 42L;",
            " }",
            " @OutputSchema('x:long')",
            " long mul(long x) {",
            " return x*this.multiplicator;",
            " }",
            "}"
        };
        File tmpScriptFile = writeGroovyScript(groovyStatements);

        PigServer pigServer = new PigServer(Util.getLocalTestMode());
        pigServer.registerCode(tmpScriptFile.getCanonicalPath(), "groovy", "groovyudfs");

        Data data = resetData(pigServer);
        data.set("foo1",
            tuple(1)
        );

        pigServer.registerQuery("A = LOAD 'foo1' USING mock.Storage();");
        pigServer.registerQuery("B = FOREACH A GENERATE groovyudfs.mul($0);");
        pigServer.registerQuery("STORE B INTO 'bar1' USING mock.Storage();");

        List<Tuple> out = data.get("bar1");
        assertEquals(1, out.size());
        assertEquals(tuple(42L), out.get(0));
    }

    /**
     * Registers a Groovy class whose methods are annotated as the initial,
     * intermediate and final stages of an algebraic function named
     * {@code sumalg}, and checks that summing 1..4 over a GROUP ALL yields 10.
     */
    @Test
    public void testAlgebraicEvalFunc() throws Exception {
        String[] groovyStatements = {
            "import org.apache.pig.scripting.groovy.AlgebraicInitial;",
            "import org.apache.pig.scripting.groovy.AlgebraicIntermed;",
            "import org.apache.pig.scripting.groovy.AlgebraicFinal;",
            "class GroovyUDFs {",
            " @AlgebraicFinal('sumalg')",
            " public static long algFinal(Tuple t) {",
            " long x = 0;",
            " for (Object o: t[1]) {",
            " x = x + o;",
            " }",
            " return x;",
            " }",
            " @AlgebraicInitial('sumalg')",
            " public static Tuple algInitial(Tuple t) {",
            " long x = 0;",
            " for (Object o: t[1]) {",
            " x = x + o[0];",
            " }",
            " return [x];",
            " }",
            " @AlgebraicIntermed('sumalg')",
            " public static Tuple algIntermed(Tuple t) {",
            " long x = 0;",
            " for (Object o: t[1]) {",
            " x = x + o;",
            " }",
            " return [x];",
            " }",
            "}"
        };
        File tmpScriptFile = writeGroovyScript(groovyStatements);

        PigServer pigServer = new PigServer(Util.getLocalTestMode());
        pigServer.registerCode(tmpScriptFile.getCanonicalPath(), "groovy", "groovyudfs");

        Data data = resetData(pigServer);
        data.set("foo2",
            tuple(1),
            tuple(2),
            tuple(3),
            tuple(4)
        );

        pigServer.registerQuery("A = LOAD 'foo2' USING mock.Storage();");
        pigServer.registerQuery("B = GROUP A ALL;");
        pigServer.registerQuery("C = FOREACH B GENERATE groovyudfs.sumalg(A);");
        pigServer.registerQuery("STORE C INTO 'bar2' USING mock.Storage();");

        List<Tuple> out = data.get("bar2");
        assertEquals(1, out.size());
        assertEquals(tuple(10L), out.get(0));
    }

    /**
     * Registers a Groovy class whose methods are annotated as the accumulate,
     * get-value and cleanup parts of an accumulator function named
     * {@code sumacc}, invokes it twice in the same FOREACH, and checks that
     * both invocations yield 10 for the inputs 1..4.
     */
    @Test
    public void testAccumulatorEvalFunc() throws Exception {
        String[] groovyStatements = {
            "import org.apache.pig.builtin.OutputSchema;",
            "import org.apache.pig.scripting.groovy.AccumulatorAccumulate;",
            "import org.apache.pig.scripting.groovy.AccumulatorGetValue;",
            "import org.apache.pig.scripting.groovy.AccumulatorCleanup;",
            "class GroovyUDFs {",
            " private int sum = 0;",
            " @AccumulatorAccumulate('sumacc')",
            " public void accuAccumulate(Tuple t) {",
            " for (Object o: t[1]) {",
            " sum += o[0]",
            " }",
            " }",
            " @AccumulatorGetValue('sumacc')",
            " @OutputSchema('sum: long')",
            " public long accuGetValue() {",
            " return this.sum;",
            " }",
            " @AccumulatorCleanup('sumacc')",
            " public void accuCleanup() {",
            " this.sum = 0L;",
            " }",
            "}"
        };
        File tmpScriptFile = writeGroovyScript(groovyStatements);

        PigServer pigServer = new PigServer(Util.getLocalTestMode());
        pigServer.registerCode(tmpScriptFile.getCanonicalPath(), "groovy", "groovyudfs");

        Data data = resetData(pigServer);
        data.set("foo3",
            tuple(1),
            tuple(2),
            tuple(3),
            tuple(4)
        );

        pigServer.registerQuery("A = LOAD 'foo3' USING mock.Storage();");
        pigServer.registerQuery("B = GROUP A ALL;");
        pigServer.registerQuery("C = FOREACH B GENERATE groovyudfs.sumacc(A) AS sum1,groovyudfs.sumacc(A) AS sum2;");
        pigServer.registerQuery("STORE C INTO 'bar3' USING mock.Storage();");

        List<Tuple> out = data.get("bar3");
        assertEquals(1, out.size());
        assertEquals(tuple(10L,10L), out.get(0));
    }

    /**
     * Registers an untyped Groovy {@code square} function whose output schema
     * is supplied by a companion function named in {@code @OutputSchemaFunction},
     * and checks the squared values and their types for int, long, float and
     * double inputs.
     */
    @Test
    public void testOutputSchemaFunction() throws Exception {
        String[] groovyStatements = {
            "import org.apache.pig.scripting.groovy.OutputSchemaFunction;",
            "class GroovyUDFs {",
            " @OutputSchemaFunction('squareSchema')",
            " public static square(x) {",
            " return x * x;",
            " }",
            " public static squareSchema(input) {",
            " return input;",
            " }",
            "}"
        };
        File tmpScriptFile = writeGroovyScript(groovyStatements);

        PigServer pigServer = new PigServer(Util.getLocalTestMode());
        pigServer.registerCode(tmpScriptFile.getCanonicalPath(), "groovy", "groovyudfs");

        Data data = resetData(pigServer);
        data.set("foo4",
            tuple(1,1L,1.0F,1.0D),
            tuple(2,2L,2.0F,2.0D)
        );

        pigServer.registerQuery("A = LOAD 'foo4' USING mock.Storage() AS (i: int, l: long, f: float, d: double);");
        pigServer.registerQuery("B = FOREACH A GENERATE groovyudfs.square(i),groovyudfs.square(l),groovyudfs.square(f),groovyudfs.square(d);");
        pigServer.registerQuery("STORE B INTO 'bar4' USING mock.Storage();");

        List<Tuple> out = data.get("bar4");
        assertEquals(2, out.size());
        // Multiplying two floats leads to a double in Groovy, this is reflected here.
        assertEquals(tuple(1,1L,1.0D,1.0D), out.get(0));
        assertEquals(tuple(4,4L,4.0D,4.0D), out.get(1));
    }
}