blob: 4568c82e4647e39decaa539300d3641d963da156 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
* http://www.apache.org/licenses/LICENSE-2.0
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.pig.test;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;
import java.io.DataInput;
import java.io.DataInputStream;
import java.io.DataOutput;
import java.io.DataOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.PipedInputStream;
import java.io.PipedOutputStream;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import org.apache.pig.backend.executionengine.ExecException;
import org.apache.pig.data.BagFactory;
import org.apache.pig.data.DataBag;
import org.apache.pig.data.DataByteArray;
import org.apache.pig.data.DataType;
import org.apache.pig.data.InternalMap;
import org.apache.pig.data.Tuple;
import org.apache.pig.data.TupleFactory;
import org.junit.Test;
/**
* This class exercises the basic Pig data model and its members. It tests for proper behavior in
* assignment and comparison, as well as function application.
*/
public class TestDataModel {
/**
 * Placeholder test with an empty body; it performs no checks and passes as long as
 * the test class itself can be loaded.
 */
@Test
public void testDatum() throws Exception {
// Datum is an empty class, but in the event we
// ever add a method, test it here
}
/**
 * Exercises flat tuples: construction, equality, ordering (arity first, then
 * field values), delimited export, and a write/readFields round trip over a pipe.
 *
 * @throws Exception if tuple construction or (de)serialization fails
 */
@Test
public void testTuple() throws Exception {
    TupleFactory tf = TupleFactory.getInstance();
    int arity = 5;
    int[] input1 = { 1, 2, 3, 4, 5 };
    String[] input3 = { "1", "2", "3", "4", "5", "6" };

    // validate construction and equality
    Tuple f1 = Util.loadFlatTuple(tf.newTuple(arity), input1);
    Tuple f2 = Util.loadFlatTuple(tf.newTuple(arity), input1);
    assertEquals(arity, f1.size());
    assertEquals(f1, f2);

    // invalid equality: a six-field tuple must not equal a five-field one
    f2 = Util.loadTuple(tf.newTuple(input3.length), input3);
    assertFalse(f1.equals(f2));

    // arity then value comparison behavior
    int[] input4 = { 1, 2, 3 };
    f1 = Util.loadFlatTuple(tf.newTuple(input1.length), input1); // 1,2,3,4,5
    f2 = Util.loadFlatTuple(tf.newTuple(input4.length), input4); // 1,2,3
    assertTrue(f1.compareTo(f2) > 0);
    assertFalse(f1.compareTo(f2) < 0);

    int[] input6 = { 1, 2, 3, 4, 6 };
    f2 = Util.loadFlatTuple(tf.newTuple(input6.length), input6);
    assertTrue(f1.compareTo(f2) < 0);
    assertFalse(f1.compareTo(f2) > 0);

    // delimited export
    String expected = "1:2:3:4:5";
    f1 = Util.loadFlatTuple(tf.newTuple(input1.length), input1);
    assertEquals(expected, f1.toDelimitedString(":"));

    // value read / write & marshalling
    PipedOutputStream pos = new PipedOutputStream();
    DataOutputStream dos = new DataOutputStream(pos);
    PipedInputStream pis = new PipedInputStream(pos);
    DataInputStream dis = new DataInputStream(pis);
    try {
        // Both copies are written before anything is read, so the serialized
        // form has to fit in the pipe's internal buffer.
        f1.write(dos);
        f1.write(dos);
        f2.readFields(dis);
        assertEquals(f1, f2);
        f2.readFields(dis);
        assertEquals(f1, f2);
    } finally {
        // Release both ends of the pipe even when an assertion fails.
        try {
            dos.close();
        } finally {
            dis.close();
        }
    }
}
/**
 * Smoke test for building nested tuples through {@code Util.loadNestTuple}.
 * It makes no assertions; it only checks that loading a 5x5 and a 2x2 input
 * completes without throwing.
 *
 * @throws Exception if the helper fails to populate a tuple
 */
@Test
public void testNestTuple() throws Exception {
    TupleFactory tf = TupleFactory.getInstance();
    int[][] input1 = { { 1, 2, 3, 4, 5 }, { 1, 2, 3, 4, 5 }, { 1, 2, 3, 4, 5 },
            { 1, 2, 3, 4, 5 }, { 1, 2, 3, 4, 5 } };
    int[][] input2 = { { 1, 2 }, { 1, 2 } };

    // The results were never inspected, so they are not kept in locals.
    Util.loadNestTuple(tf.newTuple(input1.length), input1);
    Util.loadNestTuple(tf.newTuple(input2.length), input2);
}
/**
 * Writes the "one of each" tuple to a temporary file twice with
 * {@code Tuple.write} and reads both copies back with {@code Tuple.readFields},
 * checking the type and value of every populated field.
 *
 * @throws Exception if the temp file cannot be used or (de)serialization fails
 */
@Test
public void testReadWrite() throws Exception {
    TupleFactory tf = TupleFactory.getInstance();
    Tuple t1 = giveMeOneOfEach();
    File file = File.createTempFile("Tuple", "put");
    try {
        DataOutputStream out = new DataOutputStream(new FileOutputStream(file));
        try {
            t1.write(out);
            t1.write(out); // twice in a row on purpose
        } finally {
            out.close();
        }

        DataInputStream in = new DataInputStream(new FileInputStream(file));
        try {
            for (int i = 0; i < 2; i++) {
                Tuple after = tf.newTuple();
                after.readFields(in);
                assertOneOfEach(after);
            }
        } finally {
            // Close before deleting; an open handle can block deletion on some platforms.
            in.close();
        }
    } finally {
        // Remove the temp file even when an assertion above fails.
        file.delete();
    }
}

/**
 * Asserts that fields 0-9 of {@code after} hold the types and values that
 * {@code giveMeOneOfEach()} stores in them.
 */
private void assertOneOfEach(Tuple after) throws Exception {
    // field 0: nested tuple (3, 3.0)
    Object o = after.get(0);
    assertTrue("isa Tuple", o instanceof Tuple);
    Tuple nested = (Tuple) o;
    o = nested.get(0);
    assertTrue("isa Integer", o instanceof Integer);
    assertEquals(Integer.valueOf(3), o);
    o = nested.get(1);
    assertTrue("isa Float", o instanceof Float);
    assertEquals(Float.valueOf(3.0f), o);

    // field 1: bag with two single-field tuples
    o = after.get(1);
    assertTrue("isa Bag", o instanceof DataBag);
    Iterator<Tuple> j = ((DataBag) o).iterator();
    assertTrue("first tuple in bag", j.hasNext());
    Tuple first = j.next();
    assertTrue("second tuple in bag", j.hasNext());
    Tuple second = j.next();
    o = first.get(0);
    assertTrue("isa Integer", o instanceof Integer);
    assertEquals(Integer.valueOf(4), o);
    o = second.get(0);
    assertTrue("isa String", o instanceof String);
    assertEquals("mary had a little lamb", o);

    // field 2: map
    o = after.get(2);
    assertTrue("isa Map", o instanceof Map);
    // Safe for this test: lookups on the map only compare values via equals().
    @SuppressWarnings("unchecked")
    Map<String, Object> m = (Map<String, Object>) o;
    assertEquals("world", m.get("hello"));
    assertEquals("all", m.get("goodbye"));
    assertNull(m.get("fred"));

    // fields 3-9: scalars
    o = after.get(3);
    assertTrue("isa Integer", o instanceof Integer);
    assertEquals(Integer.valueOf(42), o);
    o = after.get(4);
    assertTrue("isa Long", o instanceof Long);
    assertEquals(Long.valueOf(5000000000L), o);
    o = after.get(5);
    assertTrue("isa Float", o instanceof Float);
    // Narrow from the double literal, exactly as new Float(double) did.
    assertEquals(Float.valueOf((float) 3.141592654), o);
    o = after.get(6);
    assertTrue("isa Double", o instanceof Double);
    assertEquals(Double.valueOf(2.99792458e8), o);
    o = after.get(7);
    assertTrue("isa Boolean", o instanceof Boolean);
    assertTrue(((Boolean) o).booleanValue());
    o = after.get(8);
    assertTrue("isa DataByteArray", o instanceof DataByteArray);
    assertEquals(new DataByteArray("hello"), o);
    o = after.get(9);
    assertTrue("isa String", o instanceof String);
    assertEquals("goodbye", o);
}
/**
 * Round-trips a tuple whose only field is an {@code InternalMap} through a
 * temporary file using {@code Tuple.write} and {@code Tuple.readFields}, then
 * checks that the map entries survive.
 *
 * @throws Exception if the temp file cannot be used or (de)serialization fails
 */
@Test
public void testReadWriteInternal() throws Exception {
    TupleFactory tf = TupleFactory.getInstance();
    Tuple t1 = tf.newTuple(1);
    InternalMap map = new InternalMap(2);
    map.put(Integer.valueOf(1), "world");
    map.put(Long.valueOf(3L), "all");
    t1.set(0, map);

    File file = File.createTempFile("Tuple", "put");
    try {
        DataOutputStream out = new DataOutputStream(new FileOutputStream(file));
        try {
            t1.write(out);
        } finally {
            out.close();
        }

        Tuple after = tf.newTuple();
        DataInputStream in = new DataInputStream(new FileInputStream(file));
        try {
            after.readFields(in);
        } finally {
            // Close before deleting; an open handle can block deletion on some platforms.
            in.close();
        }

        Object o = after.get(0);
        assertTrue("isa InternalMap", o instanceof InternalMap);
        InternalMap m = (InternalMap) o;
        assertEquals("world", m.get(Integer.valueOf(1)));
        assertEquals("all", m.get(Long.valueOf(3L)));
        assertNull(m.get("fred"));
    } finally {
        // Remove the temp file even when an assertion above fails.
        file.delete();
    }
}
/**
 * Checks the exact string rendering of the "one of each" tuple, including the
 * trailing empty slot produced by its final, unset field.
 */
@Test
public void testTupleToString() throws Exception {
    String rendered = giveMeOneOfEach().toString();
    String wanted =
            "((3,3.0),{(4),(mary had a little lamb)},[hello#world,goodbye#all],42,5000000000,3.1415927,2.99792458E8,true,hello,goodbye,)";
    assertEquals("toString", wanted, rendered);
}
/**
 * Checks that tuples with equal contents share a hash code, that the specific
 * tuples built here with a different size or different data hash differently,
 * and that hashing a tuple containing every supported type does not throw.
 */
@Test
public void testTupleHashCode() throws Exception {
    TupleFactory tf = TupleFactory.getInstance();

    Tuple t1 = tf.newTuple(2);
    t1.set(0, new DataByteArray("hello world"));
    t1.set(1, Integer.valueOf(1));

    Tuple t2 = tf.newTuple(2);
    t2.set(0, new DataByteArray("hello world"));
    t2.set(1, Integer.valueOf(1));
    assertEquals("same data", t1.hashCode(), t2.hashCode());

    Tuple t3 = tf.newTuple(3);
    t3.set(0, new DataByteArray("hello world"));
    t3.set(1, Integer.valueOf(1));
    t3.set(2, Long.valueOf(4));
    assertFalse("different size", t1.hashCode() == t3.hashCode());

    Tuple t4 = tf.newTuple(2);
    t4.set(0, new DataByteArray("hello world"));
    t4.set(1, Integer.valueOf(2));
    assertFalse("same size, different data", t1.hashCode() == t4.hashCode());

    // Make sure we can take the hash code of all the types.
    Tuple t5 = giveMeOneOfEach();
    t5.hashCode();
}
/**
 * Checks {@code Tuple.equals} against a non-tuple object, an identical tuple,
 * a tuple with a different value, and a tuple with a different field count.
 */
@Test
public void testTupleEquals() throws Exception {
    TupleFactory tf = TupleFactory.getInstance();
    Tuple t1 = tf.newTuple();
    Tuple t2 = tf.newTuple();
    t1.append(Integer.valueOf(3));
    t2.append(Integer.valueOf(3));

    // Any non-tuple object will do; an empty string is the simplest.
    assertFalse("different object", t1.equals(""));
    assertTrue("same data", t1.equals(t2));

    t2 = tf.newTuple();
    t2.append(Integer.valueOf(4));
    assertFalse("different data", t1.equals(t2));

    t2 = tf.newTuple();
    t2.append(Integer.valueOf(3));
    t2.append(Integer.valueOf(3));
    assertFalse("different size", t1.equals(t2));
}
/**
 * Checks {@code Tuple.compareTo} for single-field tuples: equal data, lesser
 * and greater values, and tuples with more or fewer fields.
 */
@Test
public void testTupleCompareTo() throws Exception {
    TupleFactory tf = TupleFactory.getInstance();
    Tuple t1 = tf.newTuple();
    Tuple t2 = tf.newTuple();
    t1.append(Integer.valueOf(3));
    t2.append(Integer.valueOf(3));
    assertEquals("same data equal", 0, t1.compareTo(t2));

    t2 = tf.newTuple();
    t2.append(Integer.valueOf(2));
    assertTrue("greater than tuple with lesser value", 0 < t1.compareTo(t2));

    t2 = tf.newTuple();
    t2.append(Integer.valueOf(4));
    assertTrue("less than tuple with greater value", 0 > t1.compareTo(t2));

    t2 = tf.newTuple();
    t2.append(Integer.valueOf(3));
    t2.append(Integer.valueOf(4));
    assertTrue("less than bigger tuple", 0 > t1.compareTo(t2));

    // An empty tuple has fewer fields than t1.
    t2 = tf.newTuple();
    assertTrue("greater than smaller tuple", 0 < t1.compareTo(t2));
}
/**
 * Checks {@code Tuple.compareTo} for two-field tuples of byte arrays. The
 * reference tuple is ("bbb", "bbb"); each case varies one or both columns of the
 * other tuple and expects the first differing column to decide the ordering.
 */
@Test
public void testMultiFieldTupleCompareTo() throws Exception {
    Tuple t1 = newByteArrayPair("bbb", "bbb");

    Tuple t2 = newByteArrayPair("bbb", "bbb");
    assertEquals("same data equal", 0, t1.compareTo(t2));

    t2 = newByteArrayPair("aaa", "aaa");
    assertTrue("greater than tuple with lesser value", 0 < t1.compareTo(t2));

    t2 = newByteArrayPair("ddd", "ddd");
    assertTrue("less than tuple with greater value", 0 > t1.compareTo(t2));

    // First column same, second lesser
    t2 = newByteArrayPair("bbb", "aaa");
    assertTrue("greater than tuple with lesser second column", 0 < t1.compareTo(t2));

    // First column same, second greater
    t2 = newByteArrayPair("bbb", "ccc");
    assertTrue("less than tuple with greater second column", 0 > t1.compareTo(t2));

    // First column less, second same
    t2 = newByteArrayPair("aaa", "bbb");
    assertTrue("greater than tuple with lesser first column", 0 < t1.compareTo(t2));

    // First column greater, second same
    t2 = newByteArrayPair("ccc", "bbb");
    assertTrue("less than tuple with greater first column", 0 > t1.compareTo(t2));

    // First column less, second greater
    t2 = newByteArrayPair("aaa", "ccc");
    assertTrue("greater than tuple with lesser first column despite greater second",
            0 < t1.compareTo(t2));

    // First column greater, second lesser
    t2 = newByteArrayPair("ccc", "aaa");
    assertTrue("less than tuple with greater first column despite lesser second",
            0 > t1.compareTo(t2));
}

/** Builds a two-field tuple by appending a DataByteArray for each string. */
private Tuple newByteArrayPair(String first, String second) {
    Tuple pair = TupleFactory.getInstance().newTuple();
    pair.append(new DataByteArray(first));
    pair.append(new DataByteArray(second));
    return pair;
}
/**
 * Checks that a DataByteArray built from a string renders back as that string.
 */
@Test
public void testByteArrayToString() throws Exception {
    String rendered = new DataByteArray("hello world").toString();
    assertEquals("toString", "hello world", rendered);
}
/**
 * Checks that equal byte arrays share a hash code and that the two different
 * sample values used here do not.
 */
@Test
public void testByteArrayHashCode() throws Exception {
    DataByteArray hello = new DataByteArray("hello world");
    DataByteArray helloAgain = new DataByteArray("hello world");
    DataByteArray goodbye = new DataByteArray("goodbye world");

    int helloHash = hello.hashCode();
    assertEquals("same data", helloHash, helloAgain.hashCode());

    boolean collides = helloHash == goodbye.hashCode();
    assertFalse("different data", collides);
}
/**
 * Checks {@code DataByteArray.equals} for identical and for different contents.
 */
@Test
public void testByteArrayEquals() throws Exception {
    DataByteArray hello = new DataByteArray("hello world");

    boolean sameContent = hello.equals(new DataByteArray("hello world"));
    assertTrue("same data", sameContent);

    boolean otherContent = hello.equals(new DataByteArray("goodbye world"));
    assertFalse("different data", otherContent);
}
/**
 * Checks {@code DataByteArray.compareTo} in both directions for equal data,
 * for values that differ in content, and for a value that is a strict prefix of
 * another.
 */
@Test
public void testByteArrayCompareTo() throws Exception {
    DataByteArray hello = new DataByteArray("hello world");
    DataByteArray other = new DataByteArray("hello world");
    DataByteArray goodbye = new DataByteArray("goodbye world");

    assertTrue("same data", hello.compareTo(other) == 0);

    // Different lengths, decided by content.
    assertTrue("different length lexically lower value less than",
            goodbye.compareTo(hello) < 0);
    assertTrue("different length lexically higher value greater than",
            hello.compareTo(goodbye) > 0);

    // Same length, last byte differs.
    other = new DataByteArray("hello worlc");
    assertTrue("same length lexically lower value less than",
            other.compareTo(hello) < 0);
    assertTrue("same length lexically higher value greater than",
            hello.compareTo(other) > 0);

    // One value is a strict prefix of the other.
    other = new DataByteArray("hello worlds");
    assertTrue("shorter lexically same value less than",
            hello.compareTo(other) < 0);
    assertTrue("longer lexically same value greater than",
            other.compareTo(hello) > 0);
}
/**
 * Expects {@code DataType.toInteger} to reject a List with an ExecException
 * carrying error code 1071.
 */
@Test(expected = ExecException.class)
public void testIntegerConversionErr() throws Exception {
    List<Object> list = new ArrayList<Object>();
    try {
        DataType.toInteger(list);
    } catch (ExecException ee) {
        assertEquals(1071, ee.getErrorCode());
        // Rethrow so the expected-exception check on @Test is satisfied.
        throw ee;
    }
}
/**
 * Expects {@code DataType.toInteger} to reject a non-numeric byte array with an
 * ExecException carrying error code 1074.
 */
@Test(expected = ExecException.class)
public void testIntegerConversionErr1() throws Exception {
    DataByteArray notANumber = new DataByteArray("hello world");
    try {
        DataType.toInteger(notANumber);
    } catch (ExecException conversionFailure) {
        int code = conversionFailure.getErrorCode();
        assertEquals(1074, code);
        // Propagate so the expected-exception check on @Test is satisfied.
        throw conversionFailure;
    }
}
/**
 * Expects {@code DataType.toTuple} to reject a List with an ExecException
 * carrying error code 1071.
 */
@Test(expected = ExecException.class)
public void testTupleConversionErr() throws Exception {
    List<Object> list = new ArrayList<Object>();
    try {
        DataType.toTuple(list);
    } catch (ExecException ee) {
        assertEquals(1071, ee.getErrorCode());
        // Rethrow so the expected-exception check on @Test is satisfied.
        throw ee;
    }
}
/**
 * Expects {@code DataType.toTuple} to reject a byte array with an ExecException
 * carrying error code 1071.
 */
@Test(expected = ExecException.class)
public void testTupleConversionErr1() throws Exception {
    DataByteArray notATuple = new DataByteArray("hello world");
    try {
        DataType.toTuple(notATuple);
    } catch (ExecException conversionFailure) {
        int code = conversionFailure.getErrorCode();
        assertEquals(1071, code);
        // Propagate so the expected-exception check on @Test is satisfied.
        throw conversionFailure;
    }
}
/**
 * Checks that appending one DataByteArray to another concatenates their contents
 * in place.
 */
@Test
public void testByteArrayAppend() throws Exception {
    DataByteArray expected = new DataByteArray("hello world");
    DataByteArray db1 = new DataByteArray("hello ");
    DataByteArray db2 = new DataByteArray("world");
    db1.append(db2);
    // JUnit order is (message, expected, actual); keep it so failures read correctly.
    assertEquals("appends as expected", expected, db1);
}
/**
 * Checks that {@code append} calls can be chained and that each one extends the
 * original byte array.
 */
@Test
public void testByteArrayAppendMore() throws Exception {
    DataByteArray expected = new DataByteArray("hello world!");
    DataByteArray db1 = new DataByteArray("hello ");
    DataByteArray db2 = new DataByteArray("world");
    DataByteArray db3 = new DataByteArray("!");
    db1.append(db2).append(db3);
    // JUnit order is (message, expected, actual); keep it so failures read correctly.
    assertEquals("appends as expected", expected, db1);
}
/**
 * Checks that appending a raw {@code byte[]} extends the byte array in place.
 */
@Test
public void testByteArrayAppendBytes() throws Exception {
    DataByteArray expected = new DataByteArray("hello world");
    DataByteArray db1 = new DataByteArray("hello ");
    // Pin the charset so the bytes do not depend on the platform default encoding.
    byte[] db2 = "world".getBytes("UTF-8");
    db1.append(db2);
    // JUnit order is (message, expected, actual); keep it so failures read correctly.
    assertEquals("appends as expected", expected, db1);
}
/**
 * Checks that appending a String extends the byte array in place.
 */
@Test
public void testByteArrayAppendString() throws Exception {
    DataByteArray expected = new DataByteArray("hello world");
    DataByteArray db1 = new DataByteArray("hello ");
    db1.append("world");
    // JUnit order is (message, expected, actual); keep it so failures read correctly.
    assertEquals("appends as expected", expected, db1);
}
/**
 * Expects {@code DataType.toMap} to reject a List with an ExecException carrying
 * error code 1071.
 */
@Test(expected = ExecException.class)
public void testMapConversionErr() throws Exception {
    List<Object> list = new ArrayList<Object>();
    try {
        DataType.toMap(list);
    } catch (ExecException ee) {
        assertEquals(1071, ee.getErrorCode());
        // Rethrow so the expected-exception check on @Test is satisfied.
        throw ee;
    }
}
/**
 * Smoke test: passing an empty {@code Map<Integer, Float>} to
 * {@code DataType.toMap} must not throw. The result is not inspected.
 */
@Test
public void testMapConversion() throws Exception {
    Map<Integer, Float> emptyMap = new HashMap<Integer, Float>();
    DataType.toMap(emptyMap);
}
/**
 * Expects {@code DataType.determineFieldSchema} to reject a List with an
 * ExecException carrying error code 1073.
 */
@Test(expected = ExecException.class)
public void testDetermineFieldSchemaErr() throws Exception {
    List<Object> list = new ArrayList<Object>();
    try {
        DataType.determineFieldSchema(list);
        fail("Error expected.");
    } catch (ExecException ee) {
        assertEquals(1073, ee.getErrorCode());
        // Rethrow so the expected-exception check on @Test is satisfied.
        throw ee;
    }
}
/**
 * Builds the fixture tuple shared by several tests. It is created with 11 slots
 * and fields 0-9 are assigned, in order: a nested tuple, a bag, a map, an
 * Integer, a Long, a Float, a Double, a Boolean, a DataByteArray and a String.
 * The final slot is deliberately left unassigned; testTupleToString expects the
 * resulting trailing empty field.
 *
 * @return the populated tuple
 * @throws Exception if a field cannot be set
 */
private Tuple giveMeOneOfEach() throws Exception {
    TupleFactory tf = TupleFactory.getInstance();
    Tuple t1 = tf.newTuple(11);

    Tuple t2 = tf.newTuple(2);
    t2.set(0, Integer.valueOf(3));
    t2.set(1, Float.valueOf(3.0f));

    DataBag bag = BagFactory.getInstance().newDefaultBag();
    // Pass a boxed Integer, not an int literal: newTuple(int) would instead
    // create an empty tuple of that size.
    bag.add(tf.newTuple(Integer.valueOf(4)));
    bag.add(tf.newTuple("mary had a little lamb"));

    // LinkedHashMap keeps insertion order, which the expected toString output relies on.
    Map<String, Object> map = new LinkedHashMap<String, Object>(2);
    map.put("hello", "world");
    map.put("goodbye", "all");

    t1.set(0, t2);
    t1.set(1, bag);
    t1.set(2, map);
    t1.set(3, Integer.valueOf(42));
    t1.set(4, Long.valueOf(5000000000L));
    // Narrow from the double literal, exactly as new Float(double) did.
    t1.set(5, Float.valueOf((float) 3.141592654));
    t1.set(6, Double.valueOf(2.99792458e8));
    t1.set(7, Boolean.TRUE);
    t1.set(8, new DataByteArray("hello"));
    t1.set(9, "goodbye");
    return t1;
}
}