blob: b9772f70a4bd66a0d7c5e02663796407120af29f [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.pig.test;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertTrue;
import java.io.IOException;
import java.util.Iterator;
import java.util.Map;
import java.util.Random;
import org.apache.pig.ResourceSchema;
import org.apache.pig.ResourceSchema.ResourceFieldSchema;
import org.apache.pig.builtin.PigStorage;
import org.apache.pig.builtin.Utf8StorageConverter;
import org.apache.pig.data.DataBag;
import org.apache.pig.data.DataByteArray;
import org.apache.pig.data.DataType;
import org.apache.pig.data.Tuple;
import org.apache.pig.data.TupleFactory;
import org.apache.pig.impl.logicalLayer.schema.Schema;
import org.apache.pig.impl.util.Utils;
import org.apache.pig.parser.ParserException;
import org.apache.pig.test.utils.GenRandomData;
import org.apache.pig.test.utils.TestHelper;
import org.joda.time.DateTime;
import org.junit.Test;
/**
* Test class to test conversions from bytes to types
* and vice versa
*
*/
public class TestConversions {
PigStorage ps = new PigStorage();
Random r = new Random(42L);
final int MAX = 10;
@Test
public void testBytesToBoolean() throws IOException {
// valid booleans
String[] a = { "true", "True", "TRUE", "false", "False", "FALSE" };
Boolean[] b = { Boolean.TRUE, Boolean.TRUE, Boolean.TRUE,
Boolean.FALSE, Boolean.FALSE, Boolean.FALSE };
for (int i = 0; i < b.length; ++i) {
byte[] bytes = a[i].getBytes();
assertEquals(b[i], ps.getLoadCaster().bytesToBoolean(bytes));
}
// invalid booleans
// the string that is neither "true" nor "false" cannot be converted to a boolean value
a = new String[] { "neither true nor false" };
for (String s : a) {
byte[] bytes = s.getBytes();
Boolean bool = ps.getLoadCaster().bytesToBoolean(bytes);
assertNull(bool);
}
}
@Test
public void testBytesToInteger() throws IOException
{
// valid ints
String[] a = {"1234 ", " 4321", " 12345 ", "1", "-2345", "1234567", "1.1", "-23.45", ""};
Integer[] ia = {1234, 4321, 12345, 1, -2345, 1234567, 1, -23};
for (int i = 0; i < ia.length; i++) {
byte[] b = a[i].getBytes();
assertEquals(ia[i], ps.getLoadCaster().bytesToInteger(b));
}
// invalid ints
a = new String[]{"1234567890123456", "This is an int", "", " "};
for (String s : a) {
byte[] b = s.getBytes();
Integer i = ps.getLoadCaster().bytesToInteger(b);
assertNull(i);
}
}
@Test
public void testBytesToFloat() throws IOException
{
// valid floats
String[] a = {"1", "-2.345", "12.12334567", "1.02e-2",".23344",
"23.1234567897", "12312.33", "002312.33", "1.02e-2", ""};
Float[] f = {1f, -2.345f, 12.12334567f, 1.02e-2f,.23344f, 23.1234567f, // 23.1234567f is a truncation case
12312.33f, 2312.33f, 1.02e-2f };
for (int j = 0; j < f.length; j++) {
byte[] b = a[j].getBytes();
assertEquals(f[j], ps.getLoadCaster().bytesToFloat(b));
}
// invalid floats
a = new String[]{"1a.1", "23.1234567a890123456", "This is a float", "", " "};
for (String s : a) {
byte[] b = s.getBytes();
Float fl = ps.getLoadCaster().bytesToFloat(b);
assertNull(fl);
}
}
@Test
public void testBytesToDouble() throws IOException
{
// valid doubles
String[] a = {"1", "-2.345", "12.12334567890123456", "1.02e12","-.23344", ""};
Double[] d = {1.0, -2.345, 12.12334567890123456, 1.02e12, -.23344};
for (int j = 0; j < d.length; j++) {
byte[] b = a[j].getBytes();
assertEquals(d[j], ps.getLoadCaster().bytesToDouble(b));
}
// invalid doubles
a = new String[]{"-0x1.1", "-23a.45", "This is a double", "", " "};
for (String s : a) {
byte[] b = s.getBytes();
Double dl = ps.getLoadCaster().bytesToDouble(b);
assertNull(dl);
}
}
@Test
public void testBytesToLong() throws IOException
{
// valid Longs
String[] a = {"1703598819951657279 ", " 999888123L ", " 1234 ", " 1234", "1234 ", "1", "-2345", "123456789012345678", "1.1", "-23.45",
"21345345", "3422342", ""};
Long[] la = {1703598819951657279L, 999888123L, 1234L, 1234L, 1234L, 1L, -2345L, 123456789012345678L, 1L, -23L,
21345345L, 3422342L};
for (int i = 0; i < la.length; i++) {
byte[] b = a[i].getBytes();
assertEquals(la[i], ps.getLoadCaster().bytesToLong(b));
}
// invalid longs
a = new String[]{"This is a long", "1.0e1000", "", " "};
for (String s : a) {
byte[] b = s.getBytes();
Long l = ps.getLoadCaster().bytesToLong(b);
assertNull(l);
}
}
@Test
public void testBytesToChar() throws IOException
{
// valid Strings
String[] a = {"1", "-2345", "text", "hello\nworld", ""};
for (String s : a) {
byte[] b = s.getBytes();
assertEquals(s, ps.getLoadCaster().bytesToCharArray(b));
}
}
@Test
public void testBytesToTuple() throws IOException
{
for (int i = 0; i < MAX; i++) {
Tuple t = GenRandomData.genRandSmallBagTextTuple(r, 1, 100);
ResourceFieldSchema fs = GenRandomData.getSmallBagTextTupleFieldSchema();
Tuple convertedTuple = ps.getLoadCaster().bytesToTuple(t.toString().getBytes(), fs);
assertEquals(t, convertedTuple);
}
}
@Test
public void testBytesToBag() throws IOException
{
ResourceFieldSchema fs = GenRandomData.getFullTupTextDataBagFieldSchema();
for (int i = 0; i < MAX; i++) {
DataBag b = GenRandomData.genRandFullTupTextDataBag(r,5,100);
DataBag convertedBag = ps.getLoadCaster().bytesToBag(b.toString().getBytes(), fs);
assertTrue(b.equals(convertedBag));
}
}
@Test
public void testBytesToMap() throws IOException
{
ResourceFieldSchema fs = GenRandomData.getRandMapFieldSchema();
for (int i = 0; i < MAX; i++) {
Map<String, Object> m = GenRandomData.genRandMap(r,5);
String expectedMapString = DataType.mapToString(m);
Map<String, Object> convertedMap = ps.getLoadCaster().bytesToMap(expectedMapString.getBytes(), fs);
assertTrue(TestHelper.mapEquals(m, convertedMap));
}
}
@Test
public void testBooleanToBytes() throws IOException {
assertTrue(DataType.equalByteArrays(Boolean.TRUE.toString().getBytes(), ((Utf8StorageConverter)ps.getLoadCaster()).toBytes(Boolean.TRUE)));
assertTrue(DataType.equalByteArrays(Boolean.FALSE.toString().getBytes(), ((Utf8StorageConverter)ps.getLoadCaster()).toBytes(Boolean.FALSE)));
}
@Test
public void testIntegerToBytes() throws IOException {
Integer i = r.nextInt();
assertTrue(DataType.equalByteArrays(i.toString().getBytes(), ((Utf8StorageConverter)ps.getLoadCaster()).toBytes(i)));
}
@Test
public void testLongToBytes() throws IOException {
Long l = r.nextLong();
assertTrue(DataType.equalByteArrays(l.toString().getBytes(), ((Utf8StorageConverter)ps.getLoadCaster()).toBytes(l)));
}
@Test
public void testFloatToBytes() throws IOException {
Float f = r.nextFloat();
assertTrue(DataType.equalByteArrays(f.toString().getBytes(), ((Utf8StorageConverter)ps.getLoadCaster()).toBytes(f)));
}
@Test
public void testDoubleToBytes() throws IOException {
Double d = r.nextDouble();
assertTrue(DataType.equalByteArrays(d.toString().getBytes(), ((Utf8StorageConverter)ps.getLoadCaster()).toBytes(d)));
}
@Test
public void testDateTimeToBytes() throws IOException {
DateTime dt = new DateTime(r.nextLong());
assertTrue(DataType.equalByteArrays(dt.toString().getBytes(), ((Utf8StorageConverter)ps.getLoadCaster()).toBytes(dt)));
}
@Test
public void testCharArrayToBytes() throws IOException {
String s = GenRandomData.genRandString(r);
assertTrue(s.equals(new String(((Utf8StorageConverter)ps.getLoadCaster()).toBytes(s))));
}
@Test
public void testTupleToBytes() throws IOException {
Tuple t = GenRandomData.genRandSmallBagTextTuple(r, 1, 100);
assertTrue(DataType.equalByteArrays(t.toString().getBytes(), ((Utf8StorageConverter)ps.getLoadCaster()).toBytes(t)));
}
@Test
public void testBagToBytes() throws IOException {
DataBag b = GenRandomData.genRandFullTupTextDataBag(r,5,100);
assertTrue(DataType.equalByteArrays(b.toString().getBytes(), ((Utf8StorageConverter)ps.getLoadCaster()).toBytes(b)));
}
@Test
public void testMapToBytes() throws IOException {
Map<String, Object> m = GenRandomData.genRandMap(r,5);
assertTrue(DataType.equalByteArrays(DataType.mapToString(m).getBytes(), ((Utf8StorageConverter)ps.getLoadCaster()).toBytes(m)));
}
@Test
public void testBytesToBagWithConversion() throws IOException {
DataBag b = GenRandomData.genFloatDataBag(r,5,100);
ResourceFieldSchema fs = GenRandomData.getFloatDataBagFieldSchema(5);
DataBag convertedBag = ps.getLoadCaster().bytesToBag(b.toString().getBytes(), fs);
Iterator<Tuple> iter1 = b.iterator();
Iterator<Tuple> iter2 = convertedBag.iterator();
for (int i=0;i<100;i++) {
Tuple t1 = (Tuple)iter1.next();
assertTrue(iter2.hasNext());
Tuple t2 = (Tuple)iter2.next();
for (int j=0;j<5;j++) {
assertTrue(t2.get(j) instanceof Integer);
Integer expectedValue = ((Float)t1.get(j)).intValue();
assertEquals(expectedValue, t2.get(j));
}
}
}
@Test
public void testBytesToTupleWithConversion() throws IOException {
for (int i=0;i<100;i++) {
Tuple t = GenRandomData.genMixedTupleToConvert(r);
ResourceFieldSchema fs = GenRandomData.getMixedTupleToConvertFieldSchema();
Tuple convertedTuple = ps.getLoadCaster().bytesToTuple(t.toString().getBytes(), fs);
assertTrue(convertedTuple.get(0) instanceof String);
assertEquals(convertedTuple.get(0), ((Integer)t.get(0)).toString());
assertTrue(convertedTuple.get(1) instanceof Long);
Integer origValue1 = (Integer)t.get(1);
assertEquals(convertedTuple.get(1), Long.valueOf(origValue1.longValue()));
assertNull(convertedTuple.get(2));
assertTrue(convertedTuple.get(3) instanceof Double);
Float origValue3 = (Float)t.get(3);
assertEquals(((Double)convertedTuple.get(3)).doubleValue(), origValue3.doubleValue(), 0.01);
assertTrue(convertedTuple.get(4) instanceof Float);
Double origValue4 = (Double)t.get(4);
assertEquals((Float)convertedTuple.get(4), origValue4.floatValue(), 0.01);
assertTrue(convertedTuple.get(5) instanceof String);
assertEquals(convertedTuple.get(5), t.get(5));
assertNull(convertedTuple.get(6));
assertNull(convertedTuple.get(7));
assertNull(convertedTuple.get(8));
assertTrue(convertedTuple.get(9) instanceof Boolean);
String origValue9 = (String)t.get(9);
assertEquals(Boolean.valueOf(origValue9), convertedTuple.get(9));
}
}
@Test
public void testBytesToComplexTypeMisc() throws IOException, ParserException {
String s = "(a,b";
Schema schema = Utils.getSchemaFromString("t:tuple(a:chararray, b:chararray)");
ResourceFieldSchema rfs = new ResourceSchema(schema).getFields()[0];
Tuple t = ps.getLoadCaster().bytesToTuple(s.getBytes(), rfs);
assertNull(t);
s = "{(a,b}";
schema = Utils.getSchemaFromString("b:bag{t:tuple(a:chararray, b:chararray)}");
rfs = new ResourceSchema(schema).getFields()[0];
DataBag b = ps.getLoadCaster().bytesToBag(s.getBytes(), rfs);
assertNull(b);
s = "{(a,b)";
schema = Utils.getSchemaFromString("b:bag{t:tuple(a:chararray, b:chararray)}");
rfs = new ResourceSchema(schema).getFields()[0];
b = ps.getLoadCaster().bytesToBag(s.getBytes(), rfs);
assertNull(b);
s = "[ab]";
schema = Utils.getSchemaFromString("m:map[chararray]");
rfs = new ResourceSchema(schema).getFields()[0];
Map<String, Object> m = ps.getLoadCaster().bytesToMap(s.getBytes(), rfs);
assertNull(m);
s = "[a#b";
m = ps.getLoadCaster().bytesToMap(s.getBytes(), rfs);
assertNull(m);
s = "[a#]";
m = ps.getLoadCaster().bytesToMap(s.getBytes(), rfs);
Map.Entry<String, Object> entry = m.entrySet().iterator().next();
assertEquals("a", entry.getKey());
assertNull(entry.getValue());
s = "[#]";
m = ps.getLoadCaster().bytesToMap(s.getBytes(), rfs);
assertNull(m);
s = "[a#}";
m = ps.getLoadCaster().bytesToMap(s.getBytes(), rfs);
assertNull(m);
s = "[a#)";
m = ps.getLoadCaster().bytesToMap(s.getBytes(), rfs);
assertNull(m);
s = "[]";
m = ps.getLoadCaster().bytesToMap(s.getBytes(), rfs);
assertTrue(m.isEmpty());
s = "(a,b)";
schema = Utils.getSchemaFromString("t:tuple()");
rfs = new ResourceSchema(schema).getFields()[0];
t = ps.getLoadCaster().bytesToTuple(s.getBytes(), rfs);
assertEquals(2, t.size());
assertTrue(t.get(0) instanceof DataByteArray);
assertEquals("a", t.get(0).toString());
assertTrue(t.get(1) instanceof DataByteArray);
assertEquals("b", t.get(1).toString());
s = "[a#(1,2,3)]";
schema = Utils.getSchemaFromString("m:map[]");
rfs = new ResourceSchema(schema).getFields()[0];
m = ps.getLoadCaster().bytesToMap(s.getBytes(), rfs);
entry = m.entrySet().iterator().next();
assertEquals("a", entry.getKey());
assertTrue(entry.getValue() instanceof DataByteArray);
assertEquals("(1,2,3)", entry.getValue().toString());
s = "(a,b,(123,456,{(1,2,3)}))";
schema = Utils.getSchemaFromString("t:tuple()");
rfs = new ResourceSchema(schema).getFields()[0];
t = ps.getLoadCaster().bytesToTuple(s.getBytes(), rfs);
assertTrue(t.size()==3);
assertTrue(t.get(0) instanceof DataByteArray);
assertEquals("a", t.get(0).toString());
assertTrue(t.get(1) instanceof DataByteArray);
assertEquals("b", t.get(1).toString());
assertTrue(t.get(2) instanceof DataByteArray);
assertEquals("(123,456,{(1,2,3)})", t.get(2).toString());
s = "(a,b,(123,456,{(1,2,3}))";
schema = Utils.getSchemaFromString("t:tuple()");
rfs = new ResourceSchema(schema).getFields()[0];
t = ps.getLoadCaster().bytesToTuple(s.getBytes(), rfs);
assertNull(t);
}
@Test
public void testOverflow() throws IOException, ParserException {
Schema schema;
ResourceFieldSchema rfs;
Tuple tuple, convertedTuple;
tuple = TupleFactory.getInstance().newTuple(1);
schema = Utils.getSchemaFromString("t:tuple(a:int)");
rfs = new ResourceSchema(schema).getFields()[0];
// long bigger than Integer.MAX_VALUE
tuple.set(0, Integer.valueOf(Integer.MAX_VALUE).longValue() + 1);
convertedTuple = ps.getLoadCaster().bytesToTuple(tuple.toString().getBytes(), rfs);
assertNull("Invalid cast to int: " + tuple.get(0) + " -> " + convertedTuple.get(0), convertedTuple.get(0));
// long smaller than Integer.MIN_VALUE
tuple.set(0, Integer.valueOf(Integer.MIN_VALUE).longValue() - 1);
convertedTuple = ps.getLoadCaster().bytesToTuple(tuple.toString().getBytes(), rfs);
assertNull("Invalid cast to int: " + tuple.get(0) + " -> " + convertedTuple.get(0), convertedTuple.get(0));
// double bigger than Integer.MAX_VALUE
tuple.set(0, Integer.valueOf(Integer.MAX_VALUE).doubleValue() + 1);
convertedTuple = ps.getLoadCaster().bytesToTuple(tuple.toString().getBytes(), rfs);
assertNull("Invalid cast to int: " + tuple.get(0) + " -> " + convertedTuple.get(0), convertedTuple.get(0));
// double smaller than Integer.MIN_VALUE
tuple.set(0, Integer.valueOf(Integer.MIN_VALUE).doubleValue() - 1);
convertedTuple = ps.getLoadCaster().bytesToTuple(tuple.toString().getBytes(), rfs);
assertNull("Invalid cast to int: " + tuple.get(0) + " -> " + convertedTuple.get(0), convertedTuple.get(0));
schema = Utils.getSchemaFromString("t:tuple(a:long)");
rfs = new ResourceSchema(schema).getFields()[0];
// double bigger than Long.MAX_VALUE
tuple.set(0, Long.valueOf(Long.MAX_VALUE).doubleValue() + 10000);
convertedTuple = ps.getLoadCaster().bytesToTuple(tuple.toString().getBytes(), rfs);
assertNull("Invalid cast to long: " + tuple.get(0) + " -> " + convertedTuple.get(0), convertedTuple.get(0));
// double smaller than Long.MIN_VALUE
tuple.set(0, Long.valueOf(Long.MIN_VALUE).doubleValue() - 10000);
convertedTuple = ps.getLoadCaster().bytesToTuple(tuple.toString().getBytes(), rfs);
assertNull("Invalid cast to long: " + tuple.get(0) + " -> " + convertedTuple.get(0), convertedTuple.get(0));
}
}