PIG-5067: Revisit union on numeric type and chararray to bytearray (knoguchi)
git-svn-id: https://svn.apache.org/repos/asf/pig/trunk@1772368 13f79535-47bb-0310-9956-ffa450edef68
diff --git a/CHANGES.txt b/CHANGES.txt
index fefdf6c..ecf0541 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -21,6 +21,8 @@
Trunk (unreleased changes)
INCOMPATIBLE CHANGES
+
+PIG-5067: Revisit union on numeric type and chararray to bytearray (knoguchi)
IMPROVEMENTS
diff --git a/src/docs/src/documentation/content/xdocs/basic.xml b/src/docs/src/documentation/content/xdocs/basic.xml
index 050530b..f4066a6 100644
--- a/src/docs/src/documentation/content/xdocs/basic.xml
+++ b/src/docs/src/documentation/content/xdocs/basic.xml
@@ -713,7 +713,7 @@
<p></p>
<ul>
<li>
- <p>If Pig cannot resolve incompatible types through implicit casts, an error will occur. For example, you cannot add chararray and float (see the Types Table for addition and subtraction).</p>
+ <p>If Pig cannot resolve incompatible types through implicit casts, an error will occur. For example, you cannot add chararray and float (see the <a href="#types-table-add">Types Table for addition and subtraction</a>).</p>
<source>
A = LOAD 'data' AS (name:chararray, age:int, gpa:float);
B = FOREACH A GENERATE name + gpa;
@@ -8503,11 +8503,11 @@
A union B: null
</source>
-<p>Union columns with incompatible types result in a bytearray type: </p>
+<p>Union columns with incompatible types result in a failure. (See <a href="#types-table-add">Types Table for addition and subtraction</a> for incompatible types.)</p>
<source>
-A: (a1:long, a2:long)
-B: (b1:(b11:long, b12:long), b2:long)
-A union B: (a1:bytearray, a2:long)
+A: (a1:long)
+B: (a1:chararray)
+A union B: ERROR: Cannot cast from long to bytearray
</source>
<p>Union columns of compatible type will produce an "escalate" type.
diff --git a/src/org/apache/pig/newplan/logical/visitor/TypeCheckingExpVisitor.java b/src/org/apache/pig/newplan/logical/visitor/TypeCheckingExpVisitor.java
index db1ca48..81fbe28 100644
--- a/src/org/apache/pig/newplan/logical/visitor/TypeCheckingExpVisitor.java
+++ b/src/org/apache/pig/newplan/logical/visitor/TypeCheckingExpVisitor.java
@@ -491,7 +491,7 @@
byte outType = cast.getType();
if(outType == DataType.BYTEARRAY && inType != outType) {
int errCode = 1051;
- String msg = "Cannot cast to bytearray";
+ String msg = "Cannot cast from " + DataType.findTypeName(inType) + " to bytearray";
msgCollector.collect(msg, MessageType.Error) ;
throw new TypeCheckerException(cast, msg, errCode, PigException.INPUT) ;
}
diff --git a/src/org/apache/pig/newplan/logical/visitor/TypeCheckingRelVisitor.java b/src/org/apache/pig/newplan/logical/visitor/TypeCheckingRelVisitor.java
index d6c4cff..fd7ea74 100644
--- a/src/org/apache/pig/newplan/logical/visitor/TypeCheckingRelVisitor.java
+++ b/src/org/apache/pig/newplan/logical/visitor/TypeCheckingRelVisitor.java
@@ -351,7 +351,8 @@
if (outFieldSchema.type != fs.type) {
castNeededCounter++ ;
- new CastExpression(genPlan, project, outFieldSchema);
+ CastExpression castexp = new CastExpression(genPlan, project, outFieldSchema);
+ castexp.setLocation(toOp.getLocation());
}
generatePlans.add(genPlan) ;
diff --git a/src/org/apache/pig/newplan/logical/visitor/UnionOnSchemaSetter.java b/src/org/apache/pig/newplan/logical/visitor/UnionOnSchemaSetter.java
index 3d5ce68..fea4262 100644
--- a/src/org/apache/pig/newplan/logical/visitor/UnionOnSchemaSetter.java
+++ b/src/org/apache/pig/newplan/logical/visitor/UnionOnSchemaSetter.java
@@ -21,6 +21,7 @@
import java.util.ArrayList;
import java.util.List;
+import org.apache.pig.PigException;
import org.apache.pig.data.DataType;
import org.apache.pig.impl.logicalLayer.FrontendException;
import org.apache.pig.impl.util.Pair;
@@ -110,9 +111,20 @@
} else {
ProjectExpression projExpr =
new ProjectExpression( exprPlan, genInputs.size(), 0, gen );
- if( fs.type != DataType.BYTEARRAY
- && opSchema.getField( pos ).type != fs.type ) {
- new CastExpression( exprPlan, projExpr, fs );
+ if( opSchema.getField( pos ).type != fs.type ) {
+ if( fs.type != DataType.BYTEARRAY ) {
+ CastExpression castexpr = new CastExpression( exprPlan, projExpr, fs );
+ castexpr.setLocation(union.getLocation());
+ } else {
+ int errCode = 1056;
+ String msg = "Union of incompatible types not allowed. "
+ + "Cannot cast from "
+ + DataType.findTypeName(opSchema.getField( pos ).type)
+ + " to bytearray for '"
+ + opSchema.getField( pos ).alias
+ + "'. Please typecast to compatible types before union." ;
+ throw new FrontendException(union, msg, errCode, PigException.INPUT) ;
+ }
}
genInputs.add( new LOInnerLoad( innerPlan, foreach, pos ) );
}
diff --git a/test/e2e/pig/tests/nightly.conf b/test/e2e/pig/tests/nightly.conf
index 2b0a50a..da7528d 100644
--- a/test/e2e/pig/tests/nightly.conf
+++ b/test/e2e/pig/tests/nightly.conf
@@ -4872,21 +4872,6 @@
b = load ':INPATH:/singlefile/allscalar10k' using PigStorage() as (name:chararray, age:int, gpa:double, instate:chararray);
C = union a, b;
store C into ':OUTPATH:';\,
- },
- {
- # Test Union using merge with incompatible types. float->bytearray and chararray->bytearray
- 'num' => 8,
- 'delimiter' => ' ',
- 'pig' => q\
-A = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name:chararray, age:int);
-B = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name:chararray, age:chararray);
-C = union onschema A, B;
-store C into ':OUTPATH:';\,
- 'verify_pig_script' => q\
-A = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name:chararray, age:bytearray);
-B = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name:chararray, age:bytearray);
-C = union A, B;
-store C into ':OUTPATH:';\,
}
]
diff --git a/test/org/apache/pig/test/TestUnionOnSchema.java b/test/org/apache/pig/test/TestUnionOnSchema.java
index 170ef78..1da3885 100644
--- a/test/org/apache/pig/test/TestUnionOnSchema.java
+++ b/test/org/apache/pig/test/TestUnionOnSchema.java
@@ -96,8 +96,6 @@
/**
* Test UNION ONSCHEMA on two inputs with same schema
- * @throws IOException
- * @throws ParserException
*/
@Test
public void testUnionOnSchemaSameSchema() throws Exception {
@@ -128,8 +126,6 @@
/**
* Test UNION ONSCHEMA with operations after the union
- * @throws IOException
- * @throws ParserException
*/
@Test
public void testUnionOnSchemaFilter() throws Exception {
@@ -161,8 +157,6 @@
/**
* Test UNION ONSCHEMA with operations after the union
- * @throws IOException
- * @throws ParserException
*/
@Test
public void testUnionOnSchemaSuccOps() throws Exception {
@@ -194,8 +188,6 @@
/**
* Test UNION ONSCHEMA with cast from bytearray to another type
- * @throws IOException
- * @throws ParserException
*/
@Test
public void testUnionOnSchemaCastOnByteArray() throws Exception {
@@ -223,8 +215,6 @@
/**
* Test UNION ONSCHEMA where a common column has additional 'namespace' part
* in the column name in one of the inputs
- * @throws IOException
- * @throws ParserException
*/
@Test
public void testUnionOnSchemaScopedColumnName() throws Exception {
@@ -266,8 +256,6 @@
/**
* Test UNION ONSCHEMA where a common column has additional 'namespace' part
* in the column name in both the inputs
- * @throws IOException
- * @throws ParserException
*/
@Test
public void testUnionOnSchemaScopedColumnNameBothInp1() throws Exception {
@@ -302,8 +290,6 @@
/**
* Test UNION ONSCHEMA where a common column has additional 'namespace' part
* in the column name in both the inputs
- * @throws IOException
- * @throws ParserException
*/
@Test
public void testUnionOnSchemaScopedColumnNameBothInp2() throws Exception {
@@ -340,8 +326,6 @@
* Test UNION ONSCHEMA where a common column has additional 'namespace' part
* in the column name in one of the inputs.
* Negative test case
- * @throws IOException
- * @throws ParserException
*/
@Test
public void testUnionOnSchemaScopedColumnNameNeg() throws Exception {
@@ -366,8 +350,6 @@
/**
* Test UNION ONSCHEMA on two inputs with same column names, but different
* numeric types - test type promotion
- * @throws IOException
- * @throws ParserException
*/
@Test
public void testUnionOnSchemaDiffNumType() throws Exception {
@@ -396,8 +378,6 @@
/**
* Test UNION ONSCHEMA on two inputs with no common columns
- * @throws IOException
- * @throws ParserException
*/
@Test
public void testUnionOnSchemaNoCommonCols() throws Exception {
@@ -424,8 +404,6 @@
/**
* Test UNION ONSCHEMA on two inputs , one input with additional columns
- * @throws IOException
- * @throws ParserException
*/
@Test
public void testUnionOnSchemaAdditionalColumn() throws Exception {
@@ -498,8 +476,6 @@
/**
* Test UNION ONSCHEMA on 3 inputs
- * @throws IOException
- * @throws ParserException
*/
@Test
public void testUnionOnSchema3Inputs() throws Exception {
@@ -533,8 +509,6 @@
/**
* Test UNION ONSCHEMA with bytearray type
- * @throws IOException
- * @throws ParserException
*/
@Test
public void testUnionOnSchemaByteArrayConversions() throws Exception {
@@ -572,8 +546,6 @@
/**
* negative test - test error on no schema
- * @throws IOException
- * @throws ParserException
*/
@Test
public void testUnionOnSchemaNoSchema() throws Exception {
@@ -597,8 +569,6 @@
/**
* negative test - test error on null alias in one of the FieldSchema
- * @throws IOException
- * @throws ParserException
*/
@Test
public void testUnionOnSchemaNullAliasInFieldSchema() throws Exception {
@@ -640,8 +610,6 @@
/**
* test union with incompatible types in schema
- * @throws IOException
- * @throws ParserException
*/
@Test
public void testUnionOnSchemaIncompatibleTypes() throws Exception {
@@ -650,7 +618,15 @@
+ "l2 = load '" + INP_FILE_2NUMS + "' as (x : long, y : float);"
+ "u = union onschema l1, l2;";
- checkSchemaEquals(query, "x : long, y : bytearray");
+ checkSchemaEx(query, "Cannot cast from chararray to bytearray");
+
+ // same inputs, but with a plain union (without "onschema")
+ query =
+ " l1 = load '" + INP_FILE_2NUMS + "' as (x : long, y : chararray);"
+ + "l2 = load '" + INP_FILE_2NUMS + "' as (x : long, y : float);"
+ + "u = union l1, l2;";
+
+ checkSchemaEx(query, "Cannot cast from chararray to bytearray");
@@ -659,8 +635,15 @@
+ "l2 = load '" + INP_FILE_2NUMS + "' as (x : map[ ], y : chararray);"
+ "u = union onschema l1, l2;"
;
- checkSchemaEquals(query, "x : bytearray, y : chararray");
+ checkSchemaEx(query, "Cannot cast from long to bytearray");
+ query =
+ " l1 = load '" + INP_FILE_2NUMS + "' as (x : long, y : chararray);"
+ + "l2 = load '" + INP_FILE_2NUMS + "' as (x : map[ ], y : chararray);"
+ + "u = union l1, l2;"
+ ;
+ checkSchemaEx(query, "Cannot cast from long to bytearray");
+
// bag column with different internal column types
query =
" l1 = load '" + INP_FILE_2NUMS
@@ -708,8 +691,6 @@
/**
* Test UNION ONSCHEMA with input relation having udfs
- * @throws IOException
- * @throws ParserException
*/
@Test
public void testUnionOnSchemaInputUdfs() throws Exception {
@@ -745,8 +726,6 @@
/**
* Test UNION ONSCHEMA with udf whose default type is different from
* final type
- * @throws IOException
- * @throws ParserException
*/
@Test
public void testUnionOnSchemaUdfTypeEvolution() throws Exception {
@@ -797,8 +776,6 @@
/**
* Test UNION ONSCHEMA with udf whose default type is different from
* final type - where udf is not in immediate input of union
- * @throws IOException
- * @throws ParserException
*/
@Test
public void testUnionOnSchemaUdfTypeEvolution2() throws Exception {
@@ -869,8 +846,6 @@
/**
* Test UNION ONSCHEMA with input relation having column names with multiple
* level of namespace in their names
- * @throws IOException
- * @throws ParserException
*/
@Test
public void testUnionOnSchemaScopeMulti() throws Exception {
@@ -916,8 +891,6 @@
/**
* Test query with a union-onschema having another as input
- * @throws IOException
- * @throws ParserException
*/
@Test
public void testTwoUnions() throws Exception {