// blob: 585ebadcf41755fdcb624b04bf6222b0012605d2 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.pig.parser;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import org.antlr.runtime.CharStream;
import org.antlr.runtime.CommonTokenStream;
import org.antlr.runtime.RecognitionException;
import org.antlr.runtime.tree.CommonTree;
import org.antlr.runtime.tree.Tree;
import org.apache.pig.ExecType;
import org.apache.pig.PigRunner;
import org.apache.pig.PigServer;
import org.apache.pig.test.Util;
import org.apache.pig.tools.pigstats.PigStats;
import org.junit.Assert;
import org.junit.Test;
/**
 * Syntax-level tests for the Pig Latin {@link QueryLexer} / {@link QueryParser}.
 *
 * <p>Most tests feed a script to {@link #parse(String)} through
 * {@link #shouldPass(String)} (the parser must report zero syntax errors) or
 * {@link #shouldFail(String)} (a {@link RecognitionException} must be thrown).
 * A few tests go through {@link PigServer} or {@link PigRunner} instead.
 */
public class TestQueryParser {

    /** Parses the TestParser.pig fixture and requires zero lexer and parser syntax errors. */
    @Test
    public void test() throws IOException, RecognitionException {
        parseFile( "test/org/apache/pig/parser/TestParser.pig" );
    }

    /**
     * Despite its name this is a positive test: after PIG-438, a realias
     * statement ({@code B=A}) is valid.
     */
    @Test
    public void testNegative1() throws IOException, RecognitionException {
        shouldPass("A = load 'x'; B=A;");
    }

    /** A parenthesized alias on the right-hand side must be rejected. */
    @Test(expected=RecognitionException.class)
    public void testNegative2() throws IOException, RecognitionException {
        shouldFail("A = load 'x'; B=(A);");
    }

    /** A parenthesized alias followed by an AS schema must be rejected. */
    @Test(expected=RecognitionException.class)
    public void testNegative3() throws IOException, RecognitionException {
        shouldFail("A = load 'x';B = (A) as (a:int, b:long);");
    }

    /** A parenthesized FILTER followed by an AS schema must be rejected. */
    @Test(expected=RecognitionException.class)
    public void testNegative4() throws IOException, RecognitionException {
        shouldFail("A = load 'x'; B = ( filter A by $0 == 0 ) as (a:bytearray, b:long);");
    }

    /** A type annotation on a GROUP BY key must be rejected. */
    @Test(expected=RecognitionException.class)
    public void testNegative5() throws IOException, RecognitionException {
        shouldFail("A = load 'x'; D = group A by $0:long;");
    }

    /** The malformed numeric literal {@code 3.0e10.1} must be rejected. */
    @Test(expected=RecognitionException.class)
    public void testNegative6() throws IOException, RecognitionException {
        shouldFail("A = load '/Users/gates/test/data/studenttab10'; B = foreach A generate $0, 3.0e10.1;");
    }

    /**
     * Test error message with file name: writes a script containing a misspelled
     * keyword to myscript.pig, runs it through {@link PigRunner} in local mode with
     * the {@code -c} option, and checks that the error message starts with the file
     * name and position.
     *
     * <p>The misspelled method name is kept so the test's identity stays stable.
     */
    @Test
    public void testNagative7() throws IOException {
        File f1 = new File("myscript.pig");
        f1.deleteOnExit();
        FileWriter fw1 = new FileWriter(f1);
        try {
            fw1.append("A = loadd '1.txt';");
        } finally {
            // Close even when the write fails so the file handle is never leaked.
            fw1.close();
        }

        String[] args = { "-x", "local", "-c", "myscript.pig" };
        PigStats stats = PigRunner.run(args, null);
        Assert.assertFalse(stats.isSuccessful());

        String expected = "<file myscript.pig, line 1, column 0>";
        String msg = stats.getErrorMessage();
        Assert.assertNotNull("A failed run should carry an error message", msg);
        Assert.assertTrue("Unexpected error message: " + msg, msg.startsWith(expected));
    }

    /**
     * See PIG-2238: registering a statement that refers to the undefined alias F
     * must fail with an "Unrecognized alias F" message.
     */
    @Test
    public void testDependentNullAlias() throws IOException, RecognitionException {
        PigServer pigServer = new PigServer(ExecType.LOCAL);
        try {
            pigServer.registerQuery( "F = limit F 20;store F into 'out';" );
        } catch(Exception ex) {
            // Guard against a null message so a wrong exception shows up as an
            // assertion failure rather than a NullPointerException.
            String message = ex.getMessage();
            Assert.assertTrue("Unexpected error message: " + message,
                    message != null && message.contains("Unrecognized alias F"));
            return;
        }
        Assert.fail("Registering a query on the undefined alias F should have failed");
    }

    /** Parenthesized bincond inside GENERATE. */
    @Test
    public void test2() throws IOException, RecognitionException {
        shouldPass("A = load '/Users/gates/test/data/studenttab10'; B = foreach A generate ( $0 == 0 ? 1 : 0 );");
    }

    /** Cast to a bag-of-map type inside flatten, followed by a cast of a map lookup. */
    @Test
    public void test3() throws IOException, RecognitionException {
        String query = "a = load '1.txt' as (a0);" +
                       "b = foreach a generate flatten( (bag{tuple(map[])})a0 ) as b0:map[];" +
                       "c = foreach b generate (long)b0#'key1';";
        shouldPass( query );
    }

    /** Load, group, aggregate with COUNT and store. */
    @Test
    public void test4() throws IOException, RecognitionException {
        String query = "a = load '1.txt' as (name, age, gpa); b = group a by name;" +
                       "c = foreach b generate group, COUNT(a.age);" +
                       "store c into 'y';";
        shouldPass( query );
    }

    /** Long, float and exponent-notation numeric literals. */
    @Test
    public void test5() throws IOException, RecognitionException {
        String query = "a = load 'x' as (name, age, gpa);" +
                       "b = foreach a generate name, age + 2L, 3.125F, 3.4e2;" +
                       " store b into 'y'; ";
        shouldPass( query );
    }

    /** Nested casts with arithmetic, and an "is not null" bincond. */
    @Test
    public void test6() throws IOException, RecognitionException {
        String query = "a = load '/user/pig/tests/data/singlefile/studentnulltab10k' as (name:chararray, age:int, gpa:double);" +
                       "b = foreach a generate (int)((int)gpa/((int)gpa - 1)) as norm_gpa:int;" +
                       "c = foreach b generate (norm_gpa is not null? norm_gpa: 0);" +
                       "store c into '/user/pig/out/jianyong.1297229709/Types_37.out';";
        shouldPass( query );
    }

    /** Nested ORDER BY * with a USING comparator class inside a FOREACH block. */
    @Test
    public void test7() throws IOException, RecognitionException {
        String query = "a = load '/user/pig/tests/data/singlefile/studenttab10k';" +
                       "b = group a by $0;" +
                       "c = foreach b {c1 = order $1 by * using org.apache.pig.test.udf.orderby.OrdDesc; generate flatten(c1); };" +
                       "store c into '/user/pig/out/jianyong.1297305352/Order_15.out';";
        shouldPass( query );
    }

    /** Nested ORDER BY on a positional column, plus MAX over a nested projection. */
    @Test
    public void test8() throws IOException, RecognitionException {
        String query = "a = load '/user/pig/tests/data/singlefile/studenttab10k';" +
                       "b = group a by $0;" +
                       "c = foreach b {c1 = order $1 by $1; generate flatten(c1), MAX($1.$1); };" +
                       "store c into '/user/pig/out/jianyong.1297305352/Order_17.out';";
        shouldPass( query );
    }

    /** CUBE keyword used as an alias. */
    @Test(expected=RecognitionException.class)
    public void testCubeNegative1() throws IOException, RecognitionException {
        String query = "x = load 'cubedata' as (a, b, c, d); " +
                       "cube = cube x by (a, b, c);";
        shouldFail( query );
    }

    /** Syntax error - brackets missing. */
    @Test(expected=RecognitionException.class)
    public void testCubeNegative2() throws IOException, RecognitionException {
        String query = "x = load 'cubedata' as (a, b, c, d); " +
                       "y = cube x by a, b, c;";
        shouldFail( query );
    }

    /** Syntax error - BY missing. */
    @Test(expected=RecognitionException.class)
    public void testCubeNegative3() throws IOException, RecognitionException {
        String query = "x = load 'cubedata' as (a, b, c, d); " +
                       "y = cube x (a, b, c);";
        shouldFail( query );
    }

    /** Syntax error - UDF at the end. */
    @Test(expected=RecognitionException.class)
    public void testCubeNegative4() throws IOException, RecognitionException {
        String query = "x = load 'cubedata' as (a, b, c, d); " +
                       "y = cube x by (a, b, c), UDF(c);";
        shouldFail( query );
    }

    /** Syntax error - specifying just dimensions. */
    @Test(expected=RecognitionException.class)
    public void testCubeNegative5() throws IOException, RecognitionException {
        String query = "x = load 'cubedata' as (a, b, c, d); " +
                       "y = cube x by (a, b, c), CUBE(c);";
        shouldFail( query );
    }

    /** Syntax error - dangling dimension. */
    @Test(expected=RecognitionException.class)
    public void testCubeNegative6() throws IOException, RecognitionException {
        String query = "x = load 'cubedata' as (a, b, c, d); " +
                       "y = cube x by CUBE(a, b, c), y, ROLLUP(c);";
        shouldFail( query );
    }

    /** Syntactically correct CUBE over named columns. */
    @Test
    public void testCubePositive1() throws IOException, RecognitionException {
        String query = "x = load 'cubedata' as (a, b, c, d);" +
                       "y = cube x by cube(a, b, c);" +
                       "z = foreach y generate flatten(group) as (a, b, c), COUNT(x) as count;" +
                       "store z into 'cube_output';";
        shouldPass( query );
    }

    /** ROLLUP over all columns using *, combined with ROLLUP over the range $2..$3. */
    @Test
    public void testCubePositive2() throws IOException, RecognitionException {
        String query = "x = load 'cubedata' as (a, b, c, d);" +
                       "y = cube x by rollup(*), rollup($2..$3);" +
                       "z = foreach y generate flatten(group) as (a, b, c, d), COUNT(x) as count;" +
                       "store z into 'cube_output';";
        shouldPass( query );
    }

    /** CUBE over positional columns ($0, $1). */
    @Test
    public void testCubePositive3() throws IOException, RecognitionException {
        String query = "x = load 'cubedata' as (a, b, c, d);" +
                       "y = cube x by cube($0, $1);" +
                       "z = foreach y generate flatten(group) as (a, b), COUNT(x) as count;" +
                       "store z into 'cube_output';";
        shouldPass( query );
    }

    /** Join followed by alias-qualified (a::u, b::u) column references. */
    @Test
    public void test9() throws IOException, RecognitionException {
        String query = "a = load 'x' as (u,v);" +
                       "b = load 'y' as (u,w);" +
                       "c = join a by u, b by u;" +
                       "d = foreach c generate a::u, b::u, w;";
        shouldPass( query );
    }

    /** Bincond wrapped in flatten inside an upper-case FOREACH ... GENERATE. */
    @Test
    public void test10() throws IOException, RecognitionException {
        String query = "a = load 'x' as (name, age, gpa);" +
                       "b = FOREACH C GENERATE group, flatten( ( 1 == 2 ? 2 : 3 ) );" +
                       " store b into 'y'; ";
        shouldPass( query );
    }

    /** 'repl' and such shouldn't be treated as a constant, so the following should pass. */
    @Test
    public void test11() throws IOException, RecognitionException {
        String query = "a = load 'repl' as (name, age, gpa);" +
                       "b = FOREACH C GENERATE group, flatten( ( 1 == 2 ? 2 : 3 ) );" +
                       " store b into 'skewed'; ";
        shouldPass( query );
    }

    /** The various bag type spellings in an AS schema must parse without errors. */
    @Test
    public void testBagType() throws IOException, RecognitionException {
        String query = "a = load '1.txt' as ( u : bag{}, v : bag{tuple(x, y)} );" +
                       "b = load '2.x' as ( t : {}, u : {(r,s)}, v : bag{ T : tuple( x, y ) }, w : bag{(z1, z2)} );" +
                       "c = load '3.x' as p : int;";
        int errorCount = parse( query );
        Assert.assertEquals( "parser syntax errors", 0, errorCount );
    }

    /** AS clauses on *, on flatten(...) and on an expression must parse without errors. */
    @Test
    public void testFlatten() throws IOException, RecognitionException {
        String query = "a = load '1.txt' as ( u, v, w : int );" +
                       "b = foreach a generate * as ( x, y, z ), flatten( u ) as ( r, s ), flatten( v ) as d, w + 5 as e:int;";
        int errorCount = parse( query );
        Assert.assertEquals( "parser syntax errors", 0, errorCount );
    }

    /** PIG-2083: a bincond with no whitespace around the null keyword. */
    @Test
    public void testNullInBinCondNoSpace() throws IOException{
        String query = "a = load '1.txt' as (a0, a1);" +
                       "b = foreach a generate (a0==0?null:2);"; //no space around the null keyword, so the lexer doesn't emit a NULL token
        PigServer pig = new PigServer(ExecType.LOCAL);
        Util.registerMultiLineQuery(pig, query);
        pig.explain("b", System.out);
    }

    /**
     * Parses the TestAST.pig fixture and checks the shape of the resulting tree:
     * a QUERY root with five STATEMENT children whose aliases and operators are
     * A/LOAD, B/FOREACH, C/FILTER, D/LIMIT and finally a STORE.
     */
    @Test
    public void testAST() throws IOException, RecognitionException {
        Tree ast = parseFile( "test/org/apache/pig/parser/TestAST.pig" );

        Assert.assertEquals( "QUERY", ast.getText() );
        Assert.assertEquals( 5, ast.getChildCount() );
        for( int i = 0; i < ast.getChildCount(); i++ ) {
            Tree c = ast.getChild( i );
            Assert.assertEquals( "STATEMENT", c.getText() );
        }

        Tree stmt = ast.getChild( 0 );
        Assert.assertEquals( "A", stmt.getChild( 0 ).getText() ); // alias
        Assert.assertTrue( "LOAD".equalsIgnoreCase( stmt.getChild( 1 ).getText() ) );

        stmt = ast.getChild( 1 );
        Assert.assertEquals( "B", stmt.getChild( 0 ).getText() ); // alias
        Assert.assertTrue( "FOREACH".equalsIgnoreCase( stmt.getChild( 1 ).getText() ) );

        stmt = ast.getChild( 2 );
        Assert.assertEquals( "C", stmt.getChild( 0 ).getText() ); // alias
        Assert.assertTrue( "FILTER".equalsIgnoreCase( stmt.getChild( 1 ).getText() ) );

        stmt = ast.getChild( 3 );
        Assert.assertEquals( "D", stmt.getChild( 0 ).getText() ); // alias
        Assert.assertTrue( "LIMIT".equalsIgnoreCase( stmt.getChild( 1 ).getText() ) );

        stmt = ast.getChild( 4 );
        Assert.assertTrue( "STORE".equalsIgnoreCase( stmt.getChild( 0 ).getText() ) );
    }

    /** Quoted function arguments may contain newline and carriage-return characters. */
    @Test
    public void testMultilineFunctionArguments() throws RecognitionException, IOException {
        final String pre = "STORE data INTO 'testOut' \n" +
                           "USING PigStorage (\n";

        String lotsOfNewLines = "'{\"debug\": 5,\n" +
                                " \"data\": \"/user/lguo/testOut/ComponentActTracking4/part-m-00000.avro\",\n" +
                                " \"field0\": \"int\",\n" +
                                " \"field1\": \"def:browser_id\",\n" +
                                " \"field3\": \"def:act_content\" }\n '\n";

        String [] queries = { lotsOfNewLines,
                              "'notsplitatall'",
                              "'see you\nnext line'",
                              "'surrounded \n by spaces'",
                              "'\nleading newline'",
                              "'trailing newline\n'",
                              "'\n'",
                              "'repeated\n\n\n\n\n\n\n\n\nnewlines'",
                              "'also\ris\rsupported\r'"};

        final String post = ");";

        for(String q : queries) {
            shouldPass(pre + q + post);
        }
    }

    /** PIG-2267: registers a script that refers to the columns of an unnamed schema by generated names such as val_0, tuple_0, bag_0. */
    @Test
    public void testThatColNameIsGeneratedProperly() throws IOException {
        String query = "a = load '1.txt' as (int,(long,[]),{([])});"
                     + "b = foreach a generate val_0, tuple_0, bag_0;"
                     + "c = foreach b generate val_0, flatten(tuple_0), flatten(bag_0);"
                     + "d = foreach c generate val_0, tuple_0::val_0, bag_0::map_0;";
        PigServer pig = new PigServer(ExecType.LOCAL);
        Util.registerMultiLineQuery(pig, query);
    }

    // ---- RANK ----

    /** RANK without a BY clause. */
    @Test
    public void testRankPositive1() throws IOException, RecognitionException {
        shouldPass("B = rank A;");
    }

    /** RANK BY a single column. */
    @Test
    public void testRankPositive2() throws IOException, RecognitionException {
        shouldPass("B = rank A by x;");
    }

    /** RANK BY a single column with DESC. */
    @Test
    public void testRankPositive3() throws IOException, RecognitionException {
        shouldPass("B = rank A by x DESC;");
    }

    /** RANK BY several columns with mixed ASC/DESC. */
    @Test
    public void testRankPositive4() throws IOException, RecognitionException {
        shouldPass("B = rank A by x, y ASC, w DESC, z ASC;");
    }

    /** RANK BY a column range (x..z). */
    @Test
    public void testRankPositive5() throws IOException, RecognitionException {
        String query = "A = load 'data' as (x:int, y:chararray, z:int, rz:chararray);" +
                       "B = rank A by x..z;";
        shouldPass(query);
    }

    /** RANK BY *. */
    @Test
    public void testRankPositive6() throws IOException, RecognitionException {
        String query = "A = load 'data' as (x:int, y:chararray, z:int, rz:chararray);" +
                       "B = rank A by *;";
        shouldPass(query);
    }

    /** RANK BY one column with DESC and DENSE. */
    @Test
    public void testRankPositive7() throws IOException, RecognitionException {
        String query = "A = load 'data' as (x:int, y:chararray, z:int, rz:chararray);" +
                       "B = rank A by x DESC DENSE;";
        shouldPass(query);
    }

    /** RANK BY two columns with DENSE. */
    @Test
    public void testRankPositive8() throws IOException, RecognitionException {
        String query = "A = load 'data' as (x:int, y:chararray, z:int, rz:chararray);" +
                       "B = rank A by x DESC,y ASC DENSE;";
        shouldPass(query);
    }

    /** RANK BY * with DENSE. */
    @Test
    public void testRankPositive9() throws IOException, RecognitionException {
        String query = "A = load 'data' as (x:int, y:chararray, z:int, rz:chararray);" +
                       "B = rank A by * DENSE;";
        shouldPass(query);
    }

    /** PIG-2769: a deeply nested chain of bincond expressions. */
    @Test
    public void testSlowQuery() throws Exception {
        String query = "A = load 'A.txt' using PigStorage() AS (m: int);" +
                       "B = FOREACH A { days_str = (chararray) (m == 1 ? 31: (m == 2 ? 28: (m == 3 ? 31: (m == 4 ? 30: (m == 5 ? 31: (m == 6 ? 30: (m == 7 ? 31: (m == 8 ? 31: (m == 9 ? 30: (m == 10 ? 31: (m == 11 ? 30:31))))))))))); GENERATE days_str as days_str; }" +
                       "store B into 'B';";
        shouldPass(query);
    }

    // ---- Function calls ----

    /** Fully qualified function name with two arguments. */
    @Test
    public void testFunction1() throws Exception {
        shouldPass("B = foreach A generate org.apache.pig.builtin.CONCAT(b, c);");
    }

    /** Fully qualified function name with column and string-literal arguments. */
    @Test
    public void testFunction2() throws Exception {
        shouldPass("B = foreach A generate org.apache.pig.builtin.CONCAT(b, c, 'x', 'y');");
    }

    /** Package-qualified function call wrapped in flatten. */
    @Test
    public void testFunction3() throws Exception {
        shouldPass("B = foreach A generate flatten(myudfs.Swap(name, age)), gpa;");
    }

    // ---- FILTER ----

    /** FILTER BY a negated function call. */
    @Test
    public void testFilter1() throws Exception {
        shouldPass("E = FILTER D BY not IsEmpty(B);");
    }

    /** FILTER BY a comparison of two constants. */
    @Test
    public void testFilter2() throws Exception {
        shouldPass("C = filter B by 2 > 1;");
    }

    /** FILTER BY "is null". */
    @Test
    public void testFilter3() throws Exception {
        shouldPass("C = filter B by a is null;");
    }

    /** FILTER BY "is not null". */
    @Test
    public void testFilter4() throws Exception {
        shouldPass("C = filter B by a is not null;");
    }

    /** GROUP BY a parenthesized key list mixing a name and a positional column. */
    @Test
    public void testGroup1() throws Exception {
        shouldPass("B = group A by ( a, $2 );");
    }

    // ---- Casts ----

    /** Cast of a positional column inside an arithmetic expression. */
    @Test
    public void testCast1() throws Exception {
        shouldPass("B = FOREACH A GENERATE (int)$0 + 1;");
    }

    /** Cast to a tuple type. */
    @Test
    public void testCast2() throws Exception {
        shouldPass("B = FOREACH A GENERATE (tuple(int,int,float))fld;");
    }

    /** Cast to a bag type. */
    @Test
    public void testCast3() throws Exception {
        shouldPass("B = FOREACH A GENERATE (bag{tuple(long)})fld; ");
    }

    /** Cast to an untyped map. */
    @Test
    public void testCast4() throws Exception {
        shouldPass("B = FOREACH A GENERATE (map[])fld;");
    }

    /** Cast applied to a dotted reference used as a divisor. */
    @Test
    public void testCast5() throws Exception {
        shouldPass("E = foreach D generate userid, clicks/(double)C.total, cnt;");
    }

    /** Cast applied to a function call result, with an AS alias. */
    @Test
    public void testCast6() throws Exception {
        shouldPass("X = FOREACH B GENERATE group, (chararray)COUNT(A) AS total;");
    }

    /** Cast applied to the null constant. */
    @Test
    public void testCast7() throws Exception {
        shouldPass("B = FOREACH A GENERATE a + (int)null;");
    }

    /** Map type whose value is int; the script also ends with a trailing comment. */
    @Test
    public void testCast8() throws Exception {
        shouldPass("a = load '1.txt' as(map[int]); --Map value is int");
    }

    /** Cast to a map whose value is a tuple; the script also ends with a trailing comment. */
    @Test
    public void testCast9() throws Exception {
        shouldPass("b = foreach a generate (map[(i:int)])a0; -- Map value is tuple");
    }

    /** STREAM schema with a map whose value is a bag; the script also ends with a trailing comment. */
    @Test
    public void testCast10() throws Exception {
        shouldPass("b = stream a through `cat` as (m:map[{(i:int,j:chararray)}]); -- Map value is bag");
    }

    // ---- null constant ----

    /** null as a generated expression. */
    @Test
    public void testNull1() throws Exception {
        shouldPass("B = FOREACH A GENERATE a, null;");
    }

    /** null as a bincond branch inside FLATTEN. */
    @Test
    public void testNull2() throws Exception {
        shouldPass("D = FOREACH C GENERATE FLATTEN((IsEmpty(A) ? null : A)), FLATTEN((IsEmpty(B) ? null : B));");
    }

    /** null as an arithmetic operand. */
    @Test
    public void testNull3() throws Exception {
        shouldPass("B = FOREACH A GENERATE a + null;");
    }

    // ---- Star and range projections ----

    /** * as a generated expression next to a function call. */
    @Test
    public void testStar1() throws Exception {
        shouldPass("B = FOREACH A GENERATE *, MyUDF(name, age);");
    }

    /** * as a function argument. */
    @Test
    public void testStar2() throws Exception {
        shouldPass("C = FOREACH A GENERATE name, age, MyUDF(*);");
    }

    /** Column range (col1 .. col3) in GENERATE. */
    @Test
    public void testProjectRange1() throws Exception {
        shouldPass("F = foreach IN generate (int)col0, col1 .. col3; ");
    }

    /** Closed and open-ended column ranges in ORDER BY. */
    @Test
    public void testProjectRange2() throws Exception {
        shouldPass("SORT = order IN by col2 .. col3, col0, col4 ..; ");
    }

    /** Positional column ranges as JOIN keys. */
    @Test
    public void testProjectRange3() throws Exception {
        shouldPass("J = join IN1 by $0 .. $3, IN2 by $0 .. $3; ");
    }

    /** Column range as a GROUP key. */
    @Test
    public void testProjectRange4() throws Exception {
        shouldPass("g = group l1 by b .. c; ");
    }

    /** Nested positional projection ($1.$2) as a function argument. */
    @Test
    public void testProjection1() throws Exception {
        shouldPass("b = foreach a generate flatten(group), SUM($1.$2);");
    }

    /** Inline (nested) LOAD used as the input of GROUP. */
    @Test
    public void testProjection2() throws Exception {
        shouldPass("a = group (load '/var/folders/bs/cy3sndf95ng5ljgy5nxs1j080000gn/T/test6322762304144938425txt') by ($0,$1);");
    }

    /** GROUP with PARTITION BY and PARALLEL clauses. */
    @Test
    public void testPartition() throws Exception {
        shouldPass("B = group A by $0 PARTITION BY org.apache.pig.test.utils.SimpleCustomPartitioner parallel 2;");
    }

    // ---- Boolean operators ----

    /** "and" of two comparisons as a bincond condition. */
    @Test
    public void testBoolean1() throws Exception {
        shouldPass("C = foreach B generate group, " +
                "((org.apache.pig.test.utils.AccumulatorBagCount(A)>1 and " +
                "org.apache.pig.test.utils.AccumulatorBagCount(A)<3)?0:1);");
    }

    /** "or" of two comparisons as a bincond condition. */
    @Test
    public void testBoolean2() throws Exception {
        shouldPass("C = foreach B generate group, " +
                "((org.apache.pig.test.utils.AccumulatorBagCount(A)>3 or " +
                "org.apache.pig.test.utils.AccumulatorBagCount(A)<2)?0:1);");
    }

    // ---- SPLIT ----

    /** SPLIT with two conditions and an OTHERWISE branch. */
    @Test
    public void testSplit1() throws Exception {
        shouldPass("split a into b if id > 3, c if id < 3, d otherwise;");
    }

    /** Upper-case SPLIT with two conditions and no OTHERWISE branch. */
    @Test
    public void testSplit2() throws Exception {
        shouldPass("SPLIT logs INTO logins IF command == 'login', all_quits IF command == 'quit';");
    }

    // ---- Big number literals ----

    /** Numeric literal with the BD suffix. */
    @Test
    public void testBigDecimalParsing() throws Exception {
        shouldPass("B = FILTER A BY $1 < 1234567890.123456789BD;");
    }

    /** Numeric literal with the BI suffix. */
    @Test
    public void testBigIntegerParsing() throws Exception {
        shouldPass("B = FILTER A BY $1 < 1234567890123456789BI;");
    }

    // ---- Helpers ----

    /**
     * Parses {@code query} and asserts that the parser reported no syntax errors.
     *
     * @param query the Pig Latin script to parse
     */
    private void shouldPass(String query) throws RecognitionException, IOException {
        System.out.println("Testing: " + query);
        Assert.assertEquals(query + " should have passed", 0, parse(query));
    }

    /**
     * Parses {@code query}, which is expected to make the parser throw. Every
     * caller declares {@code expected=RecognitionException.class}, so the
     * exception is left to propagate; if parsing returns normally the test is
     * failed explicitly with the reported error count.
     *
     * @param query the Pig Latin script that must be rejected
     */
    private void shouldFail(String query) throws RecognitionException, IOException {
        System.out.println("Testing: " + query);
        int errorCount = parse( query );
        Assert.fail(query + " should have been rejected with a RecognitionException, but parsing"
                + " returned normally with " + errorCount + " reported syntax error(s)");
    }

    /**
     * Runs {@code query} through the lexer and a parser obtained from
     * {@link QueryParserUtils#createParser}, prints the resulting tree, and asserts
     * that the lexer reported no syntax errors.
     *
     * @param query the Pig Latin script to parse
     * @return the number of syntax errors reported by the parser
     */
    private int parse(String query) throws IOException, RecognitionException {
        CharStream input = new QueryParserStringStream( query, null );
        QueryLexer lexer = new QueryLexer(input);
        CommonTokenStream tokens = new CommonTokenStream(lexer);
        QueryParser parser = QueryParserUtils.createParser(tokens);
        QueryParser.query_return result = parser.query();

        Tree ast = (Tree)result.getTree();
        System.out.println( ast.toStringTree() );
        TreePrinter.printTree((CommonTree) ast, 0);

        Assert.assertEquals(0, lexer.getNumberOfSyntaxErrors());
        return parser.getNumberOfSyntaxErrors();
    }

    /**
     * Parses the script file at {@code path} with a directly constructed
     * {@link QueryParser}, prints the resulting tree, and asserts that neither the
     * lexer nor the parser reported a syntax error.
     *
     * @param path path of the Pig script to parse
     * @return the root of the parsed tree
     */
    private Tree parseFile(String path) throws IOException, RecognitionException {
        CharStream input = new QueryParserFileStream( path );
        QueryLexer lexer = new QueryLexer(input);
        CommonTokenStream tokens = new CommonTokenStream(lexer);
        QueryParser parser = new QueryParser(tokens);
        QueryParser.query_return result = parser.query();

        Tree ast = (Tree)result.getTree();
        System.out.println( ast.toStringTree() );
        TreePrinter.printTree( (CommonTree)ast, 0 );

        Assert.assertEquals( 0, lexer.getNumberOfSyntaxErrors() );
        Assert.assertEquals( 0, parser.getNumberOfSyntaxErrors() );
        return ast;
    }
}