/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.pig.test;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.fail;

import java.util.HashMap;
import java.util.Iterator;

import org.apache.pig.PigServer;
import org.apache.pig.data.Tuple;
import org.apache.pig.newplan.logical.optimizer.LogicalPlanOptimizer;
import org.apache.pig.newplan.logical.relational.LogicalRelationalOperator;

import org.junit.After;
import org.junit.AfterClass;
import org.junit.Before;
import org.junit.Test;
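
/**
 * Tests that the new logical plan (enabled via "pig.usenewlogicalplan")
 * inserts implicit SPLIT operators when a relation is consumed by more
 * than one downstream operator.
 */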
public class TestNewPlanImplicitSplit {
    private PigServer pigServer;
    static MiniGenericCluster cluster = MiniGenericCluster.buildCluster();

    @Before
    public void setUp() throws Exception {
        pigServer = new PigServer(cluster.getExecType(), cluster.getProperties());
        pigServer.getPigContext().getProperties().setProperty("pig.usenewlogicalplan", "true");
    }

    @After
    public void tearDown() throws Exception {
    }

    @AfterClass
    public static void oneTimeTearDown() throws Exception {
        cluster.shutDown();
    }
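
    /**
     * Two filters read the same input A and are then unioned, so the planner
     * has to insert an implicit SPLIT of A. All LOOP_SIZE input records
     * should still reach the output.
     */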
    @Test
    public void testImplicitSplit() throws Exception {
        int LOOP_SIZE = 20;
        String[] input = new String[LOOP_SIZE];
        for (int i = 1; i <= LOOP_SIZE; i++) {
            input[i - 1] = Integer.toString(i);
        }
        String inputFileName = "testImplicitSplit-input.txt";
        Util.createInputFile(cluster, inputFileName, input);

        pigServer.registerQuery("A = LOAD '" + inputFileName + "';");
        pigServer.registerQuery("B = filter A by $0<=10;");
        pigServer.registerQuery("C = filter A by $0>10;");
        pigServer.registerQuery("D = union B,C;");

        Iterator<Tuple> iter = pigServer.openIterator("D");
        if (!iter.hasNext()) fail("No Output received");
        int cnt = 0;
        while (iter.hasNext()) {
            iter.next();
            ++cnt;
        }
        assertEquals(LOOP_SIZE, cnt);
        Util.deleteFile(cluster, inputFileName);
    }
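
    /**
     * Cogroups a relation with a relation derived from it (a and d), which
     * again requires an implicit SPLIT, and checks every tuple of the
     * flattened result against the expected values keyed by marks.
     */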
    @Test
    public void testImplicitSplitInCoGroup() throws Exception {
        // This query is similar to the one reported in JIRA PIG-537.
        // Create the input files.
        String input1 = "testImplicitSplitInCoGroup-input1.txt";
        String input2 = "testImplicitSplitInCoGroup-input2.txt";
        Util.createInputFile(cluster, input1,
                new String[] {"a:1", "b:2", "b:20", "c:3", "c:30"});
        Util.createInputFile(cluster, input2,
                new String[] {"a:first", "b:second", "c:third"});

        pigServer.registerQuery("a = load '" + input1 +
                "' using PigStorage(':') as (name:chararray, marks:int);");
        pigServer.registerQuery("b = load '" + input2 +
                "' using PigStorage(':') as (name:chararray, rank:chararray);");
        pigServer.registerQuery("c = cogroup a by name, b by name;");
        pigServer.registerQuery("d = foreach c generate group, FLATTEN(a.marks) as newmarks;");
        pigServer.registerQuery("e = cogroup a by marks, d by newmarks;");
        pigServer.registerQuery("f = foreach e generate group, flatten(a), flatten(d);");

        // Expected output tuples, keyed by the group (marks) value.
        HashMap<Integer, Object[]> results = new HashMap<Integer, Object[]>();
        results.put(1, new Object[] { "a", 1, "a", 1 });
        results.put(2, new Object[] { "b", 2, "b", 2 });
        results.put(3, new Object[] { "c", 3, "c", 3 });
        results.put(20, new Object[] { "b", 20, "b", 20 });
        results.put(30, new Object[] { "c", 30, "c", 30 });

        pigServer.explain("f", System.out);
        Iterator<Tuple> it = pigServer.openIterator("f");
        while (it.hasNext()) {
            Tuple t = it.next();
            System.err.println("Tuple:" + t);
            Integer group = (Integer) t.get(0);
            Object[] groupValues = results.get(group);
            for (int i = 0; i < 4; i++) {
                assertEquals(groupValues[i], t.get(i + 1));
            }
        }
        Util.deleteFile(cluster, input1);
        Util.deleteFile(cluster, input2);
    }
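
    /**
     * The same query as testImplicitSplitInCoGroup, but built directly into a
     * logical plan and passed through the LogicalPlanOptimizer (which runs
     * ImplicitSplitInserter); only the schema of the plan's sink is verified,
     * so no cluster run is needed.
     */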
    @Test
    public void testImplicitSplitInCoGroup2() throws Exception {
        // This query is similar to the one reported in JIRA PIG-537.
        String query = "a = load 'file1' using PigStorage(':') as (name:chararray, marks:int);" +
                "b = load 'file2' using PigStorage(':') as (name:chararray, rank:chararray);" +
                "c = cogroup a by name, b by name;" +
                "d = foreach c generate group, FLATTEN(a.marks) as newmarks;" +
                "e = cogroup a by marks, d by newmarks;" +
                "f = foreach e generate group, flatten(a), flatten(d);" +
                "store f into 'out';";
        org.apache.pig.newplan.logical.relational.LogicalPlan plan = Util.buildLp(pigServer, query);

        // Optimizing the plan runs ImplicitSplitInserter, among other rules.
        new LogicalPlanOptimizer(plan, 5, null).optimize();

        // Get the schema of the leaf (the store) and compare it with the expected schema.
        String expectedSchema = "group:int,a::name:chararray,a::marks:int,d::group:chararray,d::newmarks:int";
        assertEquals(expectedSchema,
                ((LogicalRelationalOperator) plan.getSinks().get(0)).getSchema().toString(false));
    }
}