blob: f8257e19f887c025c17c358789de07d2d8ceef86 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
* http://www.apache.org/licenses/LICENSE-2.0
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.pig.test;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Random;
import org.apache.pig.backend.executionengine.ExecException;
import org.apache.pig.backend.hadoop.executionengine.physicalLayer.POStatus;
import org.apache.pig.backend.hadoop.executionengine.physicalLayer.PhysicalOperator;
import org.apache.pig.backend.hadoop.executionengine.physicalLayer.Result;
import org.apache.pig.backend.hadoop.executionengine.physicalLayer.expressionOperators.ConstantExpression;
import org.apache.pig.backend.hadoop.executionengine.physicalLayer.expressionOperators.EqualToExpr;
import org.apache.pig.backend.hadoop.executionengine.physicalLayer.expressionOperators.GreaterThanExpr;
import org.apache.pig.backend.hadoop.executionengine.physicalLayer.expressionOperators.POAnd;
import org.apache.pig.backend.hadoop.executionengine.physicalLayer.expressionOperators.POProject;
import org.apache.pig.backend.hadoop.executionengine.physicalLayer.plans.PhysicalPlan;
import org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POFilter;
import org.apache.pig.data.BagFactory;
import org.apache.pig.data.DataBag;
import org.apache.pig.data.DataType;
import org.apache.pig.data.DefaultTuple;
import org.apache.pig.data.Tuple;
import org.apache.pig.data.TupleFactory;
import org.apache.pig.test.utils.GenPhyOp;
import org.apache.pig.test.utils.GenRandomData;
import org.apache.pig.test.utils.TestHelper;
import org.junit.Before;
import org.junit.Test;
/**
 * Tests for {@link POFilter}.
 *
 * <p>Covers filters produced by {@link GenPhyOp} helpers as well as filters
 * driven by hand-built expression plans. Every scenario that consumes a bag is
 * run twice: once with null-free input and once with null fields mixed in
 * (see {@link #nullFlags}).
 */
public class TestFilter {
    /**
     * Input pairs {@code (col0, col1)} shared by the expression-plan tests,
     * flattened: (0,1), (1,0), (1,1).
     */
    private static final int[] PAIRS = { 0, 1, 1, 0, 1, 1 };

    /** Fixed seed so that randomly generated test data is reproducible. */
    Random r = new Random(42L);
    /** Filter built by {@code GenPhyOp.topFilterOpWithExPlan(50, 25)}. */
    POFilter pass;
    /** Filter built by {@code GenPhyOp.topFilterOpWithExPlan(25, 50)}. */
    POFilter fail;
    /** Random tuple generated by {@link #setUpProjFil(boolean)}. */
    Tuple t;
    /** Input bag generated by {@link #setUpProjFil(boolean)}. */
    DataBag inp;
    /** Filter built by {@code GenPhyOp.topFilterOpWithProj(1, 50)}, reading from {@link #inp}. */
    POFilter projFil;
    /** Each bag-driven scenario is run once per entry: without nulls, then with nulls. */
    boolean[] nullFlags = new boolean[] { false, true };

    /**
     * Creates the {@link #pass} and {@link #fail} filters before each test.
     *
     * @throws Exception if {@link GenPhyOp} fails to build either filter
     */
    @Before
    public void setUp() throws Exception {
        pass = GenPhyOp.topFilterOpWithExPlan(50, 25);
        fail = GenPhyOp.topFilterOpWithExPlan(25, 50);
    }

    /**
     * Regenerates {@link #inp}, {@link #t} and {@link #projFil}, and wires the
     * filter so that its only input is a projection of column 0 of a one-field
     * tuple holding {@link #inp}.
     *
     * @param withNulls whether the generated input bag should contain nulls
     * @throws Exception if data generation or operator wiring fails
     */
    private void setUpProjFil(boolean withNulls) throws Exception {
        // Keep the generation order (bag first, then tuple): both draw from the
        // same seeded Random, so reordering would change the generated data.
        if (withNulls) {
            inp = GenRandomData.genRandSmallTupDataBagWithNulls(r, 10, 100);
        } else {
            inp = GenRandomData.genRandSmallTupDataBag(r, 10, 100);
        }
        t = GenRandomData.genRandSmallBagTuple(r, 10, 100);
        projFil = GenPhyOp.topFilterOpWithProj(1, 50);

        // Wrap the bag in a single-field tuple and project that field as the filter's input.
        Tuple bagHolder = new DefaultTuple();
        bagHolder.append(inp);

        POProject bagProject = GenPhyOp.exprProject();
        bagProject.setColumn(0);
        bagProject.setResultType(DataType.TUPLE);
        bagProject.setOverloaded(true);
        bagProject.attachInput(bagHolder);

        List<PhysicalOperator> filterInputs = new ArrayList<PhysicalOperator>();
        filterInputs.add(bagProject);
        projFil.setInputs(filterInputs);
    }

    /**
     * Checks {@code getNextTuple()} on the {@link #pass} and {@link #fail}
     * filters, then drains {@link #projFil} and verifies that every emitted
     * tuple comes from the input bag, has an integer greater than 50 in
     * column 1, and that the number of emitted tuples matches
     * {@link #getExpCount(DataBag)}.
     *
     * @throws Exception if any operator fails
     */
    @Test
    public void testGetNextTuple() throws Exception {
        // NOTE(review): nothing visible in this class assigns `t` before this
        // point (only setUpProjFil does, and it runs further down), so a null
        // input appears to be attached here and the first assertion compares
        // against null. Confirm whether a real tuple was intended.
        pass.attachInput(t);
        Result res = pass.getNextTuple();
        assertEquals(t, res.result);

        fail.attachInput(t);
        res = fail.getNextTuple();
        assertEquals(POStatus.STATUS_EOP, res.returnStatus);

        for (boolean withNulls : nullFlags) {
            String msg = message("testGetNextTuple", withNulls);
            setUpProjFil(withNulls);
            int count = 0;
            while (true) {
                res = projFil.getNextTuple();
                if (res.returnStatus == POStatus.STATUS_EOP) {
                    break;
                }
                count++;
                assertEquals(POStatus.STATUS_OK, res.returnStatus);
                Tuple output = (Tuple) res.result;
                assertTrue(msg, TestHelper.bagContains(inp, output));
                assertTrue(msg, (Integer) output.get(1) > 50);
            }
            assertEquals(msg, getExpCount(inp), count);
        }
    }

    /**
     * Counts the tuples in a bag whose column 1 is non-null and greater than 50.
     *
     * @param bag bag of tuples whose column 1, when non-null, is an {@link Integer}
     * @return number of tuples with a non-null column 1 greater than 50
     * @throws ExecException if a tuple field cannot be read
     */
    private int getExpCount(DataBag bag) throws ExecException {
        int count = 0;
        Iterator<Tuple> it = bag.iterator();
        while (it.hasNext()) {
            Object field = it.next().get(1);
            if (field != null && (Integer) field > 50) {
                count++;
            }
        }
        return count;
    }

    /**
     * Filters {@link #PAIRS} with the expression {@code col0 > col1} and
     * expects exactly one surviving tuple, (1, 0).
     *
     * @throws Exception if plan construction or execution fails
     */
    @Test
    public void testSimpleFilter() throws Exception {
        for (boolean withNulls : nullFlags) {
            // Build the inner expression: col0 > col1
            POProject p1 = GenPhyOp.exprProject(0);
            POProject p2 = GenPhyOp.exprProject(1);
            GreaterThanExpr gt = GenPhyOp.compGreaterThanExpr(p1, p2, DataType.INTEGER);

            PhysicalPlan ip = new PhysicalPlan();
            ip.add(p1);
            ip.add(p2);
            ip.add(gt);
            ip.connect(p1, gt);
            ip.connect(p2, gt);

            DataBag inbag = buildInputBag(withNulls);
            assertSingleResult(ip, inbag, 1, 0, message("testSimpleFilter", withNulls));
        }
    }

    /**
     * Filters {@link #PAIRS} with the expression
     * {@code (col0 > 0) AND (col1 == 1)} and expects exactly one surviving
     * tuple, (1, 1).
     *
     * @throws Exception if plan construction or execution fails
     */
    @Test
    public void testAndFilter() throws Exception {
        for (boolean withNulls : nullFlags) {
            // Build the inner expression: (col0 > 0) AND (col1 == 1)
            POProject p1 = GenPhyOp.exprProject(0);
            ConstantExpression c2 = GenPhyOp.exprConst();
            c2.setValue(Integer.valueOf(0));
            GreaterThanExpr gt = GenPhyOp.compGreaterThanExpr(p1, c2, DataType.INTEGER);

            POProject p3 = GenPhyOp.exprProject(1);
            ConstantExpression c = GenPhyOp.exprConst();
            c.setValue(Integer.valueOf(1));
            EqualToExpr eq = GenPhyOp.compEqualToExpr(p3, c, DataType.INTEGER);

            POAnd and = GenPhyOp.compAndExpr(gt, eq);

            PhysicalPlan ip = new PhysicalPlan();
            ip.add(p1);
            ip.add(c2);
            ip.add(gt);
            ip.add(p3);
            ip.add(c);
            ip.add(eq);
            ip.add(and);
            ip.connect(p1, gt);
            ip.connect(c2, gt);
            ip.connect(p3, eq);
            ip.connect(c, eq);
            ip.connect(eq, and);
            ip.connect(gt, and);

            DataBag inbag = buildInputBag(withNulls);
            assertSingleResult(ip, inbag, 1, 1, message("testAndFilter", withNulls));
        }
    }

    /**
     * Builds the assertion-message prefix used throughout this class.
     *
     * @param testName name of the running test method
     * @param withNulls the null flag of the current iteration
     * @return message of the form {@code "Running <testName> with nullFlags set to <flag>:"}
     */
    private static String message(String testName, boolean withNulls) {
        return "Running " + testName + " with nullFlags set to " + withNulls + ":";
    }

    /**
     * Creates a two-field tuple.
     *
     * @param tf factory used to allocate the tuple
     * @param first value for column 0, may be {@code null}
     * @param second value for column 1, may be {@code null}
     * @return the populated tuple
     * @throws ExecException if a field cannot be set
     */
    private static Tuple newPair(TupleFactory tf, Object first, Object second)
            throws ExecException {
        Tuple pair = tf.newTuple(2);
        pair.set(0, first);
        pair.set(1, second);
        return pair;
    }

    /**
     * Builds a bag holding one tuple per pair in {@link #PAIRS}. When
     * {@code withNulls} is set, each pair may additionally be preceded by a
     * copy of it in which the first, the second, or both fields are null; the
     * choice is drawn from the seeded {@link #r} so runs are reproducible.
     *
     * @param withNulls whether to mix in tuples containing null fields
     * @return the populated input bag
     * @throws ExecException if a tuple field cannot be set
     */
    private DataBag buildInputBag(boolean withNulls) throws ExecException {
        TupleFactory tf = TupleFactory.getInstance();
        DataBag bag = BagFactory.getInstance().newDefaultBag();
        for (int j = 0; j < PAIRS.length; j += 2) {
            Integer first = Integer.valueOf(PAIRS[j]);
            Integer second = Integer.valueOf(PAIRS[j + 1]);
            if (withNulls) {
                int rand = r.nextInt(100);
                if (rand <= 20) {
                    bag.add(newPair(tf, first, null));
                } else if (rand <= 40) {
                    bag.add(newPair(tf, null, second));
                } else if (rand <= 60) {
                    bag.add(newPair(tf, null, null));
                }
                // rand > 60: no extra null tuple for this pair
            }
            bag.add(newPair(tf, first, second));
        }
        return bag;
    }

    /**
     * Runs a {@link POFilter} using {@code innerPlan} over {@code inbag} until
     * it stops returning {@code STATUS_OK}, then asserts that it ended with
     * {@code STATUS_EOP} and emitted exactly one two-field tuple of integers
     * equal to {@code (expectedFirst, expectedSecond)}.
     *
     * @param innerPlan expression plan to install on the filter
     * @param inbag tuples fed to the filter
     * @param expectedFirst expected column 0 of the single output tuple
     * @param expectedSecond expected column 1 of the single output tuple
     * @param msg prefix for assertion failure messages
     * @throws Exception if operator wiring or execution fails
     */
    private void assertSingleResult(PhysicalPlan innerPlan, DataBag inbag,
            int expectedFirst, int expectedSecond, String msg) throws Exception {
        PORead read = GenPhyOp.topReadOp(inbag);
        POFilter filter = GenPhyOp.connectedFilterOp(read);
        filter.setPlan(innerPlan);

        // The outer plan is not read afterwards, but it is still built exactly
        // as before in case connecting the operators has side effects on them.
        PhysicalPlan op = new PhysicalPlan();
        op.add(filter);
        op.add(read);
        op.connect(read, filter);

        DataBag outbag = BagFactory.getInstance().newDefaultBag();
        Result res;
        do {
            res = filter.getNextTuple();
            if (res.returnStatus == POStatus.STATUS_OK) {
                outbag.add((Tuple) res.result);
            }
        } while (res.returnStatus == POStatus.STATUS_OK);

        assertEquals(msg, POStatus.STATUS_EOP, res.returnStatus);
        assertEquals(msg, 1, outbag.size());

        Iterator<Tuple> it = outbag.iterator();
        assertTrue(msg, it.hasNext());
        Tuple out = it.next();
        assertEquals(msg, 2, out.size());
        assertTrue(msg, out.get(0) instanceof Integer);
        assertTrue(msg, out.get(1) instanceof Integer);
        assertEquals(msg, expectedFirst, ((Integer) out.get(0)).intValue());
        assertEquals(msg, expectedSecond, ((Integer) out.get(1)).intValue());
    }
}