| /* |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| package org.apache.pig.test; |
| |
| import java.util.*; |
| |
| import org.apache.pig.ExecType; |
| import org.apache.pig.FilterFunc; |
| import org.apache.pig.PigServer; |
| import org.apache.pig.newplan.logical.optimizer.LogicalPlanOptimizer; |
| import org.apache.pig.newplan.logical.relational.LOFilter; |
| import org.apache.pig.newplan.logical.relational.LogicalPlan; |
| import org.apache.pig.newplan.logical.relational.LogicalRelationalOperator; |
| import org.apache.pig.newplan.logical.rules.LogicalExpressionSimplifier; |
| import org.apache.pig.newplan.Operator; |
| import org.apache.pig.newplan.OperatorPlan; |
| import org.apache.pig.newplan.optimizer.PlanOptimizer; |
| import org.apache.pig.newplan.optimizer.Rule; |
| import org.apache.pig.test.TestPruneColumn.MyFilterFunc; |
| import org.apache.pig.data.Tuple; |
| import org.apache.pig.impl.PigContext; |
| |
| import junit.framework.TestCase; |
| |
| import org.junit.Before; |
| import org.junit.Test; |
| |
| public class TestFilterSimplification extends TestCase { |
| |
| LogicalPlan plan = null; |
| PigContext pc = null; |
| PigServer pigServer = null; |
| |
| @Before |
| public void setUp() throws Exception { |
| pigServer = new PigServer(ExecType.LOCAL, new Properties()); |
| pc = pigServer.getPigContext(); |
| } |
| |
| @Test |
| public void test1() throws Exception { |
| // case 1: simple and implication |
| String query = "b = filter (load 'd.txt' as (id:int, v1, v2)) by (id > 3) AND (id > 5);" + |
| "store b into 'empty';"; |
| LogicalPlan newLogicalPlan = Util.buildLp(pigServer, query); |
| |
| PlanOptimizer optimizer = new MyPlanOptimizer(newLogicalPlan, 10); |
| optimizer.optimize(); |
| |
| query = "b = filter (load 'd.txt' as (id:int, v1, v2)) by (id > 5);" + |
| "store b into 'empty';"; |
| LogicalPlan expected = Util.buildLp(pigServer, query); |
| |
| assertTrue(expected.isEqual(newLogicalPlan)); |
| |
| // case 2: simple or implication |
| query = "b = filter (load 'd.txt' as (id:int, v1, v2)) by (id > 3) OR (id > 5);" + |
| "store b into 'empty';"; |
| newLogicalPlan = Util.buildLp(pigServer, query); |
| |
| optimizer = new MyPlanOptimizer(newLogicalPlan, 10); |
| optimizer.optimize(); |
| |
| query = "b = filter (load 'd.txt' as (id:int, v1, v2)) by (id > 3);" + |
| "store b into 'empty';"; |
| expected = Util.buildLp(pigServer, query);; |
| |
| assertTrue(expected.isEqual(newLogicalPlan)); |
| |
| // case 3: constant expression eval |
| query = "b = filter (load 'd.txt' as (id:int, v1, v2)) by (id > 3+4*2);" + |
| "store b into 'empty';"; |
| newLogicalPlan = Util.buildLp(pigServer, query);; |
| |
| optimizer = new MyPlanOptimizer(newLogicalPlan, 10); |
| optimizer.optimize(); |
| |
| query = "b = filter (load 'd.txt' as (id:int, v1, v2)) by (id > 11);" + |
| "store b into 'empty';"; |
| expected = Util.buildLp(pigServer, query); |
| |
| assertTrue(expected.isEqual(newLogicalPlan)); |
| |
| // case 4: simple NOT |
| query = "b = filter (load 'd.txt' as (id:int, v1, v2)) by NOT(NOT(NOT(id > 3)));" + |
| "store b into 'empty';"; |
| newLogicalPlan = Util.buildLp(pigServer, query);; |
| |
| optimizer = new MyPlanOptimizer(newLogicalPlan, 10); |
| optimizer.optimize(); |
| |
| query = "b = filter (load 'd.txt' as (id:int, v1, v2)) by (id <= 3);" + |
| "store b into 'empty';"; |
| expected = Util.buildLp(pigServer, query);; |
| |
| assertTrue(expected.isEqual(newLogicalPlan)); |
| |
| // case 5: redundant NOT |
| query = "b = filter (load 'd.txt' as (id:int, v1, v2)) by NOT(NOT(id > 3));" + |
| "store b into 'empty';"; |
| newLogicalPlan = Util.buildLp(pigServer, query);; |
| |
| optimizer = new MyPlanOptimizer(newLogicalPlan, 10); |
| optimizer.optimize(); |
| |
| query = "b = filter (load 'd.txt' as (id:int, v1, v2)) by (id > 3);" + |
| "store b into 'empty';"; |
| expected = Util.buildLp(pigServer, query);; |
| |
| assertTrue(expected.isEqual(newLogicalPlan)); |
| |
| // case 6: negative |
| query = "b = filter (load 'd.txt' as (id:int, v1, v2)) by (id > 3) AND (v1 is null);" + |
| "store b into 'empty';"; |
| newLogicalPlan = Util.buildLp(pigServer, query);; |
| |
| optimizer = new MyPlanOptimizer(newLogicalPlan, 10); |
| optimizer.optimize(); |
| |
| query = "b = filter (load 'd.txt' as (id:int, v1, v2)) by (id > 3) AND (v1 is null);" + |
| "store b into 'empty';"; |
| expected = Util.buildLp(pigServer, query);; |
| |
| assertTrue(expected.isEqual(newLogicalPlan)); |
| |
| // case 7: is not null |
| query = "b = filter (load 'd.txt' as (id:int, v1, v2)) by NOT(v1 is null);" + |
| "store b into 'empty';"; |
| newLogicalPlan = Util.buildLp(pigServer, query);; |
| |
| optimizer = new MyPlanOptimizer(newLogicalPlan, 10); |
| optimizer.optimize(); |
| |
| query = "b = filter (load 'd.txt' as (id:int, v1, v2)) by (v1 is not null);" + |
| "store b into 'empty';"; |
| expected = Util.buildLp(pigServer, query);; |
| |
| assertTrue(expected.isEqual(newLogicalPlan)); |
| |
| // case 8: combo I |
| query = "b = filter (load 'd.txt' as (id:int, v1, v2)) by NOT((id > 1) OR ((v1 is null) AND (id > 5)));" + |
| "store b into 'empty';"; |
| newLogicalPlan = Util.buildLp(pigServer, query);; |
| |
| optimizer = new MyPlanOptimizer(newLogicalPlan, 10); |
| optimizer.optimize(); |
| |
| query = "b = filter (load 'd.txt' as (id:int, v1, v2)) by (id <= 1);" + |
| "store b into 'empty';"; |
| expected = Util.buildLp(pigServer, query);; |
| |
| assertTrue(expected.isEqual(newLogicalPlan)); |
| |
| // case 9: combo II: lhs <-> rhs |
| query = "b = filter (load 'd.txt' as (id:int, v1, v2)) by NOT(((id > 5) AND (v1 is null)) OR (id > 1));"+ |
| "store b into 'empty';"; |
| newLogicalPlan = Util.buildLp(pigServer, query);; |
| |
| optimizer = new MyPlanOptimizer(newLogicalPlan, 10); |
| optimizer.optimize(); |
| |
| query = "b = filter (load 'd.txt' as (id:int, v1, v2)) by (id <= 1);" + |
| "store b into 'empty';"; |
| expected = Util.buildLp(pigServer, query);; |
| |
| assertTrue(expected.isEqual(newLogicalPlan)); |
| |
| // case 10: complementary OR |
| query = "b = filter (load 'd.txt' as (id:int, v1, v2)) by ((id < 1) OR (id >= 1));" + |
| "store b into 'empty';"; |
| newLogicalPlan = Util.buildLp(pigServer, query);; |
| |
| optimizer = new MyPlanOptimizer(newLogicalPlan, 10); |
| optimizer.optimize(); |
| |
| query = "b = load 'd.txt' as (id:int, v1, v2);" + |
| "store b into 'empty';"; |
| expected = Util.buildLp(pigServer, query);; |
| |
| assertTrue(expected.isEqual(newLogicalPlan)); |
| |
| // case 11: OR Equality elimination |
| query = "b = filter (load 'd.txt' as (id:int, v1, v2)) by ((id < 1) OR (id < 1));" + |
| "store b into 'empty';"; |
| newLogicalPlan = Util.buildLp(pigServer, query);; |
| |
| optimizer = new MyPlanOptimizer(newLogicalPlan, 10); |
| optimizer.optimize(); |
| |
| query = "b = filter (load 'd.txt' as (id:int, v1, v2)) by (id < 1);" + |
| "store b into 'empty';"; |
| expected = Util.buildLp(pigServer, query);; |
| |
| assertTrue(expected.isEqual(newLogicalPlan)); |
| |
| // case 12: AND Equality elimination |
| query = "b = filter (load 'd.txt' as (id:int, v1, v2)) by ((id < 1) AND (id < 1));" + |
| "store b into 'empty';"; |
| newLogicalPlan = Util.buildLp(pigServer, query);; |
| |
| optimizer = new MyPlanOptimizer(newLogicalPlan, 10); |
| optimizer.optimize(); |
| |
| query = "b = filter (load 'd.txt' as (id:int, v1, v2)) by (id < 1);" + "store b into 'empty';"; |
| expected = Util.buildLp(pigServer, query);; |
| |
| assertTrue(expected.isEqual(newLogicalPlan)); |
| |
| // case 13: negative case |
| query = "b = filter (load 'd.txt' as (id:int, v1, v2)) by ((id < 1) AND (v1 is null));" + "store b into 'empty';"; |
| newLogicalPlan = Util.buildLp(pigServer, query);; |
| |
| optimizer = new MyPlanOptimizer(newLogicalPlan, 10); |
| optimizer.optimize(); |
| |
| query = "b = filter (load 'd.txt' as (id:int, v1, v2)) by ((id < 1) AND (v1 is NULL));" + "store b into 'empty';"; |
| expected = Util.buildLp(pigServer, query);; |
| |
| assertTrue(expected.isEqual(newLogicalPlan)); |
| |
| // case 14: combo III |
| query = "b = filter (load 'd.txt' as (id:int, v1, v2)) by NOT((id > 1) OR ((v1 is null) AND (id > 1+2*2)));" + |
| "store b into 'empty';"; |
| newLogicalPlan = Util.buildLp(pigServer, query);; |
| |
| optimizer = new MyPlanOptimizer(newLogicalPlan, 10); |
| optimizer.optimize(); |
| |
| query = "b = filter (load 'd.txt' as (id:int, v1, v2)) by (id <= 1);" + |
| "store b into 'empty';"; |
| expected = Util.buildLp(pigServer, query);; |
| |
| assertTrue(expected.isEqual(newLogicalPlan)); |
| |
| // case 15: combo III: negative |
| query = "b = filter (load 'd.txt' as (id:int, v1:int, v2)) by (((id > 5) OR (v1 < 3)) AND ((id > 4) OR (v1 > 5)));" + |
| "store b into 'empty';"; |
| newLogicalPlan = Util.buildLp(pigServer, query);; |
| |
| optimizer = new MyPlanOptimizer(newLogicalPlan, 10); |
| optimizer.optimize(); |
| |
| query = "b = filter (load 'd.txt' as (id:int, v1:int, v2)) by (((id > 5) OR (v1 < 3)) AND ((id > 4) OR (v1 > 5)));" + |
| "store b into 'empty';"; |
| expected = Util.buildLp(pigServer, query);; |
| |
| assertTrue(expected.isEqual(newLogicalPlan)); |
| |
| // case 15: combo III: negative |
| query = "b = filter (load 'd.txt' as (id:int, v1:int, v2)) by (((id > 5) OR (v1 > 3)) AND ((id > 4) OR (v1 > 5)));" + |
| "store b into 'empty';"; |
| newLogicalPlan = Util.buildLp(pigServer, query);; |
| |
| optimizer = new MyPlanOptimizer(newLogicalPlan, 10); |
| optimizer.optimize(); |
| |
| query = "b = filter (load 'd.txt' as (id:int, v1:int, v2)) by (((id > 5) OR (v1 > 3)) AND ((id > 4) OR (v1 > 5)));" + |
| "store b into 'empty';"; |
| expected = Util.buildLp(pigServer, query);; |
| |
| assertTrue(expected.isEqual(newLogicalPlan)); |
| |
| // case 16: conflicting OR |
| query = "b = filter (load 'd.txt' as (id:int, v1, v2)) by ((id < 1) OR (id > 1));" + |
| "store b into 'empty';"; |
| newLogicalPlan = Util.buildLp(pigServer, query);; |
| |
| optimizer = new MyPlanOptimizer(newLogicalPlan, 10); |
| optimizer.optimize(); |
| |
| query = "b = filter (load 'd.txt' as (id:int, v1, v2)) by ((id < 1) OR (id > 1));" + |
| "store b into 'empty';"; |
| expected = Util.buildLp(pigServer, query);; |
| |
| assertTrue(expected.isEqual(newLogicalPlan)); |
| |
| // case 17: conflicting AND: negtive case for now |
| query = "b = filter (load 'd.txt' as (id:int, v1, v2)) by ((id < 1) AND (id > 1));" + |
| "store b into 'empty';"; |
| newLogicalPlan = Util.buildLp(pigServer, query);; |
| |
| optimizer = new MyPlanOptimizer(newLogicalPlan, 10); |
| optimizer.optimize(); |
| |
| query = "b = filter (load 'd.txt' as (id:int, v1, v2)) by ((id < 1) AND (id > 1));" + |
| "store b into 'empty';"; |
| expected = Util.buildLp(pigServer, query);; |
| |
| assertTrue(expected.isEqual(newLogicalPlan)); |
| |
| // case 18: combo IV: negative |
| query = "b = filter (load 'd.txt' as (id:int, v1:int, v2)) by (((id > 5) OR (v1 > 3)) AND ((id < 8) OR (v1 > 5)));" + |
| "store b into 'empty';"; |
| newLogicalPlan = Util.buildLp(pigServer, query);; |
| |
| optimizer = new MyPlanOptimizer(newLogicalPlan, 10); |
| optimizer.optimize(); |
| |
| query = "b = filter (load 'd.txt' as (id:int, v1:int, v2)) by (((id > 5) OR (v1 > 3)) AND ((id < 8) OR (v1 > 5)));" + |
| "store b into 'empty';"; |
| expected = Util.buildLp(pigServer, query);; |
| |
| assertTrue(expected.isEqual(newLogicalPlan)); |
| |
| // case 19: negative AND |
| query = "b = filter (load 'd.txt' as (id:int, v1, v2)) by (id > 3) AND (id < 5);" + |
| "store b into 'empty';"; |
| newLogicalPlan = Util.buildLp(pigServer, query);; |
| |
| optimizer = new MyPlanOptimizer(newLogicalPlan, 10); |
| optimizer.optimize(); |
| |
| query = "b = filter (load 'd.txt' as (id:int, v1, v2)) by (id > 3) AND (id < 5);" + |
| "store b into 'empty';"; |
| expected = Util.buildLp(pigServer, query);; |
| |
| assertTrue(expected.isEqual(newLogicalPlan)); |
| |
| // case 20: negative OR |
| query = "b = filter (load 'd.txt' as (id:int, v1, v2)) by (id > 3) OR (id < 5);" + |
| "store b into 'empty';"; |
| newLogicalPlan = Util.buildLp(pigServer, query);; |
| |
| optimizer = new MyPlanOptimizer(newLogicalPlan, 10); |
| optimizer.optimize(); |
| |
| query = "b = filter (load 'd.txt' as (id:int, v1, v2)) by (id > 3) OR (id < 5);" + |
| "store b into 'empty';"; |
| expected = Util.buildLp(pigServer, query);; |
| |
| assertTrue(expected.isEqual(newLogicalPlan)); |
| |
| // case 20: combo V: negative |
| query = "b = filter (load 'd.txt' as (id:int, v1:int, v2)) by (((v1 > 3) OR (id > 5)) AND ((id < 8) OR (v1 > 5)));" + |
| "store b into 'empty';"; |
| newLogicalPlan = Util.buildLp(pigServer, query);; |
| |
| optimizer = new MyPlanOptimizer(newLogicalPlan, 10); |
| optimizer.optimize(); |
| |
| query = "b = filter (load 'd.txt' as (id:int, v1:int, v2)) by (((v1 > 3) OR (id > 5)) AND ((id < 8) OR (v1 > 5)));" + |
| "store b into 'empty';"; |
| expected = Util.buildLp(pigServer, query);; |
| |
| assertTrue(expected.isEqual(newLogicalPlan)); |
| |
| // case 22: combo V: negative |
| query = "b = filter (load 'd.txt' as (id:int, v1:int, v2)) by (((v1 > 3) OR (id > 5)) AND ((v1 > 5) OR (id < 8)));" + |
| "store b into 'empty';"; |
| newLogicalPlan = Util.buildLp(pigServer, query);; |
| |
| optimizer = new MyPlanOptimizer(newLogicalPlan, 10); |
| optimizer.optimize(); |
| |
| query = "b = filter (load 'd.txt' as (id:int, v1:int, v2)) by (((v1 > 3) OR (id > 5)) AND ((v1 > 5) OR (id < 8)));" + |
| "store b into 'empty';"; |
| expected = Util.buildLp(pigServer, query);; |
| |
| assertTrue(expected.isEqual(newLogicalPlan)); |
| |
| // case 23: combo VI: extremely degenerate |
| query = "b = filter (load 'd.txt' as (id:int, v1:int, v2)) by ((((id > 1) OR (id > 2)) AND ((id > 3) OR (id > 4))) AND (((id > 5) OR (id > 6)) AND ((id > 7) OR (id > 8))));" + |
| "store b into 'empty';"; |
| newLogicalPlan = Util.buildLp(pigServer, query);; |
| |
| optimizer = new MyPlanOptimizer(newLogicalPlan, 10); |
| optimizer.optimize(); |
| |
| query = "b = filter (load 'd.txt' as (id:int, v1:int, v2)) by (id > 7);" + |
| "store b into 'empty';"; |
| expected = Util.buildLp(pigServer, query);; |
| |
| assertTrue(expected.isEqual(newLogicalPlan)); |
| |
| // case 24: combo VII: extremely degenerate |
| query = "b = filter (load 'd.txt' as (id:int, v1:int, v2)) by ((((id > 1) OR (id > 2)) AND ((id > 3) OR (id > 4))) AND (id > 7));" + |
| "store b into 'empty';"; |
| newLogicalPlan = Util.buildLp(pigServer, query);; |
| |
| optimizer = new MyPlanOptimizer(newLogicalPlan, 10); |
| optimizer.optimize(); |
| |
| query = "b = filter (load 'd.txt' as (id:int, v1:int, v2)) by (id > 7);" + |
| "store b into 'empty';"; |
| expected = Util.buildLp(pigServer, query);; |
| |
| assertTrue(expected.isEqual(newLogicalPlan)); |
| |
| // case 25: combo VII: extremely degenerate |
| query = "b = filter (load 'd.txt' as (id:int, v1:int, v2)) by ((((id > 1) OR (id > 2)) AND ((id > 3) OR (id > 4))) AND (((id > 5) AND (id > 7))));" + |
| "store b into 'empty';"; |
| newLogicalPlan = Util.buildLp(pigServer, query);; |
| |
| optimizer = new MyPlanOptimizer(newLogicalPlan, 10); |
| optimizer.optimize(); |
| |
| query = "b = filter (load 'd.txt' as (id:int, v1:int, v2)) by (id > 7);" + |
| "store b into 'empty';"; |
| expected = Util.buildLp(pigServer, query);; |
| |
| assertTrue(expected.isEqual(newLogicalPlan)); |
| |
| // case 26: combo VIII: lhs<->rhs for case 25 |
| query = "b = filter (load 'd.txt' as (id:int, v1:int, v2)) by ((((id > 7) AND (id > 5))) AND (((id > 4) OR (id > 3)) AND ((id > 2) OR (id > 1))));" + |
| "store b into 'empty';"; |
| newLogicalPlan = Util.buildLp(pigServer, query);; |
| |
| optimizer = new MyPlanOptimizer(newLogicalPlan, 10); |
| optimizer.optimize(); |
| |
| query = "b = filter (load 'd.txt' as (id:int, v1:int, v2)) by (id > 7);" + |
| "store b into 'empty';"; |
| expected = Util.buildLp(pigServer, query);; |
| |
| assertTrue(expected.isEqual(newLogicalPlan)); |
| |
| // case 27: combo VII: rhs<->lhs for case 24 |
| query = "b = filter (load 'd.txt' as (id:int, v1:int, v2)) by ((id > 7) AND (((id > 4) OR (id > 3)) AND ((id > 2) OR (id > 1))));" + |
| "store b into 'empty';"; |
| newLogicalPlan = Util.buildLp(pigServer, query);; |
| |
| optimizer = new MyPlanOptimizer(newLogicalPlan, 10); |
| optimizer.optimize(); |
| |
| query = "b = filter (load 'd.txt' as (id:int, v1:int, v2)) by (id > 7);" + |
| "store b into 'empty';"; |
| expected = Util.buildLp(pigServer, query);; |
| |
| assertTrue(expected.isEqual(newLogicalPlan)); |
| |
| // case 28: complex equality |
| query = "b = filter (load 'd.txt' as (id:int, v1:int, v2)) by (((id > 4) OR (id > 3)) AND ((id > 3) OR (id > 4)));" + |
| "store b into 'empty';"; |
| newLogicalPlan = Util.buildLp(pigServer, query);; |
| |
| optimizer = new MyPlanOptimizer(newLogicalPlan, 10); |
| optimizer.optimize(); |
| |
| query = "b = filter (load 'd.txt' as (id:int, v1:int, v2)) by (id > 3);" + |
| "store b into 'empty';"; |
| expected = Util.buildLp(pigServer, query);; |
| |
| assertTrue(expected.isEqual(newLogicalPlan)); |
| |
| // case 29: complex equality |
| query = "b = filter (load 'd.txt' as (id:int, v1:int, v2)) by (((id > 4) OR (v1 > 3)) AND ((v1 > 3) OR (id > 4)));" + |
| "store b into 'empty';"; |
| newLogicalPlan = Util.buildLp(pigServer, query);; |
| |
| optimizer = new MyPlanOptimizer(newLogicalPlan, 10); |
| optimizer.optimize(); |
| |
| query = "b = filter (load 'd.txt' as (id:int, v1:int, v2)) by ((id > 4) OR (v1 > 3));" + |
| "store b into 'empty';"; |
| expected = Util.buildLp(pigServer, query);; |
| |
| assertTrue(expected.isEqual(newLogicalPlan)); |
| |
| // case 30: complex equality |
| query = "b = filter (load 'd.txt' as (id:int, v1:int, v2)) by (((id > 4) OR (v1 > 3)) OR ((v1 > 3) OR (id > 4)));" + |
| "store b into 'empty';"; |
| newLogicalPlan = Util.buildLp(pigServer, query);; |
| |
| optimizer = new MyPlanOptimizer(newLogicalPlan, 10); |
| optimizer.optimize(); |
| |
| query = "b = filter (load 'd.txt' as (id:int, v1:int, v2)) by ((id > 4) OR (v1 > 3));" + |
| "store b into 'empty';"; |
| expected = Util.buildLp(pigServer, query);; |
| |
| assertTrue(expected.isEqual(newLogicalPlan)); |
| |
| // case 31: See PIG-2067 |
| query = "A = load 'a.dat' as (cookie);" + |
| "B = load 'b.dat' as (cookie);" + |
| "C = cogroup A by cookie, B by cookie;" + |
| "E = filter C by COUNT(B)>0 AND COUNT(A)>0;" + |
| "store E into 'empty';"; |
| newLogicalPlan = Util.buildLp(pigServer, query);; |
| |
| optimizer = new MyPlanOptimizer(newLogicalPlan, 10); |
| optimizer.optimize(); |
| |
| // Make sure in this case, we don't optimize |
| query = "A = load 'a.dat' as (cookie);" + |
| "B = load 'b.dat' as (cookie);" + |
| "C = cogroup A by cookie, B by cookie;" + |
| "E = filter C by COUNT(B)>0 AND COUNT(A)>0;" + |
| "store E into 'empty';"; |
| |
| expected = Util.buildLp(pigServer, query);; |
| |
| assertTrue(expected.isEqual(newLogicalPlan)); |
| |
| } |
| |
| @Test |
| public void test2() throws Exception { |
| String query = "b = filter (load 'd.txt' as (name, age, gpa)) by age >= 50 or name > 'fred' and gpa <= 3.0 or name >= 'bob';" + |
| "store b into 'empty';"; |
| LogicalPlan newLogicalPlan = Util.buildLp(pigServer, query);; |
| |
| PlanOptimizer optimizer = new MyPlanOptimizer(newLogicalPlan, 10); |
| optimizer.optimize(); |
| |
| query = "b = filter (load 'd.txt' as (name, age, gpa)) by age >= 50 or name >= 'bob';" + |
| "store b into 'empty';"; |
| LogicalPlan expected = Util.buildLp(pigServer, query);; |
| |
| assertTrue(expected.isEqual(newLogicalPlan)); |
| |
| // Regex filtering |
| query = "b = filter (load 'd.txt' as (name:chararray, age:int, registration, contributions:double)) by (name matches '^fred.*' and (chararray)registration matches '^dem.*');" + |
| "store b into 'empty';"; |
| newLogicalPlan = Util.buildLp(pigServer, query);; |
| |
| optimizer = new MyPlanOptimizer(newLogicalPlan, 10); |
| optimizer.optimize(); |
| |
| query = "b = filter (load 'd.txt' as (name:chararray, age:int, registration, contributions:double)) by (name matches '^fred.*' and (chararray)registration matches '^dem.*');" + |
| "store b into 'empty';"; |
| expected = Util.buildLp(pigServer, query);; |
| |
| assertTrue(expected.isEqual(newLogicalPlan)); |
| |
| // NOT Regex filtering |
| query = "b = filter (load 'd.txt') by (not $0 matches '^fred.*');" + |
| "store b into 'empty';"; |
| newLogicalPlan = Util.buildLp(pigServer, query);; |
| |
| optimizer = new MyPlanOptimizer(newLogicalPlan, 10); |
| optimizer.optimize(); |
| |
| query = "b = filter (load 'd.txt') by (not $0 matches '^fred.*');" + |
| "store b into 'empty';"; |
| expected = Util.buildLp(pigServer, query);; |
| |
| assertTrue(expected.isEqual(newLogicalPlan)); |
| |
| // naiive filtering |
| query = "b = filter (load 'd.txt') by 1==1;" + |
| "store b into 'empty';"; |
| newLogicalPlan = Util.buildLp(pigServer, query);; |
| |
| optimizer = new MyPlanOptimizer(newLogicalPlan, 10); |
| optimizer.optimize(); |
| |
| query = "b = load 'd.txt';" + |
| "store b into 'empty';"; |
| expected = Util.buildLp(pigServer, query);; |
| |
| assertTrue(expected.isEqual(newLogicalPlan)); |
| } |
| |
| @Test |
| public void test3() throws Exception { |
| // boolean constant elimination: AND |
| String query = "b = filter (load 'd.txt' as (id:int, v1, v2)) by ((v1 is not null) AND (id == 1) AND (1 == 1));" + |
| "store b into 'empty';"; |
| LogicalPlan newLogicalPlan = Util.buildLp(pigServer, query);; |
| |
| PlanOptimizer optimizer = new MyPlanOptimizer(newLogicalPlan, 10); |
| optimizer.optimize(); |
| |
| query = "b = filter (load 'd.txt' as (id:int, v1, v2)) by ((v1 is not null) AND (id == 1));" + |
| "store b into 'empty';"; |
| LogicalPlan expected = Util.buildLp(pigServer, query);; |
| |
| assertTrue(expected.isEqual(newLogicalPlan)); |
| |
| // boolean constant elimination: OR |
| query = "b = filter (load 'd.txt' as (id:int, v1, v2)) by (((v1 is not null) AND (id == 1)) OR (1 == 0));" + |
| "store b into 'empty';"; |
| newLogicalPlan = Util.buildLp(pigServer, query);; |
| |
| optimizer = new MyPlanOptimizer(newLogicalPlan, 10); |
| optimizer.optimize(); |
| |
| query = "b = filter (load 'd.txt' as (id:int, v1, v2)) by ((v1 is not null) AND (id == 1));" + |
| "store b into 'empty';"; |
| expected = Util.buildLp(pigServer, query);; |
| |
| assertTrue(expected.isEqual(newLogicalPlan)); |
| |
| // the mirror case of the above |
| query = "b = filter (load 'd.txt' as (id:int, v1, v2)) by ((1 == 0) OR ((v1 is not null) AND (id == 1)));" + |
| "store b into 'empty';"; |
| newLogicalPlan = Util.buildLp(pigServer, query);; |
| |
| optimizer = new MyPlanOptimizer(newLogicalPlan, 10); |
| optimizer.optimize(); |
| |
| query = "b = filter (load 'd.txt' as (id:int, v1, v2)) by ((v1 is not null) AND (id == 1));" + |
| "store b into 'empty';"; |
| expected = Util.buildLp(pigServer, query);; |
| |
| assertTrue(expected.isEqual(newLogicalPlan)); |
| } |
| |
| @Test |
| public void test4() throws Exception { |
| String query = "b = filter (load 'd.txt' as (a:chararray, b:long, c:map[], d:chararray, e:chararray)) by a == 'v' and b == 117L and c#'p1' == 'h' and c#'p2' == 'to' and ((d is not null and d != '') or (e is not null and e != ''));" + |
| "store b into 'empty';"; |
| LogicalPlan newLogicalPlan = Util.buildLp(pigServer, query);; |
| |
| PlanOptimizer optimizer = new MyPlanOptimizer(newLogicalPlan, 10); |
| optimizer.optimize(); |
| |
| query = "b = filter (load 'd.txt' as (a:chararray, b:long, c:map[], d:chararray, e:chararray)) by a == 'v' and b == 117L and c#'p1' == 'h' and c#'p2' == 'to' and ((d is not null and d != '') or (e is not null and e != ''));" + |
| "store b into 'empty';"; |
| LogicalPlan expected = Util.buildLp(pigServer, query);; |
| |
| assertTrue(expected.isEqual(newLogicalPlan)); |
| |
| // mirror of the above |
| query = "b = filter (load 'd.txt' as (a:chararray, b:long, c:map[], d:chararray, e:chararray)) by ((d is not null and d != '') or (e is not null and e != '')) and a == 'v' and b == 117L and c#'p1' == 'h' and c#'p2' == 'to';" + |
| |
| "store b into 'empty';"; |
| newLogicalPlan = Util.buildLp(pigServer, query);; |
| |
| optimizer = new MyPlanOptimizer(newLogicalPlan, 10); |
| optimizer.optimize(); |
| |
| query = "b = filter (load 'd.txt' as (a:chararray, b:long, c:map[], d:chararray, e:chararray)) by ((d is not null and d != '') or (e is not null and e != '')) and a == 'v' and b == 117L and c#'p1' == 'h' and c#'p2' == 'to';" + |
| "store b into 'empty';"; |
| expected = Util.buildLp(pigServer, query);; |
| |
| assertTrue(expected.isEqual(newLogicalPlan)); |
| |
| } |
| |
| @Test |
| public void test5() throws Exception { |
| // 2-level combo: 8 possibilities |
| boolean[] booleans = {false, true}; |
| for (boolean b1 : booleans) |
| for (boolean b2 : booleans) |
| for (boolean b3 : booleans) |
| comboRunner2(b1, b2, b3); |
| } |
| |
| private void comboRunner2(boolean b1, boolean b2, boolean b3) throws Exception { |
| StringBuilder sb = new StringBuilder(); |
| sb.append("b = filter (load 'd.txt' as (a:int, b:int, c:int, d:int)) by (((a < 1) " + (b1 ? "and" : "or") + " (b < 2)) " + (b2 ? "and" : "or") + " ((c < 3) " + (b3 ? "and" : "or") + " (d < 4)));"); |
| String query = sb.toString() + "store b into 'empty';"; |
| LogicalPlan newLogicalPlan = Util.buildLp(pigServer, query);; |
| |
| PlanOptimizer optimizer = new MyPlanOptimizer(newLogicalPlan, 10); |
| optimizer.optimize(); |
| |
| LogicalPlan expected = Util.buildLp(pigServer, query);; |
| |
| assertTrue(expected.isEqual(newLogicalPlan)); |
| } |
| |
| @Test |
| public void test6() throws Exception { |
| // 3-level combo: 128 possibilities |
| boolean[] booleans = {false, true}; |
| for (boolean b1 : booleans) |
| for (boolean b2 : booleans) |
| for (boolean b3 : booleans) |
| for (boolean b4 : booleans) |
| for (boolean b5 : booleans) |
| for (boolean b6 : booleans) |
| for (boolean b7 : booleans) |
| comboRunner3(b1, b2, b3, b4, b5, b6, b7); |
| } |
| |
| private void comboRunner3(boolean b1, boolean b2, boolean b3, boolean b4, boolean b5, boolean b6, boolean b7) throws Exception { |
| StringBuilder sb = new StringBuilder(); |
| sb.append("b = filter (load 'd.txt' as (a:int, b:int, c:int, d:int, e:int, f:int, g:int, h:int)) by ((((a < 1) " + (b1 ? "and" : "or") + " (b < 2)) " + (b2 ? "and" : "or") + " ((c < 3) " + (b3 ? "and" : "or") + " (d < 4))) " + (b4 ? "and" : "or") + " (((e < 5) " + (b5 ? "and" : "or") + " (f < 6)) " + (b6 ? "and" : "or") + " ((g < 7) " + (b7 ? "and" : "or") + " (h < 8))));"); |
| String query = sb.toString() + "store b into 'empty';"; |
| LogicalPlan newLogicalPlan = Util.buildLp(pigServer, query);; |
| |
| PlanOptimizer optimizer = new MyPlanOptimizer(newLogicalPlan, 10); |
| optimizer.optimize(); |
| |
| LogicalPlan expected = Util.buildLp(pigServer, query);; |
| |
| assertTrue(expected.isEqual(newLogicalPlan)); |
| } |
| |
| @Test |
| public void test7() throws Exception { |
| String query = "b = filter (load 'd.txt' as (k1, k2, k3, v1, v2, v3)) by k2#'f1'#'f' is not null and (v2#'f'#'f1' is not null or v2#'f'#'f2' is not null);" + |
| "store b into 'empty';"; |
| LogicalPlan newLogicalPlan = Util.buildLp(pigServer, query);; |
| |
| PlanOptimizer optimizer = new MyPlanOptimizer(newLogicalPlan, 10); |
| optimizer.optimize(); |
| |
| query = "b = filter (load 'd.txt' as (k1, k2, k3, v1, v2, v3)) by k2#'f1'#'f' is not null and (v2#'f'#'f1' is not null or v2#'f'#'f2' is not null);" + |
| "store b into 'empty';"; |
| LogicalPlan expected = Util.buildLp(pigServer, query);; |
| |
| assertTrue(expected.isEqual(newLogicalPlan)); |
| |
| query = "b = filter (load 'd.txt' as (k1, k2, k3, v1, v2, v3)) by k2#'f1'#'f' is not null and (v2#'f1'#'f' is not null or v2#'f2'#'f' is not null);" + |
| |
| "store b into 'empty';"; |
| newLogicalPlan = Util.buildLp(pigServer, query);; |
| |
| optimizer = new MyPlanOptimizer(newLogicalPlan, 10); |
| optimizer.optimize(); |
| |
| query = "b = filter (load 'd.txt' as (k1, k2, k3, v1, v2, v3)) by k2#'f1'#'f' is not null and (v2#'f1'#'f' is not null or v2#'f2'#'f' is not null);" + |
| "store b into 'empty';"; |
| expected = Util.buildLp(pigServer, query);; |
| |
| assertTrue(expected.isEqual(newLogicalPlan)); |
| } |
| |
| @Test |
| // PIG-1820 |
| public void test8() throws Exception { |
| String query = "b = filter (load 'd.txt' as (a0, a1)) by (a0 is not null or a1 is not null) and IsEmpty(a0);" + |
| "store b into 'empty';"; |
| LogicalPlan newLogicalPlan = Util.buildLp(pigServer, query);; |
| |
| PlanOptimizer optimizer = new MyPlanOptimizer(newLogicalPlan, 10); |
| optimizer.optimize(); |
| |
| query = "b = filter (load 'd.txt' as (a0, a1)) by (a0 is not null or a1 is not null) and IsEmpty(a0);" + |
| "store b into 'empty';"; |
| LogicalPlan expected = Util.buildLp(pigServer, query);; |
| |
| assertTrue(expected.isEqual(newLogicalPlan)); |
| } |
| |
| |
| @Test |
| // PIG-2137 |
| public void testSimiplificationNonDeterministicUdf() throws Exception { |
| String query = "b = filter (load 'd.txt' as (a0, a1)) by RANDOM() > 0.1 and RANDOM() > 0.1;" + |
| "store b into 'empty';"; |
| LogicalPlan newLogicalPlan = Util.buildLp(pigServer, query);; |
| |
| PlanOptimizer optimizer = new MyPlanOptimizer(newLogicalPlan, 10); |
| optimizer.optimize(); |
| LOFilter optimizedFilt = (LOFilter) newLogicalPlan.getSuccessors(newLogicalPlan.getSources().get(0)).get(0); |
| |
| //expected query is same as original query, optimizer should not combine |
| // conditions involving non deterministic udfs |
| query = "b = filter (load 'd.txt' as (a0, a1)) by RANDOM() > 0.1 and RANDOM() > 0.1;" + |
| "store b into 'empty';"; |
| LogicalPlan expected = Util.buildLp(pigServer, query);; |
| LOFilter expectedFilt = (LOFilter) expected.getSuccessors(expected.getSources().get(0)).get(0); |
| assertEquals( |
| "size of filter expression plans", |
| optimizedFilt.getFilterPlan().size(), |
| expectedFilt.getFilterPlan().size() |
| ); |
| |
| } |
| |
| static public class MyFilterFunc extends FilterFunc { |
| |
| @Override |
| public Boolean exec(Tuple input) { |
| return true; |
| } |
| } |
| |
| //PIG-2144 |
| @Test |
| public void testNotConversionUdfArg() throws Exception{ |
| //udf arg should not be changed |
| String query = "b = filter (load 'd.txt' as (a0, a1)) by " + |
| " NOT IsEmpty( " + MyFilterFunc.class.getName() + "(a0,a1));" + |
| "store b into 'empty';"; |
| LogicalPlan newLogicalPlan = Util.buildLp(pigServer, query);; |
| |
| PlanOptimizer optimizer = new MyPlanOptimizer(newLogicalPlan, 10); |
| optimizer.optimize(); |
| |
| //expected plan is same as original plan |
| LogicalPlan expected = Util.buildLp(pigServer, query);; |
| |
| assertTrue(expected.isEqual(newLogicalPlan)); |
| } |
| |
| // PIG-2316 |
| @Test |
| public void testEqualNotEqualWithSameValue() throws Exception { |
| String query = "b = filter (load 'd.txt' as (a0:int, a1:int)) " |
| + "by ((a0 == 1) or (a0 != 1));" |
| + "store b into 'empty';"; |
| LogicalPlan newLogicalPlan = Util.buildLp(pigServer, query);; |
| PlanOptimizer optimizer = new MyPlanOptimizer(newLogicalPlan, 10); |
| optimizer.optimize(); |
| |
| //expected plan is same as original plan |
| LogicalPlan expected = Util.buildLp(pigServer, query);; |
| assertTrue(expected.isEqual(newLogicalPlan)); |
| |
| //swapping == and != |
| query = "b = filter (load 'd.txt' as (a0:int, a1:int)) " |
| + "by ((a0 != 1) or (a0 == 1));" |
| + "store b into 'empty';"; |
| newLogicalPlan = Util.buildLp(pigServer, query);; |
| optimizer = new MyPlanOptimizer(newLogicalPlan, 10); |
| optimizer.optimize(); |
| |
| //expected plan is same as original plan |
| expected = Util.buildLp(pigServer, query);; |
| assertTrue(expected.isEqual(newLogicalPlan)); |
| |
| //more realistic test case which created incorrect output |
| query = "b = filter (load 'd.txt' as (a0:int, a1:int)) " |
| + "by ((a0 == 1 and a1 == 3) or (a0 != 1));" |
| + "store b into 'empty';"; |
| newLogicalPlan = Util.buildLp(pigServer, query);; |
| optimizer = new MyPlanOptimizer(newLogicalPlan, 10); |
| optimizer.optimize(); |
| |
| //expected plan is same as original plan |
| expected = Util.buildLp(pigServer, query);; |
| assertTrue(expected.isEqual(newLogicalPlan)); |
| } |
| |
| public class MyPlanOptimizer extends LogicalPlanOptimizer { |
| |
| protected MyPlanOptimizer(OperatorPlan p, int iterations) { |
| super(p, iterations, null); |
| } |
| |
| protected List<Set<Rule>> buildRuleSets() { |
| List<Set<Rule>> ls = new ArrayList<Set<Rule>>(); |
| |
| Rule r = new LogicalExpressionSimplifier("LogicalPlanSimplifier"); |
| Set<Rule> s = new HashSet<Rule>(); |
| s.add(r); |
| ls.add(s); |
| |
| return ls; |
| } |
| } |
| } |