| /* |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| package org.apache.pig.newplan.logical.optimizer; |
| |
| import java.util.ArrayList; |
| import java.util.HashSet; |
| import java.util.List; |
| import java.util.Set; |
| |
| import org.apache.pig.newplan.OperatorPlan; |
| import org.apache.pig.newplan.logical.rules.AddForEach; |
| import org.apache.pig.newplan.logical.rules.ColumnMapKeyPrune; |
| import org.apache.pig.newplan.logical.rules.DuplicateForEachColumnRewrite; |
| import org.apache.pig.newplan.logical.rules.FilterAboveForeach; |
| import org.apache.pig.newplan.logical.rules.GroupByConstParallelSetter; |
| import org.apache.pig.newplan.logical.rules.ImplicitSplitInserter; |
| import org.apache.pig.newplan.logical.rules.InputOutputFileValidator; |
| import org.apache.pig.newplan.logical.rules.LimitOptimizer; |
| import org.apache.pig.newplan.logical.rules.LoadTypeCastInserter; |
| import org.apache.pig.newplan.logical.rules.LogicalExpressionSimplifier; |
| import org.apache.pig.newplan.logical.rules.MergeFilter; |
| import org.apache.pig.newplan.logical.rules.MergeForEach; |
| import org.apache.pig.newplan.logical.rules.PartitionFilterOptimizer; |
| import org.apache.pig.newplan.logical.rules.PushDownForEachFlatten; |
| import org.apache.pig.newplan.logical.rules.PushUpFilter; |
| import org.apache.pig.newplan.logical.rules.SplitFilter; |
| import org.apache.pig.newplan.logical.rules.StreamTypeCastInserter; |
| import org.apache.pig.newplan.optimizer.PlanOptimizer; |
| import org.apache.pig.newplan.optimizer.Rule; |
| |
| public class LogicalPlanOptimizer extends PlanOptimizer { |
| private Set<String> mRulesOff = null; |
| |
| public LogicalPlanOptimizer(OperatorPlan p, int iterations, Set<String> turnOffRules) { |
| super(p, null, iterations); |
| this.mRulesOff = turnOffRules; |
| ruleSets = buildRuleSets(); |
| addListeners(); |
| } |
| |
| protected List<Set<Rule>> buildRuleSets() { |
| List<Set<Rule>> ls = new ArrayList<Set<Rule>>(); |
| |
| |
| // ImplicitSplitInserter set |
| // This set of rules Insert Foreach dedicated for casting after load |
| Set<Rule> s = new HashSet<Rule>(); |
| Rule r = new ImplicitSplitInserter("ImplicitSplitInserter"); |
| checkAndAddRule(s, r); |
| if (!s.isEmpty()) |
| ls.add(s); |
| |
| // DuplicateForEachColumnRewrite set |
| // This insert Identity UDF in the case foreach duplicate field. |
| // This is because we need unique uid through out the plan |
| s = new HashSet<Rule>(); |
| r = new DuplicateForEachColumnRewrite("DuplicateForEachColumnRewrite"); |
| checkAndAddRule(s, r); |
| if (!s.isEmpty()) |
| ls.add(s); |
| |
| // Logical expression simplifier |
| s = new HashSet<Rule>(); |
| // add logical expression simplification rule |
| r = new LogicalExpressionSimplifier("FilterLogicExpressionSimplifier"); |
| checkAndAddRule(s, r); |
| ls.add(s); |
| |
| // TypeCastInserter set |
| // This set of rules Insert Foreach dedicated for casting after load |
| s = new HashSet<Rule>(); |
| // add split filter rule |
| r = new LoadTypeCastInserter("LoadTypeCastInserter"); |
| checkAndAddRule(s, r); |
| r = new StreamTypeCastInserter("StreamTypeCastInserter"); |
| checkAndAddRule(s, r); |
| if (!s.isEmpty()) |
| ls.add(s); |
| |
| // Limit Set |
| // This set of rules push up limit |
| s = new HashSet<Rule>(); |
| // Optimize limit |
| r = new LimitOptimizer("LimitOptimizer"); |
| checkAndAddRule(s, r); |
| if (!s.isEmpty()) |
| ls.add(s); |
| |
| // Split Set |
| // This set of rules does splitting of operators only. |
| // It does not move operators |
| s = new HashSet<Rule>(); |
| // add split filter rule |
| r = new SplitFilter("SplitFilter"); |
| checkAndAddRule(s, r); |
| if (!s.isEmpty()) |
| ls.add(s); |
| |
| |
| // Push Set, |
| // This set does moving of operators only. |
| s = new HashSet<Rule>(); |
| r = new PushUpFilter("PushUpFilter"); |
| checkAndAddRule(s, r); |
| r = new FilterAboveForeach("PushUpFilter"); |
| checkAndAddRule(s, r); |
| if (!s.isEmpty()) |
| ls.add(s); |
| |
| // Merge Set |
| // This Set merges operators but does not move them. |
| s = new HashSet<Rule>(); |
| checkAndAddRule(s, r); |
| // add merge filter rule |
| r = new MergeFilter("MergeFilter"); |
| checkAndAddRule(s, r); |
| if (!s.isEmpty()) |
| ls.add(s); |
| |
| // Partition filter set |
| // This set of rules push partition filter to LoadFunc |
| s = new HashSet<Rule>(); |
| // Optimize partition filter |
| r = new PartitionFilterOptimizer("PartitionFilterOptimizer"); |
| checkAndAddRule(s, r); |
| if (!s.isEmpty()) |
| ls.add(s); |
| |
| // PushDownForEachFlatten set |
| s = new HashSet<Rule>(); |
| // Add the PushDownForEachFlatten |
| r = new PushDownForEachFlatten("PushDownForEachFlatten"); |
| checkAndAddRule(s, r); |
| if (!s.isEmpty()) |
| ls.add(s); |
| |
| // Prune Set |
| // This set is used for pruning columns and maps |
| s = new HashSet<Rule>(); |
| // Add the PruneMap Filter |
| r = new ColumnMapKeyPrune("ColumnMapKeyPrune"); |
| checkAndAddRule(s, r); |
| if (!s.isEmpty()) |
| ls.add(s); |
| |
| // Add LOForEach set |
| s = new HashSet<Rule>(); |
| // Add the AddForEach |
| r = new AddForEach("AddForEach"); |
| checkAndAddRule(s, r); |
| if (!s.isEmpty()) |
| ls.add(s); |
| |
| // Add MergeForEach set |
| s = new HashSet<Rule>(); |
| // Add the AddForEach |
| r = new MergeForEach("MergeForEach"); |
| checkAndAddRule(s, r); |
| if (!s.isEmpty()) |
| ls.add(s); |
| |
| //set parallism to 1 for cogroup/group-by on constant |
| s = new HashSet<Rule>(); |
| r = new GroupByConstParallelSetter("GroupByConstParallelSetter"); |
| checkAndAddRule(s, r); |
| if(!s.isEmpty()) |
| ls.add(s); |
| |
| return ls; |
| } |
| |
| private void checkAndAddRule(Set<Rule> ruleSet, Rule rule) { |
| if (rule.isMandatory()) { |
| ruleSet.add(rule); |
| return; |
| } |
| |
| boolean turnAllRulesOff = false; |
| if (mRulesOff != null) { |
| for (String ruleName : mRulesOff) { |
| if ("all".equalsIgnoreCase(ruleName)) { |
| turnAllRulesOff = true; |
| break; |
| } |
| } |
| } |
| |
| if (turnAllRulesOff) return; |
| |
| if(mRulesOff != null) { |
| for(String ruleOff: mRulesOff) { |
| String ruleName = rule.getName(); |
| if(ruleName == null) continue; |
| if(ruleName.equalsIgnoreCase(ruleOff)) return; |
| } |
| } |
| |
| ruleSet.add(rule); |
| } |
| |
| private void addListeners() { |
| addPlanTransformListener(new SchemaPatcher()); |
| addPlanTransformListener(new ProjectionPatcher()); |
| } |
| } |