src/org/apache/pig/newplan/logical/optimizer/LogicalPlanOptimizer.java - pig - Git at Google

 /*
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information
  * regarding copyright ownership.  The ASF licenses this file
  * to you under the Apache License, Version 2.0 (the
  * "License"); you may not use this file except in compliance
  * with the License.  You may obtain a copy of the License at
  *
  *     http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
 package org.apache.pig.newplan.logical.optimizer;

 import java.util.ArrayList;
 import java.util.HashSet;
 import java.util.List;
 import java.util.Set;

 import org.apache.pig.newplan.OperatorPlan;
 import org.apache.pig.newplan.logical.rules.AddForEach;
 import org.apache.pig.newplan.logical.rules.ColumnMapKeyPrune;
 import org.apache.pig.newplan.logical.rules.DuplicateForEachColumnRewrite;
 import org.apache.pig.newplan.logical.rules.FilterAboveForeach;
 import org.apache.pig.newplan.logical.rules.GroupByConstParallelSetter;
 import org.apache.pig.newplan.logical.rules.ImplicitSplitInserter;
 import org.apache.pig.newplan.logical.rules.InputOutputFileValidator;
 import org.apache.pig.newplan.logical.rules.LimitOptimizer;
 import org.apache.pig.newplan.logical.rules.LoadTypeCastInserter;
 import org.apache.pig.newplan.logical.rules.LogicalExpressionSimplifier;
 import org.apache.pig.newplan.logical.rules.MergeFilter;
 import org.apache.pig.newplan.logical.rules.MergeForEach;
 import org.apache.pig.newplan.logical.rules.PartitionFilterOptimizer;
 import org.apache.pig.newplan.logical.rules.PushDownForEachFlatten;
 import org.apache.pig.newplan.logical.rules.PushUpFilter;
 import org.apache.pig.newplan.logical.rules.SplitFilter;
 import org.apache.pig.newplan.logical.rules.StreamTypeCastInserter;
 import org.apache.pig.newplan.optimizer.PlanOptimizer;
 import org.apache.pig.newplan.optimizer.Rule;

 public class LogicalPlanOptimizer extends PlanOptimizer {
     private Set<String> mRulesOff = null;

     public LogicalPlanOptimizer(OperatorPlan p, int iterations, Set<String> turnOffRules) {
         super(p, null, iterations);
         this.mRulesOff = turnOffRules;
         ruleSets = buildRuleSets();
         addListeners();
     }

     protected List<Set<Rule>> buildRuleSets() {
         List<Set<Rule>> ls = new ArrayList<Set<Rule>>();


         // ImplicitSplitInserter set
         // This set of rules Insert Foreach dedicated for casting after load
         Set<Rule> s = new HashSet<Rule>();
         Rule r = new ImplicitSplitInserter("ImplicitSplitInserter");
         checkAndAddRule(s, r);
         if (!s.isEmpty())
             ls.add(s);

         // DuplicateForEachColumnRewrite set
         // This insert Identity UDF in the case foreach duplicate field.
         // This is because we need unique uid through out the plan
         s = new HashSet<Rule>();
         r = new DuplicateForEachColumnRewrite("DuplicateForEachColumnRewrite");
         checkAndAddRule(s, r);
         if (!s.isEmpty())
             ls.add(s);

         // Logical expression simplifier
         s = new HashSet<Rule>();
         // add logical expression simplification rule
         r = new LogicalExpressionSimplifier("FilterLogicExpressionSimplifier");
         checkAndAddRule(s, r);
         ls.add(s);

         // TypeCastInserter set
         // This set of rules Insert Foreach dedicated for casting after load
         s = new HashSet<Rule>();
         // add split filter rule
         r = new LoadTypeCastInserter("LoadTypeCastInserter");
         checkAndAddRule(s, r);
         r = new StreamTypeCastInserter("StreamTypeCastInserter");
         checkAndAddRule(s, r);
         if (!s.isEmpty())
             ls.add(s);

         // Limit Set
         // This set of rules push up limit
         s = new HashSet<Rule>();
         // Optimize limit
         r = new LimitOptimizer("LimitOptimizer");
         checkAndAddRule(s, r);
         if (!s.isEmpty())
             ls.add(s);

         // Split Set
         // This set of rules does splitting of operators only.
         // It does not move operators
         s = new HashSet<Rule>();
         // add split filter rule
         r = new SplitFilter("SplitFilter");
         checkAndAddRule(s, r);
         if (!s.isEmpty())
             ls.add(s);


         // Push Set,
         // This set does moving of operators only.
         s = new HashSet<Rule>();
         r = new PushUpFilter("PushUpFilter");
         checkAndAddRule(s, r);
         r = new FilterAboveForeach("PushUpFilter");
         checkAndAddRule(s, r);
         if (!s.isEmpty())
             ls.add(s);

         // Merge Set
         // This Set merges operators but does not move them.
         s = new HashSet<Rule>();
         checkAndAddRule(s, r);
         // add merge filter rule
         r = new MergeFilter("MergeFilter");
         checkAndAddRule(s, r);
         if (!s.isEmpty())
             ls.add(s);

         // Partition filter set
         // This set of rules push partition filter to LoadFunc
         s = new HashSet<Rule>();
         // Optimize partition filter
         r = new PartitionFilterOptimizer("PartitionFilterOptimizer");
         checkAndAddRule(s, r);
         if (!s.isEmpty())
             ls.add(s);

         // PushDownForEachFlatten set
         s = new HashSet<Rule>();
         // Add the PushDownForEachFlatten
         r = new PushDownForEachFlatten("PushDownForEachFlatten");
         checkAndAddRule(s, r);
         if (!s.isEmpty())
             ls.add(s);

         // Prune Set
         // This set is used for pruning columns and maps
         s = new HashSet<Rule>();
         // Add the PruneMap Filter
         r = new ColumnMapKeyPrune("ColumnMapKeyPrune");
         checkAndAddRule(s, r);
         if (!s.isEmpty())
             ls.add(s);

         // Add LOForEach set
         s = new HashSet<Rule>();
         // Add the AddForEach
         r = new AddForEach("AddForEach");
         checkAndAddRule(s, r);
         if (!s.isEmpty())
             ls.add(s);

         // Add MergeForEach set
         s = new HashSet<Rule>();
         // Add the AddForEach
         r = new MergeForEach("MergeForEach");
         checkAndAddRule(s, r);
         if (!s.isEmpty())
             ls.add(s);

         //set parallism to 1 for cogroup/group-by on constant
         s = new HashSet<Rule>();
         r = new GroupByConstParallelSetter("GroupByConstParallelSetter");
         checkAndAddRule(s, r);
         if(!s.isEmpty())
             ls.add(s);

         return ls;
     }

     private void checkAndAddRule(Set<Rule> ruleSet, Rule rule) {
         if (rule.isMandatory()) {
             ruleSet.add(rule);
             return;
         }

         boolean turnAllRulesOff = false;
         if (mRulesOff != null) {
             for (String ruleName : mRulesOff) {
                 if ("all".equalsIgnoreCase(ruleName)) {
                     turnAllRulesOff = true;
                     break;
                 }
             }
         }

         if (turnAllRulesOff) return;

         if(mRulesOff != null) {
             for(String ruleOff: mRulesOff) {
                 String ruleName = rule.getName();
                 if(ruleName == null) continue;
                 if(ruleName.equalsIgnoreCase(ruleOff)) return;
             }
         }

         ruleSet.add(rule);
     }

     private void addListeners() {
         addPlanTransformListener(new SchemaPatcher());
         addPlanTransformListener(new ProjectionPatcher());
     }
 }
	/*
	* Licensed to the Apache Software Foundation (ASF) under one
	* or more contributor license agreements. See the NOTICE file
	* distributed with this work for additional information
	* regarding copyright ownership. The ASF licenses this file
	* to you under the Apache License, Version 2.0 (the
	* "License"); you may not use this file except in compliance
	* with the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/
	package org.apache.pig.newplan.logical.optimizer;

	import java.util.ArrayList;
	import java.util.HashSet;
	import java.util.List;
	import java.util.Set;

	import org.apache.pig.newplan.OperatorPlan;
	import org.apache.pig.newplan.logical.rules.AddForEach;
	import org.apache.pig.newplan.logical.rules.ColumnMapKeyPrune;
	import org.apache.pig.newplan.logical.rules.DuplicateForEachColumnRewrite;
	import org.apache.pig.newplan.logical.rules.FilterAboveForeach;
	import org.apache.pig.newplan.logical.rules.GroupByConstParallelSetter;
	import org.apache.pig.newplan.logical.rules.ImplicitSplitInserter;
	import org.apache.pig.newplan.logical.rules.InputOutputFileValidator;
	import org.apache.pig.newplan.logical.rules.LimitOptimizer;
	import org.apache.pig.newplan.logical.rules.LoadTypeCastInserter;
	import org.apache.pig.newplan.logical.rules.LogicalExpressionSimplifier;
	import org.apache.pig.newplan.logical.rules.MergeFilter;
	import org.apache.pig.newplan.logical.rules.MergeForEach;
	import org.apache.pig.newplan.logical.rules.PartitionFilterOptimizer;
	import org.apache.pig.newplan.logical.rules.PushDownForEachFlatten;
	import org.apache.pig.newplan.logical.rules.PushUpFilter;
	import org.apache.pig.newplan.logical.rules.SplitFilter;
	import org.apache.pig.newplan.logical.rules.StreamTypeCastInserter;
	import org.apache.pig.newplan.optimizer.PlanOptimizer;
	import org.apache.pig.newplan.optimizer.Rule;

	public class LogicalPlanOptimizer extends PlanOptimizer {
	private Set<String> mRulesOff = null;

	public LogicalPlanOptimizer(OperatorPlan p, int iterations, Set<String> turnOffRules) {
	super(p, null, iterations);
	this.mRulesOff = turnOffRules;
	ruleSets = buildRuleSets();
	addListeners();
	}

	protected List<Set<Rule>> buildRuleSets() {
	List<Set<Rule>> ls = new ArrayList<Set<Rule>>();


	// ImplicitSplitInserter set
	// This set of rules Insert Foreach dedicated for casting after load
	Set<Rule> s = new HashSet<Rule>();
	Rule r = new ImplicitSplitInserter("ImplicitSplitInserter");
	checkAndAddRule(s, r);
	if (!s.isEmpty())
	ls.add(s);

	// DuplicateForEachColumnRewrite set
	// This insert Identity UDF in the case foreach duplicate field.
	// This is because we need unique uid through out the plan
	s = new HashSet<Rule>();
	r = new DuplicateForEachColumnRewrite("DuplicateForEachColumnRewrite");
	checkAndAddRule(s, r);
	if (!s.isEmpty())
	ls.add(s);

	// Logical expression simplifier
	s = new HashSet<Rule>();
	// add logical expression simplification rule
	r = new LogicalExpressionSimplifier("FilterLogicExpressionSimplifier");
	checkAndAddRule(s, r);
	ls.add(s);

	// TypeCastInserter set
	// This set of rules Insert Foreach dedicated for casting after load
	s = new HashSet<Rule>();
	// add split filter rule
	r = new LoadTypeCastInserter("LoadTypeCastInserter");
	checkAndAddRule(s, r);
	r = new StreamTypeCastInserter("StreamTypeCastInserter");
	checkAndAddRule(s, r);
	if (!s.isEmpty())
	ls.add(s);

	// Limit Set
	// This set of rules push up limit
	s = new HashSet<Rule>();
	// Optimize limit
	r = new LimitOptimizer("LimitOptimizer");
	checkAndAddRule(s, r);
	if (!s.isEmpty())
	ls.add(s);

	// Split Set
	// This set of rules does splitting of operators only.
	// It does not move operators
	s = new HashSet<Rule>();
	// add split filter rule
	r = new SplitFilter("SplitFilter");
	checkAndAddRule(s, r);
	if (!s.isEmpty())
	ls.add(s);


	// Push Set,
	// This set does moving of operators only.
	s = new HashSet<Rule>();
	r = new PushUpFilter("PushUpFilter");
	checkAndAddRule(s, r);
	r = new FilterAboveForeach("PushUpFilter");
	checkAndAddRule(s, r);
	if (!s.isEmpty())
	ls.add(s);

	// Merge Set
	// This Set merges operators but does not move them.
	s = new HashSet<Rule>();
	checkAndAddRule(s, r);
	// add merge filter rule
	r = new MergeFilter("MergeFilter");
	checkAndAddRule(s, r);
	if (!s.isEmpty())
	ls.add(s);

	// Partition filter set
	// This set of rules push partition filter to LoadFunc
	s = new HashSet<Rule>();
	// Optimize partition filter
	r = new PartitionFilterOptimizer("PartitionFilterOptimizer");
	checkAndAddRule(s, r);
	if (!s.isEmpty())
	ls.add(s);

	// PushDownForEachFlatten set
	s = new HashSet<Rule>();
	// Add the PushDownForEachFlatten
	r = new PushDownForEachFlatten("PushDownForEachFlatten");
	checkAndAddRule(s, r);
	if (!s.isEmpty())
	ls.add(s);

	// Prune Set
	// This set is used for pruning columns and maps
	s = new HashSet<Rule>();
	// Add the PruneMap Filter
	r = new ColumnMapKeyPrune("ColumnMapKeyPrune");
	checkAndAddRule(s, r);
	if (!s.isEmpty())
	ls.add(s);

	// Add LOForEach set
	s = new HashSet<Rule>();
	// Add the AddForEach
	r = new AddForEach("AddForEach");
	checkAndAddRule(s, r);
	if (!s.isEmpty())
	ls.add(s);

	// Add MergeForEach set
	s = new HashSet<Rule>();
	// Add the AddForEach
	r = new MergeForEach("MergeForEach");
	checkAndAddRule(s, r);
	if (!s.isEmpty())
	ls.add(s);

	//set parallism to 1 for cogroup/group-by on constant
	s = new HashSet<Rule>();
	r = new GroupByConstParallelSetter("GroupByConstParallelSetter");
	checkAndAddRule(s, r);
	if(!s.isEmpty())
	ls.add(s);

	return ls;
	}

	private void checkAndAddRule(Set<Rule> ruleSet, Rule rule) {
	if (rule.isMandatory()) {
	ruleSet.add(rule);
	return;
	}

	boolean turnAllRulesOff = false;
	if (mRulesOff != null) {
	for (String ruleName : mRulesOff) {
	if ("all".equalsIgnoreCase(ruleName)) {
	turnAllRulesOff = true;
	break;
	}
	}
	}

	if (turnAllRulesOff) return;

	if(mRulesOff != null) {
	for(String ruleOff: mRulesOff) {
	String ruleName = rule.getName();
	if(ruleName == null) continue;
	if(ruleName.equalsIgnoreCase(ruleOff)) return;
	}
	}

	ruleSet.add(rule);
	}

	private void addListeners() {
	addPlanTransformListener(new SchemaPatcher());
	addPlanTransformListener(new ProjectionPatcher());
	}
	}