| /* |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| package org.apache.pig.pen; |
| |
| import java.util.Collection; |
| import java.util.LinkedList; |
| import java.util.Map; |
| import java.util.HashMap; |
| import java.util.List; |
| import java.util.HashSet; |
| import java.util.Iterator; |
| |
| import org.apache.pig.backend.hadoop.executionengine.physicalLayer.PhysicalOperator; |
| import org.apache.pig.data.DataBag; |
| import org.apache.pig.data.Tuple; |
| import org.apache.pig.newplan.logical.relational.LOForEach; |
| import org.apache.pig.newplan.logical.relational.LOCross; |
| import org.apache.pig.newplan.logical.relational.LogicalRelationalOperator; |
| import org.apache.pig.newplan.logical.relational.LogicalPlan; |
| import org.apache.pig.newplan.Operator; |
| import org.apache.pig.backend.hadoop.executionengine.physicalLayer.plans.PhysicalPlan; |
| import org.apache.pig.impl.util.IdentityHashSet; |
| import org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.*; |
| |
| |
| //These methods are used to generate equivalence classes given the operator name and the output from the operator |
| //For example, it gives out 2 eq. classes for filter, one that passes the filter and one that doesn't |
| public class EquivalenceClasses { |
| |
| public static Map<LogicalRelationalOperator, Collection<IdentityHashSet<Tuple>>> getLoToEqClassMap(PhysicalPlan plan, |
| LogicalPlan lp, Map<Operator, PhysicalOperator> logToPhyMap, |
| Map<Operator, DataBag> logToDataMap, |
| Map<LOForEach, Map<LogicalRelationalOperator, PhysicalOperator>> forEachInnerLogToPhyMap, |
| final HashMap<PhysicalOperator, Collection<IdentityHashSet<Tuple>>> poToEqclassesMap) |
| { |
| Map<LogicalRelationalOperator, Collection<IdentityHashSet<Tuple>>> ret = |
| new HashMap<LogicalRelationalOperator, Collection<IdentityHashSet<Tuple>>>(); |
| List<Operator> roots = lp.getSources(); |
| HashSet<Operator> seen = new HashSet<Operator>(); |
| for(Operator lo: roots) { |
| getEqClasses(plan, lo, lp, logToPhyMap, ret, poToEqclassesMap, logToDataMap, forEachInnerLogToPhyMap, seen); |
| } |
| return ret; |
| } |
| |
| private static void getEqClasses(PhysicalPlan plan, Operator parent, LogicalPlan lp, |
| Map<Operator, PhysicalOperator> logToPhyMap, Map<LogicalRelationalOperator, |
| Collection<IdentityHashSet<Tuple>>> result, |
| final HashMap<PhysicalOperator, Collection<IdentityHashSet<Tuple>>> poToEqclassesMap, |
| Map<Operator, DataBag> logToDataMap, |
| Map<LOForEach, Map<LogicalRelationalOperator, PhysicalOperator>> forEachInnerLogToPhyMap, |
| HashSet<Operator> seen) { |
| if (parent instanceof LOForEach) { |
| if (poToEqclassesMap.get(logToPhyMap.get(parent)) != null) { |
| LinkedList<IdentityHashSet<Tuple>> eqClasses = new LinkedList<IdentityHashSet<Tuple>>(); |
| eqClasses.addAll(poToEqclassesMap.get(logToPhyMap.get(parent))); |
| for (Map.Entry<LogicalRelationalOperator, PhysicalOperator> entry : forEachInnerLogToPhyMap.get(parent).entrySet()) { |
| if (poToEqclassesMap.get(entry.getValue()) != null) |
| eqClasses.addAll(poToEqclassesMap.get(entry.getValue())); |
| } |
| result.put((LogicalRelationalOperator) parent, eqClasses); |
| } |
| } else if (parent instanceof LOCross) { |
| boolean ok = true; |
| for (Operator input : ((LOCross) parent).getInputs()) { |
| if (logToDataMap.get(input).size() < 2) { |
| // only if all inputs have at least more than two tuples will all outputs be added to the eq. class |
| ok = false; |
| break; |
| } |
| } |
| if (ok) { |
| LinkedList<IdentityHashSet<Tuple>> eqClasses = new LinkedList<IdentityHashSet<Tuple>>(); |
| IdentityHashSet<Tuple> eqClass = new IdentityHashSet<Tuple>(); |
| for (Iterator<Tuple> it = logToDataMap.get(parent).iterator(); it.hasNext();) { |
| eqClass.add(it.next()); |
| } |
| eqClasses.add(eqClass); |
| result.put((LogicalRelationalOperator) parent, eqClasses); |
| } else { |
| LinkedList<IdentityHashSet<Tuple>> eqClasses = new LinkedList<IdentityHashSet<Tuple>>(); |
| IdentityHashSet<Tuple> eqClass = new IdentityHashSet<Tuple>(); |
| eqClasses.add(eqClass); |
| result.put((LogicalRelationalOperator)parent, eqClasses); |
| } |
| } else { |
| Collection<IdentityHashSet<Tuple>> eqClasses = poToEqclassesMap.get(logToPhyMap.get(parent)); |
| if (eqClasses == null) { |
| eqClasses = new LinkedList<IdentityHashSet<Tuple>>(); |
| int size = ((POPackage)logToPhyMap.get(parent)).getNumInps(); |
| for (int i = 0; i < size; i++) { |
| eqClasses.add(new IdentityHashSet<Tuple>()); |
| } |
| } |
| result.put((LogicalRelationalOperator)parent, eqClasses); |
| } |
| // result.put(parent, getEquivalenceClasses(plan, parent, lp, logToPhyMap, poToEqclassesMap)); |
| if (lp.getSuccessors(parent) != null) { |
| for (Operator lo : lp.getSuccessors(parent)) { |
| if (!seen.contains(lo)) { |
| seen.add(lo); |
| getEqClasses(plan, lo, lp, logToPhyMap, result, poToEqclassesMap, logToDataMap, forEachInnerLogToPhyMap, seen); |
| } |
| } |
| } |
| } |
| } |