| /* |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| package org.apache.pig.newplan.logical.relational; |
| |
| import java.util.ArrayList; |
| import java.util.HashSet; |
| import java.util.List; |
| import java.util.Set; |
| |
| import org.apache.pig.PigException; |
| import org.apache.pig.impl.logicalLayer.FrontendException; |
| import org.apache.pig.impl.util.Pair; |
| import org.apache.pig.newplan.Operator; |
| import org.apache.pig.newplan.OperatorPlan; |
| import org.apache.pig.newplan.PlanVisitor; |
| import org.apache.pig.newplan.logical.expression.LogicalExpression; |
| import org.apache.pig.newplan.logical.relational.LogicalSchema.LogicalFieldSchema; |
| |
| public class LOUnion extends LogicalRelationalOperator { |
| private boolean onSchema; |
| |
| // uid mapping from output uid to input uid |
| private List<Pair<Long, Long>> uidMapping = new ArrayList<Pair<Long, Long>>(); |
| |
| public LOUnion(OperatorPlan plan) { |
| super("LOUnion", plan); |
| } |
| |
| public LOUnion(OperatorPlan plan, boolean onSchema) { |
| this( plan ); |
| this.onSchema = onSchema; |
| } |
| |
| public boolean isOnSchema() { |
| return onSchema; |
| } |
| |
| @Override |
| public LogicalSchema getSchema() throws FrontendException { |
| if (schema != null) { |
| return schema; |
| } |
| |
| List<Operator> inputs = plan.getPredecessors(this); |
| // If any predecessor's schema is null, then the schema for union is null |
| for (Operator input : inputs) { |
| LogicalRelationalOperator op = (LogicalRelationalOperator)input; |
| if( op.getSchema() == null ) { |
| if( isOnSchema() ) { |
| String msg = "Schema of relation " + op.getAlias() |
| + " is null." |
| + " UNION ONSCHEMA cannot be used with relations that" |
| + " have null schema."; |
| throw new FrontendException(this, msg, 1116, PigException.INPUT); |
| |
| } else { |
| return null; |
| } |
| } |
| } |
| |
| LogicalSchema mergedSchema = null; |
| if ( inputs.size() == 1 ) |
| return schema = ((LogicalRelationalOperator)inputs.get(0)).getSchema(); |
| |
| List<String> inputAliases = new ArrayList<String>(inputs.size()); |
| List<LogicalSchema> inputSchemas = new ArrayList<LogicalSchema>(inputs.size()); |
| for (Operator input : inputs) { |
| LogicalRelationalOperator lop = (LogicalRelationalOperator)input; |
| inputAliases.add(lop.getAlias()); |
| inputSchemas.add(lop.getSchema()); |
| } |
| |
| if( isOnSchema() ) { |
| mergedSchema = createMergedSchemaOnAlias( inputSchemas, inputAliases ); |
| } else { |
| LogicalSchema s0 = inputSchemas.get(0); |
| LogicalSchema s1 = inputSchemas.get(1); |
| mergedSchema = LogicalSchema.merge(s0, s1, LogicalSchema.MergeMode.Union); |
| if (mergedSchema==null) |
| return null; |
| |
| // Merge schema |
| for (int i=2;i<inputSchemas.size();i++) { |
| LogicalSchema otherSchema = inputSchemas.get(i); |
| if (mergedSchema==null || otherSchema==null) |
| return null; |
| mergedSchema = LogicalSchema.merge(mergedSchema, otherSchema, LogicalSchema.MergeMode.Union); |
| if (mergedSchema == null) |
| return null; |
| } |
| } |
| |
| // Bring back cached uid if any; otherwise, cache uid generated |
| setMergedSchemaUids(mergedSchema, inputSchemas); |
| |
| return schema = mergedSchema; |
| } |
| |
| /** |
| * create schema for union-onschema |
| */ |
| private LogicalSchema createMergedSchemaOnAlias(List<LogicalSchema> inputSchemas, |
| List<String> inputAliases) |
| throws FrontendException { |
| ArrayList<LogicalSchema> schemas = new ArrayList<LogicalSchema>(); |
| for (int i = 0; i < inputSchemas.size(); i++){ |
| LogicalSchema sch = inputSchemas.get(i); |
| for( LogicalFieldSchema fs : sch.getFields() ) { |
| if(fs.alias == null){ |
| String msg = "Schema of relation " + inputAliases.get(i) |
| + " has a null fieldschema for column(s). Schema :" + sch.toString(false); |
| throw new FrontendException( this, msg, 1116, PigException.INPUT ); |
| } |
| } |
| schemas.add( sch ); |
| } |
| |
| //create the merged schema |
| LogicalSchema mergedSchema = null; |
| try { |
| mergedSchema = LogicalSchema.mergeSchemasByAlias( schemas ); |
| } catch(FrontendException e) { |
| String msg = "Error merging schemas for union operator : " |
| + e.getMessage(); |
| throw new FrontendException(this, msg, 1116, PigException.INPUT, e); |
| } |
| |
| return mergedSchema; |
| } |
| |
| private void setMergedSchemaUids(LogicalSchema mergedSchema, List<LogicalSchema> inputSchemas) |
| throws FrontendException { |
| |
| for (int i=0;i<mergedSchema.size();i++) { |
| LogicalSchema.LogicalFieldSchema outputFieldSchema = mergedSchema.getField(i); |
| |
| long uid = -1; |
| List<LogicalSchema> fieldInputSchemas = new ArrayList<>(inputSchemas.size()); |
| |
| // Search all the cached uid mappings by input field to see if |
| // we've cached an output uid for this output field |
| for (LogicalSchema inputSchema : inputSchemas) { |
| LogicalSchema.LogicalFieldSchema inputFieldSchema; |
| if (onSchema) { |
| inputFieldSchema = inputSchema.getFieldSubNameMatch(outputFieldSchema.alias); |
| } else { |
| inputFieldSchema = inputSchema.getField(i); |
| } |
| |
| if (inputFieldSchema != null) { |
| if (inputFieldSchema.schema != null) { |
| fieldInputSchemas.add(inputFieldSchema.schema); |
| } |
| |
| if (uid < 0) { |
| uid = getCachedOuputUid(inputFieldSchema.uid); |
| if (uid >= 0 && outputFieldSchema.schema == null) break; |
| } |
| } |
| } |
| |
| // No cached uid. Allocate one, and locate and cache all inputs. |
| if (uid==-1) { |
| uid = LogicalExpression.getNextUid(); |
| for (LogicalSchema inputSchema : inputSchemas) { |
| long inputUid; |
| LogicalFieldSchema matchedInputFieldSchema; |
| if (onSchema) { |
| matchedInputFieldSchema = inputSchema.getFieldSubNameMatch(mergedSchema.getField(i).alias); |
| if (matchedInputFieldSchema!=null) { |
| inputUid = matchedInputFieldSchema.uid; |
| uidMapping.add(new Pair<Long, Long>(uid, inputUid)); |
| } |
| } |
| else { |
| matchedInputFieldSchema = mergedSchema.getField(i); |
| inputUid = inputSchema.getField(i).uid; |
| uidMapping.add(new Pair<Long, Long>(uid, inputUid)); |
| } |
| } |
| } |
| |
| outputFieldSchema.uid = uid; |
| |
| // This field has a schema. Assign uids to it as well |
| if (outputFieldSchema.schema != null) { |
| setMergedSchemaUids(outputFieldSchema.schema, fieldInputSchemas); |
| } |
| } |
| } |
| |
| private long getCachedOuputUid(long inputUid) { |
| long uid = -1; |
| |
| for (Pair<Long, Long> pair : uidMapping) { |
| if (pair.second==inputUid) { |
| uid = pair.first; |
| break; |
| } |
| } |
| |
| return uid; |
| } |
| |
| @Override |
| public void accept(PlanVisitor v) throws FrontendException { |
| if (!(v instanceof LogicalRelationalNodesVisitor)) { |
| throw new FrontendException("Expected LogicalPlanVisitor", 2223); |
| } |
| ((LogicalRelationalNodesVisitor)v).visit(this); |
| } |
| |
| @Override |
| public boolean isEqual(Operator other) throws FrontendException { |
| if (other != null && other instanceof LOUnion) { |
| return checkEquality((LOUnion)other); |
| } else { |
| return false; |
| } |
| } |
| |
| // Get input uids mapping to the output uid |
| public Set<Long> getInputUids(long uid) { |
| Set<Long> result = new HashSet<Long>(); |
| for (Pair<Long, Long> pair : uidMapping) { |
| if (pair.first==uid) |
| result.add(pair.second); |
| } |
| return result; |
| } |
| |
| @Override |
| public void resetUid() { |
| uidMapping = new ArrayList<Pair<Long, Long>>(); |
| } |
| |
| public List<Operator> getInputs() { |
| return plan.getPredecessors(this); |
| } |
| |
| public List<Operator> getInputs(LogicalPlan plan) { |
| return plan.getPredecessors(this); |
| } |
| |
| public void setUnionOnSchema(boolean flag) { |
| onSchema = flag; |
| } |
| } |