/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
 */

package org.apache.drill.exec.physical.impl.join;

import java.util.ArrayList;
import java.util.List;

import org.apache.calcite.rel.core.JoinRelType;
import org.apache.commons.lang3.tuple.Pair;
import org.apache.drill.common.exceptions.UserException;
import org.apache.drill.common.expression.FieldReference;
import org.apache.drill.common.expression.PathSegment;
import org.apache.drill.common.expression.SchemaPath;
import org.apache.drill.common.logical.data.JoinCondition;
import org.apache.drill.common.logical.data.NamedExpression;
import org.apache.drill.exec.ExecConstants;
import org.apache.drill.exec.exception.OutOfMemoryException;
import org.apache.drill.exec.exception.SchemaChangeException;
import org.apache.drill.exec.ops.FragmentContext;
import org.apache.drill.exec.physical.config.HashJoinPOP;
import org.apache.drill.exec.physical.impl.common.Comparator;
import org.apache.drill.exec.physical.impl.common.HashTable;
import org.apache.drill.exec.physical.impl.common.HashTableConfig;
import org.apache.drill.exec.planner.common.JoinControl;
import org.apache.drill.exec.record.BatchSchema;
import org.apache.drill.exec.record.RecordBatch;
import org.apache.drill.exec.record.VectorContainer;
import org.apache.drill.exec.record.VectorWrapper;
import org.apache.drill.exec.vector.ValueVector;

import com.google.common.collect.Iterables;
import com.google.common.collect.Lists;

/**
* Implements the runtime execution for the Hash-Join operator supporting INNER,
 * LEFT OUTER, RIGHT OUTER, and FULL OUTER joins.
*/
public class HashJoinBatch extends AbstractHashBinaryRecordBatch<HashJoinPOP>
implements RowKeyJoin {
  // Join type: INNER, LEFT, RIGHT or FULL
private final JoinRelType joinType;
// Join conditions
private final List<JoinCondition> conditions;
  private final JoinControl joinControl;

  /**
   * Constructs the hash join operator.
   *
   * @param popConfig HashJoinPOP operator configuration
   * @param context fragment context
   * @param left probe/outer side incoming input
   * @param right build/inner side incoming input
   * @throws OutOfMemoryException out of memory exception
   */
public HashJoinBatch(HashJoinPOP popConfig, FragmentContext context,
RecordBatch left, /* Probe side record batch */
RecordBatch right /* Build side record batch */
) throws OutOfMemoryException {
super(popConfig, context, left, right);
joinType = popConfig.getJoinType();
conditions = popConfig.getConditions();
this.joinControl = new JoinControl(popConfig.getJoinControl());
semiJoin = popConfig.isSemiJoin();
joinIsLeftOrFull = joinType == JoinRelType.LEFT
|| joinType == JoinRelType.FULL;
joinIsRightOrFull = joinType == JoinRelType.RIGHT
|| joinType == JoinRelType.FULL;
this.isRowKeyJoin = popConfig.isRowKeyJoin();
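    // For each join condition, record the build-side column name and create
    // a named expression for evaluating the condition on the build side.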
for (int i = 0; i < conditions.size(); i++) {
SchemaPath rightPath = (SchemaPath) conditions.get(i).getRight();
PathSegment.NameSegment nameSegment = (PathSegment.NameSegment) rightPath
.getLastSegment();
buildJoinColumns.add(nameSegment.getPath());
String refName = "build_side_" + i;
rightExpr.add(new NamedExpression(conditions.get(i).getRight(),
new FieldReference(refName)));
}
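    // Runtime filter pushdown applies only when the session option is enabled
    // and the planner attached a runtime filter definition to this operator.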
runtimeFilterDef = popConfig.getRuntimeFilterDef();
enableRuntimeFilter = context.getOptions().getOption(ExecConstants.HASHJOIN_ENABLE_RUNTIME_FILTER)
&& runtimeFilterDef != null;
}
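
  /**
   * Creates the probe-side implementation for this hash join.
   */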
@Override
public Probe createProbe() {
    // No code generation is used; the probe is a plain template implementation
return new HashJoinProbeTemplate();
}
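
  /**
   * Initializes the probe with the probe-side batch, join type, upstream
   * outcome, partitions, spill state and output container before probing
   * begins.
   */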
@Override
public void setupProbe() throws SchemaChangeException {
probe.setup(probeBatch, this, joinType,
semiJoin, leftUpstream, partitions, spilledState.getCycle(),
container, spilledInners, buildSideIsEmpty.booleanValue(),
numPartitions, rightHVColPosition);
}
@Override
protected HashTableConfig buildHashTableConfig() {
List<Comparator> comparators = Lists
.newArrayListWithExpectedSize(conditions.size());
conditions.forEach(cond -> comparators
.add(JoinUtils.checkAndReturnSupportedJoinComparator(cond)));
    // Create one named expression per probe-side (left) join condition
    List<NamedExpression> leftExpr = new ArrayList<>(conditions.size());
for (int i = 0; i < conditions.size(); i++) {
leftExpr.add(new NamedExpression(conditions.get(i).getLeft(),
new FieldReference("probe_side_" + i)));
}
    // Null out the probe-side expressions if the probe side has produced
    // neither a schema nor data.
if (leftUpstream != IterOutcome.OK_NEW_SCHEMA
&& leftUpstream != IterOutcome.OK) {
leftExpr = null;
} else {
if (probeBatch.getSchema()
.getSelectionVectorMode() != BatchSchema.SelectionVectorMode.NONE) {
throw UserException.internalError(null).message(
"Hash join does not support probe batch with selection vectors.")
.addContext("Probe batch has selection mode",
(probeBatch.getSchema().getSelectionVectorMode()).toString())
.build(logger);
}
}
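    // Assemble the configuration shared by the partitions' hash tables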
return new HashTableConfig(
(int) context.getOptions().getOption(ExecConstants.MIN_HASH_TABLE_SIZE),
true, HashTable.DEFAULT_LOAD_FACTOR, rightExpr, leftExpr, comparators,
joinControl.asInt(), false);
}
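
  /**
   * Logs the operator's internal state (schemas, outcomes, join type, spill
   * capability) to aid failure diagnosis.
   */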
@Override
public void dump() {
logger.error(
"HashJoinBatch[container={}, left={}, right={}, leftOutcome={}, rightOutcome={}, joinType={}, hashJoinProbe={},"
+ " rightExpr={}, canSpill={}, buildSchema={}, probeSchema={}]",
container, left, right, leftUpstream, rightUpstream, joinType,
probe, rightExpr, canSpill, buildSchema, probeSchema);
}
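
  /**
   * RowKeyJoin API: row-key batches become available once the build side has
   * been fully consumed.
   */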
@Override // implement RowKeyJoin interface
public boolean hasRowKeyBatch() {
return buildComplete;
  }

  @Override // implement RowKeyJoin interface
public BatchState getBatchState() {
return state;
  }

  @Override // implement RowKeyJoin interface
public void setBatchState(BatchState newState) {
state = newState;
  }

  @Override
public void setRowKeyJoinState(RowKeyJoin.RowKeyJoinState newState) {
this.rkJoinState = newState;
  }

  @Override
public RowKeyJoin.RowKeyJoinState getRowKeyJoinState() {
return rkJoinState;
  }

  /**
* Get the hash table iterator that is created for the build side of the hash
* join if this hash join was instantiated as a row-key join.
*
* @return hash table iterator or null if this hash join was not a row-key
* join or if it was a row-key join but the build has not yet
* completed.
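   *
   * <p>A minimal usage sketch (the caller and the {@code consumeRowKeys}
   * helper are hypothetical):</p>
   *
   * <pre>{@code
   * Pair<ValueVector, Integer> rowKeys;
   * while ((rowKeys = hashJoinBatch.nextRowKeyBatch()) != null) {
   *   consumeRowKeys(rowKeys.getLeft(), rowKeys.getRight());
   * }
   * }</pre>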
*/
@Override
public Pair<ValueVector, Integer> nextRowKeyBatch() {
if (buildComplete) {
      // Use partition 0: a row-key join has only a single partition, so it
      // never spills.
      Pair<VectorContainer, Integer> pp = partitions[0].nextBatch();
if (pp != null) {
VectorWrapper<?> vw = Iterables.get(pp.getLeft(), 0);
ValueVector vv = vw.getValueVector();
return Pair.of(vv, pp.getRight());
}
    } else if (partitions == null && firstOutputBatch) {
      // If data arrived on the right (build) side during the build-schema
      // stage, use it directly.
      firstOutputBatch = false;
if (right.getRecordCount() > 0) {
VectorWrapper<?> vw = Iterables.get(right, 0);
ValueVector vv = vw.getValueVector();
return Pair.of(vv, right.getRecordCount() - 1);
}
}
return null;
}
}