blob: 5763b322baf1c3379e889b0dcebf2784a97fd71b [file] [log] [blame]
/*
Derby - Class org.apache.derby.impl.sql.compile.IntersectNode
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to you under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package org.apache.derby.impl.sql.compile;
import java.sql.Types;
import java.util.BitSet;
import java.util.Properties;
import org.apache.derby.iapi.error.StandardException;
import org.apache.derby.iapi.reference.ClassName;
import org.apache.derby.iapi.services.classfile.VMOpcode;
import org.apache.derby.iapi.services.compiler.MethodBuilder;
import org.apache.derby.iapi.services.context.ContextManager;
import org.apache.derby.shared.common.sanity.SanityManager;
import org.apache.derby.iapi.sql.compile.CostEstimate;
import org.apache.derby.iapi.sql.compile.Optimizable;
import org.apache.derby.iapi.sql.compile.OptimizablePredicateList;
import org.apache.derby.iapi.sql.compile.Optimizer;
import org.apache.derby.iapi.sql.compile.RowOrdering;
import org.apache.derby.iapi.sql.dictionary.ConglomerateDescriptor;
import org.apache.derby.iapi.types.TypeId;
import org.apache.derby.iapi.util.JBitSet;
/**
* A IntersectOrExceptNode represents an INTERSECT or EXCEPT DML statement.
*
*/
public class IntersectOrExceptNode extends SetOperatorNode
{
/* Currently we implement INTERSECT and EXCEPT by rewriting
* t1 (INTERSECT|EXCEPT) [ALL] t2
* as (roughly)
* setOpResultSet( opType, all, (select * from t1 order by 1,2,...n), (select * from t2 ORDER BY 1,2,...,n))
* where n is the number of columns in t1 (which must be the same as the number of columns in t2),
* and opType is INTERSECT, or EXCEPT.
*
* The setOpResultSet result set simultaneously scans through its two ordered inputs and
* performs the intersect or except.
*
* There are other query plans that may be more efficient, depending on the sizes. One plan is
* to make a hash table from one of the input tables and then look up each row of the other input
* table in the hash table. However, we have not yet implemented spilling to disk in the
* BackingStoreHashtable class: currently the whole hash table is in RAM. If we were to use it
* we would blow up on large input tables.
*/
private int opType;
public static final int INTERSECT_OP = 1;
public static final int EXCEPT_OP = 2;
/* Only optimize it once */
/* Only call addNewNodes() once */
private boolean addNewNodesCalled;
private int[] intermediateOrderByColumns; // The input result sets will be ordered on these columns. 0 indexed
private int[] intermediateOrderByDirection; // ascending = 1, descending = -1
private boolean[] intermediateOrderByNullsLow; // TRUE means NULL values should be ordered lower than non-NULL values
/**
* Constructor for a SetOperatorNode.
*
* @param opType The operator type: one of {@link #EXCEPT_OP} or
* {@link #INTERSECT_OP}.
* @param leftResult The ResultSetNode on the left side of this union
* @param rightResult The ResultSetNode on the right side of this union
* @param all {@code true} if this is an ALL set operation.
* @param tableProperties Properties list associated with the table
* @param cm The context manager
* @exception StandardException Thrown on error
*/
IntersectOrExceptNode(int opType,
ResultSetNode leftResult,
ResultSetNode rightResult,
boolean all,
Properties tableProperties,
ContextManager cm) throws StandardException {
super(leftResult, rightResult, all, tableProperties, cm);
this.opType = opType;
}
private int getOpType()
{
return opType;
}
/**
* Push order by lists down to the children so that we can implement the intersect/except
* by scan of the two sorted inputs.
*
* @param numTables Number of tables in the DML Statement
* @param gbl The group by list, if any
* @param fromList The from list, if any
*
* @return The preprocessed ResultSetNode that can be optimized
*
* @exception StandardException Thrown on error
*/
@Override
ResultSetNode preprocess(int numTables,
GroupByList gbl,
FromList fromList)
throws StandardException
{
// RESOLVE: We are in a quandary as to when and how we should generate order by lists. SelectNode processing
// requires order by lists at the start of preprocess. That is why we are doing it here. However we can
// pick any column ordering. Depending on the child expressions the optimizer may be able to avoid a
// sort if we pick the right column ordering. For instance if one of the child expressions is
// "select <key columns>, <other expressions> from T" where there is a unique index on <key columns>
// then we can just generate an order by on the key columns and the optimizer should use the unique index
// to produce the sorted result set. However the ResultSetNode class does not make it easy to
// find the structure of the query expression. Furthermore we most want to avoid a sort on the larger
// input, but the size estimate is not available at preprocess time.
intermediateOrderByColumns = new int[ getResultColumns().size()];
intermediateOrderByDirection = new int[ intermediateOrderByColumns.length];
intermediateOrderByNullsLow = new boolean[ intermediateOrderByColumns.length];
/* If there is an order by on the result of the intersect then use
* that because we know that doing so will avoid a sort. If the
* output of the intersect/except is small relative to its inputs then
* in some cases it would be better to sort the inputs on a different
* sequence of columns, but it is hard to analyze the input query
* expressions to see if a sort can be avoided.
*/
final OrderByList obl = qec.getOrderByList(0);
if( obl != null)
{
BitSet colsOrdered = new BitSet( intermediateOrderByColumns.length);
int orderByListSize = obl.size();
int intermediateOrderByIdx = 0;
for( int i = 0; i < orderByListSize; i++)
{
if( colsOrdered.get(i))
continue;
OrderByColumn orderByColumn =
obl.getOrderByColumn(i);
intermediateOrderByDirection[intermediateOrderByIdx] = orderByColumn.isAscending() ? 1 : -1;
intermediateOrderByNullsLow[intermediateOrderByIdx] = orderByColumn.isNullsOrderedLow();
int columnIdx = orderByColumn.getResultColumn().getColumnPosition() - 1;
intermediateOrderByColumns[intermediateOrderByIdx] = columnIdx;
colsOrdered.set( columnIdx);
intermediateOrderByIdx++;
}
for( int i = 0; i < intermediateOrderByColumns.length; i++)
{
if( ! colsOrdered.get(i))
{
intermediateOrderByDirection[intermediateOrderByIdx] = 1;
intermediateOrderByNullsLow[intermediateOrderByIdx] = false;
intermediateOrderByColumns[intermediateOrderByIdx] = i;
intermediateOrderByIdx++;
}
}
qec.setOrderByList(0, null); // It will be pushed down.
}
else // The output of the intersect/except does not have to be ordered
{
// Pick an intermediate ordering that minimizes the cost.
// RESOLVE: how do you do that?
for( int i = 0; i < intermediateOrderByColumns.length; i++)
{
intermediateOrderByDirection[i] = 1;
intermediateOrderByNullsLow[i] = false;
intermediateOrderByColumns[i] = i;
}
}
pushOrderingDown( leftResultSet);
pushOrderingDown( rightResultSet);
return super.preprocess( numTables, gbl, fromList);
} // end of preprocess
private void pushOrderingDown( ResultSetNode rsn)
throws StandardException
{
ContextManager cm = getContextManager();
OrderByList orderByList = new OrderByList(null, cm);
for( int i = 0; i < intermediateOrderByColumns.length; i++)
{
OrderByColumn orderByColumn =
new OrderByColumn(
new NumericConstantNode(
TypeId.getBuiltInTypeId(Types.INTEGER),
intermediateOrderByColumns[i] + 1,
cm),
cm);
if( intermediateOrderByDirection[i] < 0) {
orderByColumn.setDescending();
}
if( intermediateOrderByNullsLow[i]) {
orderByColumn.setNullsOrderedLow();
}
orderByList.addOrderByColumn( orderByColumn);
}
orderByList.bindOrderByColumns( rsn);
rsn.pushQueryExpressionSuffix();
rsn.pushOrderByList( orderByList);
} // end of pushOrderingDown
/**
* @see org.apache.derby.iapi.sql.compile.Optimizable#estimateCost
*/
@Override
public CostEstimate estimateCost( OptimizablePredicateList predList,
ConglomerateDescriptor cd,
CostEstimate outerCost,
Optimizer optimizer,
RowOrdering rowOrdering)
throws StandardException
{
leftResultSet = optimizeSource(
optimizer,
leftResultSet,
(PredicateList) null,
outerCost);
rightResultSet = optimizeSource(
optimizer,
rightResultSet,
(PredicateList) null,
outerCost);
CostEstimate costEst = getCostEstimate(optimizer);
CostEstimate leftCostEstimate = leftResultSet.getCostEstimate();
CostEstimate rightCostEstimate = rightResultSet.getCostEstimate();
// The cost is the sum of the two child costs plus the cost of
// sorting the union.
costEst.setCost(
leftCostEstimate.getEstimatedCost() +
rightCostEstimate.getEstimatedCost(),
getRowCountEstimate(
leftCostEstimate.rowCount(),
rightCostEstimate.rowCount()),
getSingleScanRowCountEstimate(
leftCostEstimate.singleScanRowCount(),
rightCostEstimate.singleScanRowCount()));
return costEst;
} // End of estimateCost
/**
* @see Optimizable#modifyAccessPath
*
* @exception StandardException Thrown on error
*/
@Override
public Optimizable modifyAccessPath(JBitSet outerTables) throws StandardException
{
Optimizable retOptimizable;
retOptimizable = super.modifyAccessPath(outerTables);
/* We only want call addNewNodes() once */
if (addNewNodesCalled)
{
return retOptimizable;
}
return (Optimizable) addNewNodes();
}
/**
* @see ResultSetNode#modifyAccessPaths
*
* @exception StandardException Thrown on error
*/
@Override
ResultSetNode modifyAccessPaths() throws StandardException
{
ResultSetNode retRSN;
retRSN = super.modifyAccessPaths();
/* We only want call addNewNodes() once */
if (addNewNodesCalled)
{
return retRSN;
}
return addNewNodes();
}
/**
* Add any new ResultSetNodes that are necessary to the tree.
* We wait until after optimization to do this in order to
* make it easier on the optimizer.
*
* @return (Potentially new) head of the ResultSetNode tree.
*
* @exception StandardException Thrown on error
*/
private ResultSetNode addNewNodes()
throws StandardException
{
/* Only call addNewNodes() once */
if (addNewNodesCalled)
{
return this;
}
addNewNodesCalled = true;
ResultSetNode treeTop = this;
for (int i = 0; i < qec.size(); i++) {
final OrderByList obl = qec.getOrderByList(i);
if(obl != null) {
// Generate an order by node on top of the intersect/except
treeTop = new OrderByNode(
treeTop,
obl,
tableProperties,
getContextManager());
}
final ValueNode offset = qec.getOffset(i);
final ValueNode fetchFirst = qec.getFetchFirst(i);
if (offset != null || fetchFirst != null) {
ResultColumnList newRcl =
treeTop.getResultColumns().copyListAndObjects();
newRcl.genVirtualColumnNodes(
treeTop, treeTop.getResultColumns());
treeTop = new RowCountNode(
treeTop,
newRcl,
offset,
fetchFirst,
qec.getHasJDBCLimitClause()[i].booleanValue(),
getContextManager());
}
}
return treeTop;
} // end of addNewNodes
/**
* Generate the code.
*
* @exception StandardException Thrown on error
*/
@Override
void generate( ActivationClassBuilder acb, MethodBuilder mb)
throws StandardException
{
/* Get the next ResultSet #, so that we can number this ResultSetNode, its
* ResultColumnList and ResultSet.
*/
assignResultSetNumber();
// Get our final cost estimate based on the child estimates.
setCostEstimate( getFinalCostEstimate() );
// build up the tree.
/* Generate the SetOpResultSet. Arguments:
* 1) expression for left child ResultSet
* 2) expression for right child ResultSet
* 3) activation
* 4) resultSetNumber
* 5) estimated row count
* 6) estimated cost
* 7) opType
* 8) all
* 9) intermediateOrderByColumns saved object index
* 10) intermediateOrderByDirection saved object index
* 11) intermediateOrderByNullsLow saved object index
*/
acb.pushGetResultSetFactoryExpression(mb); // instance for getSetOpResultSet
getLeftResultSet().generate( acb, mb);
getRightResultSet().generate( acb, mb);
acb.pushThisAsActivation(mb);
mb.push(getResultSetNumber());
mb.push( getCostEstimate().getEstimatedRowCount());
mb.push( getCostEstimate().getEstimatedCost());
mb.push( getOpType());
mb.push( all);
mb.push( getCompilerContext().addSavedObject( intermediateOrderByColumns));
mb.push( getCompilerContext().addSavedObject( intermediateOrderByDirection));
mb.push( getCompilerContext().addSavedObject( intermediateOrderByNullsLow));
mb.callMethod(VMOpcode.INVOKEINTERFACE,
(String) null,
"getSetOpResultSet",
ClassName.NoPutResultSet, 11);
} // end of generate
/**
* @see ResultSetNode#getFinalCostEstimate
*
* Get the final CostEstimate for this IntersectOrExceptNode.
*
* @return The final CostEstimate for this IntersectOrExceptNode,
* which is the sum of the two child costs. The final number of
* rows depends on whether this is an INTERSECT or EXCEPT (see
* getRowCountEstimate() in this class for more).
*/
@Override
CostEstimate getFinalCostEstimate()
throws StandardException
{
if (getCandidateFinalCostEstimate() != null)
{
return getCandidateFinalCostEstimate();
}
CostEstimate leftCE = leftResultSet.getFinalCostEstimate();
CostEstimate rightCE = rightResultSet.getFinalCostEstimate();
setCandidateFinalCostEstimate( getNewCostEstimate() );
getCandidateFinalCostEstimate().setCost(
leftCE.getEstimatedCost() + rightCE.getEstimatedCost(),
getRowCountEstimate(leftCE.rowCount(), rightCE.rowCount()),
getSingleScanRowCountEstimate(leftCE.singleScanRowCount(),
rightCE.singleScanRowCount()));
return getCandidateFinalCostEstimate();
}
String getOperatorName()
{
switch( opType)
{
case INTERSECT_OP:
return "INTERSECT";
case EXCEPT_OP:
return "EXCEPT";
}
if( SanityManager.DEBUG)
SanityManager.THROWASSERT( "Invalid intersectOrExcept opType: " + opType);
return "?";
}
double getRowCountEstimate( double leftRowCount, double rightRowCount)
{
switch( opType)
{
case INTERSECT_OP:
// The result has at most min( leftRowCount, rightRowCount). Estimate the actual row count at
// half that.
return Math.min( leftRowCount, rightRowCount)/2;
case EXCEPT_OP:
// The result has at most leftRowCount rows and at least
// max(0, leftRowCount - rightRowCount) rows. Use the mean
// of those two as the estimate.
return (leftRowCount + Math.max(0, leftRowCount - rightRowCount))/2;
}
if( SanityManager.DEBUG)
SanityManager.THROWASSERT( "Invalid intersectOrExcept opType: " + opType);
return 1.0;
} // end of getRowCountEstimate
double getSingleScanRowCountEstimate( double leftSingleScanRowCount, double rightSingleScanRowCount)
{
return getRowCountEstimate( leftSingleScanRowCount, rightSingleScanRowCount);
}
}