| /* |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| package org.apache.phoenix.compile; |
| |
| import java.sql.SQLException; |
| import java.util.ArrayList; |
| import java.util.Arrays; |
| import java.util.Collections; |
| import java.util.Comparator; |
| import java.util.HashSet; |
| import java.util.Iterator; |
| import java.util.List; |
| import java.util.Map; |
| import java.util.NoSuchElementException; |
| import java.util.Set; |
| |
| import org.apache.hadoop.hbase.filter.CompareFilter; |
| import org.apache.hadoop.hbase.filter.CompareFilter.CompareOp; |
| import org.apache.hadoop.hbase.io.ImmutableBytesWritable; |
| import org.apache.hadoop.hbase.util.Bytes; |
| import org.apache.hadoop.hbase.util.Pair; |
| import org.apache.phoenix.expression.AndExpression; |
| import org.apache.phoenix.expression.BaseExpression; |
| import org.apache.phoenix.expression.BaseExpression.ExpressionComparabilityWrapper; |
| import org.apache.phoenix.expression.BaseTerminalExpression; |
| import org.apache.phoenix.expression.CoerceExpression; |
| import org.apache.phoenix.expression.ComparisonExpression; |
| import org.apache.phoenix.expression.Determinism; |
| import org.apache.phoenix.expression.Expression; |
| import org.apache.phoenix.expression.InListExpression; |
| import org.apache.phoenix.expression.IsNullExpression; |
| import org.apache.phoenix.expression.LikeExpression; |
| import org.apache.phoenix.expression.LiteralExpression; |
| import org.apache.phoenix.expression.OrExpression; |
| import org.apache.phoenix.expression.RowKeyColumnExpression; |
| import org.apache.phoenix.expression.RowValueConstructorExpression; |
| import org.apache.phoenix.expression.function.FunctionExpression.OrderPreserving; |
| import org.apache.phoenix.expression.function.ScalarFunction; |
| import org.apache.phoenix.expression.visitor.ExpressionVisitor; |
| import org.apache.phoenix.expression.visitor.StatelessTraverseNoExpressionVisitor; |
| import org.apache.phoenix.parse.FilterableStatement; |
| import org.apache.phoenix.parse.HintNode.Hint; |
| import org.apache.phoenix.parse.LikeParseNode.LikeType; |
| import org.apache.phoenix.query.KeyRange; |
| import org.apache.phoenix.query.QueryConstants; |
| import org.apache.phoenix.schema.PColumn; |
| import org.apache.phoenix.schema.PName; |
| import org.apache.phoenix.schema.PTable; |
| import org.apache.phoenix.schema.RowKeySchema; |
| import org.apache.phoenix.schema.SaltingUtil; |
| import org.apache.phoenix.schema.SortOrder; |
| import org.apache.phoenix.schema.tuple.Tuple; |
| import org.apache.phoenix.schema.types.PChar; |
| import org.apache.phoenix.schema.types.PDataType; |
| import org.apache.phoenix.schema.types.PVarbinary; |
| import org.apache.phoenix.schema.types.PVarchar; |
| import org.apache.phoenix.util.ByteUtil; |
| import org.apache.phoenix.util.ScanUtil; |
| import org.apache.phoenix.util.SchemaUtil; |
| |
| import com.google.common.collect.Iterators; |
| import com.google.common.collect.Lists; |
| import com.google.common.collect.Maps; |
| import com.google.common.collect.Sets; |
| |
| import edu.umd.cs.findbugs.annotations.NonNull; |
| |
| |
| /** |
| * |
| * Class that pushes row key expressions from the where clause to form the start/stop |
| * key of the scan and removes the expressions from the where clause when possible. |
| * |
| * |
| * @since 0.1 |
| */ |
| public class WhereOptimizer { |
| private static final List<KeyRange> EVERYTHING_RANGES = Collections.<KeyRange>singletonList(KeyRange.EVERYTHING_RANGE); |
| private static final List<KeyRange> SALT_PLACEHOLDER = Collections.singletonList(PChar.INSTANCE.getKeyRange(QueryConstants.SEPARATOR_BYTE_ARRAY)); |
| |
| private WhereOptimizer() { |
| } |
| |
| /** |
| * Pushes row key expressions from the where clause into the start/stop key of the scan. |
| * @param context the shared context during query compilation |
| * @param statement the statement being compiled |
| * @param whereClause the where clause expression |
| * @return the new where clause with the key expressions removed |
| */ |
| public static Expression pushKeyExpressionsToScan(StatementContext context, FilterableStatement statement, Expression whereClause) |
| throws SQLException{ |
| return pushKeyExpressionsToScan(context, statement, whereClause, null); |
| } |
| |
| // For testing so that the extractedNodes can be verified |
| public static Expression pushKeyExpressionsToScan(StatementContext context, FilterableStatement statement, |
| Expression whereClause, Set<Expression> extractNodes) throws SQLException { |
| PName tenantId = context.getConnection().getTenantId(); |
| byte[] tenantIdBytes = null; |
| PTable table = context.getCurrentTable().getTable(); |
| Integer nBuckets = table.getBucketNum(); |
| boolean isSalted = nBuckets != null; |
| RowKeySchema schema = table.getRowKeySchema(); |
| boolean isMultiTenant = tenantId != null && table.isMultiTenant(); |
| boolean isSharedIndex = table.getViewIndexId() != null; |
| ImmutableBytesWritable ptr = context.getTempPtr(); |
| |
| if (isMultiTenant) { |
| tenantIdBytes = ScanUtil.getTenantIdBytes(schema, isSalted, tenantId, isSharedIndex); |
| } |
| |
| if (whereClause == null && (tenantId == null || !table.isMultiTenant()) && table.getViewIndexId() == null) { |
| context.setScanRanges(ScanRanges.EVERYTHING); |
| return whereClause; |
| } |
| if (LiteralExpression.isBooleanFalseOrNull(whereClause)) { |
| context.setScanRanges(ScanRanges.NOTHING); |
| return null; |
| } |
| KeyExpressionVisitor visitor = new KeyExpressionVisitor(context, table); |
| KeyExpressionVisitor.KeySlots keySlots = null; |
| if (whereClause != null) { |
| // TODO:: When we only have one where clause, the keySlots returns as a single slot object, |
| // instead of an array of slots for the corresponding column. Change the behavior so it |
| // becomes consistent. |
| keySlots = whereClause.accept(visitor); |
| |
| if (keySlots == null && (tenantId == null || !table.isMultiTenant()) && table.getViewIndexId() == null) { |
| context.setScanRanges(ScanRanges.EVERYTHING); |
| return whereClause; |
| } |
| // If a parameter is bound to null (as will be the case for calculating ResultSetMetaData and |
| // ParameterMetaData), this will be the case. It can also happen for an equality comparison |
| // for unequal lengths. |
| if (keySlots == KeyExpressionVisitor.EMPTY_KEY_SLOTS) { |
| context.setScanRanges(ScanRanges.NOTHING); |
| return null; |
| } |
| } |
| if (keySlots == null) { |
| keySlots = KeyExpressionVisitor.EMPTY_KEY_SLOTS; |
| } |
| |
| if (extractNodes == null) { |
| extractNodes = new HashSet<Expression>(table.getPKColumns().size()); |
| } |
| |
| int pkPos = 0; |
| int nPKColumns = table.getPKColumns().size(); |
| int[] slotSpanArray = new int[nPKColumns]; |
| List<List<KeyRange>> cnf = Lists.newArrayListWithExpectedSize(schema.getMaxFields()); |
| boolean hasViewIndex = table.getViewIndexId() != null; |
| Iterator<KeyExpressionVisitor.KeySlot> iterator = keySlots.getSlots().iterator(); |
| // Add placeholder for salt byte ranges |
| if (isSalted) { |
| cnf.add(SALT_PLACEHOLDER); |
| // Increment the pkPos, as the salt column is in the row schema |
| // Do not increment the iterator, though, as there will never be |
| // an expression in the keySlots for the salt column |
| pkPos++; |
| } |
| |
| // Add unique index ID for shared indexes on views. This ensures |
| // that different indexes don't interleave. |
| if (hasViewIndex) { |
| byte[] viewIndexBytes = table.getviewIndexIdType().toBytes(table.getViewIndexId()); |
| KeyRange indexIdKeyRange = KeyRange.getKeyRange(viewIndexBytes); |
| cnf.add(Collections.singletonList(indexIdKeyRange)); |
| pkPos++; |
| } |
| |
| // Add tenant data isolation for tenant-specific tables |
| if (isMultiTenant) { |
| KeyRange tenantIdKeyRange = KeyRange.getKeyRange(tenantIdBytes); |
| cnf.add(Collections.singletonList(tenantIdKeyRange)); |
| pkPos++; |
| } |
| |
| boolean forcedSkipScan = statement.getHint().hasHint(Hint.SKIP_SCAN); |
| boolean forcedRangeScan = statement.getHint().hasHint(Hint.RANGE_SCAN); |
| boolean hasUnboundedRange = false; |
| boolean hasMultiRanges = false; |
| boolean hasRangeKey = false; |
| boolean stopExtracting = false; |
| boolean useSkipScan = false; |
| //boolean useSkipScan = !forcedRangeScan && nBuckets != null; |
| // Concat byte arrays of literals to form scan start key |
| while (iterator.hasNext()) { |
| KeyExpressionVisitor.KeySlot slot = iterator.next(); |
| // If the position of the pk columns in the query skips any part of the row k |
| // then we have to handle in the next phase through a key filter. |
| // If the slot is null this means we have no entry for this pk position. |
| if (slot == null || slot.getKeyRanges().isEmpty()) { |
| continue; |
| } |
| if (slot.getPKPosition() != pkPos) { |
| if (!forcedSkipScan) { |
| stopExtracting = true; |
| } else { |
| useSkipScan |= !stopExtracting && !forcedRangeScan && forcedSkipScan; |
| } |
| for (int i=pkPos; i < slot.getPKPosition(); i++) { |
| cnf.add(Collections.singletonList(KeyRange.EVERYTHING_RANGE)); |
| } |
| } |
| KeyPart keyPart = slot.getKeyPart(); |
| List<KeyRange> keyRanges = slot.getKeyRanges(); |
| SortOrder prevSortOrder = null; |
| int slotOffset = 0; |
| int clipLeftSpan = 0; |
| |
| // Iterate through all spans of this slot |
| while (true) { |
| SortOrder sortOrder = |
| schema.getField(slot.getPKPosition() + slotOffset).getSortOrder(); |
| if (prevSortOrder == null) { |
| prevSortOrder = sortOrder; |
| } else if (prevSortOrder != sortOrder) { |
| //Consider the Universe of keys to be [0,7]+ on the leading column A |
| // and [0,7]+ on trailing column B, with a padbyte of 0 for ASC and 7 for DESC |
| //if our key range for ASC keys is leading [2,*] and trailing [3,*], |
| // → [x203 - x777] |
| //for this particular plan the leading key is descending (ie index desc) |
| // consider the data |
| // (3,2) ORDER BY A,B→ x302 → ORDER BY A DESC,B → x472 |
| // (3,3) ORDER BY A,B→ x303 → ORDER BY A DESC,B → x473 |
| // (3,4) ORDER BY A,B→ x304 → ORDER BY A DESC,B → x474 |
| // (2,3) ORDER BY A,B→ x203 → ORDER BY A DESC,B → x573 |
| // (2,7) ORDER BY A,B→ x207 → ORDER BY A DESC,B → x577 |
| // And the logical expression (A,B) > (2,3) |
| // In the DESC A order the selected values are not contiguous, |
| // (2,7),(3,2),(3,3),(3,4) |
| // In the normal ASC order by the values are all contiguous |
| // Therefore the key cannot be extracted out and a full filter must be applied |
| // In addition, the boundary of the scan is tricky as the values are not bound |
| // by (2,3) it is instead bound by (2,7), this should map to, [x000,x577] |
| // FUTURE: May be able to perform a type of skip scan for this case. |
| |
| // If the sort order changes, we must clip the portion with the same sort order |
| // and invert the key ranges and swap the upper and lower bounds. |
| List<KeyRange> leftRanges = clipLeft(schema, slot.getPKPosition() |
| + slotOffset - clipLeftSpan, clipLeftSpan, keyRanges, ptr); |
| keyRanges = |
| clipRight(schema, slot.getPKPosition() + slotOffset - 1, keyRanges, |
| leftRanges, ptr); |
| if (prevSortOrder == SortOrder.DESC) { |
| leftRanges = invertKeyRanges(leftRanges); |
| } |
| slotSpanArray[cnf.size()] = clipLeftSpan-1; |
| cnf.add(leftRanges); |
| clipLeftSpan = 0; |
| prevSortOrder = sortOrder; |
| // since we have to clip the portion with the same sort order, we can no longer |
| // extract the nodes from the where clause |
| // for eg. for the schema A VARCHAR DESC, B VARCHAR ASC and query |
| // WHERE (A,B) < ('a','b') |
| // the range (* - a\xFFb) is converted to (~a-*)(*-b) |
| // so we still need to filter on A,B |
| stopExtracting = true; |
| } |
| clipLeftSpan++; |
| slotOffset++; |
| if (slotOffset >= slot.getPKSpan()) { |
| break; |
| } |
| if (iterator.hasNext()) { |
| iterator.next(); |
| } |
| } |
| if (schema.getField(slot.getPKPosition() + slotOffset - 1).getSortOrder() == SortOrder.DESC) { |
| keyRanges = invertKeyRanges(keyRanges); |
| } |
| pkPos = slot.getPKPosition() + slot.getPKSpan(); |
| |
| slotSpanArray[cnf.size()] = clipLeftSpan-1; |
| cnf.add(keyRanges); |
| |
| // TODO: when stats are available, we may want to use a skip scan if the |
| // cardinality of this slot is low. |
| /* |
| * Stop extracting nodes once we encounter: |
| * 1) An unbound range unless we're forcing a skip scan and haven't encountered |
| * a multi-column span. Even if we're trying to force a skip scan, we can't |
| * execute it over a multi-column span. |
| * 2) A non range key as we can extract the first one, but further ones need |
| * to be evaluated in a filter. |
| * 3) As above a non-contiguous range due to sort order |
| */ |
| stopExtracting |= (hasUnboundedRange && !forcedSkipScan) || (hasRangeKey && forcedRangeScan); |
| useSkipScan |= !stopExtracting && !forcedRangeScan && (keyRanges.size() > 1 || hasRangeKey); |
| |
| for (int i = 0; (!hasUnboundedRange || !hasRangeKey) && i < keyRanges.size(); i++) { |
| KeyRange range = keyRanges.get(i); |
| if (range.isUnbound()) { |
| hasUnboundedRange = hasRangeKey = true; |
| } else if (!range.isSingleKey()) { |
| hasRangeKey = true; |
| } |
| } |
| |
| hasMultiRanges |= keyRanges.size() > 1; |
| |
| // We cannot extract if we have multiple ranges and are forcing a range scan. |
| stopExtracting |= forcedRangeScan && hasMultiRanges; |
| |
| // Will be null in cases for which only part of the expression was factored out here |
| // to set the start/end key. An example would be <column> LIKE 'foo%bar' where we can |
| // set the start key to 'foo' but still need to match the regex at filter time. |
| // Don't extract expressions if we're forcing a range scan and we've already come |
| // across a multi-range for a prior slot. The reason is that we have an inexact range after |
| // that, so must filter on the remaining conditions (see issue #467). |
| if (!stopExtracting) { |
| List<Expression> nodesToExtract = keyPart.getExtractNodes(); |
| extractNodes.addAll(nodesToExtract); |
| } |
| } |
| // If we have fully qualified point keys with multi-column spans (i.e. RVC), |
| // we can still use our skip scan. The ScanRanges.create() call will explode |
| // out the keys. |
| slotSpanArray = Arrays.copyOf(slotSpanArray, cnf.size()); |
| ScanRanges scanRanges = ScanRanges.create(schema, cnf, slotSpanArray, nBuckets, useSkipScan, table.getRowTimestampColPos()); |
| context.setScanRanges(scanRanges); |
| if (whereClause == null) { |
| return null; |
| } else { |
| return whereClause.accept(new RemoveExtractedNodesVisitor(extractNodes)); |
| } |
| } |
| |
| private static KeyRange getTrailingRange(RowKeySchema rowKeySchema, int pkPos, KeyRange range, KeyRange clippedResult, ImmutableBytesWritable ptr) { |
| int separatorLength = rowKeySchema.getField(pkPos).getDataType().isFixedWidth() ? 0 : 1; |
| byte[] lowerRange = KeyRange.UNBOUND; |
| boolean lowerInclusive = false; |
| // Lower range of trailing part of RVC must be true, so we can form a new range to intersect going forward |
| if (!range.lowerUnbound() |
| && range.getLowerRange().length > clippedResult.getLowerRange().length |
| && Bytes.startsWith(range.getLowerRange(), clippedResult.getLowerRange())) { |
| lowerRange = range.getLowerRange(); |
| int offset = clippedResult.getLowerRange().length + separatorLength; |
| ptr.set(lowerRange, offset, lowerRange.length - offset); |
| lowerRange = ptr.copyBytes(); |
| lowerInclusive = range.isLowerInclusive(); |
| } |
| byte[] upperRange = KeyRange.UNBOUND; |
| boolean upperInclusive = false; |
| if (!range.upperUnbound() |
| && range.getUpperRange().length > clippedResult.getUpperRange().length |
| && Bytes.startsWith(range.getUpperRange(), clippedResult.getUpperRange())) { |
| upperRange = range.getUpperRange(); |
| int offset = clippedResult.getUpperRange().length + separatorLength; |
| ptr.set(upperRange, offset, upperRange.length - offset); |
| upperRange = ptr.copyBytes(); |
| upperInclusive = range.isUpperInclusive(); |
| } |
| return KeyRange.getKeyRange(lowerRange, lowerInclusive, upperRange, upperInclusive); |
| } |
| |
| private static List<KeyRange> clipRight(RowKeySchema schema, int pkPos, List<KeyRange> keyRanges, |
| List<KeyRange> leftRanges, ImmutableBytesWritable ptr) { |
| List<KeyRange> clippedKeyRanges = Lists.newArrayListWithExpectedSize(keyRanges.size()); |
| for (int i = 0; i < leftRanges.size(); i++) { |
| KeyRange leftRange = leftRanges.get(i); |
| KeyRange range = keyRanges.get(i); |
| KeyRange clippedKeyRange = getTrailingRange(schema, pkPos, range, leftRange, ptr); |
| clippedKeyRanges.add(clippedKeyRange); |
| } |
| return clippedKeyRanges; |
| } |
| |
| private static List<KeyRange> clipLeft(RowKeySchema schema, int pkPos, int clipLeftSpan, List<KeyRange> keyRanges, ImmutableBytesWritable ptr) { |
| List<KeyRange> clippedKeyRanges = Lists.newArrayListWithExpectedSize(keyRanges.size()); |
| for (KeyRange keyRange : keyRanges) { |
| KeyRange clippedKeyRange = schema.clipLeft(pkPos, keyRange, clipLeftSpan, ptr); |
| clippedKeyRanges.add(clippedKeyRange); |
| } |
| return clippedKeyRanges; |
| } |
| |
| private static List<KeyRange> invertKeyRanges(List<KeyRange> keyRanges) { |
| keyRanges = new ArrayList<KeyRange>(keyRanges); |
| for (int i = 0; i < keyRanges.size(); i++) { |
| KeyRange range = keyRanges.get(i); |
| range = range.invert(); |
| keyRanges.set(i, range); |
| } |
| return keyRanges; |
| } |
| |
| /** |
| * Get an optimal combination of key expressions for hash join key range optimization. |
| * @return returns true if the entire combined expression is covered by key range optimization |
| * @param result the optimal combination of key expressions |
| * @param context the temporary context to get scan ranges set by pushKeyExpressionsToScan() |
| * @param statement the statement being compiled |
| * @param expressions the join key expressions |
| * @return the optimal list of key expressions |
| */ |
| public static boolean getKeyExpressionCombination(List<Expression> result, StatementContext context, FilterableStatement statement, List<Expression> expressions) throws SQLException { |
| List<Integer> candidateIndexes = Lists.newArrayList(); |
| final List<Integer> pkPositions = Lists.newArrayList(); |
| PTable table = context.getCurrentTable().getTable(); |
| for (int i = 0; i < expressions.size(); i++) { |
| Expression expression = expressions.get(i); |
| KeyExpressionVisitor visitor = new KeyExpressionVisitor(context, table); |
| KeyExpressionVisitor.KeySlots keySlots = expression.accept(visitor); |
| int minPkPos = Integer.MAX_VALUE; |
| if (keySlots != null) { |
| Iterator<KeyExpressionVisitor.KeySlot> iterator = keySlots.getSlots().iterator(); |
| while (iterator.hasNext()) { |
| KeyExpressionVisitor.KeySlot slot = iterator.next(); |
| if (slot.getPKPosition() < minPkPos) { |
| minPkPos = slot.getPKPosition(); |
| } |
| } |
| if (minPkPos != Integer.MAX_VALUE) { |
| candidateIndexes.add(i); |
| pkPositions.add(minPkPos); |
| } |
| } |
| } |
| |
| if (candidateIndexes.isEmpty()) |
| return false; |
| |
| Collections.sort(candidateIndexes, new Comparator<Integer>() { |
| @Override |
| public int compare(Integer left, Integer right) { |
| return pkPositions.get(left) - pkPositions.get(right); |
| } |
| }); |
| |
| List<Expression> candidates = Lists.newArrayList(); |
| List<List<Expression>> sampleValues = Lists.newArrayList(); |
| for (Integer index : candidateIndexes) { |
| candidates.add(expressions.get(index)); |
| } |
| for (int i = 0; i < 2; i++) { |
| List<Expression> group = Lists.newArrayList(); |
| for (Expression expression : candidates) { |
| PDataType type = expression.getDataType(); |
| group.add(LiteralExpression.newConstant(type.getSampleValue(), type)); |
| } |
| sampleValues.add(group); |
| } |
| |
| int count = 0; |
| int offset = table.getBucketNum() == null ? 0 : SaltingUtil.NUM_SALTING_BYTES; |
| int maxPkSpan = 0; |
| Expression remaining = null; |
| while (count < candidates.size()) { |
| Expression lhs = count == 0 ? candidates.get(0) : new RowValueConstructorExpression(candidates.subList(0, count + 1), false); |
| Expression firstRhs = count == 0 ? sampleValues.get(0).get(0) : new RowValueConstructorExpression(sampleValues.get(0).subList(0, count + 1), true); |
| Expression secondRhs = count == 0 ? sampleValues.get(1).get(0) : new RowValueConstructorExpression(sampleValues.get(1).subList(0, count + 1), true); |
| Expression testExpression = InListExpression.create(Lists.newArrayList(lhs, firstRhs, secondRhs), false, context.getTempPtr(), context.getCurrentTable().getTable().rowKeyOrderOptimizable()); |
| remaining = pushKeyExpressionsToScan(context, statement, testExpression); |
| if (context.getScanRanges().isPointLookup()) { |
| count++; |
| break; // found the best match |
| } |
| int pkSpan = context.getScanRanges().getBoundPkColumnCount() - offset; |
| if (pkSpan <= maxPkSpan) { |
| break; |
| } |
| maxPkSpan = pkSpan; |
| count++; |
| } |
| |
| result.addAll(candidates.subList(0, count)); |
| |
| return count == candidates.size() |
| && (context.getScanRanges().isPointLookup() || context.getScanRanges().useSkipScanFilter()) |
| && (remaining == null || remaining.equals(LiteralExpression.newConstant(true, Determinism.ALWAYS))); |
| } |
| |
| private static class RemoveExtractedNodesVisitor extends StatelessTraverseNoExpressionVisitor<Expression> { |
| private final Set<Expression> nodesToRemove; |
| |
| private RemoveExtractedNodesVisitor(Set<Expression> nodesToRemove) { |
| this.nodesToRemove = nodesToRemove; |
| } |
| |
| @Override |
| public Expression defaultReturn(Expression node, List<Expression> e) { |
| return nodesToRemove.contains(node) ? null : node; |
| } |
| |
| @Override |
| public Iterator<Expression> visitEnter(OrExpression node) { |
| return node.getChildren().iterator(); |
| } |
| |
| @Override |
| public Iterator<Expression> visitEnter(AndExpression node) { |
| return node.getChildren().iterator(); |
| } |
| |
| @Override |
| public Expression visit(LiteralExpression node) { |
| return nodesToRemove.contains(node) ? null : node; |
| } |
| |
| @Override |
| public Expression visitLeave(AndExpression node, List<Expression> l) { |
| if (!l.equals(node.getChildren())) { |
| if (l.isEmpty()) { |
| // Don't return null here, because then our defaultReturn will kick in |
| return LiteralExpression.newConstant(true, Determinism.ALWAYS); |
| } |
| if (l.size() == 1) { |
| return l.get(0); |
| } |
| try { |
| return AndExpression.create(l); |
| } catch (SQLException e) { |
| //shouldn't happen |
| throw new RuntimeException(e); |
| } |
| } |
| return node; |
| } |
| } |
| |
| /* |
| * TODO: We could potentially rewrite simple expressions to move constants to the RHS |
| * such that we can form a start/stop key for a scan. For example, rewrite this: |
| * WHEREH a + 1 < 5 |
| * to this instead: |
| * WHERE a < 5 - 1 |
| * Currently the first case would not be optimized. This includes other arithmetic |
| * operators, CASE statements, and string concatenation. |
| */ |
| public static class KeyExpressionVisitor extends StatelessTraverseNoExpressionVisitor<KeyExpressionVisitor.KeySlots> { |
| private static final KeySlots EMPTY_KEY_SLOTS = new KeySlots() { |
| @Override |
| public boolean isPartialExtraction() { |
| return false; |
| } |
| |
| @Override |
| public List<KeySlot> getSlots() { |
| return Collections.emptyList(); |
| } |
| }; |
| |
| private static boolean isDegenerate(List<KeyRange> keyRanges) { |
| return keyRanges == null || keyRanges.isEmpty() || (keyRanges.size() == 1 && keyRanges.get(0) == KeyRange.EMPTY_RANGE); |
| } |
| |
| private KeySlots newKeyParts(KeySlot slot, Expression extractNode, KeyRange keyRange) { |
| if (keyRange == null) { |
| return EMPTY_KEY_SLOTS; |
| } |
| |
| List<KeyRange> keyRanges = Collections.<KeyRange>singletonList(keyRange); |
| return newKeyParts(slot, extractNode, keyRanges); |
| } |
| |
| private KeySlots newKeyParts(KeySlot slot, Expression extractNode, List<KeyRange> keyRanges) { |
| if (isDegenerate(keyRanges)) { |
| return EMPTY_KEY_SLOTS; |
| } |
| |
| List<Expression> extractNodes = extractNode == null || slot.getKeyPart().getExtractNodes().isEmpty() |
| ? Collections.<Expression>emptyList() |
| : Collections.<Expression>singletonList(extractNode); |
| return new SingleKeySlot(new BaseKeyPart(table, slot.getKeyPart().getColumn(), extractNodes), slot.getPKPosition(), slot.getPKSpan(), keyRanges, slot.getOrderPreserving()); |
| } |
| |
| private KeySlots newKeyParts(KeySlot slot, List<Expression> extractNodes, List<KeyRange> keyRanges) { |
| if (isDegenerate(keyRanges)) { |
| return EMPTY_KEY_SLOTS; |
| } |
| |
| return new SingleKeySlot(new BaseKeyPart(table, slot.getKeyPart().getColumn(), extractNodes), slot.getPKPosition(), slot.getPKSpan(), keyRanges, slot.getOrderPreserving()); |
| } |
| |
| private KeySlots newRowValueConstructorKeyParts(RowValueConstructorExpression rvc, List<KeySlots> childSlots) { |
| if (childSlots.isEmpty() || rvc.isStateless()) { |
| return null; |
| } |
| |
| int position = -1; |
| int initialPosition = -1; |
| for (int i = 0; i < childSlots.size(); i++) { |
| KeySlots slots = childSlots.get(i); |
| KeySlot keySlot = slots.getSlots().iterator().next(); |
| List<Expression> childExtractNodes = keySlot.getKeyPart().getExtractNodes(); |
| // Stop if there was a gap in extraction of RVC elements. This is required if the leading |
| // RVC has not row key columns, as we'll still get childSlots if the RVC has trailing row |
| // key columns. We can't rule the RVC out completely when the childSlots is less the the |
| // RVC length, as a partial, *leading* match is optimizable. |
| if (childExtractNodes.size() != 1 || !childExtractNodes.get(0).equals(rvc.getChildren().get(i))) { |
| break; |
| } |
| int pkPosition = keySlot.getPKPosition(); |
| if (pkPosition < 0) { // break for non PK columns |
| break; |
| } |
| // Continue while we have consecutive pk columns |
| if (position == -1) { |
| position = initialPosition = pkPosition; |
| } else if (pkPosition != position) { |
| break; |
| } |
| position++; |
| |
| // If we come to a point where we're not preserving order completely |
| // then stop. We will never get a NO here, but we might get a YES_IF_LAST |
| // if the child expression is only using part of the underlying pk column. |
| // (for example, in the case of SUBSTR). In this case, we must stop building |
| // the row key constructor at that point. |
| assert(keySlot.getOrderPreserving() != OrderPreserving.NO); |
| if (keySlot.getOrderPreserving() == OrderPreserving.YES_IF_LAST) { |
| break; |
| } |
| } |
| if (position > 0) { |
| int span = position - initialPosition; |
| return new SingleKeySlot(new RowValueConstructorKeyPart(table.getPKColumns().get(initialPosition), rvc, span, childSlots), initialPosition, span, EVERYTHING_RANGES); |
| } |
| return null; |
| } |
| |
| private KeySlots newScalarFunctionKeyPart(KeySlot slot, ScalarFunction node) { |
| if (isDegenerate(slot.getKeyRanges())) { |
| return EMPTY_KEY_SLOTS; |
| } |
| KeyPart part = node.newKeyPart(slot.getKeyPart()); |
| if (part == null) { |
| return null; |
| } |
| |
| // Scalar function always returns primitive and never a row value constructor, so span is always 1 |
| return new SingleKeySlot(part, slot.getPKPosition(), slot.getKeyRanges(), node.preservesOrder()); |
| } |
| |
| private KeySlots newCoerceKeyPart(KeySlot slot, final CoerceExpression node) { |
| if (isDegenerate(slot.getKeyRanges())) { |
| return EMPTY_KEY_SLOTS; |
| } |
| final List<Expression> extractNodes = Collections.<Expression>singletonList(node); |
| final KeyPart childPart = slot.getKeyPart(); |
| final ImmutableBytesWritable ptr = context.getTempPtr(); |
| return new SingleKeySlot(new CoerceKeySlot( |
| childPart, ptr, node, extractNodes), slot.getPKPosition(), slot.getKeyRanges()); |
| } |
| |
| /** |
| * |
| * Iterates through all combinations of KeyRanges for a given |
| * PK column (based on its slot position). Useful when expressions |
| * are ORed together and subsequently ANDed. For example: |
| * WHERE (pk1 = 1 OR pk1 = 2) AND (pk2 = 3 OR pk2 = 4) |
| * would iterate through and produce [1,3],[1,4],[2,3],[2,4]. |
| * |
| */ |
| static class SlotsIterator { |
| public final int pkPos; |
| private List<KeySlots> childSlots; |
| private List<SlotRangesIterator> slotRangesIterator; |
| private boolean firstCall = true; |
| |
| SlotsIterator(List<KeySlots> childSlots, int pkPos) { |
| this.childSlots = childSlots; |
| this.pkPos = pkPos; |
| this.slotRangesIterator = Lists.newArrayListWithExpectedSize(childSlots.size() * 3 / 2); |
| for (int i = 0; i < childSlots.size(); i++) { |
| SlotRangesIterator iterator = new SlotRangesIterator(i); |
| slotRangesIterator.add(iterator); |
| iterator.initialize(); |
| } |
| } |
| |
| public KeySlot getSlot(int index) { |
| SlotRangesIterator slotRanges = slotRangesIterator.get(index); |
| return slotRanges.getSlot(); |
| } |
| |
| public KeyRange getRange(int index) { |
| SlotRangesIterator slotRanges = slotRangesIterator.get(index); |
| return slotRanges.getRange(); |
| } |
| |
| public boolean next() { |
| if (firstCall) { |
| boolean hasAny = false; |
| for (int i = 0; i < childSlots.size(); i++) { |
| hasAny |= this.slotRangesIterator.get(i).initialize(); |
| } |
| firstCall = false; |
| return hasAny; |
| } |
| int i = 0; |
| while (i < childSlots.size() && !slotRangesIterator.get(i).next()) { |
| i++; |
| } |
| for (i = 0; i < childSlots.size(); i++) { |
| if (!this.slotRangesIterator.get(i).isWrapped()) { |
| return true; |
| } |
| } |
| return false; |
| } |
| |
| private class SlotRangesIterator { |
| public int slotIndex; |
| public int rangeIndex; |
| public final KeySlots slots; |
| public boolean wrapped; |
| |
| public SlotRangesIterator(int slotsIndex) { |
| this.slots = childSlots.get(slotsIndex); |
| } |
| |
| public boolean isWrapped() { |
| return wrapped || !hasAny(); |
| } |
| |
| private boolean initialize() { |
| slotIndex = 0; |
| rangeIndex = 0; |
| while (slotIndex < slots.getSlots().size() |
| && (slots.getSlots().get(slotIndex) == null |
| || slots.getSlots().get(slotIndex).getKeyRanges().isEmpty() |
| || slots.getSlots().get(slotIndex).getPKPosition() != pkPos)) { |
| slotIndex++; |
| } |
| return hasAny(); |
| } |
| |
| private boolean hasAny() { |
| return slotIndex < slots.getSlots().size(); |
| } |
| |
| public KeySlot getSlot() { |
| if (!hasAny()) return null; |
| return slots.getSlots().get(slotIndex); |
| } |
| |
| public KeyRange getRange() { |
| if (!hasAny()) return null; |
| return getSlot().getKeyRanges().get(rangeIndex); |
| } |
| |
| public boolean next() { |
| if (!hasAny()) { |
| return false; |
| } |
| List<KeyRange> ranges = getSlot().getKeyRanges(); |
| if ((rangeIndex = (rangeIndex + 1) % ranges.size()) == 0) { |
| do { |
| if (((slotIndex = (slotIndex + 1) % slots.getSlots().size()) == 0)) { |
| initialize(); |
| wrapped = true; |
| return false; |
| } |
| } while (getSlot() == null || getSlot().getKeyRanges().isEmpty() || getSlot().getPKPosition() != pkPos); |
| } |
| |
| return true; |
| } |
| } |
| } |
| |
| /** |
| * Ands together an arbitrary set of compiled expressions (represented as a list of KeySlots) |
| * by intersecting each unique combination among the childSlots. |
| * @param andExpression expressions being anded together |
| * @param childSlots compiled form of child expressions being anded together. |
| * @return |
| */ |
| private KeySlots andKeySlots(AndExpression andExpression, List<KeySlots> childSlots) { |
| if(childSlots.isEmpty()) { |
| return null; |
| } |
| // Exit early if it's already been determined that one of the child slots cannot |
| // possibly be true. |
| boolean partialExtraction = andExpression.getChildren().size() != childSlots.size(); |
| int nChildSlots = childSlots.size(); |
| for (int i = 0; i < nChildSlots; i++) { |
| KeySlots childSlot = childSlots.get(i); |
| if (childSlot == EMPTY_KEY_SLOTS) { |
| return EMPTY_KEY_SLOTS; |
| } |
| // If any child slots represent partially extracted expressions, then carry |
| // that forward. An example of a partially extracted expression would be a |
| // RVC of (K1, K2, NonK3) in which only leading PK columns are extracted |
| // from the RVC. |
| partialExtraction |= childSlot.isPartialExtraction(); |
| } |
| boolean mayExtractNodes = true; |
| ImmutableBytesWritable ptr = context.getTempPtr(); |
| RowKeySchema rowKeySchema = table.getRowKeySchema(); |
| int nPkColumns = table.getPKColumns().size(); |
| KeySlot[] keySlotArray = new KeySlot[nPkColumns]; |
| int initPkPos = (table.getBucketNum() ==null ? 0 : 1) + (this.context.getConnection().getTenantId() != null && table.isMultiTenant() ? 1 : 0) + (table.getViewIndexId() == null ? 0 : 1); |
| |
| List<List<List<KeyRange[]>>> slotsTrailingRanges = Lists.newArrayListWithExpectedSize(nPkColumns); |
| // Process all columns being ANDed in position order to guarantee |
| // we have all information for leading PK columns before we attempt |
| // to intersect them. For example: |
| // (A, B, C) >= (1, 2, 3) AND (B, C) < (4, 5) AND A = 1 |
| // will processing slot 0 (i.e PK column A) across all children first, |
| // followed by slot 1 (PK column B), and finally slot 2 (C). This is |
| // done because we carry forward any constraints from preceding PK |
| // columns which may impact following PK columns. In the above example |
| // we'd carry forward that (B,C) >= (2,3) since we know that A is 1. |
| for (int pkPos = initPkPos; pkPos < nPkColumns; pkPos++) { |
| SlotsIterator iterator = new SlotsIterator(childSlots, pkPos); |
| OrderPreserving orderPreserving = null; |
| List<Expression> extractNodes = Lists.newArrayList(); |
| List<KeyRange> keyRanges = Lists.newArrayList(); |
| // This is the information carried forward as we process in PK order. |
| // It's parallel with the list of keyRanges. |
| List<KeyRange[]> trailingRangesList = Lists.<KeyRange[]>newArrayList(); |
| KeyRange result = null; |
| TrailingRangeIterator trailingRangeIterator = new TrailingRangeIterator(initPkPos, pkPos, slotsTrailingRanges); |
| // Iterate through all combinations (i.e. constraints) for the PK slot being processed. |
| // For example, with (A = 1 OR A = 2) AND (A,B) > (1,2) AND C = 3, we'd process the |
| // following two combinations: |
| // A=1,(A,B) > (1,2) |
| // A=2,(A,B) > (1,2) |
| // If we have no constraints for a PK, then we still must iterate through the information |
| // that may have been rolled up based on the processing of previous PK slots. For example, |
| // in the above ANDed expressions, we have no constraint on B, but we would end up with |
| // rolled up information based on the B part of the (A,B) constraint. |
| while (iterator.next() || (trailingRangeIterator.hasNext() && result != KeyRange.EMPTY_RANGE)) { |
| result = null; |
| KeyRange[] trailingRanges = newTrailingRange(); |
| for (int i = 0; i < nChildSlots && result != KeyRange.EMPTY_RANGE; i++) { |
| KeySlot slot = iterator.getSlot(i); |
| // Rollup the order preserving and concatenate the extracted expressions. |
| // Extracted expressions end up being removed from the AND expression at |
| // the top level call (pushKeyExpressionsToScan) with anything remaining |
| // ending up as a Filter (rather than contributing to the start/stop row |
| // of the scan. |
| if (slot != null) { |
| KeyRange otherRange = iterator.getRange(i); |
| KeyRange range = result; |
| if (slot.getOrderPreserving() != null) { |
| orderPreserving = slot.getOrderPreserving().combine(orderPreserving); |
| } |
| if (slot.getKeyPart().getExtractNodes() != null) { |
| extractNodes.addAll(slot.getKeyPart().getExtractNodes()); |
| } |
| // Keep a running intersection of the ranges we see. Note that the |
| // ranges are derived from constants appearing on the RHS of a comparison |
| // expression. For example, the expression A > 5 would produce a keyRange |
| // of (5, *) for slot 0 (assuming A is the leading PK column) If the result |
| // ends up as an empty key, that combination is ruled out. This is essentially |
| // doing constant reduction. |
| result = intersectRanges(pkPos, range, otherRange, trailingRanges); |
| } |
| } |
| |
| if (result != KeyRange.EMPTY_RANGE) { |
| Map<KeyRange,List<KeyRange[]>> results = Maps.newHashMap(); |
| trailingRangeIterator.init(); |
| // Process all constraints that have been rolled up from previous |
| // processing of PK slots. This occurs for RVCs which span PK slots |
| // in which the leading part of the RVC is determined to be equal |
| // to a constant on the RHS. |
| while (trailingRangeIterator.hasNext()) { |
| // Loop through all combinations of values for all previously |
| // calculated slots. |
| do { |
| // Loop through all combinations of range constraints for the |
| // current combinations of values. If no valid combinations |
| // are found, we can rule out the result. We can also end up |
| // modifying the result if it has an intersection with the |
| // range constraints. |
| do { |
| KeyRange priorTrailingRange = trailingRangeIterator.getRange(); |
| if (priorTrailingRange != KeyRange.EVERYTHING_RANGE) { |
| KeyRange[] intTrailingRanges = Arrays.copyOf(trailingRanges, trailingRanges.length); |
| // Intersect the current result with each range constraint. We essentially |
| // rule out the result when we find a constraint that has no intersection |
| KeyRange intResult = intersectRanges(pkPos, result, priorTrailingRange, intTrailingRanges); |
| if (intResult != KeyRange.EMPTY_RANGE) { |
| addResult(intResult, intTrailingRanges, results); |
| } |
| } |
| } while (trailingRangeIterator.nextTrailingRange()); |
| } while (trailingRangeIterator.nextRange()); |
| } |
| if (results.isEmpty() && result != null) { // No trailing range constraints |
| keyRanges.add(result); |
| trailingRangesList.add(trailingRanges); |
| } else { |
| mayExtractNodes &= results.size() <= 1; |
| for (Map.Entry<KeyRange,List<KeyRange[]>> entry : results.entrySet()) { |
| // Add same KeyRange with each KeyRange[] since the two lists are parallel |
| for (KeyRange[] trailingRange : entry.getValue()) { |
| keyRanges.add(entry.getKey()); |
| trailingRangesList.add(trailingRange); |
| } |
| } |
| } |
| } |
| } |
| |
| if (result == null && keyRanges.isEmpty()) { |
| slotsTrailingRanges.add(Collections.<List<KeyRange[]>>emptyList()); |
| } else { |
| // If we encountered a result for this slot and |
| // there are no ranges, this is the degenerate case. |
| if (keyRanges.isEmpty()) { |
| return EMPTY_KEY_SLOTS; |
| } |
| // Similar to KeyRange.coalesce(), except we must combine together |
| // any rolled up constraints (as a list of KeyRanges) for a |
| // particular value (as they're coalesced together). We maintain |
| // these KeyRange constraints as a parallel list between keyRanges |
| // and trailingRangesList. |
| keyRanges = coalesceKeyRangesAndTrailingRanges(keyRanges, trailingRangesList, slotsTrailingRanges); |
| int maxSpan = 1; |
| for (KeyRange aRange : keyRanges) { |
| int span = rowKeySchema.computeMaxSpan(pkPos, aRange, context.getTempPtr()); |
| if (span > maxSpan) { |
| maxSpan = span; |
| } |
| } |
| keySlotArray[pkPos] = new KeySlot( |
| new BaseKeyPart(table, table.getPKColumns().get(pkPos), mayExtractNodes ? extractNodes : Collections.<Expression>emptyList()), |
| pkPos, |
| maxSpan, |
| keyRanges, |
| orderPreserving); |
| } |
| } |
| |
| // Filters trailing part of RVC based on ranges from PK columns after the one we're |
| // currently processing that may overlap with this range. For example, with a PK |
| // columns A,B,C and a range of A from [(1,2,3) - (4,5,6)] and B from (6-*), we |
| // can filter the trailing part of the RVC for A, because the trailing part of |
| // the RVC (2,3)-(5,6) does not intersect with (6-*). By removing the trailing |
| // part of the RVC, we end up with a range of A from [1-4] and B from (6-*) which |
| // enables us to use a skip scan. |
| for (int i = 0; i < keySlotArray.length; i++) { |
| KeySlot keySlot = keySlotArray[i]; |
| if (keySlot == null) continue; |
| int pkSpan = keySlot.getPKSpan(); |
| int pkPos = keySlot.getPKPosition(); |
| boolean slotWasIntersected = false; |
| List<KeyRange> keyRanges = keySlot.getKeyRanges(); |
| List<KeyRange> slotTrimmedResults = Lists.newArrayListWithExpectedSize(keyRanges.size()); |
| for (KeyRange result : keyRanges) { |
| boolean resultWasIntersected = false; |
| Set<KeyRange> trimmedResults = Sets.newHashSetWithExpectedSize(keyRanges.size()); |
| for (int trailingPkPos = pkPos+1; trailingPkPos < pkPos+pkSpan && trailingPkPos < nPkColumns; trailingPkPos++) { |
| KeySlot nextKeySlot = keySlotArray[trailingPkPos]; |
| if (nextKeySlot == null) continue; |
| for (KeyRange trailingRange : nextKeySlot.getKeyRanges()) { |
| resultWasIntersected = true; |
| KeyRange intResult = intersectTrailing(result, pkPos, trailingRange, trailingPkPos); |
| if (intResult != KeyRange.EMPTY_RANGE) { |
| trimmedResults.add(intResult); |
| } |
| } |
| } |
| if (resultWasIntersected) { |
| slotWasIntersected = true; |
| slotTrimmedResults.addAll(trimmedResults); |
| mayExtractNodes &= trimmedResults.size() <= 1; |
| } else { |
| slotTrimmedResults.add(result); |
| } |
| } |
| if (slotTrimmedResults.isEmpty()) { |
| return EMPTY_KEY_SLOTS; |
| } |
| if (slotWasIntersected) { |
| // Re-coalesce the ranges and recalc the max span since the ranges may have changed |
| slotTrimmedResults = KeyRange.coalesce(slotTrimmedResults); |
| pkSpan = 1; |
| for (KeyRange trimmedResult : slotTrimmedResults) { |
| pkSpan = Math.max(pkSpan, rowKeySchema.computeMaxSpan(pkPos, trimmedResult, ptr)); |
| } |
| } |
| List<Expression> extractNodes = mayExtractNodes ? |
| keySlotArray[pkPos].getKeyPart().getExtractNodes() : Collections.<Expression>emptyList(); |
| keySlotArray[pkPos] = new KeySlot( |
| new BaseKeyPart(table, table.getPKColumns().get(pkPos), extractNodes), |
| pkPos, |
| pkSpan, |
| slotTrimmedResults, |
| keySlotArray[pkPos].getOrderPreserving()); |
| } |
| List<KeySlot> keySlots = Arrays.asList(keySlotArray); |
| // If we have a salt column, skip that slot because |
| // they'll never be an expression that uses it directly. |
| keySlots = keySlots.subList(initPkPos, keySlots.size()); |
| return new MultiKeySlot(keySlots, partialExtraction); |
| } |
| |
| private KeyRange[] newTrailingRange() { |
| KeyRange[] trailingRanges = new KeyRange[table.getPKColumns().size()]; |
| for (int i = 0; i < trailingRanges.length; i++) { |
| trailingRanges[i] = KeyRange.EVERYTHING_RANGE; |
| } |
| return trailingRanges; |
| } |
| |
| private static void addResult(KeyRange result, KeyRange[] trailingRange, Map<KeyRange,List<KeyRange[]>> results) { |
| List<KeyRange[]> trailingRanges = Lists.<KeyRange[]>newArrayList(trailingRange); |
| List<KeyRange[]> priorTrailingRanges = results.put(result, trailingRanges); |
| if (priorTrailingRanges != null) { |
| // This is tricky case. We may have multiple possible values based on the rolled up range |
| // constraints from previous slots. We track unique ranges and concatenate together the |
| // trailing range data. If there's more than one element in the set (i.e. more than one |
| // possible result), we'll end up have more combinations than there actually are because |
| // the constraint only apply for a single value, not for *all* combinations (which is a |
| // limitation of our representation derived from what can be handled by our SkipScanFilter). |
| // For example, if we we've gathered these ranges so far in a three PK table: (1,2), (A,B) |
| // and have X as a constraint for value A and Y as a constraint for value B, we have the |
| // following possible combinations: 1AX, 2AX, 1BY, 2BY. However, our SkipScanFilter only |
| // supports identifying combinations for *all* combinations of (1,2),(A,B),(X,Y) or |
| // AX, 1AY, 1BX, 1BY, 2AX, 2AY, 2BX, 2BY. See WhereOptimizerTest.testNotRepresentableBySkipScan() |
| // for an example. |
| trailingRanges.addAll(priorTrailingRanges); |
| } |
| } |
| |
| private List<KeyRange> coalesceKeyRangesAndTrailingRanges(List<KeyRange> keyRanges, |
| List<KeyRange[]> trailingRangesList, List<List<List<KeyRange[]>>> slotsTrailingRanges) { |
| List<Pair<KeyRange,List<KeyRange[]>>> pairs = coalesce(keyRanges, trailingRangesList); |
| List<List<KeyRange[]>> trailingRanges = Lists.newArrayListWithExpectedSize(pairs.size()); |
| List<KeyRange>coalescedKeyRanges = Lists.newArrayListWithExpectedSize(pairs.size()); |
| for (Pair<KeyRange,List<KeyRange[]>> pair : pairs) { |
| coalescedKeyRanges.add(pair.getFirst()); |
| trailingRanges.add(pair.getSecond()); |
| } |
| slotsTrailingRanges.add(trailingRanges); |
| return coalescedKeyRanges; |
| } |
| |
| public static final Comparator<Pair<KeyRange,List<KeyRange[]>>> KEY_RANGE_PAIR_COMPARATOR = new Comparator<Pair<KeyRange,List<KeyRange[]>>>() { |
| @Override public int compare(Pair<KeyRange,List<KeyRange[]>> o1, Pair<KeyRange,List<KeyRange[]>> o2) { |
| return KeyRange.COMPARATOR.compare(o1.getFirst(), o2.getFirst()); |
| } |
| }; |
| |
| private static boolean isEverythingRanges(KeyRange[] ranges) { |
| for (KeyRange range : ranges) { |
| if (range != KeyRange.EVERYTHING_RANGE) { |
| return false; |
| } |
| } |
| return true; |
| } |
| |
| private static List<KeyRange[]> concat(List<KeyRange[]> list1, List<KeyRange[]> list2) { |
| if (list1.size() == 1 && isEverythingRanges(list1.get(0))) { |
| if (list2.size() == 1 && isEverythingRanges(list1.get(0))) { |
| return Collections.emptyList(); |
| } |
| return list2; |
| } |
| if (list2.size() == 1 && isEverythingRanges(list2.get(0))) { |
| return list1; |
| } |
| |
| List<KeyRange[]> newList = Lists.<KeyRange[]>newArrayListWithExpectedSize(list1.size()+list2.size()); |
| newList.addAll(list1); |
| newList.addAll(list2); |
| return newList; |
| } |
| |
| /** |
| * Similar to KeyRange.coelesce, but con |
| */ |
| @NonNull |
| public static List<Pair<KeyRange,List<KeyRange[]>>> coalesce(List<KeyRange> keyRanges, List<KeyRange[]> trailingRangesList) { |
| List<Pair<KeyRange,List<KeyRange[]>>> tmp = Lists.newArrayListWithExpectedSize(keyRanges.size()); |
| int nKeyRanges = keyRanges.size(); |
| for (int i = 0; i < nKeyRanges; i++) { |
| KeyRange keyRange = keyRanges.get(i); |
| KeyRange[] trailingRange = trailingRangesList.get(i); |
| Pair<KeyRange,List<KeyRange[]>> pair = new Pair<KeyRange,List<KeyRange[]>>(keyRange,Lists.<KeyRange[]>newArrayList(trailingRange)); |
| tmp.add(pair); |
| } |
| Collections.sort(tmp, KEY_RANGE_PAIR_COMPARATOR); |
| List<Pair<KeyRange,List<KeyRange[]>>> tmp2 = Lists.<Pair<KeyRange,List<KeyRange[]>>>newArrayListWithExpectedSize(tmp.size()); |
| Pair<KeyRange,List<KeyRange[]>> range = tmp.get(0); |
| for (int i=1; i<tmp.size(); i++) { |
| Pair<KeyRange,List<KeyRange[]>> otherRange = tmp.get(i); |
| KeyRange intersect = range.getFirst().intersect(otherRange.getFirst()); |
| if (KeyRange.EMPTY_RANGE == intersect) { |
| tmp2.add(range); |
| range = otherRange; |
| } else { |
| KeyRange newRange = range.getFirst().union(otherRange.getFirst()); |
| range = new Pair<KeyRange,List<KeyRange[]>>(newRange,concat(range.getSecond(),otherRange.getSecond())); |
| } |
| } |
| tmp2.add(range); |
| List<Pair<KeyRange,List<KeyRange[]>>> tmp3 = Lists.<Pair<KeyRange,List<KeyRange[]>>>newArrayListWithExpectedSize(tmp2.size()); |
| range = tmp2.get(0); |
| for (int i=1; i<tmp2.size(); i++) { |
| Pair<KeyRange,List<KeyRange[]>> otherRange = tmp2.get(i); |
| assert !range.getFirst().upperUnbound(); |
| assert !otherRange.getFirst().lowerUnbound(); |
| if (range.getFirst().isUpperInclusive() != otherRange.getFirst().isLowerInclusive() |
| && Bytes.equals(range.getFirst().getUpperRange(), otherRange.getFirst().getLowerRange())) { |
| KeyRange newRange = KeyRange.getKeyRange( |
| range.getFirst().getLowerRange(), range.getFirst().isLowerInclusive(), |
| otherRange.getFirst().getUpperRange(), otherRange.getFirst().isUpperInclusive()); |
| range = new Pair<KeyRange,List<KeyRange[]>>(newRange,concat(range.getSecond(),otherRange.getSecond())); |
| } else { |
| tmp3.add(range); |
| range = otherRange; |
| } |
| } |
| tmp3.add(range); |
| |
| return tmp3; |
| } |
| |
| /** |
| * |
| * Iterates over all unique combinations of the List<KeyRange[]> representing |
| * the constraints from previous slot positions. For example, if we have |
| * a RVC of (A,B) = (2,1), then if A=2, we know that B must be 1. |
| * |
| */ |
| static class TrailingRangeIterator { |
| private final List<List<List<KeyRange[]>>> slotTrailingRangesList; |
| private final int[] rangePos; |
| private final int[] trailingRangePos; |
| private final int initPkPos; |
| private final int pkPos; |
| private int trailingRangePosIndex; |
| private int rangePosIndex; |
| private boolean hasMore = true; |
| |
| TrailingRangeIterator (int initPkPos, int pkPos, List<List<List<KeyRange[]>>> slotsTrailingRangesList) { |
| this.slotTrailingRangesList = slotsTrailingRangesList; |
| int nSlots = pkPos - initPkPos; |
| rangePos = new int[nSlots]; |
| trailingRangePos = new int[nSlots]; |
| this.initPkPos = initPkPos; |
| this.pkPos = pkPos; |
| init(); |
| } |
| |
| public void init() { |
| Arrays.fill(rangePos, 0); |
| Arrays.fill(trailingRangePos, 0); |
| rangePosIndex = rangePos.length - 1; |
| trailingRangePosIndex = trailingRangePos.length - 1; |
| this.hasMore = pkPos > initPkPos && skipEmpty(); |
| } |
| |
| public boolean hasNext() { |
| return hasMore && skipEmpty(); |
| } |
| |
| public KeyRange getRange() { |
| if (!hasMore) { |
| throw new NoSuchElementException(); |
| } |
| KeyRange priorTrailingRange = KeyRange.EVERYTHING_RANGE; |
| for (int priorPkPos = initPkPos; priorPkPos < pkPos; priorPkPos++) { |
| List<List<KeyRange[]>>trailingKeyRangesList = slotTrailingRangesList.get(priorPkPos-initPkPos); |
| if (!trailingKeyRangesList.isEmpty()) { |
| List<KeyRange[]> slotTrailingRanges = trailingKeyRangesList.get(rangePos[priorPkPos-initPkPos]); |
| if (!slotTrailingRanges.isEmpty()) { |
| KeyRange[] slotTrailingRange = slotTrailingRanges.get(trailingRangePos[priorPkPos-initPkPos]); |
| priorTrailingRange = priorTrailingRange.intersect(slotTrailingRange[pkPos]); |
| } |
| } |
| } |
| |
| return priorTrailingRange; |
| } |
| |
| private boolean skipEmptyTrailingRanges() { |
| while (trailingRangePosIndex >= 0 && |
| (slotTrailingRangesList.get(trailingRangePosIndex).isEmpty() |
| || slotTrailingRangesList.get(trailingRangePosIndex).get(rangePos[trailingRangePosIndex]).isEmpty())) { |
| trailingRangePosIndex--; |
| } |
| if (trailingRangePosIndex >= 0) { |
| return true; |
| } |
| return false; |
| } |
| |
| private boolean skipEmptyRanges() { |
| trailingRangePosIndex = trailingRangePos.length - 1; |
| while (rangePosIndex >= 0 && |
| (slotTrailingRangesList.get(rangePosIndex).isEmpty())) { |
| rangePosIndex--; |
| } |
| return rangePosIndex >= 0; |
| } |
| |
| private boolean skipEmpty() { |
| if (!hasMore || slotTrailingRangesList.isEmpty() || rangePosIndex < 0) { |
| return hasMore=false; |
| } |
| do { |
| if (skipEmptyTrailingRanges()) { |
| return true; |
| } |
| } while (skipEmptyRanges()); |
| return hasMore = rangePosIndex >= 0; |
| } |
| |
| public boolean nextRange() { |
| trailingRangePosIndex = trailingRangePos.length - 1; |
| while (rangePosIndex >= 0 && |
| (slotTrailingRangesList.get(rangePosIndex).isEmpty() |
| || (rangePos[rangePosIndex] = (rangePos[rangePosIndex] + 1) |
| % slotTrailingRangesList.get(rangePosIndex).size()) == 0)) { |
| rangePosIndex--; |
| } |
| return rangePosIndex >= 0; |
| } |
| |
| public boolean nextTrailingRange() { |
| while (trailingRangePosIndex >= 0 && |
| (slotTrailingRangesList.get(trailingRangePosIndex).isEmpty() |
| || slotTrailingRangesList.get(trailingRangePosIndex).get(rangePos[trailingRangePosIndex]).isEmpty() |
| || (trailingRangePos[trailingRangePosIndex] = (trailingRangePos[trailingRangePosIndex] + 1) |
| % slotTrailingRangesList.get(trailingRangePosIndex).get(rangePos[trailingRangePosIndex]).size()) == 0)) { |
| trailingRangePosIndex--; |
| } |
| if (trailingRangePosIndex >= 0) { |
| return true; |
| } |
| return false; |
| } |
| } |
| |
| private KeyRange intersectRanges(int pkPos, KeyRange range, KeyRange otherRange, KeyRange[] trailingRanges) { |
| // We need to initialize result to the other range rather than |
| // initializing it to EVERYTHING_RANGE to handle the IS NULL case. |
| // Otherwise EVERYTHING_RANGE intersected below with NULL_RANGE |
| // becomes an EMPTY_RANGE. |
| if (range == null) { |
| range = otherRange; |
| } |
| KeyRange result = range; |
| ImmutableBytesWritable ptr = context.getTempPtr(); |
| RowKeySchema rowKeySchema = table.getRowKeySchema(); |
| int minSpan = rowKeySchema.computeMinSpan(pkPos, result, ptr); |
| int otherMinSpan = rowKeySchema.computeMinSpan(pkPos, otherRange, ptr); |
| KeyRange otherClippedRange = otherRange; |
| KeyRange clippedRange = result; |
| if (minSpan != otherMinSpan && result != KeyRange.EVERYTHING_RANGE && otherRange != KeyRange.EVERYTHING_RANGE) { |
| if (otherMinSpan > minSpan) { |
| otherClippedRange = rowKeySchema.clipLeft(pkPos, otherRange, minSpan, ptr); |
| } else if (minSpan > otherMinSpan) { |
| clippedRange = rowKeySchema.clipLeft(pkPos, result, otherMinSpan, ptr); |
| } |
| } |
| |
| // intersect result with otherRange |
| result = clippedRange.intersect(otherClippedRange); |
| if (result == KeyRange.EMPTY_RANGE) { |
| return result; |
| } |
| if (minSpan != otherMinSpan) { |
| // If trailing ranges are of different spans, intersect them at the common |
| // span and add remaining part of range used to trailing ranges |
| // Without the special case for single key values, the trailing ranges |
| // code doesn't work correctly for WhereOptimizerTest.testMultiSlotTrailingIntersect() |
| if (result.isSingleKey() && !(range.isSingleKey() && otherRange.isSingleKey())) { |
| int trailingPkPos = pkPos + Math.min(minSpan, otherMinSpan); |
| KeyRange trailingRange = getTrailingRange(rowKeySchema, trailingPkPos, minSpan > otherMinSpan ? range : otherRange, result, ptr); |
| trailingRanges[trailingPkPos] = trailingRanges[trailingPkPos].intersect(trailingRange); |
| } else { |
| // Add back clipped part of range |
| if (otherMinSpan > minSpan) { |
| result = concatSuffix(result, otherRange); |
| } else if (minSpan > otherMinSpan) { |
| result = concatSuffix(result, range); |
| } |
| } |
| } |
| return result; |
| } |
| |
| private static KeyRange concatSuffix(KeyRange result, KeyRange otherRange) { |
| byte[] lowerRange = result.getLowerRange(); |
| byte[] clippedLowerRange = lowerRange; |
| byte[] fullLowerRange = otherRange.getLowerRange(); |
| if (!result.lowerUnbound() && Bytes.startsWith(fullLowerRange, clippedLowerRange)) { |
| lowerRange = fullLowerRange; |
| } |
| byte[] upperRange = result.getUpperRange(); |
| byte[] clippedUpperRange = upperRange; |
| byte[] fullUpperRange = otherRange.getUpperRange(); |
| if (!result.lowerUnbound() && Bytes.startsWith(fullUpperRange, clippedUpperRange)) { |
| upperRange = fullUpperRange; |
| } |
| if (lowerRange == clippedLowerRange && upperRange == clippedUpperRange) { |
| return result; |
| } |
| return KeyRange.getKeyRange(lowerRange, result.isLowerInclusive(), upperRange, result.isUpperInclusive()); |
| } |
| |
| /** |
| * Intersects an RVC that starts at pkPos with an overlapping range that starts at otherPKPos. |
| * For example, ((A, B) - (J, K)) intersected with (F - *) would return ((A,F) - (J, K)) |
| * ((A, B) - (J, K)) intersected with (M - P) would return (A-J) since both of the trailing |
| * part of the RVC, B and K, do not intersect with B and K. |
| * @param result an RVC expression starting from pkPos and with length of at least otherPKPos - pkPos. |
| * @param pkPos the PK position of the leading part of the RVC expression |
| * @param otherRange the other range to intersect with the overlapping part of the RVC. |
| * @param otherPKPos the PK position of the leading part of the other range |
| * @return resulting KeyRange from the intersection, potentially an empty range if the result RVC |
| * is a single key and the trailing part of the key does not intersect with the RVC. |
| */ |
| private KeyRange intersectTrailing(KeyRange result, int pkPos, KeyRange otherRange, int otherPKPos) { |
| RowKeySchema rowKeySchema = table.getRowKeySchema(); |
| ImmutableBytesWritable ptr = context.getTempPtr(); |
| int separatorLength = table.getPKColumns().get(otherPKPos-1).getDataType().isFixedWidth() ? 0 : 1; |
| boolean lowerInclusive = result.isLowerInclusive(); |
| byte[] lowerRange = result.getLowerRange(); |
| ptr.set(lowerRange); |
| // Position ptr at the point at which the two ranges overlap |
| if (rowKeySchema.position(ptr, pkPos, otherPKPos)) { |
| int lowerOffset = ptr.getOffset(); |
| // Increase the length of the ptr to include the entire trailing bytes |
| ptr.set(ptr.get(), lowerOffset, lowerRange.length - lowerOffset); |
| byte[] trailingBytes = ptr.copyBytes(); |
| |
| // Special case for single key since single keys of different span lengths |
| // will never overlap. We do not need to process both the lower and upper |
| // ranges since they are the same. |
| if (result.isSingleKey() && otherRange.isSingleKey()) { |
| // Find the span of the trailing bytes as it could be more than one. |
| // We need this to determine if the slot at the last position would |
| // have a separator byte (i.e. is variable length). |
| int pos = otherPKPos; |
| rowKeySchema.iterator(trailingBytes, ptr, otherPKPos); |
| while (rowKeySchema.next(ptr, pos, trailingBytes.length) != null) { |
| pos++; |
| } |
| byte[] otherLowerRange = otherRange.getLowerRange(); |
| boolean isFixedWidthAtEnd = table.getPKColumns().get(pos).getDataType().isFixedWidth(); |
| // If the otherRange starts with the overlapping trailing byte *and* we're comparing |
| // the entire key (i.e. not just a leading subset), then we have an intersection. |
| if (Bytes.startsWith(otherLowerRange, trailingBytes) && |
| (isFixedWidthAtEnd || |
| otherLowerRange.length == trailingBytes.length || |
| otherLowerRange[trailingBytes.length] == QueryConstants.SEPARATOR_BYTE)) { |
| return result; |
| } |
| // Otherwise, there is no overlap |
| return KeyRange.EMPTY_RANGE; |
| } |
| // If we're not dealing with single keys, then we can use our normal intersection |
| if (otherRange.intersect(KeyRange.getKeyRange(trailingBytes)) == KeyRange.EMPTY_RANGE) { |
| // Exit early since the upper range is the same as the lower range |
| if (result.isSingleKey()) { |
| return KeyRange.EMPTY_RANGE; |
| } |
| ptr.set(result.getLowerRange(), 0, lowerOffset - separatorLength); |
| lowerRange = ptr.copyBytes(); |
| } |
| } |
| boolean upperInclusive = result.isUpperInclusive(); |
| byte[] upperRange = result.getUpperRange(); |
| ptr.set(upperRange); |
| if (rowKeySchema.position(ptr, pkPos, otherPKPos)) { |
| int upperOffset = ptr.getOffset(); |
| ptr.set(ptr.get(), upperOffset, upperRange.length - upperOffset); |
| if (otherRange.intersect(KeyRange.getKeyRange(ptr.copyBytes())) == KeyRange.EMPTY_RANGE) { |
| ptr.set(ptr.get(), 0, upperOffset - separatorLength); |
| upperRange = ptr.copyBytes(); |
| } |
| } |
| if (lowerRange == result.getLowerRange() && upperRange == result.getUpperRange()) { |
| return result; |
| } |
| KeyRange range = KeyRange.getKeyRange(lowerRange, lowerInclusive, upperRange, upperInclusive); |
| return range; |
| } |
| |
| private KeySlots orKeySlots(OrExpression orExpression, List<KeySlots> childSlots) { |
| // If any children were filtered out, filter out the entire |
| // OR expression because we don't have enough information to |
| // constraint the scan start/stop key. An example would be: |
| // WHERE organization_id=? OR key_value_column = 'x' |
| // In this case, we cannot simply filter the key_value_column, |
| // because we end up bubbling up only the organization_id=? |
| // expression to form the start/stop key which is obviously wrong. |
| // For an OR expression, you need to be able to extract |
| // everything or nothing. |
| if (orExpression.getChildren().size() != childSlots.size()) { |
| return null; |
| } |
| int initialPos = (table.getBucketNum() ==null ? 0 : 1) + (this.context.getConnection().getTenantId() != null && table.isMultiTenant() ? 1 : 0) + (table.getViewIndexId() == null ? 0 : 1); |
| KeySlot theSlot = null; |
| List<Expression> slotExtractNodes = Lists.<Expression>newArrayList(); |
| int thePosition = -1; |
| boolean partialExtraction = false; |
| // TODO: Have separate list for single span versus multi span |
| // For multi-span, we only need to keep a single range. |
| List<KeyRange> slotRanges = Lists.newArrayList(); |
| for (KeySlots childSlot : childSlots) { |
| if (childSlot == EMPTY_KEY_SLOTS) { |
| // TODO: can this ever happen and can we safely filter the expression tree? |
| continue; |
| } |
| // When we OR together expressions, we can only extract the entire OR expression |
| // if all sub-expressions have been completely extracted. Otherwise, we must |
| // leave the OR as a post filter. |
| partialExtraction |= childSlot.isPartialExtraction(); |
| // TODO: Do the same optimization that we do for IN if the childSlots specify a fully qualified row key |
| for (KeySlot slot : childSlot.getSlots()) { |
| if (slot == null) { |
| continue; |
| } |
| /* |
| * If we see a different PK column than before, we can't |
| * optimize it because our SkipScanFilter only handles |
| * top level expressions that are ANDed together (where in |
| * the same column expressions may be ORed together). |
| * For example, WHERE a=1 OR b=2 cannot be handled, while |
| * WHERE (a=1 OR a=2) AND (b=2 OR b=3) can be handled. |
| * TODO: We could potentially handle these cases through |
| * multiple, nested SkipScanFilters, where each OR expression |
| * is handled by its own SkipScanFilter and the outer one |
| * increments the child ones and picks the one with the smallest |
| * key. |
| */ |
| if (thePosition == -1) { |
| theSlot = slot; |
| thePosition = slot.getPKPosition(); |
| } else if (thePosition != slot.getPKPosition()) { |
| return null; |
| } |
| slotExtractNodes.addAll(slot.getKeyPart().getExtractNodes()); |
| slotRanges.addAll(slot.getKeyRanges()); |
| } |
| } |
| |
| if (thePosition == -1) { |
| return null; |
| } |
| if (theSlot == null) { |
| theSlot = new KeySlot(new BaseKeyPart(table, table.getPKColumns().get(initialPos), slotExtractNodes), initialPos, 1, EVERYTHING_RANGES, null); |
| } |
| return newKeyParts( |
| theSlot, |
| partialExtraction ? slotExtractNodes : Collections.<Expression>singletonList(orExpression), |
| slotRanges.isEmpty() ? EVERYTHING_RANGES : KeyRange.coalesce(slotRanges)); |
| } |
| |
| private final PTable table; |
| private final StatementContext context; |
| |
| public KeyExpressionVisitor(StatementContext context, PTable table) { |
| this.context = context; |
| this.table = table; |
| } |
| |
| @Override |
| public Iterator<Expression> visitEnter(CoerceExpression node) { |
| return node.getChildren().iterator(); |
| } |
| |
| @Override |
| public KeySlots visitLeave(CoerceExpression node, List<KeySlots> childParts) { |
| if (childParts.isEmpty()) { |
| return null; |
| } |
| return newCoerceKeyPart(childParts.get(0).getSlots().get(0), node); |
| } |
| |
| @Override |
| public Iterator<Expression> visitEnter(AndExpression node) { |
| return node.getChildren().iterator(); |
| } |
| |
| @Override |
| public KeySlots visitLeave(AndExpression node, List<KeySlots> l) { |
| KeySlots keyExpr = andKeySlots(node, l); |
| return keyExpr; |
| } |
| |
| @Override |
| public Iterator<Expression> visitEnter(OrExpression node) { |
| return node.getChildren().iterator(); |
| } |
| |
| @Override |
| public KeySlots visitLeave(OrExpression node, List<KeySlots> l) { |
| KeySlots keySlots = orKeySlots(node, l); |
| if (keySlots == null) { |
| return null; |
| } |
| return keySlots; |
| } |
| |
| @Override |
| public Iterator<Expression> visitEnter(RowValueConstructorExpression node) { |
| return node.getChildren().iterator(); |
| } |
| |
| @Override |
| public KeySlots visitLeave(RowValueConstructorExpression node, List<KeySlots> childSlots) { |
| return newRowValueConstructorKeyParts(node, childSlots); |
| } |
| |
| @Override |
| public KeySlots visit(RowKeyColumnExpression node) { |
| PColumn column = table.getPKColumns().get(node.getPosition()); |
| return new SingleKeySlot(new BaseKeyPart(table, column, Collections.<Expression>singletonList(node)), node.getPosition(), 1, EVERYTHING_RANGES); |
| } |
| |
| @Override |
| public Iterator<Expression> visitEnter(ComparisonExpression node) { |
| Expression rhs = node.getChildren().get(1); |
| if (!rhs.isStateless() || node.getFilterOp() == CompareOp.NOT_EQUAL) { |
| return Collections.emptyIterator(); |
| } |
| return Iterators.singletonIterator(node.getChildren().get(0)); |
| } |
| |
| @Override |
| public KeySlots visitLeave(ComparisonExpression node, List<KeySlots> childParts) { |
| // Delay adding to extractedNodes, until we're done traversing, |
| // since we can't yet tell whether or not the PK column references |
| // are contiguous |
| if (childParts.isEmpty()) { |
| return null; |
| } |
| Expression rhs = node.getChildren().get(1); |
| KeySlots childSlots = childParts.get(0); |
| KeySlot childSlot = childSlots.getSlots().get(0); |
| KeyPart childPart = childSlot.getKeyPart(); |
| //SortOrder sortOrder = childPart.getColumn().getSortOrder(); |
| CompareOp op = node.getFilterOp(); |
| //CompareOp op = sortOrder.transform(node.getFilterOp()); |
| KeyRange keyRange = childPart.getKeyRange(op, rhs); |
| return newKeyParts(childSlot, node, keyRange); |
| } |
| |
| // TODO: consider supporting expression substitution in the PK for pre-joined tables |
| // You'd need to register the expression for a given PK and substitute with a column |
| // reference for this during ExpressionBuilder. |
| @Override |
| public Iterator<Expression> visitEnter(ScalarFunction node) { |
| int index = node.getKeyFormationTraversalIndex(); |
| if (index < 0) { |
| return Collections.emptyIterator(); |
| } |
| return Iterators.singletonIterator(node.getChildren().get(index)); |
| } |
| |
| @Override |
| public KeySlots visitLeave(ScalarFunction node, List<KeySlots> childParts) { |
| if (childParts.isEmpty()) { |
| return null; |
| } |
| return newScalarFunctionKeyPart(childParts.get(0).getSlots().get(0), node); |
| } |
| |
| @Override |
| public Iterator<Expression> visitEnter(LikeExpression node) { |
| // TODO: can we optimize something that starts with '_' like this: foo LIKE '_a%' ? |
| if (node.getLikeType() == LikeType.CASE_INSENSITIVE || // TODO: remove this when we optimize ILIKE |
| ! (node.getChildren().get(1) instanceof LiteralExpression) || node.startsWithWildcard()) { |
| return Collections.emptyIterator(); |
| } |
| |
| return Iterators.singletonIterator(node.getChildren().get(0)); |
| } |
| |
| @Override |
| public KeySlots visitLeave(LikeExpression node, List<KeySlots> childParts) { |
| // TODO: optimize ILIKE by creating two ranges for the literal prefix: one with lower case, one with upper case |
| if (childParts.isEmpty()) { |
| return null; |
| } |
| // for SUBSTR(<column>,1,3) LIKE 'foo%' |
| KeySlots childSlots = childParts.get(0); |
| KeySlot childSlot = childSlots.getSlots().get(0); |
| final String startsWith = node.getLiteralPrefix(); |
| SortOrder sortOrder = node.getChildren().get(0).getSortOrder(); |
| byte[] key = PVarchar.INSTANCE.toBytes(startsWith, sortOrder); |
| // If the expression is an equality expression against a fixed length column |
| // and the key length doesn't match the column length, the expression can |
| // never be true. |
| // An zero length byte literal is null which can never be compared against as true |
| Expression firstChild = node.getChildren().get(0); |
| Integer childNodeFixedLength = firstChild.getDataType().isFixedWidth() ? firstChild.getMaxLength() : null; |
| if (childNodeFixedLength != null && key.length > childNodeFixedLength) { |
| return EMPTY_KEY_SLOTS; |
| } |
| // TODO: is there a case where we'd need to go through the childPart to calculate the key range? |
| PColumn column = childSlot.getKeyPart().getColumn(); |
| PDataType type = column.getDataType(); |
| byte[] lowerRange = key; |
| byte[] upperRange = ByteUtil.nextKey(key); |
| Integer columnFixedLength = column.getMaxLength(); |
| if (type.isFixedWidth()) { |
| if (columnFixedLength != null) { // Sanity check - should always be non null |
| // Always use minimum byte to fill as otherwise our key is bigger |
| // that it should be when the sort order is descending. |
| lowerRange = type.pad(lowerRange, columnFixedLength, SortOrder.ASC); |
| upperRange = type.pad(upperRange, columnFixedLength, SortOrder.ASC); |
| } |
| } else if (column.getSortOrder() == SortOrder.DESC && table.rowKeyOrderOptimizable()) { |
| // Append a zero byte if descending since a \xFF byte will be appended to the lowerRange |
| // causing rows to be skipped that should be included. For example, with rows 'ab', 'a', |
| // a lowerRange of 'a\xFF' would skip 'ab', while 'a\x00\xFF' would not. |
| lowerRange = Arrays.copyOf(lowerRange, lowerRange.length+1); |
| lowerRange[lowerRange.length-1] = QueryConstants.SEPARATOR_BYTE; |
| } |
| KeyRange range = type.getKeyRange(lowerRange, true, upperRange, false); |
| if (column.getSortOrder() == SortOrder.DESC) { |
| range = range.invert(); |
| } |
| // Only extract LIKE expression if pattern ends with a wildcard and everything else was extracted |
| return newKeyParts(childSlot, node.endsWithOnlyWildcard() ? node : null, range); |
| } |
| |
| @Override |
| public Iterator<Expression> visitEnter(InListExpression node) { |
| return Iterators.singletonIterator(node.getChildren().get(0)); |
| } |
| |
| @Override |
| public KeySlots visitLeave(InListExpression node, List<KeySlots> childParts) { |
| if (childParts.isEmpty()) { |
| return null; |
| } |
| |
| List<Expression> keyExpressions = node.getKeyExpressions(); |
| Set<KeyRange> ranges = Sets.newHashSetWithExpectedSize(keyExpressions.size()); |
| KeySlot childSlot = childParts.get(0).getSlots().get(0); |
| KeyPart childPart = childSlot.getKeyPart(); |
| // Handles cases like WHERE substr(foo,1,3) IN ('aaa','bbb') |
| for (Expression key : keyExpressions) { |
| KeyRange range = childPart.getKeyRange(CompareOp.EQUAL, key); |
| if (range == null) { |
| return null; |
| } |
| if (range != KeyRange.EMPTY_RANGE) { // null means it can't possibly be in range |
| ranges.add(range); |
| } |
| } |
| return newKeyParts(childSlot, node, new ArrayList<KeyRange>(ranges)); |
| } |
| |
| @Override |
| public Iterator<Expression> visitEnter(IsNullExpression node) { |
| return Iterators.singletonIterator(node.getChildren().get(0)); |
| } |
| |
| @Override |
| public KeySlots visitLeave(IsNullExpression node, List<KeySlots> childParts) { |
| if (childParts.isEmpty()) { |
| return null; |
| } |
| KeySlots childSlots = childParts.get(0); |
| KeySlot childSlot = childSlots.getSlots().get(0); |
| PColumn column = childSlot.getKeyPart().getColumn(); |
| PDataType type = column.getDataType(); |
| boolean isFixedWidth = type.isFixedWidth(); |
| // Nothing changes for IS NULL and IS NOT NULL when DESC since |
| // we represent NULL the same way for ASC and DESC |
| if (isFixedWidth) { // if column can't be null |
| return node.isNegate() ? null : |
| newKeyParts(childSlot, node, type.getKeyRange(new byte[SchemaUtil.getFixedByteSize(column)], true, |
| KeyRange.UNBOUND, true)); |
| } else { |
| KeyRange keyRange = node.isNegate() ? KeyRange.IS_NOT_NULL_RANGE : KeyRange.IS_NULL_RANGE; |
| return newKeyParts(childSlot, node, keyRange); |
| } |
| } |
| |
| /** |
| * |
| * Top level data structure used to drive the formation |
| * of the start/stop row of scans, essentially taking the |
| * expression tree of a WHERE clause and producing the |
| * ScanRanges instance during query compilation. |
| * |
| */ |
| public static interface KeySlots { |
| |
| /** |
| * List of slots that store binding of constant values |
| * for primary key columns. For example: |
| * WHERE pk1 = 'foo' and pk2 = 'bar' |
| * would produce two KeySlot instances that store that |
| * pk1 = 'foo' and pk2 = 'bar'. |
| * @return |
| */ |
| public List<KeySlot> getSlots(); |
| /** |
| * Tracks whether or not the contained KeySlot(s) contain |
| * a slot that includes only a partial extraction of the |
| * involved expressions. For example: (A AND B) in the case |
| * of A being a PK column and B being a KV column, the |
| * KeySlots representing the AND would return true for |
| * isPartialExtraction. |
| * @return true if a partial expression extraction was |
| * done and false otherwise. |
| */ |
| public boolean isPartialExtraction(); |
| } |
| |
| /** |
| * |
| * Used during query compilation to represent the constant value of a |
| * primary key column based on expressions in the WHERE clause. These |
| * are combined together during the compilation of ANDs and ORs to |
| * to produce the start and stop scan range. |
| * |
| */ |
| static final class KeySlot { |
| private final int pkPosition; // Position in primary key |
| private final int pkSpan; // Will be > 1 for RVC |
| private final KeyPart keyPart; // Used to produce the KeyRanges below |
| // Multiple ranges means values that have been ORed together |
| private final List<KeyRange> keyRanges; |
| // If order rows returned from scan will match desired order declared in query |
| private final OrderPreserving orderPreserving; |
| |
| KeySlot(KeyPart keyPart, int pkPosition, int pkSpan, List<KeyRange> keyRanges, OrderPreserving orderPreserving) { |
| this.pkPosition = pkPosition; |
| this.pkSpan = pkSpan; |
| this.keyPart = keyPart; |
| this.keyRanges = keyRanges; |
| this.orderPreserving = orderPreserving; |
| } |
| |
| public KeyPart getKeyPart() { |
| return keyPart; |
| } |
| |
| public int getPKPosition() { |
| return pkPosition; |
| } |
| |
| public int getPKSpan() { |
| return pkSpan; |
| } |
| |
| public List<KeyRange> getKeyRanges() { |
| return keyRanges; |
| } |
| |
| public final KeySlot concatExtractNodes(List<Expression> extractNodes) { |
| return new KeySlot( |
| new BaseKeyPart(this.getKeyPart().getTable(), this.getKeyPart().getColumn(), |
| SchemaUtil.concat(this.getKeyPart().getExtractNodes(),extractNodes)), |
| this.getPKPosition(), |
| this.getPKSpan(), |
| this.getKeyRanges(), |
| this.getOrderPreserving()); |
| } |
| |
| public OrderPreserving getOrderPreserving() { |
| return orderPreserving; |
| } |
| } |
| |
| /** |
| * |
| * Implementation of KeySlots for AND and OR expressions. The |
| * List<KeySlot> will be in PK order. |
| * |
| */ |
| public static class MultiKeySlot implements KeySlots { |
| private final List<KeySlot> childSlots; |
| private final boolean partialExtraction; |
| |
| private MultiKeySlot(List<KeySlot> childSlots, boolean partialExtraction) { |
| this.childSlots = childSlots; |
| this.partialExtraction = partialExtraction; |
| } |
| |
| @Override |
| public List<KeySlot> getSlots() { |
| return childSlots; |
| } |
| |
| @Override |
| public boolean isPartialExtraction() { |
| return partialExtraction; |
| } |
| } |
| |
| /** |
| * |
| * Implementation of KeySlots for a constant value, |
| * |
| */ |
| public static class SingleKeySlot implements KeySlots { |
| private final List<KeySlot> slots; |
| |
| SingleKeySlot(KeyPart part, int pkPosition, List<KeyRange> ranges) { |
| this(part, pkPosition, 1, ranges); |
| } |
| |
| private SingleKeySlot(KeyPart part, int pkPosition, List<KeyRange> ranges, OrderPreserving orderPreserving) { |
| this(part, pkPosition, 1, ranges, orderPreserving); |
| } |
| |
| private SingleKeySlot(KeyPart part, int pkPosition, int pkSpan, List<KeyRange> ranges) { |
| this(part,pkPosition,pkSpan,ranges, null); |
| } |
| |
| private SingleKeySlot(KeyPart part, int pkPosition, int pkSpan, List<KeyRange> ranges, OrderPreserving orderPreserving) { |
| this.slots = Collections.singletonList(new KeySlot(part, pkPosition, pkSpan, ranges, orderPreserving)); |
| } |
| |
| @Override |
| public List<KeySlot> getSlots() { |
| return slots; |
| } |
| |
| @Override |
| public boolean isPartialExtraction() { |
| return this.slots.get(0).getKeyPart().getExtractNodes().isEmpty(); |
| } |
| |
| } |
| |
| public static class BaseKeyPart implements KeyPart { |
| @Override |
| public KeyRange getKeyRange(CompareOp op, Expression rhs) { |
| ImmutableBytesWritable ptr = new ImmutableBytesWritable(); |
| rhs.evaluate(null, ptr); |
| // If the column is fixed width, fill is up to it's byte size |
| PDataType type = getColumn().getDataType(); |
| if (type.isFixedWidth()) { |
| Integer length = getColumn().getMaxLength(); |
| if (length != null) { |
| // Go through type to pad as the fill character depends on the type. |
| type.pad(ptr, length, SortOrder.ASC); |
| } |
| } |
| byte[] key = ByteUtil.copyKeyBytesIfNecessary(ptr); |
| KeyRange range = ByteUtil.getKeyRange(key, rhs.getSortOrder().transform(op)/*op*/, type); |
| // Constants will have been inverted, so we invert them back here so that |
| // RVC comparisons work correctly (see PHOENIX-3383). |
| if (rhs.getSortOrder() == SortOrder.DESC) { |
| range = range.invert(); |
| } |
| return range; |
| } |
| |
| private final PTable table; |
| private final PColumn column; |
| private final List<Expression> nodes; |
| |
| private BaseKeyPart(PTable table, PColumn column, List<Expression> nodes) { |
| this.table = table; |
| this.column = column; |
| this.nodes = nodes; |
| } |
| |
| @Override |
| public List<Expression> getExtractNodes() { |
| return nodes; |
| } |
| |
| @Override |
| public PColumn getColumn() { |
| return column; |
| } |
| |
| @Override |
| public PTable getTable() { |
| return table; |
| } |
| } |
| |
| private static class CoerceKeySlot implements KeyPart { |
| |
| private final KeyPart childPart; |
| private final ImmutableBytesWritable ptr; |
| private final CoerceExpression node; |
| private final List<Expression> extractNodes; |
| |
| public CoerceKeySlot(KeyPart childPart, ImmutableBytesWritable ptr, |
| CoerceExpression node, List<Expression> extractNodes) { |
| this.childPart = childPart; |
| this.ptr = ptr; |
| this.node = node; |
| this.extractNodes = extractNodes; |
| } |
| |
| @Override |
| public KeyRange getKeyRange(CompareOp op, Expression rhs) { |
| KeyRange range = childPart.getKeyRange(op, rhs); |
| byte[] lower = range.getLowerRange(); |
| if (!range.lowerUnbound()) { |
| ptr.set(lower); |
| /*** |
| Do the reverse translation so we can optimize out the coerce expression |
| For the actual type of the coerceBytes call, we use the node type instead of |
| the rhs type, because for IN, the rhs type will be VARBINARY and no coerce |
| will be done in that case (and we need it to be done). |
| */ |
| node.getChild().getDataType().coerceBytes(ptr, node.getDataType(), |
| rhs.getSortOrder(), SortOrder.ASC); |
| lower = ByteUtil.copyKeyBytesIfNecessary(ptr); |
| } |
| byte[] upper = range.getUpperRange(); |
| if (!range.upperUnbound()) { |
| ptr.set(upper); |
| // Do the reverse translation so we can optimize out the coerce expression |
| node.getChild().getDataType().coerceBytes(ptr, node.getDataType(), |
| rhs.getSortOrder(), SortOrder.ASC); |
| upper = ByteUtil.copyKeyBytesIfNecessary(ptr); |
| } |
| range = KeyRange.getKeyRange(lower, range.isLowerInclusive(), upper, |
| range.isUpperInclusive()); |
| return range; |
| } |
| |
| @Override |
| public List<Expression> getExtractNodes() { |
| return extractNodes; |
| } |
| |
| @Override |
| public PColumn getColumn() { |
| return childPart.getColumn(); |
| } |
| |
| @Override |
| public PTable getTable() { |
| return childPart.getTable(); |
| } |
| } |
| |
| private class RowValueConstructorKeyPart implements KeyPart { |
| private final RowValueConstructorExpression rvc; |
| private final PColumn column; |
| private final List<Expression> nodes; |
| private final List<KeySlots> childSlots; |
| |
| private RowValueConstructorKeyPart(PColumn column, RowValueConstructorExpression rvc, int span, List<KeySlots> childSlots) { |
| this.column = column; |
| if (span == rvc.getChildren().size()) { |
| this.rvc = rvc; |
| this.nodes = Collections.<Expression>singletonList(rvc); |
| this.childSlots = childSlots; |
| } else { |
| this.rvc = new RowValueConstructorExpression(rvc.getChildren().subList(0, span),rvc.isStateless()); |
| this.nodes = Collections.<Expression>emptyList(); |
| this.childSlots = childSlots.subList(0, span); |
| } |
| } |
| |
| @Override |
| public List<Expression> getExtractNodes() { |
| return nodes; |
| } |
| |
| @Override |
| public PColumn getColumn() { |
| return column; |
| } |
| |
| @Override |
| public PTable getTable() { |
| return table; |
| } |
| |
| @Override |
| public KeyRange getKeyRange(CompareOp op, Expression rhs) { |
| // With row value constructors, we need to convert the operator for any transformation we do on individual values |
| // to prevent keys from being increased to the next key as would be done for fixed width values. The next key is |
| // done to compensate for the start key (lower range) always being inclusive (thus we convert > to >=) and the |
| // end key (upper range) always being exclusive (thus we convert <= to <). |
| boolean usedAllOfLHS = !nodes.isEmpty(); |
| final CompareOp rvcElementOp = op == CompareOp.LESS_OR_EQUAL ? CompareOp.LESS : op == CompareOp.GREATER ? CompareOp.GREATER_OR_EQUAL : op; |
| if (op != CompareOp.EQUAL) { |
| // We need to transform the comparison operator for a LHS row value constructor |
| // that is shorter than a RHS row value constructor when we're extracting it. |
| // For example: a < (1,2) is true if a = 1, so we need to switch |
| // the compare op to <= like this: a <= 1. Since we strip trailing nulls |
| // in the rvc, we don't need to worry about the a < (1,null) case. |
| if (usedAllOfLHS) { |
| if (rvc.getChildren().size() < rhs.getChildren().size()) { |
| if (op == CompareOp.LESS) { |
| op = CompareOp.LESS_OR_EQUAL; |
| } else if (op == CompareOp.GREATER_OR_EQUAL) { |
| op = CompareOp.GREATER; |
| } |
| } |
| } else { |
| // If we're not using all of the LHS, we need to expand the range on either |
| // side to take into account the rest of the LHS. For example: |
| // WHERE (pk1, pk3) > ('a',1) AND pk1 = 'a'. In this case, we'll end up |
| // only using (pk1) and ('a'), so if we use a > operator the expression |
| // would end up as degenerate since we'd have a non inclusive range for |
| // ('a'). By switching the operator to extend the range, we end up with |
| // an ('a') inclusive range which is correct. |
| if (rvc.getChildren().size() < rhs.getChildren().size()) { |
| if (op == CompareOp.LESS) { |
| op = CompareOp.LESS_OR_EQUAL; |
| } else if (op == CompareOp.GREATER) { |
| op = CompareOp.GREATER_OR_EQUAL; |
| } |
| } |
| } |
| } |
| if (!usedAllOfLHS || rvc.getChildren().size() != rhs.getChildren().size()) { |
| // We know that rhs was converted to a row value constructor and that it's a constant |
| rhs= new RowValueConstructorExpression(rhs.getChildren().subList(0, Math.min(rvc.getChildren().size(), rhs.getChildren().size())), rhs.isStateless()); |
| } |
| /* |
| * Recursively transform the RHS row value constructor by applying the same logic as |
| * is done elsewhere during WHERE optimization: optimizing out LHS functions by applying |
| * the appropriate transformation to the RHS key. |
| */ |
| // Child slot iterator parallel with child expressions of the LHS row value constructor |
| final Iterator<KeySlots> keySlotsIterator = childSlots.iterator(); |
| try { |
| // Call our static row value expression constructor with the current LHS row value constructor and |
| // the current RHS (which has already been coerced to match the LHS expression). We pass through an |
| // implementation of ExpressionComparabilityWrapper that transforms the RHS key to match the row key |
| // structure of the LHS column. This is essentially optimizing out the expressions on the LHS by |
| // applying the appropriate transformations to the RHS (through the KeyPart#getKeyRange method). |
| // For example, with WHERE (invert(a),b) < ('abc',5), the 'abc' would be inverted by going through the |
| // childPart.getKeyRange defined for the invert function. |
| rhs = BaseExpression.coerce(rvc, rhs, new ExpressionComparabilityWrapper() { |
| |
| @Override |
| public Expression wrap(final Expression lhs, final Expression rhs, boolean rowKeyOrderOptimizable) throws SQLException { |
| final KeyPart childPart = keySlotsIterator.next().getSlots().get(0).getKeyPart(); |
| // TODO: DelegateExpression |
| return new BaseTerminalExpressionWrap(childPart, rhs, rvcElementOp, |
| lhs); |
| } |
| |
| }, table.rowKeyOrderOptimizable()); |
| } catch (SQLException e) { |
| return null; // Shouldn't happen |
| } |
| ImmutableBytesWritable ptr = context.getTempPtr(); |
| if (!rhs.evaluate(null, ptr)) { // Don't return if evaluated to null |
| return null; |
| } |
| byte[] key = ByteUtil.copyKeyBytesIfNecessary(ptr); |
| KeyRange range = ByteUtil.getKeyRange(key, /*rvc.getChildren().get(rhs.getChildren().size()-1).getSortOrder().transform(op)*/op, PVarbinary.INSTANCE); |
| return range; |
| } |
| |
| private class BaseTerminalExpressionWrap extends BaseTerminalExpression { |
| private final KeyPart childPart; |
| private final Expression rhs; |
| private final CompareOp rvcElementOp; |
| private final Expression lhs; |
| |
| public BaseTerminalExpressionWrap(KeyPart childPart, Expression rhs, |
| CompareOp rvcElementOp, Expression lhs) { |
| this.childPart = childPart; |
| this.rhs = rhs; |
| this.rvcElementOp = rvcElementOp; |
| this.lhs = lhs; |
| } |
| |
| @Override |
| public boolean evaluate(Tuple tuple, ImmutableBytesWritable ptr) { |
| if (childPart == null) { |
| return rhs.evaluate(tuple, ptr); |
| } |
| if (!rhs.evaluate(tuple, ptr)) { |
| return false; |
| } |
| if (ptr.getLength() == 0) { |
| ptr.set(ByteUtil.EMPTY_BYTE_ARRAY); |
| return true; |
| } |
| // The op used to compute rvcElementOp did not take into account the sort order, |
| // and thus we need to transform it here before delegating to the child part |
| // which will do the required inversion. |
| KeyRange range = childPart.getKeyRange( |
| rhs.getSortOrder().transform(rvcElementOp), rhs); |
| // Swap the upper and lower range if descending to compensate for the transform |
| // we did above of the rvcElementOp. |
| if (rhs.getSortOrder() == SortOrder.DESC) { |
| range = KeyRange.getKeyRange(range.getUpperRange(), |
| range.isUpperInclusive(), range.getLowerRange(), |
| range.isLowerInclusive()); |
| } |
| // This can happen when an EQUAL operator is used and the expression cannot |
| // possibly match. |
| if (range == KeyRange.EMPTY_RANGE) { |
| return false; |
| } |
| /** |
| We have to take the range and condense it down to a single key. We use which |
| ever part of the range is inclusive (which implies being bound as well). This |
| works in all cases, including this substring one, which produces a lower |
| inclusive range and an upper non inclusive range. |
| (a, substr(b,1,1)) IN (('a','b'), ('c','d')) |
| */ |
| byte[] key = range.isLowerInclusive() ? |
| range.getLowerRange() : range.getUpperRange(); |
| /** |
| FIXME: |
| this is kind of a hack. The above call will fill a fixed width key,but |
| we don't want to fill the key yet because it can throw off our the logic we |
| use to compute the next key when we evaluate the RHS row value constructor |
| below. We could create a new childPart with a delegate column that returns |
| null for getByteSize(). |
| */ |
| if (lhs.getDataType().isFixedWidth() && |
| lhs.getMaxLength() != null && key.length > lhs.getMaxLength()) { |
| // Don't use PDataType.pad(), as this only grows the value, |
| // while this is shrinking it. |
| key = Arrays.copyOf(key, lhs.getMaxLength()); |
| } |
| ptr.set(key); |
| return true; |
| } |
| |
| @Override |
| public PDataType getDataType() { |
| return childPart.getColumn().getDataType(); |
| } |
| |
| @Override |
| public boolean isNullable() { |
| return childPart.getColumn().isNullable(); |
| } |
| |
| @Override |
| public Integer getMaxLength() { |
| return lhs.getMaxLength(); |
| } |
| |
| @Override |
| public Integer getScale() { |
| return childPart.getColumn().getScale(); |
| } |
| |
| @Override |
| public SortOrder getSortOrder() { |
| //See PHOENIX-4969: Clean up and unify code paths for RVCs with |
| // respect to Optimizations for SortOrder |
| //Handle the different paths for InList vs Normal Comparison |
| //The code paths in InList assume the sortOrder is ASC for |
| // their optimizations |
| //The code paths for Comparisons on RVC rewrite equality, |
| // for the non-equality cases return actual sort order |
| //This work around should work |
| // but a more general approach can be taken. |
| if(rvcElementOp == CompareOp.EQUAL || |
| rvcElementOp == CompareOp.NOT_EQUAL){ |
| return SortOrder.ASC; |
| } |
| return childPart.getColumn().getSortOrder(); |
| } |
| |
| @Override |
| public <T> T accept(ExpressionVisitor<T> visitor) { |
| return null; |
| } |
| } |
| } |
| } |
| } |