blob: 2837f7647c04e6361b0a70aa38d880bc117ba311 [file] [log] [blame]
/*-
* Copyright (C) 2002, 2018, Oracle and/or its affiliates. All rights reserved.
*
* This file was distributed by Oracle as part of a version of Oracle Berkeley
* DB Java Edition made available at:
*
* http://www.oracle.com/technetwork/database/database-technologies/berkeleydb/downloads/index.html
*
* Please see the LICENSE file included in the top-level directory of the
* appropriate version of Oracle Berkeley DB Java Edition for a copy of the
* license and additional information.
*/
package com.sleepycat.je.tree;
import java.util.ArrayList;
import java.util.List;
import com.sleepycat.je.EnvironmentFailureException;
import com.sleepycat.je.OperationResult;
import com.sleepycat.je.dbi.CursorImpl;
import com.sleepycat.je.dbi.DatabaseImpl;
import com.sleepycat.je.txn.LockType;
/**
* Estimates the number of non-deleted BIN entries between two end points,
* using information returned in TrackingInfo from Tree.getParentINForChildIN.
* Used for estimating dup counts, e.g., for join query optimization. Accuracy
* is limited by the number of samples taken to compute the average number of
* entries at each level. Currently only two samples (at the end points) are
* taken, and because the tree is not balanced making the number of entries
* highly variable, the count can easily be off by a factor of two.
*/
public class CountEstimator {
/* If exceeded, there must be a bug of some kind. */
private static final int MAX_RETRIES_AFTER_SPLIT = 100;
/**
* Returns an estimate of the number of records between two end points
* specified by begin/end cursor positions.
*/
public static long count(DatabaseImpl dbImpl,
CursorImpl beginCursor,
boolean beginInclusive,
CursorImpl endCursor,
boolean endInclusive) {
/* If the two cursors are at the same position, return 1. */
if (beginCursor.isOnSamePosition(endCursor)) {
return 1;
}
/* Compute estimate for cursors at different positions. */
final CountEstimator estimator = new CountEstimator(dbImpl);
return estimator.count(beginCursor, endCursor) +
(beginInclusive ? 1 : 0) +
(endInclusive ? 1 : 0);
}
private final DatabaseImpl dbImpl;
private List<TrackingInfo> beginStack;
private List<TrackingInfo> endStack;
private final List<List<TrackingInfo>> avgEntriesStacks =
new ArrayList<List<TrackingInfo>>();
private int levelCount;
private int rootLevel;
private float[] avgEntries;
private CountEstimator(DatabaseImpl dbImpl) {
this.dbImpl = dbImpl;
}
private long count(CursorImpl beginCursor, CursorImpl endCursor) {
for (int numRetries = 0;; numRetries += 1) {
/*
* If we have retried too many times, give up. This is probably
* due to a bug of some kind, and we shouldn't loop forever.
*/
if (numRetries > MAX_RETRIES_AFTER_SPLIT) {
throw EnvironmentFailureException.unexpectedState();
}
/*
* Set up the initial info for the computation. Retry if a split
* occurs.
*/
beginStack = beginCursor.getAncestorPath();
if (beginStack == null) {
continue;
}
endStack = endCursor.getAncestorPath();
if (endStack == null) {
continue;
}
if (!findCommonAncestor()) {
continue;
}
/* Get the the average entries from the two end points. */
getAvgEntries(beginCursor, endCursor);
/*
* Return the count. FUTURE: Taking more samples between the two
* end points would increase accuracy.
*/
return countTotal();
}
}
/**
* Find the common ancestor node for the two end points, which we'll call
* the root level. If no common ancestor can be found, return false to
* restart the process, because a split must have occurred in between
* getting the two stacks for the end points.
*/
private boolean findCommonAncestor() {
levelCount = beginStack.size();
if (levelCount != endStack.size()) {
/* Must have been a root split. */
return false;
}
rootLevel = -1;
for (int level = levelCount - 1; level >= 0; level -= 1) {
if (beginStack.get(level).nodeId == endStack.get(level).nodeId) {
rootLevel = level;
break;
}
}
if (rootLevel < 0) {
/* Must have been a split. */
return false;
}
return true;
}
/**
* This method starts with a preliminary average using just the two end
* points.
*/
private void getAvgEntries(CursorImpl beginCursor, CursorImpl endCursor) {
avgEntriesStacks.clear();
if (!addAvgEntriesSample(beginStack)) {
sampleNextBIN(beginCursor, true /*moveForward*/);
}
if (!addAvgEntriesSample(endStack)) {
sampleNextBIN(endCursor, false /*moveForward*/);
}
computeAvgEntries();
}
/**
* FUTURE: use internal skip method instead, saving a btree lookup.
*/
private void sampleNextBIN(
CursorImpl beginOrEndCursor,
boolean moveForward) {
final CursorImpl cursor =
beginOrEndCursor.cloneCursor(true /*samePosition*/);
try {
cursor.latchBIN();
if (moveForward) {
cursor.setOnLastSlot();
} else {
cursor.setOnFirstSlot();
}
final OperationResult result = cursor.getNext(
null /*foundKey*/, null /*foundData*/,
LockType.NONE, false /*dirtyReadAll*/,
moveForward, true /*alreadyLatched*/,
null /*rangeConstraint*/);
if (result != null) {
final List<TrackingInfo> stack = cursor.getAncestorPath();
if (stack != null) {
addAvgEntriesSample(stack);
}
}
} finally {
cursor.close();
}
}
/**
* At each level we compute the average number of entries. This will be
* used as a multipler to estimate the number of nodes for any IN at that
* level.
*/
private void computeAvgEntries() {
avgEntries = new float[levelCount];
avgEntries[levelCount - 1] = 1.0F;
if (avgEntriesStacks.size() == 0) {
return;
}
for (int level = levelCount - 1; level > 0; level -= 1) {
long totalEntries = 0;
for (List<TrackingInfo> stack : avgEntriesStacks) {
totalEntries += stack.get(level).entries;
}
final float avg = totalEntries / ((float) avgEntriesStacks.size());
avgEntries[level - 1] = avg * avgEntries[level];
}
}
private boolean addAvgEntriesSample(List<TrackingInfo> stack) {
if (isFirstBIN(stack) || isLastBIN(stack)) {
return false;
}
avgEntriesStacks.add(stack);
return true;
}
private boolean isFirstBIN(List<TrackingInfo> stack) {
for (int i = 0; i < stack.size() - 1; i += 1) {
final TrackingInfo info = stack.get(i);
if (info.index != 0) {
return false;
}
}
return true;
}
private boolean isLastBIN(List<TrackingInfo> stack) {
for (int i = 0; i < stack.size() - 1; i += 1) {
final TrackingInfo info = stack.get(i);
if (info.index != info.entries - 1) {
return false;
}
}
return true;
}
/**
* Count the total for each node that is between the two end points.
*/
private long countTotal() {
long total = 0;
/* Add nodes between the end points at the root level. */
final int rootIndex1 = beginStack.get(rootLevel).index + 1;
final int rootIndex2 = endStack.get(rootLevel).index;
if (rootIndex2 > rootIndex1) {
total += Math.round((rootIndex2 - rootIndex1) *
avgEntries[rootLevel]);
}
/* Add nodes under the end points at lower levels. */
for (int level = rootLevel + 1; level < levelCount; level += 1) {
/* Add nodes under left end point that are to its right. */
final int leftIndex = beginStack.get(level).index;
final int lastIndex = beginStack.get(level).entries - 1;
if (lastIndex > leftIndex) {
total += Math.round((lastIndex - leftIndex) *
avgEntries[level]);
}
/* Add nodes under right end point that are to its left. */
final int rightIndex = endStack.get(level).index;
final int firstIndex = 0;
if (rightIndex > firstIndex) {
total += Math.round((rightIndex - firstIndex) *
avgEntries[level]);
}
}
return total;
}
/* For future use, if getKeyRatios is exposed in the API. */
static class KeyRatios {
final double less;
final double equal;
final double greater;
KeyRatios(double less, double equal, double greater) {
this.less = less;
this.equal = equal;
this.greater = greater;
}
@Override
public String toString() {
return "less: " + less +
" equal: " + equal +
" greater: " + greater;
}
}
/*
* For future use, if getKeyRatios is exposed in the API. Be sure to test
* boundary conditions when index is 0 or nEntries.
*
* Algorithm copied from __bam_key_range in BDB btree/bt_stat.c.
*/
static KeyRatios getKeyRatios(List<TrackingInfo> infoByLevel,
boolean exact) {
double factor = 1.0;
double less = 0.0;
double greater = 0.0;
/*
* At each level we know that INs greater than index contain keys
* greater than what we are looking for and those less than index are
* less than. The one pointed to by index may have some less, some
* greater or even equal. If index is equal to the number of entries,
* then the key is out of range and everything is less.
*/
for (final TrackingInfo info : infoByLevel) {
if (info.index == 0) {
greater += (factor * (info.entries - 1)) / info.entries;
} else if (info.index == info.entries) {
less += factor;
} else {
less += (factor * info.index) / info.entries;
greater += (factor * ((info.entries - info.index) - 1)) /
info.entries;
}
/* Factor at next level down is 1/n'th the amount at this level. */
factor /= info.entries;
/*
System.out.println("factor: " + factor +
" less: " + less +
" greater: " + greater);
*/
}
/*
* If there was an exact match then assign the 1/n'th factor to the key
* itself. Otherwise that factor belongs to those greater than the
* key, unless the key was out of range.
*/
final double equal;
if (exact) {
equal = factor;
} else {
if (less != 1.0) {
greater += factor;
}
equal = 0.0;
}
return new KeyRatios(less, equal, greater);
}
}