blob: 534490fb44175fa72665837a8535f4dc6ad30a8b [file] [log] [blame]
/*-
* Copyright (C) 2002, 2018, Oracle and/or its affiliates. All rights reserved.
*
* This file was distributed by Oracle as part of a version of Oracle Berkeley
* DB Java Edition made available at:
*
* http://www.oracle.com/technetwork/database/database-technologies/berkeleydb/downloads/index.html
*
* Please see the LICENSE file included in the top-level directory of the
* appropriate version of Oracle Berkeley DB Java Edition for a copy of the
* license and additional information.
*/
package com.sleepycat.je.tree;
/**
* Contains static methods for estimating record storage size.
*
* Currently this only applies to KVS because we assume that VLSNs are
* preserved.
*/
public class StorageSize {
/*
* Maximum size of the per-LN overhead.
*
* The overhead is variable and depends on several factors, see
* LNLogEntry.getSize(). The following cases are considered:
*
* 25: cleaned and migrated LN (no txn info), no TTL:
* 22: header (type, checksum, flags, prevOffset, size, vlsn)
* 2: data length
* 1: flags
*
* 43: insertion, with TTL:
* 25: same as above
* 2: expiration
* 8: txnId
* 8: lastLoggedLsn
*
* 53: update, with TTL:
* 43: same as above
* 8: abortLsn
* 2: abortExpiration
*
* 50 is used as a conservative estimate for LN_OVERHEAD. Updates will be
* relatively infrequent.
*/
private final static int LN_OVERHEAD = 50;
/*
* Maximum size of the per-slot overhead.
*
* The overhead is variable and depends on several factors, see
* IN.getLogSize. The following cases are considered:
*
* 11: Minimum for all cases
* 8: lsn
* 1: keySize
* 1: state
* 1: expiration
*
* 12: Secondary DB, with TTL
* 11: minimum above
* 1: data size
*
* 13: Separate LN in primary DB, with TTL
* 11: minimum above
* 2: lastLoggedSize
*
* 20: Embedded LN in primary DB, with TTL
* 11: minimum above
* 1: data size
* 8: vlsn
*
* 12 is used for SEC_SLOT_OVERHEAD as a conservative estimate.
*
* 14 is used for PRI_SLOT_OVERHEAD and in the customer formula for both
* the separate LN and embedded LN cases. The slot overhead for the
* embedded case will be larger, but in that case there are significant
* savings because the primary key is not duplicated.
*/
private final static int SEC_SLOT_OVERHEAD = 12;
private final static int PRI_SLOT_OVERHEAD = 14;
private final static int PRI_EMBEDDED_LN_SLOT_OVERHEAD = 20;
/* Static methods only. */
private StorageSize() {}
/**
* Returns the estimated disk storage size for the record in the given BIN
* slot. This method does not fetch the LN.
* <p>
* For KVS, a formula that customers will use to predict the storage for a
* given set of records, not including obsolete size (size available for
* reclamation by the cleaner), is as follows.
* <p>
* The storage overhead for a single Row (JE primary record) is:
* <pre>
* Serialized size of the Row, all fields (JE key + data size)
* +
* Serialized size of the PrimaryKey fields (JE key size)
* +
* Fixed per-Row internal overhead (64: LN_OVERHEAD + PRI_SLOT_OVERHEAD)
* </pre>
*
* The storage overhead for an Index record is:
* <pre>
* Serialized size of the IndexKey fields (JE key size)
* +
* Serialized size of the PrimaryKey fields (JE data size)
* +
* Fixed per-IndexKey internal overhead (12: SEC_SLOT_OVERHEAD)
* </pre>
*
* This method returns the size estimate for an actual record based on the
* use of that formula, getting the key and data size (or lastLoggedSize)
* from the BIN. The amount calculated using the formula above will
* normally be larger than the size returned by this method, for several
* reasons:
* <ul>
* <li>
* This method uses the key size after it is reduced by prefix
* compression.
* </li>
* <li>
* For a separate (non-embedded) LN, this method uses the lastLoggedSize
* rather than adding LN_OVERHEAD to the data size (this is why
* LN_OVERHEAD is not referenced in code here). This is more accurate
* since the actual LN overhead is reduced due to integer packing, etc.
* Also, this method cannot fetch the LN, so the data size is unknown.
* </li>
* <li>
* For an embedded LN in a primary DB, the returned size does not
* include the LN size, since the LN is always obsolete. This means the
* primary key size is not counted redundantly and the LN_OVERHEAD is not
* included in the return value, as they are in the formula. These are
* significant differences, but since embedded LNs require a data size
* LTE 16, this is not expected to be a common use case. If it becomes
* common, we should add a new case for this to the customer formula.
* </li>
* </ul>
*
* In addition, the size returned by this method will normally be larger
* than the actual storage size on disk. This is because this method uses
* PRI_SLOT_OVERHEAD and SEC_SLOT_OVERHEAD to calculate the Btree slot
* space, rather than using the serialized size of the slot. These constant
* values are somewhat larger than the actual overheads, since they do not
* take into account integer packing, etc. See the comments above these
* constants. The serialized slot size was not used here for simplicity and
* speed, plus this additional size compensates for uncounted sizes such as
* per-BIN and UIN overhead.
*
* @return the estimated storage size, or zero when the size is unknown
* because a non-embedded LN is not resident and the LN was logged with a
* JE version prior to 6.0.
*/
public static int getStorageSize(final BIN bin, final int idx) {
final int storedKeySize = bin.getStoredKeySize(idx);
/*
* For a JE secondary DB record (KVS Index record), return:
*
* data-size + key-size + SEC_SLOT_OVERHEAD
*
* where data-size is serialized IndexKey size
* and key-size is serialized PrimaryKey size.
*
* The storedKeySize includes key-size, data-size, and one extra byte
* for data (primary key) size. We subtract it here because it is
* included in SEC_SLOT_OVERHEAD.
*/
if (bin.getDatabase().getSortedDuplicates()) {
return storedKeySize - 1 + SEC_SLOT_OVERHEAD;
}
/*
* For an embedded-LN JE primary DB record (KVS Row):
*
* Return data-size + key-size + PRI_SLOT_OVERHEAD
*
* where (data-size + key-size) is serialized Row size
* and key-size is serialized PrimaryKey size
*
* The storedKeySize includes key-size, data-size, and one extra byte
* for data (primary key) size. We subtract it here because it is
* included in PRI_EMBEDDED_LN_SLOT_OVERHEAD.
*/
if (bin.isEmbeddedLN(idx)) {
return storedKeySize - 1 + PRI_EMBEDDED_LN_SLOT_OVERHEAD;
}
/*
* For a separate (non-embedded) JE primary DB record (KVS Row):
*
* Return LN-log-size + key-size + PRI_SLOT_OVERHEAD
*
* where LN-log-size is LN_OVERHEAD (or less) + data-size + key-size
* and (data-size + key-size) is serialized Row size
* and key-size is serialized PrimaryKey size
*
* The storedKeySize is the key-size alone.
*/
final int lastLoggedSize = bin.getLastLoggedSize(idx);
if (lastLoggedSize == 0) {
/* Size is unknown. */
return 0;
}
return lastLoggedSize + storedKeySize + PRI_SLOT_OVERHEAD;
}
}