blob: 261b4e141caae8914befcdaf79c93b118e0b9349 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.datasketches.hllmap;
import java.math.BigInteger;
import org.apache.datasketches.hash.MurmurHash3;
/**
* Base class and API for all the maps.
*
* @author Lee Rhodes
* @author Alexander Saydakov
* @author Kevin Lang
*/
abstract class Map {
private static final String LS = System.getProperty("line.separator");
static final long SEED = 1234567890L;
static final int SIX_BIT_MASK = 0X3F; // 6 bits
static final int TEN_BIT_MASK = 0X3FF; //10 bits
// These parameters are tuned as a set to avoid pathological resizing.
// Consider modeling the behavior before changing any of them
static final int COUPON_MAP_MIN_NUM_ENTRIES = 157;
static final double COUPON_MAP_SHRINK_TRIGGER_FACTOR = 0.5;
static final double COUPON_MAP_GROW_TRIGGER_FACTOR = 15.0 / 16.0;
static final double COUPON_MAP_TARGET_FILL_FACTOR = 2.0 / 3.0;
final int keySizeBytes_;
Map(final int keySizeBytes) {
keySizeBytes_ = keySizeBytes;
}
/**
* Update this map with a key and a coupon.
* Return the cardinality estimate of all identifiers that have been associated with this key,
* including this update.
* @param key the dimensional criteria for measuring cardinality
* @param coupon the property associated with the key for which cardinality is to be measured.
* @return the cardinality estimate of all identifiers that have been associated with this key,
* including this update.
*/
abstract double update(byte[] key, short coupon);
/**
* Updates this map with an index and a coupon
* @param index the given index
* @param coupon the given coupon
* @return the cardinality estimate of all identifiers that have been associated with this key,
* including this update.
*/
abstract double update(int index, short coupon);
/**
* Returns the estimate of the cardinality of identifiers associated with the given key.
* @param key the given key
* @return the estimate of the cardinality of identifiers associated with the given key.
*/
abstract double getEstimate(byte[] key);
/**
* Update the internal estimate at the given index
* @param index the given index
* @param estimate the given estimate
*/
void updateEstimate(final int index, final double estimate) {}
/**
* Returns the upper bound cardinality with respect to {@link #getEstimate(byte[])} associated
* with the given key. This approximates the RSE with 68% confidence.
* @param key the given key
* @return the upper bound cardinality with respect to {@link #getEstimate(byte[])} associated
* with the given key.
*/
abstract double getUpperBound(byte[] key);
/**
* Returns the lower bound cardinality with respect to {@link #getEstimate(byte[])} associated
* with the given key. This approximates the RSE with 68% confidence.
* @param key the given key
* @return the lower bound cardinality with respect to {@link #getEstimate(byte[])} associated
* with the given key.
*/
abstract double getLowerBound(byte[] key);
abstract int findKey(byte[] key);
abstract int findOrInsertKey(byte[] key);
abstract CouponsIterator getCouponsIterator(int index);
abstract int getMaxCouponsPerEntry();
abstract int getCapacityCouponsPerEntry();
abstract int getActiveEntries();
abstract int getDeletedEntries();
abstract double getEntrySizeBytes();
abstract int getTableEntries();
abstract int getCapacityEntries();
abstract int getCurrentCountEntries();
abstract long getMemoryUsageBytes();
int getKeySizeBytes() {
return keySizeBytes_;
}
/**
* Delete the key at the given index
* @param index the given index
*/
void deleteKey(final int index) {}
/**
* Returns <code>true</code> if the two specified sub-arrays of bytes are <i>equal</i> to one another.
* Two arrays are considered equal if all corresponding pairs of elements in the two arrays are
* equal. In other words, two arrays are equal if and only if they contain the same elements
* in the same order.
*
* @param a one sub-array to be tested for equality
* @param offsetA the offset in bytes of the start of sub-array <i>a</i>.
* @param b the other sub-array to be tested for equality
* @param offsetB the offset in bytes of the start of sub-array <i>b</i>.
* @param length the length in bytes of the two sub-arrays.
* @return <code>true</code> if the two sub-arrays are equal
*/
static final boolean arraysEqual(final byte[] a, final int offsetA, final byte[] b,
final int offsetB, final int length) {
for (int i = 0; i < length; i++) {
if (a[i + offsetA] != b[i + offsetB]) {
return false;
}
}
return true;
}
/**
* Returns the HLL array index and value as a 16-bit coupon given the identifier to be hashed
* and k.
* @param identifier the given identifier
* @return the HLL array index and value
*/
static final int coupon16(final byte[] identifier) {
final long[] hash = MurmurHash3.hash(identifier, SEED);
final int hllIdx = (int) (((hash[0] >>> 1) % 1024) & TEN_BIT_MASK); //hash[0] for 10-bit address
final int lz = Long.numberOfLeadingZeros(hash[1]);
final int value = (lz > 62 ? 62 : lz) + 1;
return (value << 10) | hllIdx;
}
static final int coupon16Value(final int coupon) {
return (coupon >>> 10) & SIX_BIT_MASK;
}
static final int getIndex(final long hash, final int tableEntries) {
return (int) ((hash >>> 1) % tableEntries);
}
static final int getStride(final long hash, final int tableEntries) {
return (int) ((hash >>> 1) % (tableEntries - 2L) + 1L);
}
static boolean isBitSet(final byte[] byteArr, final int bitIndex) {
final int byteIndex = bitIndex / 8;
final int mask = 1 << (bitIndex % 8);
return (byteArr[byteIndex] & mask) > 0;
}
static boolean isBitClear(final byte[] byteArr, final int bitIndex) {
final int byteIndex = bitIndex / 8;
final int mask = 1 << (bitIndex % 8);
return (byteArr[byteIndex] & mask) == 0;
}
static void clearBit(final byte[] byteArr, final int index) {
final int byteIndex = index / 8;
final int mask = 1 << (index % 8);
byteArr[byteIndex] &= ~mask;
}
static void setBit(final byte[] bits, final int index) {
final int byteIndex = index / 8;
final int mask = 1 << (index % 8);
bits[byteIndex] |= mask;
}
/**
* Returns the next prime number that is greater than the given target. There will be
* no prime numbers less than the returned prime number that are greater than the given target.
* @param target the starting value to begin the search for the next prime
* @return the next prime number that is greater than the given target.
*/
static int nextPrime(final int target) {
return BigInteger.valueOf(target).nextProbablePrime().intValueExact();
}
static String fmtLong(final long value) {
return String.format("%,d", value);
}
static String fmtDouble(final double value) {
return String.format("%,.3f", value);
}
@Override
public String toString() {
final String mcpe = Map.fmtLong(getMaxCouponsPerEntry());
final String ccpe = Map.fmtLong(getCapacityCouponsPerEntry());
final String te = Map.fmtLong(getTableEntries());
final String ce = Map.fmtLong(getCapacityEntries());
final String cce = Map.fmtLong(getCurrentCountEntries());
final String ae = Map.fmtLong(getActiveEntries());
final String de = Map.fmtLong(getDeletedEntries());
final String esb = Map.fmtDouble(getEntrySizeBytes());
final String mub = Map.fmtLong(getMemoryUsageBytes());
final StringBuilder sb = new StringBuilder();
final String thisSimpleName = this.getClass().getSimpleName();
sb.append("### ").append(thisSimpleName).append(" SUMMARY: ").append(LS);
sb.append(" Max Coupons Per Entry : ").append(mcpe).append(LS);
sb.append(" Capacity Coupons Per Entry: ").append(ccpe).append(LS);
sb.append(" Table Entries : ").append(te).append(LS);
sb.append(" Capacity Entries : ").append(ce).append(LS);
sb.append(" Current Count Entries : ").append(cce).append(LS);
sb.append(" Active Entries : ").append(ae).append(LS);
sb.append(" Deleted Entries : ").append(de).append(LS);
sb.append(" Entry Size Bytes : ").append(esb).append(LS);
sb.append(" Memory Usage Bytes : ").append(mub).append(LS);
sb.append("### END SKETCH SUMMARY").append(LS);
return sb.toString();
}
}