blob: 38ddd75ae3429a095546f5050623b38002257341 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.uima.internal.util;
import java.util.Arrays;
import java.util.function.IntConsumer;
import java.util.function.IntPredicate;
import java.util.function.IntSupplier;
/**
* A common superclass for hash maps and hash sets
*
*/
public abstract class Common_hash_support {
// set to true to collect statistics for tuning
// you have to also put a call to showHistogram() at the end of the run
protected static final boolean TUNE = false;
protected static final int MIN_SIZE = 10; // 10 / .66 =15.15
protected static final int MIN_CAPACITY = 16;
protected static final int MIN_CAPACITY_SHRINK = 64; // don't shrink below this - thrashing
protected final float loadFactor;
protected final int initialCapacity;
protected int histogram [];
protected int maxProbe = 0;
protected int sizeWhichTriggersExpansion;
private int size = 0; // number of elements in the table
protected int removed = 0; // for rebalancing
/** set to the first found_removed when searching */
protected int found_removed;
protected boolean secondTimeShrinkable = false;
/**
* @param initialSizeBeforeExpanding the number of elements the table should hold before expanding
*/
public Common_hash_support(int initialSizeBeforeExpanding) {
this(initialSizeBeforeExpanding, 0.66F);
}
public Common_hash_support(int initialSizeBeforeExpanding, float factor) {
this.loadFactor = factor;
this.initialCapacity = tableSpace(initialSizeBeforeExpanding, factor);
if (TUNE) {
histogram = new int[200];
Arrays.fill(histogram, 0);
maxProbe = 0;
}
}
public Common_hash_support(Common_hash_support orig) {
this(orig.initialCapacity);
this.sizeWhichTriggersExpansion = orig.sizeWhichTriggersExpansion;
this.size = orig.size;
this.removed = orig.removed;
this.secondTimeShrinkable = orig.secondTimeShrinkable;
// copy doesn't do tuning measurements
this.histogram = null;
}
public void clear() {
// see if size is less than the 1/2 size that triggers expansion
if (size + removed < (sizeWhichTriggersExpansion >>> 1)) {
// if 2nd time then shrink by 50%
// this is done to avoid thrashing around the threshold
if (secondTimeShrinkable) {
secondTimeShrinkable = false;
final int newCapacity = Math.max(initialCapacity, keys_length() >>> 1);
if (newCapacity < keys_length()) {
newTable(newCapacity); // shrink table by 50%
size = 0;
removed = 0;
resetHistogram();
if (PositiveIntSet.IS_TRACE_MODE_SWITCH) {
System.out.println("TRAcE_MODE Common_hash clear 2nd time shrinkable, newCapacity=" + newCapacity + ", keys_length: " + keys_length());
}
return;
} else { // don't shrink below minimum
clearExisting();
if (PositiveIntSet.IS_TRACE_MODE_SWITCH) {
System.out.println("TRAcE_MODE Common_hash clear 2nd time shrinkable but nothing done, below minimum: newCapacity=" + newCapacity + ", keys_length: " + keys_length());
}
return;
}
} else {
if (PositiveIntSet.IS_TRACE_MODE_SWITCH) System.out.println("TRACE_MODE Common_hash clear setting 2nd time shrinkable");
secondTimeShrinkable = true;
}
} else {
secondTimeShrinkable = false; // reset this to require 2 triggers in a row
}
clearExisting();
}
private void clearExisting() {
clearKeysAndValues();
size = 0;
removed = 0;
resetHistogram();
}
/** It gets a ref to the current value of table, and then searches that array.
* Side effect: found_removed is set to the position of the first REMOVED_KEY (if any) encountered
* during the search.
*
* @param hash the hash code of the key
* @param is_eq_or_not_present true if the key at the int position is == to the key, or is 0
* @param is_removed_key true if the key at the int position is "removed"
* @return the probeAddr in keys array. The value is the not-present-value if not found
*/
protected int findPosition(
int hash,
IntPredicate is_eq_or_not_present,
IntPredicate is_removed_key) {
found_removed = -1;
// final int hash = key_hash.getAsInt();
// final int[] localKeys = keys;
final int bitMask = keys_length() - 1;
int nbrProbes = 1;
int probeDelta = 0;
int probeAddr = hash & bitMask;
for (;;) {
if (is_eq_or_not_present.test(probeAddr)) {
if (TUNE) {
updateHistogram(nbrProbes);
}
return probeAddr;
}
if (found_removed == -1 && is_removed_key.test(probeAddr)) {
found_removed = probeAddr;
}
if (TUNE) nbrProbes++;
if (probeDelta < 13) {
probeDelta ++; // stop at a prime
// which guarantees all slots eventually traversed.
}
probeAddr = bitMask & (probeAddr + probeDelta);
}
//
// // fast paths
// if (is_eq_or_not_present.test(probeAddr)) {
//// final int testKey = localKeys[probeAddr];
//// if (testKey == 0 || testKey == key) {
// if (TUNE) {
// updateHistogram(1);
// }
// return probeAddr;
// }
// if (is_removed_key.test(probeAddr)) {
//// testKey == REMOVED_KEY) {
// found_removed = probeAddr;
// }
// return findPosition2(is_eq_or_not_present, is_removed_key, probeAddr);
}
// private int findPosition2(IntPredicate is_eq_or_not_present,
// IntPredicate is_removed_key,
// int probeAddr) {
// final int bitMask = keys_length() - 1;
// int nbrProbes = 2;
// int probeDelta = 1;
// probeAddr = bitMask & (probeAddr + (probeDelta++));
//
// while (true) {
// if (is_eq_or_not_present.test(probeAddr)) {
//// final int testKey = localKeys[probeAddr];
//// if (testKey == 0 || testKey == key) {
// break;
// }
//
// if (found_removed == -1 && is_removed_key.test(probeAddr)) {
// found_removed = probeAddr;
// }
// nbrProbes++;
// if (probeDelta < 13) {
// probeDelta ++; // stop at a prime
// // which guarantees all slots eventually traversed.
// }
// probeAddr = bitMask & (probeAddr + (probeDelta++));
// }
//
// if (TUNE) {
// final int pv = histogram[nbrProbes];
//
// histogram[nbrProbes] = 1 + pv;
// if (maxProbe < nbrProbes) {
// maxProbe = nbrProbes;
// }
// }
// return probeAddr;
// }
/**
* As REMOVED tokens populate,
* the number of 0's (representing free cells and
* also the end of bucket chain searches) drops.
*
* If this drops a lot, then searches take much longer.
* If there are no 0's left, searches never terminate!
*
* Keep the number of 0's at about 1 - load factor
*
*/
private void maybeRebalanceRemoves() {
final int old_capacity = keys_length();
if (old_capacity <= MIN_CAPACITY_SHRINK) {
return;
}
int new_capacity = old_capacity >> 1;
// //debug
// int sz = size;
/******************************************************
* Logic for clearing out the removed markers in bulk
*
* 3 cases: resize up (never happens), down, or the same
*
* Don't clear out if there's room left unless
* there's too much room left and should downsize
*
* Don't over do - avoid thrashing, avoid work for small sizes
*
* Case for upsizing: none. It's guaranteed there are always enough
* 0's for the load factor, by put/add implementation.
*
* Case for downsizing: if the size < 1/3 of the new capacity or,
* number being removed would be > 1/3 of the new capacity
******************************************************/
if (removed + size >= sizeWhichTriggersExpansion) {
Misc.internalError(); // put or add always keeps this false. remove always reduces or keeps this value the same
}
// REMOVED tags if there are enough of them to make it worthwhile
// or if just should shrink because removed + size is smaller than 1/3 the new capacity
int one_third_new_capacity = sizeWhichTriggersExpansion >> 2;
int one_half_new_capacity = new_capacity >> 1;
if (removed > one_half_new_capacity || (size < one_third_new_capacity)) {
if (size >= one_third_new_capacity) {
new_capacity = old_capacity;
}
if (TUNE) {
System.out.println("Capacity (maybe) decreasing from " + old_capacity + " to " + new_capacity + " removed= " + removed + " size= " + size);
}
copyOld2New(new_capacity, old_capacity);
}
// //debug
// if (sz != size)
// System.out.println("debug");
}
/**
* This method calls the subclass's copy_to_new_table method,
* passing an inner lambda containing common code for copying old to new.
*
* That inner lambda, when invoked by the copy_to_new_table method,
* is passed another lambda of one argument (the old index)
* which is called to copy each element.
* @param new_capacity
* @param old_capacity
*/
private void copyOld2New(int new_capacity, int old_capacity) {
copy_to_new_table(
new_capacity,
old_capacity,
// this code assigned to "commonCopy" arg
(IntConsumer copyToNew, IntPredicate is_valid_old_key) -> {
newTable(new_capacity);
removed = 0; // reset before put, otherwise, causes premature expansion
for (int i = 0; i < old_capacity; i++) {
if (is_valid_old_key.test(i)) {
copyToNew.accept(i);
}
}
});
}
/**
* advance pos until it points to a non 0 or is 1 past end
* If pos is negative, start at 0.
* Don't move if pos already has valid key
* @param pos -
* @return updated pos
*/
protected int moveToNextFilled(int pos) {
final int max = keys_length();
if (pos < 0) {
pos = 0;
}
while (true) {
if (pos >= max) {
return pos;
}
if (is_valid_key(pos)) {
return pos;
}
pos ++;
}
}
/**
* decrement pos until it points to a non 0 or is -1
* If pos is beyond end start at end.
* Don't move if pos already has valid key
* @param pos -
* @return updated pos
*/
protected int moveToPreviousFilled(int pos) {
final int max = keys_length();
if (pos > max) {
pos = max - 1;
}
while (true) {
if (pos < 0) {
return pos;
}
if (is_valid_key(pos)) {
return pos;
}
pos --;
}
}
// called by clear, increase table size, decrease table size
protected void newTable(int capacity) {
capacity = Math.max(MIN_SIZE, Misc.nextHigherPowerOf2(capacity));
newKeysAndValues(capacity);
sizeWhichTriggersExpansion = (int)(capacity * loadFactor);
}
protected void incrementSize() {
size++;
if (size + removed >= sizeWhichTriggersExpansion) {
maybeIncreaseTableCapacity();
}
}
private void maybeIncreaseTableCapacity() {
int old_capacity = keys_length();
int new_capacity = (removed >= size)
? old_capacity
: 2 * old_capacity;
if (TUNE) {
System.out.println("Capacity (maybe) increasing from " + old_capacity + " to " + new_capacity + ", removes= " + removed + " size=" + size);
}
copyOld2New(new_capacity, old_capacity);
}
protected void commonPutOrAddNotFound() {
if (found_removed != -1) {
removed --; // used up a removed slot
}
incrementSize();
// debugValidate();
}
/**
* only called if actually found and removed an entry
*/
protected void commonRemove() {
removed ++;
size --;
maybeRebalanceRemoves();
// debugValidate();
}
public int size() {
return size;
}
@FunctionalInterface
public interface CommonCopyOld2New {
void apply(IntConsumer copyToNew, IntPredicate is_valid_old_key);
}
protected abstract boolean is_valid_key(int pos);
protected abstract int keys_length();
protected abstract void newKeysAndValues(int capacity);
protected abstract void clearKeysAndValues();
protected abstract void copy_to_new_table(int new_capacity, int old_capacity, CommonCopyOld2New r);
protected void resetHistogram() {
if (TUNE) {
histogram = new int[200];
Arrays.fill(histogram, 0);
maxProbe = 0;
}
}
private void updateHistogram(int nbrProbes) {
histogram[nbrProbes] = 1 + histogram[nbrProbes];
if (maxProbe < nbrProbes) {
maxProbe = nbrProbes;
}
}
public void showHistogram() {
if (TUNE) {
int sumI = 0;
for (int i : histogram) {
sumI += i;
}
System.out.format(
"Histogram of number of probes, loadfactor = %.1f, maxProbe=%,d nbr of find operations at last table size=%,d%n",
loadFactor, maxProbe, sumI);
for (int i = 0; i <= maxProbe; i++) {
if (i == maxProbe && histogram[i] == 0) {
System.out.println("huh?");
}
System.out.println(i + ": " + histogram[i]);
}
System.out.println("bytes / entry = " + (float) (keys_length()) * 8 / size());
System.out.format("size = %,d, prevExpansionTriggerSize = %,d, next = %,d%n",
size(),
(int) ((keys_length() >>> 1) * loadFactor),
(int) (keys_length() * loadFactor));
}
}
// test case use
int getCapacity() {
return keys_length();
}
protected abstract class CommonKeyIterator implements IntListIterator {
protected int curPosition;
protected final int firstPosition;
protected CommonKeyIterator() {
this.curPosition = moveToNextFilled(0);
firstPosition = curPosition;
}
@Override
public boolean hasNext() {
return curPosition < keys_length() && curPosition >= 0;
}
@Override
public boolean hasPrevious() {
if (curPosition > keys_length() || curPosition <= 0) {
return false;
}
int test = moveToPreviousFilled(curPosition - 1);
return test >= 0;
}
@Override
/**
* @see org.apache.uima.internal.util.IntListIterator#moveToStart()
*/
public void moveToStart() {
curPosition = moveToNextFilled(0);
}
@Override
/**
* @see org.apache.uima.internal.util.IntListIterator#moveToEnd()
*/
public void moveToEnd() {
curPosition = moveToPreviousFilled(keys_length() - 1);
}
}
/**
*
* @param numberOfElements -
* @param factor -
* @return capacity of the main table (either 2 byte or 4 byte entries)
*/
public static int tableSpace(int numberOfElements, Float factor) {
if (numberOfElements < 0) {
throw new IllegalArgumentException("must be > 0");
}
final int capacity = Math.round(numberOfElements / factor);
return Math.max(16, Misc.nextHigherPowerOf2(capacity));
}
protected void debugValidate() {
// count non-0, non-removed, compare to size
int sum = 0;
for (int i = 0; i < keys_length(); i ++) {
if (is_valid_key(i)) {
sum ++;
if (sum > size) {
System.out.println("debug");
}
}
}
}
}