/* | |
* Licensed to the Apache Software Foundation (ASF) under one | |
* or more contributor license agreements. See the NOTICE file | |
* distributed with this work for additional information | |
* regarding copyright ownership. The ASF licenses this file | |
* to you under the Apache License, Version 2.0 (the | |
* "License"); you may not use this file except in compliance | |
* with the License. You may obtain a copy of the License at | |
* | |
* http://www.apache.org/licenses/LICENSE-2.0 | |
* | |
* Unless required by applicable law or agreed to in writing, | |
* software distributed under the License is distributed on an | |
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | |
* KIND, either express or implied. See the License for the | |
* specific language governing permissions and limitations | |
* under the License. | |
*/ | |
package org.apache.uima.internal.util; | |
import java.lang.reflect.Array; | |
import java.util.Arrays; | |
import java.util.Collection; | |
import java.util.Iterator; | |
import java.util.NoSuchElementException; | |
import java.util.Set; | |
import org.apache.uima.cas.FeatureStructure; | |
/** | |
* A set of Objects of type T | |
* This impl is for use in a single thread case only, when table is being updated. | |
* Multiple reader threads are OK if there's no writing. | |
* | |
* Supports shrinking (reallocating the big table) | |
* | |
* Removed objects replaced with special marker object in the table | |
* so find operations continue to work (they can't stop upon finding this object). | |
* | |
*/ | |
public class ObjHashSet<T> implements Set<T> { | |
public static final float DEFAULT_LOAD_FACTOR = 0.66F; | |
// set to true to collect statistics for tuning | |
// you have to also put a call to showHistogram() at the end of the run | |
private static final boolean TUNE = false; | |
private final T removedMarker; | |
private final Class<T> clazz; // for reifying the T type | |
private final float loadFactor = DEFAULT_LOAD_FACTOR; | |
private final int initialCapacity; | |
private int histogram []; | |
private int maxProbe = 0; | |
private int sizeWhichTriggersExpansion; | |
private int size; // number of elements in the table | |
private int nbrRemoved; // number of removed elements (coded as removed) | |
// the actual Object table, operated as a hashtable | |
private T [] keys; | |
private T [] emptyKeyArray = null; | |
private boolean secondTimeShrinkable = false; | |
private int modificationCount = 0; | |
public ObjHashSet(Class<T> clazz, T removedMarker) { | |
this(12, clazz, removedMarker); // default initial size | |
} | |
/** | |
* @param initialCapacity - you can add this many before expansion | |
* @param clazz - a superclass of the stored items | |
* @param removedMarker - a unique value never stored in the table, used to mark removed items | |
*/ | |
public ObjHashSet(int initialCapacity, Class<T> clazz, T removedMarker) { | |
this.clazz = clazz; | |
this.initialCapacity = initialCapacity; | |
newTableKeepSize(initialCapacity); | |
size = 0; | |
if (TUNE) { | |
histogram = new int[200]; | |
Arrays.fill(histogram, 0); | |
maxProbe = 0; | |
} | |
this.removedMarker = removedMarker; | |
} | |
/** | |
* Copy constructor | |
* @param ohs - | |
*/ | |
public ObjHashSet(ObjHashSet<T> ohs) { | |
this.removedMarker = ohs.removedMarker; | |
this.clazz = ohs.clazz; | |
this.initialCapacity = ohs.initialCapacity; | |
this.histogram = ohs.histogram; | |
this.maxProbe = ohs.maxProbe; | |
this.sizeWhichTriggersExpansion = ohs.sizeWhichTriggersExpansion; | |
this.size = ohs.size; | |
this.nbrRemoved = ohs.nbrRemoved; | |
this.keys = ohs.keys.clone(); | |
this.secondTimeShrinkable = ohs.secondTimeShrinkable; | |
this.modificationCount = ohs.modificationCount; | |
} | |
public ObjHashSet(ObjHashSet<T> ohs, boolean readOnly) { | |
if (!readOnly) Misc.internalError(); | |
this.removedMarker = ohs.removedMarker; | |
this.clazz = ohs.clazz; | |
this.initialCapacity = ohs.initialCapacity; | |
this.histogram = ohs.histogram; | |
this.maxProbe = ohs.maxProbe; | |
this.sizeWhichTriggersExpansion = ohs.sizeWhichTriggersExpansion; | |
this.size = ohs.size; | |
this.nbrRemoved = ohs.nbrRemoved; | |
this.keys = (size == 0) ? emptyKeyArray() : ohs.keys.clone(); | |
this.secondTimeShrinkable = ohs.secondTimeShrinkable; | |
this.modificationCount = ohs.modificationCount; | |
} | |
private T[] emptyKeyArray() { | |
if (emptyKeyArray == null) { | |
emptyKeyArray = (T[]) Array.newInstance(clazz, 1); | |
} | |
return emptyKeyArray; | |
} | |
private void newTableKeepSize(int capacity) { | |
capacity = Misc.nextHigherPowerOf2(capacity); | |
keys = (T[]) Array.newInstance(clazz, capacity); | |
sizeWhichTriggersExpansion = (int)(capacity * loadFactor); | |
nbrRemoved = 0; | |
} | |
private void incrementSize() { | |
if (size + nbrRemoved >= sizeWhichTriggersExpansion) { | |
increaseTableCapacity(); | |
} | |
size++; | |
} | |
// public boolean wontExpand() { | |
// return wontExpand(1); | |
// } | |
// | |
// public boolean wontExpand(int n) { | |
// return (size + nbrRemoved + n) < sizeWhichTriggersExpansion; | |
// } | |
/** | |
* @return the current capacity, >= size | |
*/ | |
public int getCapacity() { | |
return keys.length; | |
} | |
/** | |
* This may not increase the table capacity, but may just | |
* clean out the REMOVED items | |
*/ | |
private void increaseTableCapacity() { | |
final int oldCapacity = getCapacity(); | |
// keep same capacity if just removing the "removed" markers would | |
// shrink the used slots to the same they would have been had there been no removed, and | |
// the capacity was doubled. | |
final int newCapacity = (nbrRemoved > size) ? oldCapacity : oldCapacity << 1; | |
final T [] oldKeys = keys; | |
if (TUNE) {System.out.println("Capacity increasing from " + oldCapacity + " to " + newCapacity);} | |
newTableKeepSize(newCapacity); | |
for (T key : oldKeys) { | |
if (key != null && key != removedMarker) { | |
addInner(key); | |
} | |
} | |
} | |
// called by clear | |
private void newTable(int capacity) { | |
newTableKeepSize(capacity); | |
resetTable(); | |
} | |
private void resetHistogram() { | |
if (TUNE) { | |
histogram = new int[200]; | |
Arrays.fill(histogram, 0); | |
maxProbe = 0; | |
} | |
} | |
private void resetArray() { | |
Arrays.fill(keys, null); | |
resetTable(); | |
} | |
private void resetTable() { | |
resetHistogram(); | |
size = 0; | |
modificationCount ++; | |
} | |
@Override | |
public void clear() { | |
// see if size is less than the 1/4 size that triggers expansion | |
if (size < (sizeWhichTriggersExpansion >>> 2)) { | |
// if 2nd time then shrink by 50% | |
// this is done to avoid thrashing around the threshold | |
if (secondTimeShrinkable) { | |
secondTimeShrinkable = false; | |
final int currentCapacity = getCapacity(); | |
final int newCapacity = Math.max(initialCapacity, currentCapacity >>> 1); | |
if (newCapacity < currentCapacity) { | |
newTable(newCapacity); // shrink table by 50% | |
} else { // don't shrink below minimum | |
resetArray(); | |
} | |
return; | |
} else { | |
secondTimeShrinkable = true; | |
} | |
} else { | |
secondTimeShrinkable = false; // reset this to require 2 triggers in a row | |
} | |
resetArray(); | |
} | |
/** | |
* returns a position in the key/value table | |
* if the key is not found, then the position will be to the | |
* place where the key would be put upon adding (without reusing REMOVED, and the | |
* current internal value of keys[position] would be 0. | |
* | |
* if the key is found, then keys[position] == key | |
* @param obj the object to find | |
* @return the probeAddr in keys array - might reference a slot holding null, or the key value if found | |
*/ | |
private int findPosition(final T obj) { | |
return findPosition(obj, false); | |
} | |
private int findPosition(final T obj, final boolean includeRemoved) { | |
if (obj == null) { | |
throw new IllegalArgumentException("null is an invalid key"); | |
} | |
final int hash = Misc.hashInt(obj.hashCode()); | |
int nbrProbes = 1; | |
int probeDelta = 1; | |
int probeAddr; | |
final T[] localKeys = keys; | |
final int bitMask = localKeys.length - 1; | |
probeAddr = hash & bitMask; | |
while (true) { | |
final T testKey = localKeys[probeAddr]; | |
if (testKey == null || testKey.equals(obj) || (includeRemoved && testKey == removedMarker)) { | |
break; | |
} | |
nbrProbes++; | |
probeAddr = bitMask & (probeAddr + (probeDelta++)); | |
} | |
if (TUNE) { | |
final int pv = histogram[nbrProbes]; | |
histogram[nbrProbes] = 1 + pv; | |
if (maxProbe < nbrProbes) { | |
maxProbe = nbrProbes; | |
} | |
} | |
return probeAddr; | |
} | |
@Override | |
public boolean contains(Object obj) { // arg must be Object to fit Collection API | |
return (clazz.isAssignableFrom(obj.getClass())) ? (find((T) obj) != -1) : false; | |
} | |
/** | |
* @param obj the object to find in the table (if it is there) | |
* @return the position of obj in the table, or -1 if not in the table | |
*/ | |
public int find(T obj) { | |
if (obj == null || size == 0) { | |
return -1; | |
} | |
final int pos = findPosition(obj); | |
return obj.equals(keys[pos]) ? pos : -1; // keys[pos] can be null | |
} | |
/** | |
* | |
* @param obj - the object to add | |
* @return true if this set did not already contain the specified element | |
*/ | |
@Override | |
public boolean add(T obj) { | |
if (obj == null) { | |
throw new IllegalArgumentException("argument must be non-null"); | |
} | |
final int i = findPosition(obj, true); // include REMOVED | |
if (obj.equals(keys[i])) { // keys[i] may be null | |
return false; // false if already present | |
} | |
if (keys[i] == removedMarker) { | |
nbrRemoved --; | |
assert (nbrRemoved >= 0); | |
} | |
keys[i] = obj; | |
incrementSize(); | |
modificationCount ++; | |
return true; | |
} | |
/** | |
* used for increasing table size | |
* @param rawKey | |
*/ | |
private void addInner(T obj) { | |
final int i = findPosition(obj); | |
assert(keys[i] == null); | |
keys[i] = obj; | |
} | |
/** | |
* Can't replace the item with a null because other keys that were | |
* stored in the table which previously collided with the removed item | |
* won't be found. UIMA-4204 | |
* @param rawKey the value to remove | |
* @return true if the key was present | |
*/ | |
@Override | |
public boolean remove(Object rawKey) { | |
if (rawKey == null) { | |
return false; | |
} | |
final int pos = findPosition((T) rawKey); // null or equal obj | |
return (rawKey.equals(keys[pos])) ? removeAtPosition(pos) : false; | |
} | |
private boolean removeAtPosition(int pos) { | |
// found, remove it | |
keys[pos] = removedMarker; // at runtime, this cast is a no-op | |
size --; | |
modificationCount ++; | |
nbrRemoved ++; | |
return true; | |
} | |
/** | |
* @see Set#size() | |
*/ | |
@Override | |
public int size() { | |
return size; | |
} | |
public void showHistogram() { | |
if (TUNE) { | |
int sumI = 0; | |
for (int i : histogram) { | |
sumI += i; | |
} | |
System.out.format( | |
"Histogram of number of probes, loadfactor = %.1f, maxProbe=%,d nbr of find operations at last table size=%,d%n", | |
loadFactor, maxProbe, sumI); | |
for (int i = 0; i <= maxProbe; i++) { | |
if (i == maxProbe && histogram[i] == 0) { | |
System.out.println("huh?"); | |
} | |
System.out.println(i + ": " + histogram[i]); | |
} | |
System.out.println("bytes / entry = " + (float) (keys.length) * 4 / size()); | |
System.out.format("size = %,d, prevExpansionTriggerSize = %,d, next = %,d%n", | |
size(), | |
(int) ((keys.length >>> 1) * loadFactor), | |
(int) (keys.length * loadFactor)); | |
} | |
} | |
/** | |
*/ | |
/** | |
* For iterator use | |
* @param index - a magic number returned by the internal find | |
* @return the T at that spot, or null if nothing there | |
*/ | |
public T get(int index) { | |
T obj = keys[index]; | |
if (obj == null || obj == removedMarker) { | |
return null; // null, not present | |
} | |
return obj; | |
} | |
/** | |
* advance pos until it points to a non 0 or is 1 past end | |
* @param pos - | |
* @return updated pos | |
*/ | |
public int moveToNextFilled(int pos) { | |
// if (pos < 0) { | |
// pos = 0; | |
// } | |
final int max = getCapacity(); | |
while (true) { | |
if (pos >= max) { | |
return pos; | |
} | |
T v = keys[pos]; | |
if (v != null && v != removedMarker) { | |
return pos; | |
} | |
pos ++; | |
} | |
} | |
/** | |
* decrement pos until it points to a non 0 or is -1 | |
* @param pos - | |
* @return updated pos | |
*/ | |
public int moveToPreviousFilled(int pos) { | |
final int max = getCapacity(); | |
if (pos > max) { | |
pos = max - 1; | |
} | |
while (true) { | |
if (pos < 0) { | |
return pos; | |
} | |
T v = keys[pos]; | |
if (v != null && v != removedMarker) { | |
return pos; | |
} | |
pos --; | |
} | |
} | |
private class ObjHashSetIterator implements Iterator<T> { | |
/** | |
* Keep this always pointing to a non-0 entry, or | |
* if not valid, outside the range | |
*/ | |
protected int curPosition; | |
private ObjHashSetIterator() { | |
this.curPosition = moveToNextFilled(0); | |
} | |
@Override | |
public final boolean hasNext() { | |
return curPosition < getCapacity(); | |
} | |
@Override | |
public final T next() { | |
if (curPosition >= getCapacity()) { | |
throw new NoSuchElementException(); | |
} | |
try { | |
T r = get(curPosition); | |
curPosition = moveToNextFilled(curPosition + 1); | |
return r; | |
} catch (ArrayIndexOutOfBoundsException e) { | |
throw new NoSuchElementException(); | |
} | |
} | |
@Override | |
public void remove() { | |
int pos = moveToPrevious(curPosition - 1); | |
if (pos >= 0) { | |
removeAtPosition(pos); | |
} | |
} | |
} | |
@Override | |
public Iterator<T> iterator() { | |
return new ObjHashSetIterator(); | |
} | |
private int moveToFirst() { | |
return (size() == 0) ? -1 : moveToNextFilled(0); | |
} | |
// private int moveToLast() { | |
// return (size() == 0) ? -1 : moveToPreviousFilled(getCapacity() -1); | |
// } | |
// private int moveToNext(int position) { | |
// if (position < 0) { | |
// return position; | |
// } | |
// final int n = moveToNextFilled(position + 1); | |
// return (n >= getCapacity()) ? -1 : n; | |
// } | |
private int moveToPrevious(int position) { | |
if (position >= getCapacity()) { | |
return -1; | |
} | |
return moveToPreviousFilled(position - 1); | |
} | |
// public boolean isValid(int position) { | |
// return (position >= 0) && (position < getCapacity()); | |
// } | |
/** | |
* if the fs is in the set, the iterator should return it. | |
* if not, return -1 (makes iterator invalid) | |
* @param fs position to this fs | |
* @return the index if present, otherwise -1; | |
*/ | |
public int moveTo(FeatureStructure fs) { | |
if (clazz.isAssignableFrom(fs.getClass())) { | |
int pos = find((T)fs); | |
if (pos >= 0) { | |
return pos; | |
} | |
} | |
return -1; | |
} | |
@Override | |
public <T2> T2[] toArray(T2[] a) { | |
final int s = size(); | |
if (s == 0) { | |
if (a.length >= 1) { | |
a[0] = null; // part of the contract of toArray, where the array a size is > | |
} | |
return a; | |
} | |
final T2[] r = (a.length >= s)? a : (T2[]) Array.newInstance(a.getClass(), s); | |
int pos = moveToFirst(); | |
for (int i = 0; i < size; i ++) { | |
r[i] = (T2) get(pos); | |
pos = moveToNextFilled(pos + 1); | |
} | |
if (a.length > s) { | |
r[s] = null; // part of the contract of toArray, where the array a size is > | |
} | |
return r; | |
} | |
@Override | |
public T[] toArray() { | |
return toArray((T[]) Array.newInstance(clazz, size())); | |
} | |
/* (non-Javadoc) | |
* @see java.lang.Object#toString() | |
*/ | |
@Override | |
public String toString() { | |
return String | |
.format( | |
"%s [loadFactor=%s, initialCapacity=%s, sizeWhichTriggersExpansion=%s, size=%s, secondTimeShrinkable=%s%n keys=%s]", | |
this.getClass().getName(), loadFactor, initialCapacity, sizeWhichTriggersExpansion, size, secondTimeShrinkable, | |
Arrays.toString(keys)); | |
} | |
// Collection methods | |
@Override | |
public boolean isEmpty() { | |
return size == 0; | |
} | |
@Override | |
public boolean containsAll(Collection<?> c) { | |
c.stream().allMatch(item -> contains(item)); | |
return false; | |
} | |
@Override | |
public boolean addAll(Collection<? extends T> c) { | |
boolean[] anyChanged = {false}; | |
c.stream().forEach(item -> anyChanged[0] |= add(item)); | |
return anyChanged[0]; | |
} | |
@Override | |
public boolean removeAll(Collection<?> c) { | |
boolean[] anyChanged = {false}; | |
c.stream().forEach(item -> anyChanged[0] |= remove(item)); | |
return anyChanged[0]; | |
} | |
@Override | |
public boolean retainAll(Collection<?> c) { | |
boolean anyChanged = false; | |
Iterator<T> it = iterator(); | |
while (it.hasNext()) { | |
T v = it.next(); | |
if (!c.contains(v)) { | |
anyChanged = true; | |
it.remove(); | |
} | |
} | |
return anyChanged; | |
} | |
/** | |
* @return the modificiation count | |
*/ | |
public int getModificationCount() { | |
return modificationCount; | |
} | |
} |