blob: 18aef908d15da3edf2f8d576fdb2263898d87995 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.uima.jcas.impl;
import java.util.Arrays;
import java.util.Random;
import org.apache.uima.cas.impl.FeatureStructureImpl;
/**
* Special space-saving table that maps between CAS addr and
* JCas cover objects.
*
* Assumptions: Each addr has a corresponding JCas; it is not
* permitted to "update" an addr with a different JCas
* cover class (unless the table is cleared first).
*
* Table always a power of 2 in size - permits faster hashing
*
*/
public class JCasHashMap {
// set to true to collect statistics for tuning
// you have to also put a call to jcas.showJfsFromCaddrHistogram() at the end of the run
private static final boolean TUNE = false;
// This inner class is only here to get access to the "next(bits)" method of Random
private static class MyRandom extends Random {
protected int next(int bits) {
return super.next(bits);
}
private static final long serialVersionUID = 1L;
}
//These are for tuning
private int histogram [];
private int nbrProbes;
private int maxProbe = 0;
private int sizeWhichTriggersExpansion;
private float loadFactor = (float)0.50;
private int size; // number of elements in the table
private FeatureStructureImpl [] table;
private MyRandom random = new MyRandom();
// These are for hashing the CAS address
private int bits; // number of random bits to generate for a probe
private int bitsMask; // 1's to "and" with result to keep in range
private int casAddr;
JCasHashMap(int initialSize) {
// round initialSize to a power of 2
int n = initialSize;
int i = 0;
while (n != 0) {
i++;
n = n >> 1;
}
// n = 1, i = 1
// n = 2,3 i = 2
// n = 4,5,6,7 i = 3
if (1<< (i - 1) == initialSize) {
i = i - 1; // if initial size was a power of 2, correct for that
}
// n = 2 i = 1
// n = 3,4 i = 2
// n = 5,6,7,8 i = 3
bits = i;
bitsMask = (1<<i) - 1;
initialSize = 1<<i;
table = new FeatureStructureImpl[initialSize];
sizeWhichTriggersExpansion = (int)(initialSize * loadFactor);
size = 0;
if (TUNE) {
histogram = new int[30];
Arrays.fill(histogram, 0);
}
}
public void clear() {
Arrays.fill(table, null);
size = 0;
}
public FeatureStructureImpl get(int key) {
FeatureStructureImpl maybe = table[probe(key)];
while ((null != maybe) && (maybe.getAddress() != key)) {
maybe = table[nextProbe()];
}
if (TUNE) {
histogram[Math.min(histogram.length - 1, nbrProbes - 1)]++;
maxProbe = Math.max(maxProbe, nbrProbes);
}
return maybe;
}
public void put(FeatureStructureImpl value) {
final int key = value.getAddress();
int probeAddr = probe(key);
if (TUNE) {
if (key < 200) {
System.out.println("key = " + key + ", probe= " + probeAddr);
}
}
while (null != table[probeAddr]) {
probeAddr = nextProbe();
}
if (TUNE) {
histogram[Math.min(histogram.length - 1, nbrProbes - 1)]++;
maxProbe = Math.max(maxProbe, nbrProbes);
}
table[probeAddr] = value;
size++;
if (size >= sizeWhichTriggersExpansion) {
increaseSize();
}
}
public int size() {
return size;
}
private int probe(int addr) {
casAddr = addr;
random.setSeed(addr);
random.next(1); // once to randomize all bits
if (TUNE)
nbrProbes = 0;
return nextProbe();
}
private int nextProbe() {
if (TUNE) {
nbrProbes++;
}
// adding the casAddr insures the probe sequence is
// different for different addrs, even if by chance the
// random bit sequence is the same for two casAddrs.
return (random.next(bits) + casAddr) & bitsMask;
}
private void increaseSize() {
final FeatureStructureImpl [] oldTable = table;
final int oldCapacity = oldTable.length;
int newCapacity = 2 * oldCapacity;
bits += 1;
bitsMask = (1<<bits) - 1;
sizeWhichTriggersExpansion = (int)(newCapacity * loadFactor);
if (TUNE)
System.out.println("Size increasing from " + oldCapacity + " to " + newCapacity);
table = new FeatureStructureImpl [newCapacity];
size = 0;
for (int i = 0; i < oldCapacity; i++) {
if (oldTable[i] != null) {
put(oldTable[i]);
}
}
}
public void showHistogram() {
if (TUNE) {
System.out.println("Histogram of number of probes, factor = " + loadFactor + ", max = "
+ maxProbe);
System.out.println("bytes / entry = " + (float) (table.length) * 4 / size);
for (int i = 0; i < histogram.length; i++) {
System.out.println(i + ": " + histogram[i]);
}
}
}
}