blob: 620f88a81b0a582c68a9279761bec176900a69cf [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.uima.internal.util;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Random;
import java.util.Set;
import junit.framework.TestCase;
public class PositiveIntSetTest extends TestCase {
Random rand = new Random();
static class R {
boolean useInitParms = false;
int initSize = 2;
int estMin = 15;
int estMax = 100;
int firstValue = estMin;
float add_pc = .9f;
float clear_pc = .001f;
float remove_not_present_pc = .01f;
}
private R r = new R();
private Set<Integer> cs = new HashSet<Integer>();
private PositiveIntSet s;
private int gi;
public void testSwitchFromHashToBitWithOffset() {
PositiveIntSet_impl s = new PositiveIntSet_impl();
int [] x = new int[100];
int ix = 0;
s.add(x[ix++] = 100000); // start as bit set with offset 100000
s.add(x[ix++] = 101328); // switches to intSet
for (int i = 0; i < 14; i++) {
s.add(x[ix++] = 100001 + i);
}
s.add(x[ix++] = 100001 + 14); // switches to hashSet, size = 64 entries in short table = 32 words + 11
for (int i = 0; i < 24; i++) {
s.add(x[ix++] = 100100 + i);
}
// next causes an exception on 2.7.0 rc2
s.add(x[ix++] = 99999); // switch to bit set with key being the lowest value
s.add(x[ix++] = 100501);
// validate s
assertEquals(19 + 24, s.size());
for (int i = 0; i < (19 + 24); i++) {
assertTrue(s.contains(x[i]));
}
assertFalse(s.contains(1));
}
public void testBasic() {
PositiveIntSet_impl s = new PositiveIntSet_impl();
s.add(128);
assertTrue(s.isBitSet);
s.add(128128);
assertFalse(s.isBitSet);
IntListIterator it = s.getOrderedIterator();
assertTrue(it.hasNext());
assertEquals(128, it.nextNvc());
assertTrue(it.hasNext());
assertEquals(128128, it.nextNvc());
assertFalse(it.hasNext());
// test offset
int bb = 300000;
s = new PositiveIntSet_impl();
assertTrue(s.useOffset);
s.add(bb);
s.add(bb);
s.add(bb+1);
assertTrue(s.isBitSet);
s.add(bb+2);
assertTrue(s.isBitSet);
assertEquals(3, s.size());
assertTrue(s.useOffset);
// test offset converting to hashset
s.add(bb - 6000);
assertEquals(4, s.size());
assertFalse(s.isBitSet);
it = s.getOrderedIterator();
assertEquals(bb-6000, it.next());
assertEquals(bb, it.next());
assertEquals(bb+1, it.next());
assertEquals(bb+2, it.next());
bb = 67;
s = new PositiveIntSet_impl();
assertTrue(s.useOffset);
s.add(bb);
s.add(bb);
s.add(bb+1);
s.add(bb+2);
assertEquals(3, s.size());
assertTrue(s.isBitSet);
assertTrue(s.useOffset);
// test offset converting to bitset with no offset
s.add(bb - 66);
assertEquals(4, s.size());
assertTrue(s.isBitSet);
assertFalse(s.useOffset);
it = s.getOrderedIterator();
assertEquals(bb-66, it.next());
assertEquals(bb, it.next());
assertEquals(bb+1, it.next());
assertEquals(bb+2, it.next());
// test switch from IntSet to bit set
s.clear(); // keeps useOffset false
s.add(1216); // makes the space used by bit set = 41 words
for (int i = 1; i < 23; i++) {
s.add(i);
// System.out.println("i is " + i + ", isBitSet = " + s.isBitSet);
assertTrue("i is " + i, (i < 16) ? (s.isIntSet) : s.isBitSet);
}
it = s.getOrderedIterator();
for (int i = 1; i < 23; i++) {
assertEquals(i, it.next());
}
assertEquals(1216,it.next());
boolean reached = false;
for (int i = 10; i < 20000/*5122*/; i = i <<1) {
s.add(i); // switches to hash set when i = 2560. == 1010 0000 0000 (>>5 = 101 0000 = 80 (decimal)
// hash set size for 19 entries = 19 * 3 = 57
// bit set size for 2560 = 80 words
// bit set size for prev i (1280 dec) = 101 0000 0000 (>>5 = 10 1000) = 40 words
// if (!s.isBitSet) {
// System.out.println("is Bit set? " + s.isBitSet + ", i = " + i + ", # of entries is " + s.size());
// }
reached = i >= 2560;
assertTrue((!reached) ? s.isBitSet : !s.isBitSet);
}
assertTrue(reached);
}
public void testiterators() {
PositiveIntSet_impl s = new PositiveIntSet_impl();
int [] e = new int [] {123, 987, 789, 155,
156, 177, 444, 333,
242, 252, 262, 243,
221, 219, 217, 300,
399};
int [] eOrdered = Arrays.copyOf(e, e.length);
Arrays.sort(eOrdered);
for (int i : e) {s.add(i);}
int[] r = s.toUnorderedIntArray();
assertTrue(Arrays.equals(r, eOrdered));
s.clear();
e[0] = 125;
e[2] = 1500000;
eOrdered = Arrays.copyOf(e, e.length);
Arrays.sort(eOrdered);
for (int i : e) {s.add(i);}
r = s.toUnorderedIntArray();
assertFalse(Arrays.equals(r, e));
assertFalse(s.isBitSet);
r = s.toOrderedIntArray();
assertTrue(Arrays.equals(r, eOrdered));
}
/**
* TODO extra cases to verify (paths)
*
* 1) Switching from bit set due to lower bound dropping below offset
* - where result would (?) fit in "short" hash set
* - where result would be size < 16
*
* 2) Switching from bit set (tiny) to intSet
*/
public void testBit2ShortHash() {
PositiveIntSet_impl s = new PositiveIntSet_impl();
for (int i = 0; i < 16; i++) {
s.add(1000 + i);
}
assertTrue(s.isBitSet && s.isOffsetBitSet());
s.add(874); // bit sets with offset have some space below; this is below that
assertTrue(s.isBitSet && (!s.isOffsetBitSet()));
s.add(1);
assertTrue(s.isBitSet); // without offset, fits
s.add(10000);
assertTrue(s.isHashSet && s.isShortHashSet() && !s.isBitSet);
s.add(100000);
assertTrue(!s.isShortHashSet());
int[] sv = s.toIntArray();
Arrays.sort(sv);
assertTrue(Arrays.equals(sv, new int[] {1, 874, 1000, 1001, 1002, 1003, 1004, 1005, 1006, 1007, 1008, 1009,
1010, 1011, 1012, 1013, 1014, 1015, 10000, 100000}));
// try going from bit set with offset directly to hash set
s = new PositiveIntSet_impl();
for (int i = 0; i < 16; i++) {
s.add(1000 + i);
}
s.add(10000);
assertTrue(s.isHashSet && s.isShortHashSet() && !s.isBitSet);
s.add(100000);
assertTrue(!s.isShortHashSet());
sv = s.toIntArray();
Arrays.sort(sv);
assertTrue(Arrays.equals(sv, new int[] { 1000, 1001, 1002, 1003, 1004, 1005, 1006, 1007, 1008, 1009,
1010, 1011, 1012, 1013, 1014, 1015, 10000, 100000}));
// going from bit set to intset
s = new PositiveIntSet_impl();
s.add(1000);
assertTrue(s.isBitSet && s.isOffsetBitSet());
s.add(1259); // 1258 doesn't switch, because 1258 "fits" in existing bit set allocation
assertTrue(!s.isBitSet && s.isIntSet);
// going from int set to bit set
for (int i = 0; i < 14; i++) {
s.add(1001 + i); // add ints that will fit in bit set
assertTrue(s.isIntSet);
}
s.add(1015);
assertTrue(s.isBitSet && s.isOffsetBitSet());
assertTrue(Arrays.equals(s.toIntArray(), new int[] { 1000, 1001, 1002, 1003, 1004, 1005, 1006, 1007, 1008, 1009,
1010, 1011, 1012, 1013, 1014, 1015, 1259}));
s.remove(1015); // removing doesn't cause re-sizing
assertTrue(s.isBitSet && s.isOffsetBitSet());
assertTrue(Arrays.equals(s.toIntArray(), new int[] { 1000, 1001, 1002, 1003, 1004, 1005, 1006, 1007, 1008, 1009,
1010, 1011, 1012, 1013, 1014, 1259}));
}
public void testRandom() {
long seed = rand.nextLong();
System.out.println("PositiveIntSet test random seed = " + seed);
// seed = 3324098689001789475L;
rand.setSeed(seed);
for (gi = 0; gi < 100 /*10000*/; gi++) {
generateRandomParameters();
runRandomTests();
}
}
public void generateRandomParameters() {
r.initSize = rand.nextInt(100000) + 2;
r.add_pc = rand.nextFloat() + .5f;
r.clear_pc = rand.nextFloat() * .01f;
r.remove_not_present_pc = rand.nextFloat() * .1f;
r.useInitParms = choose(.2f);
r.estMin = rand.nextInt(100000) + 1;
r.estMax = r.estMin + ((choose(.5f)) ? rand.nextInt(1000) : rand.nextInt(100000));
}
/**
* Random testing
*
* Do updates to both positiveintset and plain java set<Integer>
*
* adds and removes (in x % ratio)
* adds are unique x % of time
* removes remove existing x % of time
*
* clear() done x % of each run.
*
* clustering:
* values > offset x % of runs.
* values in range offset - 32K to offset + 32K x % of runs
* values < (offset + n * 32) x % of runs
*
* checks:
* at end:
* content as expected via compare to plain java set<integer>
* style of intSet as expected
* iterator: produces values, as expected via compare
*
*
*/
private void runRandomTests() {
s = r.useInitParms ? new PositiveIntSet_impl(r.initSize, r.estMin, r.estMax) : new PositiveIntSet_impl();
cs = new HashSet<Integer>();
dadd(r.firstValue);
for (int i = 0; i < 10000; i++) {
add_remove_clear(i);
}
compare();
}
/**
* @param perCent the percent to use
* @return true x perCent of the time, randomly
*/
private boolean choose(float perCent) {
return rand.nextFloat() <= perCent;
}
private void add_remove_clear(int i) {
if (choose(r.clear_pc)) {
s.clear();
cs.clear();
} else if (choose(r.add_pc)) {
dadd(r.firstValue + i);
} else {
dremove(i);
}
}
private void compare() {
int[] v1 = s.toIntArray();
int[] v2 = new int[cs.size()];
Iterator<Integer> it = cs.iterator();
int i = 0;
while (it.hasNext()) {
v2[i++] = it.next();
}
Arrays.sort(v1);
Arrays.sort(v2);
assertTrue(Arrays.equals(v1, v2));
IntListIterator it2 = s.iterator();
i = 0;
while (it2.hasNext()) {
v1[i++] = it2.nextNvc();
}
Arrays.sort(v1);
assertTrue(Arrays.equals(v1, v2));
}
private boolean dadd(int v) {
boolean wasAdded = s.add(v);
boolean wasAddedc = cs.add(v);
assertEquals(wasAdded, wasAddedc);
return wasAdded;
}
private boolean dremove(int i) {
int key = getKeyToBeRemoved(i);
boolean wasRemoved = s.remove(key);
boolean wasRemovedc = cs.remove(key);
assertEquals(wasRemoved, wasRemovedc);
return wasRemoved;
}
private int getKeyToBeRemoved(int w) {
int sz = cs.size();
if (sz == 0 || choose(r.remove_not_present_pc)) {
return nonPresentValue();
}
Iterator<Integer> it = cs.iterator();
int which = Math.min(sz - 1, w % 8);
for (int i = 0; i < which; i++) {
it.next();
}
return it.next();
}
private int nonPresentValue() {
for (int i = 1; ; i++) {
if (!cs.contains(i)) {
return i;
}
if (i > 0) {
i = -i;
} else {
i = (-i) + 1;
}
}
}
}