blob: 12a2997a8500b27fec44c91670d75f6eb206dea1 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.util;
import java.util.ArrayList;
import java.util.BitSet;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map.Entry;
import java.util.Map;
import java.util.Set;
import java.util.SortedSet;
import java.util.TreeSet;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.atomic.AtomicInteger;
import org.apache.lucene.util.BytesRefHash.MaxBytesLengthExceededException;
import org.junit.Before;
import org.junit.Test;
public class TestBytesRefHash extends LuceneTestCase {
BytesRefHash hash;
ByteBlockPool pool;
@Override
@Before
public void setUp() throws Exception {
super.setUp();
pool = newPool();
hash = newHash(pool);
}
private ByteBlockPool newPool(){
return random().nextBoolean() && pool != null ? pool
: new ByteBlockPool(new RecyclingByteBlockAllocator(ByteBlockPool.BYTE_BLOCK_SIZE, random().nextInt(25)));
}
private BytesRefHash newHash(ByteBlockPool blockPool) {
final int initSize = 2 << 1 + random().nextInt(5);
return random().nextBoolean() ? new BytesRefHash(blockPool) : new BytesRefHash(
blockPool, initSize, new BytesRefHash.DirectBytesStartArray(initSize));
}
/**
* Test method for {@link org.apache.lucene.util.BytesRefHash#size()}.
*/
@Test
public void testSize() {
BytesRefBuilder ref = new BytesRefBuilder();
int num = atLeast(2);
for (int j = 0; j < num; j++) {
final int mod = 1+random().nextInt(39);
for (int i = 0; i < 797; i++) {
String str;
do {
str = TestUtil.randomRealisticUnicodeString(random(), 1000);
} while (str.length() == 0);
ref.copyChars(str);
int count = hash.size();
int key = hash.add(ref.get());
if (key < 0)
assertEquals(hash.size(), count);
else
assertEquals(hash.size(), count + 1);
if(i % mod == 0) {
hash.clear();
assertEquals(0, hash.size());
hash.reinit();
}
}
}
}
/**
* Test method for
* {@link org.apache.lucene.util.BytesRefHash#get(int, BytesRef)}
* .
*/
@Test
public void testGet() {
BytesRefBuilder ref = new BytesRefBuilder();
BytesRef scratch = new BytesRef();
int num = atLeast(2);
for (int j = 0; j < num; j++) {
Map<String, Integer> strings = new HashMap<>();
int uniqueCount = 0;
for (int i = 0; i < 797; i++) {
String str;
do {
str = TestUtil.randomRealisticUnicodeString(random(), 1000);
} while (str.length() == 0);
ref.copyChars(str);
int count = hash.size();
int key = hash.add(ref.get());
if (key >= 0) {
assertNull(strings.put(str, Integer.valueOf(key)));
assertEquals(uniqueCount, key);
uniqueCount++;
assertEquals(hash.size(), count + 1);
} else {
assertTrue((-key)-1 < count);
assertEquals(hash.size(), count);
}
}
for (Entry<String, Integer> entry : strings.entrySet()) {
ref.copyChars(entry.getKey());
assertEquals(ref.get(), hash.get(entry.getValue().intValue(), scratch));
}
hash.clear();
assertEquals(0, hash.size());
hash.reinit();
}
}
/**
* Test method for {@link org.apache.lucene.util.BytesRefHash#compact()}.
*/
@Test
public void testCompact() {
BytesRefBuilder ref = new BytesRefBuilder();
int num = atLeast(2);
for (int j = 0; j < num; j++) {
int numEntries = 0;
final int size = 797;
BitSet bits = new BitSet(size);
for (int i = 0; i < size; i++) {
String str;
do {
str = TestUtil.randomRealisticUnicodeString(random(), 1000);
} while (str.length() == 0);
ref.copyChars(str);
final int key = hash.add(ref.get());
if (key < 0) {
assertTrue(bits.get((-key)-1));
} else {
assertFalse(bits.get(key));
bits.set(key);
numEntries++;
}
}
assertEquals(hash.size(), bits.cardinality());
assertEquals(numEntries, bits.cardinality());
assertEquals(numEntries, hash.size());
int[] compact = hash.compact();
assertTrue(numEntries < compact.length);
for (int i = 0; i < numEntries; i++) {
bits.set(compact[i], false);
}
assertEquals(0, bits.cardinality());
hash.clear();
assertEquals(0, hash.size());
hash.reinit();
}
}
/**
* Test method for
* {@link org.apache.lucene.util.BytesRefHash#sort()}.
*/
@Test
public void testSort() {
BytesRefBuilder ref = new BytesRefBuilder();
int num = atLeast(2);
for (int j = 0; j < num; j++) {
// Sorts by unicode code point order (is there a simple way, e.g. a Collator?)
SortedSet<String> strings = new TreeSet<>(TestUtil.STRING_CODEPOINT_COMPARATOR);
for (int i = 0; i < 797; i++) {
String str;
do {
str = TestUtil.randomRealisticUnicodeString(random(), 1000);
} while (str.length() == 0);
ref.copyChars(str);
hash.add(ref.get());
strings.add(str);
}
int[] sort = hash.sort();
assertTrue(strings.size() < sort.length);
int i = 0;
BytesRef scratch = new BytesRef();
for (String string : strings) {
ref.copyChars(string);
assertEquals(ref.get(), hash.get(sort[i++], scratch));
}
hash.clear();
assertEquals(0, hash.size());
hash.reinit();
}
}
/**
* Test method for
* {@link org.apache.lucene.util.BytesRefHash#add(org.apache.lucene.util.BytesRef)}
* .
*/
@Test
public void testAdd() {
BytesRefBuilder ref = new BytesRefBuilder();
BytesRef scratch = new BytesRef();
int num = atLeast(2);
for (int j = 0; j < num; j++) {
Set<String> strings = new HashSet<>();
int uniqueCount = 0;
for (int i = 0; i < 797; i++) {
String str;
do {
str = TestUtil.randomRealisticUnicodeString(random(), 1000);
} while (str.length() == 0);
ref.copyChars(str);
int count = hash.size();
int key = hash.add(ref.get());
if (key >=0) {
assertTrue(strings.add(str));
assertEquals(uniqueCount, key);
assertEquals(hash.size(), count + 1);
uniqueCount++;
} else {
assertFalse(strings.add(str));
assertTrue((-key)-1 < count);
assertEquals(str, hash.get((-key)-1, scratch).utf8ToString());
assertEquals(count, hash.size());
}
}
assertAllIn(strings, hash);
hash.clear();
assertEquals(0, hash.size());
hash.reinit();
}
}
@Test
public void testFind() throws Exception {
BytesRefBuilder ref = new BytesRefBuilder();
BytesRef scratch = new BytesRef();
int num = atLeast(2);
for (int j = 0; j < num; j++) {
Set<String> strings = new HashSet<>();
int uniqueCount = 0;
for (int i = 0; i < 797; i++) {
String str;
do {
str = TestUtil.randomRealisticUnicodeString(random(), 1000);
} while (str.length() == 0);
ref.copyChars(str);
int count = hash.size();
int key = hash.find(ref.get()); //hash.add(ref);
if (key >= 0) { // string found in hash
assertFalse(strings.add(str));
assertTrue(key < count);
assertEquals(str, hash.get(key, scratch).utf8ToString());
assertEquals(count, hash.size());
} else {
key = hash.add(ref.get());
assertTrue(strings.add(str));
assertEquals(uniqueCount, key);
assertEquals(hash.size(), count + 1);
uniqueCount++;
}
}
assertAllIn(strings, hash);
hash.clear();
assertEquals(0, hash.size());
hash.reinit();
}
}
@Test
public void testConcurrentAccessToBytesRefHash() throws Exception {
int num = atLeast(2);
for (int j = 0; j < num; j++) {
int numStrings = 797;
List<String> strings = new ArrayList<>(numStrings);
for (int i = 0; i < numStrings; i++) {
final String str = TestUtil.randomRealisticUnicodeString(random(), 1, 1000);
hash.add(new BytesRef(str));
assertTrue(strings.add(str));
}
int hashSize = hash.size();
AtomicInteger notFound = new AtomicInteger();
AtomicInteger notEquals = new AtomicInteger();
AtomicInteger wrongSize = new AtomicInteger();
int numThreads = atLeast(3);
CountDownLatch latch = new CountDownLatch(numThreads);
Thread[] threads = new Thread[numThreads];
for (int i = 0; i < threads.length; i++) {
int loops = atLeast(100);
threads[i] =
new Thread(
() -> {
BytesRef scratch = new BytesRef();
latch.countDown();
try {
latch.await();
} catch (InterruptedException e) {
Thread.currentThread().interrupt();
throw new RuntimeException(e);
}
for (int k = 0; k < loops; k++) {
BytesRef find = new BytesRef(strings.get(k % strings.size()));
int id = hash.find(find);
if (id < 0) {
notFound.incrementAndGet();
} else {
BytesRef get = hash.get(id, scratch);
if (!get.bytesEquals(find)) {
notEquals.incrementAndGet();
}
}
if (hash.size() != hashSize) {
wrongSize.incrementAndGet();
}
}
},
"t" + i);
}
for (Thread t : threads) t.start();
for (Thread t : threads) t.join();
assertEquals(0, notFound.get());
assertEquals(0, notEquals.get());
assertEquals(0, wrongSize.get());
hash.clear();
assertEquals(0, hash.size());
hash.reinit();
}
}
@Test(expected = MaxBytesLengthExceededException.class)
public void testLargeValue() {
int[] sizes = new int[] { random().nextInt(5),
ByteBlockPool.BYTE_BLOCK_SIZE - 33 + random().nextInt(31),
ByteBlockPool.BYTE_BLOCK_SIZE - 1 + random().nextInt(37) };
BytesRef ref = new BytesRef();
for (int i = 0; i < sizes.length; i++) {
ref.bytes = new byte[sizes[i]];
ref.offset = 0;
ref.length = sizes[i];
try {
assertEquals(i, hash.add(ref));
} catch (MaxBytesLengthExceededException e) {
if (i < sizes.length - 1)
fail("unexpected exception at size: " + sizes[i]);
throw e;
}
}
}
/**
* Test method for
* {@link org.apache.lucene.util.BytesRefHash#addByPoolOffset(int)}
* .
*/
@Test
public void testAddByPoolOffset() {
BytesRefBuilder ref = new BytesRefBuilder();
BytesRef scratch = new BytesRef();
BytesRefHash offsetHash = newHash(pool);
int num = atLeast(2);
for (int j = 0; j < num; j++) {
Set<String> strings = new HashSet<>();
int uniqueCount = 0;
for (int i = 0; i < 797; i++) {
String str;
do {
str = TestUtil.randomRealisticUnicodeString(random(), 1000);
} while (str.length() == 0);
ref.copyChars(str);
int count = hash.size();
int key = hash.add(ref.get());
if (key >= 0) {
assertTrue(strings.add(str));
assertEquals(uniqueCount, key);
assertEquals(hash.size(), count + 1);
int offsetKey = offsetHash.addByPoolOffset(hash.byteStart(key));
assertEquals(uniqueCount, offsetKey);
assertEquals(offsetHash.size(), count + 1);
uniqueCount++;
} else {
assertFalse(strings.add(str));
assertTrue((-key)-1 < count);
assertEquals(str, hash.get((-key)-1, scratch).utf8ToString());
assertEquals(count, hash.size());
int offsetKey = offsetHash.addByPoolOffset(hash.byteStart((-key)-1));
assertTrue((-offsetKey)-1 < count);
assertEquals(str, hash.get((-offsetKey)-1, scratch).utf8ToString());
assertEquals(count, hash.size());
}
}
assertAllIn(strings, hash);
for (String string : strings) {
ref.copyChars(string);
int key = hash.add(ref.get());
BytesRef bytesRef = offsetHash.get((-key)-1, scratch);
assertEquals(ref.get(), bytesRef);
}
hash.clear();
assertEquals(0, hash.size());
offsetHash.clear();
assertEquals(0, offsetHash.size());
hash.reinit(); // init for the next round
offsetHash.reinit();
}
}
private void assertAllIn(Set<String> strings, BytesRefHash hash) {
BytesRefBuilder ref = new BytesRefBuilder();
BytesRef scratch = new BytesRef();
int count = hash.size();
for (String string : strings) {
ref.copyChars(string);
int key = hash.add(ref.get()); // add again to check duplicates
assertEquals(string, hash.get((-key)-1, scratch).utf8ToString());
assertEquals(count, hash.size());
assertTrue("key: " + key + " count: " + count + " string: " + string,
key < count);
}
}
}