blob: 387b6b60d0cb2ca8b635148f455e87b6a57d9ebd [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.util;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Set;
public class TestStableMSBRadixSorter extends LuceneTestCase {
private void test(BytesRef[] refs, int len) {
BytesRef[] expected = ArrayUtil.copyOfSubArray(refs, 0, len);
Arrays.sort(expected);
int maxLength = 0;
for (int i = 0; i < len; ++i) {
BytesRef ref = refs[i];
maxLength = Math.max(maxLength, ref.length);
}
switch (random().nextInt(3)) {
case 0:
maxLength += TestUtil.nextInt(random(), 1, 5);
break;
case 1:
maxLength = Integer.MAX_VALUE;
break;
default:
// leave unchanged
break;
}
final int finalMaxLength = maxLength;
new StableMSBRadixSorter(maxLength) {
private BytesRef[] temp;
@Override
protected int byteAt(int i, int k) {
assertTrue(k < finalMaxLength);
BytesRef ref = refs[i];
if (ref.length <= k) {
return -1;
}
return ref.bytes[ref.offset + k] & 0xff;
}
@Override
protected void swap(int i, int j) {
BytesRef tmp = refs[i];
refs[i] = refs[j];
refs[j] = tmp;
}
@Override
protected void save(int i, int j) {
if (temp == null) {
temp = new BytesRef[refs.length];
}
temp[j] = refs[i];
}
@Override
protected void restore(int i, int j) {
if (temp != null) {
System.arraycopy(temp, i, refs, i, j - i);
}
}
}.sort(0, len);
BytesRef[] actual = ArrayUtil.copyOfSubArray(refs, 0, len);
assertArrayEquals(expected, actual);
// Verify that the arrays are not only equal after sorting with Arrays#sort and
// StableMSBRadixSorter
// but also that they have the very same instance at every index.
// This is different from MSBRadixSorter which does not guarantee ordering of the same value.
assertEquals(expected.length, actual.length);
for (int i = 0; i < expected.length; i++) {
assertSame(expected[i].bytes, actual[i].bytes);
}
}
public void testEmpty() {
test(new BytesRef[random().nextInt(5)], 0);
}
public void testOneValue() {
BytesRef bytes = new BytesRef(TestUtil.randomSimpleString(random()));
test(new BytesRef[] {bytes}, 1);
}
public void testTwoValues() {
BytesRef bytes1 = new BytesRef(TestUtil.randomSimpleString(random()));
BytesRef bytes2 = new BytesRef(TestUtil.randomSimpleString(random()));
test(new BytesRef[] {bytes1, bytes2}, 2);
}
private void testRandom(int commonPrefixLen, int maxLen) {
byte[] commonPrefix = new byte[commonPrefixLen];
random().nextBytes(commonPrefix);
final int len = random().nextInt(100000);
BytesRef[] bytes = new BytesRef[len + random().nextInt(50)];
for (int i = 0; i < len; ++i) {
byte[] b = new byte[commonPrefixLen + random().nextInt(maxLen)];
random().nextBytes(b);
System.arraycopy(commonPrefix, 0, b, 0, commonPrefixLen);
bytes[i] = new BytesRef(b);
}
test(bytes, len);
}
public void testRandom() {
for (int iter = 0; iter < 10; ++iter) {
testRandom(0, 10);
}
}
public void testRandomWithLotsOfDuplicates() {
for (int iter = 0; iter < 10; ++iter) {
testRandom(0, 2);
}
}
public void testRandomWithSharedPrefix() {
for (int iter = 0; iter < 10; ++iter) {
testRandom(TestUtil.nextInt(random(), 1, 30), 10);
}
}
public void testRandomWithSharedPrefixAndLotsOfDuplicates() {
for (int iter = 0; iter < 10; ++iter) {
testRandom(TestUtil.nextInt(random(), 1, 30), 2);
}
}
public void testRandom2() {
// how large our alphabet is
int letterCount = TestUtil.nextInt(random(), 2, 10);
// how many substring fragments to use
int substringCount = TestUtil.nextInt(random(), 2, 10);
Set<BytesRef> substringsSet = new HashSet<>();
// how many strings to make
int stringCount = atLeast(10000);
// System.out.println("letterCount=" + letterCount + " substringCount=" + substringCount + "
// stringCount=" + stringCount);
while (substringsSet.size() < substringCount) {
int length = TestUtil.nextInt(random(), 2, 10);
byte[] bytes = new byte[length];
for (int i = 0; i < length; i++) {
bytes[i] = (byte) random().nextInt(letterCount);
}
BytesRef br = new BytesRef(bytes);
substringsSet.add(br);
// System.out.println("add substring count=" + substringsSet.size() + ": " + br);
}
BytesRef[] substrings = substringsSet.toArray(new BytesRef[substringsSet.size()]);
double[] chance = new double[substrings.length];
double sum = 0.0;
for (int i = 0; i < substrings.length; i++) {
chance[i] = random().nextDouble();
sum += chance[i];
}
// give each substring a random chance of occurring:
double accum = 0.0;
for (int i = 0; i < substrings.length; i++) {
accum += chance[i] / sum;
chance[i] = accum;
}
Set<BytesRef> stringsSet = new HashSet<>();
int iters = 0;
while (stringsSet.size() < stringCount && iters < stringCount * 5) {
int count = TestUtil.nextInt(random(), 1, 5);
BytesRefBuilder b = new BytesRefBuilder();
for (int i = 0; i < count; i++) {
double v = random().nextDouble();
accum = 0.0;
for (int j = 0; j < substrings.length; j++) {
accum += chance[j];
if (accum >= v) {
b.append(substrings[j]);
break;
}
}
}
BytesRef br = b.toBytesRef();
stringsSet.add(br);
// System.out.println("add string count=" + stringsSet.size() + ": " + br);
iters++;
}
test(stringsSet.toArray(new BytesRef[stringsSet.size()]), stringsSet.size());
}
}