blob: 91a8001f10e9039de9ce26d9087bfc61aa6d3929 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.util.fst;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Random;
import java.util.Set;
import org.apache.lucene.store.MockDirectoryWrapper;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IntsRef;
import org.apache.lucene.util.IntsRefBuilder;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.TestUtil;
import org.apache.lucene.util.fst.UpToTwoPositiveIntOutputs.TwoLongs;
import static org.apache.lucene.util.fst.FSTTester.getRandomString;
import static org.apache.lucene.util.fst.FSTTester.toIntsRef;
public class TestFSTsMisc extends LuceneTestCase {
private MockDirectoryWrapper dir;
@Override
public void setUp() throws Exception {
super.setUp();
dir = newMockDirectory();
}
@Override
public void tearDown() throws Exception {
// can be null if we force simpletext (funky, some kind of bug in test runner maybe)
if (dir != null) dir.close();
super.tearDown();
}
public void testRandomWords() throws IOException {
testRandomWords(1000, LuceneTestCase.atLeast(random(), 2));
//testRandomWords(100, 1);
}
private void testRandomWords(int maxNumWords, int numIter) throws IOException {
Random random = new Random(random().nextLong());
for(int iter=0;iter<numIter;iter++) {
if (VERBOSE) {
System.out.println("\nTEST: iter " + iter);
}
for(int inputMode=0;inputMode<2;inputMode++) {
final int numWords = random.nextInt(maxNumWords+1);
Set<IntsRef> termsSet = new HashSet<>();
IntsRef[] terms = new IntsRef[numWords];
while(termsSet.size() < numWords) {
final String term = getRandomString(random);
termsSet.add(toIntsRef(term, inputMode));
}
doTest(inputMode, termsSet.toArray(new IntsRef[termsSet.size()]));
}
}
}
private void doTest(int inputMode, IntsRef[] terms) throws IOException {
Arrays.sort(terms);
// Up to two positive ints, shared, generally but not
// monotonically increasing
{
if (VERBOSE) {
System.out.println("TEST: now test UpToTwoPositiveIntOutputs");
}
final UpToTwoPositiveIntOutputs outputs = UpToTwoPositiveIntOutputs.getSingleton(true);
final List<FSTTester.InputOutput<Object>> pairs = new ArrayList<>(terms.length);
long lastOutput = 0;
for(int idx=0;idx<terms.length;idx++) {
// Sometimes go backwards
long value = lastOutput + TestUtil.nextInt(random(), -100, 1000);
while(value < 0) {
value = lastOutput + TestUtil.nextInt(random(), -100, 1000);
}
final Object output;
if (random().nextInt(5) == 3) {
long value2 = lastOutput + TestUtil.nextInt(random(), -100, 1000);
while(value2 < 0) {
value2 = lastOutput + TestUtil.nextInt(random(), -100, 1000);
}
List<Long> values = new ArrayList<>();
values.add(value);
values.add(value2);
output = values;
} else {
output = outputs.get(value);
}
pairs.add(new FSTTester.InputOutput<>(terms[idx], output));
}
new FSTTester<Object>(random(), dir, inputMode, pairs, outputs, false) {
@Override
protected boolean outputsEqual(Object output1, Object output2) {
if (output1 instanceof TwoLongs && output2 instanceof List) {
TwoLongs twoLongs1 = (TwoLongs) output1;
return Arrays.asList(new Long[] {twoLongs1.first, twoLongs1.second}).equals(output2);
} else if (output2 instanceof TwoLongs && output1 instanceof List) {
TwoLongs twoLongs2 = (TwoLongs) output2;
return Arrays.asList(new Long[] {twoLongs2.first, twoLongs2.second}).equals(output1);
}
return output1.equals(output2);
}
}.doTest(false);
}
// ListOfOutputs(PositiveIntOutputs), generally but not
// monotonically increasing
{
if (VERBOSE) {
System.out.println("TEST: now test OneOrMoreOutputs");
}
final PositiveIntOutputs _outputs = PositiveIntOutputs.getSingleton();
final ListOfOutputs<Long> outputs = new ListOfOutputs<>(_outputs);
final List<FSTTester.InputOutput<Object>> pairs = new ArrayList<>(terms.length);
long lastOutput = 0;
for(int idx=0;idx<terms.length;idx++) {
int outputCount = TestUtil.nextInt(random(), 1, 7);
List<Long> values = new ArrayList<>();
for(int i=0;i<outputCount;i++) {
// Sometimes go backwards
long value = lastOutput + TestUtil.nextInt(random(), -100, 1000);
while(value < 0) {
value = lastOutput + TestUtil.nextInt(random(), -100, 1000);
}
values.add(value);
lastOutput = value;
}
final Object output;
if (values.size() == 1) {
output = values.get(0);
} else {
output = values;
}
pairs.add(new FSTTester.InputOutput<>(terms[idx], output));
}
new FSTTester<>(random(), dir, inputMode, pairs, outputs, false).doTest(false);
}
}
public void testListOfOutputs() throws Exception {
PositiveIntOutputs _outputs = PositiveIntOutputs.getSingleton();
ListOfOutputs<Long> outputs = new ListOfOutputs<>(_outputs);
final Builder<Object> builder = new Builder<>(FST.INPUT_TYPE.BYTE1, outputs);
final IntsRefBuilder scratch = new IntsRefBuilder();
// Add the same input more than once and the outputs
// are merged:
builder.add(Util.toIntsRef(new BytesRef("a"), scratch), 1L);
builder.add(Util.toIntsRef(new BytesRef("a"), scratch), 3L);
builder.add(Util.toIntsRef(new BytesRef("a"), scratch), 0L);
builder.add(Util.toIntsRef(new BytesRef("b"), scratch), 17L);
final FST<Object> fst = builder.finish();
Object output = Util.get(fst, new BytesRef("a"));
assertNotNull(output);
List<Long> outputList = outputs.asList(output);
assertEquals(3, outputList.size());
assertEquals(1L, outputList.get(0).longValue());
assertEquals(3L, outputList.get(1).longValue());
assertEquals(0L, outputList.get(2).longValue());
output = Util.get(fst, new BytesRef("b"));
assertNotNull(output);
outputList = outputs.asList(output);
assertEquals(1, outputList.size());
assertEquals(17L, outputList.get(0).longValue());
}
public void testListOfOutputsEmptyString() throws Exception {
PositiveIntOutputs _outputs = PositiveIntOutputs.getSingleton();
ListOfOutputs<Long> outputs = new ListOfOutputs<>(_outputs);
final Builder<Object> builder = new Builder<>(FST.INPUT_TYPE.BYTE1, outputs);
final IntsRefBuilder scratch = new IntsRefBuilder();
builder.add(scratch.get(), 0L);
builder.add(scratch.get(), 1L);
builder.add(scratch.get(), 17L);
builder.add(scratch.get(), 1L);
builder.add(Util.toIntsRef(new BytesRef("a"), scratch), 1L);
builder.add(Util.toIntsRef(new BytesRef("a"), scratch), 3L);
builder.add(Util.toIntsRef(new BytesRef("a"), scratch), 0L);
builder.add(Util.toIntsRef(new BytesRef("b"), scratch), 0L);
final FST<Object> fst = builder.finish();
Object output = Util.get(fst, new BytesRef(""));
assertNotNull(output);
List<Long> outputList = outputs.asList(output);
assertEquals(4, outputList.size());
assertEquals(0L, outputList.get(0).longValue());
assertEquals(1L, outputList.get(1).longValue());
assertEquals(17L, outputList.get(2).longValue());
assertEquals(1L, outputList.get(3).longValue());
output = Util.get(fst, new BytesRef("a"));
assertNotNull(output);
outputList = outputs.asList(output);
assertEquals(3, outputList.size());
assertEquals(1L, outputList.get(0).longValue());
assertEquals(3L, outputList.get(1).longValue());
assertEquals(0L, outputList.get(2).longValue());
output = Util.get(fst, new BytesRef("b"));
assertNotNull(output);
outputList = outputs.asList(output);
assertEquals(1, outputList.size());
assertEquals(0L, outputList.get(0).longValue());
}
}