// blob: 0ea8515bce7635e61e5954341cbb35e992b9faa3 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.util.automaton;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.TestUtil;
/**
 * Tests for {@link CompiledAutomaton}: checks {@code floor} against term sets (a small fixed set
 * and a large random one) and checks the {@code type} reported for "accept everything" and
 * single-string automata.
 */
public class TestCompiledAutomaton extends LuceneTestCase {

  /**
   * Builds a {@link CompiledAutomaton} from the given strings.
   *
   * <p>The strings are converted to {@link BytesRef}s and sorted before being handed to {@link
   * DaciukMihovAutomatonBuilder}.
   *
   * @param determinizeWorkLimit work limit forwarded to the {@link CompiledAutomaton} constructor
   * @param strings the terms to build the automaton from
   * @return the compiled automaton
   */
  private CompiledAutomaton build(int determinizeWorkLimit, String... strings) {
    final List<BytesRef> terms = new ArrayList<>();
    for (String s : strings) {
      terms.add(new BytesRef(s));
    }
    Collections.sort(terms);
    final Automaton a = DaciukMihovAutomatonBuilder.build(terms);
    // NOTE(review): the positional flags are presumably finite=true, simplify=false,
    // isBinary=false -- confirm against the CompiledAutomaton constructor.
    return new CompiledAutomaton(a, true, false, determinizeWorkLimit, false);
  }

  /**
   * Asserts that {@code c.floor(input)} yields {@code expected}.
   *
   * @param c the compiled automaton under test
   * @param input the term to look up
   * @param expected the expected floor term, or {@code null} if {@code floor} must return
   *     {@code null}
   */
  private void testFloor(CompiledAutomaton c, String input, String expected) {
    final BytesRef b = new BytesRef(input);
    final BytesRef result = c.floor(b, new BytesRefBuilder());
    if (expected == null) {
      assertNull(result);
    } else {
      assertNotNull(result);
      // Expected value goes first, actual second, so a failure report labels them correctly.
      assertEquals(
          "actual=" + result.utf8ToString() + " vs expected=" + expected + " (input=" + input + ")",
          new BytesRef(expected),
          result);
    }
  }

  /**
   * Compiles {@code terms} into an automaton and checks {@code floor} on random inputs.
   *
   * <p>For each input, the expected answer comes from a binary search over the sorted term
   * array: the term itself if present, otherwise the term immediately before the insertion
   * point, or {@code null} if the input sorts before every term.
   *
   * @param determinizeWorkLimit work limit passed through to {@link #build}
   * @param terms the terms to compile; must be non-empty because random terms are drawn from it
   */
  private void testTerms(int determinizeWorkLimit, String[] terms) throws Exception {
    final CompiledAutomaton c = build(determinizeWorkLimit, terms);
    final BytesRef[] termBytes = new BytesRef[terms.length];
    for (int idx = 0; idx < terms.length; idx++) {
      termBytes[idx] = new BytesRef(terms[idx]);
    }
    Arrays.sort(termBytes);

    if (VERBOSE) {
      System.out.println("\nTEST: terms in unicode order");
      for (BytesRef t : termBytes) {
        System.out.println(" " + t.utf8ToString());
      }
      //System.out.println(c.utf8.toDot());
    }

    for (int iter = 0; iter < 100 * RANDOM_MULTIPLIER; iter++) {
      // Roughly one time in ten, probe with an existing term so the exact-match path is hit.
      final String s =
          random().nextInt(10) == 1 ? terms[random().nextInt(terms.length)] : randomString();
      if (VERBOSE) {
        System.out.println("\nTEST: floor(" + s + ")");
      }
      int loc = Arrays.binarySearch(termBytes, new BytesRef(s));
      final String expected;
      if (loc >= 0) {
        expected = s;
      } else {
        // term doesn't exist: convert the negative result into the insertion point
        loc = -(loc + 1);
        if (loc == 0) {
          // input sorts before every term, so there is no floor
          expected = null;
        } else {
          expected = termBytes[loc - 1].utf8ToString();
        }
      }
      if (VERBOSE) {
        System.out.println(" expected=" + expected);
      }
      testFloor(c, s, expected);
    }
  }

  /** Checks {@code floor} against an automaton built from at least 400 distinct random terms. */
  public void testRandom() throws Exception {
    final int numTerms = atLeast(400);
    final Set<String> terms = new HashSet<>();
    // The set de-duplicates, so keep drawing until enough distinct terms exist.
    while (terms.size() != numTerms) {
      terms.add(randomString());
    }
    testTerms(numTerms * 100, terms.toArray(new String[0]));
  }

  /** Returns a random string from {@link TestUtil#randomRealisticUnicodeString}. */
  private String randomString() {
    return TestUtil.randomRealisticUnicodeString(random());
  }

  /** Checks {@code floor} on a small hand-written term set with known answers. */
  public void testBasic() throws Exception {
    CompiledAutomaton c = build(Operations.DEFAULT_DETERMINIZE_WORK_LIMIT, "fob", "foo", "goo");
    testFloor(c, "goo", "goo");
    testFloor(c, "ga", "foo");
    testFloor(c, "g", "foo");
    testFloor(c, "foc", "fob");
    testFloor(c, "foz", "foo");
    testFloor(c, "f", null);
    testFloor(c, "", null);
    testFloor(c, "aa", null);
    testFloor(c, "zzz", "goo");
  }

  // LUCENE-6367
  /**
   * A single accepting state looping on every label in 0..0xff, compiled with the last
   * constructor flag set to {@code true}, must be reported as type {@code ALL}.
   */
  public void testBinaryAll() throws Exception {
    Automaton a = new Automaton();
    int state = a.createState();
    a.setAccept(state, true);
    a.addTransition(state, state, 0, 0xff);
    a.finishState();

    CompiledAutomaton ca = new CompiledAutomaton(a, null, true, Integer.MAX_VALUE, true);
    assertEquals(CompiledAutomaton.AUTOMATON_TYPE.ALL, ca.type);
  }

  // LUCENE-6367
  /**
   * A single accepting state looping on every label in 0..{@link Character#MAX_CODE_POINT},
   * compiled with the last constructor flag set to {@code false}, must be reported as type
   * {@code ALL}.
   */
  public void testUnicodeAll() throws Exception {
    Automaton a = new Automaton();
    int state = a.createState();
    a.setAccept(state, true);
    a.addTransition(state, state, 0, Character.MAX_CODE_POINT);
    a.finishState();

    CompiledAutomaton ca = new CompiledAutomaton(a, null, true, Integer.MAX_VALUE, false);
    assertEquals(CompiledAutomaton.AUTOMATON_TYPE.ALL, ca.type);
  }

  // LUCENE-6367
  /** An automaton made from one ASCII string must be reported as type {@code SINGLE}. */
  public void testBinarySingleton() throws Exception {
    // This is just ascii so we can pretend it's binary:
    Automaton a = Automata.makeString("foobar");
    CompiledAutomaton ca = new CompiledAutomaton(a, null, true, Integer.MAX_VALUE, true);
    assertEquals(CompiledAutomaton.AUTOMATON_TYPE.SINGLE, ca.type);
  }

  // LUCENE-6367
  /** An automaton made from one random unicode string must be reported as type {@code SINGLE}. */
  public void testUnicodeSingleton() throws Exception {
    Automaton a = Automata.makeString(TestUtil.randomRealisticUnicodeString(random()));
    CompiledAutomaton ca = new CompiledAutomaton(a, null, true, Integer.MAX_VALUE, false);
    assertEquals(CompiledAutomaton.AUTOMATON_TYPE.SINGLE, ca.type);
  }
}