blob: ed9cf6b33a4a0162d71bf2633a17bf226bd81632 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.util.automaton;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;
import org.apache.lucene.util.IntsRef;
import org.apache.lucene.util.IntsRefBuilder;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.TestUtil;
import org.apache.lucene.util.UnicodeUtil;
import org.apache.lucene.util.automaton.AutomatonTestUtil.RandomAcceptedStrings;
import org.apache.lucene.util.fst.Util;
import static org.apache.lucene.util.automaton.Operations.DEFAULT_DETERMINIZE_WORK_LIMIT;
public class TestAutomaton extends LuceneTestCase {
public void testBasic() throws Exception {
Automaton a = new Automaton();
int start = a.createState();
int x = a.createState();
int y = a.createState();
int end = a.createState();
a.setAccept(end, true);
a.addTransition(start, x, 'a', 'a');
a.addTransition(start, end, 'd', 'd');
a.addTransition(x, y, 'b', 'b');
a.addTransition(y, end, 'c', 'c');
a.finishState();
}
public void testReduceBasic() throws Exception {
Automaton a = new Automaton();
int start = a.createState();
int end = a.createState();
a.setAccept(end, true);
// Should collapse to a-b:
a.addTransition(start, end, 'a', 'a');
a.addTransition(start, end, 'b', 'b');
a.addTransition(start, end, 'm', 'm');
// Should collapse to x-y:
a.addTransition(start, end, 'x', 'x');
a.addTransition(start, end, 'y', 'y');
a.finishState();
assertEquals(3, a.getNumTransitions(start));
Transition scratch = new Transition();
a.initTransition(start, scratch);
a.getNextTransition(scratch);
assertEquals('a', scratch.min);
assertEquals('b', scratch.max);
a.getNextTransition(scratch);
assertEquals('m', scratch.min);
assertEquals('m', scratch.max);
a.getNextTransition(scratch);
assertEquals('x', scratch.min);
assertEquals('y', scratch.max);
}
public void testSameLanguage() throws Exception {
Automaton a1 = Automata.makeString("foobar");
Automaton a2 = Operations.removeDeadStates(Operations.concatenate(
Automata.makeString("foo"),
Automata.makeString("bar")));
assertTrue(Operations.sameLanguage(a1, a2));
}
public void testCommonPrefixString() throws Exception {
Automaton a = Operations.concatenate(
Automata.makeString("foobar"),
Automata.makeAnyString());
assertEquals("foobar", Operations.getCommonPrefix(a));
}
public void testCommonPrefixEmpty() throws Exception {
assertEquals("", Operations.getCommonPrefix(Automata.makeEmpty()));
}
public void testCommonPrefixEmptyString() throws Exception {
assertEquals("", Operations.getCommonPrefix(Automata.makeEmptyString()));
}
public void testCommonPrefixAny() throws Exception {
assertEquals("", Operations.getCommonPrefix(Automata.makeAnyString()));
}
public void testCommonPrefixRange() throws Exception {
assertEquals("", Operations.getCommonPrefix(Automata.makeCharRange('a', 'b')));
}
public void testAlternatives() throws Exception {
Automaton a = Automata.makeChar('a');
Automaton c = Automata.makeChar('c');
assertEquals("", Operations.getCommonPrefix(Operations.union(a, c)));
}
public void testCommonPrefixLeadingWildcard() throws Exception {
Automaton a = Operations.concatenate(Automata.makeAnyChar(), Automata.makeString("boo"));
assertEquals("", Operations.getCommonPrefix(a));
}
public void testCommonPrefixTrailingWildcard() throws Exception {
Automaton a = Operations.concatenate(Automata.makeString("boo"), Automata.makeAnyChar());
assertEquals("boo", Operations.getCommonPrefix(a));
}
public void testCommonPrefixLeadingKleenStar() throws Exception {
Automaton a = Operations.concatenate(Automata.makeAnyString(), Automata.makeString("boo"));
assertEquals("", Operations.getCommonPrefix(a));
}
public void testCommonPrefixTrailingKleenStar() throws Exception {
Automaton a = Operations.concatenate(Automata.makeString("boo"), Automata.makeAnyString());
assertEquals("boo", Operations.getCommonPrefix(a));
}
public void testCommonPrefixDeadStates() throws Exception {
Automaton a = Operations.concatenate(Automata.makeAnyString(), Automata.makeString("boo"));
// reverse it twice, to create some dead states
// TODO: is it possible to fix reverse() to not create dead states?!
Automaton withDeadStates = Operations.reverse(Operations.reverse(a));
IllegalArgumentException expected =
expectThrows(
IllegalArgumentException.class,
() -> {
Operations.getCommonPrefix(withDeadStates);
});
assertEquals("input automaton has dead states", expected.getMessage());
}
public void testCommonPrefixRemoveDeadStates() throws Exception {
Automaton a = Operations.concatenate(Automata.makeAnyString(), Automata.makeString("boo"));
// reverse it twice, to create some dead states
// TODO: is it possible to fix reverse() to not create dead states?!
Automaton withDeadStates = Operations.reverse(Operations.reverse(a));
// now remove the deadstates
Automaton withoutDeadStates = Operations.removeDeadStates(withDeadStates);
assertEquals("", Operations.getCommonPrefix(withoutDeadStates));
}
public void testCommonPrefixOptional() throws Exception {
Automaton a = new Automaton();
int init = a.createState();
int fini = a.createState();
a.setAccept(init, true);
a.setAccept(fini, true);
a.addTransition(init, fini, 'm');
a.addTransition(fini, fini, 'm');
a.finishState();
assertEquals("", Operations.getCommonPrefix(a));
}
public void testCommonPrefixNFA() throws Exception {
Automaton a = new Automaton();
int init = a.createState();
int medial = a.createState();
int fini = a.createState();
a.setAccept(fini, true);
a.addTransition(init, medial, 'm');
a.addTransition(init, fini, 'm');
a.addTransition(medial, fini, 'o');
a.finishState();
assertEquals("m", Operations.getCommonPrefix(a));
}
public void testCommonPrefixNFAInfinite() throws Exception {
Automaton a = new Automaton();
int init = a.createState();
int medial = a.createState();
int fini = a.createState();
a.setAccept(fini, true);
a.addTransition(init, medial, 'm');
a.addTransition(init, fini, 'm');
a.addTransition(medial, fini, 'm');
a.addTransition(fini, fini, 'm');
a.finishState();
assertEquals("m", Operations.getCommonPrefix(a));
}
public void testCommonPrefixUnicode() throws Exception {
Automaton a = Operations.concatenate(Automata.makeString("boo😂😂😂"), Automata.makeAnyChar());
assertEquals("boo😂😂😂", Operations.getCommonPrefix(a));
}
public void testConcatenate1() throws Exception {
Automaton a = Operations.concatenate(
Automata.makeString("m"),
Automata.makeAnyString());
assertTrue(Operations.run(a, "m"));
assertTrue(Operations.run(a, "me"));
assertTrue(Operations.run(a, "me too"));
}
public void testConcatenate2() throws Exception {
Automaton a = Operations.concatenate(Arrays.asList(
Automata.makeString("m"),
Automata.makeAnyString(),
Automata.makeString("n"),
Automata.makeAnyString()));
a = Operations.determinize(a, DEFAULT_DETERMINIZE_WORK_LIMIT);
assertTrue(Operations.run(a, "mn"));
assertTrue(Operations.run(a, "mone"));
assertFalse(Operations.run(a, "m"));
assertFalse(Operations.isFinite(a));
}
public void testUnion1() throws Exception {
Automaton a = Operations.union(Arrays.asList(
Automata.makeString("foobar"),
Automata.makeString("barbaz")));
a = Operations.determinize(a, DEFAULT_DETERMINIZE_WORK_LIMIT);
assertTrue(Operations.run(a, "foobar"));
assertTrue(Operations.run(a, "barbaz"));
assertMatches(a, "foobar", "barbaz");
}
public void testUnion2() throws Exception {
Automaton a = Operations.union(Arrays.asList(
Automata.makeString("foobar"),
Automata.makeString(""),
Automata.makeString("barbaz")));
a = Operations.determinize(a, DEFAULT_DETERMINIZE_WORK_LIMIT);
assertTrue(Operations.run(a, "foobar"));
assertTrue(Operations.run(a, "barbaz"));
assertTrue(Operations.run(a, ""));
assertMatches(a, "", "foobar", "barbaz");
}
public void testMinimizeSimple() throws Exception {
Automaton a = Automata.makeString("foobar");
Automaton aMin = MinimizationOperations.minimize(a, DEFAULT_DETERMINIZE_WORK_LIMIT);
assertTrue(Operations.sameLanguage(a, aMin));
}
public void testMinimize2() throws Exception {
Automaton a = Operations.union(Arrays.asList(Automata.makeString("foobar"),
Automata.makeString("boobar")));
Automaton aMin = MinimizationOperations.minimize(a, DEFAULT_DETERMINIZE_WORK_LIMIT);
assertTrue(Operations.sameLanguage(Operations.determinize(
Operations.removeDeadStates(a), DEFAULT_DETERMINIZE_WORK_LIMIT), aMin));
}
public void testReverse() throws Exception {
Automaton a = Automata.makeString("foobar");
Automaton ra = Operations.reverse(a);
Automaton a2 = Operations.determinize(Operations.reverse(ra),
DEFAULT_DETERMINIZE_WORK_LIMIT);
assertTrue(Operations.sameLanguage(a, a2));
}
public void testOptional() throws Exception {
Automaton a = Automata.makeString("foobar");
Automaton a2 = Operations.optional(a);
a2 = Operations.determinize(a2, DEFAULT_DETERMINIZE_WORK_LIMIT);
assertTrue(Operations.run(a, "foobar"));
assertFalse(Operations.run(a, ""));
assertTrue(Operations.run(a2, "foobar"));
assertTrue(Operations.run(a2, ""));
}
public void testRepeatAny() throws Exception {
Automaton a = Automata.makeString("zee");
Automaton a2 = Operations.determinize(Operations.repeat(a),
DEFAULT_DETERMINIZE_WORK_LIMIT);
assertTrue(Operations.run(a2, ""));
assertTrue(Operations.run(a2, "zee"));
assertTrue(Operations.run(a2, "zeezee"));
assertTrue(Operations.run(a2, "zeezeezee"));
}
public void testRepeatMin() throws Exception {
Automaton a = Automata.makeString("zee");
Automaton a2 = Operations.determinize(Operations.repeat(a, 2),
DEFAULT_DETERMINIZE_WORK_LIMIT);
assertFalse(Operations.run(a2, ""));
assertFalse(Operations.run(a2, "zee"));
assertTrue(Operations.run(a2, "zeezee"));
assertTrue(Operations.run(a2, "zeezeezee"));
}
public void testRepeatMinMax1() throws Exception {
Automaton a = Automata.makeString("zee");
Automaton a2 = Operations.determinize(Operations.repeat(a, 0, 2),
DEFAULT_DETERMINIZE_WORK_LIMIT);
assertTrue(Operations.run(a2, ""));
assertTrue(Operations.run(a2, "zee"));
assertTrue(Operations.run(a2, "zeezee"));
assertFalse(Operations.run(a2, "zeezeezee"));
}
public void testRepeatMinMax2() throws Exception {
Automaton a = Automata.makeString("zee");
Automaton a2 = Operations.determinize(Operations.repeat(a, 2, 4),
DEFAULT_DETERMINIZE_WORK_LIMIT);
assertFalse(Operations.run(a2, ""));
assertFalse(Operations.run(a2, "zee"));
assertTrue(Operations.run(a2, "zeezee"));
assertTrue(Operations.run(a2, "zeezeezee"));
assertTrue(Operations.run(a2, "zeezeezeezee"));
assertFalse(Operations.run(a2, "zeezeezeezeezee"));
}
public void testComplement() throws Exception {
Automaton a = Automata.makeString("zee");
Automaton a2 = Operations.determinize(Operations.complement(a,
DEFAULT_DETERMINIZE_WORK_LIMIT), DEFAULT_DETERMINIZE_WORK_LIMIT);
assertTrue(Operations.run(a2, ""));
assertFalse(Operations.run(a2, "zee"));
assertTrue(Operations.run(a2, "zeezee"));
assertTrue(Operations.run(a2, "zeezeezee"));
}
public void testInterval() throws Exception {
Automaton a = Operations.determinize(Automata.makeDecimalInterval(17, 100, 3),
DEFAULT_DETERMINIZE_WORK_LIMIT);
assertFalse(Operations.run(a, ""));
assertTrue(Operations.run(a, "017"));
assertTrue(Operations.run(a, "100"));
assertTrue(Operations.run(a, "073"));
}
public void testCommonSuffix() throws Exception {
Automaton a = new Automaton();
int init = a.createState();
int fini = a.createState();
a.setAccept(init, true);
a.setAccept(fini, true);
a.addTransition(init, fini, 'm');
a.addTransition(fini, fini, 'm');
a.finishState();
assertEquals(0, Operations.getCommonSuffixBytesRef(a).length);
}
public void testCommonSuffixEmpty() throws Exception {
assertEquals(new BytesRef(), Operations.getCommonSuffixBytesRef(Automata.makeEmpty()));
}
public void testCommonSuffixEmptyString() throws Exception {
assertEquals(new BytesRef(), Operations.getCommonSuffixBytesRef(Automata.makeEmptyString()));
}
public void testCommonSuffixTrailingWildcard() throws Exception {
Automaton a = Operations.concatenate(Automata.makeString("boo"), Automata.makeAnyChar());
assertEquals(new BytesRef(), Operations.getCommonSuffixBytesRef(a));
}
public void testCommonSuffixLeadingKleenStar() throws Exception {
Automaton a = Operations.concatenate(Automata.makeAnyString(), Automata.makeString("boo"));
assertEquals(new BytesRef("boo"), Operations.getCommonSuffixBytesRef(a));
}
public void testCommonSuffixTrailingKleenStar() throws Exception {
Automaton a = Operations.concatenate(Automata.makeString("boo"), Automata.makeAnyString());
assertEquals(new BytesRef(), Operations.getCommonSuffixBytesRef(a));
}
public void testCommonSuffixUnicode() throws Exception {
Automaton a =
Operations.concatenate(Automata.makeAnyString(), Automata.makeString("boo😂😂😂"));
Automaton binary = new UTF32ToUTF8().convert(a);
assertEquals(new BytesRef("boo😂😂😂"), Operations.getCommonSuffixBytesRef(binary));
}
public void testReverseRandom1() throws Exception {
int ITERS = atLeast(100);
for(int i=0;i<ITERS;i++) {
Automaton a = AutomatonTestUtil.randomAutomaton(random());
Automaton ra = Operations.reverse(a);
Automaton rra = Operations.reverse(ra);
assertTrue(Operations.sameLanguage(
Operations.determinize(Operations.removeDeadStates(a), Integer.MAX_VALUE),
Operations.determinize(Operations.removeDeadStates(rra), Integer.MAX_VALUE)));
}
}
public void testReverseRandom2() throws Exception {
int ITERS = atLeast(100);
for(int iter=0;iter<ITERS;iter++) {
//System.out.println("TEST: iter=" + iter);
Automaton a = AutomatonTestUtil.randomAutomaton(random());
if (random().nextBoolean()) {
a = Operations.removeDeadStates(a);
}
Automaton ra = Operations.reverse(a);
Automaton rda = Operations.determinize(ra, Integer.MAX_VALUE);
if (Operations.isEmpty(a)) {
assertTrue(Operations.isEmpty(rda));
continue;
}
RandomAcceptedStrings ras = new RandomAcceptedStrings(a);
for(int iter2=0;iter2<20;iter2++) {
// Find string accepted by original automaton
int[] s = ras.getRandomAcceptedString(random());
// Reverse it
for(int j=0;j<s.length/2;j++) {
int x = s[j];
s[j] = s[s.length-j-1];
s[s.length-j-1] = x;
}
//System.out.println("TEST: iter2=" + iter2 + " s=" + Arrays.toString(s));
// Make sure reversed automaton accepts it
assertTrue(Operations.run(rda, new IntsRef(s, 0, s.length)));
}
}
}
public void testAnyStringEmptyString() throws Exception {
Automaton a = Operations.determinize(Automata.makeAnyString(),
DEFAULT_DETERMINIZE_WORK_LIMIT);
assertTrue(Operations.run(a, ""));
}
public void testBasicIsEmpty() throws Exception {
Automaton a = new Automaton();
a.createState();
assertTrue(Operations.isEmpty(a));
}
public void testRemoveDeadTransitionsEmpty() throws Exception {
Automaton a = Automata.makeEmpty();
Automaton a2 = Operations.removeDeadStates(a);
assertTrue(Operations.isEmpty(a2));
}
public void testInvalidAddTransition() throws Exception {
Automaton a = new Automaton();
int s1 = a.createState();
int s2 = a.createState();
a.addTransition(s1, s2, 'a');
a.addTransition(s2, s2, 'a');
expectThrows(IllegalStateException.class, () -> {
a.addTransition(s1, s2, 'b');
});
}
public void testBuilderRandom() throws Exception {
int ITERS = atLeast(100);
for(int iter=0;iter<ITERS;iter++) {
Automaton a = AutomatonTestUtil.randomAutomaton(random());
// Just get all transitions, shuffle, and build a new automaton with the same transitions:
List<Transition> allTrans = new ArrayList<>();
int numStates = a.getNumStates();
for(int s=0;s<numStates;s++) {
int count = a.getNumTransitions(s);
for(int i=0;i<count;i++) {
Transition t = new Transition();
a.getTransition(s, i, t);
allTrans.add(t);
}
}
Automaton.Builder builder = new Automaton.Builder();
for(int i=0;i<numStates;i++) {
int s = builder.createState();
builder.setAccept(s, a.isAccept(s));
}
Collections.shuffle(allTrans, random());
for(Transition t : allTrans) {
builder.addTransition(t.source, t.dest, t.min, t.max);
}
assertTrue(Operations.sameLanguage(
Operations.determinize(Operations.removeDeadStates(a), Integer.MAX_VALUE),
Operations.determinize(Operations.removeDeadStates(builder.finish()),
Integer.MAX_VALUE)));
}
}
public void testIsTotal() throws Exception {
assertFalse(Operations.isTotal(new Automaton()));
Automaton a = new Automaton();
int init = a.createState();
int fini = a.createState();
a.setAccept(fini, true);
a.addTransition(init, fini, Character.MIN_CODE_POINT, Character.MAX_CODE_POINT);
a.finishState();
assertFalse(Operations.isTotal(a));
a.addTransition(fini, fini, Character.MIN_CODE_POINT, Character.MAX_CODE_POINT);
a.finishState();
assertFalse(Operations.isTotal(a));
a.setAccept(init, true);
assertTrue(Operations.isTotal(MinimizationOperations.minimize(a,
DEFAULT_DETERMINIZE_WORK_LIMIT)));
}
public void testMinimizeEmpty() throws Exception {
Automaton a = new Automaton();
int init = a.createState();
int fini = a.createState();
a.addTransition(init, fini, 'a');
a.finishState();
a = MinimizationOperations.minimize(a, DEFAULT_DETERMINIZE_WORK_LIMIT);
assertEquals(0, a.getNumStates());
}
public void testMinus() throws Exception {
Automaton a1 = Automata.makeString("foobar");
Automaton a2 = Automata.makeString("boobar");
Automaton a3 = Automata.makeString("beebar");
Automaton a = Operations.union(Arrays.asList(a1, a2, a3));
if (random().nextBoolean()) {
a = Operations.determinize(a, DEFAULT_DETERMINIZE_WORK_LIMIT);
} else if (random().nextBoolean()) {
a = MinimizationOperations.minimize(a, DEFAULT_DETERMINIZE_WORK_LIMIT);
}
assertMatches(a, "foobar", "beebar", "boobar");
Automaton a4 = Operations.determinize(Operations.minus(a, a2,
DEFAULT_DETERMINIZE_WORK_LIMIT), DEFAULT_DETERMINIZE_WORK_LIMIT);
assertTrue(Operations.run(a4, "foobar"));
assertFalse(Operations.run(a4, "boobar"));
assertTrue(Operations.run(a4, "beebar"));
assertMatches(a4, "foobar", "beebar");
a4 = Operations.determinize(Operations.minus(a4, a1,
DEFAULT_DETERMINIZE_WORK_LIMIT), DEFAULT_DETERMINIZE_WORK_LIMIT);
assertFalse(Operations.run(a4, "foobar"));
assertFalse(Operations.run(a4, "boobar"));
assertTrue(Operations.run(a4, "beebar"));
assertMatches(a4, "beebar");
a4 = Operations.determinize(Operations.minus(a4, a3,
DEFAULT_DETERMINIZE_WORK_LIMIT), DEFAULT_DETERMINIZE_WORK_LIMIT);
assertFalse(Operations.run(a4, "foobar"));
assertFalse(Operations.run(a4, "boobar"));
assertFalse(Operations.run(a4, "beebar"));
assertMatches(a4);
}
public void testOneInterval() throws Exception {
Automaton a = Automata.makeDecimalInterval(999, 1032, 0);
a = Operations.determinize(a, DEFAULT_DETERMINIZE_WORK_LIMIT);
assertTrue(Operations.run(a, "0999"));
assertTrue(Operations.run(a, "00999"));
assertTrue(Operations.run(a, "000999"));
}
public void testAnotherInterval() throws Exception {
Automaton a = Automata.makeDecimalInterval(1, 2, 0);
a = Operations.determinize(a, DEFAULT_DETERMINIZE_WORK_LIMIT);
assertTrue(Operations.run(a, "01"));
}
public void testIntervalRandom() throws Exception {
int ITERS = atLeast(100);
for(int iter=0;iter<ITERS;iter++) {
int min = TestUtil.nextInt(random(), 0, 100000);
int max = TestUtil.nextInt(random(), min, min+100000);
int digits;
if (random().nextBoolean()) {
digits = 0;
} else {
String s = Integer.toString(max);
digits = TestUtil.nextInt(random(), s.length(), 2*s.length());
}
StringBuilder b = new StringBuilder();
for(int i=0;i<digits;i++) {
b.append('0');
}
String prefix = b.toString();
Automaton a = Operations.determinize(Automata.makeDecimalInterval(min, max, digits),
DEFAULT_DETERMINIZE_WORK_LIMIT);
if (random().nextBoolean()) {
a = MinimizationOperations.minimize(a, DEFAULT_DETERMINIZE_WORK_LIMIT);
}
String mins = Integer.toString(min);
String maxs = Integer.toString(max);
if (digits > 0) {
mins = prefix.substring(mins.length()) + mins;
maxs = prefix.substring(maxs.length()) + maxs;
}
assertTrue(Operations.run(a, mins));
assertTrue(Operations.run(a, maxs));
for(int iter2=0;iter2<100;iter2++) {
int x = random().nextInt(2*max);
boolean expected = x >= min && x <= max;
String sx = Integer.toString(x);
if (sx.length() < digits) {
// Left-fill with 0s
sx = b.substring(sx.length()) + sx;
} else if (digits == 0) {
// Left-fill with random number of 0s:
int numZeros = random().nextInt(10);
StringBuilder sb = new StringBuilder();
for(int i=0;i<numZeros;i++) {
sb.append('0');
}
sb.append(sx);
sx = sb.toString();
}
assertEquals(expected, Operations.run(a, sx));
}
}
}
private void assertMatches(Automaton a, String... strings) {
Set<IntsRef> expected = new HashSet<>();
for(String s : strings) {
IntsRefBuilder ints = new IntsRefBuilder();
expected.add(Util.toUTF32(s, ints));
}
assertEquals(expected, TestOperations.getFiniteStrings(
Operations.determinize(a, DEFAULT_DETERMINIZE_WORK_LIMIT)));
}
public void testConcatenatePreservesDet() throws Exception {
Automaton a1 = Automata.makeString("foobar");
assertTrue(a1.isDeterministic());
Automaton a2 = Automata.makeString("baz");
assertTrue(a2.isDeterministic());
assertTrue((Operations.concatenate(Arrays.asList(a1, a2)).isDeterministic()));
}
public void testRemoveDeadStates() throws Exception {
Automaton a = Operations.concatenate(Arrays.asList(Automata.makeString("x"),
Automata.makeString("y")));
assertEquals(4, a.getNumStates());
a = Operations.removeDeadStates(a);
assertEquals(3, a.getNumStates());
}
public void testRemoveDeadStatesEmpty1() throws Exception {
Automaton a = new Automaton();
a.finishState();
assertTrue(Operations.isEmpty(a));
assertTrue(Operations.isEmpty(Operations.removeDeadStates(a)));
}
public void testRemoveDeadStatesEmpty2() throws Exception {
Automaton a = new Automaton();
a.finishState();
assertTrue(Operations.isEmpty(a));
assertTrue(Operations.isEmpty(Operations.removeDeadStates(a)));
}
public void testRemoveDeadStatesEmpty3() throws Exception {
Automaton a = new Automaton();
int init = a.createState();
int fini = a.createState();
a.addTransition(init, fini, 'a');
Automaton a2 = Operations.removeDeadStates(a);
assertEquals(0, a2.getNumStates());
}
public void testConcatEmpty() throws Exception {
// If you concat empty automaton to anything the result should still be empty:
Automaton a = Operations.concatenate(Automata.makeEmpty(),
Automata.makeString("foo"));
assertEquals(new HashSet<IntsRef>(), TestOperations.getFiniteStrings(a));
a = Operations.concatenate(Automata.makeString("foo"),
Automata.makeEmpty());
assertEquals(new HashSet<IntsRef>(), TestOperations.getFiniteStrings(a));
}
public void testSeemsNonEmptyButIsNot1() throws Exception {
Automaton a = new Automaton();
// Init state has a transition but doesn't lead to accept
int init = a.createState();
int s = a.createState();
a.addTransition(init, s, 'a');
a.finishState();
assertTrue(Operations.isEmpty(a));
}
public void testSeemsNonEmptyButIsNot2() throws Exception {
Automaton a = new Automaton();
int init = a.createState();
int s = a.createState();
a.addTransition(init, s, 'a');
// An orphan'd accept state
s = a.createState();
a.setAccept(s, true);
a.finishState();
assertTrue(Operations.isEmpty(a));
}
public void testSameLanguage1() throws Exception {
Automaton a = Automata.makeEmptyString();
Automaton a2 = Automata.makeEmptyString();
int state = a2.createState();
a2.addTransition(0, state, 'a');
a2.finishState();
assertTrue(Operations.sameLanguage(Operations.removeDeadStates(a),
Operations.removeDeadStates(a2)));
}
private Automaton randomNoOp(Automaton a) {
switch (random().nextInt(7)) {
case 0:
if (VERBOSE) {
System.out.println(" randomNoOp: determinize");
}
return Operations.determinize(a, Integer.MAX_VALUE);
case 1:
if (a.getNumStates() < 100) {
if (VERBOSE) {
System.out.println(" randomNoOp: minimize");
}
return MinimizationOperations.minimize(a, DEFAULT_DETERMINIZE_WORK_LIMIT);
} else {
if (VERBOSE) {
System.out.println(" randomNoOp: skip op=minimize: too many states (" + a.getNumStates() + ")");
}
return a;
}
case 2:
if (VERBOSE) {
System.out.println(" randomNoOp: removeDeadStates");
}
return Operations.removeDeadStates(a);
case 3:
if (VERBOSE) {
System.out.println(" randomNoOp: reverse reverse");
}
a = Operations.reverse(a);
a = randomNoOp(a);
return Operations.reverse(a);
case 4:
if (VERBOSE) {
System.out.println(" randomNoOp: concat empty string");
}
return Operations.concatenate(a, Automata.makeEmptyString());
case 5:
if (VERBOSE) {
System.out.println(" randomNoOp: union empty automaton");
}
return Operations.union(a, Automata.makeEmpty());
case 6:
if (VERBOSE) {
System.out.println(" randomNoOp: do nothing!");
}
return a;
}
assert false;
return null;
}
private Automaton unionTerms(Collection<BytesRef> terms) {
Automaton a;
if (random().nextBoolean()) {
if (VERBOSE) {
System.out.println("TEST: unionTerms: use union");
}
List<Automaton> as = new ArrayList<>();
for(BytesRef term : terms) {
as.add(Automata.makeString(term.utf8ToString()));
}
a = Operations.union(as);
} else {
if (VERBOSE) {
System.out.println("TEST: unionTerms: use makeStringUnion");
}
List<BytesRef> termsList = new ArrayList<>(terms);
Collections.sort(termsList);
a = Automata.makeStringUnion(termsList);
}
return randomNoOp(a);
}
private String getRandomString() {
//return TestUtil.randomSimpleString(random());
return TestUtil.randomRealisticUnicodeString(random());
}
public void testRandomFinite() throws Exception {
int numTerms = atLeast(10);
int iters = atLeast(100);
if (VERBOSE) {
System.out.println("TEST: numTerms=" + numTerms + " iters=" + iters);
}
Set<BytesRef> terms = new HashSet<>();
while (terms.size() < numTerms) {
terms.add(new BytesRef(getRandomString()));
}
Automaton a = unionTerms(terms);
assertSame(terms, a);
for(int iter=0;iter<iters;iter++) {
if (VERBOSE) {
System.out.println("TEST: iter=" + iter + " numTerms=" + terms.size() + " a.numStates=" + a.getNumStates());
/*
System.out.println(" terms:");
for(BytesRef term : terms) {
System.out.println(" " + term);
}
*/
}
switch(random().nextInt(15)) {
case 0:
// concatenate prefix
{
if (VERBOSE) {
System.out.println(" op=concat prefix");
}
Set<BytesRef> newTerms = new HashSet<>();
BytesRef prefix = new BytesRef(getRandomString());
BytesRefBuilder newTerm = new BytesRefBuilder();
for(BytesRef term : terms) {
newTerm.copyBytes(prefix);
newTerm.append(term);
newTerms.add(newTerm.toBytesRef());
}
terms = newTerms;
boolean wasDeterministic1 = a.isDeterministic();
a = Operations.concatenate(Automata.makeString(prefix.utf8ToString()), a);
assertEquals(wasDeterministic1, a.isDeterministic());
}
break;
case 1:
// concatenate suffix
{
BytesRef suffix = new BytesRef(getRandomString());
if (VERBOSE) {
System.out.println(" op=concat suffix " + suffix);
}
Set<BytesRef> newTerms = new HashSet<>();
BytesRefBuilder newTerm = new BytesRefBuilder();
for(BytesRef term : terms) {
newTerm.copyBytes(term);
newTerm.append(suffix);
newTerms.add(newTerm.toBytesRef());
}
terms = newTerms;
a = Operations.concatenate(a, Automata.makeString(suffix.utf8ToString()));
}
break;
case 2:
// determinize
if (VERBOSE) {
System.out.println(" op=determinize");
}
a = Operations.determinize(a, Integer.MAX_VALUE);
assertTrue(a.isDeterministic());
break;
case 3:
if (a.getNumStates() < 100) {
if (VERBOSE) {
System.out.println(" op=minimize");
}
// minimize
a = MinimizationOperations.minimize(a, DEFAULT_DETERMINIZE_WORK_LIMIT);
} else if (VERBOSE) {
System.out.println(" skip op=minimize: too many states (" + a.getNumStates() + ")");
}
break;
case 4:
// union
{
if (VERBOSE) {
System.out.println(" op=union");
}
Set<BytesRef> newTerms = new HashSet<>();
int numNewTerms = random().nextInt(5);
while (newTerms.size() < numNewTerms) {
newTerms.add(new BytesRef(getRandomString()));
}
terms.addAll(newTerms);
Automaton newA = unionTerms(newTerms);
a = Operations.union(a, newA);
}
break;
case 5:
// optional
{
if (VERBOSE) {
System.out.println(" op=optional");
}
// NOTE: This can add a dead state:
a = Operations.optional(a);
terms.add(new BytesRef());
}
break;
case 6:
// minus finite
{
if (VERBOSE) {
System.out.println(" op=minus finite");
}
if (terms.size() > 0) {
RandomAcceptedStrings rasl = new RandomAcceptedStrings(Operations.removeDeadStates(a));
Set<BytesRef> toRemove = new HashSet<>();
int numToRemove = TestUtil.nextInt(random(), 1, (terms.size()+1)/2);
while (toRemove.size() < numToRemove) {
int[] ints = rasl.getRandomAcceptedString(random());
BytesRef term = new BytesRef(UnicodeUtil.newString(ints, 0, ints.length));
if (toRemove.contains(term) == false) {
toRemove.add(term);
}
}
for(BytesRef term : toRemove) {
boolean removed = terms.remove(term);
assertTrue(removed);
}
Automaton a2 = unionTerms(toRemove);
a = Operations.minus(a, a2, Integer.MAX_VALUE);
}
}
break;
case 7:
{
// minus infinite
List<Automaton> as = new ArrayList<>();
int count = TestUtil.nextInt(random(), 1, 5);
Set<Integer> prefixes = new HashSet<>();
while(prefixes.size() < count) {
// prefix is a leading ascii byte; we remove <prefix>* from a
int prefix = random().nextInt(128);
prefixes.add(prefix);
}
if (VERBOSE) {
System.out.println(" op=minus infinite prefixes=" + prefixes);
}
for(int prefix : prefixes) {
// prefix is a leading ascii byte; we remove <prefix>* from a
Automaton a2 = new Automaton();
int init = a2.createState();
int state = a2.createState();
a2.addTransition(init, state, prefix);
a2.setAccept(state, true);
a2.addTransition(state, state, Character.MIN_CODE_POINT, Character.MAX_CODE_POINT);
a2.finishState();
as.add(a2);
Iterator<BytesRef> it = terms.iterator();
while (it.hasNext()) {
BytesRef term = it.next();
if (term.length > 0 && (term.bytes[term.offset] & 0xFF) == prefix) {
it.remove();
}
}
}
Automaton a2 = randomNoOp(Operations.union(as));
a = Operations.minus(a, a2, DEFAULT_DETERMINIZE_WORK_LIMIT);
}
break;
case 8:
{
int count = TestUtil.nextInt(random(), 10, 20);
if (VERBOSE) {
System.out.println(" op=intersect infinite count=" + count);
}
// intersect infinite
List<Automaton> as = new ArrayList<>();
Set<Integer> prefixes = new HashSet<>();
while(prefixes.size() < count) {
int prefix = random().nextInt(128);
prefixes.add(prefix);
}
if (VERBOSE) {
System.out.println(" prefixes=" + prefixes);
}
for(int prefix : prefixes) {
// prefix is a leading ascii byte; we retain <prefix>* in a
Automaton a2 = new Automaton();
int init = a2.createState();
int state = a2.createState();
a2.addTransition(init, state, prefix);
a2.setAccept(state, true);
a2.addTransition(state, state, Character.MIN_CODE_POINT, Character.MAX_CODE_POINT);
a2.finishState();
as.add(a2);
prefixes.add(prefix);
}
Automaton a2 = Operations.union(as);
if (random().nextBoolean()) {
a2 = Operations.determinize(a2, DEFAULT_DETERMINIZE_WORK_LIMIT);
} else if (random().nextBoolean()) {
a2 = MinimizationOperations.minimize(a2, DEFAULT_DETERMINIZE_WORK_LIMIT);
}
a = Operations.intersection(a, a2);
Iterator<BytesRef> it = terms.iterator();
while (it.hasNext()) {
BytesRef term = it.next();
if (term.length == 0 || prefixes.contains(term.bytes[term.offset]&0xff) == false) {
if (VERBOSE) {
System.out.println(" drop term=" + term);
}
it.remove();
} else {
if (VERBOSE) {
System.out.println(" keep term=" + term);
}
}
}
}
break;
case 9:
// reverse
{
if (VERBOSE) {
System.out.println(" op=reverse");
}
a = Operations.reverse(a);
Set<BytesRef> newTerms = new HashSet<>();
for(BytesRef term : terms) {
newTerms.add(new BytesRef(new StringBuilder(term.utf8ToString()).reverse().toString()));
}
terms = newTerms;
}
break;
case 10:
if (VERBOSE) {
System.out.println(" op=randomNoOp");
}
a = randomNoOp(a);
break;
case 11:
// interval
{
int min = random().nextInt(1000);
int max = min + random().nextInt(50);
// digits must be non-zero else we make cycle
int digits = Integer.toString(max).length();
if (VERBOSE) {
System.out.println(" op=union interval min=" + min + " max=" + max + " digits=" + digits);
}
a = Operations.union(a, Automata.makeDecimalInterval(min, max, digits));
StringBuilder b = new StringBuilder();
for(int i=0;i<digits;i++) {
b.append('0');
}
String prefix = b.toString();
for(int i=min;i<=max;i++) {
String s = Integer.toString(i);
if (s.length() < digits) {
// Left-fill with 0s
s = prefix.substring(s.length()) + s;
}
terms.add(new BytesRef(s));
}
}
break;
case 12:
if (VERBOSE) {
System.out.println(" op=remove the empty string");
}
a = Operations.minus(a, Automata.makeEmptyString(), DEFAULT_DETERMINIZE_WORK_LIMIT);
terms.remove(new BytesRef());
break;
case 13:
if (VERBOSE) {
System.out.println(" op=add the empty string");
}
a = Operations.union(a, Automata.makeEmptyString());
terms.add(new BytesRef());
break;
case 14:
// Safety in case we are really unlucky w/ the dice:
if (terms.size() <= numTerms * 3) {
if (VERBOSE) {
System.out.println(" op=concat finite automaton");
}
int count = random().nextBoolean() ? 2 : 3;
Set<BytesRef> addTerms = new HashSet<>();
while (addTerms.size() < count) {
addTerms.add(new BytesRef(getRandomString()));
}
if (VERBOSE) {
for(BytesRef term : addTerms) {
System.out.println(" term=" + term);
}
}
Automaton a2 = unionTerms(addTerms);
Set<BytesRef> newTerms = new HashSet<>();
if (random().nextBoolean()) {
// suffix
if (VERBOSE) {
System.out.println(" do suffix");
}
a = Operations.concatenate(a, randomNoOp(a2));
BytesRefBuilder newTerm = new BytesRefBuilder();
for(BytesRef term : terms) {
for(BytesRef suffix : addTerms) {
newTerm.copyBytes(term);
newTerm.append(suffix);
newTerms.add(newTerm.toBytesRef());
}
}
} else {
// prefix
if (VERBOSE) {
System.out.println(" do prefix");
}
a = Operations.concatenate(randomNoOp(a2), a);
BytesRefBuilder newTerm = new BytesRefBuilder();
for(BytesRef term : terms) {
for(BytesRef prefix : addTerms) {
newTerm.copyBytes(prefix);
newTerm.append(term);
newTerms.add(newTerm.toBytesRef());
}
}
}
terms = newTerms;
}
break;
default:
throw new AssertionError();
}
assertSame(terms, a);
assertEquals(AutomatonTestUtil.isDeterministicSlow(a), a.isDeterministic());
if (random().nextInt(10) == 7) {
a = verifyTopoSort(a);
}
}
assertSame(terms, a);
}
/** Runs topo sort, verifies transitions then only "go forwards", and
* builds and returns new automaton with those remapped toposorted states. */
private Automaton verifyTopoSort(Automaton a) {
int[] sorted = Operations.topoSortStates(a);
// This can be < if we removed dead states:
assertTrue(sorted.length <= a.getNumStates());
Automaton a2 = new Automaton();
int[] stateMap = new int[a.getNumStates()];
Arrays.fill(stateMap, -1);
Transition transition = new Transition();
for(int state : sorted) {
int newState = a2.createState();
a2.setAccept(newState, a.isAccept(state));
// Each state should only appear once in the sort:
assertEquals(-1, stateMap[state]);
stateMap[state] = newState;
}
// 2nd pass: add new transitions
for(int state : sorted) {
int count = a.initTransition(state, transition);
for(int i=0;i<count;i++) {
a.getNextTransition(transition);
assert stateMap[transition.dest] > stateMap[state];
a2.addTransition(stateMap[state], stateMap[transition.dest], transition.min, transition.max);
}
}
a2.finishState();
return a2;
}
private void assertSame(Collection<BytesRef> terms, Automaton a) {
try {
assertTrue(Operations.isFinite(a));
assertFalse(Operations.isTotal(a));
Automaton detA = Operations.determinize(a, DEFAULT_DETERMINIZE_WORK_LIMIT);
// Make sure all terms are accepted:
IntsRefBuilder scratch = new IntsRefBuilder();
for(BytesRef term : terms) {
Util.toIntsRef(term, scratch);
assertTrue("failed to accept term=" + term.utf8ToString(), Operations.run(detA, term.utf8ToString()));
}
// Use getFiniteStrings:
Set<IntsRef> expected = new HashSet<>();
for(BytesRef term : terms) {
IntsRefBuilder intsRef = new IntsRefBuilder();
Util.toUTF32(term.utf8ToString(), intsRef);
expected.add(intsRef.toIntsRef());
}
Set<IntsRef> actual = TestOperations.getFiniteStrings(a);
if (expected.equals(actual) == false) {
System.out.println("FAILED:");
for(IntsRef term : expected) {
if (actual.contains(term) == false) {
System.out.println(" term=" + term + " should be accepted but isn't");
}
}
for(IntsRef term : actual) {
if (expected.contains(term) == false) {
System.out.println(" term=" + term + " is accepted but should not be");
}
}
throw new AssertionError("mismatch");
}
// Use sameLanguage:
Automaton a2 = Operations.removeDeadStates(Operations.determinize(unionTerms(terms),
Integer.MAX_VALUE));
assertTrue(Operations.sameLanguage(a2, Operations.removeDeadStates(Operations.determinize(a,
Integer.MAX_VALUE))));
// Do same check, in UTF8 space
Automaton utf8 = randomNoOp(new UTF32ToUTF8().convert(a));
Set<IntsRef> expected2 = new HashSet<>();
for(BytesRef term : terms) {
IntsRefBuilder intsRef = new IntsRefBuilder();
Util.toIntsRef(term, intsRef);
expected2.add(intsRef.toIntsRef());
}
assertEquals(expected2, TestOperations.getFiniteStrings(utf8));
} catch (AssertionError ae) {
System.out.println("TEST: FAILED: not same");
System.out.println(" terms (count=" + terms.size() + "):");
for(BytesRef term : terms) {
System.out.println(" " + term);
}
System.out.println(" automaton:");
System.out.println(a.toDot());
//a.writeDot("fail");
throw ae;
}
}
private boolean accepts(Automaton a, BytesRef b) {
IntsRefBuilder intsBuilder = new IntsRefBuilder();
Util.toIntsRef(b, intsBuilder);
return Operations.run(a, intsBuilder.toIntsRef());
}
private Automaton makeBinaryInterval(BytesRef minTerm, boolean minInclusive,
BytesRef maxTerm, boolean maxInclusive) {
if (VERBOSE) {
System.out.println("TEST: minTerm=" + minTerm + " minInclusive=" + minInclusive + " maxTerm=" + maxTerm + " maxInclusive=" + maxInclusive);
}
Automaton a = Automata.makeBinaryInterval(minTerm, minInclusive,
maxTerm, maxInclusive);
Automaton minA = MinimizationOperations.minimize(a, Integer.MAX_VALUE);
if (minA.getNumStates() != a.getNumStates()) {
assertTrue(minA.getNumStates() < a.getNumStates());
System.out.println("Original was not minimal:");
System.out.println("Original:\n" + a.toDot());
System.out.println("Minimized:\n" + minA.toDot());
System.out.println("minTerm=" + minTerm + " minInclusive=" + minInclusive);
System.out.println("maxTerm=" + maxTerm + " maxInclusive=" + maxInclusive);
fail("automaton was not minimal");
}
if (VERBOSE) {
System.out.println(a.toDot());
}
return a;
}
public void testMakeBinaryIntervalFiniteCasesBasic() throws Exception {
// 0 (incl) - 00 (incl)
byte[] zeros = new byte[3];
Automaton a = makeBinaryInterval(new BytesRef(zeros, 0, 1), true, new BytesRef(zeros, 0, 2), true);
assertTrue(Operations.isFinite(a));
assertFalse(accepts(a, new BytesRef()));
assertTrue(accepts(a, new BytesRef(zeros, 0, 1)));
assertTrue(accepts(a, new BytesRef(zeros, 0, 2)));
assertFalse(accepts(a, new BytesRef(zeros, 0, 3)));
// '' (incl) - 00 (incl)
a = makeBinaryInterval(new BytesRef(), true, new BytesRef(zeros, 0, 2), true);
assertTrue(Operations.isFinite(a));
assertTrue(accepts(a, new BytesRef()));
assertTrue(accepts(a, new BytesRef(zeros, 0, 1)));
assertTrue(accepts(a, new BytesRef(zeros, 0, 2)));
assertFalse(accepts(a, new BytesRef(zeros, 0, 3)));
// '' (excl) - 00 (incl)
a = makeBinaryInterval(new BytesRef(), false, new BytesRef(zeros, 0, 2), true);
assertTrue(Operations.isFinite(a));
assertFalse(accepts(a, new BytesRef()));
assertTrue(accepts(a, new BytesRef(zeros, 0, 1)));
assertTrue(accepts(a, new BytesRef(zeros, 0, 2)));
assertFalse(accepts(a, new BytesRef(zeros, 0, 3)));
// 0 (excl) - 00 (incl)
a = makeBinaryInterval(new BytesRef(zeros, 0, 1), false, new BytesRef(zeros, 0, 2), true);
assertTrue(Operations.isFinite(a));
assertFalse(accepts(a, new BytesRef()));
assertFalse(accepts(a, new BytesRef(zeros, 0, 1)));
assertTrue(accepts(a, new BytesRef(zeros, 0, 2)));
assertFalse(accepts(a, new BytesRef(zeros, 0, 3)));
// 0 (excl) - 00 (excl)
a = makeBinaryInterval(new BytesRef(zeros, 0, 1), false, new BytesRef(zeros, 0, 2), false);
assertTrue(Operations.isFinite(a));
assertFalse(accepts(a, new BytesRef()));
assertFalse(accepts(a, new BytesRef(zeros, 0, 1)));
assertFalse(accepts(a, new BytesRef(zeros, 0, 2)));
assertFalse(accepts(a, new BytesRef(zeros, 0, 3)));
}
public void testMakeBinaryIntervalFiniteCasesRandom() throws Exception {
int iters = atLeast(100);
for(int iter=0;iter<iters;iter++) {
BytesRef prefix = new BytesRef(TestUtil.randomRealisticUnicodeString(random()));
BytesRefBuilder b = new BytesRefBuilder();
b.append(prefix);
int numZeros = random().nextInt(10);
for(int i=0;i<numZeros;i++) {
b.append((byte) 0);
}
BytesRef minTerm = b.get();
b = new BytesRefBuilder();
b.append(minTerm);
numZeros = random().nextInt(10);
for(int i=0;i<numZeros;i++) {
b.append((byte) 0);
}
BytesRef maxTerm = b.get();
boolean minInclusive = random().nextBoolean();
boolean maxInclusive = random().nextBoolean();
Automaton a = makeBinaryInterval(minTerm, minInclusive,
maxTerm, maxInclusive);
assertTrue(Operations.isFinite(a));
int expectedCount = maxTerm.length - minTerm.length + 1;
if (minInclusive == false) {
expectedCount--;
}
if (maxInclusive == false) {
expectedCount--;
}
if (expectedCount <= 0) {
assertTrue(Operations.isEmpty(a));
continue;
} else {
// Enumerate all finite strings and verify the count matches what we expect:
assertEquals(expectedCount, TestOperations.getFiniteStrings(a, expectedCount).size());
}
b = new BytesRefBuilder();
b.append(minTerm);
if (minInclusive == false) {
assertFalse(accepts(a, b.get()));
b.append((byte) 0);
}
while (b.length() < maxTerm.length) {
b.append((byte) 0);
boolean expected;
if (b.length() == maxTerm.length) {
expected = maxInclusive;
} else {
expected = true;
}
assertEquals(expected, accepts(a, b.get()));
}
}
}
public void testMakeBinaryIntervalRandom() throws Exception {
int iters = atLeast(100);
for(int iter=0;iter<iters;iter++) {
BytesRef minTerm = TestUtil.randomBinaryTerm(random());
boolean minInclusive = random().nextBoolean();
BytesRef maxTerm = TestUtil.randomBinaryTerm(random());
boolean maxInclusive = random().nextBoolean();
Automaton a = makeBinaryInterval(minTerm, minInclusive, maxTerm, maxInclusive);
for(int iter2=0;iter2<500;iter2++) {
BytesRef term = TestUtil.randomBinaryTerm(random());
int minCmp = minTerm.compareTo(term);
int maxCmp = maxTerm.compareTo(term);
boolean expected;
if (minCmp > 0 || maxCmp < 0) {
expected = false;
} else if (minCmp == 0 && maxCmp == 0) {
expected = minInclusive && maxInclusive;
} else if (minCmp == 0) {
expected = minInclusive;
} else if (maxCmp == 0) {
expected = maxInclusive;
} else {
expected = true;
}
if (VERBOSE) {
System.out.println(" check term=" + term + " expected=" + expected);
}
IntsRefBuilder intsBuilder = new IntsRefBuilder();
Util.toIntsRef(term, intsBuilder);
assertEquals(expected, Operations.run(a, intsBuilder.toIntsRef()));
}
}
}
private static IntsRef intsRef(String s) {
IntsRefBuilder intsBuilder = new IntsRefBuilder();
Util.toIntsRef(new BytesRef(s), intsBuilder);
return intsBuilder.toIntsRef();
}
public void testMakeBinaryIntervalBasic() throws Exception {
Automaton a = Automata.makeBinaryInterval(new BytesRef("bar"), true, new BytesRef("foo"), true);
assertTrue(Operations.run(a, intsRef("bar")));
assertTrue(Operations.run(a, intsRef("foo")));
assertTrue(Operations.run(a, intsRef("beep")));
assertFalse(Operations.run(a, intsRef("baq")));
assertTrue(Operations.run(a, intsRef("bara")));
}
public void testMakeBinaryIntervalLowerBoundEmptyString() throws Exception {
Automaton a = Automata.makeBinaryInterval(new BytesRef(""), true, new BytesRef("bar"), true);
assertTrue(Operations.run(a, intsRef("")));
assertTrue(Operations.run(a, intsRef("a")));
assertTrue(Operations.run(a, intsRef("bar")));
assertFalse(Operations.run(a, intsRef("bara")));
assertFalse(Operations.run(a, intsRef("baz")));
a = Automata.makeBinaryInterval(new BytesRef(""), false, new BytesRef("bar"), true);
assertFalse(Operations.run(a, intsRef("")));
assertTrue(Operations.run(a, intsRef("a")));
assertTrue(Operations.run(a, intsRef("bar")));
assertFalse(Operations.run(a, intsRef("bara")));
assertFalse(Operations.run(a, intsRef("baz")));
}
public void testMakeBinaryIntervalEqual() throws Exception {
Automaton a = Automata.makeBinaryInterval(new BytesRef("bar"), true, new BytesRef("bar"), true);
assertTrue(Operations.run(a, intsRef("bar")));
assertTrue(Operations.isFinite(a));
assertEquals(1, TestOperations.getFiniteStrings(a).size());
}
public void testMakeBinaryIntervalCommonPrefix() throws Exception {
Automaton a = Automata.makeBinaryInterval(new BytesRef("bar"), true, new BytesRef("barfoo"), true);
assertFalse(Operations.run(a, intsRef("bam")));
assertTrue(Operations.run(a, intsRef("bar")));
assertTrue(Operations.run(a, intsRef("bara")));
assertTrue(Operations.run(a, intsRef("barf")));
assertTrue(Operations.run(a, intsRef("barfo")));
assertTrue(Operations.run(a, intsRef("barfoo")));
assertTrue(Operations.run(a, intsRef("barfonz")));
assertFalse(Operations.run(a, intsRef("barfop")));
assertFalse(Operations.run(a, intsRef("barfoop")));
}
public void testMakeBinaryExceptEmpty() throws Exception {
Automaton a = Automata.makeNonEmptyBinary();
assertFalse(Operations.run(a, intsRef("")));
assertTrue(Operations.run(a, intsRef(TestUtil.randomRealisticUnicodeString(random(), 1, 10))));
}
public void testMakeBinaryIntervalOpenMax() throws Exception {
Automaton a = Automata.makeBinaryInterval(new BytesRef("bar"), true, null, true);
assertFalse(Operations.run(a, intsRef("bam")));
assertTrue(Operations.run(a, intsRef("bar")));
assertTrue(Operations.run(a, intsRef("bara")));
assertTrue(Operations.run(a, intsRef("barf")));
assertTrue(Operations.run(a, intsRef("barfo")));
assertTrue(Operations.run(a, intsRef("barfoo")));
assertTrue(Operations.run(a, intsRef("barfonz")));
assertTrue(Operations.run(a, intsRef("barfop")));
assertTrue(Operations.run(a, intsRef("barfoop")));
assertTrue(Operations.run(a, intsRef("zzz")));
}
public void testMakeBinaryIntervalOpenMaxZeroLengthMin() throws Exception {
// when including min, automaton should accept "a"
Automaton a = Automata.makeBinaryInterval(new BytesRef(""), true, null, true);
assertTrue(Operations.run(a, intsRef("")));
assertTrue(Operations.run(a, intsRef("a")));
assertTrue(Operations.run(a, intsRef("aaaaaa")));
// excluding min should still accept "a"
a = Automata.makeBinaryInterval(new BytesRef(""), false, null, true);
assertFalse(Operations.run(a, intsRef("")));
assertTrue(Operations.run(a, intsRef("a")));
assertTrue(Operations.run(a, intsRef("aaaaaa")));
}
public void testMakeBinaryIntervalOpenMin() throws Exception {
Automaton a = Automata.makeBinaryInterval(null, true, new BytesRef("foo"), true);
assertFalse(Operations.run(a, intsRef("foz")));
assertFalse(Operations.run(a, intsRef("zzz")));
assertTrue(Operations.run(a, intsRef("foo")));
assertTrue(Operations.run(a, intsRef("")));
assertTrue(Operations.run(a, intsRef("a")));
assertTrue(Operations.run(a, intsRef("aaa")));
assertTrue(Operations.run(a, intsRef("bz")));
}
public void testMakeBinaryIntervalOpenBoth() throws Exception {
Automaton a = Automata.makeBinaryInterval(null, true, null, true);
assertTrue(Operations.run(a, intsRef("foz")));
assertTrue(Operations.run(a, intsRef("zzz")));
assertTrue(Operations.run(a, intsRef("foo")));
assertTrue(Operations.run(a, intsRef("")));
assertTrue(Operations.run(a, intsRef("a")));
assertTrue(Operations.run(a, intsRef("aaa")));
assertTrue(Operations.run(a, intsRef("bz")));
}
public void testAcceptAllEmptyStringMin() throws Exception {
Automaton a = Automata.makeBinaryInterval(new BytesRef(), true, null, true);
assertTrue(Operations.sameLanguage(Automata.makeAnyBinary(), a));
}
private static IntsRef toIntsRef(String s) {
IntsRefBuilder b = new IntsRefBuilder();
for (int i = 0, cp = 0; i < s.length(); i += Character.charCount(cp)) {
cp = s.codePointAt(i);
b.append(cp);
}
return b.get();
}
public void testGetSingleton() {
int iters = atLeast(10000);
for(int iter=0;iter<iters;iter++) {
String s = TestUtil.randomRealisticUnicodeString(random());
Automaton a = Automata.makeString(s);
assertEquals(toIntsRef(s), Operations.getSingleton(a));
}
}
public void testGetSingletonEmptyString() {
Automaton a = new Automaton();
int s = a.createState();
a.setAccept(s, true);
a.finishState();
assertEquals(new IntsRef(), Operations.getSingleton(a));
}
public void testGetSingletonNothing() {
Automaton a = new Automaton();
a.createState();
a.finishState();
assertNull(Operations.getSingleton(a));
}
public void testGetSingletonTwo() {
Automaton a = new Automaton();
int s = a.createState();
int x = a.createState();
a.setAccept(x, true);
a.addTransition(s, x, 55);
int y = a.createState();
a.setAccept(y, true);
a.addTransition(s, y, 58);
a.finishState();
assertNull(Operations.getSingleton(a));
}
// LUCENE-9981
public void testDeterminizeTooMuchEffort() {
// make sure determinize properly aborts, relatively quickly, for this regexp:
expectThrows(
TooComplexToDeterminizeException.class,
() -> {
Automaton a = new RegExp("(.*a){2000}").toAutomaton();
Operations.determinize(a, Operations.DEFAULT_DETERMINIZE_WORK_LIMIT);
});
// ... and for its reversed form too:
expectThrows(
TooComplexToDeterminizeException.class,
() -> {
Automaton a = new RegExp("(.*a){2000}").toAutomaton();
a = Operations.reverse(a);
Operations.determinize(a, Operations.DEFAULT_DETERMINIZE_WORK_LIMIT);
});
}
}