Make Automata#optional create simpler automata. (#13793)
In the common case when the input automaton has no transition to state 0, the
optional automaton can be created by marking state 0 as accepted.
diff --git a/lucene/core/src/java/org/apache/lucene/util/automaton/Operations.java b/lucene/core/src/java/org/apache/lucene/util/automaton/Operations.java
index 3cb02c7..7c2b164 100644
--- a/lucene/core/src/java/org/apache/lucene/util/automaton/Operations.java
+++ b/lucene/core/src/java/org/apache/lucene/util/automaton/Operations.java
@@ -159,6 +159,37 @@
* <p>Complexity: linear in number of states.
*/
public static Automaton optional(Automaton a) {
+ if (a.isAccept(0)) {
+ // If the initial state is accepted, then the empty string is already accepted.
+ return a;
+ }
+
+ boolean hasTransitionsToInitialState = false;
+ Transition t = new Transition();
+ outer:
+ for (int state = 0; state < a.getNumStates(); ++state) {
+ int count = a.initTransition(state, t);
+ for (int i = 0; i < count; ++i) {
+ a.getNextTransition(t);
+ if (t.dest == 0) {
+ hasTransitionsToInitialState = true;
+ break outer;
+ }
+ }
+ }
+
+ if (hasTransitionsToInitialState == false) {
+ // If the automaton has no transition to the initial state, we can simply mark the initial
+ // state as accepted.
+ Automaton result = new Automaton();
+ result.copy(a);
+ if (result.getNumStates() == 0) {
+ result.createState();
+ }
+ result.setAccept(0, true);
+ return result;
+ }
+
Automaton result = new Automaton();
result.createState();
result.setAccept(0, true);
diff --git a/lucene/core/src/test/org/apache/lucene/util/automaton/TestOperations.java b/lucene/core/src/test/org/apache/lucene/util/automaton/TestOperations.java
index c158dd9..3de2630 100644
--- a/lucene/core/src/test/org/apache/lucene/util/automaton/TestOperations.java
+++ b/lucene/core/src/test/org/apache/lucene/util/automaton/TestOperations.java
@@ -412,4 +412,70 @@
return builder.finish();
}
+
+ public void testOptional() {
+ Automaton a = Automata.makeChar('a');
+
+ Automaton optionalA = new Automaton();
+ optionalA.createState();
+ optionalA.setAccept(0, true);
+ optionalA.finishState();
+ optionalA.createState();
+ optionalA.setAccept(1, true);
+ optionalA.addTransition(0, 1, 'a');
+ optionalA.finishState();
+
+ assertTrue(AutomatonTestUtil.sameLanguage(Operations.optional(a), optionalA));
+ assertSame(optionalA, Operations.optional(optionalA));
+
+ // Now test an automaton that has a transition to state 0. a(ba)*
+ a = new Automaton();
+ a.createState();
+ a.createState();
+ a.setAccept(1, true);
+ a.addTransition(0, 1, 'a');
+ a.finishState();
+ a.addTransition(1, 0, 'b');
+ a.finishState();
+
+ optionalA = new Automaton();
+ optionalA.createState();
+ optionalA.setAccept(0, true);
+ optionalA.createState();
+ optionalA.createState();
+ optionalA.setAccept(2, true);
+ optionalA.addTransition(0, 2, 'a');
+ optionalA.finishState();
+ optionalA.addTransition(1, 2, 'a');
+ optionalA.finishState();
+ optionalA.addTransition(2, 1, 'b');
+ optionalA.finishState();
+
+ assertTrue(AutomatonTestUtil.sameLanguage(Operations.optional(a), optionalA));
+ assertSame(optionalA, Operations.optional(optionalA));
+ }
+
+ public void testDuelOptional() {
+ final int iters = atLeast(1_000);
+ for (int iter = 0; iter < iters; ++iter) {
+ Automaton a = AutomatonTestUtil.randomAutomaton(random());
+ Automaton repeat1 = Operations.determinize(Operations.optional(a), Integer.MAX_VALUE);
+ Automaton repeat2 = Operations.determinize(naiveOptional(a), Integer.MAX_VALUE);
+ assertTrue(AutomatonTestUtil.sameLanguage(repeat1, repeat2));
+ }
+ }
+
+ // This is the original implementation of Operations#optional, before we improved it to generate
+ // simpler automata in some common cases.
+ private static Automaton naiveOptional(Automaton a) {
+ Automaton result = new Automaton();
+ result.createState();
+ result.setAccept(0, true);
+ if (a.getNumStates() > 0) {
+ result.copy(a);
+ result.addEpsilon(0, 1);
+ }
+ result.finishState();
+ return result;
+ }
}