LUCENE-5752: cutover to FixedBitSet to mark the accept states; improve javadocs

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene5752@1603489 13f79535-47bb-0310-9956-ffa450edef68
diff --git a/lucene/core/src/java/org/apache/lucene/util/automaton/Automaton.java b/lucene/core/src/java/org/apache/lucene/util/automaton/Automaton.java
index cb9c996..3603aba 100644
--- a/lucene/core/src/java/org/apache/lucene/util/automaton/Automaton.java
+++ b/lucene/core/src/java/org/apache/lucene/util/automaton/Automaton.java
@@ -24,6 +24,7 @@
 import java.util.Set;
 
 import org.apache.lucene.util.ArrayUtil;
+import org.apache.lucene.util.FixedBitSet;
 import org.apache.lucene.util.InPlaceMergeSorter;
 import org.apache.lucene.util.Sorter;
 
@@ -71,7 +72,7 @@
   /** Holds toState, min, max for each transition. */
   private int[] transitions = new int[6];
 
-  private final Set<Integer> acceptStates = new HashSet<Integer>();
+  private FixedBitSet isAccept = new FixedBitSet(4);
 
   /** True if no state has two transitions leaving with the same label. */
   private boolean deterministic = true;
@@ -86,18 +87,23 @@
     int state = nextState/2;
     states[nextState] = -1;
     nextState += 2;
+    if (state >= isAccept.length()) {
+      FixedBitSet newBits = new FixedBitSet(ArrayUtil.oversize(state+1, 1));
+      newBits.or(isAccept);
+      isAccept = newBits;
+    }
     return state;
   }
 
   /** Set or clear this state as an accept state. */
-  public void setAccept(int state, boolean isAccept) {
+  public void setAccept(int state, boolean accept) {
     if (state >= getNumStates()) {
       throw new IllegalArgumentException("state=" + state + " is out of bounds (numStates=" + getNumStates() + ")");
     }
-    if (isAccept) {
-      acceptStates.add(state);
+    if (accept) {
+      isAccept.set(state);
     } else {
-      acceptStates.remove(state);
+      isAccept.clear(state);
     }
   }
 
@@ -119,14 +125,14 @@
     return transitions;
   }
 
-  /** Returns accept states. */
-  public Set<Integer> getAcceptStates() {
-    return acceptStates;
+  /** Returns accept states.  If the bit is set then that state is an accept state. */
+  FixedBitSet getAcceptStates() {
+    return isAccept;
   }
 
   /** Returns true if this state is an accept state. */
   public boolean isAccept(int state) {
-    return acceptStates.contains(state);
+    return isAccept.get(state);
   }
 
   /** Add a new transition with min = max = label. */
@@ -195,12 +201,19 @@
       if (states[nextState+i] != -1) {
         states[nextState+i] += nextTransition;
       }
-      int state = i/2;
     }
     nextState += other.nextState;
-
-    for(int s : other.getAcceptStates()) {
-      setAccept(stateOffset+s, true);
+    if (isAccept.length() < nextState/2) {
+      FixedBitSet newBits = new FixedBitSet(ArrayUtil.oversize(nextState/2, 1));
+      newBits.or(isAccept);
+      isAccept = newBits;
+    }
+    int otherNumStates = other.getNumStates();
+    FixedBitSet otherAcceptStates = other.getAcceptStates();
+    int state = 0;
+    while (state < otherNumStates && (state = otherAcceptStates.nextSetBit(state)) != -1) {
+      setAccept(stateOffset + state, true);
+      state++;
     }
 
     // Bulk copy and then fixup dest for each transition:
diff --git a/lucene/core/src/java/org/apache/lucene/util/automaton/Operations.java b/lucene/core/src/java/org/apache/lucene/util/automaton/Operations.java
index ffb1aa8..6d2ecac 100644
--- a/lucene/core/src/java/org/apache/lucene/util/automaton/Operations.java
+++ b/lucene/core/src/java/org/apache/lucene/util/automaton/Operations.java
@@ -42,6 +42,7 @@
 
 import org.apache.lucene.util.ArrayUtil;
 import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.FixedBitSet;
 import org.apache.lucene.util.IntsRef;
 import org.apache.lucene.util.RamUsageEstimator;
 
@@ -239,7 +240,7 @@
       b = concatenate(as);
     }
 
-    Set<Integer> prevAcceptStates = new HashSet<>(b.getAcceptStates());
+    Set<Integer> prevAcceptStates = toSet(b, 0);
 
     for(int i=min;i<max;i++) {
       int numStates = b.getNumStates();
@@ -247,16 +248,26 @@
       for(int s : prevAcceptStates) {
         b.addEpsilon(s, numStates);
       }
-      prevAcceptStates.clear();
-      for(int s : a.getAcceptStates()) {
-        prevAcceptStates.add(numStates+s);
-      }
+      prevAcceptStates = toSet(a, numStates);
     }
 
     b.finishState();
 
     return b;
   }
+
+  private static Set<Integer> toSet(Automaton a, int offset) {
+    int numStates = a.getNumStates();
+    FixedBitSet isAccept = a.getAcceptStates();
+    Set<Integer> result = new HashSet<Integer>();
+    int upto = 0;
+    while (upto < numStates && (upto = isAccept.nextSetBit(upto)) != -1) {
+      result.add(offset+upto);
+      upto++;
+    }
+
+    return result;
+  }
   
   /**
    * Returns a (deterministic) automaton that accepts the complement of the
@@ -913,13 +924,16 @@
 
     LinkedList<Integer> workList = new LinkedList<>();
     BitSet live = new BitSet(numStates);
-    for (int s : a.getAcceptStates()) {
+    FixedBitSet acceptBits = a.getAcceptStates();
+    int s = 0;
+    while (s < numStates && (s = acceptBits.nextSetBit(s)) != -1) {
       live.set(s);
       workList.add(s);
+      s++;
     }
 
     while (workList.isEmpty() == false) {
-      int s = workList.removeFirst();
+      s = workList.removeFirst();
       int count = a2.initTransition(s, t);
       for(int i=0;i<count;i++) {
         a2.getNextTransition(t);
@@ -971,6 +985,7 @@
     assert hasDeadStates(result) == false;
     return result;
   }
+
   /**
    * Finds the largest entry whose value is less than or equal to c, or 0 if
    * there is no such entry.
@@ -988,7 +1003,8 @@
   }
   
   /**
-   * Returns true if the language of this automaton is finite.
+   * Returns true if the language of this automaton is finite.  The
+   * automaton must not have any dead states.
    */
   public static boolean isFinite(Automaton a) {
     if (a.getNumStates() == 0) {
@@ -998,7 +1014,7 @@
   }
   
   /**
-   * Checks whether there is a loop containing s. (This is sufficient since
+   * Checks whether there is a loop containing state. (This is sufficient since
    * there are never transitions to dead states.)
    */
   // TODO: not great that this is recursive... in theory a
@@ -1144,12 +1160,14 @@
 
     Automaton result = builder.finish();
 
-    for(int s : a.getAcceptStates()) {
-      assert s < numStates;
+    int s = 0;
+    FixedBitSet acceptStates = a.getAcceptStates();
+    while (s < numStates && (s = acceptStates.nextSetBit(s)) != -1) {
       result.addEpsilon(0, s+1);
       if (initialStates != null) {
         initialStates.add(s+1);
       }
+      s++;
     }
 
     result.finishState();