lucene/core/src/java/org/apache/lucene/util/automaton/CompiledAutomaton.java - lucene-solr - Git at Google

 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
  * this work for additional information regarding copyright ownership.
  * The ASF licenses this file to You under the Apache License, Version 2.0
  * (the "License"); you may not use this file except in compliance with
  * the License.  You may obtain a copy of the License at
  *
  *     http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
 package org.apache.lucene.util.automaton;


 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.List;

 import org.apache.lucene.index.SingleTermsEnum;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.index.Terms;
 import org.apache.lucene.index.TermsEnum;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.search.QueryVisitor;
 import org.apache.lucene.util.Accountable;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.BytesRefBuilder;
 import org.apache.lucene.util.IntsRef;
 import org.apache.lucene.util.RamUsageEstimator;
 import org.apache.lucene.util.StringHelper;
 import org.apache.lucene.util.UnicodeUtil;

 /**
  * Immutable class holding compiled details for a given
  * Automaton.  The Automaton is deterministic, must not have
  * dead states but is not necessarily minimal.
  *
  * @lucene.experimental
  */
 public class CompiledAutomaton implements Accountable {
   private static final long BASE_RAM_BYTES = RamUsageEstimator.shallowSizeOfInstance(CompiledAutomaton.class);

   /**
    * Automata are compiled into different internal forms for the
    * most efficient execution depending upon the language they accept.
    */
   public enum AUTOMATON_TYPE {
     /** Automaton that accepts no strings. */
     NONE,
     /** Automaton that accepts all possible strings. */
     ALL,
     /** Automaton that accepts only a single fixed string. */
     SINGLE,
     /** Catch-all for any other automata. */
     NORMAL
   };

   /** If simplify is true this will be the "simplified" type; else, this is NORMAL */
   public final AUTOMATON_TYPE type;

   /**
    * For {@link AUTOMATON_TYPE#SINGLE} this is the singleton term.
    */
   public final BytesRef term;

   /**
    * Matcher for quickly determining if a byte[] is accepted.
    * only valid for {@link AUTOMATON_TYPE#NORMAL}.
    */
   public final ByteRunAutomaton runAutomaton;

   /**
    * Two dimensional array of transitions, indexed by state
    * number for traversal. The state numbering is consistent with
    * {@link #runAutomaton}.
    * Only valid for {@link AUTOMATON_TYPE#NORMAL}.
    */
   public final Automaton automaton;

   /**
    * Shared common suffix accepted by the automaton. Only valid
    * for {@link AUTOMATON_TYPE#NORMAL}, and only when the
    * automaton accepts an infinite language.  This will be null
    * if the common prefix is length 0.
    */
   public final BytesRef commonSuffixRef;

   /**
    * Indicates if the automaton accepts a finite set of strings.
    * Null if this was not computed.
    * Only valid for {@link AUTOMATON_TYPE#NORMAL}.
    */
   public final Boolean finite;

   /** Which state, if any, accepts all suffixes, else -1. */
   public final int sinkState;

   /** Create this, passing simplify=true and finite=null, so that we try
    *  to simplify the automaton and determine if it is finite. */
   public CompiledAutomaton(Automaton automaton) {
     this(automaton, null, true);
   }

   /** Returns sink state, if present, else -1. */
   private static int findSinkState(Automaton automaton) {
     int numStates = automaton.getNumStates();
     Transition t = new Transition();
     int foundState = -1;
     for (int s=0;s<numStates;s++) {
       if (automaton.isAccept(s)) {
         int count = automaton.initTransition(s, t);
         boolean isSinkState = false;
         for(int i=0;i<count;i++) {
           automaton.getNextTransition(t);
           if (t.dest == s && t.min == 0 && t.max == 0xff) {
             isSinkState = true;
             break;
           }
         }
         if (isSinkState) {
           foundState = s;
           break;
         }
       }
     }

     return foundState;
   }

   /** Create this.  If finite is null, we use {@link Operations#isFinite}
    *  to determine whether it is finite.  If simplify is true, we run
    *  possibly expensive operations to determine if the automaton is one
    *  the cases in {@link CompiledAutomaton.AUTOMATON_TYPE}. */
   public CompiledAutomaton(Automaton automaton, Boolean finite, boolean simplify) {
     this(automaton, finite, simplify, Operations.DEFAULT_DETERMINIZE_WORK_LIMIT, false);
   }


   /** Create this.  If finite is null, we use {@link Operations#isFinite}
    *  to determine whether it is finite.  If simplify is true, we run
    *  possibly expensive operations to determine if the automaton is one
    *  the cases in {@link CompiledAutomaton.AUTOMATON_TYPE}. If simplify
    *  requires determinizing the automaton then at most determinizeWorkLimit
    *  effort will be spent.  Any more than that will cause a
    *  TooComplexToDeterminizeException.
    */
   public CompiledAutomaton(Automaton automaton, Boolean finite, boolean simplify,
                            int determinizeWorkLimit, boolean isBinary) {
     if (automaton.getNumStates() == 0) {
       automaton = new Automaton();
       automaton.createState();
     }

     if (simplify) {

       // Test whether the automaton is a "simple" form and
       // if so, don't create a runAutomaton.  Note that on a
       // large automaton these tests could be costly:

       if (Operations.isEmpty(automaton)) {
         // matches nothing
         type = AUTOMATON_TYPE.NONE;
         term = null;
         commonSuffixRef = null;
         runAutomaton = null;
         this.automaton = null;
         this.finite = null;
         sinkState = -1;
         return;
       }

       boolean isTotal;

       // NOTE: only approximate, because automaton may not be minimal:
       if (isBinary) {
         isTotal = Operations.isTotal(automaton, 0, 0xff);
       } else {
         isTotal = Operations.isTotal(automaton);
       }

       if (isTotal) {
         // matches all possible strings
         type = AUTOMATON_TYPE.ALL;
         term = null;
         commonSuffixRef = null;
         runAutomaton = null;
         this.automaton = null;
         this.finite = null;
         sinkState = -1;
         return;
       }

       automaton = Operations.determinize(automaton, determinizeWorkLimit);

       IntsRef singleton = Operations.getSingleton(automaton);

       if (singleton != null) {
         // matches a fixed string
         type = AUTOMATON_TYPE.SINGLE;
         commonSuffixRef = null;
         runAutomaton = null;
         this.automaton = null;
         this.finite = null;

         if (isBinary) {
           term = StringHelper.intsRefToBytesRef(singleton);
         } else {
           term = new BytesRef(UnicodeUtil.newString(singleton.ints, singleton.offset, singleton.length));
         }
         sinkState = -1;
         return;
       }
     }

     type = AUTOMATON_TYPE.NORMAL;
     term = null;

     if (finite == null) {
       this.finite = Operations.isFinite(automaton);
     } else {
       this.finite = finite;
     }

     Automaton binary;
     if (isBinary) {
       // Caller already built binary automaton themselves, e.g. PrefixQuery
       // does this since it can be provided with a binary (not necessarily
       // UTF8!) term:
       binary = automaton;
     } else {
       // Incoming automaton is unicode, and we must convert to UTF8 to match what's in the index:
       binary = new UTF32ToUTF8().convert(automaton);
     }

     // compute a common suffix for infinite DFAs, this is an optimization for "leading wildcard"
     // so don't burn cycles on it if the DFA is finite, or largeish
     if (this.finite || automaton.getNumStates() + automaton.getNumTransitions() > 1000) {
       commonSuffixRef = null;
     } else {
       BytesRef suffix = Operations.getCommonSuffixBytesRef(binary);
       if (suffix.length == 0) {
         commonSuffixRef = null;
       } else {
         commonSuffixRef = suffix;
       }
     }

     // This will determinize the binary automaton for us:
     runAutomaton = new ByteRunAutomaton(binary, true, determinizeWorkLimit);

     this.automaton = runAutomaton.automaton;

     // TODO: this is a bit fragile because if the automaton is not minimized there could be more than 1 sink state but auto-prefix will fail
     // to run for those:
     sinkState = findSinkState(this.automaton);
   }

   private Transition transition = new Transition();

   //private static final boolean DEBUG = BlockTreeTermsWriter.DEBUG;

   private BytesRef addTail(int state, BytesRefBuilder term, int idx, int leadLabel) {
     //System.out.println("addTail state=" + state + " term=" + term.utf8ToString() + " idx=" + idx + " leadLabel=" + (char) leadLabel);
     //System.out.println(automaton.toDot());
     // Find biggest transition that's < label
     // TODO: use binary search here
     int maxIndex = -1;
     int numTransitions = automaton.initTransition(state, transition);
     for(int i=0;i<numTransitions;i++) {
       automaton.getNextTransition(transition);
       if (transition.min < leadLabel) {
         maxIndex = i;
       } else {
         // Transitions are always sorted
         break;
       }
     }

     //System.out.println("  maxIndex=" + maxIndex);

     assert maxIndex != -1;
     automaton.getTransition(state, maxIndex, transition);

     // Append floorLabel
     final int floorLabel;
     if (transition.max > leadLabel-1) {
       floorLabel = leadLabel-1;
     } else {
       floorLabel = transition.max;
     }
     //System.out.println("  floorLabel=" + (char) floorLabel);
     term.grow(1+idx);
     //if (DEBUG) System.out.println("  add floorLabel=" + (char) floorLabel + " idx=" + idx);
     term.setByteAt(idx, (byte) floorLabel);

     state = transition.dest;
     //System.out.println("  dest: " + state);
     idx++;

     // Push down to last accept state
     while (true) {
       numTransitions = automaton.getNumTransitions(state);
       if (numTransitions == 0) {
         //System.out.println("state=" + state + " 0 trans");
         assert runAutomaton.isAccept(state);
         term.setLength(idx);
         //if (DEBUG) System.out.println("  return " + term.utf8ToString());
         return term.get();
       } else {
         // We are pushing "top" -- so get last label of
         // last transition:
         //System.out.println("get state=" + state + " numTrans=" + numTransitions);
         automaton.getTransition(state, numTransitions-1, transition);
         term.grow(1+idx);
         //if (DEBUG) System.out.println("  push maxLabel=" + (char) lastTransition.max + " idx=" + idx);
         //System.out.println("  add trans dest=" + scratch.dest + " label=" + (char) scratch.max);
         term.setByteAt(idx, (byte) transition.max);
         state = transition.dest;
         idx++;
       }
     }
   }

   // TODO: should this take startTerm too?  This way
   // Terms.intersect could forward to this method if type !=
   // NORMAL:
   /** Return a {@link TermsEnum} intersecting the provided {@link Terms}
    *  with the terms accepted by this automaton. */
   public TermsEnum getTermsEnum(Terms terms) throws IOException {
     switch(type) {
     case NONE:
       return TermsEnum.EMPTY;
     case ALL:
       return terms.iterator();
     case SINGLE:
       return new SingleTermsEnum(terms.iterator(), term);
     case NORMAL:
       return terms.intersect(this, null);
     default:
       // unreachable
       throw new RuntimeException("unhandled case");
     }
   }

   /**
    * Report back to a QueryVisitor how this automaton matches terms
    */
   public void visit(QueryVisitor visitor, Query parent, String field) {
     if (visitor.acceptField(field)) {
       switch (type) {
         case NORMAL:
           visitor.consumeTermsMatching(parent, field, () -> runAutomaton);
           break;
         case NONE:
           break;
         case ALL:
           visitor.consumeTermsMatching(parent, field, () -> new ByteRunAutomaton(Automata.makeAnyString()));
           break;
         case SINGLE:
           visitor.consumeTerms(parent, new Term(field, term));
           break;
       }
     }
   }

   /** Finds largest term accepted by this Automaton, that's
    *  &lt;= the provided input term.  The result is placed in
    *  output; it's fine for output and input to point to
    *  the same bytes.  The returned result is either the
    *  provided output, or null if there is no floor term
    *  (ie, the provided input term is before the first term
    *  accepted by this Automaton). */
   public BytesRef floor(BytesRef input, BytesRefBuilder output) {

     //if (DEBUG) System.out.println("CA.floor input=" + input.utf8ToString());

     int state = 0;

     // Special case empty string:
     if (input.length == 0) {
       if (runAutomaton.isAccept(state)) {
         output.clear();
         return output.get();
       } else {
         return null;
       }
     }

     final List<Integer> stack = new ArrayList<>();

     int idx = 0;
     while (true) {
       int label = input.bytes[input.offset + idx] & 0xff;
       int nextState = runAutomaton.step(state, label);
       //if (DEBUG) System.out.println("  cycle label=" + (char) label + " nextState=" + nextState);

       if (idx == input.length-1) {
         if (nextState != -1 && runAutomaton.isAccept(nextState)) {
           // Input string is accepted
           output.grow(1+idx);
           output.setByteAt(idx, (byte) label);
           output.setLength(input.length);
           //if (DEBUG) System.out.println("  input is accepted; return term=" + output.utf8ToString());
           return output.get();
         } else {
           nextState = -1;
         }
       }

       if (nextState == -1) {

         // Pop back to a state that has a transition
         // <= our label:
         while (true) {
           int numTransitions = automaton.getNumTransitions(state);
           if (numTransitions == 0) {
             assert runAutomaton.isAccept(state);
             output.setLength(idx);
             //if (DEBUG) System.out.println("  return " + output.utf8ToString());
             return output.get();
           } else {
             automaton.getTransition(state, 0, transition);

             if (label-1 < transition.min) {

               if (runAutomaton.isAccept(state)) {
                 output.setLength(idx);
                 //if (DEBUG) System.out.println("  return " + output.utf8ToString());
                 return output.get();
               }
               // pop
               if (stack.size() == 0) {
                 //if (DEBUG) System.out.println("  pop ord=" + idx + " return null");
                 return null;
               } else {
                 state = stack.remove(stack.size()-1);
                 idx--;
                 //if (DEBUG) System.out.println("  pop ord=" + (idx+1) + " label=" + (char) label + " first trans.min=" + (char) transitions[0].min);
                 label = input.bytes[input.offset + idx] & 0xff;
               }
             } else {
               //if (DEBUG) System.out.println("  stop pop ord=" + idx + " first trans.min=" + (char) transitions[0].min);
               break;
             }
           }
         }

         //if (DEBUG) System.out.println("  label=" + (char) label + " idx=" + idx);

         return addTail(state, output, idx, label);

       } else {
         output.grow(1+idx);
         output.setByteAt(idx, (byte) label);
         stack.add(state);
         state = nextState;
         idx++;
       }
     }
   }

   @Override
   public int hashCode() {
     final int prime = 31;
     int result = 1;
     result = prime * result + ((runAutomaton == null) ? 0 : runAutomaton.hashCode());
     result = prime * result + ((term == null) ? 0 : term.hashCode());
     result = prime * result + ((type == null) ? 0 : type.hashCode());
     return result;
   }

   @Override
   public boolean equals(Object obj) {
     if (this == obj) return true;
     if (obj == null) return false;
     if (getClass() != obj.getClass()) return false;
     CompiledAutomaton other = (CompiledAutomaton) obj;
     if (type != other.type) return false;
     if (type == AUTOMATON_TYPE.SINGLE) {
       if (!term.equals(other.term)) return false;
     } else if (type == AUTOMATON_TYPE.NORMAL) {
       if (!runAutomaton.equals(other.runAutomaton)) return false;
     }

     return true;
   }

   @Override
   public long ramBytesUsed() {
     return BASE_RAM_BYTES +
         RamUsageEstimator.sizeOfObject(automaton) +
         RamUsageEstimator.sizeOfObject(commonSuffixRef) +
         RamUsageEstimator.sizeOfObject(runAutomaton) +
         RamUsageEstimator.sizeOfObject(term) +
         RamUsageEstimator.sizeOfObject(transition);
   }

 }
	/*
	* Licensed to the Apache Software Foundation (ASF) under one or more
	* contributor license agreements. See the NOTICE file distributed with
	* this work for additional information regarding copyright ownership.
	* The ASF licenses this file to You under the Apache License, Version 2.0
	* (the "License"); you may not use this file except in compliance with
	* the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/
	package org.apache.lucene.util.automaton;


	import java.io.IOException;
	import java.util.ArrayList;
	import java.util.List;

	import org.apache.lucene.index.SingleTermsEnum;
	import org.apache.lucene.index.Term;
	import org.apache.lucene.index.Terms;
	import org.apache.lucene.index.TermsEnum;
	import org.apache.lucene.search.Query;
	import org.apache.lucene.search.QueryVisitor;
	import org.apache.lucene.util.Accountable;
	import org.apache.lucene.util.BytesRef;
	import org.apache.lucene.util.BytesRefBuilder;
	import org.apache.lucene.util.IntsRef;
	import org.apache.lucene.util.RamUsageEstimator;
	import org.apache.lucene.util.StringHelper;
	import org.apache.lucene.util.UnicodeUtil;

	/**
	* Immutable class holding compiled details for a given
	* Automaton. The Automaton is deterministic, must not have
	* dead states but is not necessarily minimal.
	*
	* @lucene.experimental
	*/
	public class CompiledAutomaton implements Accountable {
	private static final long BASE_RAM_BYTES = RamUsageEstimator.shallowSizeOfInstance(CompiledAutomaton.class);

	/**
	* Automata are compiled into different internal forms for the
	* most efficient execution depending upon the language they accept.
	*/
	public enum AUTOMATON_TYPE {
	/** Automaton that accepts no strings. */
	NONE,
	/** Automaton that accepts all possible strings. */
	ALL,
	/** Automaton that accepts only a single fixed string. */
	SINGLE,
	/** Catch-all for any other automata. */
	NORMAL
	};

	/** If simplify is true this will be the "simplified" type; else, this is NORMAL */
	public final AUTOMATON_TYPE type;

	/**
	* For {@link AUTOMATON_TYPE#SINGLE} this is the singleton term.
	*/
	public final BytesRef term;

	/**
	* Matcher for quickly determining if a byte[] is accepted.
	* only valid for {@link AUTOMATON_TYPE#NORMAL}.
	*/
	public final ByteRunAutomaton runAutomaton;

	/**
	* Two dimensional array of transitions, indexed by state
	* number for traversal. The state numbering is consistent with
	* {@link #runAutomaton}.
	* Only valid for {@link AUTOMATON_TYPE#NORMAL}.
	*/
	public final Automaton automaton;

	/**
	* Shared common suffix accepted by the automaton. Only valid
	* for {@link AUTOMATON_TYPE#NORMAL}, and only when the
	* automaton accepts an infinite language. This will be null
	* if the common prefix is length 0.
	*/
	public final BytesRef commonSuffixRef;

	/**
	* Indicates if the automaton accepts a finite set of strings.
	* Null if this was not computed.
	* Only valid for {@link AUTOMATON_TYPE#NORMAL}.
	*/
	public final Boolean finite;

	/** Which state, if any, accepts all suffixes, else -1. */
	public final int sinkState;

	/** Create this, passing simplify=true and finite=null, so that we try
	* to simplify the automaton and determine if it is finite. */
	public CompiledAutomaton(Automaton automaton) {
	this(automaton, null, true);
	}

	/** Returns sink state, if present, else -1. */
	private static int findSinkState(Automaton automaton) {
	int numStates = automaton.getNumStates();
	Transition t = new Transition();
	int foundState = -1;
	for (int s=0;s<numStates;s++) {
	if (automaton.isAccept(s)) {
	int count = automaton.initTransition(s, t);
	boolean isSinkState = false;
	for(int i=0;i<count;i++) {
	automaton.getNextTransition(t);
	if (t.dest == s && t.min == 0 && t.max == 0xff) {
	isSinkState = true;
	break;
	}
	}
	if (isSinkState) {
	foundState = s;
	break;
	}
	}
	}

	return foundState;
	}

	/** Create this. If finite is null, we use {@link Operations#isFinite}
	* to determine whether it is finite. If simplify is true, we run
	* possibly expensive operations to determine if the automaton is one
	* the cases in {@link CompiledAutomaton.AUTOMATON_TYPE}. */
	public CompiledAutomaton(Automaton automaton, Boolean finite, boolean simplify) {
	this(automaton, finite, simplify, Operations.DEFAULT_DETERMINIZE_WORK_LIMIT, false);
	}


	/** Create this. If finite is null, we use {@link Operations#isFinite}
	* to determine whether it is finite. If simplify is true, we run
	* possibly expensive operations to determine if the automaton is one
	* the cases in {@link CompiledAutomaton.AUTOMATON_TYPE}. If simplify
	* requires determinizing the automaton then at most determinizeWorkLimit
	* effort will be spent. Any more than that will cause a
	* TooComplexToDeterminizeException.
	*/
	public CompiledAutomaton(Automaton automaton, Boolean finite, boolean simplify,
	int determinizeWorkLimit, boolean isBinary) {
	if (automaton.getNumStates() == 0) {
	automaton = new Automaton();
	automaton.createState();
	}

	if (simplify) {

	// Test whether the automaton is a "simple" form and
	// if so, don't create a runAutomaton. Note that on a
	// large automaton these tests could be costly:

	if (Operations.isEmpty(automaton)) {
	// matches nothing
	type = AUTOMATON_TYPE.NONE;
	term = null;
	commonSuffixRef = null;
	runAutomaton = null;
	this.automaton = null;
	this.finite = null;
	sinkState = -1;
	return;
	}

	boolean isTotal;

	// NOTE: only approximate, because automaton may not be minimal:
	if (isBinary) {
	isTotal = Operations.isTotal(automaton, 0, 0xff);
	} else {
	isTotal = Operations.isTotal(automaton);
	}

	if (isTotal) {
	// matches all possible strings
	type = AUTOMATON_TYPE.ALL;
	term = null;
	commonSuffixRef = null;
	runAutomaton = null;
	this.automaton = null;
	this.finite = null;
	sinkState = -1;
	return;
	}

	automaton = Operations.determinize(automaton, determinizeWorkLimit);

	IntsRef singleton = Operations.getSingleton(automaton);

	if (singleton != null) {
	// matches a fixed string
	type = AUTOMATON_TYPE.SINGLE;
	commonSuffixRef = null;
	runAutomaton = null;
	this.automaton = null;
	this.finite = null;

	if (isBinary) {
	term = StringHelper.intsRefToBytesRef(singleton);
	} else {
	term = new BytesRef(UnicodeUtil.newString(singleton.ints, singleton.offset, singleton.length));
	}
	sinkState = -1;
	return;
	}
	}

	type = AUTOMATON_TYPE.NORMAL;
	term = null;

	if (finite == null) {
	this.finite = Operations.isFinite(automaton);
	} else {
	this.finite = finite;
	}

	Automaton binary;
	if (isBinary) {
	// Caller already built binary automaton themselves, e.g. PrefixQuery
	// does this since it can be provided with a binary (not necessarily
	// UTF8!) term:
	binary = automaton;
	} else {
	// Incoming automaton is unicode, and we must convert to UTF8 to match what's in the index:
	binary = new UTF32ToUTF8().convert(automaton);
	}

	// compute a common suffix for infinite DFAs, this is an optimization for "leading wildcard"
	// so don't burn cycles on it if the DFA is finite, or largeish
	if (this.finite \|\| automaton.getNumStates() + automaton.getNumTransitions() > 1000) {
	commonSuffixRef = null;
	} else {
	BytesRef suffix = Operations.getCommonSuffixBytesRef(binary);
	if (suffix.length == 0) {
	commonSuffixRef = null;
	} else {
	commonSuffixRef = suffix;
	}
	}

	// This will determinize the binary automaton for us:
	runAutomaton = new ByteRunAutomaton(binary, true, determinizeWorkLimit);

	this.automaton = runAutomaton.automaton;

	// TODO: this is a bit fragile because if the automaton is not minimized there could be more than 1 sink state but auto-prefix will fail
	// to run for those:
	sinkState = findSinkState(this.automaton);
	}

	private Transition transition = new Transition();

	//private static final boolean DEBUG = BlockTreeTermsWriter.DEBUG;

	private BytesRef addTail(int state, BytesRefBuilder term, int idx, int leadLabel) {
	//System.out.println("addTail state=" + state + " term=" + term.utf8ToString() + " idx=" + idx + " leadLabel=" + (char) leadLabel);
	//System.out.println(automaton.toDot());
	// Find biggest transition that's < label
	// TODO: use binary search here
	int maxIndex = -1;
	int numTransitions = automaton.initTransition(state, transition);
	for(int i=0;i<numTransitions;i++) {
	automaton.getNextTransition(transition);
	if (transition.min < leadLabel) {
	maxIndex = i;
	} else {
	// Transitions are always sorted
	break;
	}
	}

	//System.out.println(" maxIndex=" + maxIndex);

	assert maxIndex != -1;
	automaton.getTransition(state, maxIndex, transition);

	// Append floorLabel
	final int floorLabel;
	if (transition.max > leadLabel-1) {
	floorLabel = leadLabel-1;
	} else {
	floorLabel = transition.max;
	}
	//System.out.println(" floorLabel=" + (char) floorLabel);
	term.grow(1+idx);
	//if (DEBUG) System.out.println(" add floorLabel=" + (char) floorLabel + " idx=" + idx);
	term.setByteAt(idx, (byte) floorLabel);

	state = transition.dest;
	//System.out.println(" dest: " + state);
	idx++;

	// Push down to last accept state
	while (true) {
	numTransitions = automaton.getNumTransitions(state);
	if (numTransitions == 0) {
	//System.out.println("state=" + state + " 0 trans");
	assert runAutomaton.isAccept(state);
	term.setLength(idx);
	//if (DEBUG) System.out.println(" return " + term.utf8ToString());
	return term.get();
	} else {
	// We are pushing "top" -- so get last label of
	// last transition:
	//System.out.println("get state=" + state + " numTrans=" + numTransitions);
	automaton.getTransition(state, numTransitions-1, transition);
	term.grow(1+idx);
	//if (DEBUG) System.out.println(" push maxLabel=" + (char) lastTransition.max + " idx=" + idx);
	//System.out.println(" add trans dest=" + scratch.dest + " label=" + (char) scratch.max);
	term.setByteAt(idx, (byte) transition.max);
	state = transition.dest;
	idx++;
	}
	}
	}

	// TODO: should this take startTerm too? This way
	// Terms.intersect could forward to this method if type !=
	// NORMAL:
	/** Return a {@link TermsEnum} intersecting the provided {@link Terms}
	* with the terms accepted by this automaton. */
	public TermsEnum getTermsEnum(Terms terms) throws IOException {
	switch(type) {
	case NONE:
	return TermsEnum.EMPTY;
	case ALL:
	return terms.iterator();
	case SINGLE:
	return new SingleTermsEnum(terms.iterator(), term);
	case NORMAL:
	return terms.intersect(this, null);
	default:
	// unreachable
	throw new RuntimeException("unhandled case");
	}
	}

	/**
	* Report back to a QueryVisitor how this automaton matches terms
	*/
	public void visit(QueryVisitor visitor, Query parent, String field) {
	if (visitor.acceptField(field)) {
	switch (type) {
	case NORMAL:
	visitor.consumeTermsMatching(parent, field, () -> runAutomaton);
	break;
	case NONE:
	break;
	case ALL:
	visitor.consumeTermsMatching(parent, field, () -> new ByteRunAutomaton(Automata.makeAnyString()));
	break;
	case SINGLE:
	visitor.consumeTerms(parent, new Term(field, term));
	break;
	}
	}
	}

	/** Finds largest term accepted by this Automaton, that's
	* <= the provided input term. The result is placed in
	* output; it's fine for output and input to point to
	* the same bytes. The returned result is either the
	* provided output, or null if there is no floor term
	* (ie, the provided input term is before the first term
	* accepted by this Automaton). */
	public BytesRef floor(BytesRef input, BytesRefBuilder output) {

	//if (DEBUG) System.out.println("CA.floor input=" + input.utf8ToString());

	int state = 0;

	// Special case empty string:
	if (input.length == 0) {
	if (runAutomaton.isAccept(state)) {
	output.clear();
	return output.get();
	} else {
	return null;
	}
	}

	final List<Integer> stack = new ArrayList<>();

	int idx = 0;
	while (true) {
	int label = input.bytes[input.offset + idx] & 0xff;
	int nextState = runAutomaton.step(state, label);
	//if (DEBUG) System.out.println(" cycle label=" + (char) label + " nextState=" + nextState);

	if (idx == input.length-1) {
	if (nextState != -1 && runAutomaton.isAccept(nextState)) {
	// Input string is accepted
	output.grow(1+idx);
	output.setByteAt(idx, (byte) label);
	output.setLength(input.length);
	//if (DEBUG) System.out.println(" input is accepted; return term=" + output.utf8ToString());
	return output.get();
	} else {
	nextState = -1;
	}
	}

	if (nextState == -1) {

	// Pop back to a state that has a transition
	// <= our label:
	while (true) {
	int numTransitions = automaton.getNumTransitions(state);
	if (numTransitions == 0) {
	assert runAutomaton.isAccept(state);
	output.setLength(idx);
	//if (DEBUG) System.out.println(" return " + output.utf8ToString());
	return output.get();
	} else {
	automaton.getTransition(state, 0, transition);

	if (label-1 < transition.min) {

	if (runAutomaton.isAccept(state)) {
	output.setLength(idx);
	//if (DEBUG) System.out.println(" return " + output.utf8ToString());
	return output.get();
	}
	// pop
	if (stack.size() == 0) {
	//if (DEBUG) System.out.println(" pop ord=" + idx + " return null");
	return null;
	} else {
	state = stack.remove(stack.size()-1);
	idx--;
	//if (DEBUG) System.out.println(" pop ord=" + (idx+1) + " label=" + (char) label + " first trans.min=" + (char) transitions[0].min);
	label = input.bytes[input.offset + idx] & 0xff;
	}
	} else {
	//if (DEBUG) System.out.println(" stop pop ord=" + idx + " first trans.min=" + (char) transitions[0].min);
	break;
	}
	}
	}

	//if (DEBUG) System.out.println(" label=" + (char) label + " idx=" + idx);

	return addTail(state, output, idx, label);

	} else {
	output.grow(1+idx);
	output.setByteAt(idx, (byte) label);
	stack.add(state);
	state = nextState;
	idx++;
	}
	}
	}

	@Override
	public int hashCode() {
	final int prime = 31;
	int result = 1;
	result = prime * result + ((runAutomaton == null) ? 0 : runAutomaton.hashCode());
	result = prime * result + ((term == null) ? 0 : term.hashCode());
	result = prime * result + ((type == null) ? 0 : type.hashCode());
	return result;
	}

	@Override
	public boolean equals(Object obj) {
	if (this == obj) return true;
	if (obj == null) return false;
	if (getClass() != obj.getClass()) return false;
	CompiledAutomaton other = (CompiledAutomaton) obj;
	if (type != other.type) return false;
	if (type == AUTOMATON_TYPE.SINGLE) {
	if (!term.equals(other.term)) return false;
	} else if (type == AUTOMATON_TYPE.NORMAL) {
	if (!runAutomaton.equals(other.runAutomaton)) return false;
	}

	return true;
	}

	@Override
	public long ramBytesUsed() {
	return BASE_RAM_BYTES +
	RamUsageEstimator.sizeOfObject(automaton) +
	RamUsageEstimator.sizeOfObject(commonSuffixRef) +
	RamUsageEstimator.sizeOfObject(runAutomaton) +
	RamUsageEstimator.sizeOfObject(term) +
	RamUsageEstimator.sizeOfObject(transition);
	}

	}