lucene/sandbox/src/java/org/apache/lucene/codecs/idversion/IDVersionSegmentTermsEnum.java - lucene-solr - Git at Google

 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
  * this work for additional information regarding copyright ownership.
  * The ASF licenses this file to You under the Apache License, Version 2.0
  * (the "License"); you may not use this file except in compliance with
  * the License.  You may obtain a copy of the License at
  *
  *     http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
 package org.apache.lucene.codecs.idversion;

 import java.io.IOException;
 import java.io.PrintStream;

 import org.apache.lucene.codecs.BlockTermState;
 import org.apache.lucene.index.BaseTermsEnum;
 import org.apache.lucene.index.ImpactsEnum;
 import org.apache.lucene.index.PostingsEnum;
 import org.apache.lucene.index.SlowImpactsEnum;
 import org.apache.lucene.index.TermState;
 import org.apache.lucene.store.ByteArrayDataInput;
 import org.apache.lucene.store.IndexInput;
 import org.apache.lucene.util.ArrayUtil;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.BytesRefBuilder;
 import org.apache.lucene.util.RamUsageEstimator;
 import org.apache.lucene.util.fst.FST;
 import org.apache.lucene.util.fst.PairOutputs.Pair;
 import org.apache.lucene.util.fst.Util;

 /** Iterates through terms in this field; this class is public so users
  *  can cast it to call {@link #seekExact(BytesRef, long)} for
  *  optimistic-concurrency, and also {@link #getVersion} to get the
  *  version of the currently seek'd term. */
 public final class IDVersionSegmentTermsEnum extends BaseTermsEnum {

   // Lazy init:
   IndexInput in;

   // static boolean DEBUG = false;

   private IDVersionSegmentTermsEnumFrame[] stack;
   private final IDVersionSegmentTermsEnumFrame staticFrame;
   IDVersionSegmentTermsEnumFrame currentFrame;
   boolean termExists;
   final VersionFieldReader fr;

   private int targetBeforeCurrentLength;

   private final ByteArrayDataInput scratchReader = new ByteArrayDataInput();

   // What prefix of the current term was present in the index:
   private int validIndexPrefix;

   // assert only:
   private boolean eof;

   final BytesRefBuilder term = new BytesRefBuilder();
   private final FST.BytesReader fstReader;

   @SuppressWarnings({"rawtypes","unchecked"}) private FST.Arc<Pair<BytesRef,Long>>[] arcs =
   new FST.Arc[1];

   IDVersionSegmentTermsEnum(VersionFieldReader fr) throws IOException {
     this.fr = fr;

     //if (DEBUG) System.out.println("BTTR.init seg=" + segment);
     stack = new IDVersionSegmentTermsEnumFrame[0];

     // Used to hold seek by TermState, or cached seek
     staticFrame = new IDVersionSegmentTermsEnumFrame(this, -1);

     if (fr.index == null) {
       fstReader = null;
     } else {
       fstReader = fr.index.getBytesReader();
     }

     // Init w/ root block; don't use index since it may
     // not (and need not) have been loaded
     for(int arcIdx=0;arcIdx<arcs.length;arcIdx++) {
       arcs[arcIdx] = new FST.Arc<>();
     }

     currentFrame = staticFrame;
     final FST.Arc<Pair<BytesRef,Long>> arc;
     if (fr.index != null) {
       arc = fr.index.getFirstArc(arcs[0]);
       // Empty string prefix must have an output in the index!
       assert arc.isFinal();
     } else {
       arc = null;
     }
     currentFrame = staticFrame;
     //currentFrame = pushFrame(arc, rootCode, 0);
     //currentFrame.loadBlock();
     validIndexPrefix = 0;
     // if (DEBUG) {
     //   System.out.println("init frame state " + currentFrame.ord);
     //   printSeekState();
     // }

     //System.out.println();
     // computeBlockStats().print(System.out);
   }

   // Not private to avoid synthetic access$NNN methods
   void initIndexInput() {
     if (this.in == null) {
       this.in = fr.parent.in.clone();
     }
   }

   private IDVersionSegmentTermsEnumFrame getFrame(int ord) throws IOException {
     if (ord >= stack.length) {
       final IDVersionSegmentTermsEnumFrame[] next = new IDVersionSegmentTermsEnumFrame[ArrayUtil.oversize(1+ord, RamUsageEstimator.NUM_BYTES_OBJECT_REF)];
       System.arraycopy(stack, 0, next, 0, stack.length);
       for(int stackOrd=stack.length;stackOrd<next.length;stackOrd++) {
         next[stackOrd] = new IDVersionSegmentTermsEnumFrame(this, stackOrd);
       }
       stack = next;
     }
     assert stack[ord].ord == ord;
     return stack[ord];
   }

   private FST.Arc<Pair<BytesRef,Long>> getArc(int ord) {
     if (ord >= arcs.length) {
       @SuppressWarnings({"rawtypes","unchecked"}) final FST.Arc<Pair<BytesRef,Long>>[] next =
       new FST.Arc[ArrayUtil.oversize(1+ord, RamUsageEstimator.NUM_BYTES_OBJECT_REF)];
       System.arraycopy(arcs, 0, next, 0, arcs.length);
       for(int arcOrd=arcs.length;arcOrd<next.length;arcOrd++) {
         next[arcOrd] = new FST.Arc<>();
       }
       arcs = next;
     }
     return arcs[ord];
   }

   // Pushes a frame we seek'd to
   IDVersionSegmentTermsEnumFrame pushFrame(FST.Arc<Pair<BytesRef,Long>> arc, Pair<BytesRef,Long> frameData, int length) throws IOException {
     scratchReader.reset(frameData.output1.bytes, frameData.output1.offset, frameData.output1.length);
     final long code = scratchReader.readVLong();
     final long fpSeek = code >>> VersionBlockTreeTermsWriter.OUTPUT_FLAGS_NUM_BITS;
     final IDVersionSegmentTermsEnumFrame f = getFrame(1+currentFrame.ord);
     f.maxIDVersion = Long.MAX_VALUE - frameData.output2;
     f.hasTerms = (code & VersionBlockTreeTermsWriter.OUTPUT_FLAG_HAS_TERMS) != 0;
     f.hasTermsOrig = f.hasTerms;
     f.isFloor = (code & VersionBlockTreeTermsWriter.OUTPUT_FLAG_IS_FLOOR) != 0;
     if (f.isFloor) {
       f.setFloorData(scratchReader, frameData.output1);
     }
     pushFrame(arc, fpSeek, length);

     return f;
   }

   // Pushes next'd frame or seek'd frame; we later
   // lazy-load the frame only when needed
   IDVersionSegmentTermsEnumFrame pushFrame(FST.Arc<Pair<BytesRef,Long>> arc, long fp, int length) throws IOException {
     final IDVersionSegmentTermsEnumFrame f = getFrame(1+currentFrame.ord);
     f.arc = arc;
     if (f.fpOrig == fp && f.nextEnt != -1) {
       //if (DEBUG) System.out.println("      push reused frame ord=" + f.ord + " fp=" + f.fp + " isFloor?=" + f.isFloor + " hasTerms=" + f.hasTerms + " pref=" + term + " nextEnt=" + f.nextEnt + " targetBeforeCurrentLength=" + targetBeforeCurrentLength + " term.length=" + term.length + " vs prefix=" + f.prefix);
       if (f.prefix > targetBeforeCurrentLength) {
         f.rewind();
       } else {
         // if (DEBUG) {
         //   System.out.println("        skip rewind!");
         // }
       }
       assert length == f.prefix;
     } else {
       f.nextEnt = -1;
       f.prefix = length;
       f.state.termBlockOrd = 0;
       f.fpOrig = f.fp = fp;
       f.lastSubFP = -1;
       // if (DEBUG) {
       //   final int sav = term.length;
       //   term.length = length;
       //   System.out.println("      push new frame ord=" + f.ord + " fp=" + f.fp + " hasTerms=" + f.hasTerms + " isFloor=" + f.isFloor + " pref=" + brToString(term));
       //   term.length = sav;
       // }
     }

     return f;
   }

   // asserts only
   private boolean clearEOF() {
     eof = false;
     return true;
   }

   // asserts only
   private boolean setEOF() {
     eof = true;
     return true;
   }

   @Override
   public boolean seekExact(final BytesRef target) throws IOException {
     return seekExact(target, 0);
   }

   // for debugging
   @SuppressWarnings("unused")
   static String brToString(BytesRef b) {
     try {
       return b.utf8ToString() + " " + b;
     } catch (Throwable t) {
       // If BytesRef isn't actually UTF8, or it's eg a
       // prefix of UTF8 that ends mid-unicode-char, we
       // fallback to hex:
       return b.toString();
     }
   }

   /** Get the version of the currently seek'd term; only valid if we are
    *  positioned. */
   public long getVersion() {
     return ((IDVersionTermState) currentFrame.state).idVersion;
   }

   /** Optimized version of {@link #seekExact(BytesRef)} that can
    *  sometimes fail-fast if the version indexed with the requested ID
    *  is less than the specified minIDVersion.  Applications that index
    *  a monotonically increasing global version with each document can
    *  use this for fast optimistic concurrency. */
   public boolean seekExact(final BytesRef target, long minIDVersion) throws IOException {

     if (fr.index == null) {
       throw new IllegalStateException("terms index was not loaded");
     }

     term.grow(1 + target.length);

     assert clearEOF();

     //  if (DEBUG) {
     //    System.out.println("\nBTTR.seekExact seg=" + fr.parent.segment + " target=" + fr.fieldInfo.name + ":" + brToString(target) + " minIDVersion=" + minIDVersion + " current=" + brToString(term) + " (exists?=" + termExists + ") validIndexPrefix=" + validIndexPrefix);
     //   printSeekState(System.out);
     //  }

     FST.Arc<Pair<BytesRef,Long>> arc;
     int targetUpto;
     Pair<BytesRef,Long> output;

     long startFrameFP = currentFrame.fp;

     targetBeforeCurrentLength = currentFrame.ord;

     boolean changed = false;

     // TODO: we could stop earlier w/ the version check, every time we traverse an index arc we can check?

     if (currentFrame != staticFrame) {

       // We are already seek'd; find the common
       // prefix of new seek term vs current term and
       // re-use the corresponding seek state.  For
       // example, if app first seeks to foobar, then
       // seeks to foobaz, we can re-use the seek state
       // for the first 5 bytes.

       // if (DEBUG) {
       //    System.out.println("  re-use current seek state validIndexPrefix=" + validIndexPrefix);
       //  }

       arc = arcs[0];
       assert arc.isFinal();
       output = arc.output();
       targetUpto = 0;

       IDVersionSegmentTermsEnumFrame lastFrame = stack[0];
       assert validIndexPrefix <= term.length(): "validIndexPrefix=" + validIndexPrefix + " term.length=" + term.length() + " seg=" + fr.parent;

       final int targetLimit = Math.min(target.length, validIndexPrefix);

       int cmp = 0;

       // TODO: reverse vLong byte order for better FST
       // prefix output sharing

       // First compare up to valid seek frames:
       while (targetUpto < targetLimit) {
         cmp = (term.byteAt(targetUpto)&0xFF) - (target.bytes[target.offset + targetUpto]&0xFF);
         // if (DEBUG) {
         //    System.out.println("    cycle targetUpto=" + targetUpto + " (vs limit=" + targetLimit + ") cmp=" + cmp + " (targetLabel=" + (char) (target.bytes[target.offset + targetUpto]) + " vs termLabel=" + (char) (term.bytes[targetUpto]) + ")"   + " arc.output=" + arc.output + " output=" + output);
         // }
         if (cmp != 0) {
           break;
         }
         arc = arcs[1+targetUpto];
         //if (arc.label != (target.bytes[target.offset + targetUpto] & 0xFF)) {
         //System.out.println("FAIL: arc.label=" + (char) arc.label + " targetLabel=" + (char) (target.bytes[target.offset + targetUpto] & 0xFF));
         //}
         assert arc.label() == (target.bytes[target.offset + targetUpto] & 0xFF): "arc.label=" + (char) arc.label() + " targetLabel=" + (char) (target.bytes[target.offset + targetUpto] & 0xFF);
         if (arc.output() != VersionBlockTreeTermsWriter.NO_OUTPUT) {
           output = VersionBlockTreeTermsWriter.FST_OUTPUTS.add(output, arc.output());
         }
         if (arc.isFinal()) {
           lastFrame = stack[1+lastFrame.ord];
         }
         targetUpto++;
       }

       if (cmp == 0) {
         final int targetUptoMid = targetUpto;

         // Second compare the rest of the term, but
         // don't save arc/output/frame; we only do this
         // to find out if the target term is before,
         // equal or after the current term
         final int targetLimit2 = Math.min(target.length, term.length());
         while (targetUpto < targetLimit2) {
           cmp = (term.byteAt(targetUpto)&0xFF) - (target.bytes[target.offset + targetUpto]&0xFF);
           // if (DEBUG) {
           //    System.out.println("    cycle2 targetUpto=" + targetUpto + " (vs limit=" + targetLimit + ") cmp=" + cmp + " (targetLabel=" + (char) (target.bytes[target.offset + targetUpto]) + " vs termLabel=" + (char) (term.bytes[targetUpto]) + ")");
           // }
           if (cmp != 0) {
             break;
           }
           targetUpto++;
         }

         if (cmp == 0) {
           cmp = term.length() - target.length;
         }
         targetUpto = targetUptoMid;
       }

       if (cmp < 0) {
         // Common case: target term is after current
         // term, ie, app is seeking multiple terms
         // in sorted order
         // if (DEBUG) {
         //    System.out.println("  target is after current (shares prefixLen=" + targetUpto + "); frame.ord=" + lastFrame.ord + "; targetUpto=" + targetUpto);
         //  }
         currentFrame = lastFrame;

       } else if (cmp > 0) {
         // Uncommon case: target term
         // is before current term; this means we can
         // keep the currentFrame but we must rewind it
         // (so we scan from the start)
         targetBeforeCurrentLength = 0;
         changed = true;
         // if (DEBUG) {
         //    System.out.println("  target is before current (shares prefixLen=" + targetUpto + "); rewind frame ord=" + lastFrame.ord);
         //  }
         currentFrame = lastFrame;
         currentFrame.rewind();
       } else {
         // Target is exactly the same as current term
         assert term.length() == target.length;
         if (termExists) {

           if (currentFrame.maxIDVersion < minIDVersion) {
             // The max version for all terms in this block is lower than the minVersion
             // if (DEBUG) {
             //   System.out.println("  target is same as current maxIDVersion=" + currentFrame.maxIDVersion + " is < minIDVersion=" + minIDVersion + "; return false");
             // }
             return false;
           }

           currentFrame.decodeMetaData();
           if (((IDVersionTermState) currentFrame.state).idVersion < minIDVersion) {
             // This term's version is lower than the minVersion
             // if (DEBUG) {
             //   System.out.println("  target is same as current but version=" + ((IDVersionTermState) currentFrame.state).idVersion + " is < minIDVersion=" + minIDVersion + "; return false");
             // }
             return false;
           }
           // System.out.println("  term version=" + ((IDVersionTermState) currentFrame.state).idVersion + " frame version=" + currentFrame.maxIDVersion + " frame ord=" + currentFrame.ord);

           // if (DEBUG) {
           //    System.out.println("  target is same as current; return true");
           //  }
           return true;
         } else {
           // if (DEBUG) {
           //    System.out.println("  target is same as current but term doesn't exist");
           //  }
         }
         //validIndexPrefix = currentFrame.depth;
         //term.length = target.length;
         //return termExists;
       }

     } else {

       targetBeforeCurrentLength = -1;
       arc = fr.index.getFirstArc(arcs[0]);
       //System.out.println("first arc=" + arc);

       // Empty string prefix must have an output (block) in the index!
       assert arc.isFinal();
       assert arc.output() != null;

       // if (DEBUG) {
       //    System.out.println("    no seek state; push root frame");
       //  }

       output = arc.output();

       currentFrame = staticFrame;

       //term.length = 0;
       targetUpto = 0;
       currentFrame = pushFrame(arc, VersionBlockTreeTermsWriter.FST_OUTPUTS.add(output, arc.nextFinalOutput()), 0);
     }

     // if (DEBUG) {
     //   System.out.println("  start index loop targetUpto=" + targetUpto + " output=" + output + " currentFrame.ord=" + currentFrame.ord + " targetBeforeCurrentLength=" + targetBeforeCurrentLength + " termExists=" + termExists);
     // }

     // We are done sharing the common prefix with the incoming target and where we are currently seek'd; now continue walking the index:
     while (targetUpto < target.length) {

       final int targetLabel = target.bytes[target.offset + targetUpto] & 0xFF;

       final FST.Arc<Pair<BytesRef,Long>> nextArc = fr.index.findTargetArc(targetLabel, arc, getArc(1+targetUpto), fstReader);

       if (nextArc == null) {

         // Index is exhausted
         // if (DEBUG) {
         //    System.out.println("    index: index exhausted label=" + ((char) targetLabel) + " " + Integer.toHexString(targetLabel) + " termExists=" + termExists);
         //  }

         validIndexPrefix = currentFrame.prefix;
         //validIndexPrefix = targetUpto;

         currentFrame.scanToFloorFrame(target);

         if (!currentFrame.hasTerms) {
           termExists = false;
           term.setByteAt(targetUpto, (byte) targetLabel);
           term.setLength(1+targetUpto);
           // if (DEBUG) {
           //    System.out.println("  FAST NOT_FOUND term=" + brToString(term));
           //  }
           return false;
         }
         //System.out.println("  check maxVersion=" + currentFrame.maxIDVersion + " vs " + minIDVersion);

         // if (DEBUG) {
         //   System.out.println("  frame.maxIDVersion=" + currentFrame.maxIDVersion +  " vs minIDVersion=" + minIDVersion);
         // }

         if (currentFrame.maxIDVersion < minIDVersion) {
           // The max version for all terms in this block is lower than the minVersion
           if (currentFrame.fp != startFrameFP || changed) {
           //if (targetUpto+1 > term.length) {
             termExists = false;
             term.setByteAt(targetUpto, (byte) targetLabel);
             term.setLength(1+targetUpto);
             // if (DEBUG) {
             //   System.out.println("    reset current term");
             // }
             validIndexPrefix = Math.min(validIndexPrefix, term.length());
           }
             //if (currentFrame.ord != startFrameOrd) {
             //termExists = false;
             //}
           // if (DEBUG) {
           //   System.out.println("    FAST version NOT_FOUND term=" + brToString(term) + " targetUpto=" + targetUpto + " currentFrame.maxIDVersion=" + currentFrame.maxIDVersion + " validIndexPrefix=" + validIndexPrefix + " startFrameFP=" + startFrameFP + " vs " + currentFrame.fp + " termExists=" + termExists);
           // }
           return false;
         }

         currentFrame.loadBlock();

         // if (DEBUG) {
         //   System.out.println("    scan currentFrame ord=" + currentFrame.ord);
         // }
         final SeekStatus result = currentFrame.scanToTerm(target, true);
         if (result == SeekStatus.FOUND) {
           currentFrame.decodeMetaData();
           if (((IDVersionTermState) currentFrame.state).idVersion < minIDVersion) {
             // This term's version is lower than the minVersion
             // if (DEBUG) {
             //   System.out.println("    return NOT_FOUND: idVersion=" + ((IDVersionTermState) currentFrame.state).idVersion + " vs minIDVersion=" + minIDVersion);
             // }
             return false;
           }

           // if (DEBUG) {
           //    System.out.println("  return FOUND term=" + term.utf8ToString() + " " + term);
           //  }

           return true;
         } else {
           // if (DEBUG) {
           //    System.out.println("  got " + result + "; return NOT_FOUND term=" + brToString(term));
           // }
           return false;
         }
       } else {
         // Follow this arc
         arc = nextArc;
         if (term.byteAt(targetUpto) != (byte) targetLabel) {
           // if (DEBUG) {
           //   System.out.println("  now set termExists=false targetUpto=" + targetUpto + " term=" + term.bytes[targetUpto] + " targetLabel=" + targetLabel);
           // }
           changed = true;
           term.setByteAt(targetUpto, (byte) targetLabel);
           termExists = false;
         }
         // Aggregate output as we go:
         assert arc.output() != null;
         if (arc.output() != VersionBlockTreeTermsWriter.NO_OUTPUT) {
           output = VersionBlockTreeTermsWriter.FST_OUTPUTS.add(output, arc.output());
         }

         // if (DEBUG) {
         //    System.out.println("    index: follow label=" + (char) ((target.bytes[target.offset + targetUpto]&0xff)) + " arc.output=" + arc.output + " arc.nfo=" + arc.nextFinalOutput);
         //  }
         targetUpto++;

         if (arc.isFinal()) {
           // if (DEBUG) System.out.println("    arc is final!");
           currentFrame = pushFrame(arc, VersionBlockTreeTermsWriter.FST_OUTPUTS.add(output, arc.nextFinalOutput()), targetUpto);
           // if (DEBUG) System.out.println("    curFrame.ord=" + currentFrame.ord + " hasTerms=" + currentFrame.hasTerms);
         }
       }
     }

     //validIndexPrefix = targetUpto;
     validIndexPrefix = currentFrame.prefix;

     currentFrame.scanToFloorFrame(target);

     // Target term is entirely contained in the index:
     if (!currentFrame.hasTerms) {
       termExists = false;
       term.setLength(targetUpto);
       // if (DEBUG) {
       //    System.out.println("  FAST NOT_FOUND term=" + brToString(term));
       //  }
       return false;
     }

     // if (DEBUG) {
     //   System.out.println("  frame.maxIDVersion=" + currentFrame.maxIDVersion +  " vs minIDVersion=" + minIDVersion);
     // }

     if (currentFrame.maxIDVersion < minIDVersion) {
       // The max version for all terms in this block is lower than the minVersion
       termExists = false;
       term.setLength(targetUpto);
       return false;
     }

     currentFrame.loadBlock();

     final SeekStatus result = currentFrame.scanToTerm(target, true);
     if (result == SeekStatus.FOUND) {
       // if (DEBUG) {
       //    System.out.println("  return FOUND term=" + term.utf8ToString() + " " + term);
       //  }
       currentFrame.decodeMetaData();
       if (((IDVersionTermState) currentFrame.state).idVersion < minIDVersion) {
         // This term's version is lower than the minVersion
         return false;
       }
       return true;
     } else {
       // if (DEBUG) {
       //    System.out.println("  got result " + result + "; return NOT_FOUND term=" + term.utf8ToString());
       //  }

       return false;
     }
   }

   @Override
   public SeekStatus seekCeil(final BytesRef target) throws IOException {
     if (fr.index == null) {
       throw new IllegalStateException("terms index was not loaded");
     }

     term.grow(1 + target.length);

     assert clearEOF();

     //if (DEBUG) {
     //System.out.println("\nBTTR.seekCeil seg=" + segment + " target=" + fieldInfo.name + ":" + target.utf8ToString() + " " + target + " current=" + brToString(term) + " (exists?=" + termExists + ") validIndexPrefix=  " + validIndexPrefix);
     //printSeekState();
     //}

     FST.Arc<Pair<BytesRef,Long>> arc;
     int targetUpto;
     Pair<BytesRef,Long> output;

     targetBeforeCurrentLength = currentFrame.ord;

     if (currentFrame != staticFrame) {

       // We are already seek'd; find the common
       // prefix of new seek term vs current term and
       // re-use the corresponding seek state.  For
       // example, if app first seeks to foobar, then
       // seeks to foobaz, we can re-use the seek state
       // for the first 5 bytes.

       //if (DEBUG) {
       //System.out.println("  re-use current seek state validIndexPrefix=" + validIndexPrefix);
       //}

       arc = arcs[0];
       assert arc.isFinal();
       output = arc.output();
       targetUpto = 0;

       IDVersionSegmentTermsEnumFrame lastFrame = stack[0];
       assert validIndexPrefix <= term.length();

       final int targetLimit = Math.min(target.length, validIndexPrefix);

       int cmp = 0;

       // TODO: we should write our vLong backwards (MSB
       // first) to get better sharing from the FST

       // First compare up to valid seek frames:
       while (targetUpto < targetLimit) {
         cmp = (term.byteAt(targetUpto)&0xFF) - (target.bytes[target.offset + targetUpto]&0xFF);
         //if (DEBUG) {
         //System.out.println("    cycle targetUpto=" + targetUpto + " (vs limit=" + targetLimit + ") cmp=" + cmp + " (targetLabel=" + (char) (target.bytes[target.offset + targetUpto]) + " vs termLabel=" + (char) (term.bytes[targetUpto]) + ")"   + " arc.output=" + arc.output + " output=" + output);
         //}
         if (cmp != 0) {
           break;
         }
         arc = arcs[1+targetUpto];
         assert arc.label() == (target.bytes[target.offset + targetUpto] & 0xFF): "arc.label=" + (char) arc.label() + " targetLabel=" + (char) (target.bytes[target.offset + targetUpto] & 0xFF);
         // TODO: we could save the outputs in local
         // byte[][] instead of making new objs ever
         // seek; but, often the FST doesn't have any
         // shared bytes (but this could change if we
         // reverse vLong byte order)
         if (arc.output() != VersionBlockTreeTermsWriter.NO_OUTPUT) {
           output = VersionBlockTreeTermsWriter.FST_OUTPUTS.add(output, arc.output());
         }
         if (arc.isFinal()) {
           lastFrame = stack[1+lastFrame.ord];
         }
         targetUpto++;
       }


       if (cmp == 0) {
         final int targetUptoMid = targetUpto;
         // Second compare the rest of the term, but
         // don't save arc/output/frame:
         final int targetLimit2 = Math.min(target.length, term.length());
         while (targetUpto < targetLimit2) {
           cmp = (term.byteAt(targetUpto)&0xFF) - (target.bytes[target.offset + targetUpto]&0xFF);
           //if (DEBUG) {
           //System.out.println("    cycle2 targetUpto=" + targetUpto + " (vs limit=" + targetLimit + ") cmp=" + cmp + " (targetLabel=" + (char) (target.bytes[target.offset + targetUpto]) + " vs termLabel=" + (char) (term.bytes[targetUpto]) + ")");
           //}
           if (cmp != 0) {
             break;
           }
           targetUpto++;
         }

         if (cmp == 0) {
           cmp = term.length() - target.length;
         }
         targetUpto = targetUptoMid;
       }

       if (cmp < 0) {
         // Common case: target term is after current
         // term, ie, app is seeking multiple terms
         // in sorted order
         //if (DEBUG) {
         //System.out.println("  target is after current (shares prefixLen=" + targetUpto + "); clear frame.scanned ord=" + lastFrame.ord);
         //}
         currentFrame = lastFrame;

       } else if (cmp > 0) {
         // Uncommon case: target term
         // is before current term; this means we can
         // keep the currentFrame but we must rewind it
         // (so we scan from the start)
         targetBeforeCurrentLength = 0;
         //if (DEBUG) {
         //System.out.println("  target is before current (shares prefixLen=" + targetUpto + "); rewind frame ord=" + lastFrame.ord);
         //}
         currentFrame = lastFrame;
         currentFrame.rewind();
       } else {
         // Target is exactly the same as current term
         assert term.length() == target.length;
         if (termExists) {
           //if (DEBUG) {
           //System.out.println("  target is same as current; return FOUND");
           //}
           return SeekStatus.FOUND;
         } else {
           //if (DEBUG) {
           //System.out.println("  target is same as current but term doesn't exist");
           //}
         }
       }

     } else {

       targetBeforeCurrentLength = -1;
       arc = fr.index.getFirstArc(arcs[0]);

       // Empty string prefix must have an output (block) in the index!
       assert arc.isFinal();
       assert arc.output() != null;

       //if (DEBUG) {
       //System.out.println("    no seek state; push root frame");
       //}

       output = arc.output();

       currentFrame = staticFrame;

       //term.length = 0;
       targetUpto = 0;
       currentFrame = pushFrame(arc, VersionBlockTreeTermsWriter.FST_OUTPUTS.add(output, arc.nextFinalOutput()), 0);
     }

     //if (DEBUG) {
     //System.out.println("  start index loop targetUpto=" + targetUpto + " output=" + output + " currentFrame.ord+1=" + currentFrame.ord + " targetBeforeCurrentLength=" + targetBeforeCurrentLength);
     //}

     // We are done sharing the common prefix with the incoming target and where we are currently seek'd; now continue walking the index:
     while (targetUpto < target.length) {

       final int targetLabel = target.bytes[target.offset + targetUpto] & 0xFF;

       final FST.Arc<Pair<BytesRef,Long>> nextArc = fr.index.findTargetArc(targetLabel, arc, getArc(1+targetUpto), fstReader);

       if (nextArc == null) {

         // Index is exhausted
         // if (DEBUG) {
         //   System.out.println("    index: index exhausted label=" + ((char) targetLabel) + " " + toHex(targetLabel));
         // }

         validIndexPrefix = currentFrame.prefix;
         //validIndexPrefix = targetUpto;

         currentFrame.scanToFloorFrame(target);

         currentFrame.loadBlock();

         final SeekStatus result = currentFrame.scanToTerm(target, false);
         if (result == SeekStatus.END) {
           term.copyBytes(target);
           termExists = false;

           if (next() != null) {
             //if (DEBUG) {
             //System.out.println("  return NOT_FOUND term=" + brToString(term) + " " + term);
             //}
             return SeekStatus.NOT_FOUND;
           } else {
             //if (DEBUG) {
             //System.out.println("  return END");
             //}
             return SeekStatus.END;
           }
         } else {
           //if (DEBUG) {
           //System.out.println("  return " + result + " term=" + brToString(term) + " " + term);
           //}
           return result;
         }
       } else {
         // Follow this arc
         term.setByteAt(targetUpto, (byte) targetLabel);
         arc = nextArc;
         // Aggregate output as we go:
         assert arc.output() != null;
         if (arc.output() != VersionBlockTreeTermsWriter.NO_OUTPUT) {
           output = VersionBlockTreeTermsWriter.FST_OUTPUTS.add(output, arc.output());
         }

         //if (DEBUG) {
         //System.out.println("    index: follow label=" + toHex(target.bytes[target.offset + targetUpto]&0xff) + " arc.output=" + arc.output + " arc.nfo=" + arc.nextFinalOutput);
         //}
         targetUpto++;

         if (arc.isFinal()) {
           //if (DEBUG) System.out.println("    arc is final!");
           currentFrame = pushFrame(arc, VersionBlockTreeTermsWriter.FST_OUTPUTS.add(output, arc.nextFinalOutput()), targetUpto);
           //if (DEBUG) System.out.println("    curFrame.ord=" + currentFrame.ord + " hasTerms=" + currentFrame.hasTerms);
         }
       }
     }

     //validIndexPrefix = targetUpto;
     validIndexPrefix = currentFrame.prefix;

     currentFrame.scanToFloorFrame(target);

     currentFrame.loadBlock();

     final SeekStatus result = currentFrame.scanToTerm(target, false);

     if (result == SeekStatus.END) {
       term.copyBytes(target);
       termExists = false;
       if (next() != null) {
         //if (DEBUG) {
         //System.out.println("  return NOT_FOUND term=" + term.utf8ToString() + " " + term);
         //}
         return SeekStatus.NOT_FOUND;
       } else {
         //if (DEBUG) {
         //System.out.println("  return END");
         //}
         return SeekStatus.END;
       }
     } else {
       return result;
     }
   }

   @SuppressWarnings("unused")
   private void printSeekState(PrintStream out) throws IOException {
     if (currentFrame == staticFrame) {
       out.println("  no prior seek");
     } else {
       out.println("  prior seek state:");
       int ord = 0;
       boolean isSeekFrame = true;
       while(true) {
         IDVersionSegmentTermsEnumFrame f = getFrame(ord);
         assert f != null;
         final BytesRef prefix = new BytesRef(term.bytes(), 0, f.prefix);
         if (f.nextEnt == -1) {
           out.println("    frame " + (isSeekFrame ? "(seek)" : "(next)") + " ord=" + ord + " fp=" + f.fp + (f.isFloor ? (" (fpOrig=" + f.fpOrig + ")") : "") + " prefixLen=" + f.prefix + " prefix=" + brToString(prefix) + (f.nextEnt == -1 ? "" : (" (of " + f.entCount + ")")) + " hasTerms=" + f.hasTerms + " isFloor=" + f.isFloor + " code=" + ((f.fp<<VersionBlockTreeTermsWriter.OUTPUT_FLAGS_NUM_BITS) + (f.hasTerms ? VersionBlockTreeTermsWriter.OUTPUT_FLAG_HAS_TERMS:0) + (f.isFloor ? VersionBlockTreeTermsWriter.OUTPUT_FLAG_IS_FLOOR:0)) + " isLastInFloor=" + f.isLastInFloor + " mdUpto=" + f.metaDataUpto + " tbOrd=" + f.getTermBlockOrd());
         } else {
           out.println("    frame " + (isSeekFrame ? "(seek, loaded)" : "(next, loaded)") + " ord=" + ord + " fp=" + f.fp + (f.isFloor ? (" (fpOrig=" + f.fpOrig + ")") : "") + " prefixLen=" + f.prefix + " prefix=" + brToString(prefix) + " nextEnt=" + f.nextEnt + (f.nextEnt == -1 ? "" : (" (of " + f.entCount + ")")) + " hasTerms=" + f.hasTerms + " isFloor=" + f.isFloor + " code=" + ((f.fp<<VersionBlockTreeTermsWriter.OUTPUT_FLAGS_NUM_BITS) + (f.hasTerms ? VersionBlockTreeTermsWriter.OUTPUT_FLAG_HAS_TERMS:0) + (f.isFloor ? VersionBlockTreeTermsWriter.OUTPUT_FLAG_IS_FLOOR:0)) + " lastSubFP=" + f.lastSubFP + " isLastInFloor=" + f.isLastInFloor + " mdUpto=" + f.metaDataUpto + " tbOrd=" + f.getTermBlockOrd());
         }
         if (fr.index != null) {
           assert !isSeekFrame || f.arc != null: "isSeekFrame=" + isSeekFrame + " f.arc=" + f.arc;
           if (f.prefix > 0 && isSeekFrame && f.arc.label() != (term.byteAt(f.prefix-1)&0xFF)) {
             out.println("      broken seek state: arc.label=" + (char) f.arc.label() + " vs term byte=" + (char) (term.byteAt(f.prefix-1)&0xFF));
             throw new RuntimeException("seek state is broken");
           }
           Pair<BytesRef,Long> output = Util.get(fr.index, prefix);
           if (output == null) {
             out.println("      broken seek state: prefix is not final in index");
             throw new RuntimeException("seek state is broken");
           } else if (isSeekFrame && !f.isFloor) {
             final ByteArrayDataInput reader = new ByteArrayDataInput(output.output1.bytes, output.output1.offset, output.output1.length);
             final long codeOrig = reader.readVLong();
             final long code = (f.fp << VersionBlockTreeTermsWriter.OUTPUT_FLAGS_NUM_BITS) | (f.hasTerms ? VersionBlockTreeTermsWriter.OUTPUT_FLAG_HAS_TERMS:0) | (f.isFloor ? VersionBlockTreeTermsWriter.OUTPUT_FLAG_IS_FLOOR:0);
             if (codeOrig != code) {
               out.println("      broken seek state: output code=" + codeOrig + " doesn't match frame code=" + code);
               throw new RuntimeException("seek state is broken");
             }
           }
         }
         if (f == currentFrame) {
           break;
         }
         if (f.prefix == validIndexPrefix) {
           isSeekFrame = false;
         }
         ord++;
       }
     }
   }

   /* Decodes only the term bytes of the next term.  If caller then asks for
      metadata, ie docFreq, totalTermFreq or pulls a D/&PEnum, we then (lazily)
      decode all metadata up to the current term. */
   @Override
   public BytesRef next() throws IOException {

     if (in == null) {
       // Fresh TermsEnum; seek to first term:
       final FST.Arc<Pair<BytesRef,Long>> arc;
       if (fr.index != null) {
         arc = fr.index.getFirstArc(arcs[0]);
         // Empty string prefix must have an output in the index!
         assert arc.isFinal();
       } else {
         arc = null;
       }
       currentFrame = pushFrame(arc, fr.rootCode, 0);
       currentFrame.loadBlock();
     }

     targetBeforeCurrentLength = currentFrame.ord;

     assert !eof;
     //if (DEBUG) {
     //System.out.println("\nBTTR.next seg=" + segment + " term=" + brToString(term) + " termExists?=" + termExists + " field=" + fieldInfo.name + " termBlockOrd=" + currentFrame.state.termBlockOrd + " validIndexPrefix=" + validIndexPrefix);
     //printSeekState();
     //}

     if (currentFrame == staticFrame) {
       // If seek was previously called and the term was
       // cached, or seek(TermState) was called, usually
       // caller is just going to pull a D/&PEnum or get
       // docFreq, etc.  But, if they then call next(),
       // this method catches up all internal state so next()
       // works properly:
       //if (DEBUG) System.out.println("  re-seek to pending term=" + term.utf8ToString() + " " + term);
       final boolean result = seekExact(term.get());
       assert result;
     }

     // Pop finished blocks
     while (currentFrame.nextEnt == currentFrame.entCount) {
       if (!currentFrame.isLastInFloor) {
         currentFrame.loadNextFloorBlock();
       } else {
         //if (DEBUG) System.out.println("  pop frame");
         if (currentFrame.ord == 0) {
           //if (DEBUG) System.out.println("  return null");
           assert setEOF();
           term.clear();
           validIndexPrefix = 0;
           currentFrame.rewind();
           termExists = false;
           return null;
         }
         final long lastFP = currentFrame.fpOrig;
         currentFrame = stack[currentFrame.ord-1];

         if (currentFrame.nextEnt == -1 || currentFrame.lastSubFP != lastFP) {
           // We popped into a frame that's not loaded
           // yet or not scan'd to the right entry
           currentFrame.scanToFloorFrame(term.get());
           currentFrame.loadBlock();
           currentFrame.scanToSubBlock(lastFP);
         }

         // Note that the seek state (last seek) has been
         // invalidated beyond this depth
         validIndexPrefix = Math.min(validIndexPrefix, currentFrame.prefix);
         //if (DEBUG) {
         //System.out.println("  reset validIndexPrefix=" + validIndexPrefix);
         //}
       }
     }

     while(true) {
       if (currentFrame.next()) {
         // Push to new block:
         //if (DEBUG) System.out.println("  push frame");
         currentFrame = pushFrame(null, currentFrame.lastSubFP, term.length());
         // This is a "next" frame -- even if it's
         // floor'd we must pretend it isn't so we don't
         // try to scan to the right floor frame:
         currentFrame.isFloor = false;
         //currentFrame.hasTerms = true;
         currentFrame.loadBlock();
       } else {
         //if (DEBUG) System.out.println("  return term=" + term.utf8ToString() + " " + term + " currentFrame.ord=" + currentFrame.ord);
         return term.get();
       }
     }
   }

   @Override
   public BytesRef term() {
     assert !eof;
     return term.get();
   }

   @Override
   public int docFreq() throws IOException {
     assert !eof;
     return 1;
   }

   @Override
   public long totalTermFreq() throws IOException {
     assert !eof;
     return 1;
   }

   @Override
   public PostingsEnum postings(PostingsEnum reuse, int flags) throws IOException {
     assert !eof;
     //if (DEBUG) {
     //System.out.println("BTTR.docs seg=" + segment);
     //}
     currentFrame.decodeMetaData();
     //if (DEBUG) {
     //System.out.println("  state=" + currentFrame.state);
     //}
     return fr.parent.postingsReader.postings(fr.fieldInfo, currentFrame.state, reuse, flags);
   }

   @Override
   public ImpactsEnum impacts(int flags) throws IOException {
     // Only one posting, the slow impl is fine
     // We could make this throw UOE but then CheckIndex is angry
     return new SlowImpactsEnum(postings(null, flags));
   }

   @Override
   public void seekExact(BytesRef target, TermState otherState) {
     // if (DEBUG) {
     //   System.out.println("BTTR.seekExact termState seg=" + segment + " target=" + target.utf8ToString() + " " + target + " state=" + otherState);
     // }
     assert clearEOF();
     if (target.compareTo(term.get()) != 0 || !termExists) {
       assert otherState != null && otherState instanceof BlockTermState;
       currentFrame = staticFrame;
       currentFrame.state.copyFrom(otherState);
       term.copyBytes(target);
       currentFrame.metaDataUpto = currentFrame.getTermBlockOrd();
       assert currentFrame.metaDataUpto > 0;
       validIndexPrefix = 0;
     } else {
       // if (DEBUG) {
       //   System.out.println("  skip seek: already on target state=" + currentFrame.state);
       // }
     }
   }

   @Override
   public TermState termState() throws IOException {
     assert !eof;
     currentFrame.decodeMetaData();
     TermState ts = currentFrame.state.clone();
     //if (DEBUG) System.out.println("BTTR.termState seg=" + segment + " state=" + ts);
     return ts;
   }

   @Override
   public void seekExact(long ord) {
     throw new UnsupportedOperationException();
   }

   @Override
   public long ord() {
     throw new UnsupportedOperationException();
   }

   @Override
   public String toString() {
     return "IDVersionSegmentTermsEnum(seg=" + fr.parent + ")";
   }
 }