src/Lucene.Net.Core/Index/DocTermOrds.cs - lucenenet - Git at Google

 using System;
 using System.Collections.Generic;
 using System.Diagnostics;
 using System.Linq;

 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
  * this work for additional information regarding copyright ownership.
  * The ASF licenses this file to You under the Apache License, Version 2.0
  * (the "License"); you may not use this file except in compliance with
  * the License.  You may obtain a copy of the License at
  *
  *     http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */

 namespace Lucene.Net.Index
 {
     using Lucene.Net.Support;
     using Bits = Lucene.Net.Util.Bits;
     using BytesRef = Lucene.Net.Util.BytesRef;
     using DocIdSetIterator = Lucene.Net.Search.DocIdSetIterator;
     using PagedBytes = Lucene.Net.Util.PagedBytes;
     using PostingsFormat = Lucene.Net.Codecs.PostingsFormat; // javadocs
     using SeekStatus = Lucene.Net.Index.TermsEnum.SeekStatus;
     using StringHelper = Lucene.Net.Util.StringHelper;

     /// <summary>
     /// this class enables fast access to multiple term ords for
     /// a specified field across all docIDs.
     ///
     /// Like FieldCache, it uninverts the index and holds a
     /// packed data structure in RAM to enable fast access.
     /// Unlike FieldCache, it can handle multi-valued fields,
     /// and, it does not hold the term bytes in RAM.  Rather, you
     /// must obtain a TermsEnum from the <seealso cref="#getOrdTermsEnum"/>
     /// method, and then seek-by-ord to get the term's bytes.
     ///
     /// While normally term ords are type long, in this API they are
     /// int as the internal representation here cannot address
     /// more than MAX_INT unique terms.  Also, typically this
     /// class is used on fields with relatively few unique terms
     /// vs the number of documents.  In addition, there is an
     /// internal limit (16 MB) on how many bytes each chunk of
     /// documents may consume.  If you trip this limit you'll hit
     /// an InvalidOperationException.
     ///
     /// Deleted documents are skipped during uninversion, and if
     /// you look them up you'll get 0 ords.
     ///
     /// The returned per-document ords do not retain their
     /// original order in the document.  Instead they are returned
     /// in sorted (by ord, ie term's BytesRef comparator) order.  They
     /// are also de-dup'd (ie if doc has same term more than once
     /// in this field, you'll only get that ord back once).
     ///
     /// this class tests whether the provided reader is able to
     /// retrieve terms by ord (ie, it's single segment, and it
     /// uses an ord-capable terms index).  If not, this class
     /// will create its own term index internally, allowing to
     /// create a wrapped TermsEnum that can handle ord.  The
     /// <seealso cref="#getOrdTermsEnum"/> method then provides this
     /// wrapped enum, if necessary.
     ///
     /// The RAM consumption of this class can be high!
     ///
     /// @lucene.experimental
     /// </summary>

     /*
      * Final form of the un-inverted field:
      *   Each document points to a list of term numbers that are contained in that document.
      *
      *   Term numbers are in sorted order, and are encoded as variable-length deltas from the
      *   previous term number.  Real term numbers start at 2 since 0 and 1 are reserved.  A
      *   term number of 0 signals the end of the termNumber list.
      *
      *   There is a single int[maxDoc()] which either contains a pointer into a byte[] for
      *   the termNumber lists, or directly contains the termNumber list if it fits in the 4
      *   bytes of an integer.  If the first byte in the integer is 1, the next 3 bytes
      *   are a pointer into a byte[] where the termNumber list starts.
      *
      *   There are actually 256 byte arrays, to compensate for the fact that the pointers
      *   into the byte arrays are only 3 bytes long.  The correct byte array for a document
      *   is a function of it's id.
      *
      *   To save space and speed up faceting, any term that matches enough documents will
      *   not be un-inverted... it will be skipped while building the un-inverted field structure,
      *   and will use a set intersection method during faceting.
      *
      *   To further save memory, the terms (the actual string values) are not all stored in
      *   memory, but a TermIndex is used to convert term numbers to term values only
      *   for the terms needed after faceting has completed.  Only every 128th term value
      *   is stored, along with it's corresponding term number, and this is used as an
      *   index to find the closest term and iterate until the desired number is hit (very
      *   much like Lucene's own internal term index).
      *
      */

     public class DocTermOrds
     {
         // Term ords are shifted by this, internally, to reserve
         // values 0 (end term) and 1 (index is a pointer into byte array)
         private const int TNUM_OFFSET = 2;

         /// <summary>
         /// Every 128th term is indexed, by default. </summary>
         public const int DEFAULT_INDEX_INTERVAL_BITS = 7; // decrease to a low number like 2 for testing

         private int IndexIntervalBits;
         private int IndexIntervalMask;
         private int IndexInterval;

         /// <summary>
         /// Don't uninvert terms that exceed this count. </summary>
         protected internal readonly int MaxTermDocFreq;

         /// <summary>
         /// Field we are uninverting. </summary>
         protected internal readonly string Field;

         /// <summary>
         /// Number of terms in the field. </summary>
         protected internal int NumTermsInField;

         /// <summary>
         /// Total number of references to term numbers. </summary>
         protected internal long TermInstances;

         private long Memsz;

         /// <summary>
         /// Total time to uninvert the field. </summary>
         protected internal int Total_time;

         /// <summary>
         /// Time for phase1 of the uninvert process. </summary>
         protected internal int Phase1_time;

         /// <summary>
         /// Holds the per-document ords or a pointer to the ords. </summary>
         protected internal int[] Index;

         /// <summary>
         /// Holds term ords for documents. </summary>
         protected internal sbyte[][] Tnums = new sbyte[256][];

         /// <summary>
         /// Total bytes (sum of term lengths) for all indexed terms. </summary>
         protected internal long SizeOfIndexedStrings;

         /// <summary>
         /// Holds the indexed (by default every 128th) terms. </summary>
         protected internal BytesRef[] IndexedTermsArray;

         /// <summary>
         /// If non-null, only terms matching this prefix were
         ///  indexed.
         /// </summary>
         protected internal BytesRef Prefix;

         /// <summary>
         /// Ordinal of the first term in the field, or 0 if the
         ///  <seealso cref="PostingsFormat"/> does not implement {@link
         ///  TermsEnum#ord}.
         /// </summary>
         protected internal int OrdBase;

         /// <summary>
         /// Used while uninverting. </summary>
         protected internal DocsEnum DocsEnum;

         /// <summary>
         /// Returns total bytes used. </summary>
         public virtual long RamUsedInBytes()
         {
             // can cache the mem size since it shouldn't change
             if (Memsz != 0)
             {
                 return Memsz;
             }
             long sz = 8 * 8 + 32; // local fields
             if (Index != null)
             {
                 sz += Index.Length * 4;
             }
             if (Tnums != null)
             {
                 sz = Tnums.Where(arr => arr != null).Aggregate(sz, (current, arr) => current + arr.Length);
             }
             Memsz = sz;
             return sz;
         }

         /// <summary>
         /// Inverts all terms </summary>
         public DocTermOrds(AtomicReader reader, Bits liveDocs, string field)
             : this(reader, liveDocs, field, null, int.MaxValue)
         {
         }

         /// <summary>
         /// Inverts only terms starting w/ prefix </summary>
         public DocTermOrds(AtomicReader reader, Bits liveDocs, string field, BytesRef termPrefix)
             : this(reader, liveDocs, field, termPrefix, int.MaxValue)
         {
         }

         /// <summary>
         /// Inverts only terms starting w/ prefix, and only terms
         ///  whose docFreq (not taking deletions into account) is
         ///  <=  maxTermDocFreq
         /// </summary>
         public DocTermOrds(AtomicReader reader, Bits liveDocs, string field, BytesRef termPrefix, int maxTermDocFreq)
             : this(reader, liveDocs, field, termPrefix, maxTermDocFreq, DEFAULT_INDEX_INTERVAL_BITS)
         {
         }

         /// <summary>
         /// Inverts only terms starting w/ prefix, and only terms
         ///  whose docFreq (not taking deletions into account) is
         ///  <=  maxTermDocFreq, with a custom indexing interval
         ///  (default is every 128nd term).
         /// </summary>
         public DocTermOrds(AtomicReader reader, Bits liveDocs, string field, BytesRef termPrefix, int maxTermDocFreq, int indexIntervalBits)
             : this(field, maxTermDocFreq, indexIntervalBits)
         {
             Uninvert(reader, liveDocs, termPrefix);
         }

         /// <summary>
         /// Subclass inits w/ this, but be sure you then call
         ///  uninvert, only once
         /// </summary>
         protected internal DocTermOrds(string field, int maxTermDocFreq, int indexIntervalBits)
         {
             //System.out.println("DTO init field=" + field + " maxTDFreq=" + maxTermDocFreq);
             this.Field = field;
             this.MaxTermDocFreq = maxTermDocFreq;
             this.IndexIntervalBits = indexIntervalBits;
             IndexIntervalMask = (int)((uint)0xffffffff >> (32 - indexIntervalBits));
             IndexInterval = 1 << indexIntervalBits;
         }

         /// <summary>
         /// Returns a TermsEnum that implements ord.  If the
         ///  provided reader supports ord, we just return its
         ///  TermsEnum; if it does not, we build a "private" terms
         ///  index internally (WARNING: consumes RAM) and use that
         ///  index to implement ord.  this also enables ord on top
         ///  of a composite reader.  The returned TermsEnum is
         ///  unpositioned.  this returns null if there are no terms.
         ///
         ///  <p><b>NOTE</b>: you must pass the same reader that was
         ///  used when creating this class
         /// </summary>
         public virtual TermsEnum GetOrdTermsEnum(AtomicReader reader)
         {
             if (IndexedTermsArray == null)
             {
                 //System.out.println("GET normal enum");
                 Fields fields = reader.Fields;
                 if (fields == null)
                 {
                     return null;
                 }
                 Terms terms = fields.Terms(Field);
                 if (terms == null)
                 {
                     return null;
                 }
                 else
                 {
                     return terms.Iterator(null);
                 }
             }
             else
             {
                 //System.out.println("GET wrapped enum ordBase=" + ordBase);
                 return new OrdWrappedTermsEnum(this, reader);
             }
         }

         /// <summary>
         /// Returns the number of terms in this field
         /// </summary>
         public virtual int NumTerms()
         {
             return NumTermsInField;
         }

         /// <summary>
         /// Returns {@code true} if no terms were indexed.
         /// </summary>
         public virtual bool Empty
         {
             get
             {
                 return Index == null;
             }
         }

         /// <summary>
         /// Subclass can override this </summary>
         protected internal virtual void VisitTerm(TermsEnum te, int termNum)
         {
         }

         /// <summary>
         /// Invoked during <seealso cref="#uninvert(AtomicReader,Bits,BytesRef)"/>
         ///  to record the document frequency for each uninverted
         ///  term.
         /// </summary>
         protected internal virtual void SetActualDocFreq(int termNum, int df)
         {
         }

         /// <summary>
         /// Call this only once (if you subclass!) </summary>
         protected internal virtual void Uninvert(AtomicReader reader, Bits liveDocs, BytesRef termPrefix)
         {
             FieldInfo info = reader.FieldInfos.FieldInfo(Field);
             if (info != null && info.HasDocValues())
             {
                 throw new InvalidOperationException("Type mismatch: " + Field + " was indexed as " + info.DocValuesType);
             }
             //System.out.println("DTO uninvert field=" + field + " prefix=" + termPrefix);
             long startTime = Environment.TickCount;
             Prefix = termPrefix == null ? null : BytesRef.DeepCopyOf(termPrefix);

             int maxDoc = reader.MaxDoc;
             int[] index = new int[maxDoc]; // immediate term numbers, or the index into the byte[] representing the last number
             int[] lastTerm = new int[maxDoc]; // last term we saw for this document
             var bytes = new sbyte[maxDoc][]; // list of term numbers for the doc (delta encoded vInts)

             Fields fields = reader.Fields;
             if (fields == null)
             {
                 // No terms
                 return;
             }
             Terms terms = fields.Terms(Field);
             if (terms == null)
             {
                 // No terms
                 return;
             }

             TermsEnum te = terms.Iterator(null);
             BytesRef seekStart = termPrefix != null ? termPrefix : new BytesRef();
             //System.out.println("seekStart=" + seekStart.utf8ToString());
             if (te.SeekCeil(seekStart) == TermsEnum.SeekStatus.END)
             {
                 // No terms match
                 return;
             }

             // If we need our "term index wrapper", these will be
             // init'd below:
             IList<BytesRef> indexedTerms = null;
             PagedBytes indexedTermsBytes = null;

             bool testedOrd = false;

             // we need a minimum of 9 bytes, but round up to 12 since the space would
             // be wasted with most allocators anyway.
             var tempArr = new sbyte[12];

             //
             // enumerate all terms, and build an intermediate form of the un-inverted field.
             //
             // During this intermediate form, every document has a (potential) byte[]
             // and the int[maxDoc()] array either contains the termNumber list directly
             // or the *end* offset of the termNumber list in it's byte array (for faster
             // appending and faster creation of the final form).
             //
             // idea... if things are too large while building, we could do a range of docs
             // at a time (but it would be a fair amount slower to build)
             // could also do ranges in parallel to take advantage of multiple CPUs

             // OPTIONAL: remap the largest df terms to the lowest 128 (single byte)
             // values.  this requires going over the field first to find the most
             // frequent terms ahead of time.

             int termNum = 0;
             DocsEnum = null;

             // Loop begins with te positioned to first term (we call
             // seek above):
             for (; ; )
             {
                 BytesRef t = te.Term();
                 if (t == null || (termPrefix != null && !StringHelper.StartsWith(t, termPrefix)))
                 {
                     break;
                 }
                 //System.out.println("visit term=" + t.utf8ToString() + " " + t + " termNum=" + termNum);

                 if (!testedOrd)
                 {
                     try
                     {
                         OrdBase = (int)te.Ord();
                         //System.out.println("got ordBase=" + ordBase);
                     }
                     catch (System.NotSupportedException uoe)
                     {
                         // Reader cannot provide ord support, so we wrap
                         // our own support by creating our own terms index:
                         indexedTerms = new List<BytesRef>();
                         indexedTermsBytes = new PagedBytes(15);
                         //System.out.println("NO ORDS");
                     }
                     testedOrd = true;
                 }

                 VisitTerm(te, termNum);

                 if (indexedTerms != null && (termNum & IndexIntervalMask) == 0)
                 {
                     // Index this term
                     SizeOfIndexedStrings += t.Length;
                     BytesRef indexedTerm = new BytesRef();
                     indexedTermsBytes.Copy(t, indexedTerm);
                     // TODO: really should 1) strip off useless suffix,
                     // and 2) use FST not array/PagedBytes
                     indexedTerms.Add(indexedTerm);
                 }

                 int df = te.DocFreq();
                 if (df <= MaxTermDocFreq)
                 {
                     DocsEnum = te.Docs(liveDocs, DocsEnum, DocsEnum.FLAG_NONE);

                     // dF, but takes deletions into account
                     int actualDF = 0;

                     for (; ; )
                     {
                         int doc = DocsEnum.NextDoc();
                         if (doc == DocIdSetIterator.NO_MORE_DOCS)
                         {
                             break;
                         }
                         //System.out.println("  chunk=" + chunk + " docs");

                         actualDF++;
                         TermInstances++;

                         //System.out.println("    docID=" + doc);
                         // add TNUM_OFFSET to the term number to make room for special reserved values:
                         // 0 (end term) and 1 (index into byte array follows)
                         int delta = termNum - lastTerm[doc] + TNUM_OFFSET;
                         lastTerm[doc] = termNum;
                         int val = index[doc];

                         if ((val & 0xff) == 1)
                         {
                             // index into byte array (actually the end of
                             // the doc-specific byte[] when building)
                             int pos = (int)((uint)val >> 8);
                             int ilen = VIntSize(delta);
                             var arr = bytes[doc];
                             int newend = pos + ilen;
                             if (newend > arr.Length)
                             {
                                 // We avoid a doubling strategy to lower memory usage.
                                 // this faceting method isn't for docs with many terms.
                                 // In hotspot, objects have 2 words of overhead, then fields, rounded up to a 64-bit boundary.
                                 // TODO: figure out what array lengths we can round up to w/o actually using more memory
                                 // (how much space does a byte[] take up?  Is data preceded by a 32 bit length only?
                                 // It should be safe to round up to the nearest 32 bits in any case.
                                 int newLen = (newend + 3) & unchecked((int)0xfffffffc); // 4 byte alignment
                                 var newarr = new sbyte[newLen];
                                 Array.Copy(arr, 0, newarr, 0, pos);
                                 arr = newarr;
                                 bytes[doc] = newarr;
                             }
                             pos = WriteInt(delta, arr, pos);
                             index[doc] = (pos << 8) | 1; // update pointer to end index in byte[]
                         }
                         else
                         {
                             // OK, this int has data in it... find the end (a zero starting byte - not
                             // part of another number, hence not following a byte with the high bit set).
                             int ipos;
                             if (val == 0)
                             {
                                 ipos = 0;
                             }
                             else if ((val & 0x0000ff80) == 0)
                             {
                                 ipos = 1;
                             }
                             else if ((val & 0x00ff8000) == 0)
                             {
                                 ipos = 2;
                             }
                             else if ((val & 0xff800000) == 0)
                             {
                                 ipos = 3;
                             }
                             else
                             {
                                 ipos = 4;
                             }

                             //System.out.println("      ipos=" + ipos);

                             int endPos = WriteInt(delta, tempArr, ipos);
                             //System.out.println("      endpos=" + endPos);
                             if (endPos <= 4)
                             {
                                 //System.out.println("      fits!");
                                 // value will fit in the integer... move bytes back
                                 for (int j = ipos; j < endPos; j++)
                                 {
                                     val |= (tempArr[j] & 0xff) << (j << 3);
                                 }
                                 index[doc] = val;
                             }
                             else
                             {
                                 // value won't fit... move integer into byte[]
                                 for (int j = 0; j < ipos; j++)
                                 {
                                     tempArr[j] = (sbyte)val;
                                     val = (int)((uint)val >> 8);
                                 }
                                 // point at the end index in the byte[]
                                 index[doc] = (endPos << 8) | 1;
                                 bytes[doc] = tempArr;
                                 tempArr = new sbyte[12];
                             }
                         }
                     }
                     SetActualDocFreq(termNum, actualDF);
                 }

                 termNum++;
                 if (te.Next() == null)
                 {
                     break;
                 }
             }

             NumTermsInField = termNum;

             long midPoint = Environment.TickCount;

             if (TermInstances == 0)
             {
                 // we didn't invert anything
                 // lower memory consumption.
                 Tnums = null;
             }
             else
             {
                 this.Index = index;

                 //
                 // transform intermediate form into the final form, building a single byte[]
                 // at a time, and releasing the intermediate byte[]s as we go to avoid
                 // increasing the memory footprint.
                 //

                 for (int pass = 0; pass < 256; pass++)
                 {
                     var target = Tnums[pass];
                     var pos = 0; // end in target;
                     if (target != null)
                     {
                         pos = target.Length;
                     }
                     else
                     {
                         target = new sbyte[4096];
                     }

                     // loop over documents, 0x00ppxxxx, 0x01ppxxxx, 0x02ppxxxx
                     // where pp is the pass (which array we are building), and xx is all values.
                     // each pass shares the same byte[] for termNumber lists.
                     for (int docbase = pass << 16; docbase < maxDoc; docbase += (1 << 24))
                     {
                         int lim = Math.Min(docbase + (1 << 16), maxDoc);
                         for (int doc = docbase; doc < lim; doc++)
                         {
                             //System.out.println("  pass=" + pass + " process docID=" + doc);
                             int val = index[doc];
                             if ((val & 0xff) == 1)
                             {
                                 int len = (int)((uint)val >> 8);
                                 //System.out.println("    ptr pos=" + pos);
                                 index[doc] = (pos << 8) | 1; // change index to point to start of array
                                 if ((pos & 0xff000000) != 0)
                                 {
                                     // we only have 24 bits for the array index
                                     throw new InvalidOperationException("Too many values for UnInvertedField faceting on field " + Field);
                                 }
                                 var arr = bytes[doc];
                                 /*
                                 for(byte b : arr) {
                                   //System.out.println("      b=" + Integer.toHexString((int) b));
                                 }
                                 */
                                 bytes[doc] = null; // IMPORTANT: allow GC to avoid OOM
                                 if (target.Length <= pos + len)
                                 {
                                     int newlen = target.Length;
                                     /// <summary>
                                     ///* we don't have to worry about the array getting too large
                                     /// since the "pos" param will overflow first (only 24 bits available)
                                     /// if ((newlen<<1) <= 0) {
                                     ///  // overflow...
                                     ///  newlen = Integer.MAX_VALUE;
                                     ///  if (newlen <= pos + len) {
                                     ///    throw new SolrException(400,"Too many terms to uninvert field!");
                                     ///  }
                                     /// } else {
                                     ///  while (newlen <= pos + len) newlen<<=1;  // doubling strategy
                                     /// }
                                     /// ***
                                     /// </summary>
                                     while (newlen <= pos + len) // doubling strategy
                                     {
                                         newlen <<= 1;
                                     }
                                     var newtarget = new sbyte[newlen];
                                     Array.Copy(target, 0, newtarget, 0, pos);
                                     target = newtarget;
                                 }
                                 Array.Copy(arr, 0, target, pos, len);
                                 pos += len + 1; // skip single byte at end and leave it 0 for terminator
                             }
                         }
                     }

                     // shrink array
                     if (pos < target.Length)
                     {
                         var newtarget = new sbyte[pos];
                         Array.Copy(target, 0, newtarget, 0, pos);
                         target = newtarget;
                     }

                     Tnums[pass] = target;

                     if ((pass << 16) > maxDoc)
                     {
                         break;
                     }
                 }
             }
             if (indexedTerms != null)
             {
                 IndexedTermsArray = indexedTerms.ToArray();
             }

             long endTime = Environment.TickCount;

             Total_time = (int)(endTime - startTime);
             Phase1_time = (int)(midPoint - startTime);
         }

         /// <summary>
         /// Number of bytes to represent an unsigned int as a vint. </summary>
         private static int VIntSize(int x)
         {
             if ((x & (0xffffffff << (7 * 1))) == 0)
             {
                 return 1;
             }
             if ((x & (0xffffffff << (7 * 2))) == 0)
             {
                 return 2;
             }
             if ((x & (0xffffffff << (7 * 3))) == 0)
             {
                 return 3;
             }
             if ((x & (0xffffffff << (7 * 4))) == 0)
             {
                 return 4;
             }
             return 5;
         }

         // todo: if we know the size of the vInt already, we could do
         // a single switch on the size
         private static int WriteInt(int x, sbyte[] arr, int pos)
         {
             var a = ((int)((uint)x >> (7 * 4)));
             if (a != 0)
             {
                 arr[pos++] = unchecked((sbyte)(a | 0x80));
             }
             a = ((int)((uint)x >> (7 * 3)));
             if (a != 0)
             {
                 arr[pos++] = unchecked((sbyte)(a | 0x80));
             }
             a = ((int)((uint)x >> (7 * 2)));
             if (a != 0)
             {
                 arr[pos++] = unchecked((sbyte)(a | 0x80));
             }
             a = ((int)((uint)x >> (7 * 1)));
             if (a != 0)
             {
                 arr[pos++] = unchecked((sbyte)(a | 0x80));
             }
             arr[pos++] = (sbyte)(x & 0x7f);
             return pos;
         }

         /* Only used if original IndexReader doesn't implement
          * ord; in this case we "wrap" our own terms index
          * around it. */

         private sealed class OrdWrappedTermsEnum : TermsEnum
         {
             internal bool InstanceFieldsInitialized = false;

             internal void InitializeInstanceFields()
             {
                 Ord_Renamed = -OuterInstance.IndexInterval - 1;
             }

             private readonly DocTermOrds OuterInstance;

             internal readonly TermsEnum TermsEnum;
             internal BytesRef Term_Renamed;
             internal long Ord_Renamed; // force "real" seek

             public OrdWrappedTermsEnum(DocTermOrds outerInstance, AtomicReader reader)
             {
                 this.OuterInstance = outerInstance;

                 if (!InstanceFieldsInitialized)
                 {
                     InitializeInstanceFields();
                     InstanceFieldsInitialized = true;
                 }
                 Debug.Assert(outerInstance.IndexedTermsArray != null);
                 TermsEnum = reader.Fields.Terms(outerInstance.Field).Iterator(null);
             }

             public override IComparer<BytesRef> Comparator
             {
                 get
                 {
                     return TermsEnum.Comparator;
                 }
             }

             public override DocsEnum Docs(Bits liveDocs, DocsEnum reuse, int flags)
             {
                 return TermsEnum.Docs(liveDocs, reuse, flags);
             }

             public override DocsAndPositionsEnum DocsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, int flags)
             {
                 return TermsEnum.DocsAndPositions(liveDocs, reuse, flags);
             }

             public override BytesRef Term()
             {
                 return Term_Renamed;
             }

             public override BytesRef Next()
             {
                 if (++Ord_Renamed < 0)
                 {
                     Ord_Renamed = 0;
                 }
                 if (TermsEnum.Next() == null)
                 {
                     Term_Renamed = null;
                     return null;
                 }
                 return SetTerm(); // this is extra work if we know we are in bounds...
             }

             public override int DocFreq()
             {
                 return TermsEnum.DocFreq();
             }

             public override long TotalTermFreq()
             {
                 return TermsEnum.TotalTermFreq();
             }

             public override long Ord()
             {
                 return OuterInstance.OrdBase + Ord_Renamed;
             }

             public override SeekStatus SeekCeil(BytesRef target)
             {
                 // already here
                 if (Term_Renamed != null && Term_Renamed.Equals(target))
                 {
                     return SeekStatus.FOUND;
                 }

                 int startIdx = OuterInstance.IndexedTermsArray.ToList().BinarySearch(target);

                 if (startIdx >= 0)
                 {
                     // we hit the term exactly... lucky us!
                     TermsEnum.SeekStatus seekStatus = TermsEnum.SeekCeil(target);
                     Debug.Assert(seekStatus == TermsEnum.SeekStatus.FOUND);
                     Ord_Renamed = startIdx << OuterInstance.IndexIntervalBits;
                     SetTerm();
                     Debug.Assert(Term_Renamed != null);
                     return SeekStatus.FOUND;
                 }

                 // we didn't hit the term exactly
                 startIdx = -startIdx - 1;

                 if (startIdx == 0)
                 {
                     // our target occurs *before* the first term
                     TermsEnum.SeekStatus seekStatus = TermsEnum.SeekCeil(target);
                     Debug.Assert(seekStatus == TermsEnum.SeekStatus.NOT_FOUND);
                     Ord_Renamed = 0;
                     SetTerm();
                     Debug.Assert(Term_Renamed != null);
                     return SeekStatus.NOT_FOUND;
                 }

                 // back up to the start of the block
                 startIdx--;

                 if ((Ord_Renamed >> OuterInstance.IndexIntervalBits) == startIdx && Term_Renamed != null && Term_Renamed.CompareTo(target) <= 0)
                 {
                     // we are already in the right block and the current term is before the term we want,
                     // so we don't need to seek.
                 }
                 else
                 {
                     // seek to the right block
                     TermsEnum.SeekStatus seekStatus = TermsEnum.SeekCeil(OuterInstance.IndexedTermsArray[startIdx]);
                     Debug.Assert(seekStatus == TermsEnum.SeekStatus.FOUND);
                     Ord_Renamed = startIdx << OuterInstance.IndexIntervalBits;
                     SetTerm();
                     Debug.Assert(Term_Renamed != null); // should be non-null since it's in the index
                 }

                 while (Term_Renamed != null && Term_Renamed.CompareTo(target) < 0)
                 {
                     Next();
                 }

                 if (Term_Renamed == null)
                 {
                     return SeekStatus.END;
                 }
                 else if (Term_Renamed.CompareTo(target) == 0)
                 {
                     return SeekStatus.FOUND;
                 }
                 else
                 {
                     return SeekStatus.NOT_FOUND;
                 }
             }

             public override void SeekExact(long targetOrd)
             {
                 int delta = (int)(targetOrd - OuterInstance.OrdBase - Ord_Renamed);
                 //System.out.println("  seek(ord) targetOrd=" + targetOrd + " delta=" + delta + " ord=" + ord + " ii=" + indexInterval);
                 if (delta < 0 || delta > OuterInstance.IndexInterval)
                 {
                     int idx = (int)((long)((ulong)targetOrd >> OuterInstance.IndexIntervalBits));
                     BytesRef @base = OuterInstance.IndexedTermsArray[idx];
                     //System.out.println("  do seek term=" + base.utf8ToString());
                     Ord_Renamed = idx << OuterInstance.IndexIntervalBits;
                     delta = (int)(targetOrd - Ord_Renamed);
                     TermsEnum.SeekStatus seekStatus = TermsEnum.SeekCeil(@base);
                     Debug.Assert(seekStatus == TermsEnum.SeekStatus.FOUND);
                 }
                 else
                 {
                     //System.out.println("seek w/in block");
                 }

                 while (--delta >= 0)
                 {
                     BytesRef br = TermsEnum.Next();
                     if (br == null)
                     {
                         Debug.Assert(false);
                         return;
                     }
                     Ord_Renamed++;
                 }

                 SetTerm();
                 Debug.Assert(Term_Renamed != null);
             }

             internal BytesRef SetTerm()
             {
                 Term_Renamed = TermsEnum.Term();
                 //System.out.println("  setTerm() term=" + term.utf8ToString() + " vs prefix=" + (prefix == null ? "null" : prefix.utf8ToString()));
                 if (OuterInstance.Prefix != null && !StringHelper.StartsWith(Term_Renamed, OuterInstance.Prefix))
                 {
                     Term_Renamed = null;
                 }
                 return Term_Renamed;
             }
         }

         /// <summary>
         /// Returns the term (<seealso cref="BytesRef"/>) corresponding to
         ///  the provided ordinal.
         /// </summary>
         public virtual BytesRef LookupTerm(TermsEnum termsEnum, int ord)
         {
             termsEnum.SeekExact(ord);
             return termsEnum.Term();
         }

         /// <summary>
         /// Returns a SortedSetDocValues view of this instance </summary>
         public virtual SortedSetDocValues GetIterator(AtomicReader reader)
         {
             if (Empty)
             {
                 return DocValues.EMPTY_SORTED_SET;
             }
             else
             {
                 return new Iterator(this, reader);
             }
         }

         private class Iterator : SortedSetDocValues
         {
             private readonly DocTermOrds OuterInstance;

             private readonly AtomicReader Reader;
             private readonly TermsEnum Te; // used internally for lookupOrd() and lookupTerm()

             // currently we read 5 at a time (using the logic of the old iterator)
             private readonly int[] Buffer = new int[5];

             private int BufferUpto;
             private int BufferLength;

             private int Tnum;
             private int Upto;
             private sbyte[] Arr;

             internal Iterator(DocTermOrds outerInstance, AtomicReader reader)
             {
                 this.OuterInstance = outerInstance;
                 this.Reader = reader;
                 this.Te = TermsEnum();
             }

             public override long NextOrd()
             {
                 while (BufferUpto == BufferLength)
                 {
                     if (BufferLength < Buffer.Length)
                     {
                         return NO_MORE_ORDS;
                     }
                     else
                     {
                         BufferLength = Read(Buffer);
                         BufferUpto = 0;
                     }
                 }
                 return Buffer[BufferUpto++];
             }

             /// <summary>
             /// Buffer must be at least 5 ints long.  Returns number
             ///  of term ords placed into buffer; if this count is
             ///  less than buffer.length then that is the end.
             /// </summary>
             internal virtual int Read(int[] buffer)
             {
                 int bufferUpto = 0;
                 if (Arr == null)
                 {
                     // code is inlined into upto
                     //System.out.println("inlined");
                     int code = Upto;
                     int delta = 0;
                     for (; ; )
                     {
                         delta = (delta << 7) | (code & 0x7f);
                         if ((code & 0x80) == 0)
                         {
                             if (delta == 0)
                             {
                                 break;
                             }
                             Tnum += delta - TNUM_OFFSET;
                             buffer[bufferUpto++] = OuterInstance.OrdBase + Tnum;
                             //System.out.println("  tnum=" + tnum);
                             delta = 0;
                         }
                         code = (int)((uint)code >> 8);
                     }
                 }
                 else
                 {
                     // code is a pointer
                     for (; ; )
                     {
                         int delta = 0;
                         for (; ; )
                         {
                             sbyte b = Arr[Upto++];
                             delta = (delta << 7) | (b & 0x7f);
                             //System.out.println("    cycle: upto=" + upto + " delta=" + delta + " b=" + b);
                             if ((b & 0x80) == 0)
                             {
                                 break;
                             }
                         }
                         //System.out.println("  delta=" + delta);
                         if (delta == 0)
                         {
                             break;
                         }
                         Tnum += delta - TNUM_OFFSET;
                         //System.out.println("  tnum=" + tnum);
                         buffer[bufferUpto++] = OuterInstance.OrdBase + Tnum;
                         if (bufferUpto == buffer.Length)
                         {
                             break;
                         }
                     }
                 }

                 return bufferUpto;
             }

             public override int Document
             {
                 set
                 {
                     Tnum = 0;
                     int code = OuterInstance.Index[value];
                     if ((code & 0xff) == 1)
                     {
                         // a pointer
                         Upto = (int)((uint)code >> 8);
                         //System.out.println("    pointer!  upto=" + upto);
                         int whichArray = ((int)((uint)value >> 16)) & 0xff;
                         Arr = OuterInstance.Tnums[whichArray];
                     }
                     else
                     {
                         //System.out.println("    inline!");
                         Arr = null;
                         Upto = code;
                     }
                     BufferUpto = 0;
                     BufferLength = Read(Buffer);
                 }
             }

             public override void LookupOrd(long ord, BytesRef result)
             {
                 BytesRef @ref = null;
                 try
                 {
                     @ref = OuterInstance.LookupTerm(Te, (int)ord);
                 }
                 catch (System.IO.IOException e)
                 {
                     throw new Exception(e.Message, e);
                 }
                 result.Bytes = @ref.Bytes;
                 result.Offset = @ref.Offset;
                 result.Length = @ref.Length;
             }

             public override long ValueCount
             {
                 get
                 {
                     return OuterInstance.NumTerms();
                 }
             }

             public override long LookupTerm(BytesRef key)
             {
                 try
                 {
                     if (Te.SeekCeil(key) == SeekStatus.FOUND)
                     {
                         return Te.Ord();
                     }
                     else
                     {
                         return -Te.Ord() - 1;
                     }
                 }
                 catch (System.IO.IOException e)
                 {
                     throw new Exception(e.Message, e);
                 }
             }

             public override TermsEnum TermsEnum()
             {
                 try
                 {
                     return OuterInstance.GetOrdTermsEnum(Reader);
                 }
                 catch (System.IO.IOException e)
                 {
                     throw new Exception();
                 }
             }
         }
     }
 }