| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| package org.apache.solr.uninverting; |
| |
| import java.io.IOException; |
| import java.util.ArrayList; |
| import java.util.Collection; |
| import java.util.Collections; |
| import java.util.HashMap; |
| import java.util.List; |
| import java.util.Map; |
| import java.util.WeakHashMap; |
| |
| import org.apache.lucene.index.BinaryDocValues; |
| import org.apache.lucene.index.DocValues; |
| import org.apache.lucene.index.DocValuesType; |
| import org.apache.lucene.index.FieldInfo; |
| import org.apache.lucene.index.IndexOptions; |
| import org.apache.lucene.index.IndexReader; |
| import org.apache.lucene.index.LeafReader; |
| import org.apache.lucene.index.NumericDocValues; |
| import org.apache.lucene.index.PointValues; |
| import org.apache.lucene.index.PointValues.IntersectVisitor; |
| import org.apache.lucene.index.PointValues.Relation; |
| import org.apache.lucene.index.PostingsEnum; |
| import org.apache.lucene.index.SortedDocValues; |
| import org.apache.lucene.index.SortedSetDocValues; |
| import org.apache.lucene.index.Terms; |
| import org.apache.lucene.index.TermsEnum; |
| import org.apache.lucene.search.DocIdSetIterator; |
| import org.apache.lucene.util.Accountable; |
| import org.apache.lucene.util.Accountables; |
| import org.apache.lucene.util.Bits; |
| import org.apache.lucene.util.BytesRef; |
| import org.apache.lucene.util.FixedBitSet; |
| import org.apache.lucene.util.PagedBytes; |
| import org.apache.lucene.util.RamUsageEstimator; |
| import org.apache.lucene.util.packed.GrowableWriter; |
| import org.apache.lucene.util.packed.PackedInts; |
| import org.apache.lucene.util.packed.PackedLongValues; |
| |
| /** |
| * Expert: The default cache implementation, storing all values in memory. |
| * A WeakHashMap is used for storage. |
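| * <p> |
| * A minimal usage sketch (the reader, field name, and parser choice are |
| * illustrative only; {@code LONG_POINT_PARSER} is assumed to be one of the |
| * point parsers declared on the companion {@link FieldCache} interface): |
| * <pre> |
| *   NumericDocValues vals = |
| *       FieldCache.DEFAULT.getNumerics(leafReader, "price", FieldCache.LONG_POINT_PARSER); |
| *   while (vals.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) { |
| *     long value = vals.longValue(); |
| *   } |
| * </pre> |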
| * |
| * @lucene.internal |
| */ |
| public class FieldCacheImpl implements FieldCache { |
| |
| private Map<Class<?>,Cache> caches; |
| FieldCacheImpl() { |
| init(); |
| } |
| |
| private synchronized void init() { |
| caches = new HashMap<>(6); |
| caches.put(Long.TYPE, new LongCache(this)); |
| caches.put(BinaryDocValues.class, new BinaryDocValuesCache(this)); |
| caches.put(SortedDocValues.class, new SortedDocValuesCache(this)); |
| caches.put(DocTermOrds.class, new DocTermOrdsCache(this)); |
| caches.put(DocsWithFieldCache.class, new DocsWithFieldCache(this)); |
| } |
| |
| @Override |
| public synchronized void purgeAllCaches() { |
| init(); |
| } |
| |
| @Override |
| public synchronized void purgeByCacheKey(IndexReader.CacheKey coreCacheKey) { |
| for(Cache c : caches.values()) { |
| c.purgeByCacheKey(coreCacheKey); |
| } |
| } |
| |
| @Override |
| public synchronized CacheEntry[] getCacheEntries() { |
| List<CacheEntry> result = new ArrayList<>(17); |
| for(final Map.Entry<Class<?>,Cache> cacheEntry: caches.entrySet()) { |
| final Cache cache = cacheEntry.getValue(); |
| final Class<?> cacheType = cacheEntry.getKey(); |
| synchronized(cache.readerCache) { |
| for (final Map.Entry<IndexReader.CacheKey,Map<CacheKey, Accountable>> readerCacheEntry : cache.readerCache.entrySet()) { |
| final IndexReader.CacheKey readerKey = readerCacheEntry.getKey(); |
| if (readerKey == null) continue; |
| final Map<CacheKey, Accountable> innerCache = readerCacheEntry.getValue(); |
| for (final Map.Entry<CacheKey, Accountable> mapEntry : innerCache.entrySet()) { |
| CacheKey entry = mapEntry.getKey(); |
| result.add(new CacheEntry(readerKey, entry.field, |
| cacheType, entry.custom, |
| mapEntry.getValue())); |
| } |
| } |
| } |
| } |
| return result.toArray(new CacheEntry[result.size()]); |
| } |
| |
| // per-segment fieldcaches don't purge until the shared core closes. |
| final IndexReader.ClosedListener purgeCore = FieldCacheImpl.this::purgeByCacheKey; |
| |
| private void initReader(LeafReader reader) { |
| IndexReader.CacheHelper cacheHelper = reader.getCoreCacheHelper(); |
| if (cacheHelper == null) { |
| throw new IllegalStateException("Cannot cache on " + reader); |
| } |
| cacheHelper.addClosedListener(purgeCore); |
| } |
| |
| /** Expert: Internal cache. */ |
| abstract static class Cache { |
| |
| Cache(FieldCacheImpl wrapper) { |
| this.wrapper = wrapper; |
| } |
| |
| final FieldCacheImpl wrapper; |
| |
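| // Two-level cache: the outer map is keyed (weakly) by the segment core's cache key, |
| // the inner map by (field, custom) CacheKey. Entries are also purged explicitly when |
| // the core closes, via the purgeCore listener registered in initReader(). |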
| final Map<IndexReader.CacheKey,Map<CacheKey,Accountable>> readerCache = new WeakHashMap<>(); |
| |
| protected abstract Accountable createValue(LeafReader reader, CacheKey key) |
| throws IOException; |
| |
| /** Remove this reader from the cache, if present. */ |
| public void purgeByCacheKey(IndexReader.CacheKey coreCacheKey) { |
| synchronized(readerCache) { |
| readerCache.remove(coreCacheKey); |
| } |
| } |
| |
| /** Sets the key to the value for the provided reader; |
| * if the key is already set then this doesn't change it. */ |
| public void put(LeafReader reader, CacheKey key, Accountable value) { |
| IndexReader.CacheHelper cacheHelper = reader.getCoreCacheHelper(); |
| if (cacheHelper == null) { |
| throw new IllegalStateException("Cannot cache on " + reader); |
| } |
| final IndexReader.CacheKey readerKey = cacheHelper.getKey(); |
| synchronized (readerCache) { |
| Map<CacheKey,Accountable> innerCache = readerCache.get(readerKey); |
| if (innerCache == null) { |
| // First time this reader is using FieldCache |
| innerCache = new HashMap<>(); |
| readerCache.put(readerKey, innerCache); |
| wrapper.initReader(reader); |
| } |
| if (innerCache.get(key) == null) { |
| innerCache.put(key, value); |
| } else { |
| // Another thread beat us to it; leave the current |
| // value |
| } |
| } |
| } |
| |
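| // Lookup with lazy creation: under the readerCache lock we install a CreationPlaceholder |
| // if the key is missing, then build the real value while synchronizing only on that |
| // placeholder, so expensive uninversion of one field does not block lookups of others. |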
| public Object get(LeafReader reader, CacheKey key) throws IOException { |
| Map<CacheKey,Accountable> innerCache; |
| Accountable value; |
| IndexReader.CacheHelper cacheHelper = reader.getCoreCacheHelper(); |
| if (cacheHelper == null) { |
| throw new IllegalStateException("Cannot cache on " + reader); |
| } |
| final IndexReader.CacheKey readerKey = cacheHelper.getKey(); |
| synchronized (readerCache) { |
| innerCache = readerCache.get(readerKey); |
| if (innerCache == null) { |
| // First time this reader is using FieldCache |
| innerCache = new HashMap<>(); |
| readerCache.put(readerKey, innerCache); |
| wrapper.initReader(reader); |
| value = null; |
| } else { |
| value = innerCache.get(key); |
| } |
| if (value == null) { |
| value = new CreationPlaceholder(); |
| innerCache.put(key, value); |
| } |
| } |
| if (value instanceof CreationPlaceholder) { |
| synchronized (value) { |
| CreationPlaceholder progress = (CreationPlaceholder) value; |
| if (progress.value == null) { |
| progress.value = createValue(reader, key); |
| synchronized (readerCache) { |
| innerCache.put(key, progress.value); |
| } |
| } |
| return progress.value; |
| } |
| } |
| return value; |
| } |
| } |
| |
| /** Expert: Every composite-key in the internal cache is of this type. */ |
| static class CacheKey { |
| final String field; // which Field |
| final Object custom; // which custom comparator or parser |
| |
| /** Creates one of these objects for a custom comparator/parser. */ |
| CacheKey(String field, Object custom) { |
| this.field = field; |
| this.custom = custom; |
| } |
| |
| /** Two of these are equal iff they reference the same field and the same custom comparator/parser. */ |
| @Override |
| public boolean equals (Object o) { |
| if (o instanceof CacheKey) { |
| CacheKey other = (CacheKey) o; |
| if (other.field.equals(field)) { |
| if (other.custom == null) { |
| if (custom == null) return true; |
| } else if (other.custom.equals (custom)) { |
| return true; |
| } |
| } |
| } |
| return false; |
| } |
| |
| /** Composes a hashcode based on the field and the custom comparator/parser. */ |
| @Override |
| public int hashCode() { |
| return field.hashCode() ^ (custom==null ? 0 : custom.hashCode()); |
| } |
| } |
| |
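| // Walks either the points or the postings of one field, calling visitTerm for each value |
| // and visitDoc for each document containing it, while tracking which docs had any value |
| // at all (docsWithField). Subclasses turn these callbacks into packed per-doc arrays. |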
| private static abstract class Uninvert { |
| |
| public Bits docsWithField; |
| final boolean points; |
| |
| // pass true to pull from points, otherwise postings. |
| Uninvert(boolean points) { |
| this.points = points; |
| } |
| |
| final void uninvert(LeafReader reader, String field) throws IOException { |
| if (points) { |
| uninvertPoints(reader, field); |
| } else { |
| uninvertPostings(reader, field); |
| } |
| } |
| |
| final void uninvertPoints(LeafReader reader, String field) throws IOException { |
| final int maxDoc = reader.maxDoc(); |
| PointValues values = reader.getPointValues(field); |
| assert values != null; |
| assert values.size() > 0; |
| |
| final boolean setDocsWithField; |
| final int docCount = values.getDocCount(); |
| assert docCount <= maxDoc; |
| if (docCount == maxDoc) { |
| // Fast case: all docs have this field: |
| this.docsWithField = new Bits.MatchAllBits(maxDoc); |
| setDocsWithField = false; |
| } else { |
| setDocsWithField = true; |
| } |
| |
| BytesRef scratch = new BytesRef(); |
| values.intersect(new IntersectVisitor() { |
| @Override |
| public void visit(int docID) throws IOException { |
| throw new AssertionError(); |
| } |
| |
| @Override |
| public void visit(int docID, byte[] packedValue) throws IOException { |
| scratch.bytes = packedValue; |
| scratch.length = packedValue.length; |
| visitTerm(scratch); |
| visitDoc(docID); |
| if (setDocsWithField) { |
| if (docsWithField == null) { |
| // Lazy init |
| docsWithField = new FixedBitSet(maxDoc); |
| } |
| ((FixedBitSet)docsWithField).set(docID); |
| } |
| } |
| |
| @Override |
| public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) { |
| return Relation.CELL_CROSSES_QUERY; // inspect all byte-docid pairs |
| } |
| }); |
| } |
| |
| final void uninvertPostings(LeafReader reader, String field) throws IOException { |
| final int maxDoc = reader.maxDoc(); |
| Terms terms = reader.terms(field); |
| if (terms != null) { |
| final boolean setDocsWithField; |
| final int termsDocCount = terms.getDocCount(); |
| assert termsDocCount <= maxDoc; |
| if (termsDocCount == maxDoc) { |
| // Fast case: all docs have this field: |
| this.docsWithField = new Bits.MatchAllBits(maxDoc); |
| setDocsWithField = false; |
| } else { |
| setDocsWithField = true; |
| } |
| |
| final TermsEnum termsEnum = termsEnum(terms); |
| |
| PostingsEnum docs = null; |
| FixedBitSet docsWithField = null; |
| while(true) { |
| final BytesRef term = termsEnum.next(); |
| if (term == null) { |
| break; |
| } |
| visitTerm(term); |
| docs = termsEnum.postings(docs, PostingsEnum.NONE); |
| while (true) { |
| final int docID = docs.nextDoc(); |
| if (docID == DocIdSetIterator.NO_MORE_DOCS) { |
| break; |
| } |
| visitDoc(docID); |
| if (setDocsWithField) { |
| if (docsWithField == null) { |
| // Lazy init |
| this.docsWithField = docsWithField = new FixedBitSet(maxDoc); |
| } |
| docsWithField.set(docID); |
| } |
| } |
| } |
| } |
| } |
| |
| protected abstract TermsEnum termsEnum(Terms terms) throws IOException; |
| protected abstract void visitTerm(BytesRef term); |
| protected abstract void visitDoc(int docID); |
| } |
| |
| // null Bits means no docs matched |
| void setDocsWithField(LeafReader reader, String field, Bits docsWithField, Parser parser) { |
| final int maxDoc = reader.maxDoc(); |
| final Bits bits; |
| if (docsWithField == null) { |
| bits = new Bits.MatchNoBits(maxDoc); |
| } else if (docsWithField instanceof FixedBitSet) { |
| final int numSet = ((FixedBitSet) docsWithField).cardinality(); |
| if (numSet >= maxDoc) { |
| // The cardinality of the BitSet is maxDoc if all documents have a value. |
| assert numSet == maxDoc; |
| bits = new Bits.MatchAllBits(maxDoc); |
| } else { |
| bits = docsWithField; |
| } |
| } else { |
| bits = docsWithField; |
| } |
| caches.get(DocsWithFieldCache.class).put(reader, new CacheKey(field, parser), new BitsEntry(bits)); |
| } |
| |
| private static class HoldsOneThing<T> { |
| private T it; |
| |
| public void set(T it) { |
| this.it = it; |
| } |
| |
| public T get() { |
| return it; |
| } |
| } |
| |
| private static class GrowableWriterAndMinValue { |
| GrowableWriterAndMinValue(GrowableWriter array, long minValue) { |
| this.writer = array; |
| this.minValue = minValue; |
| } |
| public GrowableWriter writer; |
| public long minValue; |
| } |
| |
| @Override |
| public Bits getDocsWithField(LeafReader reader, String field, Parser parser) throws IOException { |
| final FieldInfo fieldInfo = reader.getFieldInfos().fieldInfo(field); |
| if (fieldInfo == null) { |
| // field does not exist or has no value |
| return new Bits.MatchNoBits(reader.maxDoc()); |
| } |
| |
| if (fieldInfo.getDocValuesType() != DocValuesType.NONE) { |
| // doc values case |
| } else if (parser instanceof PointParser) { |
| // points case |
| } else { |
| // postings case |
| if (fieldInfo.getIndexOptions() == IndexOptions.NONE) { |
| return new Bits.MatchNoBits(reader.maxDoc()); |
| } |
| } |
| BitsEntry bitsEntry = (BitsEntry) caches.get(DocsWithFieldCache.class).get(reader, new CacheKey(field, parser)); |
| return bitsEntry.bits; |
| } |
| |
| static class BitsEntry implements Accountable { |
| final Bits bits; |
| |
| BitsEntry(Bits bits) { |
| this.bits = bits; |
| } |
| |
| @Override |
| public long ramBytesUsed() { |
| long base = RamUsageEstimator.NUM_BYTES_OBJECT_REF; |
| if (bits instanceof Bits.MatchAllBits || bits instanceof Bits.MatchNoBits) { |
| return base; |
| } else { |
| return base + (bits.length() >>> 3); |
| } |
| } |
| } |
| |
| static final class DocsWithFieldCache extends Cache { |
| DocsWithFieldCache(FieldCacheImpl wrapper) { |
| super(wrapper); |
| } |
| |
| @Override |
| protected BitsEntry createValue(LeafReader reader, CacheKey key) throws IOException { |
| final String field = key.field; |
| final Parser parser = (Parser) key.custom; |
| FieldInfo fieldInfo = reader.getFieldInfos().fieldInfo(field); |
| if (fieldInfo.getDocValuesType() != DocValuesType.NONE) { |
| return createValueDocValues(reader, field); |
| } else if (parser instanceof PointParser) { |
| return createValuePoints(reader, field); |
| } else { |
| return createValuePostings(reader, field); |
| } |
| } |
| |
| private BitsEntry createValueDocValues(LeafReader reader, String field) throws IOException { |
| FieldInfo fieldInfo = reader.getFieldInfos().fieldInfo(field); |
| |
| DocValuesType dvType = fieldInfo.getDocValuesType(); |
| DocIdSetIterator iterator; |
| switch(dvType) { |
| case NUMERIC: |
| iterator = reader.getNumericDocValues(field); |
| break; |
| case BINARY: |
| iterator = reader.getBinaryDocValues(field); |
| break; |
| case SORTED: |
| iterator = reader.getSortedDocValues(field); |
| break; |
| case SORTED_NUMERIC: |
| iterator = reader.getSortedNumericDocValues(field); |
| break; |
| case SORTED_SET: |
| iterator = reader.getSortedSetDocValues(field); |
| break; |
| default: |
| throw new AssertionError(); |
| } |
| |
| FixedBitSet bits = new FixedBitSet(reader.maxDoc()); |
| while (true) { |
| int docID = iterator.nextDoc(); |
| if (docID == DocIdSetIterator.NO_MORE_DOCS) { |
| break; |
| } |
| bits.set(docID); |
| } |
| |
| return new BitsEntry(bits); |
| } |
| |
| private BitsEntry createValuePoints(LeafReader reader, String field) throws IOException { |
| final int maxDoc = reader.maxDoc(); |
| PointValues values = reader.getPointValues(field); |
| assert values != null; |
| assert values.size() > 0; |
| |
| final int docCount = values.getDocCount(); |
| assert docCount <= maxDoc; |
| if (docCount == maxDoc) { |
| // Fast case: all docs have this field: |
| return new BitsEntry(new Bits.MatchAllBits(maxDoc)); |
| } |
| |
| // otherwise run an uninvert whose term/doc visitors are no-ops, just to collect docsWithField |
| Uninvert u = new Uninvert(true) { |
| @Override |
| protected TermsEnum termsEnum(Terms terms) throws IOException { |
| throw new AssertionError(); |
| } |
| |
| @Override |
| protected void visitTerm(BytesRef term) {} |
| |
| @Override |
| protected void visitDoc(int docID) {} |
| }; |
| u.uninvert(reader, field); |
| return new BitsEntry(u.docsWithField); |
| } |
| |
| // TODO: it is dumb that uninverting code is duplicated here in this method!! |
| private BitsEntry createValuePostings(LeafReader reader, String field) throws IOException { |
| final int maxDoc = reader.maxDoc(); |
| |
| // Visit all docs that have terms for this field |
| FixedBitSet res = null; |
| Terms terms = reader.terms(field); |
| if (terms != null) { |
| final int termsDocCount = terms.getDocCount(); |
| assert termsDocCount <= maxDoc; |
| if (termsDocCount == maxDoc) { |
| // Fast case: all docs have this field: |
| return new BitsEntry(new Bits.MatchAllBits(maxDoc)); |
| } |
| final TermsEnum termsEnum = terms.iterator(); |
| PostingsEnum docs = null; |
| while(true) { |
| final BytesRef term = termsEnum.next(); |
| if (term == null) { |
| break; |
| } |
| if (res == null) { |
| // lazy init |
| res = new FixedBitSet(maxDoc); |
| } |
| |
| docs = termsEnum.postings(docs, PostingsEnum.NONE); |
| // TODO: use bulk API |
| while (true) { |
| final int docID = docs.nextDoc(); |
| if (docID == DocIdSetIterator.NO_MORE_DOCS) { |
| break; |
| } |
| res.set(docID); |
| } |
| } |
| } |
| if (res == null) { |
| return new BitsEntry(new Bits.MatchNoBits(maxDoc)); |
| } |
| final int numSet = res.cardinality(); |
| if (numSet >= maxDoc) { |
| // The cardinality of the BitSet is maxDoc if all documents have a value. |
| assert numSet == maxDoc; |
| return new BitsEntry(new Bits.MatchAllBits(maxDoc)); |
| } |
| return new BitsEntry(res); |
| } |
| } |
| |
| @Override |
| public NumericDocValues getNumerics(LeafReader reader, String field, Parser parser) throws IOException { |
| if (parser == null) { |
| throw new NullPointerException(); |
| } |
| final NumericDocValues valuesIn = reader.getNumericDocValues(field); |
| if (valuesIn != null) { |
| return valuesIn; |
| } else { |
| final FieldInfo info = reader.getFieldInfos().fieldInfo(field); |
| if (info == null) { |
| return DocValues.emptyNumeric(); |
| } else if (info.getDocValuesType() != DocValuesType.NONE) { |
| throw new IllegalStateException("Type mismatch: " + field + " was indexed as " + info.getDocValuesType()); |
| } |
| |
| if (parser instanceof PointParser) { |
| // points case |
| // no points in this segment |
| if (info.getPointDimensionCount() == 0) { |
| return DocValues.emptyNumeric(); |
| } |
| if (info.getPointDimensionCount() != 1) { |
| throw new IllegalStateException("Type mismatch: " + field + " was indexed with dimensions=" + info.getPointDimensionCount()); |
| } |
| PointValues values = reader.getPointValues(field); |
| // no actual points for this field (e.g. all points deleted) |
| if (values == null || values.size() == 0) { |
| return DocValues.emptyNumeric(); |
| } |
| // not single-valued |
| if (values.size() != values.getDocCount()) { |
| throw new IllegalStateException("Type mismatch: " + field + " was indexed with multiple values, numValues=" + values.size() + ",numDocs=" + values.getDocCount()); |
| } |
| } else { |
| // postings case |
| // not indexed |
| if (info.getIndexOptions() == IndexOptions.NONE) { |
| return DocValues.emptyNumeric(); |
| } |
| } |
| |
| return ((LongsFromArray) caches.get(Long.TYPE).get(reader, new CacheKey(field, parser))).iterator(); |
| } |
| } |
| |
| public static class LongsFromArray implements Accountable { |
| private final PackedInts.Reader values; |
| private final long minValue; |
| private final Bits docsWithField; |
| private final String field; |
| |
| public LongsFromArray(String field, PackedInts.Reader values, long minValue, Bits docsWithField) { // TODO: accept null docsWithField? |
| this.field = field; |
| this.values = values; |
| this.minValue = minValue; |
| this.docsWithField = docsWithField; |
| } |
| |
| @Override |
| public long ramBytesUsed() { |
| return values.ramBytesUsed() + RamUsageEstimator.NUM_BYTES_OBJECT_REF + Long.BYTES; |
| } |
| |
| public NumericDocValues iterator() { |
| return new NumericDocValues() { |
| int docID = -1; |
| |
| @Override |
| public int docID() { |
| return docID; |
| } |
| |
| @Override |
| public int nextDoc() { |
| while (true) { |
| docID++; |
| if (docID >= values.size()) { |
| docID = NO_MORE_DOCS; |
| return docID; |
| } |
| if (docsWithField.get(docID)) { |
| return docID; |
| } |
| } |
| } |
| |
| @Override |
| public int advance(int target) { |
| if (target < values.size()) { |
| docID = target; |
| if (docsWithField.get(docID)) { |
| return docID; |
| } else{ |
| return nextDoc(); |
| } |
| } else { |
| docID = NO_MORE_DOCS; |
| return docID; |
| } |
| } |
| |
| @Override |
| public boolean advanceExact(int target) throws IOException { |
| docID = target; |
| return docsWithField.get(docID); |
| } |
| |
| @Override |
| public long cost() { |
| return values.size(); |
| } |
| |
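| // values stores deltas against minValue (see LongCache.createValue), so add it back: |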
| @Override |
| public long longValue() { |
| return minValue + values.get(docID); |
| } |
| }; |
| } |
| } |
| |
| static final class LongCache extends Cache { |
| LongCache(FieldCacheImpl wrapper) { |
| super(wrapper); |
| } |
| |
| @Override |
| protected Accountable createValue(final LeafReader reader, CacheKey key) |
| throws IOException { |
| |
| final Parser parser = (Parser) key.custom; |
| |
| final HoldsOneThing<GrowableWriterAndMinValue> valuesRef = new HoldsOneThing<>(); |
| |
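| // Uninvert the field into a GrowableWriter of deltas: each value is stored as |
| // (value - minValue) so negatives fit in unsigned packed ints, and cells are pre-filled |
| // with -minValue so that docs without a value decode back to 0. |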
| Uninvert u = new Uninvert(parser instanceof PointParser) { |
| private long minValue; |
| private long currentValue; |
| private GrowableWriter values; |
| |
| @Override |
| public void visitTerm(BytesRef term) { |
| currentValue = parser.parseValue(term); |
| if (values == null) { |
| // Lazy alloc: for the numeric field case (which will hit a |
| // NumberFormatException when we first try the DEFAULT_INT_PARSER) |
| // we don't want to double-alloc: |
| int startBitsPerValue; |
| // Make sure that missing values (0) can be stored without resizing |
| if (currentValue < 0) { |
| minValue = currentValue; |
| startBitsPerValue = minValue == Long.MIN_VALUE ? 64 : PackedInts.bitsRequired(-minValue); |
| } else { |
| minValue = 0; |
| startBitsPerValue = PackedInts.bitsRequired(currentValue); |
| } |
| values = new GrowableWriter(startBitsPerValue, reader.maxDoc(), PackedInts.FAST); |
| if (minValue != 0) { |
| values.fill(0, values.size(), -minValue); // default value must be 0 |
| } |
| valuesRef.set(new GrowableWriterAndMinValue(values, minValue)); |
| } |
| } |
| |
| @Override |
| public void visitDoc(int docID) { |
| values.set(docID, currentValue - minValue); |
| } |
| |
| @Override |
| protected TermsEnum termsEnum(Terms terms) throws IOException { |
| return parser.termsEnum(terms); |
| } |
| }; |
| |
| u.uninvert(reader, key.field); |
| wrapper.setDocsWithField(reader, key.field, u.docsWithField, parser); |
| GrowableWriterAndMinValue values = valuesRef.get(); |
| Bits docsWithField = u.docsWithField == null ? new Bits.MatchNoBits(reader.maxDoc()) : u.docsWithField; |
| if (values == null) { |
| return new LongsFromArray(key.field, new PackedInts.NullReader(reader.maxDoc()), 0L, docsWithField); |
| } |
| return new LongsFromArray(key.field, values.writer.getMutable(), values.minValue, docsWithField); |
| } |
| } |
| |
| public static class SortedDocValuesImpl implements Accountable { |
| private final PagedBytes.Reader bytes; |
| private final PackedLongValues termOrdToBytesOffset; |
| private final PackedInts.Reader docToTermOrd; |
| private final int numOrd; |
| |
| public SortedDocValuesImpl(PagedBytes.Reader bytes, PackedLongValues termOrdToBytesOffset, PackedInts.Reader docToTermOrd, int numOrd) { |
| this.bytes = bytes; |
| this.docToTermOrd = docToTermOrd; |
| this.termOrdToBytesOffset = termOrdToBytesOffset; |
| this.numOrd = numOrd; |
| } |
| |
| public SortedDocValues iterator() { |
| return new Iter(); |
| } |
| |
| public class Iter extends SortedDocValues { |
| private int docID = -1; |
| private final BytesRef term = new BytesRef(); |
| |
| /** @lucene.internal Specific to this implementation and subject to change. For internal optimization only. */ |
| public int getOrd(int docID) { |
| // Subtract 1, matching the 1+ord we did when |
| // storing, so that missing values, which are 0 in the |
| // packed ints, are returned as -1 ord: |
| return (int) docToTermOrd.get(docID)-1; |
| } |
| |
| @Override |
| public int docID() { |
| return docID; |
| } |
| |
| @Override |
| public int nextDoc() { |
| while (true) { |
| docID++; |
| if (docID >= docToTermOrd.size()) { |
| docID = NO_MORE_DOCS; |
| return docID; |
| } |
| if (docToTermOrd.get(docID) != 0) { |
| return docID; |
| } |
| } |
| } |
| |
| @Override |
| public int advance(int target) { |
| if (target < docToTermOrd.size()) { |
| docID = target; |
| if (docToTermOrd.get(docID) != 0) { |
| return docID; |
| } else{ |
| return nextDoc(); |
| } |
| } else { |
| docID = NO_MORE_DOCS; |
| return docID; |
| } |
| } |
| |
| @Override |
| public boolean advanceExact(int target) throws IOException { |
| docID = target; |
| return docToTermOrd.get(docID) != 0; |
| } |
| |
| @Override |
| public long cost() { |
| return 0; |
| } |
| |
| @Override |
| public int ordValue() { |
| // Subtract 1, matching the 1+ord we did when |
| // storing, so that missing values, which are 0 in the |
| // packed ints, are returned as -1 ord: |
| return (int) docToTermOrd.get(docID)-1; |
| } |
| |
| @Override |
| public int getValueCount() { |
| return numOrd; |
| } |
| |
| @Override |
| public BytesRef lookupOrd(int ord) { |
| if (ord < 0) { |
| throw new IllegalArgumentException("ord must be >=0 (got ord=" + ord + ")"); |
| } |
| bytes.fill(term, termOrdToBytesOffset.get(ord)); |
| return term; |
| } |
| } |
| |
| @Override |
| public long ramBytesUsed() { |
| return bytes.ramBytesUsed() + |
| termOrdToBytesOffset.ramBytesUsed() + |
| docToTermOrd.ramBytesUsed() + |
| 3*RamUsageEstimator.NUM_BYTES_OBJECT_REF + |
| Integer.BYTES; |
| } |
| |
| @Override |
| public Collection<Accountable> getChildResources() { |
| List<Accountable> resources = new ArrayList<>(3); |
| resources.add(Accountables.namedAccountable("term bytes", bytes)); |
| resources.add(Accountables.namedAccountable("ord -> term", termOrdToBytesOffset)); |
| resources.add(Accountables.namedAccountable("doc -> ord", docToTermOrd)); |
| return Collections.unmodifiableList(resources); |
| } |
| } |
| |
| public SortedDocValues getTermsIndex(LeafReader reader, String field) throws IOException { |
| return getTermsIndex(reader, field, PackedInts.FAST); |
| } |
| |
| public SortedDocValues getTermsIndex(LeafReader reader, String field, float acceptableOverheadRatio) throws IOException { |
| SortedDocValues valuesIn = reader.getSortedDocValues(field); |
| if (valuesIn != null) { |
| // Not cached here by FieldCacheImpl (cached instead |
| // per-thread by SegmentReader): |
| return valuesIn; |
| } else { |
| final FieldInfo info = reader.getFieldInfos().fieldInfo(field); |
| if (info == null) { |
| return DocValues.emptySorted(); |
| } else if (info.getDocValuesType() != DocValuesType.NONE) { |
| // we don't try to build a sorted instance from numeric/binary doc |
| // values because dedup can be very costly |
| throw new IllegalStateException("Type mismatch: " + field + " was indexed as " + info.getDocValuesType()); |
| } else if (info.getIndexOptions() == IndexOptions.NONE) { |
| return DocValues.emptySorted(); |
| } |
| SortedDocValuesImpl impl = (SortedDocValuesImpl) caches.get(SortedDocValues.class).get(reader, new CacheKey(field, acceptableOverheadRatio)); |
| return impl.iterator(); |
| } |
| } |
| |
| static class SortedDocValuesCache extends Cache { |
| SortedDocValuesCache(FieldCacheImpl wrapper) { |
| super(wrapper); |
| } |
| |
| @Override |
| protected Accountable createValue(LeafReader reader, CacheKey key) |
| throws IOException { |
| |
| final int maxDoc = reader.maxDoc(); |
| |
| Terms terms = reader.terms(key.field); |
| |
| final float acceptableOverheadRatio = ((Float) key.custom).floatValue(); |
| |
| final PagedBytes bytes = new PagedBytes(15); |
| |
| int startTermsBPV; |
| |
| // TODO: use Uninvert? |
| if (terms != null) { |
| // Try for coarse estimate for number of bits; this |
| // should be an underestimate most of the time, which |
| // is fine -- GrowableWriter will reallocate as needed |
| long numUniqueTerms = terms.size(); |
| if (numUniqueTerms != -1L) { |
| if (numUniqueTerms > maxDoc) { |
| throw new IllegalStateException("Type mismatch: " + key.field + " was indexed with multiple values per document, use SORTED_SET instead"); |
| } |
| |
| startTermsBPV = PackedInts.bitsRequired(numUniqueTerms); |
| } else { |
| startTermsBPV = 1; |
| } |
| } else { |
| startTermsBPV = 1; |
| } |
| |
| PackedLongValues.Builder termOrdToBytesOffset = PackedLongValues.monotonicBuilder(PackedInts.COMPACT); |
| final GrowableWriter docToTermOrd = new GrowableWriter(startTermsBPV, maxDoc, acceptableOverheadRatio); |
| |
| int termOrd = 0; |
| |
| // TODO: use Uninvert? |
| |
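| // Walk terms in sorted order: record each term's byte offset (ord -> bytes) and mark |
| // every doc containing it with 1+ord in docToTermOrd; a stored 0 means the doc has no value. |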
| if (terms != null) { |
| final TermsEnum termsEnum = terms.iterator(); |
| PostingsEnum docs = null; |
| |
| while(true) { |
| final BytesRef term = termsEnum.next(); |
| if (term == null) { |
| break; |
| } |
| if (termOrd >= maxDoc) { |
| throw new IllegalStateException("Type mismatch: " + key.field + " was indexed with multiple values per document, use SORTED_SET instead"); |
| } |
| |
| termOrdToBytesOffset.add(bytes.copyUsingLengthPrefix(term)); |
| docs = termsEnum.postings(docs, PostingsEnum.NONE); |
| while (true) { |
| final int docID = docs.nextDoc(); |
| if (docID == DocIdSetIterator.NO_MORE_DOCS) { |
| break; |
| } |
| // Store 1+ ord into packed bits |
| docToTermOrd.set(docID, 1+termOrd); |
| } |
| termOrd++; |
| } |
| } |
| |
| // maybe an int-only impl? |
| return new SortedDocValuesImpl(bytes.freeze(true), termOrdToBytesOffset.build(), docToTermOrd.getMutable(), termOrd); |
| } |
| } |
| |
| public static class BinaryDocValuesImpl implements Accountable { |
| private final PagedBytes.Reader bytes; |
| private final PackedInts.Reader docToOffset; |
| private final Bits docsWithField; |
| |
| public BinaryDocValuesImpl(PagedBytes.Reader bytes, PackedInts.Reader docToOffset, Bits docsWithField) { |
| this.bytes = bytes; |
| this.docToOffset = docToOffset; |
| this.docsWithField = docsWithField; |
| } |
| |
| public BinaryDocValues iterator() { |
| return new BinaryDocValues() { |
| |
| final BytesRef term = new BytesRef(); |
| |
| int docID = -1; |
| |
| @Override |
| public int docID() { |
| return docID; |
| } |
| |
| @Override |
| public int nextDoc() { |
| while (true) { |
| docID++; |
| if (docID >= docToOffset.size()) { |
| docID = NO_MORE_DOCS; |
| return docID; |
| } |
| if (docsWithField.get(docID)) { |
| return docID; |
| } |
| } |
| } |
| |
| @Override |
| public int advance(int target) { |
| if (target < docToOffset.size()) { |
| docID = target; |
| if (docsWithField.get(docID)) { |
| return docID; |
| } else{ |
| return nextDoc(); |
| } |
| } else { |
| docID = NO_MORE_DOCS; |
| return docID; |
| } |
| } |
| |
| @Override |
| public boolean advanceExact(int target) throws IOException { |
| docID = target; |
| return docsWithField.get(docID); |
| } |
| |
| @Override |
| public long cost() { |
| return 0; |
| } |
| |
| @Override |
| public BytesRef binaryValue() { |
| final long pointer = docToOffset.get(docID); |
| if (pointer == 0) { |
| term.length = 0; |
| } else { |
| bytes.fill(term, pointer); |
| } |
| return term; |
| } |
| }; |
| } |
| |
| @Override |
| public long ramBytesUsed() { |
| return bytes.ramBytesUsed() + docToOffset.ramBytesUsed() + 2*RamUsageEstimator.NUM_BYTES_OBJECT_REF; |
| } |
| |
| @Override |
| public Collection<Accountable> getChildResources() { |
| List<Accountable> resources = new ArrayList<>(2); |
| resources.add(Accountables.namedAccountable("term bytes", bytes)); |
| resources.add(Accountables.namedAccountable("addresses", docToOffset)); |
| return Collections.unmodifiableList(resources); |
| } |
| } |
| |
| // TODO: if DocTermsIndex was already created, we |
| // should share it... |
| public BinaryDocValues getTerms(LeafReader reader, String field) throws IOException { |
| return getTerms(reader, field, PackedInts.FAST); |
| } |
| |
| public BinaryDocValues getTerms(LeafReader reader, String field, float acceptableOverheadRatio) throws IOException { |
| BinaryDocValues valuesIn = reader.getBinaryDocValues(field); |
| if (valuesIn == null) { |
| valuesIn = reader.getSortedDocValues(field); |
| } |
| |
| if (valuesIn != null) { |
| // Not cached here by FieldCacheImpl (cached instead |
| // per-thread by SegmentReader): |
| return valuesIn; |
| } |
| |
| final FieldInfo info = reader.getFieldInfos().fieldInfo(field); |
| if (info == null) { |
| return DocValues.emptyBinary(); |
| } else if (info.getDocValuesType() != DocValuesType.NONE) { |
| throw new IllegalStateException("Type mismatch: " + field + " was indexed as " + info.getDocValuesType()); |
| } else if (info.getIndexOptions() == IndexOptions.NONE) { |
| return DocValues.emptyBinary(); |
| } |
| |
| BinaryDocValuesImpl impl = (BinaryDocValuesImpl) caches.get(BinaryDocValues.class).get(reader, new CacheKey(field, acceptableOverheadRatio)); |
| return impl.iterator(); |
| } |
| |
| static final class BinaryDocValuesCache extends Cache { |
| BinaryDocValuesCache(FieldCacheImpl wrapper) { |
| super(wrapper); |
| } |
| |
| @Override |
| protected Accountable createValue(LeafReader reader, CacheKey key) |
| throws IOException { |
| |
| // TODO: would be nice to first check if DocTermsIndex |
| // was already cached for this field and then return |
| // that instead, to avoid insanity |
| |
| final int maxDoc = reader.maxDoc(); |
| Terms terms = reader.terms(key.field); |
| |
| final float acceptableOverheadRatio = ((Float) key.custom).floatValue(); |
| |
| final int termCountHardLimit = maxDoc; |
| |
| // Holds the actual term data, expanded. |
| final PagedBytes bytes = new PagedBytes(15); |
| |
| int startBPV; |
| |
| if (terms != null) { |
| // Try for coarse estimate for number of bits; this |
| // should be an underestimate most of the time, which |
| // is fine -- GrowableWriter will reallocate as needed |
| long numUniqueTerms = terms.size(); |
| if (numUniqueTerms != -1L) { |
| if (numUniqueTerms > termCountHardLimit) { |
| numUniqueTerms = termCountHardLimit; |
| } |
| startBPV = PackedInts.bitsRequired(numUniqueTerms*4); |
| } else { |
| startBPV = 1; |
| } |
| } else { |
| startBPV = 1; |
| } |
| |
| final GrowableWriter docToOffset = new GrowableWriter(startBPV, maxDoc, acceptableOverheadRatio); |
| |
| // pointer==0 means not set; copy an empty term first so no real term lands at offset 0 |
| bytes.copyUsingLengthPrefix(new BytesRef()); |
| |
| if (terms != null) { |
| int termCount = 0; |
| final TermsEnum termsEnum = terms.iterator(); |
| PostingsEnum docs = null; |
| while(true) { |
| if (termCount++ == termCountHardLimit) { |
| // app is misusing the API (there is more than |
| // one term per doc); in this case we make best |
| // effort to load what we can (see LUCENE-2142) |
| break; |
| } |
| |
| final BytesRef term = termsEnum.next(); |
| if (term == null) { |
| break; |
| } |
| final long pointer = bytes.copyUsingLengthPrefix(term); |
| docs = termsEnum.postings(docs, PostingsEnum.NONE); |
| while (true) { |
| final int docID = docs.nextDoc(); |
| if (docID == DocIdSetIterator.NO_MORE_DOCS) { |
| break; |
| } |
| docToOffset.set(docID, pointer); |
| } |
| } |
| } |
| |
| final PackedInts.Reader offsetReader = docToOffset.getMutable(); |
| Bits docsWithField = new Bits() { |
| @Override |
| public boolean get(int index) { |
| return offsetReader.get(index) != 0; |
| } |
| |
| @Override |
| public int length() { |
| return maxDoc; |
| } |
| }; |
| |
| wrapper.setDocsWithField(reader, key.field, docsWithField, null); |
| // maybe an int-only impl? |
| return new BinaryDocValuesImpl(bytes.freeze(true), offsetReader, docsWithField); |
| } |
| } |
| |
| // TODO: if DocTermsIndex was already created, we |
| // should share it... |
| public SortedSetDocValues getDocTermOrds(LeafReader reader, String field, BytesRef prefix) throws IOException { |
| // not a general purpose filtering mechanism... |
| assert prefix == null || prefix == INT32_TERM_PREFIX || prefix == INT64_TERM_PREFIX; |
| |
| SortedSetDocValues dv = reader.getSortedSetDocValues(field); |
| if (dv != null) { |
| return dv; |
| } |
| |
| SortedDocValues sdv = reader.getSortedDocValues(field); |
| if (sdv != null) { |
| return DocValues.singleton(sdv); |
| } |
| |
| final FieldInfo info = reader.getFieldInfos().fieldInfo(field); |
| if (info == null) { |
| return DocValues.emptySortedSet(); |
| } else if (info.getDocValuesType() != DocValuesType.NONE) { |
| throw new IllegalStateException("Type mismatch: " + field + " was indexed as " + info.getDocValuesType()); |
| } else if (info.getIndexOptions() == IndexOptions.NONE) { |
| return DocValues.emptySortedSet(); |
| } |
| |
| // ok we need to uninvert. check if we can optimize a bit. |
| |
| Terms terms = reader.terms(field); |
| if (terms == null) { |
| return DocValues.emptySortedSet(); |
| } else { |
| // if #postings = #docswithfield we know that the field is "single valued enough". |
| // it's possible the same term might appear twice in the same document, but SORTED_SET discards frequency. |
| // it's still ok with filtering (which we limit to numerics), it just means precisionStep = Inf |
| long numPostings = terms.getSumDocFreq(); |
| if (numPostings != -1 && numPostings == terms.getDocCount()) { |
| return DocValues.singleton(getTermsIndex(reader, field)); |
| } |
| } |
| |
| DocTermOrds dto = (DocTermOrds) caches.get(DocTermOrds.class).get(reader, new CacheKey(field, prefix)); |
| return dto.iterator(reader); |
| } |
| |
| static final class DocTermOrdsCache extends Cache { |
| DocTermOrdsCache(FieldCacheImpl wrapper) { |
| super(wrapper); |
| } |
| |
| @Override |
| protected Accountable createValue(LeafReader reader, CacheKey key) |
| throws IOException { |
| BytesRef prefix = (BytesRef) key.custom; |
| return new DocTermOrds(reader, null, key.field, prefix); |
| } |
| } |
| |
| } |
| |