| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| package org.apache.solr.request; |
| |
| import java.io.IOException; |
| import java.util.LinkedHashMap; |
| import java.util.Map; |
| import java.util.concurrent.atomic.AtomicLong; |
| |
| import org.apache.lucene.index.AtomicReader; |
| import org.apache.lucene.index.Term; |
| import org.apache.lucene.index.TermsEnum; |
| import org.apache.lucene.search.TermQuery; |
| import org.apache.lucene.search.TermRangeQuery; |
| import org.apache.lucene.uninverting.DocTermOrds; |
| import org.apache.lucene.util.BytesRef; |
| import org.apache.lucene.util.CharsRef; |
| import org.apache.lucene.util.FixedBitSet; |
| import org.apache.lucene.util.UnicodeUtil; |
| import org.apache.solr.common.SolrException; |
| import org.apache.solr.common.params.FacetParams; |
| import org.apache.solr.common.util.NamedList; |
| import org.apache.solr.core.SolrCore; |
| import org.apache.solr.handler.component.FieldFacetStats; |
| import org.apache.solr.handler.component.StatsValues; |
| import org.apache.solr.handler.component.StatsValuesFactory; |
| import org.apache.solr.schema.FieldType; |
| import org.apache.solr.schema.SchemaField; |
| import org.apache.solr.schema.TrieField; |
| import org.apache.solr.search.BitDocSet; |
| import org.apache.solr.search.DocIterator; |
| import org.apache.solr.search.DocSet; |
| import org.apache.solr.search.SolrCache; |
| import org.apache.solr.search.SolrIndexSearcher; |
| import org.apache.solr.util.LongPriorityQueue; |
| import org.apache.solr.util.PrimUtils; |
| |
| /** |
| * |
| * Final form of the un-inverted field: |
| * Each document points to a list of term numbers that are contained in that document. |
| * |
| * Term numbers are in sorted order, and are encoded as variable-length deltas from the |
| * previous term number. Real term numbers start at 2 since 0 and 1 are reserved. A |
| * term number of 0 signals the end of the termNumber list. |
| * |
 * There is a single int[maxDoc()] which either contains a pointer into a byte[] for
 * the termNumber lists, or directly contains the termNumber list if it fits in the 4
 * bytes of an integer.  If the lowest byte of the integer is 1, the upper 3 bytes
 * are an offset into a byte[] where the termNumber list starts.
| * |
| * There are actually 256 byte arrays, to compensate for the fact that the pointers |
| * into the byte arrays are only 3 bytes long. The correct byte array for a document |
 * is a function of its id.
| * |
| * To save space and speed up faceting, any term that matches enough documents will |
| * not be un-inverted... it will be skipped while building the un-inverted field structure, |
| * and will use a set intersection method during faceting. |
| * |
| * To further save memory, the terms (the actual string values) are not all stored in |
| * memory, but a TermIndex is used to convert term numbers to term values only |
| * for the terms needed after faceting has completed. Only every 128th term value |
 * is stored, along with its corresponding term number, and this is used as an
| * index to find the closest term and iterate until the desired number is hit (very |
| * much like Lucene's own internal term index). |
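 *
 * As a rough sketch (simplified from the decoding loops later in this class), reading the
 * term-number list for one document whose entry points into a byte[] looks like the
 * following, where {@code arr} is the document's tnums bucket and {@code pos} its start
 * offset:
 * <pre>
 *   int tnum = 0;
 *   for (;;) {
 *     int delta = 0;
 *     for (;;) {                         // read one variable-length value, 7 bits per byte
 *       byte b = arr[pos++];
 *       delta = (delta &lt;&lt; 7) | (b &amp; 0x7f);
 *       if ((b &amp; 0x80) == 0) break;
 *     }
 *     if (delta == 0) break;             // a delta of 0 terminates the list
 *     tnum += delta - TNUM_OFFSET;       // real term numbers are deltas offset by TNUM_OFFSET (2)
 *     // ... tnum is the next term number contained in this document ...
 *   }
 * </pre>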
| * |
| */ |
| public class UnInvertedField extends DocTermOrds { |
  private static final int TNUM_OFFSET = 2;
| |
| static class TopTerm { |
| BytesRef term; |
| int termNum; |
| |
| long memSize() { |
| return 8 + // obj header |
| 8 + 8 +term.length + //term |
| 4; // int |
| } |
| } |
| |
| long memsz; |
| final AtomicLong use = new AtomicLong(); // number of uses |
| |
| int[] maxTermCounts = new int[1024]; |
| |
| final Map<Integer,TopTerm> bigTerms = new LinkedHashMap<>(); |
| |
| private SolrIndexSearcher.DocsEnumState deState; |
| private final SolrIndexSearcher searcher; |
| private final boolean isPlaceholder; |
| |
  private static final UnInvertedField uifPlaceholder = new UnInvertedField();
| |
| private UnInvertedField() { // Dummy for synchronization. |
| super("fake", 0, 0); // cheapest initialization I can find. |
| isPlaceholder = true; |
| searcher = null; |
| } |
| |
| @Override |
| protected void visitTerm(TermsEnum te, int termNum) throws IOException { |
| |
| if (termNum >= maxTermCounts.length) { |
      // resize by doubling - for a very large number of unique terms, expanding
      // by 4K at a time and the resulting GC would dominate uninvert times.
      // The array is shrunk again at the end if the over-allocation is significant.
| int[] newMaxTermCounts = new int[maxTermCounts.length*2]; |
| System.arraycopy(maxTermCounts, 0, newMaxTermCounts, 0, termNum); |
| maxTermCounts = newMaxTermCounts; |
| } |
| |
| final BytesRef term = te.term(); |
| |
| if (te.docFreq() > maxTermDocFreq) { |
| TopTerm topTerm = new TopTerm(); |
| topTerm.term = BytesRef.deepCopyOf(term); |
| topTerm.termNum = termNum; |
| bigTerms.put(topTerm.termNum, topTerm); |
| |
| if (deState == null) { |
| deState = new SolrIndexSearcher.DocsEnumState(); |
| deState.fieldName = field; |
| deState.liveDocs = searcher.getAtomicReader().getLiveDocs(); |
| deState.termsEnum = te; // TODO: check for MultiTermsEnum in SolrIndexSearcher could now fail? |
| deState.docsEnum = docsEnum; |
| deState.minSetSizeCached = maxTermDocFreq; |
| } |
| |
| docsEnum = deState.docsEnum; |
| DocSet set = searcher.getDocSet(deState); |
| maxTermCounts[termNum] = set.size(); |
| } |
| } |
| |
| @Override |
| protected void setActualDocFreq(int termNum, int docFreq) { |
| maxTermCounts[termNum] = docFreq; |
| } |
| |
| public long memSize() { |
| // can cache the mem size since it shouldn't change |
| if (memsz!=0) return memsz; |
| long sz = super.ramUsedInBytes(); |
| sz += 8*8 + 32; // local fields |
| sz += bigTerms.size() * 64; |
| for (TopTerm tt : bigTerms.values()) { |
| sz += tt.memSize(); |
| } |
| if (maxTermCounts != null) |
| sz += maxTermCounts.length * 4; |
| if (indexedTermsArray != null) { |
| // assume 8 byte references? |
| sz += 8+8+8+8+(indexedTermsArray.length<<3)+sizeOfIndexedStrings; |
| } |
| memsz = sz; |
| return sz; |
| } |
| |
| public UnInvertedField(String field, SolrIndexSearcher searcher) throws IOException { |
| super(field, |
          // threshold, over which we use set intersections instead of counting
          // to (1) save memory, and (2) speed up faceting.
          // Add 2 for testing purposes so that there will always be some terms under
          // the threshold even when the index is very small.
| searcher.maxDoc()/20 + 2, |
| DEFAULT_INDEX_INTERVAL_BITS); |
| //System.out.println("maxTermDocFreq=" + maxTermDocFreq + " maxDoc=" + searcher.maxDoc()); |
| |
| isPlaceholder = false; |
| final String prefix = TrieField.getMainValuePrefix(searcher.getSchema().getFieldType(field)); |
| this.searcher = searcher; |
| try { |
| AtomicReader r = searcher.getAtomicReader(); |
| uninvert(r, r.getLiveDocs(), prefix == null ? null : new BytesRef(prefix)); |
| } catch (IllegalStateException ise) { |
| throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, ise.getMessage()); |
| } |
| if (tnums != null) { |
| for(byte[] target : tnums) { |
| if (target != null && target.length > (1<<24)*.9) { |
| SolrCore.log.warn("Approaching too many values for UnInvertedField faceting on field '"+field+"' : bucket size=" + target.length); |
| } |
| } |
| } |
| |
| // free space if outrageously wasteful (tradeoff memory/cpu) |
| if ((maxTermCounts.length - numTermsInField) > 1024) { // too much waste! |
| int[] newMaxTermCounts = new int[numTermsInField]; |
| System.arraycopy(maxTermCounts, 0, newMaxTermCounts, 0, numTermsInField); |
| maxTermCounts = newMaxTermCounts; |
| } |
| |
| SolrCore.log.info("UnInverted multi-valued field " + toString()); |
| //System.out.println("CREATED: " + toString() + " ti.index=" + ti.index); |
| } |
| |
| public int getNumTerms() { |
| return numTermsInField; |
| } |
| |
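  /**
   * Computes facet counts for this field over {@code baseDocs}, honoring the usual facet
   * parameters: offset/limit paging, mincount filtering, count or index ordering, an
   * optional term prefix, and optionally appending a count of documents with no value
   * when {@code missing} is true.  Terms tracked in {@code bigTerms} are counted with set
   * intersections; all other term instances are counted by walking the un-inverted
   * term-number lists.
   */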
| public NamedList<Integer> getCounts(SolrIndexSearcher searcher, DocSet baseDocs, int offset, int limit, Integer mincount, boolean missing, String sort, String prefix) throws IOException { |
| use.incrementAndGet(); |
| |
| FieldType ft = searcher.getSchema().getFieldType(field); |
| |
| NamedList<Integer> res = new NamedList<>(); // order is important |
| |
| DocSet docs = baseDocs; |
| int baseSize = docs.size(); |
| int maxDoc = searcher.maxDoc(); |
| |
| //System.out.println("GET COUNTS field=" + field + " baseSize=" + baseSize + " minCount=" + mincount + " maxDoc=" + maxDoc + " numTermsInField=" + numTermsInField); |
| if (baseSize >= mincount) { |
| |
| final int[] index = this.index; |
      // tricky: we add one more element than we need because we will reuse this array later
| // for ordering term ords before converting to term labels. |
| final int[] counts = new int[numTermsInField + 1]; |
| |
| // |
      // If there is a prefix, find its start and end term numbers
| // |
| int startTerm = 0; |
| int endTerm = numTermsInField; // one past the end |
| |
| TermsEnum te = getOrdTermsEnum(searcher.getAtomicReader()); |
| if (te != null && prefix != null && prefix.length() > 0) { |
| final BytesRef prefixBr = new BytesRef(prefix); |
| if (te.seekCeil(prefixBr) == TermsEnum.SeekStatus.END) { |
| startTerm = numTermsInField; |
| } else { |
| startTerm = (int) te.ord(); |
| } |
| prefixBr.append(UnicodeUtil.BIG_TERM); |
| if (te.seekCeil(prefixBr) == TermsEnum.SeekStatus.END) { |
| endTerm = numTermsInField; |
| } else { |
| endTerm = (int) te.ord(); |
| } |
| } |
| |
| /*********** |
| // Alternative 2: get the docSet of the prefix (could take a while) and |
| // then do the intersection with the baseDocSet first. |
| if (prefix != null && prefix.length() > 0) { |
| docs = searcher.getDocSet(new ConstantScorePrefixQuery(new Term(field, ft.toInternal(prefix))), docs); |
| // The issue with this method are problems of returning 0 counts for terms w/o |
| // the prefix. We can't just filter out those terms later because it may |
| // mean that we didn't collect enough terms in the queue (in the sorted case). |
| } |
| ***********/ |
| |
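      // When the base set covers more than half of maxDoc it is cheaper to count over the
      // complement of the set; the real count is then recovered below as
      // maxTermCounts[termNum] - counts[termNum].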
| boolean doNegative = baseSize > maxDoc >> 1 && termInstances > 0 |
| && startTerm==0 && endTerm==numTermsInField |
| && docs instanceof BitDocSet; |
| |
| if (doNegative) { |
| FixedBitSet bs = ((BitDocSet)docs).getBits().clone(); |
| bs.flip(0, maxDoc); |
| // TODO: when iterator across negative elements is available, use that |
| // instead of creating a new bitset and inverting. |
| docs = new BitDocSet(bs, maxDoc - baseSize); |
| // simply negating will mean that we have deleted docs in the set. |
| // that should be OK, as their entries in our table should be empty. |
| //System.out.println(" NEG"); |
| } |
| |
| // For the biggest terms, do straight set intersections |
| for (TopTerm tt : bigTerms.values()) { |
| //System.out.println(" do big termNum=" + tt.termNum + " term=" + tt.term.utf8ToString()); |
| // TODO: counts could be deferred if sorted==false |
| if (tt.termNum >= startTerm && tt.termNum < endTerm) { |
| counts[tt.termNum] = searcher.numDocs(new TermQuery(new Term(field, tt.term)), docs); |
| //System.out.println(" count=" + counts[tt.termNum]); |
| } else { |
| //System.out.println("SKIP term=" + tt.termNum); |
| } |
| } |
| |
| // TODO: we could short-circuit counting altogether for sorted faceting |
| // where we already have enough terms from the bigTerms |
| |
| // TODO: we could shrink the size of the collection array, and |
      // additionally break out when the termNumber goes above endTerm, but
| // it would require two extra conditionals in the inner loop (although |
| // they would be predictable for the non-prefix case). |
| // Perhaps a different copy of the code would be warranted. |
| |
| if (termInstances > 0) { |
| DocIterator iter = docs.iterator(); |
| while (iter.hasNext()) { |
| int doc = iter.nextDoc(); |
| //System.out.println("iter doc=" + doc); |
| int code = index[doc]; |
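          // The low byte of 'code' selects the layout: a value of 1 means the upper 3 bytes
          // are an offset into one of the 256 tnums byte arrays; any other value means the
          // delta-encoded term-number list is packed inline in the int itself.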
| |
| if ((code & 0xff)==1) { |
| //System.out.println(" ptr"); |
| int pos = code>>>8; |
| int whichArray = (doc >>> 16) & 0xff; |
| byte[] arr = tnums[whichArray]; |
| int tnum = 0; |
| for(;;) { |
| int delta = 0; |
| for(;;) { |
| byte b = arr[pos++]; |
| delta = (delta << 7) | (b & 0x7f); |
| if ((b & 0x80) == 0) break; |
| } |
| if (delta == 0) break; |
| tnum += delta - TNUM_OFFSET; |
| //System.out.println(" tnum=" + tnum); |
| counts[tnum]++; |
| } |
| } else { |
| //System.out.println(" inlined"); |
| int tnum = 0; |
| int delta = 0; |
| for (;;) { |
| delta = (delta << 7) | (code & 0x7f); |
| if ((code & 0x80)==0) { |
| if (delta==0) break; |
| tnum += delta - TNUM_OFFSET; |
| //System.out.println(" tnum=" + tnum); |
| counts[tnum]++; |
| delta = 0; |
| } |
| code >>>= 8; |
| } |
| } |
| } |
| } |
| final CharsRef charsRef = new CharsRef(); |
| |
| int off=offset; |
| int lim=limit>=0 ? limit : Integer.MAX_VALUE; |
| |
| if (sort.equals(FacetParams.FACET_SORT_COUNT) || sort.equals(FacetParams.FACET_SORT_COUNT_LEGACY)) { |
| int maxsize = limit>0 ? offset+limit : Integer.MAX_VALUE-1; |
| maxsize = Math.min(maxsize, numTermsInField); |
| LongPriorityQueue queue = new LongPriorityQueue(Math.min(maxsize,1000), maxsize, Long.MIN_VALUE); |
| |
| int min=mincount-1; // the smallest value in the top 'N' values |
| //System.out.println("START=" + startTerm + " END=" + endTerm); |
| for (int i=startTerm; i<endTerm; i++) { |
| int c = doNegative ? maxTermCounts[i] - counts[i] : counts[i]; |
| if (c>min) { |
| // NOTE: we use c>min rather than c>=min as an optimization because we are going in |
| // index order, so we already know that the keys are ordered. This can be very |
| // important if a lot of the counts are repeated (like zero counts would be). |
| |
| // smaller term numbers sort higher, so subtract the term number instead |
| long pair = (((long)c)<<32) + (Integer.MAX_VALUE - i); |
| boolean displaced = queue.insert(pair); |
| if (displaced) min=(int)(queue.top() >>> 32); |
| } |
| } |
| |
| // now select the right page from the results |
| |
| // if we are deep paging, we don't have to order the highest "offset" counts. |
| int collectCount = Math.max(0, queue.size() - off); |
| assert collectCount <= lim; |
| |
| // the start and end indexes of our list "sorted" (starting with the highest value) |
| int sortedIdxStart = queue.size() - (collectCount - 1); |
| int sortedIdxEnd = queue.size() + 1; |
| final long[] sorted = queue.sort(collectCount); |
| |
| final int[] indirect = counts; // reuse the counts array for the index into the tnums array |
| assert indirect.length >= sortedIdxEnd; |
| |
| for (int i=sortedIdxStart; i<sortedIdxEnd; i++) { |
| long pair = sorted[i]; |
| int c = (int)(pair >>> 32); |
| int tnum = Integer.MAX_VALUE - (int)pair; |
| |
| indirect[i] = i; // store the index for indirect sorting |
| sorted[i] = tnum; // reuse the "sorted" array to store the term numbers for indirect sorting |
| |
| // add a null label for now... we'll fill it in later. |
| res.add(null, c); |
| } |
| |
| // now sort the indexes by the term numbers |
| PrimUtils.sort(sortedIdxStart, sortedIdxEnd, indirect, new PrimUtils.IntComparator() { |
| @Override |
| public int compare(int a, int b) { |
| return (int)sorted[a] - (int)sorted[b]; |
| } |
| |
| @Override |
| public boolean lessThan(int a, int b) { |
| return sorted[a] < sorted[b]; |
| } |
| |
| @Override |
| public boolean equals(int a, int b) { |
| return sorted[a] == sorted[b]; |
| } |
| }); |
| |
| // convert the term numbers to term values and set |
| // as the label |
| //System.out.println("sortStart=" + sortedIdxStart + " end=" + sortedIdxEnd); |
| for (int i=sortedIdxStart; i<sortedIdxEnd; i++) { |
| int idx = indirect[i]; |
| int tnum = (int)sorted[idx]; |
| final String label = getReadableValue(getTermValue(te, tnum), ft, charsRef); |
| //System.out.println(" label=" + label); |
| res.setName(idx - sortedIdxStart, label); |
| } |
| |
| } else { |
| // add results in index order |
| int i=startTerm; |
| if (mincount<=0) { |
| // if mincount<=0, then we won't discard any terms and we know exactly |
| // where to start. |
| i=startTerm+off; |
| off=0; |
| } |
| |
| for (; i<endTerm; i++) { |
| int c = doNegative ? maxTermCounts[i] - counts[i] : counts[i]; |
| if (c<mincount || --off>=0) continue; |
| if (--lim<0) break; |
| |
| final String label = getReadableValue(getTermValue(te, i), ft, charsRef); |
| res.add(label, c); |
| } |
| } |
| } |
| |
| |
| if (missing) { |
| // TODO: a faster solution for this? |
| res.add(null, SimpleFacets.getFieldMissingCount(searcher, baseDocs, field)); |
| } |
| |
| //System.out.println(" res=" + res); |
| |
| return res; |
| } |
| |
| /** |
   * Collect statistics about the UnInvertedField.  Code is very similar to {@link #getCounts(org.apache.solr.search.SolrIndexSearcher, org.apache.solr.search.DocSet, int, int, Integer, boolean, String, String)}.
   * It can be used to calculate stats on multivalued fields.
   * <p>
   * This method is mainly used by the {@link org.apache.solr.handler.component.StatsComponent}.
| * |
| * @param searcher The Searcher to use to gather the statistics |
| * @param baseDocs The {@link org.apache.solr.search.DocSet} to gather the stats on |
| * @param calcDistinct whether distinct values should be collected and counted |
| * @param facet One or more fields to facet on. |
| * @return The {@link org.apache.solr.handler.component.StatsValues} collected |
| * @throws IOException If there is a low-level I/O error. |
| */ |
| public StatsValues getStats(SolrIndexSearcher searcher, DocSet baseDocs, boolean calcDistinct, String[] facet) throws IOException { |
    // This function is adapted nearly wholesale from getCounts() for use with
    // multiValued fields within the StatsComponent.  It may be worthwhile to factor
    // out the common functionality and refactor at some point.
| use.incrementAndGet(); |
| |
| SchemaField sf = searcher.getSchema().getField(field); |
| // FieldType ft = sf.getType(); |
| |
| StatsValues allstats = StatsValuesFactory.createStatsValues(sf, calcDistinct); |
| |
| |
| DocSet docs = baseDocs; |
| int baseSize = docs.size(); |
| int maxDoc = searcher.maxDoc(); |
| |
| if (baseSize <= 0) return allstats; |
| |
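    // Docs in the base set that have no indexed value for this field: remove everything
    // matching an open-ended range query (i.e. any value at all) over the field.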
| DocSet missing = docs.andNot( searcher.getDocSet(new TermRangeQuery(field, null, null, false, false)) ); |
| |
| int i = 0; |
| final FieldFacetStats[] finfo = new FieldFacetStats[facet.length]; |
    // Initialize facet stats, if facet fields have been passed in
| for (String f : facet) { |
| SchemaField facet_sf = searcher.getSchema().getField(f); |
| finfo[i] = new FieldFacetStats(searcher, f, sf, facet_sf, calcDistinct); |
| i++; |
| } |
| |
| final int[] index = this.index; |
| final int[] counts = new int[numTermsInField];//keep track of the number of times we see each word in the field for all the documents in the docset |
| |
| TermsEnum te = getOrdTermsEnum(searcher.getAtomicReader()); |
| |
    boolean doNegative = false;
    // if we're also collecting statistics for facet fields we must visit every matching
    // document, so the inverted (complement) counting optimization only applies when
    // there are no facet fields.
    if (finfo.length == 0) {
| doNegative = baseSize > maxDoc >> 1 && termInstances > 0 |
| && docs instanceof BitDocSet; |
| } |
| |
| if (doNegative) { |
| FixedBitSet bs = ((BitDocSet) docs).getBits().clone(); |
| bs.flip(0, maxDoc); |
| // TODO: when iterator across negative elements is available, use that |
| // instead of creating a new bitset and inverting. |
| docs = new BitDocSet(bs, maxDoc - baseSize); |
| // simply negating will mean that we have deleted docs in the set. |
| // that should be OK, as their entries in our table should be empty. |
| } |
| |
| // For the biggest terms, do straight set intersections |
| for (TopTerm tt : bigTerms.values()) { |
| // TODO: counts could be deferred if sorted==false |
| if (tt.termNum >= 0 && tt.termNum < numTermsInField) { |
| final Term t = new Term(field, tt.term); |
| if (finfo.length == 0) { |
| counts[tt.termNum] = searcher.numDocs(new TermQuery(t), docs); |
| } else { |
| //COULD BE VERY SLOW |
| //if we're collecting stats for facet fields, we need to iterate on all matching documents |
| DocSet bigTermDocSet = searcher.getDocSet(new TermQuery(t)).intersection(docs); |
| DocIterator iter = bigTermDocSet.iterator(); |
| while (iter.hasNext()) { |
| int doc = iter.nextDoc(); |
| counts[tt.termNum]++; |
| for (FieldFacetStats f : finfo) { |
| f.facetTermNum(doc, tt.termNum); |
| } |
| } |
| } |
| } |
| } |
| |
| |
| if (termInstances > 0) { |
| DocIterator iter = docs.iterator(); |
| while (iter.hasNext()) { |
| int doc = iter.nextDoc(); |
| int code = index[doc]; |
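        // Same packed-int layout as in getCounts(): a low byte of 1 means the upper 3 bytes
        // point into a tnums bucket, otherwise the term-number list is inlined in the int.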
| |
| if ((code & 0xff) == 1) { |
| int pos = code >>> 8; |
| int whichArray = (doc >>> 16) & 0xff; |
| byte[] arr = tnums[whichArray]; |
| int tnum = 0; |
          for (;;) {
            int delta = 0;
            for (;;) {
| byte b = arr[pos++]; |
| delta = (delta << 7) | (b & 0x7f); |
| if ((b & 0x80) == 0) break; |
| } |
| if (delta == 0) break; |
| tnum += delta - TNUM_OFFSET; |
| counts[tnum]++; |
| for (FieldFacetStats f : finfo) { |
| f.facetTermNum(doc, tnum); |
| } |
| } |
| } else { |
| int tnum = 0; |
| int delta = 0; |
          for (;;) {
| delta = (delta << 7) | (code & 0x7f); |
| if ((code & 0x80) == 0) { |
| if (delta == 0) break; |
| tnum += delta - TNUM_OFFSET; |
| counts[tnum]++; |
| for (FieldFacetStats f : finfo) { |
| f.facetTermNum(doc, tnum); |
| } |
| delta = 0; |
| } |
| code >>>= 8; |
| } |
| } |
| } |
| } |
| |
| // add results in index order |
| for (i = 0; i < numTermsInField; i++) { |
| int c = doNegative ? maxTermCounts[i] - counts[i] : counts[i]; |
| if (c == 0) continue; |
| BytesRef value = getTermValue(te, i); |
| |
| allstats.accumulate(value, c); |
      // as we've parsed the termNum into a value, let's also accumulate field facet statistics
| for (FieldFacetStats f : finfo) { |
| f.accumulateTermNum(i, value); |
| } |
| } |
| |
| int c = missing.size(); |
| allstats.addMissing(c); |
| |
| if (finfo.length > 0) { |
| for (FieldFacetStats f : finfo) { |
| Map<String, StatsValues> facetStatsValues = f.facetStatsValues; |
| FieldType facetType = searcher.getSchema().getFieldType(f.name); |
| for (Map.Entry<String,StatsValues> entry : facetStatsValues.entrySet()) { |
| String termLabel = entry.getKey(); |
| int missingCount = searcher.numDocs(new TermQuery(new Term(f.name, facetType.toInternal(termLabel))), missing); |
| entry.getValue().addMissing(missingCount); |
| } |
| allstats.addFacet(f.name, facetStatsValues); |
| } |
| } |
| |
| return allstats; |
| |
| } |
| |
| String getReadableValue(BytesRef termval, FieldType ft, CharsRef charsRef) { |
| return ft.indexedToReadable(termval, charsRef).toString(); |
| } |
| |
| /** may return a reused BytesRef */ |
| BytesRef getTermValue(TermsEnum te, int termNum) throws IOException { |
| //System.out.println("getTermValue termNum=" + termNum + " this=" + this + " numTerms=" + numTermsInField); |
| if (bigTerms.size() > 0) { |
| // see if the term is one of our big terms. |
| TopTerm tt = bigTerms.get(termNum); |
| if (tt != null) { |
| //System.out.println(" return big " + tt.term); |
| return tt.term; |
| } |
| } |
| |
| return lookupTerm(te, termNum); |
| } |
| |
| @Override |
| public String toString() { |
| final long indexSize = indexedTermsArray == null ? 0 : (8+8+8+8+(indexedTermsArray.length<<3)+sizeOfIndexedStrings); // assume 8 byte references? |
| return "{field=" + field |
| + ",memSize="+memSize() |
| + ",tindexSize="+indexSize |
| + ",time="+total_time |
| + ",phase1="+phase1_time |
| + ",nTerms="+numTermsInField |
| + ",bigTerms="+bigTerms.size() |
| + ",termInstances="+termInstances |
| + ",uses="+use.get() |
| + "}"; |
| } |
| |
| ////////////////////////////////////////////////////////////////// |
| //////////////////////////// caching ///////////////////////////// |
| ////////////////////////////////////////////////////////////////// |
| |
| public static UnInvertedField getUnInvertedField(String field, SolrIndexSearcher searcher) throws IOException { |
| SolrCache<String,UnInvertedField> cache = searcher.getFieldValueCache(); |
| if (cache == null) { |
| return new UnInvertedField(field, searcher); |
| } |
| UnInvertedField uif = null; |
    boolean doWait = false;
| synchronized (cache) { |
| uif = cache.get(field); |
| if (uif == null) { |
| cache.put(field, uifPlaceholder); // This thread will load this field, don't let other threads try. |
| } else { |
| if (uif.isPlaceholder == false) { |
| return uif; |
| } |
        doWait = true; // Someone else has put the placeholder in; wait for it to complete.
| } |
| } |
| while (doWait) { |
| try { |
| synchronized (cache) { |
          uif = cache.get(field); // Should at least return the placeholder; if it doesn't, the resulting NPE is acceptable.
| if (uif.isPlaceholder == false) { // OK, another thread put this in the cache we should be good. |
| return uif; |
| } |
| cache.wait(); |
| } |
| } catch (InterruptedException e) { |
| throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Thread interrupted in getUninvertedField."); |
| } |
| } |
| |
| uif = new UnInvertedField(field, searcher); |
| synchronized (cache) { |
| cache.put(field, uif); // Note, this cleverly replaces the placeholder. |
| cache.notifyAll(); |
| } |
| |
| return uif; |
| } |
| } |