lucene/core/src/java/org/apache/lucene/index/MultiDocValues.java - lucene-solr - Git at Google

 package org.apache.lucene.index;

 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
  * this work for additional information regarding copyright ownership.
  * The ASF licenses this file to You under the Apache License, Version 2.0
  * (the "License"); you may not use this file except in compliance with
  * the License.  You may obtain a copy of the License at
  *
  *     http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */

 import java.io.IOException;
 import java.util.List;

 import org.apache.lucene.index.MultiTermsEnum.TermsEnumIndex;
 import org.apache.lucene.index.MultiTermsEnum.TermsEnumWithSlice;
 import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.packed.AppendingPackedLongBuffer;
 import org.apache.lucene.util.packed.MonotonicAppendingLongBuffer;
 import org.apache.lucene.util.packed.PackedInts;

 /**
  * A wrapper for CompositeIndexReader providing access to DocValues.
  *
  * <p><b>NOTE</b>: for multi readers, you'll get better
  * performance by gathering the sub readers using
  * {@link IndexReader#getContext()} to get the
  * atomic leaves and then operate per-AtomicReader,
  * instead of using this class.
  *
  * <p><b>NOTE</b>: This is very costly.
  *
  * @lucene.experimental
  * @lucene.internal
  */
 public class MultiDocValues {

   /**
    * No instantiation: the constructor is private because this class only
    * exposes static helpers and nested types.
    */
   private MultiDocValues() {}

   /**
    * Returns a NumericDocValues for a reader's norms (potentially merging on-the-fly).
    * <p>
    * This is a slow way to access normalization values; prefer per-segment access
    * with {@link AtomicReader#getNormValues(String)}.
    * </p>
    * <p>
    * The result is <code>null</code> when the reader has no leaves, or when the merged
    * field infos either do not contain <code>field</code> or report that it has no norms.
    * A reader with exactly one leaf returns that leaf's norms directly. Otherwise every
    * leaf without norms is backed by {@link NumericDocValues#EMPTY}.
    * </p>
    */
   public static NumericDocValues getNormValues(final IndexReader r, final String field) throws IOException {
     final List<AtomicReaderContext> leaves = r.leaves();
     final int leafCount = leaves.size();
     if (leafCount == 0) {
       return null;
     }
     if (leafCount == 1) {
       return leaves.get(0).reader().getNormValues(field);
     }

     // Only consult the merged field infos once we know there are several leaves.
     final FieldInfo info = MultiFields.getMergedFieldInfos(r).fieldInfo(field);
     if (info == null || !info.hasNorms()) {
       return null;
     }

     final NumericDocValues[] perLeaf = new NumericDocValues[leafCount];
     // docBases[i] is the first global docID of leaf i; the extra slot holds maxDoc.
     final int[] docBases = new int[leafCount + 1];
     boolean sawNorms = false;
     for (int leaf = 0; leaf < leafCount; leaf++) {
       final AtomicReaderContext ctx = leaves.get(leaf);
       final NumericDocValues norms = ctx.reader().getNormValues(field);
       if (norms != null) {
         sawNorms = true;
         perLeaf[leaf] = norms;
       } else {
         perLeaf[leaf] = NumericDocValues.EMPTY;
       }
       docBases[leaf] = ctx.docBase;
     }
     docBases[leafCount] = r.maxDoc();

     // The merged field infos claimed norms exist, so at least one leaf must have them.
     assert sawNorms;

     return new NumericDocValues() {
       @Override
       public long get(int docID) {
         final int leaf = ReaderUtil.subIndex(docID, docBases);
         final NumericDocValues sub = perLeaf[leaf];
         return sub.get(docID - docBases[leaf]);
       }
     };
   }

   /**
    * Returns a NumericDocValues for a reader's docvalues (potentially merging on-the-fly).
    * <p>
    * This is a slow way to access numeric values; prefer per-segment access
    * with {@link AtomicReader#getNumericDocValues(String)}.
    * </p>
    * <p>
    * The result is <code>null</code> when the reader has no leaves or when no leaf has
    * numeric values for <code>field</code>. A reader with exactly one leaf returns that
    * leaf's values directly.
    * </p>
    */
   public static NumericDocValues getNumericValues(final IndexReader r, final String field) throws IOException {
     final List<AtomicReaderContext> leaves = r.leaves();
     final int leafCount = leaves.size();
     switch (leafCount) {
       case 0:
         return null;
       case 1:
         return leaves.get(0).reader().getNumericDocValues(field);
       default:
         break;
     }

     final NumericDocValues[] perLeaf = new NumericDocValues[leafCount];
     // docBases[i] is the first global docID of leaf i; the extra slot holds maxDoc.
     final int[] docBases = new int[leafCount + 1];
     boolean foundAny = false;
     for (int leaf = 0; leaf < leafCount; leaf++) {
       final AtomicReaderContext ctx = leaves.get(leaf);
       final NumericDocValues dv = ctx.reader().getNumericDocValues(field);
       if (dv != null) {
         foundAny = true;
         perLeaf[leaf] = dv;
       } else {
         // leaves without the field answer through the shared EMPTY instance
         perLeaf[leaf] = NumericDocValues.EMPTY;
       }
       docBases[leaf] = ctx.docBase;
     }
     docBases[leafCount] = r.maxDoc();

     if (!foundAny) {
       return null;
     }
     return new NumericDocValues() {
       @Override
       public long get(int docID) {
         final int leaf = ReaderUtil.subIndex(docID, docBases);
         final NumericDocValues sub = perLeaf[leaf];
         return sub.get(docID - docBases[leaf]);
       }
     };
   }

   /**
    * Returns a Bits for a reader's docsWithField (potentially merging on-the-fly).
    * <p>
    * This is a slow way to access this bitset; prefer per-segment access
    * with {@link AtomicReader#getDocsWithField(String)}.
    * </p>
    * <p>
    * The result is <code>null</code> when the reader has no leaves or when every leaf
    * returns <code>null</code>. If every leaf returns a {@link Bits.MatchAllBits}, a single
    * {@link Bits.MatchAllBits} sized to the reader's maxDoc is returned. Otherwise the
    * per-leaf bits are combined in a MultiBits, with {@link Bits.MatchNoBits} standing
    * in for leaves that returned <code>null</code>.
    * </p>
    */
   public static Bits getDocsWithField(final IndexReader r, final String field) throws IOException {
     final List<AtomicReaderContext> leaves = r.leaves();
     final int leafCount = leaves.size();
     if (leafCount == 0) {
       return null;
     }
     if (leafCount == 1) {
       return leaves.get(0).reader().getDocsWithField(field);
     }

     final Bits[] perLeaf = new Bits[leafCount];
     // docBases[i] is the first global docID of leaf i; the extra slot holds maxDoc.
     final int[] docBases = new int[leafCount + 1];
     boolean foundAny = false;
     boolean someDocsLackField = false;
     for (int leaf = 0; leaf < leafCount; leaf++) {
       final AtomicReaderContext ctx = leaves.get(leaf);
       Bits bits = ctx.reader().getDocsWithField(field);
       if (bits != null) {
         foundAny = true;
         if (!(bits instanceof Bits.MatchAllBits)) {
           someDocsLackField = true;
         }
       } else {
         bits = new Bits.MatchNoBits(ctx.reader().maxDoc());
         someDocsLackField = true;
       }
       perLeaf[leaf] = bits;
       docBases[leaf] = ctx.docBase;
     }
     docBases[leafCount] = r.maxDoc();

     if (!foundAny) {
       return null;
     }
     if (someDocsLackField) {
       return new MultiBits(perLeaf, docBases, false);
     }
     return new Bits.MatchAllBits(r.maxDoc());
   }

   /**
    * Returns a BinaryDocValues for a reader's docvalues (potentially merging on-the-fly).
    * <p>
    * This is a slow way to access binary values; prefer per-segment access
    * with {@link AtomicReader#getBinaryDocValues(String)}.
    * </p>
    * <p>
    * The result is <code>null</code> when the reader has no leaves or when no leaf has
    * binary values for <code>field</code>. A reader with exactly one leaf returns that
    * leaf's values directly.
    * </p>
    */
   public static BinaryDocValues getBinaryValues(final IndexReader r, final String field) throws IOException {
     final List<AtomicReaderContext> leaves = r.leaves();
     final int leafCount = leaves.size();
     switch (leafCount) {
       case 0:
         return null;
       case 1:
         return leaves.get(0).reader().getBinaryDocValues(field);
       default:
         break;
     }

     final BinaryDocValues[] perLeaf = new BinaryDocValues[leafCount];
     // docBases[i] is the first global docID of leaf i; the extra slot holds maxDoc.
     final int[] docBases = new int[leafCount + 1];
     boolean foundAny = false;
     for (int leaf = 0; leaf < leafCount; leaf++) {
       final AtomicReaderContext ctx = leaves.get(leaf);
       final BinaryDocValues dv = ctx.reader().getBinaryDocValues(field);
       if (dv != null) {
         foundAny = true;
         perLeaf[leaf] = dv;
       } else {
         // leaves without the field answer through the shared EMPTY instance
         perLeaf[leaf] = BinaryDocValues.EMPTY;
       }
       docBases[leaf] = ctx.docBase;
     }
     docBases[leafCount] = r.maxDoc();

     if (!foundAny) {
       return null;
     }
     return new BinaryDocValues() {
       @Override
       public void get(int docID, BytesRef result) {
         final int leaf = ReaderUtil.subIndex(docID, docBases);
         final BinaryDocValues sub = perLeaf[leaf];
         sub.get(docID - docBases[leaf], result);
       }
     };
   }

   /**
    * Returns a SortedDocValues for a reader's docvalues (potentially doing extremely slow things).
    * <p>
    * This is an extremely slow way to access sorted values; prefer per-segment access
    * with {@link AtomicReader#getSortedDocValues(String)}.
    * </p>
    * <p>
    * The result is <code>null</code> when the reader has no leaves or when no leaf has
    * sorted values for <code>field</code>. A reader with exactly one leaf returns that
    * leaf's values directly. With several leaves a new {@link OrdinalMap} is built over
    * the leaves' terms enums on every call.
    * </p>
    */
   public static SortedDocValues getSortedValues(final IndexReader r, final String field) throws IOException {
     final List<AtomicReaderContext> leaves = r.leaves();
     final int leafCount = leaves.size();
     if (leafCount == 0) {
       return null;
     }
     if (leafCount == 1) {
       return leaves.get(0).reader().getSortedDocValues(field);
     }

     final SortedDocValues[] perLeaf = new SortedDocValues[leafCount];
     // docBases[i] is the first global docID of leaf i; the extra slot holds maxDoc.
     final int[] docBases = new int[leafCount + 1];
     boolean foundAny = false;
     for (int leaf = 0; leaf < leafCount; leaf++) {
       final AtomicReaderContext ctx = leaves.get(leaf);
       final SortedDocValues dv = ctx.reader().getSortedDocValues(field);
       if (dv != null) {
         foundAny = true;
         perLeaf[leaf] = dv;
       } else {
         perLeaf[leaf] = SortedDocValues.EMPTY;
       }
       docBases[leaf] = ctx.docBase;
     }
     docBases[leafCount] = r.maxDoc();

     if (!foundAny) {
       return null;
     }

     // One terms enum per leaf feeds the segment-ord <-> global-ord mapping.
     final TermsEnum[] leafTerms = new TermsEnum[perLeaf.length];
     for (int leaf = 0; leaf < leafTerms.length; leaf++) {
       leafTerms[leaf] = perLeaf[leaf].termsEnum();
     }
     final OrdinalMap ordinalMap = new OrdinalMap(r.getCoreCacheKey(), leafTerms);
     return new MultiSortedDocValues(perLeaf, docBases, ordinalMap);
   }

   /**
    * Returns a SortedSetDocValues for a reader's docvalues (potentially doing extremely slow things).
    * <p>
    * This is an extremely slow way to access sorted set values; prefer per-segment access
    * with {@link AtomicReader#getSortedSetDocValues(String)}.
    * </p>
    * <p>
    * The result is <code>null</code> when the reader has no leaves or when no leaf has
    * sorted set values for <code>field</code>. A reader with exactly one leaf returns that
    * leaf's values directly. With several leaves a new {@link OrdinalMap} is built over
    * the leaves' terms enums on every call.
    * </p>
    */
   public static SortedSetDocValues getSortedSetValues(final IndexReader r, final String field) throws IOException {
     final List<AtomicReaderContext> leaves = r.leaves();
     final int leafCount = leaves.size();
     if (leafCount == 0) {
       return null;
     }
     if (leafCount == 1) {
       return leaves.get(0).reader().getSortedSetDocValues(field);
     }

     final SortedSetDocValues[] perLeaf = new SortedSetDocValues[leafCount];
     // docBases[i] is the first global docID of leaf i; the extra slot holds maxDoc.
     final int[] docBases = new int[leafCount + 1];
     boolean foundAny = false;
     for (int leaf = 0; leaf < leafCount; leaf++) {
       final AtomicReaderContext ctx = leaves.get(leaf);
       final SortedSetDocValues dv = ctx.reader().getSortedSetDocValues(field);
       if (dv != null) {
         foundAny = true;
         perLeaf[leaf] = dv;
       } else {
         perLeaf[leaf] = SortedSetDocValues.EMPTY;
       }
       docBases[leaf] = ctx.docBase;
     }
     docBases[leafCount] = r.maxDoc();

     if (!foundAny) {
       return null;
     }

     // One terms enum per leaf feeds the segment-ord <-> global-ord mapping.
     final TermsEnum[] leafTerms = new TermsEnum[perLeaf.length];
     for (int leaf = 0; leaf < leafTerms.length; leaf++) {
       leafTerms[leaf] = perLeaf[leaf].termsEnum();
     }
     final OrdinalMap ordinalMap = new OrdinalMap(r.getCoreCacheKey(), leafTerms);
     return new MultiSortedSetDocValues(perLeaf, docBases, ordinalMap);
   }

   /** Maps per-segment ordinals to and from a merged (global) ordinal space. */
   // TODO: use more efficient packed ints structures?
   // TODO: pull this out? its pretty generic (maps between N ord()-enabled TermsEnums)
   public static class OrdinalMap {
     // cache key supplied by whoever asked for this map
     final Object owner;
     // globalOrd -> (globalOrd - segmentOrd), where segmentOrd is the ordinal in the first segment that contains the term
     final MonotonicAppendingLongBuffer globalOrdDeltas;
     // globalOrd -> index of the first segment that contains the term
     final AppendingPackedLongBuffer firstSegments;
     // one buffer per segment: segmentOrd -> (globalOrd - segmentOrd)
     final MonotonicAppendingLongBuffer ordDeltas[];

     /**
      * Creates an ordinal map that allows mapping ords to/from a merged
      * space from <code>subs</code>.
      * @param owner a cache key
      * @param subs TermsEnums that support {@link TermsEnum#ord()}. They need
      *             not be dense (e.g. can be FilteredTermsEnums).
      * @throws IOException if an I/O error occurred.
      */
     public OrdinalMap(Object owner, TermsEnum subs[]) throws IOException {
       this.owner = owner;
       globalOrdDeltas = new MonotonicAppendingLongBuffer(PackedInts.COMPACT);
       firstSegments = new AppendingPackedLongBuffer(PackedInts.COMPACT);

       final int subCount = subs.length;
       ordDeltas = new MonotonicAppendingLongBuffer[subCount];
       final ReaderSlice[] slices = new ReaderSlice[subCount];
       final TermsEnumIndex[] indexes = new TermsEnumIndex[subCount];
       for (int sub = 0; sub < subCount; sub++) {
         ordDeltas[sub] = new MonotonicAppendingLongBuffer();
         slices[sub] = new ReaderSlice(0, 0, sub);
         indexes[sub] = new TermsEnumIndex(subs[sub], sub);
       }

       // For each segment, the lowest segment ord that has no delta recorded yet.
       final long[] nextUnmappedOrd = new long[subCount];

       // Walk a MultiTermsEnum over all subs; each step of the walk is one global ord.
       final MultiTermsEnum merged = new MultiTermsEnum(slices);
       merged.reset(indexes);
       for (long globalOrd = 0; merged.next() != null; globalOrd++) {
         final TermsEnumWithSlice[] matches = merged.getMatchArray();
         for (int i = 0; i < merged.getMatchCount(); i++) {
           final TermsEnumWithSlice match = matches[i];
           final int segmentIndex = match.index;
           final long segmentOrd = match.terms.ord();
           final long delta = globalOrd - segmentOrd;
           if (i == 0) {
             // per unique term, remember only the first matching segment and its delta
             firstSegments.add(segmentIndex);
             globalOrdDeltas.add(delta);
           }
           // Record the delta for this segment ord, and for any lower ords of this
           // segment that were skipped (subs need not be dense).
           final MonotonicAppendingLongBuffer segmentDeltas = ordDeltas[segmentIndex];
           long pending = nextUnmappedOrd[segmentIndex];
           while (pending <= segmentOrd) {
             segmentDeltas.add(delta);
             pending++;
           }
           nextUnmappedOrd[segmentIndex] = pending;
         }
       }

       firstSegments.freeze();
       globalOrdDeltas.freeze();
       for (int sub = 0; sub < ordDeltas.length; ++sub) {
         ordDeltas[sub].freeze();
       }
     }

     /**
      * Given a segment number and segment ordinal, returns
      * the corresponding global ordinal.
      */
     public long getGlobalOrd(int segmentIndex, long segmentOrd) {
       final long delta = ordDeltas[segmentIndex].get(segmentOrd);
       return segmentOrd + delta;
     }

     /**
      * Given a global ordinal, returns the term's ordinal within the first segment
      * that contains it (the segment reported by {@link #getFirstSegmentNumber}).
      */
     public long getFirstSegmentOrd(long globalOrd) {
       final long delta = globalOrdDeltas.get(globalOrd);
       return globalOrd - delta;
     }

     /**
      * Given a global ordinal, returns the index of the first
      * segment that contains this term.
      */
     public int getFirstSegmentNumber(long globalOrd) {
       final long segment = firstSegments.get(globalOrd);
       return (int) segment;
     }

     /**
      * Returns the total number of unique terms in global ord space.
      */
     public long getValueCount() {
       return globalOrdDeltas.size();
     }

     /**
      * Returns total byte size used by this ordinal map, i.e. the sum of
      * <code>ramBytesUsed()</code> over all of its internal buffers.
      */
     public long ramBytesUsed() {
       long total = globalOrdDeltas.ramBytesUsed() + firstSegments.ramBytesUsed();
       for (MonotonicAppendingLongBuffer segmentDeltas : ordDeltas) {
         total += segmentDeltas.ramBytesUsed();
       }
       return total;
     }
   }

   /**
    * Implements SortedDocValues over n subs, using an OrdinalMap
    * to translate between per-leaf ords and global ords.
    * @lucene.internal
    */
   public static class MultiSortedDocValues extends SortedDocValues {
     /** docbase for each leaf: parallel with {@link #values} */
     public final int[] docStarts;
     /** leaf values */
     public final SortedDocValues[] values;
     /** ordinal map mapping ords from <code>values</code> to global ord space */
     public final OrdinalMap mapping;

     /** Creates a new MultiSortedDocValues over <code>values</code> */
     MultiSortedDocValues(SortedDocValues[] values, int[] docStarts, OrdinalMap mapping) throws IOException {
       // one ord-delta buffer per leaf, and one doc start per leaf plus a trailing slot
       assert values.length == mapping.ordDeltas.length;
       assert docStarts.length == values.length + 1;
       this.values = values;
       this.docStarts = docStarts;
       this.mapping = mapping;
     }

     @Override
     public int getOrd(int docID) {
       final int leaf = ReaderUtil.subIndex(docID, docStarts);
       final int leafOrd = values[leaf].getOrd(docID - docStarts[leaf]);
       if (leafOrd == -1) {
         // -1 is passed through untouched rather than being mapped
         return leafOrd;
       }
       return (int) mapping.getGlobalOrd(leaf, leafOrd);
     }

     @Override
     public void lookupOrd(int ord, BytesRef result) {
       // resolve the term through the first leaf that contains it
       final int leaf = mapping.getFirstSegmentNumber(ord);
       final int leafOrd = (int) mapping.getFirstSegmentOrd(ord);
       values[leaf].lookupOrd(leafOrd, result);
     }

     @Override
     public int getValueCount() {
       final long globalCount = mapping.getValueCount();
       return (int) globalCount;
     }
   }

   /**
    * Implements SortedSetDocValues over n subs, using an OrdinalMap
    * to translate between per-leaf ords and global ords.
    * @lucene.internal
    */
   public static class MultiSortedSetDocValues extends SortedSetDocValues {
     /** docbase for each leaf: parallel with {@link #values} */
     public final int[] docStarts;
     /** leaf values */
     public final SortedSetDocValues[] values;
     /** ordinal map mapping ords from <code>values</code> to global ord space */
     public final OrdinalMap mapping;
     // index of the leaf selected by the most recent setDocument call
     int currentSubIndex;

     /** Creates a new MultiSortedSetDocValues over <code>values</code> */
     MultiSortedSetDocValues(SortedSetDocValues[] values, int[] docStarts, OrdinalMap mapping) throws IOException {
       // one ord-delta buffer per leaf, and one doc start per leaf plus a trailing slot
       assert values.length == mapping.ordDeltas.length;
       assert docStarts.length == values.length + 1;
       this.values = values;
       this.docStarts = docStarts;
       this.mapping = mapping;
     }

     @Override
     public long nextOrd() {
       final long leafOrd = values[currentSubIndex].nextOrd();
       // the end-of-ords marker is passed through untouched rather than being mapped
       return leafOrd == NO_MORE_ORDS
           ? leafOrd
           : mapping.getGlobalOrd(currentSubIndex, leafOrd);
     }

     @Override
     public void setDocument(int docID) {
       final int leaf = ReaderUtil.subIndex(docID, docStarts);
       currentSubIndex = leaf;
       values[leaf].setDocument(docID - docStarts[leaf]);
     }

     @Override
     public void lookupOrd(long ord, BytesRef result) {
       // resolve the term through the first leaf that contains it
       final int leaf = mapping.getFirstSegmentNumber(ord);
       final long leafOrd = mapping.getFirstSegmentOrd(ord);
       values[leaf].lookupOrd(leafOrd, result);
     }

     @Override
     public long getValueCount() {
       return mapping.getValueCount();
     }
   }
 }
	package org.apache.lucene.index;

	/*
	* Licensed to the Apache Software Foundation (ASF) under one or more
	* contributor license agreements. See the NOTICE file distributed with
	* this work for additional information regarding copyright ownership.
	* The ASF licenses this file to You under the Apache License, Version 2.0
	* (the "License"); you may not use this file except in compliance with
	* the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/

	import java.io.IOException;
	import java.util.List;

	import org.apache.lucene.index.MultiTermsEnum.TermsEnumIndex;
	import org.apache.lucene.index.MultiTermsEnum.TermsEnumWithSlice;
	import org.apache.lucene.util.Bits;
	import org.apache.lucene.util.BytesRef;
	import org.apache.lucene.util.packed.AppendingPackedLongBuffer;
	import org.apache.lucene.util.packed.MonotonicAppendingLongBuffer;
	import org.apache.lucene.util.packed.PackedInts;

	/**
	* A wrapper for CompositeIndexReader providing access to DocValues.
	*
	* <p><b>NOTE</b>: for multi readers, you'll get better
	* performance by gathering the sub readers using
	* {@link IndexReader#getContext()} to get the
	* atomic leaves and then operate per-AtomicReader,
	* instead of using this class.
	*
	* <p><b>NOTE</b>: This is very costly.
	*
	* @lucene.experimental
	* @lucene.internal
	*/
	public class MultiDocValues {

	/**
	 * No instantiation: the constructor is private because this class only
	 * exposes static helpers and nested types.
	 */
	private MultiDocValues() {}

	/** Returns a NumericDocValues for a reader's norms (potentially merging on-the-fly).
	* <p>
	* This is a slow way to access normalization values. Instead, access them per-segment
	* with {@link AtomicReader#getNormValues(String)}
	* </p>
	*/
	public static NumericDocValues getNormValues(final IndexReader r, final String field) throws IOException {
	final List<AtomicReaderContext> leaves = r.leaves();
	final int size = leaves.size();
	if (size == 0) {
	return null;
	} else if (size == 1) {
	return leaves.get(0).reader().getNormValues(field);
	}
	FieldInfo fi = MultiFields.getMergedFieldInfos(r).fieldInfo(field);
	if (fi == null \|\| fi.hasNorms() == false) {
	return null;
	}

	boolean anyReal = false;
	final NumericDocValues[] values = new NumericDocValues[size];
	final int[] starts = new int[size+1];
	for (int i = 0; i < size; i++) {
	AtomicReaderContext context = leaves.get(i);
	NumericDocValues v = context.reader().getNormValues(field);
	if (v == null) {
	v = NumericDocValues.EMPTY;
	} else {
	anyReal = true;
	}
	values[i] = v;
	starts[i] = context.docBase;
	}
	starts[size] = r.maxDoc();

	assert anyReal;

	return new NumericDocValues() {
	@Override
	public long get(int docID) {
	int subIndex = ReaderUtil.subIndex(docID, starts);
	return values[subIndex].get(docID - starts[subIndex]);
	}
	};
	}

	/**
	 * Returns a NumericDocValues for a reader's docvalues (potentially merging on-the-fly).
	 * <p>
	 * This is a slow way to access numeric values; prefer per-segment access
	 * with {@link AtomicReader#getNumericDocValues(String)}.
	 * </p>
	 * <p>
	 * The result is <code>null</code> when the reader has no leaves or when no leaf has
	 * numeric values for <code>field</code>. A reader with exactly one leaf returns that
	 * leaf's values directly.
	 * </p>
	 */
	public static NumericDocValues getNumericValues(final IndexReader r, final String field) throws IOException {
	  final List<AtomicReaderContext> leaves = r.leaves();
	  final int leafCount = leaves.size();
	  switch (leafCount) {
	    case 0:
	      return null;
	    case 1:
	      return leaves.get(0).reader().getNumericDocValues(field);
	    default:
	      break;
	  }

	  final NumericDocValues[] perLeaf = new NumericDocValues[leafCount];
	  // docBases[i] is the first global docID of leaf i; the extra slot holds maxDoc.
	  final int[] docBases = new int[leafCount + 1];
	  boolean foundAny = false;
	  for (int leaf = 0; leaf < leafCount; leaf++) {
	    final AtomicReaderContext ctx = leaves.get(leaf);
	    final NumericDocValues dv = ctx.reader().getNumericDocValues(field);
	    if (dv != null) {
	      foundAny = true;
	      perLeaf[leaf] = dv;
	    } else {
	      // leaves without the field answer through the shared EMPTY instance
	      perLeaf[leaf] = NumericDocValues.EMPTY;
	    }
	    docBases[leaf] = ctx.docBase;
	  }
	  docBases[leafCount] = r.maxDoc();

	  if (!foundAny) {
	    return null;
	  }
	  return new NumericDocValues() {
	    @Override
	    public long get(int docID) {
	      final int leaf = ReaderUtil.subIndex(docID, docBases);
	      final NumericDocValues sub = perLeaf[leaf];
	      return sub.get(docID - docBases[leaf]);
	    }
	  };
	}

	/**
	 * Returns a Bits for a reader's docsWithField (potentially merging on-the-fly).
	 * <p>
	 * This is a slow way to access this bitset; prefer per-segment access
	 * with {@link AtomicReader#getDocsWithField(String)}.
	 * </p>
	 * <p>
	 * The result is <code>null</code> when the reader has no leaves or when every leaf
	 * returns <code>null</code>. If every leaf returns a {@link Bits.MatchAllBits}, a single
	 * {@link Bits.MatchAllBits} sized to the reader's maxDoc is returned. Otherwise the
	 * per-leaf bits are combined in a MultiBits, with {@link Bits.MatchNoBits} standing
	 * in for leaves that returned <code>null</code>.
	 * </p>
	 */
	public static Bits getDocsWithField(final IndexReader r, final String field) throws IOException {
	  final List<AtomicReaderContext> leaves = r.leaves();
	  final int leafCount = leaves.size();
	  if (leafCount == 0) {
	    return null;
	  }
	  if (leafCount == 1) {
	    return leaves.get(0).reader().getDocsWithField(field);
	  }

	  final Bits[] perLeaf = new Bits[leafCount];
	  // docBases[i] is the first global docID of leaf i; the extra slot holds maxDoc.
	  final int[] docBases = new int[leafCount + 1];
	  boolean foundAny = false;
	  boolean someDocsLackField = false;
	  for (int leaf = 0; leaf < leafCount; leaf++) {
	    final AtomicReaderContext ctx = leaves.get(leaf);
	    Bits bits = ctx.reader().getDocsWithField(field);
	    if (bits != null) {
	      foundAny = true;
	      if (!(bits instanceof Bits.MatchAllBits)) {
	        someDocsLackField = true;
	      }
	    } else {
	      bits = new Bits.MatchNoBits(ctx.reader().maxDoc());
	      someDocsLackField = true;
	    }
	    perLeaf[leaf] = bits;
	    docBases[leaf] = ctx.docBase;
	  }
	  docBases[leafCount] = r.maxDoc();

	  if (!foundAny) {
	    return null;
	  }
	  if (someDocsLackField) {
	    return new MultiBits(perLeaf, docBases, false);
	  }
	  return new Bits.MatchAllBits(r.maxDoc());
	}

	/**
	 * Returns a BinaryDocValues for a reader's docvalues (potentially merging on-the-fly).
	 * <p>
	 * This is a slow way to access binary values; prefer per-segment access
	 * with {@link AtomicReader#getBinaryDocValues(String)}.
	 * </p>
	 * <p>
	 * The result is <code>null</code> when the reader has no leaves or when no leaf has
	 * binary values for <code>field</code>. A reader with exactly one leaf returns that
	 * leaf's values directly.
	 * </p>
	 */
	public static BinaryDocValues getBinaryValues(final IndexReader r, final String field) throws IOException {
	  final List<AtomicReaderContext> leaves = r.leaves();
	  final int leafCount = leaves.size();
	  switch (leafCount) {
	    case 0:
	      return null;
	    case 1:
	      return leaves.get(0).reader().getBinaryDocValues(field);
	    default:
	      break;
	  }

	  final BinaryDocValues[] perLeaf = new BinaryDocValues[leafCount];
	  // docBases[i] is the first global docID of leaf i; the extra slot holds maxDoc.
	  final int[] docBases = new int[leafCount + 1];
	  boolean foundAny = false;
	  for (int leaf = 0; leaf < leafCount; leaf++) {
	    final AtomicReaderContext ctx = leaves.get(leaf);
	    final BinaryDocValues dv = ctx.reader().getBinaryDocValues(field);
	    if (dv != null) {
	      foundAny = true;
	      perLeaf[leaf] = dv;
	    } else {
	      // leaves without the field answer through the shared EMPTY instance
	      perLeaf[leaf] = BinaryDocValues.EMPTY;
	    }
	    docBases[leaf] = ctx.docBase;
	  }
	  docBases[leafCount] = r.maxDoc();

	  if (!foundAny) {
	    return null;
	  }
	  return new BinaryDocValues() {
	    @Override
	    public void get(int docID, BytesRef result) {
	      final int leaf = ReaderUtil.subIndex(docID, docBases);
	      final BinaryDocValues sub = perLeaf[leaf];
	      sub.get(docID - docBases[leaf], result);
	    }
	  };
	}

	/**
	 * Returns a SortedDocValues for a reader's docvalues (potentially doing extremely slow things).
	 * <p>
	 * This is an extremely slow way to access sorted values; prefer per-segment access
	 * with {@link AtomicReader#getSortedDocValues(String)}.
	 * </p>
	 * <p>
	 * The result is <code>null</code> when the reader has no leaves or when no leaf has
	 * sorted values for <code>field</code>. A reader with exactly one leaf returns that
	 * leaf's values directly. With several leaves a new {@link OrdinalMap} is built over
	 * the leaves' terms enums on every call.
	 * </p>
	 */
	public static SortedDocValues getSortedValues(final IndexReader r, final String field) throws IOException {
	  final List<AtomicReaderContext> leaves = r.leaves();
	  final int leafCount = leaves.size();
	  if (leafCount == 0) {
	    return null;
	  }
	  if (leafCount == 1) {
	    return leaves.get(0).reader().getSortedDocValues(field);
	  }

	  final SortedDocValues[] perLeaf = new SortedDocValues[leafCount];
	  // docBases[i] is the first global docID of leaf i; the extra slot holds maxDoc.
	  final int[] docBases = new int[leafCount + 1];
	  boolean foundAny = false;
	  for (int leaf = 0; leaf < leafCount; leaf++) {
	    final AtomicReaderContext ctx = leaves.get(leaf);
	    final SortedDocValues dv = ctx.reader().getSortedDocValues(field);
	    if (dv != null) {
	      foundAny = true;
	      perLeaf[leaf] = dv;
	    } else {
	      perLeaf[leaf] = SortedDocValues.EMPTY;
	    }
	    docBases[leaf] = ctx.docBase;
	  }
	  docBases[leafCount] = r.maxDoc();

	  if (!foundAny) {
	    return null;
	  }

	  // One terms enum per leaf feeds the segment-ord <-> global-ord mapping.
	  final TermsEnum[] leafTerms = new TermsEnum[perLeaf.length];
	  for (int leaf = 0; leaf < leafTerms.length; leaf++) {
	    leafTerms[leaf] = perLeaf[leaf].termsEnum();
	  }
	  final OrdinalMap ordinalMap = new OrdinalMap(r.getCoreCacheKey(), leafTerms);
	  return new MultiSortedDocValues(perLeaf, docBases, ordinalMap);
	}

	/**
	 * Returns a SortedSetDocValues for a reader's docvalues (potentially doing extremely slow things).
	 * <p>
	 * This is an extremely slow way to access sorted set values; prefer per-segment access
	 * with {@link AtomicReader#getSortedSetDocValues(String)}.
	 * </p>
	 * <p>
	 * The result is <code>null</code> when the reader has no leaves or when no leaf has
	 * sorted set values for <code>field</code>. A reader with exactly one leaf returns that
	 * leaf's values directly. With several leaves a new {@link OrdinalMap} is built over
	 * the leaves' terms enums on every call.
	 * </p>
	 */
	public static SortedSetDocValues getSortedSetValues(final IndexReader r, final String field) throws IOException {
	  final List<AtomicReaderContext> leaves = r.leaves();
	  final int leafCount = leaves.size();
	  if (leafCount == 0) {
	    return null;
	  }
	  if (leafCount == 1) {
	    return leaves.get(0).reader().getSortedSetDocValues(field);
	  }

	  final SortedSetDocValues[] perLeaf = new SortedSetDocValues[leafCount];
	  // docBases[i] is the first global docID of leaf i; the extra slot holds maxDoc.
	  final int[] docBases = new int[leafCount + 1];
	  boolean foundAny = false;
	  for (int leaf = 0; leaf < leafCount; leaf++) {
	    final AtomicReaderContext ctx = leaves.get(leaf);
	    final SortedSetDocValues dv = ctx.reader().getSortedSetDocValues(field);
	    if (dv != null) {
	      foundAny = true;
	      perLeaf[leaf] = dv;
	    } else {
	      perLeaf[leaf] = SortedSetDocValues.EMPTY;
	    }
	    docBases[leaf] = ctx.docBase;
	  }
	  docBases[leafCount] = r.maxDoc();

	  if (!foundAny) {
	    return null;
	  }

	  // One terms enum per leaf feeds the segment-ord <-> global-ord mapping.
	  final TermsEnum[] leafTerms = new TermsEnum[perLeaf.length];
	  for (int leaf = 0; leaf < leafTerms.length; leaf++) {
	    leafTerms[leaf] = perLeaf[leaf].termsEnum();
	  }
	  final OrdinalMap ordinalMap = new OrdinalMap(r.getCoreCacheKey(), leafTerms);
	  return new MultiSortedSetDocValues(perLeaf, docBases, ordinalMap);
	}

	/** Maps per-segment ordinals to and from a merged (global) ordinal space. */
	// TODO: use more efficient packed ints structures?
	// TODO: pull this out? its pretty generic (maps between N ord()-enabled TermsEnums)
	public static class OrdinalMap {
	  // cache key supplied by whoever asked for this map
	  final Object owner;
	  // globalOrd -> (globalOrd - segmentOrd), where segmentOrd is the ordinal in the first segment that contains the term
	  final MonotonicAppendingLongBuffer globalOrdDeltas;
	  // globalOrd -> index of the first segment that contains the term
	  final AppendingPackedLongBuffer firstSegments;
	  // one buffer per segment: segmentOrd -> (globalOrd - segmentOrd)
	  final MonotonicAppendingLongBuffer ordDeltas[];

	  /**
	   * Creates an ordinal map that allows mapping ords to/from a merged
	   * space from <code>subs</code>.
	   * @param owner a cache key
	   * @param subs TermsEnums that support {@link TermsEnum#ord()}. They need
	   *             not be dense (e.g. can be FilteredTermsEnums).
	   * @throws IOException if an I/O error occurred.
	   */
	  public OrdinalMap(Object owner, TermsEnum subs[]) throws IOException {
	    this.owner = owner;
	    globalOrdDeltas = new MonotonicAppendingLongBuffer(PackedInts.COMPACT);
	    firstSegments = new AppendingPackedLongBuffer(PackedInts.COMPACT);

	    final int subCount = subs.length;
	    ordDeltas = new MonotonicAppendingLongBuffer[subCount];
	    final ReaderSlice[] slices = new ReaderSlice[subCount];
	    final TermsEnumIndex[] indexes = new TermsEnumIndex[subCount];
	    for (int sub = 0; sub < subCount; sub++) {
	      ordDeltas[sub] = new MonotonicAppendingLongBuffer();
	      slices[sub] = new ReaderSlice(0, 0, sub);
	      indexes[sub] = new TermsEnumIndex(subs[sub], sub);
	    }

	    // For each segment, the lowest segment ord that has no delta recorded yet.
	    final long[] nextUnmappedOrd = new long[subCount];

	    // Walk a MultiTermsEnum over all subs; each step of the walk is one global ord.
	    final MultiTermsEnum merged = new MultiTermsEnum(slices);
	    merged.reset(indexes);
	    for (long globalOrd = 0; merged.next() != null; globalOrd++) {
	      final TermsEnumWithSlice[] matches = merged.getMatchArray();
	      for (int i = 0; i < merged.getMatchCount(); i++) {
	        final TermsEnumWithSlice match = matches[i];
	        final int segmentIndex = match.index;
	        final long segmentOrd = match.terms.ord();
	        final long delta = globalOrd - segmentOrd;
	        if (i == 0) {
	          // per unique term, remember only the first matching segment and its delta
	          firstSegments.add(segmentIndex);
	          globalOrdDeltas.add(delta);
	        }
	        // Record the delta for this segment ord, and for any lower ords of this
	        // segment that were skipped (subs need not be dense).
	        final MonotonicAppendingLongBuffer segmentDeltas = ordDeltas[segmentIndex];
	        long pending = nextUnmappedOrd[segmentIndex];
	        while (pending <= segmentOrd) {
	          segmentDeltas.add(delta);
	          pending++;
	        }
	        nextUnmappedOrd[segmentIndex] = pending;
	      }
	    }

	    firstSegments.freeze();
	    globalOrdDeltas.freeze();
	    for (int sub = 0; sub < ordDeltas.length; ++sub) {
	      ordDeltas[sub].freeze();
	    }
	  }

	  /**
	   * Given a segment number and segment ordinal, returns
	   * the corresponding global ordinal.
	   */
	  public long getGlobalOrd(int segmentIndex, long segmentOrd) {
	    final long delta = ordDeltas[segmentIndex].get(segmentOrd);
	    return segmentOrd + delta;
	  }

	  /**
	   * Given a global ordinal, returns the term's ordinal within the first segment
	   * that contains it (the segment reported by {@link #getFirstSegmentNumber}).
	   */
	  public long getFirstSegmentOrd(long globalOrd) {
	    final long delta = globalOrdDeltas.get(globalOrd);
	    return globalOrd - delta;
	  }

	  /**
	   * Given a global ordinal, returns the index of the first
	   * segment that contains this term.
	   */
	  public int getFirstSegmentNumber(long globalOrd) {
	    final long segment = firstSegments.get(globalOrd);
	    return (int) segment;
	  }

	  /**
	   * Returns the total number of unique terms in global ord space.
	   */
	  public long getValueCount() {
	    return globalOrdDeltas.size();
	  }

	  /**
	   * Returns total byte size used by this ordinal map, i.e. the sum of
	   * <code>ramBytesUsed()</code> over all of its internal buffers.
	   */
	  public long ramBytesUsed() {
	    long total = globalOrdDeltas.ramBytesUsed() + firstSegments.ramBytesUsed();
	    for (MonotonicAppendingLongBuffer segmentDeltas : ordDeltas) {
	      total += segmentDeltas.ramBytesUsed();
	    }
	    return total;
	  }
	}

	/**
	 * Implements SortedDocValues over n subs, using an OrdinalMap
	 * to translate between per-leaf ords and global ords.
	 * @lucene.internal
	 */
	public static class MultiSortedDocValues extends SortedDocValues {
	  /** docbase for each leaf: parallel with {@link #values} */
	  public final int[] docStarts;
	  /** leaf values */
	  public final SortedDocValues[] values;
	  /** ordinal map mapping ords from <code>values</code> to global ord space */
	  public final OrdinalMap mapping;

	  /** Creates a new MultiSortedDocValues over <code>values</code> */
	  MultiSortedDocValues(SortedDocValues[] values, int[] docStarts, OrdinalMap mapping) throws IOException {
	    // one ord-delta buffer per leaf, and one doc start per leaf plus a trailing slot
	    assert values.length == mapping.ordDeltas.length;
	    assert docStarts.length == values.length + 1;
	    this.values = values;
	    this.docStarts = docStarts;
	    this.mapping = mapping;
	  }

	  @Override
	  public int getOrd(int docID) {
	    final int leaf = ReaderUtil.subIndex(docID, docStarts);
	    final int leafOrd = values[leaf].getOrd(docID - docStarts[leaf]);
	    if (leafOrd == -1) {
	      // -1 is passed through untouched rather than being mapped
	      return leafOrd;
	    }
	    return (int) mapping.getGlobalOrd(leaf, leafOrd);
	  }

	  @Override
	  public void lookupOrd(int ord, BytesRef result) {
	    // resolve the term through the first leaf that contains it
	    final int leaf = mapping.getFirstSegmentNumber(ord);
	    final int leafOrd = (int) mapping.getFirstSegmentOrd(ord);
	    values[leaf].lookupOrd(leafOrd, result);
	  }

	  @Override
	  public int getValueCount() {
	    final long globalCount = mapping.getValueCount();
	    return (int) globalCount;
	  }
	}

	/**
	 * Implements SortedSetDocValues over n subs, using an OrdinalMap
	 * to translate between per-leaf ords and global ords.
	 * @lucene.internal
	 */
	public static class MultiSortedSetDocValues extends SortedSetDocValues {
	  /** docbase for each leaf: parallel with {@link #values} */
	  public final int[] docStarts;
	  /** leaf values */
	  public final SortedSetDocValues[] values;
	  /** ordinal map mapping ords from <code>values</code> to global ord space */
	  public final OrdinalMap mapping;
	  // index of the leaf selected by the most recent setDocument call
	  int currentSubIndex;

	  /** Creates a new MultiSortedSetDocValues over <code>values</code> */
	  MultiSortedSetDocValues(SortedSetDocValues[] values, int[] docStarts, OrdinalMap mapping) throws IOException {
	    // one ord-delta buffer per leaf, and one doc start per leaf plus a trailing slot
	    assert values.length == mapping.ordDeltas.length;
	    assert docStarts.length == values.length + 1;
	    this.values = values;
	    this.docStarts = docStarts;
	    this.mapping = mapping;
	  }

	  @Override
	  public long nextOrd() {
	    final long leafOrd = values[currentSubIndex].nextOrd();
	    // the end-of-ords marker is passed through untouched rather than being mapped
	    return leafOrd == NO_MORE_ORDS
	        ? leafOrd
	        : mapping.getGlobalOrd(currentSubIndex, leafOrd);
	  }

	  @Override
	  public void setDocument(int docID) {
	    final int leaf = ReaderUtil.subIndex(docID, docStarts);
	    currentSubIndex = leaf;
	    values[leaf].setDocument(docID - docStarts[leaf]);
	  }

	  @Override
	  public void lookupOrd(long ord, BytesRef result) {
	    // resolve the term through the first leaf that contains it
	    final int leaf = mapping.getFirstSegmentNumber(ord);
	    final long leafOrd = mapping.getFirstSegmentOrd(ord);
	    values[leaf].lookupOrd(leafOrd, result);
	  }

	  @Override
	  public long getValueCount() {
	    return mapping.getValueCount();
	  }
	}
	}