lucene/core/src/java/org/apache/lucene/index/Sorter.java - lucene-solr - Git at Google

 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
  * this work for additional information regarding copyright ownership.
  * The ASF licenses this file to You under the Apache License, Version 2.0
  * (the "License"); you may not use this file except in compliance with
  * the License.  You may obtain a copy of the License at
  *
  *     http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
 package org.apache.lucene.index;

 import java.io.IOException;

 import org.apache.lucene.search.Sort;
 import org.apache.lucene.search.SortField;
 import org.apache.lucene.util.TimSorter;
 import org.apache.lucene.util.packed.PackedInts;
 import org.apache.lucene.util.packed.PackedLongValues;

 /**
  * Sorts documents of a given index by returning a permutation on the document
  * IDs.
  * @lucene.experimental
  */
 final class Sorter {
   final Sort sort;

   /** Creates a new Sorter to sort the index with {@code sort} */
   Sorter(Sort sort) {
     if (sort.needsScores()) {
       throw new IllegalArgumentException("Cannot sort an index with a Sort that refers to the relevance score");
     }
     this.sort = sort;
   }

   /**
    * A permutation of doc IDs. For every document ID between <tt>0</tt> and
    * {@link IndexReader#maxDoc()}, <code>oldToNew(newToOld(docID))</code> must
    * return <code>docID</code>.
    */
   static abstract class DocMap {

     /** Given a doc ID from the original index, return its ordinal in the
      *  sorted index. */
     abstract int oldToNew(int docID);

     /** Given the ordinal of a doc ID, return its doc ID in the original index. */
     abstract int newToOld(int docID);

     /** Return the number of documents in this map. This must be equal to the
      *  {@link org.apache.lucene.index.LeafReader#maxDoc() number of documents} of the
      *  {@link org.apache.lucene.index.LeafReader} which is sorted. */
     abstract int size();
   }

   /** Check consistency of a {@link DocMap}, useful for assertions. */
   static boolean isConsistent(DocMap docMap) {
     final int maxDoc = docMap.size();
     for (int i = 0; i < maxDoc; ++i) {
       final int newID = docMap.oldToNew(i);
       final int oldID = docMap.newToOld(newID);
       assert newID >= 0 && newID < maxDoc : "doc IDs must be in [0-" + maxDoc + "[, got " + newID;
       assert i == oldID : "mapping is inconsistent: " + i + " --oldToNew--> " + newID + " --newToOld--> " + oldID;
       if (i != oldID || newID < 0 || newID >= maxDoc) {
         return false;
       }
     }
     return true;
   }

   private static final class DocValueSorter extends TimSorter {

     private final int[] docs;
     private final IndexSorter.DocComparator comparator;
     private final int[] tmp;

     DocValueSorter(int[] docs, IndexSorter.DocComparator comparator) {
       super(docs.length / 64);
       this.docs = docs;
       this.comparator = comparator;
       tmp = new int[docs.length / 64];
     }

     @Override
     protected int compare(int i, int j) {
       return comparator.compare(docs[i], docs[j]);
     }

     @Override
     protected void swap(int i, int j) {
       int tmpDoc = docs[i];
       docs[i] = docs[j];
       docs[j] = tmpDoc;
     }

     @Override
     protected void copy(int src, int dest) {
       docs[dest] = docs[src];
     }

     @Override
     protected void save(int i, int len) {
       System.arraycopy(docs, i, tmp, 0, len);
     }

     @Override
     protected void restore(int i, int j) {
       docs[j] = tmp[i];
     }

     @Override
     protected int compareSaved(int i, int j) {
       return comparator.compare(tmp[i], docs[j]);
     }
   }

   /** Computes the old-to-new permutation over the given comparator. */
   private static Sorter.DocMap sort(final int maxDoc, IndexSorter.DocComparator comparator) {
     // check if the index is sorted
     boolean sorted = true;
     for (int i = 1; i < maxDoc; ++i) {
       if (comparator.compare(i-1, i) > 0) {
         sorted = false;
         break;
       }
     }
     if (sorted) {
       return null;
     }

     // sort doc IDs
     final int[] docs = new int[maxDoc];
     for (int i = 0; i < maxDoc; i++) {
       docs[i] = i;
     }

     DocValueSorter sorter = new DocValueSorter(docs, comparator);
     // It can be common to sort a reader, add docs, sort it again, ... and in
     // that case timSort can save a lot of time
     sorter.sort(0, docs.length); // docs is now the newToOld mapping

     // The reason why we use MonotonicAppendingLongBuffer here is that it
     // wastes very little memory if the index is in random order but can save
     // a lot of memory if the index is already "almost" sorted
     final PackedLongValues.Builder newToOldBuilder = PackedLongValues.monotonicBuilder(PackedInts.COMPACT);
     for (int i = 0; i < maxDoc; ++i) {
       newToOldBuilder.add(docs[i]);
     }
     final PackedLongValues newToOld = newToOldBuilder.build();

     // invert the docs mapping:
     for (int i = 0; i < maxDoc; ++i) {
       docs[(int) newToOld.get(i)] = i;
     } // docs is now the oldToNew mapping

     final PackedLongValues.Builder oldToNewBuilder = PackedLongValues.monotonicBuilder(PackedInts.COMPACT);
     for (int i = 0; i < maxDoc; ++i) {
       oldToNewBuilder.add(docs[i]);
     }
     final PackedLongValues oldToNew = oldToNewBuilder.build();

     return new Sorter.DocMap() {

       @Override
       public int oldToNew(int docID) {
         return (int) oldToNew.get(docID);
       }

       @Override
       public int newToOld(int docID) {
         return (int) newToOld.get(docID);
       }

       @Override
       public int size() {
         return maxDoc;
       }
     };
   }

   /**
    * Returns a mapping from the old document ID to its new location in the
    * sorted index. Implementations can use the auxiliary
    * {@link #sort(int, IndexSorter.DocComparator)} to compute the old-to-new permutation
    * given a list of documents and their corresponding values.
    * <p>
    * A return value of <tt>null</tt> is allowed and means that
    * <code>reader</code> is already sorted.
    * <p>
    * <b>NOTE:</b> deleted documents are expected to appear in the mapping as
    * well, they will however be marked as deleted in the sorted view.
    */
   DocMap sort(LeafReader reader) throws IOException {
     SortField[] fields = sort.getSort();
     final IndexSorter.DocComparator[] comparators = new IndexSorter.DocComparator[fields.length];

     for (int i = 0; i < fields.length; i++) {
       IndexSorter sorter = fields[i].getIndexSorter();
       if (sorter == null) {
         throw new IllegalArgumentException("Cannot use sortfield + "  + fields[i] + " to sort indexes");
       }
       comparators[i] = sorter.getDocComparator(reader, reader.maxDoc());
     }
     return sort(reader.maxDoc(), comparators);
   }


   DocMap sort(int maxDoc, IndexSorter.DocComparator[] comparators) throws IOException {
     final IndexSorter.DocComparator comparator = (docID1, docID2) -> {
       for (int i = 0; i < comparators.length; i++) {
         int comp = comparators[i].compare(docID1, docID2);
         if (comp != 0) {
           return comp;
         }
       }
       return Integer.compare(docID1, docID2); // docid order tiebreak
     };

     return sort(maxDoc, comparator);
   }

   /**
    * Returns the identifier of this {@link Sorter}.
    * <p>This identifier is similar to {@link Object#hashCode()} and should be
    * chosen so that two instances of this class that sort documents likewise
    * will have the same identifier. On the contrary, this identifier should be
    * different on different {@link Sort sorts}.
    */
   public String getID() {
     return sort.toString();
   }

   @Override
   public String toString() {
     return getID();
   }

 }
	/*
	* Licensed to the Apache Software Foundation (ASF) under one or more
	* contributor license agreements. See the NOTICE file distributed with
	* this work for additional information regarding copyright ownership.
	* The ASF licenses this file to You under the Apache License, Version 2.0
	* (the "License"); you may not use this file except in compliance with
	* the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/
	package org.apache.lucene.index;

	import java.io.IOException;

	import org.apache.lucene.search.Sort;
	import org.apache.lucene.search.SortField;
	import org.apache.lucene.util.TimSorter;
	import org.apache.lucene.util.packed.PackedInts;
	import org.apache.lucene.util.packed.PackedLongValues;

	/**
	* Sorts documents of a given index by returning a permutation on the document
	* IDs.
	* @lucene.experimental
	*/
	final class Sorter {
	final Sort sort;

	/** Creates a new Sorter to sort the index with {@code sort} */
	Sorter(Sort sort) {
	if (sort.needsScores()) {
	throw new IllegalArgumentException("Cannot sort an index with a Sort that refers to the relevance score");
	}
	this.sort = sort;
	}

	/**
	* A permutation of doc IDs. For every document ID between <tt>0</tt> and
	* {@link IndexReader#maxDoc()}, <code>oldToNew(newToOld(docID))</code> must
	* return <code>docID</code>.
	*/
	static abstract class DocMap {

	/** Given a doc ID from the original index, return its ordinal in the
	* sorted index. */
	abstract int oldToNew(int docID);

	/** Given the ordinal of a doc ID, return its doc ID in the original index. */
	abstract int newToOld(int docID);

	/** Return the number of documents in this map. This must be equal to the
	* {@link org.apache.lucene.index.LeafReader#maxDoc() number of documents} of the
	* {@link org.apache.lucene.index.LeafReader} which is sorted. */
	abstract int size();
	}

	/** Check consistency of a {@link DocMap}, useful for assertions. */
	static boolean isConsistent(DocMap docMap) {
	final int maxDoc = docMap.size();
	for (int i = 0; i < maxDoc; ++i) {
	final int newID = docMap.oldToNew(i);
	final int oldID = docMap.newToOld(newID);
	assert newID >= 0 && newID < maxDoc : "doc IDs must be in [0-" + maxDoc + "[, got " + newID;
	assert i == oldID : "mapping is inconsistent: " + i + " --oldToNew--> " + newID + " --newToOld--> " + oldID;
	if (i != oldID \|\| newID < 0 \|\| newID >= maxDoc) {
	return false;
	}
	}
	return true;
	}

	private static final class DocValueSorter extends TimSorter {

	private final int[] docs;
	private final IndexSorter.DocComparator comparator;
	private final int[] tmp;

	DocValueSorter(int[] docs, IndexSorter.DocComparator comparator) {
	super(docs.length / 64);
	this.docs = docs;
	this.comparator = comparator;
	tmp = new int[docs.length / 64];
	}

	@Override
	protected int compare(int i, int j) {
	return comparator.compare(docs[i], docs[j]);
	}

	@Override
	protected void swap(int i, int j) {
	int tmpDoc = docs[i];
	docs[i] = docs[j];
	docs[j] = tmpDoc;
	}

	@Override
	protected void copy(int src, int dest) {
	docs[dest] = docs[src];
	}

	@Override
	protected void save(int i, int len) {
	System.arraycopy(docs, i, tmp, 0, len);
	}

	@Override
	protected void restore(int i, int j) {
	docs[j] = tmp[i];
	}

	@Override
	protected int compareSaved(int i, int j) {
	return comparator.compare(tmp[i], docs[j]);
	}
	}

	/** Computes the old-to-new permutation over the given comparator. */
	private static Sorter.DocMap sort(final int maxDoc, IndexSorter.DocComparator comparator) {
	// check if the index is sorted
	boolean sorted = true;
	for (int i = 1; i < maxDoc; ++i) {
	if (comparator.compare(i-1, i) > 0) {
	sorted = false;
	break;
	}
	}
	if (sorted) {
	return null;
	}

	// sort doc IDs
	final int[] docs = new int[maxDoc];
	for (int i = 0; i < maxDoc; i++) {
	docs[i] = i;
	}

	DocValueSorter sorter = new DocValueSorter(docs, comparator);
	// It can be common to sort a reader, add docs, sort it again, ... and in
	// that case timSort can save a lot of time
	sorter.sort(0, docs.length); // docs is now the newToOld mapping

	// The reason why we use MonotonicAppendingLongBuffer here is that it
	// wastes very little memory if the index is in random order but can save
	// a lot of memory if the index is already "almost" sorted
	final PackedLongValues.Builder newToOldBuilder = PackedLongValues.monotonicBuilder(PackedInts.COMPACT);
	for (int i = 0; i < maxDoc; ++i) {
	newToOldBuilder.add(docs[i]);
	}
	final PackedLongValues newToOld = newToOldBuilder.build();

	// invert the docs mapping:
	for (int i = 0; i < maxDoc; ++i) {
	docs[(int) newToOld.get(i)] = i;
	} // docs is now the oldToNew mapping

	final PackedLongValues.Builder oldToNewBuilder = PackedLongValues.monotonicBuilder(PackedInts.COMPACT);
	for (int i = 0; i < maxDoc; ++i) {
	oldToNewBuilder.add(docs[i]);
	}
	final PackedLongValues oldToNew = oldToNewBuilder.build();

	return new Sorter.DocMap() {

	@Override
	public int oldToNew(int docID) {
	return (int) oldToNew.get(docID);
	}

	@Override
	public int newToOld(int docID) {
	return (int) newToOld.get(docID);
	}

	@Override
	public int size() {
	return maxDoc;
	}
	};
	}

	/**
	* Returns a mapping from the old document ID to its new location in the
	* sorted index. Implementations can use the auxiliary
	* {@link #sort(int, IndexSorter.DocComparator)} to compute the old-to-new permutation
	* given a list of documents and their corresponding values.
	* <p>
	* A return value of <tt>null</tt> is allowed and means that
	* <code>reader</code> is already sorted.
	* <p>
	* <b>NOTE:</b> deleted documents are expected to appear in the mapping as
	* well, they will however be marked as deleted in the sorted view.
	*/
	DocMap sort(LeafReader reader) throws IOException {
	SortField[] fields = sort.getSort();
	final IndexSorter.DocComparator[] comparators = new IndexSorter.DocComparator[fields.length];

	for (int i = 0; i < fields.length; i++) {
	IndexSorter sorter = fields[i].getIndexSorter();
	if (sorter == null) {
	throw new IllegalArgumentException("Cannot use sortfield + " + fields[i] + " to sort indexes");
	}
	comparators[i] = sorter.getDocComparator(reader, reader.maxDoc());
	}
	return sort(reader.maxDoc(), comparators);
	}


	DocMap sort(int maxDoc, IndexSorter.DocComparator[] comparators) throws IOException {
	final IndexSorter.DocComparator comparator = (docID1, docID2) -> {
	for (int i = 0; i < comparators.length; i++) {
	int comp = comparators[i].compare(docID1, docID2);
	if (comp != 0) {
	return comp;
	}
	}
	return Integer.compare(docID1, docID2); // docid order tiebreak
	};

	return sort(maxDoc, comparator);
	}

	/**
	* Returns the identifier of this {@link Sorter}.
	* <p>This identifier is similar to {@link Object#hashCode()} and should be
	* chosen so that two instances of this class that sort documents likewise
	* will have the same identifier. On the contrary, this identifier should be
	* different on different {@link Sort sorts}.
	*/
	public String getID() {
	return sort.toString();
	}

	@Override
	public String toString() {
	return getID();
	}

	}