/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.search;
import java.util.Collection;
import java.util.Collections;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.util.Accountable;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.RamUsageEstimator;
/**
* <code>SortedIntDocSet</code> represents a sorted set of Lucene Document Ids.
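* <p>The ids are kept in a single sorted {@code int[]}; random access (see
* {@link #exists(int)}) is therefore a binary search rather than a constant-time lookup.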
*/
public class SortedIntDocSet extends DocSetBase {
private static final long BASE_RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(SortedIntDocSet.class) + RamUsageEstimator.NUM_BYTES_ARRAY_HEADER;
protected final int[] docs;
/**
* @param docs Sorted list of ids
*/
public SortedIntDocSet(int[] docs) {
this.docs = docs;
}
/**
* @param docs Sorted list of ids
* @param len Number of ids in the list
*/
public SortedIntDocSet(int[] docs, int len) {
this(shrink(docs,len));
}
public int[] getDocs() { return docs; }
@Override
public int size() { return docs.length; }
public static int[] zeroInts = new int[0];
public static SortedIntDocSet zero = new SortedIntDocSet(zeroInts);
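/**
* Returns {@code arr} itself if it already has exactly {@code newSize} elements,
* otherwise a copy truncated to the first {@code newSize} elements.
*/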
public static int[] shrink(int[] arr, int newSize) {
if (arr.length == newSize) return arr;
int[] newArr = new int[newSize];
System.arraycopy(arr, 0, newArr, 0, newSize);
return newArr;
}
/** Returns the index of the first non-sorted element or -1 if they are all sorted */
public static int firstNonSorted(int[] arr, int offset, int len) {
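// e.g. for arr={1,2,5,3}, offset=0, len=4 this returns 2: the 5 at index 2 is the first
// element that is out of sorted order relative to the later 3.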
if (len <= 1) return -1;
int lower = arr[offset];
int end = offset + len;
for(int i=offset+1; i<end; i++) {
int next = arr[i];
if (next <= lower) {
for (int j=i-1; j>offset; j--) {
if (arr[j]<next) return j+1;
}
return offset;
}
lower = next;
}
return -1;
}
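/**
* Returns the number of ids common to both sorted lists, using a probing binary search
* over the bigger list. The first argument must be the smaller, non-empty list;
* e.g. intersectionSize(new int[]{2,5,9}, new int[]{1,2,3,5,7,8,10}) returns 2.
*/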
public static int intersectionSize(int[] smallerSortedList, int[] biggerSortedList) {
final int a[] = smallerSortedList;
final int b[] = biggerSortedList;
// The next doc we are looking for will be much closer to the last position we tried
// than it will be to the midpoint between last and high... so probe ahead using
// a function of the ratio of the sizes of the sets.
int step = (b.length/a.length)+1;
// Since the majority of probes should be misses, we'll usually already be above the last probe
// and on average shouldn't need to move more than the step size to step over our target (and thus
// lower the upper bound by a lot)... but if we don't go over our target, it's a big miss... so double it.
step = step + step;
// FUTURE: come up with a density such that target * density == likely position?
// then check step on one side or the other?
// (density could be cached in the DocSet)... length/maxDoc
// FUTURE: try partitioning like a sort algorithm. Pick the midpoint of the big
// array, find where that should be in the small array, and then recurse with
// the top and bottom half of both arrays until they are small enough to use
// a fallback intersection method.
// NOTE: I tried this and it worked, but it was actually slower than this current
// highly optimized approach.
int icount = 0;
int low = 0;
int max = b.length-1;
for (int i=0; i<a.length; i++) {
int doca = a[i];
int high = max;
int probe = low + step; // 40% improvement!
// short linear probe to see if we can drop the high pointer in one big jump.
if (probe<high) {
if (b[probe]>=doca) {
// success! we cut down the upper bound by a lot in one step!
high=probe;
} else {
// relative failure... we get to move the low pointer, but not by much
low=probe+1;
// reprobe worth it? it appears so!
probe = low + step;
if (probe<high) {
if (b[probe]>=doca) {
high=probe;
} else {
low=probe+1;
}
}
}
}
// binary search the rest of the way
while (low <= high) {
int mid = (low+high) >>> 1;
int docb = b[mid];
if (docb < doca) {
low = mid+1;
}
else if (docb > doca) {
high = mid-1;
}
else {
icount++;
low = mid+1; // found it, so start at next element
break;
}
}
// Didn't find it... low is now positioned on the insertion point,
// which is higher than what we were looking for, so continue using
// the same low point.
}
return icount;
}
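/**
* Returns true if the two sorted lists share at least one id. As with
* {@link #intersectionSize(int[], int[])}, the first argument must be the smaller,
* non-empty list.
*/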
public static boolean intersects(int[] smallerSortedList, int[] biggerSortedList) {
// see intersectionSize for more in-depth comments of this algorithm
final int a[] = smallerSortedList;
final int b[] = biggerSortedList;
int step = (b.length/a.length)+1;
step = step + step;
int low = 0;
int max = b.length-1;
for (int i=0; i<a.length; i++) {
int doca = a[i];
int high = max;
int probe = low + step;
if (probe<high) {
if (b[probe]>=doca) {
high=probe;
} else {
low=probe+1;
probe = low + step;
if (probe<high) {
if (b[probe]>=doca) {
high=probe;
} else {
low=probe+1;
}
}
}
}
while (low <= high) {
int mid = (low+high) >>> 1;
int docb = b[mid];
if (docb < doca) {
low = mid+1;
}
else if (docb > doca) {
high = mid-1;
}
else {
return true;
}
}
}
return false;
}
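/**
* Returns the size of the intersection with {@code other}. If {@code other} is not a
* SortedIntDocSet, each of our ids is tested with {@code other.exists()}; otherwise either
* a probing binary search (when one list is at least 8x larger) or a linear merge walk
* of both lists is used.
*/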
@Override
public int intersectionSize(DocSet other) {
if (!(other instanceof SortedIntDocSet)) {
// assume other implementations are better at random access than we are,
// true of BitDocSet and HashDocSet.
int icount = 0;
for (int i=0; i<docs.length; i++) {
if (other.exists(docs[i])) icount++;
}
return icount;
}
// make "a" the smaller set.
int[] otherDocs = ((SortedIntDocSet)other).docs;
final int[] a = docs.length < otherDocs.length ? docs : otherDocs;
final int[] b = docs.length < otherDocs.length ? otherDocs : docs;
if (a.length==0) return 0;
// if b is 8 times bigger than a, use the modified binary search.
if ((b.length>>3) >= a.length) {
return intersectionSize(a, b);
}
// if they are close in size, just do a linear walk of both.
int icount=0;
int i=0,j=0;
int doca=a[i],docb=b[j];
for(;;) {
// switch on the sign bit somehow? Hopefully the JVM is smart enough to just test once.
// Since set a is less dense than set b, doca is likely to be greater than docb, so
// check that case first. This resulted in a 13% speedup.
if (doca > docb) {
if (++j >= b.length) break;
docb=b[j];
} else if (doca < docb) {
if (++i >= a.length) break;
doca=a[i];
} else {
icount++;
if (++i >= a.length) break;
doca=a[i];
if (++j >= b.length) break;
docb=b[j];
}
}
return icount;
}
@Override
public boolean intersects(DocSet other) {
if (!(other instanceof SortedIntDocSet)) {
// assume other implementations are better at random access than we are,
// true of BitDocSet and HashDocSet.
for (int i=0; i<docs.length; i++) {
if (other.exists(docs[i])) return true;
}
return false;
}
// make "a" the smaller set.
int[] otherDocs = ((SortedIntDocSet)other).docs;
final int[] a = docs.length < otherDocs.length ? docs : otherDocs;
final int[] b = docs.length < otherDocs.length ? otherDocs : docs;
if (a.length==0) return false;
// if b is 8 times bigger than a, use the modified binary search.
if ((b.length>>3) >= a.length) {
return intersects(a,b);
}
// if they are close in size, just do a linear walk of both.
int i=0,j=0;
int doca=a[i],docb=b[j];
for(;;) {
// switch on the sign bit somehow? Hopefully the JVM is smart enough to just test once.
// Since set a is less dense than set b, doca is likely to be greater than docb, so
// check that case first. This resulted in a 13% speedup.
if (doca > docb) {
if (++j >= b.length) break;
docb=b[j];
} else if (doca < docb) {
if (++i >= a.length) break;
doca=a[i];
} else {
return true;
}
}
return false;
}
/** puts the intersection of a and b into the target array and returns the size */
public static int intersection(int a[], int lena, int b[], int lenb, int[] target) {
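// the caller must size target to hold at least min(lena, lenb) ids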
if (lena > lenb) {
int ti=lena; lena=lenb; lenb=ti;
int[] ta=a; a=b; b=ta;
}
if (lena==0) return 0;
// if b is 8 times bigger than a, use the modified binary search.
if ((lenb>>3) >= lena) {
return intersectionBinarySearch(a, lena, b, lenb, target);
}
int icount=0;
int i=0,j=0;
int doca=a[i],docb=b[j];
for(;;) {
if (doca > docb) {
if (++j >= lenb) break;
docb=b[j];
} else if (doca < docb) {
if (++i >= lena) break;
doca=a[i];
} else {
target[icount++] = doca;
if (++i >= lena) break;
doca=a[i];
if (++j >= lenb) break;
docb=b[j];
}
}
return icount;
}
/** Puts the intersection of a and b into the target array and returns the size.
* lena should be smaller than lenb */
protected static int intersectionBinarySearch(int[] a, int lena, int[] b, int lenb, int[] target) {
int step = (lenb/lena)+1;
step = step + step;
int icount = 0;
int low = 0;
int max = lenb-1;
for (int i=0; i<lena; i++) {
int doca = a[i];
int high = max;
int probe = low + step; // 40% improvement!
// short linear probe to see if we can drop the high pointer in one big jump.
if (probe<high) {
if (b[probe]>=doca) {
// success! we cut down the upper bound by a lot in one step!
high=probe;
} else {
// relative failure... we get to move the low pointer, but not by much
low=probe+1;
// reprobe worth it? it appears so!
probe = low + step;
if (probe<high) {
if (b[probe]>=doca) {
high=probe;
} else {
low=probe+1;
}
}
}
}
// binary search
while (low <= high) {
int mid = (low+high) >>> 1;
int docb = b[mid];
if (docb < doca) {
low = mid+1;
}
else if (docb > doca) {
high = mid-1;
}
else {
target[icount++] = doca;
low = mid+1; // found it, so start at next element
break;
}
}
// Didn't find it... low is now positioned on the insertion point,
// which is higher than what we were looking for, so continue using
// the same low point.
}
return icount;
}
@Override
public DocSet intersection(DocSet other) {
if (!(other instanceof SortedIntDocSet)) {
int icount = 0;
int arr[] = new int[docs.length];
for (int i=0; i<docs.length; i++) {
int doc = docs[i];
if (other.exists(doc)) arr[icount++] = doc;
}
return new SortedIntDocSet(arr,icount);
}
int[] otherDocs = ((SortedIntDocSet)other).docs;
int maxsz = Math.min(docs.length, otherDocs.length);
int[] arr = new int[maxsz];
int sz = intersection(docs, docs.length, otherDocs, otherDocs.length, arr);
return new SortedIntDocSet(arr,sz);
}
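/**
* Puts the ids of {@code a} that are not present in {@code b} into the target array and
* returns the count. As with {@link #intersectionBinarySearch}, {@code a} should be the
* much smaller, non-empty list.
*/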
protected static int andNotBinarySearch(int a[], int lena, int b[], int lenb, int[] target) {
int step = (lenb/lena)+1;
step = step + step;
int count = 0;
int low = 0;
int max = lenb-1;
outer:
for (int i=0; i<lena; i++) {
int doca = a[i];
int high = max;
int probe = low + step; // 40% improvement!
// short linear probe to see if we can drop the high pointer in one big jump.
if (probe<high) {
if (b[probe]>=doca) {
// success! we cut down the upper bound by a lot in one step!
high=probe;
} else {
// relative failure... we get to move the low pointer, but not by much
low=probe+1;
// reprobe worth it? it appears so!
probe = low + step;
if (probe<high) {
if (b[probe]>=doca) {
high=probe;
} else {
low=probe+1;
}
}
}
}
// binary search
while (low <= high) {
int mid = (low+high) >>> 1;
int docb = b[mid];
if (docb < doca) {
low = mid+1;
}
else if (docb > doca) {
high = mid-1;
}
else {
low = mid+1; // found it, so start at next element
continue outer;
}
}
// Didn't find it... low is now positioned on the insertion point,
// which is higher than what we were looking for, so continue using
// the same low point.
target[count++] = doca;
}
return count;
}
/** Puts the ids in a that are not in b (i.e. a AND NOT b) into the target array and returns the size */
public static int andNot(int a[], int lena, int b[], int lenb, int[] target) {
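// the caller must size target to hold at least lena ids (the result can never be larger than a)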
if (lena==0) return 0;
if (lenb==0) {
System.arraycopy(a,0,target,0,lena);
return lena;
}
// if b is 8 times bigger than a, use the modified binary search.
if ((lenb>>3) >= lena) {
return andNotBinarySearch(a, lena, b, lenb, target);
}
int count=0;
int i=0,j=0;
int doca=a[i],docb=b[j];
for(;;) {
if (doca > docb) {
if (++j >= lenb) break;
docb=b[j];
} else if (doca < docb) {
target[count++] = doca;
if (++i >= lena) break;
doca=a[i];
} else {
if (++i >= lena) break;
doca=a[i];
if (++j >= lenb) break;
docb=b[j];
}
}
int leftover=lena - i;
if (leftover > 0) {
System.arraycopy(a,i,target,count,leftover);
count += leftover;
}
return count;
}
@Override
public DocSet andNot(DocSet other) {
if (other.size()==0) return this;
if (!(other instanceof SortedIntDocSet)) {
int count = 0;
int arr[] = new int[docs.length];
for (int i=0; i<docs.length; i++) {
int doc = docs[i];
if (!other.exists(doc)) arr[count++] = doc;
}
return new SortedIntDocSet(arr,count);
}
int[] otherDocs = ((SortedIntDocSet)other).docs;
int[] arr = new int[docs.length];
int sz = andNot(docs, docs.length, otherDocs, otherDocs.length, arr);
return new SortedIntDocSet(arr,sz);
}
@Override
public void addAllTo(DocSet target) {
for (int doc : docs) {
target.add(doc);
}
}
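/** Returns true if {@code doc} is in the set, located via binary search over the sorted id array. */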
@Override
public boolean exists(int doc) {
// this could be faster by estimating where in the list the doc is likely to appear,
// but we should get away from using exists() anyway.
int low = 0;
int high = docs.length-1;
// binary search
while (low <= high) {
int mid = (low+high) >>> 1;
int docb = docs[mid];
if (docb < doc) {
low = mid+1;
}
else if (docb > doc) {
high = mid-1;
}
else {
return true;
}
}
return false;
}
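/** Returns an iterator over the ids in increasing order; {@link DocIterator#score()} always returns 0. */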
@Override
public DocIterator iterator() {
return new DocIterator() {
int pos=0;
@Override
public boolean hasNext() {
return pos < docs.length;
}
@Override
public Integer next() {
return nextDoc();
}
/**
* The remove operation is not supported by this Iterator.
*/
@Override
public void remove() {
throw new UnsupportedOperationException("The remove operation is not supported by this Iterator.");
}
@Override
public int nextDoc() {
return docs[pos++];
}
@Override
public float score() {
return 0.0f;
}
};
}
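/** Returns a {@link FixedBitSet} sized to the largest id in the set plus one, with a bit set for each id. */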
@Override
public FixedBitSet getBits() {
int maxDoc = size() > 0 ? docs[size()-1] : 0;
FixedBitSet bs = new FixedBitSet(maxDoc+1);
for (int doc : docs) {
bs.set(doc);
}
return bs;
}
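/**
* Binary search for {@code value} within {@code arr[low..high]} (both endpoints inclusive).
* Returns the index of {@code value} if present, otherwise the insertion point: the first
* index in the range whose element is greater than {@code value}, or {@code high+1} if
* every element is smaller.
*/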
public static int findIndex(int[] arr, int value, int low, int high) {
// binary search
while (low <= high) {
int mid = (low+high) >>> 1;
int found = arr[mid];
if (found < value) {
low = mid+1;
}
else if (found > value) {
high = mid-1;
}
else {
return mid;
}
}
return low;
}
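/**
* Returns a top-level {@link Filter} view of this set. Each per-segment DocIdSet is produced
* by locating the slice of global ids that falls within [docBase, docBase+maxDoc) for that
* segment and subtracting docBase from each id; the end index of the previous slice is cached
* to speed up in-order, single-threaded iteration over the segments.
*/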
@Override
public Filter getTopFilter() {
return new Filter() {
int lastEndIdx = 0;
@Override
public DocIdSet getDocIdSet(final LeafReaderContext context, final Bits acceptDocs) {
LeafReader reader = context.reader();
// all Solr DocSets that are used as filters only include live docs
final Bits acceptDocs2 = acceptDocs == null ? null : (reader.getLiveDocs() == acceptDocs ? null : acceptDocs);
final int base = context.docBase;
final int maxDoc = reader.maxDoc();
final int max = base + maxDoc; // one past the max doc in this segment.
int sidx = Math.max(0,lastEndIdx);
if (sidx > 0 && docs[sidx-1] >= base) {
// oops, the lastEndIdx isn't correct... we must have been used
// in a multi-threaded context, or the indexreaders are being
// used out-of-order. start at 0.
sidx = 0;
}
if (sidx < docs.length && docs[sidx] < base) {
// if docs[sidx] is < base, we need to seek to find the real start.
sidx = findIndex(docs, base, sidx, docs.length-1);
}
final int startIdx = sidx;
// Largest possible end index is limited to the start index
// plus the number of docs contained in the segment. Subtract 1 since
// the end index is inclusive.
int eidx = Math.min(docs.length, startIdx + maxDoc) - 1;
// find the real end
eidx = findIndex(docs, max, startIdx, eidx) - 1;
final int endIdx = eidx;
lastEndIdx = endIdx;
return BitsFilteredDocIdSet.wrap(new DocIdSet() {
@Override
public DocIdSetIterator iterator() {
return new DocIdSetIterator() {
int idx = startIdx;
int adjustedDoc = -1;
@Override
public int docID() {
return adjustedDoc;
}
@Override
public int nextDoc() {
return adjustedDoc = (idx > endIdx) ? NO_MORE_DOCS : (docs[idx++] - base);
}
@Override
public int advance(int target) {
if (idx > endIdx || target==NO_MORE_DOCS) return adjustedDoc=NO_MORE_DOCS;
target += base;
// probe next
int rawDoc = docs[idx++];
if (rawDoc >= target) return adjustedDoc=rawDoc-base;
int high = endIdx;
// TODO: probe more before resorting to binary search?
// binary search
while (idx <= high) {
int mid = (idx+high) >>> 1;
rawDoc = docs[mid];
if (rawDoc < target) {
idx = mid+1;
}
else if (rawDoc > target) {
high = mid-1;
}
else {
idx=mid+1;
return adjustedDoc=rawDoc - base;
}
}
// idx is now at the insertion point...
if (idx <= endIdx) {
return adjustedDoc = docs[idx++] - base;
} else {
return adjustedDoc=NO_MORE_DOCS;
}
}
@Override
public long cost() {
return docs.length;
}
};
}
@Override
public long ramBytesUsed() {
return RamUsageEstimator.sizeOf(docs);
}
@Override
public Bits bits() {
// random access is expensive for this set
return null;
}
}, acceptDocs2);
}
@Override
public String toString(String field) {
return "SortedIntDocSetTopFilter";
}
// Equivalence should/could be based on docs here? How did it work previously?
@Override
public boolean equals(Object other) {
return other == this;
}
@Override
public int hashCode() {
return System.identityHashCode(this);
}
};
}
@Override
public SortedIntDocSet clone() {
return new SortedIntDocSet(docs.clone());
}
@Override
public long ramBytesUsed() {
return BASE_RAM_BYTES_USED + (docs.length << 2);
}
@Override
public Collection<Accountable> getChildResources() {
return Collections.emptyList();
}
@Override
public String toString() {
return "SortedIntDocSet{" +
"size=" + size() + "," +
"ramUsed=" + RamUsageEstimator.humanReadableUnits(ramBytesUsed())+
'}';
}
}