lucene/facet/src/java/org/apache/lucene/facet/range/LongRangeCounter.java - lucene-solr - Git at Google

 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
  * this work for additional information regarding copyright ownership.
  * The ASF licenses this file to You under the Apache License, Version 2.0
  * (the "License"); you may not use this file except in compliance with
  * the License.  You may obtain a copy of the License at
  *
  *     http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
 package org.apache.lucene.facet.range;

 import java.util.Arrays;
 import java.util.Comparator;

 /**
  * Counter for numeric ranges. Works for both single- and multi-valued cases (assuming you use it
  * correctly).
  *
  * <p>Usage notes: When counting a document field that only has a single value, callers should call
  * addSingleValued() with the value. Whenever a document field has multiple values, callers should
  * call startMultiValuedDoc() at the beginning of processing the document, followed by
  * addMultiValued() with each value before finally calling endMultiValuedDoc() at the end of
  * processing the document. The call to endMultiValuedDoc() will respond with a boolean indicating
  * whether-or-not the specific document matched against at least one of the ranges being counted.
  * Finally, after processing all documents, the caller should call finish(). This final call will
  * ensure the contents of the user-provided {@code countBuffer} contains accurate counts (each index
  * corresponding to the provided {@code LongRange} in {@code ranges}). The final call to finish()
  * will also report how many additional documents did not match against any ranges. The combination
  * of the endMultiValuedDoc() boolean responses and the number reported by finish() communicates the
  * total number of missing documents. Note that the call to finish() will not report any documents
  * already reported missing by endMultiValuedDoc().
  */
 abstract class LongRangeCounter {

   /** accumulated counts for all of the ranges */
   private final int[] countBuffer;

   /**
    * for multi-value docs, we keep track of the last elementary interval we've counted so we can use
    * that as a lower-bound when counting subsequent values. this takes advantage of the fact that
    * values within a given doc are sorted.
    */
   protected int multiValuedDocLastSeenElementaryInterval;

   static LongRangeCounter create(LongRange[] ranges, int[] countBuffer) {
     if (hasOverlappingRanges(ranges)) {
       return new OverlappingLongRangeCounter(ranges, countBuffer);
     } else {
       return new ExclusiveLongRangeCounter(ranges, countBuffer);
     }
   }

   protected LongRangeCounter(int[] countBuffer) {
     // We'll populate the user-provided count buffer with range counts:
     this.countBuffer = countBuffer;
   }

   /** Start processing a new doc. It's unnecessary to call this for single-value cases. */
   void startMultiValuedDoc() {
     multiValuedDocLastSeenElementaryInterval = -1;
   }

   /**
    * Finish processing a new doc. Returns whether-or-not the document contributed a count to at
    * least one range. It's unnecessary to call this for single-value cases.
    */
   abstract boolean endMultiValuedDoc();

   /** Count a single valued doc */
   void addSingleValued(long v) {

     // NOTE: this works too, but it's ~6% slower on a simple
     // test with a high-freq TermQuery w/ range faceting on
     // wikimediumall:
     /*
     int index = Arrays.binarySearch(boundaries, v);
     if (index < 0) {
       index = -index-1;
     }
     leafCounts[index]++;
     */

     // Binary search to find matched elementary range; we
     // are guaranteed to find a match because the last
     // boundary is Long.MAX_VALUE:

     long[] boundaries = boundaries();

     int lo = 0;
     int hi = boundaries.length - 1;
     while (true) {
       int mid = (lo + hi) >>> 1;
       if (v <= boundaries[mid]) {
         if (mid == 0) {
           processSingleValuedHit(mid);
           return;
         } else {
           hi = mid - 1;
         }
       } else if (v > boundaries[mid + 1]) {
         lo = mid + 1;
       } else {
         processSingleValuedHit(mid + 1);
         return;
       }
     }
   }

   /** Count a multi-valued doc value */
   void addMultiValued(long v) {

     if (rangeCount() == 0) {
       return; // don't bother if there aren't any requested ranges
     }

     long[] boundaries = boundaries();

     // First check if we've "advanced" beyond the last elementary interval we counted for this doc.
     // If we haven't, there's no sense doing anything else:
     if (multiValuedDocLastSeenElementaryInterval != -1
         && v <= boundaries[multiValuedDocLastSeenElementaryInterval]) {
       return;
     }

     // Also check if we've already counted the last elementary interval. If so, there's nothing
     // else to count for this doc:
     final int nextCandidateElementaryInterval = multiValuedDocLastSeenElementaryInterval + 1;
     if (nextCandidateElementaryInterval == boundaries.length) {
       return;
     }

     // Binary search in the range of the next candidate interval up to the last interval:
     int lo = nextCandidateElementaryInterval;
     int hi = boundaries.length - 1;
     while (true) {
       int mid = (lo + hi) >>> 1;
       if (v <= boundaries[mid]) {
         if (mid == nextCandidateElementaryInterval) {
           processMultiValuedHit(mid);
           multiValuedDocLastSeenElementaryInterval = mid;
           return;
         } else {
           hi = mid - 1;
         }
       } else if (v > boundaries[mid + 1]) {
         lo = mid + 1;
       } else {
         int idx = mid + 1;
         processMultiValuedHit(idx);
         multiValuedDocLastSeenElementaryInterval = idx;
         return;
       }
     }
   }

   /**
    * Finish processing all documents. This will return the number of docs that didn't contribute to
    * any ranges (that weren't already reported when calling endMultiValuedDoc()).
    */
   abstract int finish();

   /** Provide boundary information for elementary intervals (max inclusive value per interval) */
   protected abstract long[] boundaries();

   /** Process a single-value "hit" against an elementary interval. */
   protected abstract void processSingleValuedHit(int elementaryIntervalNum);

   /** Process a multi-value "hit" against an elementary interval. */
   protected abstract void processMultiValuedHit(int elementaryIntervalNum);

   /** Increment the specified range by one. */
   protected final void increment(int rangeNum) {
     countBuffer[rangeNum]++;
   }

   /** Increment the specified range by the specified count. */
   protected final void increment(int rangeNum, int count) {
     countBuffer[rangeNum] += count;
   }

   /** Number of ranges requested by the caller. */
   protected final int rangeCount() {
     return countBuffer.length;
   }

   /** Determine whether-or-not any requested ranges overlap */
   private static boolean hasOverlappingRanges(LongRange[] ranges) {
     if (ranges.length == 0) {
       return false;
     }

     // Copy before sorting so we don't mess with the caller's original ranges:
     LongRange[] sortedRanges = new LongRange[ranges.length];
     System.arraycopy(ranges, 0, sortedRanges, 0, ranges.length);
     Arrays.sort(sortedRanges, Comparator.comparingLong(r -> r.min));

     long previousMax = sortedRanges[0].max;
     for (int i = 1; i < sortedRanges.length; i++) {
       // Ranges overlap if the next min is <= the previous max (note that LongRange models
       // closed ranges, so equal limit points are considered overlapping):
       if (sortedRanges[i].min <= previousMax) {
         return true;
       }
       previousMax = sortedRanges[i].max;
     }

     return false;
   }

   protected static final class InclusiveRange {
     final long start;
     final long end;

     InclusiveRange(long start, long end) {
       assert end >= start;
       this.start = start;
       this.end = end;
     }

     @Override
     public String toString() {
       return start + " to " + end;
     }
   }
 }
	/*
	* Licensed to the Apache Software Foundation (ASF) under one or more
	* contributor license agreements. See the NOTICE file distributed with
	* this work for additional information regarding copyright ownership.
	* The ASF licenses this file to You under the Apache License, Version 2.0
	* (the "License"); you may not use this file except in compliance with
	* the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/
	package org.apache.lucene.facet.range;

	import java.util.Arrays;
	import java.util.Comparator;

	/**
	* Counter for numeric ranges. Works for both single- and multi-valued cases (assuming you use it
	* correctly).
	*
	* <p>Usage notes: When counting a document field that only has a single value, callers should call
	* addSingleValued() with the value. Whenever a document field has multiple values, callers should
	* call startMultiValuedDoc() at the beginning of processing the document, followed by
	* addMultiValued() with each value before finally calling endMultiValuedDoc() at the end of
	* processing the document. The call to endMultiValuedDoc() will respond with a boolean indicating
	* whether-or-not the specific document matched against at least one of the ranges being counted.
	* Finally, after processing all documents, the caller should call finish(). This final call will
	* ensure the contents of the user-provided {@code countBuffer} contains accurate counts (each index
	* corresponding to the provided {@code LongRange} in {@code ranges}). The final call to finish()
	* will also report how many additional documents did not match against any ranges. The combination
	* of the endMultiValuedDoc() boolean responses and the number reported by finish() communicates the
	* total number of missing documents. Note that the call to finish() will not report any documents
	* already reported missing by endMultiValuedDoc().
	*/
	abstract class LongRangeCounter {

	/** accumulated counts for all of the ranges */
	private final int[] countBuffer;

	/**
	* for multi-value docs, we keep track of the last elementary interval we've counted so we can use
	* that as a lower-bound when counting subsequent values. this takes advantage of the fact that
	* values within a given doc are sorted.
	*/
	protected int multiValuedDocLastSeenElementaryInterval;

	static LongRangeCounter create(LongRange[] ranges, int[] countBuffer) {
	if (hasOverlappingRanges(ranges)) {
	return new OverlappingLongRangeCounter(ranges, countBuffer);
	} else {
	return new ExclusiveLongRangeCounter(ranges, countBuffer);
	}
	}

	protected LongRangeCounter(int[] countBuffer) {
	// We'll populate the user-provided count buffer with range counts:
	this.countBuffer = countBuffer;
	}

	/** Start processing a new doc. It's unnecessary to call this for single-value cases. */
	void startMultiValuedDoc() {
	multiValuedDocLastSeenElementaryInterval = -1;
	}

	/**
	* Finish processing a new doc. Returns whether-or-not the document contributed a count to at
	* least one range. It's unnecessary to call this for single-value cases.
	*/
	abstract boolean endMultiValuedDoc();

	/** Count a single valued doc */
	void addSingleValued(long v) {

	// NOTE: this works too, but it's ~6% slower on a simple
	// test with a high-freq TermQuery w/ range faceting on
	// wikimediumall:
	/*
	int index = Arrays.binarySearch(boundaries, v);
	if (index < 0) {
	index = -index-1;
	}
	leafCounts[index]++;
	*/

	// Binary search to find matched elementary range; we
	// are guaranteed to find a match because the last
	// boundary is Long.MAX_VALUE:

	long[] boundaries = boundaries();

	int lo = 0;
	int hi = boundaries.length - 1;
	while (true) {
	int mid = (lo + hi) >>> 1;
	if (v <= boundaries[mid]) {
	if (mid == 0) {
	processSingleValuedHit(mid);
	return;
	} else {
	hi = mid - 1;
	}
	} else if (v > boundaries[mid + 1]) {
	lo = mid + 1;
	} else {
	processSingleValuedHit(mid + 1);
	return;
	}
	}
	}

	/** Count a multi-valued doc value */
	void addMultiValued(long v) {

	if (rangeCount() == 0) {
	return; // don't bother if there aren't any requested ranges
	}

	long[] boundaries = boundaries();

	// First check if we've "advanced" beyond the last elementary interval we counted for this doc.
	// If we haven't, there's no sense doing anything else:
	if (multiValuedDocLastSeenElementaryInterval != -1
	&& v <= boundaries[multiValuedDocLastSeenElementaryInterval]) {
	return;
	}

	// Also check if we've already counted the last elementary interval. If so, there's nothing
	// else to count for this doc:
	final int nextCandidateElementaryInterval = multiValuedDocLastSeenElementaryInterval + 1;
	if (nextCandidateElementaryInterval == boundaries.length) {
	return;
	}

	// Binary search in the range of the next candidate interval up to the last interval:
	int lo = nextCandidateElementaryInterval;
	int hi = boundaries.length - 1;
	while (true) {
	int mid = (lo + hi) >>> 1;
	if (v <= boundaries[mid]) {
	if (mid == nextCandidateElementaryInterval) {
	processMultiValuedHit(mid);
	multiValuedDocLastSeenElementaryInterval = mid;
	return;
	} else {
	hi = mid - 1;
	}
	} else if (v > boundaries[mid + 1]) {
	lo = mid + 1;
	} else {
	int idx = mid + 1;
	processMultiValuedHit(idx);
	multiValuedDocLastSeenElementaryInterval = idx;
	return;
	}
	}
	}

	/**
	* Finish processing all documents. This will return the number of docs that didn't contribute to
	* any ranges (that weren't already reported when calling endMultiValuedDoc()).
	*/
	abstract int finish();

	/** Provide boundary information for elementary intervals (max inclusive value per interval) */
	protected abstract long[] boundaries();

	/** Process a single-value "hit" against an elementary interval. */
	protected abstract void processSingleValuedHit(int elementaryIntervalNum);

	/** Process a multi-value "hit" against an elementary interval. */
	protected abstract void processMultiValuedHit(int elementaryIntervalNum);

	/** Increment the specified range by one. */
	protected final void increment(int rangeNum) {
	countBuffer[rangeNum]++;
	}

	/** Increment the specified range by the specified count. */
	protected final void increment(int rangeNum, int count) {
	countBuffer[rangeNum] += count;
	}

	/** Number of ranges requested by the caller. */
	protected final int rangeCount() {
	return countBuffer.length;
	}

	/** Determine whether-or-not any requested ranges overlap */
	private static boolean hasOverlappingRanges(LongRange[] ranges) {
	if (ranges.length == 0) {
	return false;
	}

	// Copy before sorting so we don't mess with the caller's original ranges:
	LongRange[] sortedRanges = new LongRange[ranges.length];
	System.arraycopy(ranges, 0, sortedRanges, 0, ranges.length);
	Arrays.sort(sortedRanges, Comparator.comparingLong(r -> r.min));

	long previousMax = sortedRanges[0].max;
	for (int i = 1; i < sortedRanges.length; i++) {
	// Ranges overlap if the next min is <= the previous max (note that LongRange models
	// closed ranges, so equal limit points are considered overlapping):
	if (sortedRanges[i].min <= previousMax) {
	return true;
	}
	previousMax = sortedRanges[i].max;
	}

	return false;
	}

	protected static final class InclusiveRange {
	final long start;
	final long end;

	InclusiveRange(long start, long end) {
	assert end >= start;
	this.start = start;
	this.end = end;
	}

	@Override
	public String toString() {
	return start + " to " + end;
	}
	}
	}