blob: 0578b57d9f89c32f83e84c79dccd3bc966ac30bd [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.facet.range;
import java.io.IOException;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import org.apache.lucene.facet.FacetResult;
import org.apache.lucene.facet.Facets;
import org.apache.lucene.facet.FacetsCollector;
import org.apache.lucene.facet.LabelAndValue;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.IndexReaderContext;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.ReaderUtil;
import org.apache.lucene.index.SortedNumericDocValues;
import org.apache.lucene.search.ConjunctionDISI;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreMode;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.Weight;
/** Base class for range faceting.
*
* @lucene.experimental */
abstract class RangeFacetCounts extends Facets {

  /** Ranges passed to constructor. */
  protected final Range[] ranges;

  /** Per-range counts, parallel to {@link #ranges}; populated by the subclass via {@link #count}. */
  protected final int[] counts;

  /** Optional: if specified, we first test this Query to
   *  see whether the document should be checked for
   *  matching ranges. If this is null, all documents are
   *  checked. */
  protected final Query fastMatchQuery;

  /** Our field name. */
  protected final String field;

  /** Total number of hits that matched at least one range. */
  protected int totCount;

  /** Create {@code RangeFacetCounts} */
  protected RangeFacetCounts(String field, Range[] ranges, Query fastMatchQuery) throws IOException {
    this.field = field;
    this.ranges = ranges;
    this.fastMatchQuery = fastMatchQuery;
    counts = new int[ranges.length];
  }

  /**
   * Create a {@link org.apache.lucene.search.DocIdSetIterator} from the provided {@code hits} that
   * relies on {@code fastMatchQuery} if available for first-pass filtering. A null response
   * indicates no documents will match.
   */
  protected DocIdSetIterator createIterator(FacetsCollector.MatchingDocs hits) throws IOException {
    if (fastMatchQuery == null) {
      // No pre-filter configured: every collected hit is a candidate.
      return hits.bits.iterator();
    }
    final IndexReaderContext topContext = ReaderUtil.getTopLevelContext(hits.context);
    final IndexSearcher searcher = new IndexSearcher(topContext);
    // This searcher is throwaway; don't let it populate any shared query cache.
    searcher.setQueryCache(null);
    final Weight fastMatchWeight =
        searcher.createWeight(searcher.rewrite(fastMatchQuery), ScoreMode.COMPLETE_NO_SCORES, 1);
    final Scorer scorer = fastMatchWeight.scorer(hits.context);
    if (scorer == null) {
      return null; // no hits from the fastMatchQuery; return null
    }
    // Only visit docs matched by BOTH the collected hits and the fast-match query.
    return ConjunctionDISI.intersectIterators(Arrays.asList(hits.bits.iterator(), scorer.iterator()));
  }

  protected abstract LongRange[] getLongRanges();

  /** Allow sub-classes to (optionally) map from the stored long bits to a long that should be
   * used for the actual counting. Default behavior is a no-op.
   */
  protected long mapDocValue(long l) {
    return l;
  }

  /** Counts from the provided field. */
  protected void count(String field, List<FacetsCollector.MatchingDocs> matchingDocs)
      throws IOException {

    final int segCount = matchingDocs.size();

    // Load doc values for every segment up front, tracking whether any segment is truly
    // multi-valued. If none is, we can run the cheaper single-valued counting path.
    SortedNumericDocValues[] multiValuedDocVals = new SortedNumericDocValues[segCount];
    NumericDocValues[] singleValuedDocVals = null;
    boolean foundMultiValued = false;

    for (int seg = 0; seg < segCount; seg++) {
      FacetsCollector.MatchingDocs hits = matchingDocs.get(seg);
      SortedNumericDocValues multiValues = DocValues.getSortedNumeric(hits.context.reader(), field);
      multiValuedDocVals[seg] = multiValues;

      // Only attempt the singleton unwrap while every segment so far has been single-valued.
      if (foundMultiValued == false) {
        NumericDocValues singleValues = DocValues.unwrapSingleton(multiValues);
        if (singleValues == null) {
          foundMultiValued = true;
        } else {
          if (singleValuedDocVals == null) {
            singleValuedDocVals = new NumericDocValues[segCount];
          }
          singleValuedDocVals[seg] = singleValues;
        }
      }
    }

    // Drop whichever representation we won't use.
    if (foundMultiValued) {
      singleValuedDocVals = null;
    } else {
      multiValuedDocVals = null;
    }

    LongRangeCounter counter = LongRangeCounter.create(getLongRanges(), counts);
    int missingCount = 0;

    if (foundMultiValued == false) {
      // Optimized path: every segment is single-valued.
      for (int seg = 0; seg < segCount; seg++) {
        FacetsCollector.MatchingDocs hits = matchingDocs.get(seg);
        final DocIdSetIterator it = createIterator(hits);
        if (it == null) {
          continue; // fastMatchQuery matched nothing in this segment
        }
        assert singleValuedDocVals != null;
        NumericDocValues singleValues = singleValuedDocVals[seg];

        // Count all hits up front; docs with no value are subtracted below via missingCount.
        totCount += hits.totalHits;

        int doc = it.nextDoc();
        while (doc != DocIdSetIterator.NO_MORE_DOCS) {
          if (singleValues.advanceExact(doc)) {
            counter.addSingleValued(mapDocValue(singleValues.longValue()));
          } else {
            missingCount++;
          }
          doc = it.nextDoc();
        }
      }
    } else {
      // General path: at least one segment is multi-valued.
      for (int seg = 0; seg < segCount; seg++) {
        final DocIdSetIterator it = createIterator(matchingDocs.get(seg));
        if (it == null) {
          continue;
        }
        SortedNumericDocValues multiValues = multiValuedDocVals[seg];

        int doc = it.nextDoc();
        while (doc != DocIdSetIterator.NO_MORE_DOCS) {
          if (multiValues.advanceExact(doc)) {
            int valueCount = multiValues.docValueCount();
            if (valueCount == 1) {
              // Single value on this doc: skip the multi-valued bookkeeping.
              counter.addSingleValued(mapDocValue(multiValues.nextValue()));
              totCount++;
            } else {
              counter.startMultiValuedDoc();
              for (int v = 0; v < valueCount; v++) {
                counter.addMultiValued(mapDocValue(multiValues.nextValue()));
              }
              // Only count the doc toward the total if at least one value fell in a range.
              if (counter.endMultiValuedDoc()) {
                totCount++;
              }
            }
          }
          doc = it.nextDoc();
        }
      }
    }

    // finish() reports single-valued docs whose value landed in no range; they are "missing"
    // from the perspective of the total.
    missingCount += counter.finish();
    totCount -= missingCount;
  }

  @Override
  public FacetResult getTopChildren(int topN, String dim, String... path) {
    if (dim.equals(field) == false) {
      throw new IllegalArgumentException("invalid dim \"" + dim + "\"; should be \"" + field + "\"");
    }
    if (path.length != 0) {
      throw new IllegalArgumentException("path.length should be 0");
    }
    // All ranges are always reported, regardless of topN.
    LabelAndValue[] labelValues = new LabelAndValue[counts.length];
    for (int i = 0; i < counts.length; i++) {
      labelValues[i] = new LabelAndValue(ranges[i].label, counts[i]);
    }
    return new FacetResult(dim, path, totCount, labelValues, labelValues.length);
  }

  @Override
  public Number getSpecificValue(String dim, String... path) throws IOException {
    // TODO: should we impl this?
    throw new UnsupportedOperationException();
  }

  @Override
  public List<FacetResult> getAllDims(int topN) throws IOException {
    // Range faceting exposes a single dimension: the configured field.
    return Collections.singletonList(getTopChildren(topN, field));
  }

  @Override
  public String toString() {
    StringBuilder b = new StringBuilder("RangeFacetCounts totCount=").append(totCount).append(":\n");
    for (int i = 0; i < ranges.length; i++) {
      b.append(" ").append(ranges[i].label).append(" -> count=").append(counts[i]).append('\n');
    }
    return b.toString();
  }
}