lucene/facet/src/java/org/apache/lucene/facet/FacetsCollector.java - lucene-solr - Git at Google

 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
  * this work for additional information regarding copyright ownership.
  * The ASF licenses this file to You under the Apache License, Version 2.0
  * (the "License"); you may not use this file except in compliance with
  * the License.  You may obtain a copy of the License at
  *
  *     http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
 package org.apache.lucene.facet;

 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.List;

 import org.apache.lucene.index.LeafReaderContext;
 import org.apache.lucene.search.Collector;
 import org.apache.lucene.search.DocIdSet;
 import org.apache.lucene.search.FieldDoc;
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.MultiCollector;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.search.Scorable;
 import org.apache.lucene.search.ScoreDoc;
 import org.apache.lucene.search.ScoreMode;
 import org.apache.lucene.search.SimpleCollector;
 import org.apache.lucene.search.Sort;
 import org.apache.lucene.search.TopDocs;
 import org.apache.lucene.search.TopDocsCollector;
 import org.apache.lucene.search.TopFieldCollector;
 import org.apache.lucene.search.TopFieldDocs;
 import org.apache.lucene.search.TopScoreDocCollector;
 import org.apache.lucene.search.TotalHitCountCollector;
 import org.apache.lucene.search.TotalHits;
 import org.apache.lucene.util.ArrayUtil;
 import org.apache.lucene.util.DocIdSetBuilder;

 /** Collects hits for subsequent faceting.  Once you've run
  *  a search and collected hits into this, instantiate one of
  *  the {@link Facets} subclasses to do the facet
  *  counting.  Use the {@code search} utility methods to
  *  perform an "ordinary" search but also collect into a
  *  {@link Collector}.
  *
  *  <p>Hits are gathered per segment: every time a new segment is
  *  started the hits of the previous one are packaged as a
  *  {@link MatchingDocs}; the hits of the last segment are packaged
  *  when {@link #getMatchingDocs()} is called. */
 // redundant 'implements Collector' to workaround javadocs bugs
 public class FacetsCollector extends SimpleCollector implements Collector {

   /** Segment currently being collected; null when no segment is open. */
   private LeafReaderContext context;
   /** Scorer handed to {@link #setScorer}; only read when {@code keepScores} is true. */
   private Scorable scorer;
   /** Number of hits collected so far in the current segment. */
   private int totalHits;
   /** Scores of the current segment's hits, in collection order; stays null unless {@code keepScores}. */
   private float[] scores;
   private final boolean keepScores;
   /** One entry per finished segment. */
   private final List<MatchingDocs> matchingDocs = new ArrayList<>();
   /** Doc id accumulator for the current segment; null when no segment is open. */
   private DocIdSetBuilder docsBuilder;

   /**
    * Holds the documents that were matched in the {@link org.apache.lucene.index.LeafReaderContext}.
    * If scores were required, then {@code scores} is not null.
    */
   public final static class MatchingDocs {

     /** Context for this segment. */
     public final LeafReaderContext context;

     /** Which documents were seen. */
     public final DocIdSet bits;

     /** Non-sparse scores array. */
     public final float[] scores;

     /** Total number of hits */
     public final int totalHits;

     /**
      * Sole constructor.
      *
      * @param context   segment the hits belong to
      * @param bits      set of matched doc ids
      * @param totalHits number of hits collected in the segment
      * @param scores    scores of the hits, or null if scores were not kept
      */
     public MatchingDocs(LeafReaderContext context, DocIdSet bits, int totalHits, float[] scores) {
       this.context = context;
       this.bits = bits;
       this.scores = scores;
       this.totalHits = totalHits;
     }
   }

   /** Default constructor; scores are not kept. */
   public FacetsCollector() {
     this(false);
   }

   /** Create this; if {@code keepScores} is true then a
    *  float[] is allocated to hold score of all hits. */
   public FacetsCollector(boolean keepScores) {
     this.keepScores = keepScores;
   }

   /** True if scores were saved. */
   public final boolean getKeepScores() {
     return keepScores;
   }

   /**
    * Returns the documents matched by the query, one {@link MatchingDocs} per
    * visited segment.
    *
    * <p>If a segment is still open, its hits are packaged first and the
    * per-segment state is cleared.
    */
   public List<MatchingDocs> getMatchingDocs() {
     finishCurrentSegment();
     return matchingDocs;
   }

   /**
    * Records the hits collected for the open segment, if there is one, as a
    * {@link MatchingDocs} and clears the per-segment state.
    */
   private void finishCurrentSegment() {
     if (docsBuilder != null) {
       matchingDocs.add(new MatchingDocs(context, docsBuilder.build(), totalHits, scores));
       docsBuilder = null;
       scores = null;
       context = null;
     }
   }

   /**
    * Records {@code doc} as a hit of the current segment and, when scores are
    * kept, stores its score at the hit's position in collection order.
    */
   @Override
   public final void collect(int doc) throws IOException {
     docsBuilder.grow(1).add(doc);
     if (keepScores) {
       if (totalHits >= scores.length) {
         // scores array is full: move the scores recorded so far into a larger one
         float[] newScores = new float[ArrayUtil.oversize(totalHits + 1, 4)];
         System.arraycopy(scores, 0, newScores, 0, totalHits);
         scores = newScores;
       }
       scores[totalHits] = scorer.score();
     }
     totalHits++;
   }

   @Override
   public ScoreMode scoreMode() {
     return ScoreMode.COMPLETE;
   }

   @Override
   public final void setScorer(Scorable scorer) throws IOException {
     this.scorer = scorer;
   }

   /**
    * Closes the previous segment (if any) and resets the per-segment state
    * for {@code context}.
    */
   @Override
   protected void doSetNextReader(LeafReaderContext context) throws IOException {
     finishCurrentSegment();
     docsBuilder = new DocIdSetBuilder(context.reader().maxDoc());
     totalHits = 0;
     if (keepScores) {
       scores = new float[64]; // some initial size
     }
     this.context = context;
   }

   /**
    * Utility method, to search and also collect all hits
    * into the provided {@link Collector}.
    *
    * @param n maximum number of top hits to return; with {@code 0} only the
    *          total hit count is computed
    */
   public static TopDocs search(IndexSearcher searcher, Query q, int n, Collector fc) throws IOException {
     return doSearch(searcher, null, q, n, null, false, fc);
   }

   /**
    * Utility method, to search and also collect all hits
    * into the provided {@link Collector}, with the top hits ordered by {@code sort}.
    *
    * @param n maximum number of top hits to return; with {@code 0} the result
    *          has no hits and only carries the total hit count
    * @throws IllegalArgumentException if {@code sort} is null
    */
   public static TopFieldDocs search(IndexSearcher searcher, Query q, int n, Sort sort, Collector fc) throws IOException {
     if (sort == null) {
       throw new IllegalArgumentException("sort must not be null");
     }
     return (TopFieldDocs) doSearch(searcher, null, q, n, sort, false, fc);
   }

   /**
    * Utility method, to search and also collect all hits
    * into the provided {@link Collector}, with the top hits ordered by {@code sort}.
    *
    * @param n           maximum number of top hits to return; with {@code 0} the
    *                    result has no hits and only carries the total hit count
    * @param doDocScores if true, scores are populated on the returned hits
    * @throws IllegalArgumentException if {@code sort} is null
    */
   public static TopFieldDocs search(IndexSearcher searcher, Query q, int n, Sort sort, boolean doDocScores, Collector fc) throws IOException {
     if (sort == null) {
       throw new IllegalArgumentException("sort must not be null");
     }
     return (TopFieldDocs) doSearch(searcher, null, q, n, sort, doDocScores, fc);
   }

   /**
    * Utility method, to search and also collect all hits
    * into the provided {@link Collector}, returning top hits after {@code after}.
    *
    * @throws IllegalArgumentException if {@code after.doc} is not below the
    *         reader's {@code maxDoc()} (1 is used as the bound for an empty reader)
    */
   public static TopDocs searchAfter(IndexSearcher searcher, ScoreDoc after, Query q, int n, Collector fc) throws IOException {
     return doSearch(searcher, after, q, n, null, false, fc);
   }

   /**
    * Utility method, to search and also collect all hits
    * into the provided {@link Collector}, returning sorted top hits after {@code after}.
    *
    * @throws IllegalArgumentException if {@code sort} is null, or if hits are
    *         requested ({@code n > 0}) and {@code after} is non-null but not a {@link FieldDoc}
    */
   public static TopDocs searchAfter(IndexSearcher searcher, ScoreDoc after, Query q, int n, Sort sort, Collector fc) throws IOException {
     if (sort == null) {
       throw new IllegalArgumentException("sort must not be null");
     }
     return doSearch(searcher, after, q, n, sort, false, fc);
   }

   /**
    * Utility method, to search and also collect all hits
    * into the provided {@link Collector}, returning sorted top hits after {@code after}.
    *
    * @param doDocScores if true, scores are populated on the returned hits
    * @throws IllegalArgumentException if {@code sort} is null, or if hits are
    *         requested ({@code n > 0}) and {@code after} is non-null but not a {@link FieldDoc}
    */
   public static TopDocs searchAfter(IndexSearcher searcher, ScoreDoc after, Query q, int n, Sort sort, boolean doDocScores, Collector fc) throws IOException {
     if (sort == null) {
       throw new IllegalArgumentException("sort must not be null");
     }
     return doSearch(searcher, after, q, n, sort, doDocScores, fc);
   }

   /**
    * Shared implementation of the {@code search}/{@code searchAfter} utilities.
    *
    * <p>Runs {@code q} once, feeding every hit both to {@code fc} and to a
    * collector that produces the returned {@link TopDocs}. Whenever
    * {@code sort} is non-null the returned object is a {@link TopFieldDocs},
    * including the {@code n == 0} case, so the sorted {@code search} overloads
    * can safely cast the result.
    *
    * @param after       hit to continue after, or null
    * @param n           requested number of top hits; capped at the reader's
    *                    {@code maxDoc()} (or 1 if the reader is empty)
    * @param sort        sort order for the top hits, or null to rank by score
    * @param doDocScores if true (and {@code n > 0}), populate scores on the returned hits
    * @param fc          additional collector that receives all hits
    */
   private static TopDocs doSearch(IndexSearcher searcher, ScoreDoc after, Query q, int n, Sort sort,
                                   boolean doDocScores, Collector fc) throws IOException {

     int limit = searcher.getIndexReader().maxDoc();
     if (limit == 0) {
       limit = 1;
     }
     n = Math.min(n, limit);

     if (after != null && after.doc >= limit) {
       throw new IllegalArgumentException("after.doc exceeds the number of documents in the reader: after.doc="
                                          + after.doc + " limit=" + limit);
     }

     TopDocs topDocs = null;
     if (n==0) {
       // No top hits requested: only count the hits while still feeding fc.
       TotalHitCountCollector totalHitCountCollector = new TotalHitCountCollector();
       searcher.search(q, MultiCollector.wrap(totalHitCountCollector, fc));
       TotalHits hitCount = new TotalHits(totalHitCountCollector.getTotalHits(), TotalHits.Relation.EQUAL_TO);
       if (sort != null) {
         // The sorted search(...) overloads cast the result to TopFieldDocs;
         // a plain TopDocs here would make them fail with ClassCastException.
         topDocs = new TopFieldDocs(hitCount, new ScoreDoc[0], sort.getSort());
       } else {
         topDocs = new TopDocs(hitCount, new ScoreDoc[0]);
       }
     } else {
       TopDocsCollector<?> hitsCollector;
       if (sort != null) {
         if (after != null && !(after instanceof FieldDoc)) {
           // TODO: if we fix type safety of TopFieldDocs we can
           // remove this
           throw new IllegalArgumentException("after must be a FieldDoc; got " + after);
         }
         hitsCollector = TopFieldCollector.create(sort, n,
                                                  (FieldDoc) after,
                                                  Integer.MAX_VALUE); // TODO: can we disable exact hit counts
       } else {
         hitsCollector = TopScoreDocCollector.create(n, after, Integer.MAX_VALUE);
       }
       searcher.search(q, MultiCollector.wrap(hitsCollector, fc));

       topDocs = hitsCollector.topDocs();
       if (doDocScores) {
         TopFieldCollector.populateScores(topDocs.scoreDocs, searcher, q);
       }
     }
     return topDocs;
   }
 }
	/*
	* Licensed to the Apache Software Foundation (ASF) under one or more
	* contributor license agreements. See the NOTICE file distributed with
	* this work for additional information regarding copyright ownership.
	* The ASF licenses this file to You under the Apache License, Version 2.0
	* (the "License"); you may not use this file except in compliance with
	* the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/
	package org.apache.lucene.facet;

	import java.io.IOException;
	import java.util.ArrayList;
	import java.util.List;

	import org.apache.lucene.index.LeafReaderContext;
	import org.apache.lucene.search.Collector;
	import org.apache.lucene.search.DocIdSet;
	import org.apache.lucene.search.FieldDoc;
	import org.apache.lucene.search.IndexSearcher;
	import org.apache.lucene.search.MultiCollector;
	import org.apache.lucene.search.Query;
	import org.apache.lucene.search.Scorable;
	import org.apache.lucene.search.ScoreDoc;
	import org.apache.lucene.search.ScoreMode;
	import org.apache.lucene.search.SimpleCollector;
	import org.apache.lucene.search.Sort;
	import org.apache.lucene.search.TopDocs;
	import org.apache.lucene.search.TopDocsCollector;
	import org.apache.lucene.search.TopFieldCollector;
	import org.apache.lucene.search.TopFieldDocs;
	import org.apache.lucene.search.TopScoreDocCollector;
	import org.apache.lucene.search.TotalHitCountCollector;
	import org.apache.lucene.search.TotalHits;
	import org.apache.lucene.util.ArrayUtil;
	import org.apache.lucene.util.DocIdSetBuilder;

	/** Collects hits for subsequent faceting. Once you've run
	 * a search and collected hits into this, instantiate one of
	 * the {@link Facets} subclasses to do the facet
	 * counting. Use the {@code search} utility methods to
	 * perform an "ordinary" search but also collect into a
	 * {@link Collector}.
	 *
	 * <p>Hits are gathered per segment: every time a new segment is
	 * started the hits of the previous one are packaged as a
	 * {@link MatchingDocs}; the hits of the last segment are packaged
	 * when {@link #getMatchingDocs()} is called. */
	// redundant 'implements Collector' to workaround javadocs bugs
	public class FacetsCollector extends SimpleCollector implements Collector {

	  /** Segment currently being collected; null when no segment is open. */
	  private LeafReaderContext context;
	  /** Scorer handed to {@link #setScorer}; only read when {@code keepScores} is true. */
	  private Scorable scorer;
	  /** Number of hits collected so far in the current segment. */
	  private int totalHits;
	  /** Scores of the current segment's hits, in collection order; stays null unless {@code keepScores}. */
	  private float[] scores;
	  private final boolean keepScores;
	  /** One entry per finished segment. */
	  private final List<MatchingDocs> matchingDocs = new ArrayList<>();
	  /** Doc id accumulator for the current segment; null when no segment is open. */
	  private DocIdSetBuilder docsBuilder;

	  /**
	   * Holds the documents that were matched in the {@link org.apache.lucene.index.LeafReaderContext}.
	   * If scores were required, then {@code scores} is not null.
	   */
	  public final static class MatchingDocs {

	    /** Context for this segment. */
	    public final LeafReaderContext context;

	    /** Which documents were seen. */
	    public final DocIdSet bits;

	    /** Non-sparse scores array. */
	    public final float[] scores;

	    /** Total number of hits */
	    public final int totalHits;

	    /**
	     * Sole constructor.
	     *
	     * @param context   segment the hits belong to
	     * @param bits      set of matched doc ids
	     * @param totalHits number of hits collected in the segment
	     * @param scores    scores of the hits, or null if scores were not kept
	     */
	    public MatchingDocs(LeafReaderContext context, DocIdSet bits, int totalHits, float[] scores) {
	      this.context = context;
	      this.bits = bits;
	      this.scores = scores;
	      this.totalHits = totalHits;
	    }
	  }

	  /** Default constructor; scores are not kept. */
	  public FacetsCollector() {
	    this(false);
	  }

	  /** Create this; if {@code keepScores} is true then a
	   * float[] is allocated to hold score of all hits. */
	  public FacetsCollector(boolean keepScores) {
	    this.keepScores = keepScores;
	  }

	  /** True if scores were saved. */
	  public final boolean getKeepScores() {
	    return keepScores;
	  }

	  /**
	   * Returns the documents matched by the query, one {@link MatchingDocs} per
	   * visited segment.
	   *
	   * <p>If a segment is still open, its hits are packaged first and the
	   * per-segment state is cleared.
	   */
	  public List<MatchingDocs> getMatchingDocs() {
	    finishCurrentSegment();
	    return matchingDocs;
	  }

	  /**
	   * Records the hits collected for the open segment, if there is one, as a
	   * {@link MatchingDocs} and clears the per-segment state.
	   */
	  private void finishCurrentSegment() {
	    if (docsBuilder != null) {
	      matchingDocs.add(new MatchingDocs(context, docsBuilder.build(), totalHits, scores));
	      docsBuilder = null;
	      scores = null;
	      context = null;
	    }
	  }

	  /**
	   * Records {@code doc} as a hit of the current segment and, when scores are
	   * kept, stores its score at the hit's position in collection order.
	   */
	  @Override
	  public final void collect(int doc) throws IOException {
	    docsBuilder.grow(1).add(doc);
	    if (keepScores) {
	      if (totalHits >= scores.length) {
	        // scores array is full: move the scores recorded so far into a larger one
	        float[] newScores = new float[ArrayUtil.oversize(totalHits + 1, 4)];
	        System.arraycopy(scores, 0, newScores, 0, totalHits);
	        scores = newScores;
	      }
	      scores[totalHits] = scorer.score();
	    }
	    totalHits++;
	  }

	  @Override
	  public ScoreMode scoreMode() {
	    return ScoreMode.COMPLETE;
	  }

	  @Override
	  public final void setScorer(Scorable scorer) throws IOException {
	    this.scorer = scorer;
	  }

	  /**
	   * Closes the previous segment (if any) and resets the per-segment state
	   * for {@code context}.
	   */
	  @Override
	  protected void doSetNextReader(LeafReaderContext context) throws IOException {
	    finishCurrentSegment();
	    docsBuilder = new DocIdSetBuilder(context.reader().maxDoc());
	    totalHits = 0;
	    if (keepScores) {
	      scores = new float[64]; // some initial size
	    }
	    this.context = context;
	  }

	  /**
	   * Utility method, to search and also collect all hits
	   * into the provided {@link Collector}.
	   *
	   * @param n maximum number of top hits to return; with {@code 0} only the
	   *          total hit count is computed
	   */
	  public static TopDocs search(IndexSearcher searcher, Query q, int n, Collector fc) throws IOException {
	    return doSearch(searcher, null, q, n, null, false, fc);
	  }

	  /**
	   * Utility method, to search and also collect all hits
	   * into the provided {@link Collector}, with the top hits ordered by {@code sort}.
	   *
	   * @param n maximum number of top hits to return; with {@code 0} the result
	   *          has no hits and only carries the total hit count
	   * @throws IllegalArgumentException if {@code sort} is null
	   */
	  public static TopFieldDocs search(IndexSearcher searcher, Query q, int n, Sort sort, Collector fc) throws IOException {
	    if (sort == null) {
	      throw new IllegalArgumentException("sort must not be null");
	    }
	    return (TopFieldDocs) doSearch(searcher, null, q, n, sort, false, fc);
	  }

	  /**
	   * Utility method, to search and also collect all hits
	   * into the provided {@link Collector}, with the top hits ordered by {@code sort}.
	   *
	   * @param n           maximum number of top hits to return; with {@code 0} the
	   *                    result has no hits and only carries the total hit count
	   * @param doDocScores if true, scores are populated on the returned hits
	   * @throws IllegalArgumentException if {@code sort} is null
	   */
	  public static TopFieldDocs search(IndexSearcher searcher, Query q, int n, Sort sort, boolean doDocScores, Collector fc) throws IOException {
	    if (sort == null) {
	      throw new IllegalArgumentException("sort must not be null");
	    }
	    return (TopFieldDocs) doSearch(searcher, null, q, n, sort, doDocScores, fc);
	  }

	  /**
	   * Utility method, to search and also collect all hits
	   * into the provided {@link Collector}, returning top hits after {@code after}.
	   *
	   * @throws IllegalArgumentException if {@code after.doc} is not below the
	   *         reader's {@code maxDoc()} (1 is used as the bound for an empty reader)
	   */
	  public static TopDocs searchAfter(IndexSearcher searcher, ScoreDoc after, Query q, int n, Collector fc) throws IOException {
	    return doSearch(searcher, after, q, n, null, false, fc);
	  }

	  /**
	   * Utility method, to search and also collect all hits
	   * into the provided {@link Collector}, returning sorted top hits after {@code after}.
	   *
	   * @throws IllegalArgumentException if {@code sort} is null, or if hits are
	   *         requested ({@code n > 0}) and {@code after} is non-null but not a {@link FieldDoc}
	   */
	  public static TopDocs searchAfter(IndexSearcher searcher, ScoreDoc after, Query q, int n, Sort sort, Collector fc) throws IOException {
	    if (sort == null) {
	      throw new IllegalArgumentException("sort must not be null");
	    }
	    return doSearch(searcher, after, q, n, sort, false, fc);
	  }

	  /**
	   * Utility method, to search and also collect all hits
	   * into the provided {@link Collector}, returning sorted top hits after {@code after}.
	   *
	   * @param doDocScores if true, scores are populated on the returned hits
	   * @throws IllegalArgumentException if {@code sort} is null, or if hits are
	   *         requested ({@code n > 0}) and {@code after} is non-null but not a {@link FieldDoc}
	   */
	  public static TopDocs searchAfter(IndexSearcher searcher, ScoreDoc after, Query q, int n, Sort sort, boolean doDocScores, Collector fc) throws IOException {
	    if (sort == null) {
	      throw new IllegalArgumentException("sort must not be null");
	    }
	    return doSearch(searcher, after, q, n, sort, doDocScores, fc);
	  }

	  /**
	   * Shared implementation of the {@code search}/{@code searchAfter} utilities.
	   *
	   * <p>Runs {@code q} once, feeding every hit both to {@code fc} and to a
	   * collector that produces the returned {@link TopDocs}. Whenever
	   * {@code sort} is non-null the returned object is a {@link TopFieldDocs},
	   * including the {@code n == 0} case, so the sorted {@code search} overloads
	   * can safely cast the result.
	   *
	   * @param after       hit to continue after, or null
	   * @param n           requested number of top hits; capped at the reader's
	   *                    {@code maxDoc()} (or 1 if the reader is empty)
	   * @param sort        sort order for the top hits, or null to rank by score
	   * @param doDocScores if true (and {@code n > 0}), populate scores on the returned hits
	   * @param fc          additional collector that receives all hits
	   */
	  private static TopDocs doSearch(IndexSearcher searcher, ScoreDoc after, Query q, int n, Sort sort,
	                                  boolean doDocScores, Collector fc) throws IOException {

	    int limit = searcher.getIndexReader().maxDoc();
	    if (limit == 0) {
	      limit = 1;
	    }
	    n = Math.min(n, limit);

	    if (after != null && after.doc >= limit) {
	      throw new IllegalArgumentException("after.doc exceeds the number of documents in the reader: after.doc="
	                                         + after.doc + " limit=" + limit);
	    }

	    TopDocs topDocs = null;
	    if (n==0) {
	      // No top hits requested: only count the hits while still feeding fc.
	      TotalHitCountCollector totalHitCountCollector = new TotalHitCountCollector();
	      searcher.search(q, MultiCollector.wrap(totalHitCountCollector, fc));
	      TotalHits hitCount = new TotalHits(totalHitCountCollector.getTotalHits(), TotalHits.Relation.EQUAL_TO);
	      if (sort != null) {
	        // The sorted search(...) overloads cast the result to TopFieldDocs;
	        // a plain TopDocs here would make them fail with ClassCastException.
	        topDocs = new TopFieldDocs(hitCount, new ScoreDoc[0], sort.getSort());
	      } else {
	        topDocs = new TopDocs(hitCount, new ScoreDoc[0]);
	      }
	    } else {
	      TopDocsCollector<?> hitsCollector;
	      if (sort != null) {
	        if (after != null && !(after instanceof FieldDoc)) {
	          // TODO: if we fix type safety of TopFieldDocs we can
	          // remove this
	          throw new IllegalArgumentException("after must be a FieldDoc; got " + after);
	        }
	        hitsCollector = TopFieldCollector.create(sort, n,
	                                                 (FieldDoc) after,
	                                                 Integer.MAX_VALUE); // TODO: can we disable exact hit counts
	      } else {
	        hitsCollector = TopScoreDocCollector.create(n, after, Integer.MAX_VALUE);
	      }
	      searcher.search(q, MultiCollector.wrap(hitsCollector, fc));

	      topDocs = hitsCollector.topDocs();
	      if (doDocScores) {
	        TopFieldCollector.populateScores(topDocs.scoreDocs, searcher, q);
	      }
	    }
	    return topDocs;
	  }
	}