lucene/facet/src/java/org/apache/lucene/facet/taxonomy/FastTaxonomyFacetCounts.java - lucene-solr - Git at Google

 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
  * this work for additional information regarding copyright ownership.
  * The ASF licenses this file to You under the Apache License, Version 2.0
  * (the "License"); you may not use this file except in compliance with
  * the License.  You may obtain a copy of the License at
  *
  *     http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
 package org.apache.lucene.facet.taxonomy;

 import java.io.IOException;
 import java.util.Arrays;
 import java.util.List;
 import org.apache.lucene.facet.FacetsCollector;
 import org.apache.lucene.facet.FacetsCollector.MatchingDocs;
 import org.apache.lucene.facet.FacetsConfig;
 import org.apache.lucene.index.BinaryDocValues;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.LeafReaderContext;
 import org.apache.lucene.search.ConjunctionDISI;
 import org.apache.lucene.search.DocIdSetIterator;
 import org.apache.lucene.search.MatchAllDocsQuery;
 import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.BytesRef;

 /**
  * Computes facets counts, assuming the default encoding into DocValues was used.
  *
  * @lucene.experimental
  */
 public class FastTaxonomyFacetCounts extends IntTaxonomyFacets {

   /**
    * Create {@code FastTaxonomyFacetCounts}, which also counts all facet labels. Delegates to the
    * four-argument constructor with {@link FacetsConfig#DEFAULT_INDEX_FIELD_NAME} as the index
    * field name.
    *
    * @param taxoReader taxonomy reader passed through to the superclass
    * @param config facets configuration passed through to the superclass
    * @param fc collector whose matching docs are counted
    * @throws IOException if reading the doc values fails
    */
   public FastTaxonomyFacetCounts(TaxonomyReader taxoReader, FacetsConfig config, FacetsCollector fc)
       throws IOException {
     this(FacetsConfig.DEFAULT_INDEX_FIELD_NAME, taxoReader, config, fc);
   }

   /**
    * Create {@code FastTaxonomyFacetCounts}, using the specified {@code indexFieldName} for
    * ordinals. Use this if you had set {@link FacetsConfig#setIndexFieldName} to change the index
    * field name for certain dimensions.
    *
    * @param indexFieldName name of the binary doc values field holding the encoded ordinals
    * @param taxoReader taxonomy reader passed through to the superclass
    * @param config facets configuration passed through to the superclass
    * @param fc collector whose matching docs are counted; must not be {@code null} because {@code
    *     fc.getMatchingDocs()} is called here
    * @throws IOException if reading the doc values fails
    */
   public FastTaxonomyFacetCounts(
       String indexFieldName, TaxonomyReader taxoReader, FacetsConfig config, FacetsCollector fc)
       throws IOException {
     super(indexFieldName, taxoReader, config, fc);
     count(fc.getMatchingDocs());
   }

   /**
    * Create {@code FastTaxonomyFacetCounts}, using the specified {@code indexFieldName} for
    * ordinals, and counting all non-deleted documents in the index. This is the same result as
    * searching on {@link MatchAllDocsQuery}, but faster.
    *
    * @param indexFieldName name of the binary doc values field holding the encoded ordinals
    * @param reader index reader whose leaves are all visited
    * @param taxoReader taxonomy reader passed through to the superclass
    * @param config facets configuration passed through to the superclass
    * @throws IOException if reading the doc values fails
    */
   public FastTaxonomyFacetCounts(
       String indexFieldName, IndexReader reader, TaxonomyReader taxoReader, FacetsConfig config)
       throws IOException {
     // No collector exists on this path, so null is handed to the superclass.
     super(indexFieldName, taxoReader, config, null);
     countAll(reader);
   }

   /**
    * Counts the ordinals of every collected hit that has a value in the ordinals field, then calls
    * {@code rollup()} once after all segments have been processed.
    *
    * @param matchingDocs per-segment hits to count
    * @throws IOException if reading the doc values fails
    */
   private void count(List<MatchingDocs> matchingDocs) throws IOException {
     for (MatchingDocs hits : matchingDocs) {
       BinaryDocValues dv = hits.context.reader().getBinaryDocValues(indexFieldName);
       if (dv == null) { // this reader does not have DocValues for the requested category list
         continue;
       }

       // Visit only documents that are both a hit and have a value in the ordinals field.
       DocIdSetIterator it =
           ConjunctionDISI.intersectIterators(Arrays.asList(hits.bits.iterator(), dv));

       for (int doc = it.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = it.nextDoc()) {
         countOrdinals(dv.binaryValue());
       }
     }

     rollup();
   }

   /**
    * Counts the ordinals of every document that has a value in the ordinals field and is not
    * marked deleted by the segment's live docs, then calls {@code rollup()} once after all
    * segments have been processed.
    *
    * @param reader index reader whose leaves are all visited
    * @throws IOException if reading the doc values fails
    */
   private void countAll(IndexReader reader) throws IOException {
     for (LeafReaderContext context : reader.leaves()) {
       BinaryDocValues dv = context.reader().getBinaryDocValues(indexFieldName);
       if (dv == null) { // this reader does not have DocValues for the requested category list
         continue;
       }

       // A null liveDocs is treated as "nothing to filter out".
       Bits liveDocs = context.reader().getLiveDocs();

       for (int doc = dv.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = dv.nextDoc()) {
         if (liveDocs != null && liveDocs.get(doc) == false) {
           continue;
         }
         countOrdinals(dv.binaryValue());
       }
     }

     rollup();
   }

   /**
    * Decodes the ordinals packed into one document's value and calls {@code increment} once per
    * decoded ordinal.
    *
    * <p>Each value is stored as a sequence of bytes carrying 7 payload bits, most significant
    * group first; a byte with the high bit set means more bytes follow, a byte with the high bit
    * clear ends the value. Every decoded value is a delta that is added to the previously decoded
    * ordinal of the same document (starting from 0).
    *
    * @param bytesRef the slice {@code [offset, offset + length)} of {@code bytes} to decode
    * @throws IOException declared so the helper stays compatible with whatever {@code increment}
    *     declares; the decoding itself performs no I/O
    */
   private void countOrdinals(BytesRef bytesRef) throws IOException {
     final byte[] bytes = bytesRef.bytes;
     final int end = bytesRef.offset + bytesRef.length;
     int offset = bytesRef.offset;
     int ord = 0;
     int prev = 0;
     while (offset < end) {
       byte b = bytes[offset++];
       if (b >= 0) {
         // High bit clear: last byte of this delta; add it to the previous ordinal.
         prev = ord = ((ord << 7) | b) + prev;
         increment(ord);
         ord = 0;
       } else {
         // High bit set: accumulate the low 7 bits and keep reading.
         ord = (ord << 7) | (b & 0x7F);
       }
     }
   }
 }
	/*
	* Licensed to the Apache Software Foundation (ASF) under one or more
	* contributor license agreements. See the NOTICE file distributed with
	* this work for additional information regarding copyright ownership.
	* The ASF licenses this file to You under the Apache License, Version 2.0
	* (the "License"); you may not use this file except in compliance with
	* the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/
	package org.apache.lucene.facet.taxonomy;

	import java.io.IOException;
	import java.util.Arrays;
	import java.util.List;
	import org.apache.lucene.facet.FacetsCollector;
	import org.apache.lucene.facet.FacetsCollector.MatchingDocs;
	import org.apache.lucene.facet.FacetsConfig;
	import org.apache.lucene.index.BinaryDocValues;
	import org.apache.lucene.index.IndexReader;
	import org.apache.lucene.index.LeafReaderContext;
	import org.apache.lucene.search.ConjunctionDISI;
	import org.apache.lucene.search.DocIdSetIterator;
	import org.apache.lucene.search.MatchAllDocsQuery;
	import org.apache.lucene.util.Bits;
	import org.apache.lucene.util.BytesRef;

	/**
	* Computes facets counts, assuming the default encoding into DocValues was used.
	*
	* @lucene.experimental
	*/
	public class FastTaxonomyFacetCounts extends IntTaxonomyFacets {

	/** Create {@code FastTaxonomyFacetCounts}, which also counts all facet labels. */
	public FastTaxonomyFacetCounts(TaxonomyReader taxoReader, FacetsConfig config, FacetsCollector fc)
	throws IOException {
	this(FacetsConfig.DEFAULT_INDEX_FIELD_NAME, taxoReader, config, fc);
	}

	/**
	* Create {@code FastTaxonomyFacetCounts}, using the specified {@code indexFieldName} for
	* ordinals. Use this if you had set {@link FacetsConfig#setIndexFieldName} to change the index
	* field name for certain dimensions.
	*/
	public FastTaxonomyFacetCounts(
	String indexFieldName, TaxonomyReader taxoReader, FacetsConfig config, FacetsCollector fc)
	throws IOException {
	super(indexFieldName, taxoReader, config, fc);
	count(fc.getMatchingDocs());
	}

	/**
	* Create {@code FastTaxonomyFacetCounts}, using the specified {@code indexFieldName} for
	* ordinals, and counting all non-deleted documents in the index. This is the same result as
	* searching on {@link MatchAllDocsQuery}, but faster
	*/
	public FastTaxonomyFacetCounts(
	String indexFieldName, IndexReader reader, TaxonomyReader taxoReader, FacetsConfig config)
	throws IOException {
	super(indexFieldName, taxoReader, config, null);
	countAll(reader);
	}

	private final void count(List<MatchingDocs> matchingDocs) throws IOException {
	for (MatchingDocs hits : matchingDocs) {
	BinaryDocValues dv = hits.context.reader().getBinaryDocValues(indexFieldName);
	if (dv == null) { // this reader does not have DocValues for the requested category list
	continue;
	}

	DocIdSetIterator it =
	ConjunctionDISI.intersectIterators(Arrays.asList(hits.bits.iterator(), dv));

	for (int doc = it.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = it.nextDoc()) {
	final BytesRef bytesRef = dv.binaryValue();
	byte[] bytes = bytesRef.bytes;
	int end = bytesRef.offset + bytesRef.length;
	int ord = 0;
	int offset = bytesRef.offset;
	int prev = 0;
	while (offset < end) {
	byte b = bytes[offset++];
	if (b >= 0) {
	prev = ord = ((ord << 7) \| b) + prev;
	increment(ord);
	ord = 0;
	} else {
	ord = (ord << 7) \| (b & 0x7F);
	}
	}
	}
	}

	rollup();
	}

	private final void countAll(IndexReader reader) throws IOException {
	for (LeafReaderContext context : reader.leaves()) {
	BinaryDocValues dv = context.reader().getBinaryDocValues(indexFieldName);
	if (dv == null) { // this reader does not have DocValues for the requested category list
	continue;
	}

	Bits liveDocs = context.reader().getLiveDocs();

	for (int doc = dv.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = dv.nextDoc()) {
	if (liveDocs != null && liveDocs.get(doc) == false) {
	continue;
	}
	final BytesRef bytesRef = dv.binaryValue();
	byte[] bytes = bytesRef.bytes;
	int end = bytesRef.offset + bytesRef.length;
	int ord = 0;
	int offset = bytesRef.offset;
	int prev = 0;
	while (offset < end) {
	byte b = bytes[offset++];
	if (b >= 0) {
	prev = ord = ((ord << 7) \| b) + prev;
	increment(ord);
	ord = 0;
	} else {
	ord = (ord << 7) \| (b & 0x7F);
	}
	}
	}
	}

	rollup();
	}
	}