| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| package org.apache.lucene.facet.taxonomy; |
| |
| import java.io.IOException; |
| import java.util.Arrays; |
| import java.util.List; |
| |
| import org.apache.lucene.facet.FacetsCollector.MatchingDocs; |
| import org.apache.lucene.facet.FacetsCollector; |
| import org.apache.lucene.facet.FacetsConfig; |
| import org.apache.lucene.index.BinaryDocValues; |
| import org.apache.lucene.index.IndexReader; |
| import org.apache.lucene.index.LeafReaderContext; |
| import org.apache.lucene.search.ConjunctionDISI; |
| import org.apache.lucene.search.DocIdSetIterator; |
| import org.apache.lucene.search.MatchAllDocsQuery; |
| import org.apache.lucene.util.Bits; |
| import org.apache.lucene.util.BytesRef; |
| |
| /** Computes facets counts, assuming the default encoding |
| * into DocValues was used. |
| * |
| * @lucene.experimental */ |
| public class FastTaxonomyFacetCounts extends IntTaxonomyFacets { |
| |
| /** Create {@code FastTaxonomyFacetCounts}, which also |
| * counts all facet labels. */ |
| public FastTaxonomyFacetCounts(TaxonomyReader taxoReader, FacetsConfig config, FacetsCollector fc) throws IOException { |
| this(FacetsConfig.DEFAULT_INDEX_FIELD_NAME, taxoReader, config, fc); |
| } |
| |
| /** Create {@code FastTaxonomyFacetCounts}, using the |
| * specified {@code indexFieldName} for ordinals. Use |
| * this if you had set {@link |
| * FacetsConfig#setIndexFieldName} to change the index |
| * field name for certain dimensions. */ |
| public FastTaxonomyFacetCounts(String indexFieldName, TaxonomyReader taxoReader, FacetsConfig config, FacetsCollector fc) throws IOException { |
| super(indexFieldName, taxoReader, config, fc); |
| count(fc.getMatchingDocs()); |
| } |
| |
| /** Create {@code FastTaxonomyFacetCounts}, using the |
| * specified {@code indexFieldName} for ordinals, and |
| * counting all non-deleted documents in the index. This is |
| * the same result as searching on {@link MatchAllDocsQuery}, |
| * but faster */ |
| public FastTaxonomyFacetCounts(String indexFieldName, IndexReader reader, TaxonomyReader taxoReader, FacetsConfig config) throws IOException { |
| super(indexFieldName, taxoReader, config, null); |
| countAll(reader); |
| } |
| |
| private final void count(List<MatchingDocs> matchingDocs) throws IOException { |
| for(MatchingDocs hits : matchingDocs) { |
| BinaryDocValues dv = hits.context.reader().getBinaryDocValues(indexFieldName); |
| if (dv == null) { // this reader does not have DocValues for the requested category list |
| continue; |
| } |
| |
| DocIdSetIterator it = ConjunctionDISI.intersectIterators(Arrays.asList( |
| hits.bits.iterator(), dv)); |
| |
| for (int doc = it.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = it.nextDoc()) { |
| final BytesRef bytesRef = dv.binaryValue(); |
| byte[] bytes = bytesRef.bytes; |
| int end = bytesRef.offset + bytesRef.length; |
| int ord = 0; |
| int offset = bytesRef.offset; |
| int prev = 0; |
| while (offset < end) { |
| byte b = bytes[offset++]; |
| if (b >= 0) { |
| prev = ord = ((ord << 7) | b) + prev; |
| increment(ord); |
| ord = 0; |
| } else { |
| ord = (ord << 7) | (b & 0x7F); |
| } |
| } |
| } |
| } |
| |
| rollup(); |
| } |
| |
| private final void countAll(IndexReader reader) throws IOException { |
| for(LeafReaderContext context : reader.leaves()) { |
| BinaryDocValues dv = context.reader().getBinaryDocValues(indexFieldName); |
| if (dv == null) { // this reader does not have DocValues for the requested category list |
| continue; |
| } |
| |
| Bits liveDocs = context.reader().getLiveDocs(); |
| |
| for (int doc = dv.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = dv.nextDoc()) { |
| if (liveDocs != null && liveDocs.get(doc) == false) { |
| continue; |
| } |
| final BytesRef bytesRef = dv.binaryValue(); |
| byte[] bytes = bytesRef.bytes; |
| int end = bytesRef.offset + bytesRef.length; |
| int ord = 0; |
| int offset = bytesRef.offset; |
| int prev = 0; |
| while (offset < end) { |
| byte b = bytes[offset++]; |
| if (b >= 0) { |
| prev = ord = ((ord << 7) | b) + prev; |
| increment(ord); |
| ord = 0; |
| } else { |
| ord = (ord << 7) | (b & 0x7F); |
| } |
| } |
| } |
| } |
| |
| rollup(); |
| } |
| } |