solr/core/src/java/org/apache/solr/request/DocValuesStats.java - lucene-solr - Git at Google

 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
  * this work for additional information regarding copyright ownership.
  * The ASF licenses this file to You under the Apache License, Version 2.0
  * (the "License"); you may not use this file except in compliance with
  * the License.  You may obtain a copy of the License at
  *
  *     http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
 package org.apache.solr.request;

 import java.io.IOException;
 import java.util.List;
 import java.util.Map;

 import org.apache.lucene.index.DocValues;
 import org.apache.lucene.index.LeafReaderContext;
 import org.apache.lucene.index.MultiDocValues.MultiSortedSetDocValues;
 import org.apache.lucene.index.MultiDocValues;
 import org.apache.lucene.index.OrdinalMap;
 import org.apache.lucene.index.SortedDocValues;
 import org.apache.lucene.index.SortedSetDocValues;
 import org.apache.lucene.search.DocIdSet;
 import org.apache.lucene.search.DocIdSetIterator;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.LongValues;
 import org.apache.solr.common.SolrException;
 import org.apache.solr.handler.component.FieldFacetStats;
 import org.apache.solr.handler.component.StatsField;
 import org.apache.solr.handler.component.StatsValues;
 import org.apache.solr.handler.component.StatsValuesFactory;
 import org.apache.solr.schema.FieldType;
 import org.apache.solr.schema.SchemaField;
 import org.apache.solr.search.DocSet;
 import org.apache.solr.search.Filter;
 import org.apache.solr.search.SolrIndexSearcher;

 /**
  * Computes term stats for a docvalues field (single- or multi-valued).
  * <p>
  * Instead of working on a top-level reader view (binary search per docid),
  * it collects per segment and maps each segment-local ordinal into the global
  * ordinal space using MultiDocValues' OrdinalMap.
  */
 public class DocValuesStats {
   private DocValuesStats() {}

   /**
    * Builds the {@link StatsValues} of {@code statsField} over the documents in {@code docs}.
    * <p>
    * Term occurrences are first counted per global ordinal, segment by segment, and
    * then fed to the stats accumulator in ordinal order. Documents that have no value
    * for the field are reported through {@link StatsValues#addMissing(int)}.
    *
    * @param searcher   supplies the schema, the top-level docvalues view used for term
    *                   lookups, and the segment leaves that are iterated
    * @param statsField the stats field; it must be backed by a SchemaField (asserted)
    * @param docs       the documents to compute stats over
    * @param facet      names of the fields to compute sub-stats for; may be empty, and
    *                   every named field must be single-valued
    * @return the accumulated stats, including the missing count and one facet entry
    *         per element of {@code facet}
    * @throws SolrException with {@code BAD_REQUEST} if a facet field is multi-valued
    * @throws UnsupportedOperationException if the field has {@code Integer.MAX_VALUE}
    *         or more unique terms
    * @throws IOException propagated from the underlying docvalues / doc-id-set readers
    */
   public static StatsValues getCounts(SolrIndexSearcher searcher, StatsField statsField, DocSet docs, String[] facet) throws IOException {

     final SchemaField schemaField = statsField.getSchemaField();

     assert null != schemaField
       : "DocValuesStats requires a StatsField using a SchemaField";

     final String fieldName = schemaField.getName();
     final FieldType ft = schemaField.getType();
     final StatsValues res = StatsValuesFactory.createStatsValues(statsField);

     // Initialize facetstats, if facets have been passed in (one holder per facet field).
     final FieldFacetStats[] facetStats = new FieldFacetStats[facet.length];
     for (int i = 0; i < facet.length; i++) {
       final String facetField = facet[i];
       // Resolve the facet field once; the same SchemaField serves the check and the holder.
       final SchemaField facetSchemaField = searcher.getSchema().getField(facetField);
       if (facetSchemaField.multiValued()) {
         throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
           "Stats can only facet on single-valued fields, not: " + facetField );
       }
       facetStats[i] = new FieldFacetStats(searcher, facetSchemaField, statsField);
     }
     // TODO: remove multiValuedFieldCache(), check dv type / uninversion type?
     final boolean multiValued = schemaField.multiValued() || ft.multiValuedFieldCache();

     SortedSetDocValues si; // for term lookups only
     OrdinalMap ordinalMap = null; // for mapping per-segment ords to global ones
     if (multiValued) {
       si = searcher.getSlowAtomicReader().getSortedSetDocValues(fieldName);

       if (si instanceof MultiSortedSetDocValues) {
         ordinalMap = ((MultiDocValues.MultiSortedSetDocValues)si).mapping;
       }
     } else {
       SortedDocValues single = searcher.getSlowAtomicReader().getSortedDocValues(fieldName);
       // Expose the single-valued view through the SortedSetDocValues API used below.
       si = single == null ? null : DocValues.singleton(single);
       if (single instanceof MultiDocValues.MultiSortedDocValues) {
         ordinalMap = ((MultiDocValues.MultiSortedDocValues)single).mapping;
       }
     }
     if (si == null) {
       si = DocValues.emptySortedSet();
     }
     // Ordinals are narrowed to int below (array index), so the term count must fit.
     if (si.getValueCount() >= Integer.MAX_VALUE) {
       throw new UnsupportedOperationException("Currently this stats method is limited to " + Integer.MAX_VALUE + " unique terms");
     }

     int missingDocCountTotal = 0;
     final int nTerms = (int) si.getValueCount();
     // count collection array only needs to be as big as the number of terms we are
     // going to collect counts for.
     final int[] counts = new int[nTerms];

     final Filter filter = docs.getTopFilter();
     final List<LeafReaderContext> leaves = searcher.getTopReaderContext().leaves();

     for (int subIndex = 0; subIndex < leaves.size(); subIndex++) {
       final LeafReaderContext leaf = leaves.get(subIndex);
       final DocIdSet dis = filter.getDocIdSet(leaf, null); // solr docsets already exclude any deleted docs
       final DocIdSetIterator disi = dis == null ? null : dis.iterator();
       if (disi == null) {
         continue; // nothing to iterate for this segment
       }

       final int docBase = leaf.docBase;

       if (multiValued) {
         SortedSetDocValues sub = leaf.reader().getSortedSetDocValues(fieldName);
         if (sub == null) {
           sub = DocValues.emptySortedSet();
         }
         final SortedDocValues singleton = DocValues.unwrapSingleton(sub);
         if (singleton != null) {
           // some codecs may optimize SORTED_SET storage for single-valued fields
           missingDocCountTotal += accumSingle(counts, docBase, facetStats, singleton, disi, subIndex, ordinalMap);
         } else {
           missingDocCountTotal += accumMulti(counts, docBase, facetStats, sub, disi, subIndex, ordinalMap);
         }
       } else {
         SortedDocValues sub = leaf.reader().getSortedDocValues(fieldName);
         if (sub == null) {
           sub = DocValues.emptySorted();
         }
         missingDocCountTotal += accumSingle(counts, docBase, facetStats, sub, disi, subIndex, ordinalMap);
       }
     }

     // add results in index order
     for (int ord = 0; ord < counts.length; ord++) {
       final int count = counts[ord];
       if (count > 0) {
         final BytesRef value = si.lookupOrd(ord);
         res.accumulate(value, count);
         for (FieldFacetStats f : facetStats) {
           f.accumulateTermNum(ord, value);
         }
       }
     }
     res.addMissing(missingDocCountTotal);

     // Iterating an empty array is a no-op, so no explicit length check is needed.
     for (FieldFacetStats f : facetStats) {
       Map<String,StatsValues> facetStatsValues = f.facetStatsValues;
       f.accumulateMissing();
       res.addFacet(f.name, facetStatsValues);
     }

     return res;
   }

   /**
    * Accumulates per-segment single-valued stats.
    * <p>
    * For every document produced by {@code disi}: if {@code si} has a value for it, the
    * count of its (global) ordinal is incremented and each facet holder is notified;
    * otherwise each facet holder is told the document is missing.
    *
    * @param counts     per-ordinal counters, indexed by global ordinal when {@code map}
    *                   is non-null and by segment ordinal otherwise; updated in place
    * @param docBase    offset added to the segment-local docid before it is handed to
    *                   the facet holders
    * @param facetStats facet holders to notify; may be empty
    * @param si         the segment's single-valued docvalues
    * @param disi       iterator over the segment-local docids to visit
    * @param subIndex   index of the segment, used to select its ordinal mapping
    * @param map        segment-to-global ordinal mapping, or {@code null} to use
    *                   segment ordinals as they are
    * @return the number of visited documents that had no value
    */
   static int accumSingle(int[] counts, int docBase, FieldFacetStats[] facetStats, SortedDocValues si, DocIdSetIterator disi, int subIndex, OrdinalMap map) throws IOException {
     final LongValues ordMap = map == null ? null : map.getGlobalOrds(subIndex);
     int missingDocCount = 0;
     int doc;
     while ((doc = disi.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
       // Only move the docvalues iterator forward when it is behind the current doc.
       if (doc > si.docID()) {
         si.advance(doc);
       }
       if (doc == si.docID()) {
         int term = si.ordValue();
         if (ordMap != null) {
           // int narrowing mirrors the int-indexed counts array
           term = (int) ordMap.get(term);
         }
         counts[term]++;
         for (FieldFacetStats f : facetStats) {
           f.facetTermNum(docBase + doc, term);
         }
       } else {
         for (FieldFacetStats f : facetStats) {
           f.facetMissingNum(docBase + doc);
         }

         missingDocCount++;
       }
     }
     return missingDocCount;
   }

   /**
    * Accumulates per-segment multi-valued stats.
    * <p>
    * Same contract as {@link #accumSingle}, except that every ordinal of a document is
    * counted and reported to the facet holders.
    *
    * @param counts     per-ordinal counters, indexed by global ordinal when {@code map}
    *                   is non-null and by segment ordinal otherwise; updated in place
    * @param docBase    offset added to the segment-local docid before it is handed to
    *                   the facet holders
    * @param facetStats facet holders to notify; may be empty
    * @param si         the segment's multi-valued docvalues
    * @param disi       iterator over the segment-local docids to visit
    * @param subIndex   index of the segment, used to select its ordinal mapping
    * @param map        segment-to-global ordinal mapping, or {@code null} to use
    *                   segment ordinals as they are
    * @return the number of visited documents that had no value
    */
   static int accumMulti(int[] counts, int docBase, FieldFacetStats[] facetStats, SortedSetDocValues si, DocIdSetIterator disi, int subIndex, OrdinalMap map) throws IOException {
     final LongValues ordMap = map == null ? null : map.getGlobalOrds(subIndex);
     int missingDocCount = 0;
     int doc;
     while ((doc = disi.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
       // Only move the docvalues iterator forward when it is behind the current doc.
       if (doc > si.docID()) {
         si.advance(doc);
       }
       if (doc == si.docID()) {
         long ord;
         while ((ord = si.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) {
           int term = (int) ord;
           if (ordMap != null) {
             // int narrowing mirrors the int-indexed counts array
             term = (int) ordMap.get(term);
           }
           counts[term]++;
           for (FieldFacetStats f : facetStats) {
             f.facetTermNum(docBase + doc, term);
           }
         }
       } else {
         for (FieldFacetStats f : facetStats) {
           f.facetMissingNum(docBase + doc);
         }

         missingDocCount++;
       }
     }

     return missingDocCount;
   }
 }
	/*
	* Licensed to the Apache Software Foundation (ASF) under one or more
	* contributor license agreements. See the NOTICE file distributed with
	* this work for additional information regarding copyright ownership.
	* The ASF licenses this file to You under the Apache License, Version 2.0
	* (the "License"); you may not use this file except in compliance with
	* the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/
	package org.apache.solr.request;

	import java.io.IOException;
	import java.util.List;
	import java.util.Map;

	import org.apache.lucene.index.DocValues;
	import org.apache.lucene.index.LeafReaderContext;
	import org.apache.lucene.index.MultiDocValues.MultiSortedSetDocValues;
	import org.apache.lucene.index.MultiDocValues;
	import org.apache.lucene.index.OrdinalMap;
	import org.apache.lucene.index.SortedDocValues;
	import org.apache.lucene.index.SortedSetDocValues;
	import org.apache.lucene.search.DocIdSet;
	import org.apache.lucene.search.DocIdSetIterator;
	import org.apache.lucene.util.BytesRef;
	import org.apache.lucene.util.LongValues;
	import org.apache.solr.common.SolrException;
	import org.apache.solr.handler.component.FieldFacetStats;
	import org.apache.solr.handler.component.StatsField;
	import org.apache.solr.handler.component.StatsValues;
	import org.apache.solr.handler.component.StatsValuesFactory;
	import org.apache.solr.schema.FieldType;
	import org.apache.solr.schema.SchemaField;
	import org.apache.solr.search.DocSet;
	import org.apache.solr.search.Filter;
	import org.apache.solr.search.SolrIndexSearcher;

	/**
	* Computes term stats for docvalues field (single or multivalued).
	* <p>
	* Instead of working on a top-level reader view (binary-search per docid),
	* it collects per-segment, but maps ordinals to global ordinal space using
	* MultiDocValues' OrdinalMap.
	*/
	public class DocValuesStats {
	private DocValuesStats() {}

	public static StatsValues getCounts(SolrIndexSearcher searcher, StatsField statsField, DocSet docs, String[] facet) throws IOException {

	final SchemaField schemaField = statsField.getSchemaField();

	assert null != statsField.getSchemaField()
	: "DocValuesStats requires a StatsField using a SchemaField";

	final String fieldName = schemaField.getName();
	final FieldType ft = schemaField.getType();
	final StatsValues res = StatsValuesFactory.createStatsValues(statsField);

	//Initialize facetstats, if facets have been passed in
	final FieldFacetStats[] facetStats = new FieldFacetStats[facet.length];
	int upto = 0;

	for (String facetField : facet) {
	SchemaField fsf = searcher.getSchema().getField(facetField);
	if ( fsf.multiValued()) {
	throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
	"Stats can only facet on single-valued fields, not: " + facetField );
	}

	SchemaField facetSchemaField = searcher.getSchema().getField(facetField);
	facetStats[upto++] = new FieldFacetStats(searcher, facetSchemaField, statsField);
	}
	// TODO: remove multiValuedFieldCache(), check dv type / uninversion type?
	final boolean multiValued = schemaField.multiValued() \|\| ft.multiValuedFieldCache();

	SortedSetDocValues si; // for term lookups only
	OrdinalMap ordinalMap = null; // for mapping per-segment ords to global ones
	if (multiValued) {
	si = searcher.getSlowAtomicReader().getSortedSetDocValues(fieldName);

	if (si instanceof MultiSortedSetDocValues) {
	ordinalMap = ((MultiDocValues.MultiSortedSetDocValues)si).mapping;
	}
	} else {
	SortedDocValues single = searcher.getSlowAtomicReader().getSortedDocValues(fieldName);
	si = single == null ? null : DocValues.singleton(single);
	if (single instanceof MultiDocValues.MultiSortedDocValues) {
	ordinalMap = ((MultiDocValues.MultiSortedDocValues)single).mapping;
	}
	}
	if (si == null) {
	si = DocValues.emptySortedSet();
	}
	if (si.getValueCount() >= Integer.MAX_VALUE) {
	throw new UnsupportedOperationException("Currently this stats method is limited to " + Integer.MAX_VALUE + " unique terms");
	}

	int missingDocCountTotal = 0;
	final int nTerms = (int) si.getValueCount();
	// count collection array only needs to be as big as the number of terms we are
	// going to collect counts for.
	final int[] counts = new int[nTerms];

	Filter filter = docs.getTopFilter();
	List<LeafReaderContext> leaves = searcher.getTopReaderContext().leaves();

	for (int subIndex = 0; subIndex < leaves.size(); subIndex++) {
	LeafReaderContext leaf = leaves.get(subIndex);
	DocIdSet dis = filter.getDocIdSet(leaf, null); // solr docsets already exclude any deleted docs
	DocIdSetIterator disi = null;

	if (dis != null) {
	disi = dis.iterator();
	}
	if (disi != null) {
	int docBase = leaf.docBase;

	if (multiValued) {
	SortedSetDocValues sub = leaf.reader().getSortedSetDocValues(fieldName);
	if (sub == null) {
	sub = DocValues.emptySortedSet();
	}
	SortedDocValues singleton = DocValues.unwrapSingleton(sub);
	if (singleton != null) {
	// some codecs may optimize SORTED_SET storage for single-valued fields
	missingDocCountTotal += accumSingle(counts, docBase, facetStats, singleton, disi, subIndex, ordinalMap);
	} else {
	missingDocCountTotal += accumMulti(counts, docBase, facetStats, sub, disi, subIndex, ordinalMap);
	}
	} else {
	SortedDocValues sub = leaf.reader().getSortedDocValues(fieldName);
	if (sub == null) {
	sub = DocValues.emptySorted();
	}
	missingDocCountTotal += accumSingle(counts, docBase, facetStats, sub, disi, subIndex, ordinalMap);
	}
	}
	}
	// add results in index order
	for (int ord = 0; ord < counts.length; ord++) {
	int count = counts[ord];

	if (count > 0) {
	final BytesRef value = si.lookupOrd(ord);
	res.accumulate(value, count);
	for (FieldFacetStats f : facetStats) {
	f.accumulateTermNum(ord, value);
	}
	}
	}
	res.addMissing(missingDocCountTotal);

	if (facetStats.length > 0) {
	for (FieldFacetStats f : facetStats) {
	Map<String,StatsValues> facetStatsValues = f.facetStatsValues;
	f.accumulateMissing();
	res.addFacet(f.name, facetStatsValues);
	}
	}

	return res;
	}

	/** accumulates per-segment single-valued stats */
	static int accumSingle(int counts[], int docBase, FieldFacetStats[] facetStats, SortedDocValues si, DocIdSetIterator disi, int subIndex, OrdinalMap map) throws IOException {
	final LongValues ordMap = map == null ? null : map.getGlobalOrds(subIndex);
	int missingDocCount = 0;
	int doc;
	while ((doc = disi.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
	if (doc > si.docID()) {
	si.advance(doc);
	}
	if (doc == si.docID()) {
	int term = si.ordValue();
	if (map != null) {
	term = (int) ordMap.get(term);
	}
	counts[term]++;
	for (FieldFacetStats f : facetStats) {
	f.facetTermNum(docBase + doc, term);
	}
	}else{
	for (FieldFacetStats f : facetStats) {
	f.facetMissingNum(docBase + doc);
	}

	missingDocCount++;
	}
	}
	return missingDocCount;
	}

	/** accumulates per-segment multi-valued stats */

	static int accumMulti(int counts[], int docBase, FieldFacetStats[] facetStats, SortedSetDocValues si, DocIdSetIterator disi, int subIndex, OrdinalMap map) throws IOException {
	final LongValues ordMap = map == null ? null : map.getGlobalOrds(subIndex);
	int missingDocCount = 0;
	int doc;
	while ((doc = disi.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
	if (doc > si.docID()) {
	si.advance(doc);
	}
	if (doc == si.docID()) {
	long ord;
	while ((ord = si.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) {
	int term = (int) ord;
	if (map != null) {
	term = (int) ordMap.get(term);
	}
	counts[term]++;
	for (FieldFacetStats f : facetStats) {
	f.facetTermNum(docBase + doc, term);
	}
	}
	} else {
	for (FieldFacetStats f : facetStats) {
	f.facetMissingNum(docBase + doc);
	}

	missingDocCount++;
	}
	}

	return missingDocCount;
	}
	}