blob: fa1c085dc04990fa75bac9f5eb646c76a6e2ace8 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.search.facet;
import java.util.HashMap;
import java.util.Map;
import org.apache.solr.common.SolrException;
import org.apache.solr.schema.FieldType;
import org.apache.solr.schema.NumberType;
import org.apache.solr.schema.SchemaField;
/**
 * A JSON Facet API request to facet on the terms of a single field (the analog of the classic
 * {@code facet.field} param). Holds the parsed request options and, via
 * {@link #createFacetProcessor}, selects the concrete {@code FacetProcessor} implementation best
 * suited to the field's schema properties and the requested options.
 */
public class FacetField extends FacetRequestSorted {
  /** Default number of buckets returned when the request does not specify a limit. */
  public static final int DEFAULT_FACET_LIMIT = 10;

  // Name of the schema field to facet on.
  String field;
  // If true, include a "missing" bucket for documents with no value in the field.
  boolean missing;
  boolean allBuckets; // show cumulative stats across all buckets (this can be different than non-bucketed stats across all docs because of multi-valued docs)
  // If true, report the total number of distinct buckets in the response.
  boolean numBuckets;
  // Only produce buckets for terms starting with this prefix (string fields only).
  String prefix;
  // User-requested collection strategy; null or SMART lets createFacetProcessor choose.
  FacetMethod method;
  int cacheDf; // 0 means "default", -1 means "never cache"

  // experimental - force perSeg collection when using dv method, currently for testing purposes only.
  Boolean perSeg;

  {
    // defaults for FacetRequestSorted
    mincount = 1;
    limit = DEFAULT_FACET_LIMIT;
  }

  /** The available strategies for collecting facet counts over a field. */
  public enum FacetMethod {
    DV,  // DocValues, collect into ordinal array
    UIF, // UnInvertedField, collect into ordinal array
    DVHASH, // DocValues, collect into hash
    ENUM, // TermsEnum then intersect DocSet (stream-able)
    STREAM, // presently equivalent to ENUM
    SMART,
    ;

    /**
     * Parses a user-supplied method name into a {@link FacetMethod}.
     *
     * @param method the lowercase method name from the request; null or empty selects
     *               {@link #DEFAULT_METHOD}
     * @throws SolrException with {@code BAD_REQUEST} if the name is not recognized
     */
    public static FacetMethod fromString(String method) {
      if (method == null || method.length()==0) return DEFAULT_METHOD;
      switch (method) {
        case "dv": return DV;
        case "uif": return UIF;
        case "dvhash": return DVHASH;
        case "enum": return ENUM;
        case "stream": return STREAM; // TODO replace with enum?
        case "smart": return SMART;
        default:
          throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Unknown FacetField method " + method);
      }
    }

    static FacetMethod DEFAULT_METHOD = SMART; // non-final for tests to vary
  }

  /**
   * Picks and constructs the {@code FacetProcessor} implementation for this request, based on the
   * field's schema properties (multi-valued, docValues, numeric, point field, uninvertible) and the
   * requested options (method, prefix, mincount, sort, allBuckets/numBuckets/missing).
   *
   * <p>NOTE: the order of the checks below is significant — refinement requests short-circuit
   * first, then invalid numeric combinations are rejected, then streaming eligibility is tested,
   * and only then are the single- vs multi-valued cases dispatched.
   *
   * @throws SolrException with {@code BAD_REQUEST} for unsupported option combinations on numeric
   *                       fields, or {@code SERVER_ERROR} if no algorithm can be picked
   */
  @Override
  @SuppressWarnings("rawtypes")
  public FacetProcessor createFacetProcessor(FacetContext fcontext) {
    SchemaField sf = fcontext.searcher.getSchema().getField(field);
    FieldType ft = sf.getType();
    boolean multiToken = sf.multiValued() || ft.multiValuedFieldCache();

    if (fcontext.facetInfo != null) {
      // refinement... we will end up either skipping the entire facet, or calculating only specific facet buckets
      if (multiToken && !sf.hasDocValues() && method!=FacetMethod.DV && sf.isUninvertible()) {
        // Match the access method from the first phase.
        // It won't always matter, but does currently for an all-values bucket
        return new FacetFieldProcessorByArrayUIF(fcontext, this, sf);
      }
      return new FacetFieldProcessorByArrayDV(fcontext, this, sf);
    }

    NumberType ntype = ft.getNumberType();
    // ensure we can support the requested options for numeric faceting:
    if (ntype != null) {
      if (prefix != null) {
        throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
            "Doesn't make sense to set facet prefix on a numeric field");
      }
      if (mincount == 0) {
        throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
            "Numeric fields do not support facet mincount=0; try indexing as terms");
        // TODO if indexed=true then we could add support
      }
    }

    // TODO auto-pick ENUM/STREAM SOLR-9351 when index asc and DocSet cardinality is *not* much smaller than term cardinality
    if (method == FacetMethod.ENUM) {// at the moment these two are the same
      method = FacetMethod.STREAM;
    }
    if (method == FacetMethod.STREAM && sf.indexed() && !ft.isPointField() &&
        // streaming doesn't support allBuckets, numBuckets or missing
        // so, don't use stream processor if anyone of them is enabled
        !(allBuckets || numBuckets || missing) &&
        // whether we can use stream processing depends on whether this is a shard request, whether
        // re-sorting has been requested, and if the effective sort during collection is "index asc"
        ( fcontext.isShard()
            // for a shard request, the effective per-shard sort must be index asc
            ? FacetSort.INDEX_ASC.equals(null == prelim_sort ? sort : prelim_sort)
            // for a non-shard request, we can only use streaming if there is no pre-sorting
            : (null == prelim_sort && FacetSort.INDEX_ASC.equals( sort ) ) ) ) {
      return new FacetFieldProcessorByEnumTermsStream(fcontext, this, sf);
    }

    // TODO if method=UIF and not single-valued numerics then simply choose that now? TODO add FieldType.getDocValuesType()

    if (!multiToken) {
      if (mincount > 0 && prefix == null && (ntype != null || method == FacetMethod.DVHASH)) {
        // TODO can we auto-pick for strings when term cardinality is much greater than DocSet cardinality?
        // or if we don't know cardinality but DocSet size is very small
        return new FacetFieldProcessorByHashDV(fcontext, this, sf);
      } else if (ntype == null) {
        // single valued string...
        return new FacetFieldProcessorByArrayDV(fcontext, this, sf);
      } else {
        // single-valued numeric with mincount=0 or prefix was already rejected above; anything
        // remaining here has no workable algorithm
        throw new SolrException(SolrException.ErrorCode.SERVER_ERROR,
            "Couldn't pick facet algorithm for field " + sf);
      }
    }

    if (sf.hasDocValues() && sf.getType().isPointField()) {
      return new FacetFieldProcessorByHashDV(fcontext, this, sf);
    }

    // multi-valued after this point

    if (sf.hasDocValues() || method == FacetMethod.DV || !sf.isUninvertible()) {
      // single and multi-valued string docValues
      return new FacetFieldProcessorByArrayDV(fcontext, this, sf);
    }

    // Top-level multi-valued field cache (UIF)
    return new FacetFieldProcessorByArrayUIF(fcontext, this, sf);
  }

  /** Returns the merger used to combine per-shard results for this facet. */
  @Override
  public FacetMerger createFacetMerger(Object prototype) {
    return new FacetFieldMerger(this);
  }

  /** Returns a brief description of this facet (field name and limit) for debugging output. */
  @Override
  public Map<String, Object> getFacetDescription() {
    Map<String, Object> descr = new HashMap<>();
    descr.put("field", field);
    descr.put("limit", limit);
    return descr;
  }
}