blob: fa1c085dc04990fa75bac9f5eb646c76a6e2ace8 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.search.facet;
import java.util.HashMap;
import java.util.Map;
import org.apache.solr.common.SolrException;
import org.apache.solr.schema.FieldType;
import org.apache.solr.schema.NumberType;
import org.apache.solr.schema.SchemaField;
/**
 * A JSON Facet API request to facet on the terms of a single field (the analog of the classic
 * {@code facet.field} param). Holds the parsed request options and, via
 * {@link #createFacetProcessor}, selects the concrete {@code FacetProcessor} implementation best
 * suited to the field's schema properties and the requested options.
 */
public class FacetField extends FacetRequestSorted {
  /** Default number of buckets returned when the request does not specify a limit. */
  public static final int DEFAULT_FACET_LIMIT = 10;

  // Name of the schema field to facet on.
  String field;
  // If true, include a "missing" bucket for documents with no value in the field.
  boolean missing;
  boolean allBuckets; // show cumulative stats across all buckets (this can be different than non-bucketed stats across all docs because of multi-valued docs)
  // If true, report the total number of distinct buckets in the response.
  boolean numBuckets;
  // Only produce buckets for terms starting with this prefix (string fields only).
  String prefix;
  // User-requested collection strategy; null or SMART lets createFacetProcessor choose.
  FacetMethod method;
  int cacheDf; // 0 means "default", -1 means "never cache"

  // experimental - force perSeg collection when using dv method, currently for testing purposes only.
  Boolean perSeg;

  {
    // defaults for FacetRequestSorted
    mincount = 1;
    limit = DEFAULT_FACET_LIMIT;
  }

  /** The available strategies for collecting facet counts over a field. */
  public enum FacetMethod {
    DV,  // DocValues, collect into ordinal array
    UIF, // UnInvertedField, collect into ordinal array
    DVHASH, // DocValues, collect into hash
    ENUM, // TermsEnum then intersect DocSet (stream-able)
    STREAM, // presently equivalent to ENUM
    SMART,
    ;

    /**
     * Parses a user-supplied method name into a {@link FacetMethod}.
     *
     * @param method the lowercase method name from the request; null or empty selects
     *               {@link #DEFAULT_METHOD}
     * @throws SolrException with {@code BAD_REQUEST} if the name is not recognized
     */
    public static FacetMethod fromString(String method) {
      if (method == null || method.length()==0) return DEFAULT_METHOD;
      switch (method) {
        case "dv": return DV;
        case "uif": return UIF;
        case "dvhash": return DVHASH;
        case "enum": return ENUM;
        case "stream": return STREAM; // TODO replace with enum?
        case "smart": return SMART;
        default:
          throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Unknown FacetField method " + method);
      }
    }

    static FacetMethod DEFAULT_METHOD = SMART; // non-final for tests to vary
  }

  /**
   * Picks and constructs the {@code FacetProcessor} implementation for this request, based on the
   * field's schema properties (multi-valued, docValues, numeric, point field, uninvertible) and the
   * requested options (method, prefix, mincount, sort, allBuckets/numBuckets/missing).
   *
   * <p>NOTE: the order of the checks below is significant — refinement requests short-circuit
   * first, then invalid numeric combinations are rejected, then streaming eligibility is tested,
   * and only then are the single- vs multi-valued cases dispatched.
   *
   * @throws SolrException with {@code BAD_REQUEST} for unsupported option combinations on numeric
   *                       fields, or {@code SERVER_ERROR} if no algorithm can be picked
   */
  @Override
  @SuppressWarnings("rawtypes")
  public FacetProcessor createFacetProcessor(FacetContext fcontext) {
    SchemaField sf = fcontext.searcher.getSchema().getField(field);
    FieldType ft = sf.getType();
    boolean multiToken = sf.multiValued() || ft.multiValuedFieldCache();

    if (fcontext.facetInfo != null) {
      // refinement... we will end up either skipping the entire facet, or calculating only specific facet buckets
      if (multiToken && !sf.hasDocValues() && method!=FacetMethod.DV && sf.isUninvertible()) {
        // Match the access method from the first phase.
        // It won't always matter, but does currently for an all-values bucket
        return new FacetFieldProcessorByArrayUIF(fcontext, this, sf);
      }
      return new FacetFieldProcessorByArrayDV(fcontext, this, sf);
    }

    NumberType ntype = ft.getNumberType();
    // ensure we can support the requested options for numeric faceting:
    if (ntype != null) {
      if (prefix != null) {
        throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
            "Doesn't make sense to set facet prefix on a numeric field");
      }
      if (mincount == 0) {
        throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
            "Numeric fields do not support facet mincount=0; try indexing as terms");
        // TODO if indexed=true then we could add support
      }
    }

    // TODO auto-pick ENUM/STREAM SOLR-9351 when index asc and DocSet cardinality is *not* much smaller than term cardinality
    if (method == FacetMethod.ENUM) {// at the moment these two are the same
      method = FacetMethod.STREAM;
    }
    if (method == FacetMethod.STREAM && sf.indexed() && !ft.isPointField() &&
        // streaming doesn't support allBuckets, numBuckets or missing
        // so, don't use stream processor if anyone of them is enabled
        !(allBuckets || numBuckets || missing) &&
        // whether we can use stream processing depends on whether this is a shard request, whether
        // re-sorting has been requested, and if the effective sort during collection is "index asc"
        ( fcontext.isShard()
            // for a shard request, the effective per-shard sort must be index asc
            ? FacetSort.INDEX_ASC.equals(null == prelim_sort ? sort : prelim_sort)
            // for a non-shard request, we can only use streaming if there is no pre-sorting
            : (null == prelim_sort && FacetSort.INDEX_ASC.equals( sort ) ) ) ) {
      return new FacetFieldProcessorByEnumTermsStream(fcontext, this, sf);
    }

    // TODO if method=UIF and not single-valued numerics then simply choose that now? TODO add FieldType.getDocValuesType()

    if (!multiToken) {
      if (mincount > 0 && prefix == null && (ntype != null || method == FacetMethod.DVHASH)) {
        // TODO can we auto-pick for strings when term cardinality is much greater than DocSet cardinality?
        // or if we don't know cardinality but DocSet size is very small
        return new FacetFieldProcessorByHashDV(fcontext, this, sf);
      } else if (ntype == null) {
        // single valued string...
        return new FacetFieldProcessorByArrayDV(fcontext, this, sf);
      } else {
        // single-valued numeric with mincount=0 or prefix was already rejected above; anything
        // remaining here has no workable algorithm
        throw new SolrException(SolrException.ErrorCode.SERVER_ERROR,
            "Couldn't pick facet algorithm for field " + sf);
      }
    }

    if (sf.hasDocValues() && sf.getType().isPointField()) {
      return new FacetFieldProcessorByHashDV(fcontext, this, sf);
    }

    // multi-valued after this point

    if (sf.hasDocValues() || method == FacetMethod.DV || !sf.isUninvertible()) {
      // single and multi-valued string docValues
      return new FacetFieldProcessorByArrayDV(fcontext, this, sf);
    }

    // Top-level multi-valued field cache (UIF)
    return new FacetFieldProcessorByArrayUIF(fcontext, this, sf);
  }

  /** Returns the merger used to combine per-shard results for this facet. */
  @Override
  public FacetMerger createFacetMerger(Object prototype) {
    return new FacetFieldMerger(this);
  }

  /** Returns a brief description of this facet (field name and limit) for debugging output. */
  @Override
  public Map<String, Object> getFacetDescription() {
    Map<String, Object> descr = new HashMap<>();
    descr.put("field", field);
    descr.put("limit", limit);
    return descr;
  }
}