/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.handler.component;
import java.io.IOException;
import java.lang.invoke.MethodHandles;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import org.apache.commons.lang3.ArrayUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.FixedBitSet;
import org.apache.solr.client.solrj.util.ClientUtils;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrException.ErrorCode;
import org.apache.solr.common.params.CommonParams;
import org.apache.solr.common.params.FacetParams;
import org.apache.solr.common.params.ModifiableSolrParams;
import org.apache.solr.common.params.ShardParams;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.util.SimpleOrderedMap;
import org.apache.solr.common.util.StrUtils;
import org.apache.solr.request.SimpleFacets;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.response.SolrQueryResponse;
import org.apache.solr.schema.FieldType;
import org.apache.solr.schema.PointField;
import org.apache.solr.search.QueryParsing;
import org.apache.solr.search.DocSet;
import org.apache.solr.search.SyntaxError;
import org.apache.solr.search.facet.FacetDebugInfo;
import org.apache.solr.util.RTimer;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* Computes facets -- aggregations with counts of terms or ranges over the whole search results.
*
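* <p>A request that enables this component might look like the following (an
* illustrative sketch of common parameters, not an exhaustive list):
* <pre>{@code
* q=*:*&facet=true&facet.field=cat&facet.query=price:[0 TO 100]
* }</pre>
*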
* @since solr 1.3
*/
@SuppressWarnings("rawtypes")
public class FacetComponent extends SearchComponent {
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
public static final String COMPONENT_NAME = "facet";
public static final String FACET_QUERY_KEY = "facet_queries";
public static final String FACET_FIELD_KEY = "facet_fields";
public static final String FACET_RANGES_KEY = "facet_ranges";
public static final String FACET_INTERVALS_KEY = "facet_intervals";
private static final String PIVOT_KEY = "facet_pivot";
private static final String PIVOT_REFINE_PREFIX = "{!"+PivotFacet.REFINE_PARAM+"=";
@Override
public void prepare(ResponseBuilder rb) throws IOException {
if (rb.req.getParams().getBool(FacetParams.FACET, false)) {
rb.setNeedDocSet(true);
rb.doFacets = true;
// Deduplicate facet params
ModifiableSolrParams params = new ModifiableSolrParams();
SolrParams origParams = rb.req.getParams();
Iterator<String> iter = origParams.getParameterNamesIterator();
while (iter.hasNext()) {
String paramName = iter.next();
// Deduplicate the list with LinkedHashSet, but _only_ for facet params.
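// e.g. facet.field=cat&facet.field=cat&facet.field=price collapses to
// facet.field=cat&facet.field=price, preserving first-seen order.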
if (!paramName.startsWith(FacetParams.FACET)) {
params.add(paramName, origParams.getParams(paramName));
continue;
}
LinkedHashSet<String> deDupe = new LinkedHashSet<>(Arrays.asList(origParams.getParams(paramName)));
params.add(paramName, deDupe.toArray(new String[deDupe.size()]));
}
rb.req.setParams(params);
// Initialize context
FacetContext.initContext(rb);
}
}
/* Custom facet components can return a custom SimpleFacets object */
protected SimpleFacets newSimpleFacets(SolrQueryRequest req, DocSet docSet, SolrParams params, ResponseBuilder rb) {
return new SimpleFacets(req, docSet, params, rb);
}
/**
* Encapsulates facet ranges and facet queries such that their parameters
* are parsed and cached for efficient re-use.
* <p>
* An instance of this class is initialized and kept in the request context via the static
* method {@link org.apache.solr.handler.component.FacetComponent.FacetContext#initContext(ResponseBuilder)} and
* can be retrieved via {@link org.apache.solr.handler.component.FacetComponent.FacetContext#getFacetContext(SolrQueryRequest)}
* <p>
* This class is used exclusively in a single-node context (i.e. a non-distributed request or an
* individual shard request). Also see {@link org.apache.solr.handler.component.FacetComponent.FacetInfo},
* which is dedicated exclusively to merging responses from multiple shards and plays no role in the
* computation of facet counts during a single-node request.
*
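* <p>A minimal usage sketch (hedged; assumes faceting was enabled on the request, so the
* context has already been initialized during {@code prepare}):
* <pre>{@code
* FacetContext fc = FacetContext.getFacetContext(rb.req);
* for (RangeFacetRequest rfr : fc.getAllRangeFacetRequests()) {
*   // each rfr is a parsed facet.range, re-usable without re-parsing
* }
* }</pre>
*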
* <b>This API is experimental and subject to change</b>
*
* @see org.apache.solr.handler.component.FacetComponent.FacetInfo
*/
public static class FacetContext {
private static final String FACET_CONTEXT_KEY = "_facet.context";
private final List<RangeFacetRequest> allRangeFacets; // init in constructor
private final List<FacetBase> allQueryFacets; // init in constructor
private final Map<String, List<RangeFacetRequest>> taggedRangeFacets;
private final Map<String, List<FacetBase>> taggedQueryFacets;
/**
* Initializes the FacetContext using request parameters and saves it in the request
* context, from which it can be retrieved via {@link #getFacetContext(SolrQueryRequest)}.
*
* @param rb the ResponseBuilder object from which the request parameters are read
* and to which the FacetContext object is saved.
*/
public static void initContext(ResponseBuilder rb) {
// Parse facet queries and ranges and put them in the request
// context so that they can be hung under pivots if needed without re-parsing
List<RangeFacetRequest> facetRanges = null;
List<FacetBase> facetQueries = null;
String[] ranges = rb.req.getParams().getParams(FacetParams.FACET_RANGE);
if (ranges != null) {
facetRanges = new ArrayList<>(ranges.length);
for (String range : ranges) {
RangeFacetRequest rangeFacetRequest = new RangeFacetRequest(rb, range);
facetRanges.add(rangeFacetRequest);
}
}
String[] queries = rb.req.getParams().getParams(FacetParams.FACET_QUERY);
if (queries != null) {
facetQueries = new ArrayList<>();
for (String query : queries) {
facetQueries.add(new FacetBase(rb, FacetParams.FACET_QUERY, query));
}
}
rb.req.getContext().put(FACET_CONTEXT_KEY, new FacetContext(facetRanges, facetQueries));
}
private FacetContext(List<RangeFacetRequest> allRangeFacets, List<FacetBase> allQueryFacets) {
// avoid NPEs, set to empty list if parameters are null
this.allRangeFacets = allRangeFacets == null ? Collections.emptyList() : allRangeFacets;
this.allQueryFacets = allQueryFacets == null ? Collections.emptyList() : allQueryFacets;
taggedRangeFacets = new HashMap<>();
for (RangeFacetRequest rf : this.allRangeFacets) {
for (String tag : rf.getTags()) {
List<RangeFacetRequest> list = taggedRangeFacets.get(tag);
if (list == null) {
list = new ArrayList<>(1); // typically just one object
taggedRangeFacets.put(tag, list);
}
list.add(rf);
}
}
taggedQueryFacets = new HashMap<>();
for (FacetBase qf : this.allQueryFacets) {
for (String tag : qf.getTags()) {
List<FacetBase> list = taggedQueryFacets.get(tag);
if (list == null) {
list = new ArrayList<>(1);
taggedQueryFacets.put(tag, list);
}
list.add(qf);
}
}
}
/**
* Return the {@link org.apache.solr.handler.component.FacetComponent.FacetContext} instance
* cached in the request context.
*
* @param req the {@link SolrQueryRequest}
* @return the cached FacetContext instance
* @throws IllegalStateException if no cached FacetContext instance is found in the request context
*/
public static FacetContext getFacetContext(SolrQueryRequest req) throws IllegalStateException {
FacetContext result = (FacetContext) req.getContext().get(FACET_CONTEXT_KEY);
if (null == result) {
throw new IllegalStateException("FacetContext can't be accessed before it's initialized in request context");
}
return result;
}
/**
* @return a {@link List} of {@link RangeFacetRequest} objects, each representing a facet.range to be
* computed. Returns an empty list if no facet.range parameters were requested.
*/
public List<RangeFacetRequest> getAllRangeFacetRequests() {
return allRangeFacets;
}
/**
* @return a {@link List} of {@link org.apache.solr.handler.component.FacetComponent.FacetBase} objects
* each representing a facet.query to be computed. Returns an empty list if no facet.query parameters were requested.
*/
public List<FacetBase> getAllQueryFacets() {
return allQueryFacets;
}
/**
* @param tag a String tag usually specified via local param on a facet.pivot
* @return a list of {@link RangeFacetRequest} objects which have been tagged with the given tag.
* Returns an empty list if none found.
*/
public List<RangeFacetRequest> getRangeFacetRequestsForTag(String tag) {
List<RangeFacetRequest> list = taggedRangeFacets.get(tag);
return list == null ? Collections.emptyList() : list;
}
/**
* @param tag a String tag usually specified via local param on a facet.pivot
* @return a list of {@link org.apache.solr.handler.component.FacetComponent.FacetBase} objects which have been
* tagged with the given tag. Returns an empty list if none are found.
*/
public List<FacetBase> getQueryFacetsForTag(String tag) {
List<FacetBase> list = taggedQueryFacets.get(tag);
return list == null ? Collections.emptyList() : list;
}
}
/**
* Actually run the query
*/
@Override
public void process(ResponseBuilder rb) throws IOException {
if (rb.doFacets) {
SolrParams params = rb.req.getParams();
SimpleFacets f = newSimpleFacets(rb.req, rb.getResults().docSet, params, rb);
RTimer timer = null;
FacetDebugInfo fdebug = null;
if (rb.isDebug()) {
fdebug = new FacetDebugInfo();
rb.req.getContext().put("FacetDebugInfo-nonJson", fdebug);
timer = new RTimer();
}
NamedList<Object> counts = FacetComponent.getFacetCounts(f, fdebug);
String[] pivots = params.getParams(FacetParams.FACET_PIVOT);
if (!ArrayUtils.isEmpty(pivots)) {
PivotFacetProcessor pivotProcessor
= new PivotFacetProcessor(rb.req, rb.getResults().docSet, params, rb);
SimpleOrderedMap<List<NamedList<Object>>> v
= pivotProcessor.process(pivots);
if (v != null) {
counts.add(PIVOT_KEY, v);
}
}
if (fdebug != null) {
long timeElapsed = (long) timer.getTime();
fdebug.setElapse(timeElapsed);
}
rb.rsp.add("facet_counts", counts);
}
}
public static NamedList<Object> getFacetCounts(SimpleFacets simpleFacets) {
return getFacetCounts(simpleFacets, null);
}
/**
* Looks at various params to determine whether any simple facet constraint count
* computations are desired.
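* <p>A minimal usage sketch (hedged; {@code req}, {@code docSet} and {@code params}
* are assumed to come from the current request and its result set):
* <pre>{@code
* SimpleFacets f = new SimpleFacets(req, docSet, params);
* NamedList<Object> counts = FacetComponent.getFacetCounts(f);
* rsp.add("facet_counts", counts);
* }</pre>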
*
* @see SimpleFacets#getFacetQueryCounts
* @see SimpleFacets#getFacetFieldCounts
* @see RangeFacetProcessor#getFacetRangeCounts
* @see RangeFacetProcessor#getFacetIntervalCounts
* @see FacetParams#FACET
* @return a NamedList of Facet Count info or null
*/
public static NamedList<Object> getFacetCounts(SimpleFacets simpleFacets, FacetDebugInfo fdebug) {
// if someone called this method, benefit of the doubt: assume true
if (!simpleFacets.getGlobalParams().getBool(FacetParams.FACET, true))
return null;
RangeFacetProcessor rangeFacetProcessor = new RangeFacetProcessor(simpleFacets.getRequest(), simpleFacets.getDocsOrig(), simpleFacets.getGlobalParams(), simpleFacets.getResponseBuilder());
NamedList<Object> counts = new SimpleOrderedMap<>();
try {
counts.add(FACET_QUERY_KEY, simpleFacets.getFacetQueryCounts());
if (fdebug != null) {
FacetDebugInfo fd = new FacetDebugInfo();
fd.putInfoItem("action", "field facet");
fd.setProcessor(simpleFacets.getClass().getSimpleName());
fdebug.addChild(fd);
simpleFacets.setFacetDebugInfo(fd);
final RTimer timer = new RTimer();
counts.add(FACET_FIELD_KEY, simpleFacets.getFacetFieldCounts());
long timeElapsed = (long) timer.getTime();
fd.setElapse(timeElapsed);
} else {
counts.add(FACET_FIELD_KEY, simpleFacets.getFacetFieldCounts());
}
counts.add(FACET_RANGES_KEY, rangeFacetProcessor.getFacetRangeCounts());
counts.add(FACET_INTERVALS_KEY, simpleFacets.getFacetIntervalCounts());
counts.add(SpatialHeatmapFacets.RESPONSE_KEY, simpleFacets.getHeatmapCounts());
} catch (IOException e) {
throw new SolrException(ErrorCode.SERVER_ERROR, e);
} catch (SyntaxError e) {
throw new SolrException(ErrorCode.BAD_REQUEST, e);
}
return counts;
}
private static final String commandPrefix = "{!" + CommonParams.TERMS + "=$";
@Override
public int distributedProcess(ResponseBuilder rb) throws IOException {
if (!rb.doFacets) {
return ResponseBuilder.STAGE_DONE;
}
if (rb.stage != ResponseBuilder.STAGE_GET_FIELDS) {
return ResponseBuilder.STAGE_DONE;
}
// Overlap facet refinement requests (requests to those shards from which we
// need counts for particular facet values), where possible, with
// the requests to get fields (because we know that is the
// only other required phase).
// We do this in distributedProcess so we can look at all of the
// requests in the outgoing queue at once.
for (int shardNum = 0; shardNum < rb.shards.length; shardNum++) {
List<String> distribFieldFacetRefinements = null;
// FieldFacetAdditions
for (DistribFieldFacet dff : rb._facetInfo.facets.values()) {
if (!dff.needRefinements) continue;
List<String> refList = dff._toRefine[shardNum];
if (refList == null || refList.size() == 0) continue;
String key = dff.getKey(); // reuse the same key that was used for the
// main facet
String termsKey = key + "__terms";
String termsVal = StrUtils.join(refList, ',');
String facetCommand;
// add terms into the original facet.field command
// do it via parameter reference to avoid another layer of encoding.
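// e.g. for field "cat" with no local params this yields the command
// {!terms=$cat__terms}cat plus a companion parameter cat__terms=Apple,Banana
// carrying the refinement terms (an illustration of the strings built below).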
String termsKeyEncoded = ClientUtils.encodeLocalParamVal(termsKey);
if (dff.localParams != null) {
facetCommand = commandPrefix + termsKeyEncoded + " "
+ dff.facetStr.substring(2);
} else {
facetCommand = commandPrefix + termsKeyEncoded + '}' + dff.field;
}
if (distribFieldFacetRefinements == null) {
distribFieldFacetRefinements = new ArrayList<>();
}
distribFieldFacetRefinements.add(facetCommand);
distribFieldFacetRefinements.add(termsKey);
distribFieldFacetRefinements.add(termsVal);
}
if (distribFieldFacetRefinements != null) {
String shard = rb.shards[shardNum];
ShardRequest shardsRefineRequest = null;
boolean newRequest = false;
// try to find a request that is already going out to that shard.
// If nshards becomes too great, we may want to move to hashing for
// better scalability.
for (ShardRequest sreq : rb.outgoing) {
if ((sreq.purpose & ShardRequest.PURPOSE_GET_FIELDS) != 0
&& sreq.shards != null
&& sreq.shards.length == 1
&& sreq.shards[0].equals(shard)) {
shardsRefineRequest = sreq;
break;
}
}
if (shardsRefineRequest == null) {
// we didn't find any other suitable requests going out to that shard,
// so create one ourselves.
newRequest = true;
shardsRefineRequest = new ShardRequest();
shardsRefineRequest.shards = new String[] { rb.shards[shardNum] };
shardsRefineRequest.params = new ModifiableSolrParams(rb.req.getParams());
// don't request any documents
shardsRefineRequest.params.remove(CommonParams.START);
shardsRefineRequest.params.set(CommonParams.ROWS, "0");
}
shardsRefineRequest.purpose |= ShardRequest.PURPOSE_REFINE_FACETS;
shardsRefineRequest.params.set(FacetParams.FACET, "true");
removeMainFacetTypeParams(shardsRefineRequest);
for (int i = 0; i < distribFieldFacetRefinements.size();) {
String facetCommand = distribFieldFacetRefinements.get(i++);
String termsKey = distribFieldFacetRefinements.get(i++);
String termsVal = distribFieldFacetRefinements.get(i++);
shardsRefineRequest.params.add(FacetParams.FACET_FIELD,
facetCommand);
shardsRefineRequest.params.set(termsKey, termsVal);
}
if (newRequest) {
rb.addRequest(this, shardsRefineRequest);
}
}
// PivotFacetAdditions
if (doAnyPivotFacetRefinementRequestsExistForShard(rb._facetInfo, shardNum)) {
enqueuePivotFacetShardRequests(rb, shardNum);
}
} // for shardNum
return ResponseBuilder.STAGE_DONE;
}
public static String[] FACET_TYPE_PARAMS = {
FacetParams.FACET_FIELD, FacetParams.FACET_PIVOT, FacetParams.FACET_QUERY, FacetParams.FACET_DATE,
FacetParams.FACET_RANGE, FacetParams.FACET_INTERVAL, FacetParams.FACET_HEATMAP
};
private void removeMainFacetTypeParams(ShardRequest shardsRefineRequest) {
for (String param : FACET_TYPE_PARAMS) {
shardsRefineRequest.params.remove(param);
}
}
private void enqueuePivotFacetShardRequests(ResponseBuilder rb, int shardNum) {
FacetInfo fi = rb._facetInfo;
ShardRequest shardsRefineRequestPivot = new ShardRequest();
shardsRefineRequestPivot.shards = new String[] {rb.shards[shardNum]};
shardsRefineRequestPivot.params = new ModifiableSolrParams(rb.req.getParams());
// don't request any documents
shardsRefineRequestPivot.params.remove(CommonParams.START);
shardsRefineRequestPivot.params.set(CommonParams.ROWS, "0");
shardsRefineRequestPivot.purpose |= ShardRequest.PURPOSE_REFINE_PIVOT_FACETS;
shardsRefineRequestPivot.params.set(FacetParams.FACET, "true");
removeMainFacetTypeParams(shardsRefineRequestPivot);
shardsRefineRequestPivot.params.set(FacetParams.FACET_PIVOT_MINCOUNT, -1);
shardsRefineRequestPivot.params.remove(FacetParams.FACET_OFFSET);
for (int pivotIndex = 0; pivotIndex < fi.pivotFacets.size(); pivotIndex++) {
String pivotFacetKey = fi.pivotFacets.getName(pivotIndex);
PivotFacet pivotFacet = fi.pivotFacets.getVal(pivotIndex);
List<PivotFacetValue> queuedRefinementsForShard =
pivotFacet.getQueuedRefinements(shardNum);
if ( ! queuedRefinementsForShard.isEmpty() ) {
String fieldsKey = PivotFacet.REFINE_PARAM + fi.pivotRefinementCounter;
String command;
if (pivotFacet.localParams != null) {
command = PIVOT_REFINE_PREFIX + fi.pivotRefinementCounter + " "
+ pivotFacet.facetStr.substring(2);
} else {
command = PIVOT_REFINE_PREFIX + fi.pivotRefinementCounter + "}"
+ pivotFacet.getKey();
}
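// e.g. the first pivot produces a command like {!fpt=0}cat,price and queues
// its encoded refinement value-paths under the companion parameter fpt0
// (assuming PivotFacet.REFINE_PARAM is "fpt"; an illustrative sketch).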
shardsRefineRequestPivot.params.add(FacetParams.FACET_PIVOT, command);
for (PivotFacetValue refinementValue : queuedRefinementsForShard) {
String refinementStr = PivotFacetHelper
.encodeRefinementValuePath(refinementValue.getValuePath());
shardsRefineRequestPivot.params.add(fieldsKey, refinementStr);
}
}
fi.pivotRefinementCounter++;
}
rb.addRequest(this, shardsRefineRequestPivot);
}
@Override
public void modifyRequest(ResponseBuilder rb, SearchComponent who, ShardRequest sreq) {
if (!rb.doFacets) return;
if ((sreq.purpose & ShardRequest.PURPOSE_GET_TOP_IDS) != 0) {
sreq.purpose |= ShardRequest.PURPOSE_GET_FACETS;
FacetInfo fi = rb._facetInfo;
if (fi == null) {
rb._facetInfo = fi = new FacetInfo();
fi.parse(rb.req.getParams(), rb);
}
modifyRequestForFieldFacets(rb, sreq, fi);
modifyRequestForRangeFacets(sreq);
modifyRequestForPivotFacets(rb, sreq, fi.pivotFacets);
SpatialHeatmapFacets.distribModifyRequest(sreq, fi.heatmapFacets);
sreq.params.remove(FacetParams.FACET_MINCOUNT);
sreq.params.remove(FacetParams.FACET_OFFSET);
} else {
// turn off faceting on other requests
sreq.params.set(FacetParams.FACET, "false");
// we could optionally remove faceting params
}
}
// we must get all the range buckets back in order to have coherent lists at the end, see SOLR-6154
private void modifyRequestForRangeFacets(ShardRequest sreq) {
// Collect all the range fields.
final String[] fields = sreq.params.getParams(FacetParams.FACET_RANGE);
if (fields != null) {
for (String field : fields) {
sreq.params.set("f." + field + ".facet.mincount", "0");
}
}
}
private void modifyRequestForFieldFacets(ResponseBuilder rb, ShardRequest sreq, FacetInfo fi) {
for (DistribFieldFacet dff : fi.facets.values()) {
String paramStart = "f." + dff.field + '.';
sreq.params.remove(paramStart + FacetParams.FACET_MINCOUNT);
sreq.params.remove(paramStart + FacetParams.FACET_OFFSET);
dff.initialLimit = dff.limit <= 0 ? dff.limit : dff.offset + dff.limit;
if (dff.sort.equals(FacetParams.FACET_SORT_COUNT)) {
if (dff.limit > 0) {
// set the initial limit higher to increase accuracy
dff.initialLimit = doOverRequestMath(dff.initialLimit, dff.overrequestRatio,
dff.overrequestCount);
}
dff.initialMincount = Math.min(dff.minCount, 1);
} else {
// we're sorting by index order.
// if minCount==0, we should always be able to get accurate results w/o
// over-requesting or refining
// if minCount==1, we should be able to get accurate results w/o
// over-requesting, but we'll need to refine
// if minCount==n (>1), we can set the initialMincount to
// minCount/nShards, rounded up.
// For example, we know that if minCount=10 and we have 3 shards, then
// at least one shard must have a count of 4 for the term
// For the minCount>1 case, we can generate too short of a list (miss
// terms at the end of the list) unless limit==-1
// For example: each shard could produce a list of top 10, but some of
// those could fail to make it into the combined list (i.e.
// we needed to go beyond the top 10 to generate the top 10 combined).
// Overrequesting can help a little here, but not as
// much as when sorting by count.
if (dff.minCount <= 1) {
dff.initialMincount = dff.minCount;
} else {
dff.initialMincount = (int) Math.ceil((double) dff.minCount / rb.slices.length);
}
}
// Currently this is for testing only and allows overriding of the
// facet.limit sent to the shards
dff.initialLimit = rb.req.getParams().getInt("facet.shard.limit", dff.initialLimit);
sreq.params.set(paramStart + FacetParams.FACET_LIMIT, dff.initialLimit);
sreq.params.set(paramStart + FacetParams.FACET_MINCOUNT, dff.initialMincount);
}
}
private void modifyRequestForPivotFacets(ResponseBuilder rb,
ShardRequest sreq,
SimpleOrderedMap<PivotFacet> pivotFacets) {
for (Entry<String,PivotFacet> pfwEntry : pivotFacets) {
PivotFacet pivot = pfwEntry.getValue();
for (String pivotField : StrUtils.splitSmart(pivot.getKey(), ',')) {
modifyRequestForIndividualPivotFacets(rb, sreq, pivotField);
}
}
}
private void modifyRequestForIndividualPivotFacets(ResponseBuilder rb, ShardRequest sreq,
String fieldToOverRequest) {
final SolrParams originalParams = rb.req.getParams();
final String paramStart = "f." + fieldToOverRequest + ".";
final int requestedLimit = originalParams.getFieldInt(fieldToOverRequest,
FacetParams.FACET_LIMIT, 100);
sreq.params.remove(paramStart + FacetParams.FACET_LIMIT);
final int offset = originalParams.getFieldInt(fieldToOverRequest,
FacetParams.FACET_OFFSET, 0);
sreq.params.remove(paramStart + FacetParams.FACET_OFFSET);
final double overRequestRatio = originalParams.getFieldDouble
(fieldToOverRequest, FacetParams.FACET_OVERREQUEST_RATIO, 1.5);
sreq.params.remove(paramStart + FacetParams.FACET_OVERREQUEST_RATIO);
final int overRequestCount = originalParams.getFieldInt
(fieldToOverRequest, FacetParams.FACET_OVERREQUEST_COUNT, 10);
sreq.params.remove(paramStart + FacetParams.FACET_OVERREQUEST_COUNT);
final int requestedMinCount = originalParams.getFieldInt
(fieldToOverRequest, FacetParams.FACET_PIVOT_MINCOUNT, 1);
sreq.params.remove(paramStart + FacetParams.FACET_PIVOT_MINCOUNT);
final String defaultSort = (requestedLimit > 0)
? FacetParams.FACET_SORT_COUNT : FacetParams.FACET_SORT_INDEX;
final String sort = originalParams.getFieldParam
(fieldToOverRequest, FacetParams.FACET_SORT, defaultSort);
int shardLimit = requestedLimit + offset;
int shardMinCount = Math.min(requestedMinCount, 1);
// per-shard mincount & overrequest
if ( FacetParams.FACET_SORT_INDEX.equals(sort) &&
1 < requestedMinCount &&
0 < requestedLimit) {
// We can divide the mincount by num shards rounded up, because unless
// a single shard has at least that many it can't compete...
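// e.g. mincount=10 across 3 shards: ceil(10/3) = 4, since if no shard
// reaches 4 the total across all 3 shards can be at most 9.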
shardMinCount = (int) Math.ceil((double) requestedMinCount / rb.slices.length);
// ...but we still need to overrequest to reduce chances of missing something
shardLimit = doOverRequestMath(shardLimit, overRequestRatio, overRequestCount);
// (for mincount <= 1, no overrequest needed)
} else if ( FacetParams.FACET_SORT_COUNT.equals(sort) ) {
if ( 0 < requestedLimit ) {
shardLimit = doOverRequestMath(shardLimit, overRequestRatio, overRequestCount);
}
}
sreq.params.set(paramStart + FacetParams.FACET_LIMIT, shardLimit);
sreq.params.set(paramStart + FacetParams.FACET_PIVOT_MINCOUNT, shardMinCount);
}
private int doOverRequestMath(int limit, double ratio, int count) {
// NOTE: normally, "1.0F < ratio"
//
// if the user chooses a ratio < 1, we allow it and don't "bottom out" at
// the original limit until *after* we've also added the count.
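// e.g. limit=10, ratio=1.5, count=10: adjustedLimit = (int)(10 * 1.5) + 10 = 25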
int adjustedLimit = (int) (limit * ratio) + count;
return Math.max(limit, adjustedLimit);
}
@Override
public void handleResponses(ResponseBuilder rb, ShardRequest sreq) {
if (!rb.doFacets) return;
if ((sreq.purpose & ShardRequest.PURPOSE_GET_FACETS) != 0) {
countFacets(rb, sreq);
} else {
// at present PURPOSE_REFINE_FACETS and PURPOSE_REFINE_PIVOT_FACETS
// don't co-exist in individual requests, but don't assume that
// will always be the case
if ((sreq.purpose & ShardRequest.PURPOSE_REFINE_FACETS) != 0) {
refineFacets(rb, sreq);
}
if ((sreq.purpose & ShardRequest.PURPOSE_REFINE_PIVOT_FACETS) != 0) {
refinePivotFacets(rb, sreq);
}
}
}
private void countFacets(ResponseBuilder rb, ShardRequest sreq) {
FacetInfo fi = rb._facetInfo;
for (ShardResponse srsp : sreq.responses) {
int shardNum = rb.getShardNum(srsp.getShard());
NamedList facet_counts = null;
try {
facet_counts = (NamedList) srsp.getSolrResponse().getResponse().get("facet_counts");
if (facet_counts==null) {
NamedList<?> responseHeader = (NamedList<?>)srsp.getSolrResponse().getResponse().get("responseHeader");
if (Boolean.TRUE.equals(responseHeader.getBooleanArg(SolrQueryResponse.RESPONSE_HEADER_PARTIAL_RESULTS_KEY))) {
continue;
} else {
log.warn("corrupted response on {} : {}", srsp.getShardRequest(), srsp.getSolrResponse());
throw new SolrException(ErrorCode.SERVER_ERROR,
"facet_counts is absent in response from " + srsp.getNodeName() +
", but "+SolrQueryResponse.RESPONSE_HEADER_PARTIAL_RESULTS_KEY+" hasn't been responded");
}
}
} catch (Exception ex) {
if (ShardParams.getShardsTolerantAsBool(rb.req.getParams())) {
continue; // looks like a shard did not return anything
}
throw new SolrException(ErrorCode.SERVER_ERROR,
"Unable to read facet info for shard: " + srsp.getShard(), ex);
}
// handle facet queries
NamedList facet_queries = (NamedList) facet_counts.get("facet_queries");
if (facet_queries != null) {
for (int i = 0; i < facet_queries.size(); i++) {
String returnedKey = facet_queries.getName(i);
long count = ((Number) facet_queries.getVal(i)).longValue();
QueryFacet qf = fi.queryFacets.get(returnedKey);
qf.count += count;
}
}
// step through each facet.field, adding results from this shard
NamedList facet_fields = (NamedList) facet_counts.get("facet_fields");
if (facet_fields != null) {
for (DistribFieldFacet dff : fi.facets.values()) {
dff.add(shardNum, (NamedList) facet_fields.get(dff.getKey()), dff.initialLimit);
}
}
// Distributed facet_ranges
@SuppressWarnings("unchecked")
SimpleOrderedMap<SimpleOrderedMap<Object>> rangesFromShard = (SimpleOrderedMap<SimpleOrderedMap<Object>>)
facet_counts.get("facet_ranges");
if (rangesFromShard != null) {
RangeFacetRequest.DistribRangeFacet.mergeFacetRangesFromShardResponse(fi.rangeFacets, rangesFromShard);
}
// Distributed facet_intervals
doDistribIntervals(fi, facet_counts);
// Distributed facet_pivots - this is just the per shard collection,
// refinement reqs still needed (below) once we've considered every shard
doDistribPivots(rb, shardNum, facet_counts);
// Distributed facet_heatmaps
SpatialHeatmapFacets.distribHandleResponse(fi.heatmapFacets, facet_counts);
} // end for-each-response-in-shard-request...
// refine each pivot based on the new shard data
for (Entry<String,PivotFacet> pivotFacet : fi.pivotFacets) {
pivotFacet.getValue().queuePivotRefinementRequests();
}
//
// This code currently assumes that there will be only a single
// request (with responses from all shards) sent out to get facets...
// otherwise we would need to wait until all facet responses were received.
//
for (DistribFieldFacet dff : fi.facets.values()) {
// no need to check these facets for refinement
if (dff.initialLimit <= 0 && dff.initialMincount <= 1) continue;
// only other case where index-sort doesn't need refinement is if minCount==0
if (dff.minCount <= 1 && dff.sort.equals(FacetParams.FACET_SORT_INDEX)) continue;
@SuppressWarnings("unchecked") // generic array's are annoying
List<String>[] tmp = (List<String>[]) new List[rb.shards.length];
dff._toRefine = tmp;
ShardFacetCount[] counts = dff.getCountSorted();
int ntop = Math.min(counts.length,
dff.limit >= 0 ? dff.offset + dff.limit : Integer.MAX_VALUE);
long smallestCount = counts.length == 0 ? 0 : counts[ntop - 1].count;
for (int i = 0; i < counts.length; i++) {
ShardFacetCount sfc = counts[i];
boolean needRefinement = false;
if (i < ntop) {
// automatically flag the top values for refinement
// this should always be true for facet.sort=index
needRefinement = true;
} else {
// this logic should only be invoked for facet.sort=index (for now)
// calculate the maximum value that this term may have
// and if it is >= smallestCount, then flag for refinement
long maxCount = sfc.count;
for (int shardNum = 0; shardNum < rb.shards.length; shardNum++) {
FixedBitSet fbs = dff.counted[shardNum];
// fbs can be null if a shard request failed
if (fbs != null && (sfc.termNum >= fbs.length() || !fbs.get(sfc.termNum))) {
// if missing from this shard, add the max it could be
maxCount += dff.maxPossible(shardNum);
}
}
if (maxCount >= smallestCount) {
// TODO: on a tie, we could check the term values
needRefinement = true;
}
}
if (needRefinement) {
// add a query for each shard missing the term that needs refinement
for (int shardNum = 0; shardNum < rb.shards.length; shardNum++) {
FixedBitSet fbs = dff.counted[shardNum];
// fbs can be null if a shard request failed
if (fbs != null &&
(sfc.termNum >= fbs.length() || !fbs.get(sfc.termNum)) &&
dff.maxPossible(shardNum) > 0) {
dff.needRefinements = true;
List<String> lst = dff._toRefine[shardNum];
if (lst == null) {
lst = dff._toRefine[shardNum] = new ArrayList<>();
}
lst.add(sfc.name);
}
}
}
}
}
removeFieldFacetsUnderLimits(rb);
removeRangeFacetsUnderLimits(rb);
removeQueryFacetsUnderLimits(rb);
}
private void removeQueryFacetsUnderLimits(ResponseBuilder rb) {
if (rb.stage != ResponseBuilder.STAGE_EXECUTE_QUERY) {
return;
}
FacetInfo fi = rb._facetInfo;
Map<String, QueryFacet> query_facets = fi.queryFacets;
if (query_facets == null) {
return;
}
LinkedHashMap<String, QueryFacet> newQueryFacets = new LinkedHashMap<>();
// The minimum count a facet.query entry must reach to be kept; defaults to 0 (keep all).
int minCount = rb.req.getParams().getInt(FacetParams.FACET_MINCOUNT, 0);
boolean replace = false;
for (Map.Entry<String, QueryFacet> ent : query_facets.entrySet()) {
if (ent.getValue().count >= minCount) {
newQueryFacets.put(ent.getKey(), ent.getValue());
} else {
if (log.isTraceEnabled()) {
log.trace("Removing facetQuery/key: {}/{} mincount={}", ent.getKey(), ent.getValue(), minCount);
}
replace = true;
}
}
if (replace) {
fi.queryFacets = newQueryFacets;
}
}
private void removeRangeFacetsUnderLimits(ResponseBuilder rb) {
if (rb.stage != ResponseBuilder.STAGE_EXECUTE_QUERY) {
return;
}
FacetInfo fi = rb._facetInfo;
for (Map.Entry<String, RangeFacetRequest.DistribRangeFacet> entry : fi.rangeFacets.entrySet()) {
final String field = entry.getKey();
final RangeFacetRequest.DistribRangeFacet rangeFacet = entry.getValue();
int minCount = rb.req.getParams().getFieldInt(field, FacetParams.FACET_MINCOUNT, 0);
if (minCount == 0) {
continue;
}
rangeFacet.removeRangeFacetsUnderLimits(minCount);
}
}
private void removeFieldFacetsUnderLimits(ResponseBuilder rb) {
if (rb.stage != ResponseBuilder.STAGE_DONE) {
return;
}
FacetInfo fi = rb._facetInfo;
if (fi.facets == null) {
return;
}
// Do field facets
for (Entry<String, DistribFieldFacet> ent : fi.facets.entrySet()) {
String field = ent.getKey();
int minCount = rb.req.getParams().getFieldInt(field, FacetParams.FACET_MINCOUNT, 0);
if (minCount == 0) { // return them all
continue;
}
ent.getValue().respectMinCount(minCount);
}
}
// The implementation below uses the first encountered shard's
// facet_intervals as the basis for subsequent shards' data to be merged.
private void doDistribIntervals(FacetInfo fi, NamedList facet_counts) {
@SuppressWarnings("unchecked")
SimpleOrderedMap<SimpleOrderedMap<Integer>> facet_intervals =
(SimpleOrderedMap<SimpleOrderedMap<Integer>>)
facet_counts.get("facet_intervals");
if (facet_intervals != null) {
for (Map.Entry<String, SimpleOrderedMap<Integer>> entry : facet_intervals) {
final String field = entry.getKey();
SimpleOrderedMap<Integer> existingCounts = fi.intervalFacets.get(field);
if (existingCounts == null) {
// first time we've seen this field, no merging
fi.intervalFacets.add(field, entry.getValue());
} else {
// not the first time, merge current field counts
Iterator<Map.Entry<String, Integer>> newItr = entry.getValue().iterator();
Iterator<Map.Entry<String, Integer>> exItr = existingCounts.iterator();
// all intervals should be returned by each shard, even if they have zero count,
// and in the same order
while (exItr.hasNext()) {
Map.Entry<String, Integer> exItem = exItr.next();
if (!newItr.hasNext()) {
throw new SolrException(ErrorCode.SERVER_ERROR,
"Interval facet shard response missing key: " + exItem.getKey());
}
Map.Entry<String, Integer> newItem = newItr.next();
if (!newItem.getKey().equals(exItem.getKey())) {
throw new SolrException(ErrorCode.SERVER_ERROR,
"Interval facet shard response has extra key: " + newItem.getKey());
}
exItem.setValue(exItem.getValue() + newItem.getValue());
}
if (newItr.hasNext()) {
throw new SolrException(ErrorCode.SERVER_ERROR,
"Interval facet shard response has at least one extra key: "
+ newItr.next().getKey());
}
}
}
}
}
private void doDistribPivots(ResponseBuilder rb, int shardNum, NamedList facet_counts) {
@SuppressWarnings("unchecked")
SimpleOrderedMap<List<NamedList<Object>>> facet_pivot
= (SimpleOrderedMap<List<NamedList<Object>>>) facet_counts.get(PIVOT_KEY);
if (facet_pivot != null) {
for (Map.Entry<String,List<NamedList<Object>>> pivot : facet_pivot) {
final String pivotName = pivot.getKey();
PivotFacet facet = rb._facetInfo.pivotFacets.get(pivotName);
facet.mergeResponseFromShard(shardNum, rb, pivot.getValue());
}
}
}
private void refineFacets(ResponseBuilder rb, ShardRequest sreq) {
FacetInfo fi = rb._facetInfo;
for (ShardResponse srsp : sreq.responses) {
// int shardNum = rb.getShardNum(srsp.shard);
NamedList facet_counts = (NamedList) srsp.getSolrResponse().getResponse().get("facet_counts");
NamedList facet_fields = (NamedList) facet_counts.get("facet_fields");
if (facet_fields == null) continue; // this can happen when there's an exception
for (int i = 0; i < facet_fields.size(); i++) {
String key = facet_fields.getName(i);
DistribFieldFacet dff = fi.facets.get(key);
if (dff == null) continue;
NamedList shardCounts = (NamedList) facet_fields.getVal(i);
for (int j = 0; j < shardCounts.size(); j++) {
String name = shardCounts.getName(j);
long count = ((Number) shardCounts.getVal(j)).longValue();
ShardFacetCount sfc = dff.counts.get(name);
if (sfc == null) {
// we got back a term we didn't ask for?
log.error("Unexpected term returned for facet refining. key='{}' term='{}'\n\trequest params={}\n\ttoRefine={}\n\tresponse={}"
, key, name, sreq.params, dff._toRefine, shardCounts);
continue;
}
sfc.count += count;
}
}
}
}
private void refinePivotFacets(ResponseBuilder rb, ShardRequest sreq) {
// This is after the shard has returned the refinement request
FacetInfo fi = rb._facetInfo;
for (ShardResponse srsp : sreq.responses) {
int shardNumber = rb.getShardNum(srsp.getShard());
NamedList facetCounts = (NamedList) srsp.getSolrResponse().getResponse().get("facet_counts");
@SuppressWarnings("unchecked")
NamedList<List<NamedList<Object>>> pivotFacetResponsesFromShard
= (NamedList<List<NamedList<Object>>>) facetCounts.get(PIVOT_KEY);
if (null == pivotFacetResponsesFromShard) {
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR,
"No pivot refinement response from shard: " + srsp.getShard());
}
for (Entry<String,List<NamedList<Object>>> pivotFacetResponseFromShard : pivotFacetResponsesFromShard) {
PivotFacet aggregatedPivotFacet = fi.pivotFacets.get(pivotFacetResponseFromShard.getKey());
aggregatedPivotFacet.mergeResponseFromShard(shardNumber, rb, pivotFacetResponseFromShard.getValue());
aggregatedPivotFacet.removeAllRefinementsForShard(shardNumber);
}
}
if (allPivotFacetsAreFullyRefined(fi)) {
for (Entry<String,PivotFacet> pf : fi.pivotFacets) {
pf.getValue().queuePivotRefinementRequests();
}
reQueuePivotFacetShardRequests(rb);
}
}
private boolean allPivotFacetsAreFullyRefined(FacetInfo fi) {
for (Entry<String,PivotFacet> pf : fi.pivotFacets) {
if (pf.getValue().isRefinementsRequired()) {
return false;
}
}
return true;
}
private boolean doAnyPivotFacetRefinementRequestsExistForShard(FacetInfo fi,
int shardNum) {
for (int i = 0; i < fi.pivotFacets.size(); i++) {
PivotFacet pf = fi.pivotFacets.getVal(i);
if ( ! pf.getQueuedRefinements(shardNum).isEmpty() ) {
return true;
}
}
return false;
}
private void reQueuePivotFacetShardRequests(ResponseBuilder rb) {
for (int shardNum = 0; shardNum < rb.shards.length; shardNum++) {
if (doAnyPivotFacetRefinementRequestsExistForShard(rb._facetInfo, shardNum)) {
enqueuePivotFacetShardRequests(rb, shardNum);
}
}
}
@Override
public void finishStage(ResponseBuilder rb) {
if (!rb.doFacets || rb.stage != ResponseBuilder.STAGE_GET_FIELDS) return;
// wait until STAGE_GET_FIELDS
// so that "result" is already stored in the response (for aesthetics)
FacetInfo fi = rb._facetInfo;
NamedList<Object> facet_counts = new SimpleOrderedMap<>();
NamedList<Number> facet_queries = new SimpleOrderedMap<>();
facet_counts.add("facet_queries", facet_queries);
for (QueryFacet qf : fi.queryFacets.values()) {
facet_queries.add(qf.getKey(), num(qf.count));
}
NamedList<Object> facet_fields = new SimpleOrderedMap<>();
facet_counts.add("facet_fields", facet_fields);
for (DistribFieldFacet dff : fi.facets.values()) {
// order is important for facet values, so use NamedList
NamedList<Object> fieldCounts = new NamedList<>();
facet_fields.add(dff.getKey(), fieldCounts);
ShardFacetCount[] counts;
boolean countSorted = dff.sort.equals(FacetParams.FACET_SORT_COUNT);
if (countSorted) {
counts = dff.countSorted;
if (counts == null || dff.needRefinements) {
counts = dff.getCountSorted();
}
} else if (dff.sort.equals(FacetParams.FACET_SORT_INDEX)) {
counts = dff.getLexSorted();
} else { // TODO: log error or throw exception?
counts = dff.getLexSorted();
}
if (countSorted) {
int end = dff.limit < 0
? counts.length : Math.min(dff.offset + dff.limit, counts.length);
for (int i = dff.offset; i < end; i++) {
if (counts[i].count < dff.minCount) {
break;
}
fieldCounts.add(counts[i].name, num(counts[i].count));
}
} else {
int off = dff.offset;
int lim = dff.limit >= 0 ? dff.limit : Integer.MAX_VALUE;
// index order...
for (int i = 0; i < counts.length; i++) {
long count = counts[i].count;
if (count < dff.minCount) continue;
if (off > 0) {
off--;
continue;
}
if (lim <= 0) {
break;
}
lim--;
fieldCounts.add(counts[i].name, num(count));
}
}
if (dff.missing) {
fieldCounts.add(null, num(dff.missingCount));
}
}
SimpleOrderedMap<SimpleOrderedMap<Object>> rangeFacetOutput = new SimpleOrderedMap<>();
for (Map.Entry<String, RangeFacetRequest.DistribRangeFacet> entry : fi.rangeFacets.entrySet()) {
String key = entry.getKey();
RangeFacetRequest.DistribRangeFacet value = entry.getValue();
rangeFacetOutput.add(key, value.rangeFacet);
}
facet_counts.add("facet_ranges", rangeFacetOutput);
facet_counts.add("facet_intervals", fi.intervalFacets);
facet_counts.add(SpatialHeatmapFacets.RESPONSE_KEY,
SpatialHeatmapFacets.distribFinish(fi.heatmapFacets, rb));
if (fi.pivotFacets != null && fi.pivotFacets.size() > 0) {
facet_counts.add(PIVOT_KEY, createPivotFacetOutput(rb));
}
rb.rsp.add("facet_counts", facet_counts);
rb._facetInfo = null; // could be big, so release asap
}
private SimpleOrderedMap<List<NamedList<Object>>> createPivotFacetOutput(ResponseBuilder rb) {
SimpleOrderedMap<List<NamedList<Object>>> combinedPivotFacets = new SimpleOrderedMap<>();
for (Entry<String,PivotFacet> entry : rb._facetInfo.pivotFacets) {
String key = entry.getKey();
PivotFacet pivot = entry.getValue();
List<NamedList<Object>> trimmedPivots = pivot.getTrimmedPivotsAsListOfNamedLists();
if (null == trimmedPivots) {
trimmedPivots = Collections.<NamedList<Object>>emptyList();
}
combinedPivotFacets.add(key, trimmedPivots);
}
return combinedPivotFacets;
}
// use <int> tags for smaller facet counts (better back compatibility)
/**
* @param val a primitive long value
* @return an {@link Integer} if the value of the argument is less than {@link Integer#MAX_VALUE},
* else a {@link java.lang.Long}
*/
static Number num(long val) {
if (val < Integer.MAX_VALUE) return (int)val;
else return val;
}
/**
* @param val a {@link java.lang.Long} value
* @return an {@link Integer} if the value of the argument is less than {@link Integer#MAX_VALUE},
* else a {@link java.lang.Long}
*/
static Number num(Long val) {
if (val.longValue() < Integer.MAX_VALUE) return val.intValue();
else return val;
}
/////////////////////////////////////////////
/// SolrInfoBean
/////////////////////////////////////////////
@Override
public String getDescription() {
return "Handle Faceting";
}
@Override
public Category getCategory() {
return Category.QUERY;
}
/**
* This class is used exclusively for merging results from each shard
* in a distributed facet request. It plays no role in the computation
* of facet counts inside a single node.
*
* A related class {@link org.apache.solr.handler.component.FacetComponent.FacetContext}
* exists for assisting computation inside a single node.
*
* <b>This API is experimental and subject to change</b>
*
* @see org.apache.solr.handler.component.FacetComponent.FacetContext
*/
public static class FacetInfo {
/**
* Incremented counter used to track the values being refined in a given request.
* This counter is used in conjunction with {@link PivotFacet#REFINE_PARAM} to identify
* which refinement values are associated with which pivots.
*/
int pivotRefinementCounter = 0;
public LinkedHashMap<String,QueryFacet> queryFacets;
public LinkedHashMap<String,DistribFieldFacet> facets;
public SimpleOrderedMap<SimpleOrderedMap<Object>> dateFacets
= new SimpleOrderedMap<>();
public LinkedHashMap<String, RangeFacetRequest.DistribRangeFacet> rangeFacets
= new LinkedHashMap<>();
public SimpleOrderedMap<SimpleOrderedMap<Integer>> intervalFacets
= new SimpleOrderedMap<>();
public SimpleOrderedMap<PivotFacet> pivotFacets
= new SimpleOrderedMap<>();
public LinkedHashMap<String,SpatialHeatmapFacets.HeatmapFacet> heatmapFacets;
void parse(SolrParams params, ResponseBuilder rb) {
queryFacets = new LinkedHashMap<>();
facets = new LinkedHashMap<>();
String[] facetQs = params.getParams(FacetParams.FACET_QUERY);
if (facetQs != null) {
for (String query : facetQs) {
QueryFacet queryFacet = new QueryFacet(rb, query);
queryFacets.put(queryFacet.getKey(), queryFacet);
}
}
String[] facetFs = params.getParams(FacetParams.FACET_FIELD);
if (facetFs != null) {
for (String field : facetFs) {
final DistribFieldFacet ff;
if (params.getFieldBool(field, FacetParams.FACET_EXISTS, false)) {
// facet counts are capped at 1 when facet.exists=true
ff = new DistribFacetExistsField(rb, field);
} else {
ff = new DistribFieldFacet(rb, field);
}
facets.put(ff.getKey(), ff);
}
}
// Develop Pivot Facet Information
String[] facetPFs = params.getParams(FacetParams.FACET_PIVOT);
if (facetPFs != null) {
for (String fieldGroup : facetPFs) {
PivotFacet pf = new PivotFacet(rb, fieldGroup);
pivotFacets.add(pf.getKey(), pf);
}
}
heatmapFacets = SpatialHeatmapFacets.distribParse(params, rb);
}
}
/**
* <b>This API is experimental and subject to change</b>
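*
* <p>For example, given the facet.field parameter value {@code {!key=foo ex=dt}myfield},
* {@code facetOn} becomes "myfield", {@link #getKey()} returns "foo", and
* {@link #getExcludeTags()} returns ["dt"] (an illustrative reading of the
* local-params handling in the constructor).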
*/
public static class FacetBase {
String facetType; // facet.field, facet.query, etc (make enum?)
String facetStr; // the original facet parameter value, e.g. "{!key=foo}myfield"
String facetOn; // the field or query, absent localParams if appropriate
private String key; // label in the response for the result...
// "foo" for {!key=foo}myfield
SolrParams localParams; // any local params for the facet
private List<String> tags = Collections.emptyList();
private List<String> excludeTags = Collections.emptyList();
private int threadCount = -1;
public FacetBase(ResponseBuilder rb, String facetType, String facetStr) {
this.facetType = facetType;
this.facetStr = facetStr;
try {
this.localParams = QueryParsing.getLocalParams(facetStr,
rb.req.getParams());
} catch (SyntaxError e) {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, e);
}
this.facetOn = facetStr;
this.key = facetStr;
if (localParams != null) {
// remove local params unless it's a query
if (!facetType.equals(FacetParams.FACET_QUERY)) {
facetOn = localParams.get(CommonParams.VALUE);
key = facetOn;
}
key = localParams.get(CommonParams.OUTPUT_KEY, key);
String tagStr = localParams.get(CommonParams.TAG);
this.tags = tagStr == null ? Collections.<String>emptyList() : StrUtils.splitSmart(tagStr,',');
String threadStr = localParams.get(CommonParams.THREADS);
this.threadCount = threadStr != null ? Integer.parseInt(threadStr) : -1;
String excludeStr = localParams.get(CommonParams.EXCLUDE);
if (StringUtils.isEmpty(excludeStr)) {
this.excludeTags = Collections.emptyList();
} else {
this.excludeTags = StrUtils.splitSmart(excludeStr,',');
}
}
}
/** returns the key in the response that this facet will be under */
public String getKey() { return key; }
public String getType() { return facetType; }
public List<String> getTags() { return tags; }
public List<String> getExcludeTags() { return excludeTags; }
public int getThreadCount() { return threadCount; }
}
/**
* <b>This API is experimental and subject to change</b>
*/
public static class QueryFacet extends FacetBase {
public long count;
public QueryFacet(ResponseBuilder rb, String facetStr) {
super(rb, FacetParams.FACET_QUERY, facetStr);
}
}
/**
* <b>This API is experimental and subject to change</b>
*/
public static class FieldFacet extends FacetBase {
public String field; // the field to facet on... "myfield" for
// {!key=foo}myfield
public FieldType ftype;
public int offset;
public int limit;
public int minCount;
public String sort;
public boolean missing;
public String prefix;
public long missingCount;
public FieldFacet(ResponseBuilder rb, String facetStr) {
super(rb, FacetParams.FACET_FIELD, facetStr);
fillParams(rb, rb.req.getParams(), facetOn);
}
protected void fillParams(ResponseBuilder rb, SolrParams params, String field) {
this.field = field;
this.ftype = rb.req.getSchema().getFieldTypeNoEx(this.field);
this.offset = params.getFieldInt(field, FacetParams.FACET_OFFSET, 0);
this.limit = params.getFieldInt(field, FacetParams.FACET_LIMIT, 100);
Integer mincount = params.getFieldInt(field, FacetParams.FACET_MINCOUNT);
if (mincount == null) {
Boolean zeros = params.getFieldBool(field, FacetParams.FACET_ZEROS);
// mincount = (zeros!=null && zeros) ? 0 : 1;
mincount = (zeros != null && !zeros) ? 1 : 0;
// current default is to include zeros.
}
this.minCount = mincount;
this.missing = params.getFieldBool(field, FacetParams.FACET_MISSING, false);
// default to sorting by count if there is a limit.
this.sort = params.getFieldParam(field, FacetParams.FACET_SORT,
(limit > 0 ?
FacetParams.FACET_SORT_COUNT
: FacetParams.FACET_SORT_INDEX));
if (this.sort.equals(FacetParams.FACET_SORT_COUNT_LEGACY)) {
this.sort = FacetParams.FACET_SORT_COUNT;
} else if (this.sort.equals(FacetParams.FACET_SORT_INDEX_LEGACY)) {
this.sort = FacetParams.FACET_SORT_INDEX;
}
this.prefix = params.getFieldParam(field, FacetParams.FACET_PREFIX);
}
}
/**
* <b>This API is experimental and subject to change</b>
*/
@SuppressWarnings("rawtypes")
public static class DistribFieldFacet extends FieldFacet {
public List<String>[] _toRefine; // a List<String> of refinements needed,
// one for each shard.
// SchemaField sf; // currently unneeded
// the max possible count for a term appearing on no list
public long missingMaxPossible;
// the max possible count for a missing term for each shard (indexed by
// shardNum)
public long[] missingMax;
// a bitset for each shard, keeping track of which terms were seen
public FixedBitSet[] counted;
public HashMap<String,ShardFacetCount> counts = new HashMap<>(128);
public int termNum;
public int initialLimit; // how many terms requested in first phase
public int initialMincount; // mincount param sent to each shard
public double overrequestRatio;
public int overrequestCount;
public boolean needRefinements;
public ShardFacetCount[] countSorted;
DistribFieldFacet(ResponseBuilder rb, String facetStr) {
super(rb, facetStr);
// sf = rb.req.getSchema().getField(field);
missingMax = new long[rb.shards.length];
counted = new FixedBitSet[rb.shards.length];
}
protected void fillParams(ResponseBuilder rb, SolrParams params, String field) {
super.fillParams(rb, params, field);
this.overrequestRatio
= params.getFieldDouble(field, FacetParams.FACET_OVERREQUEST_RATIO, 1.5);
this.overrequestCount
= params.getFieldInt(field, FacetParams.FACET_OVERREQUEST_COUNT, 10);
}
void add(int shardNum, NamedList shardCounts, int numRequested) {
// shardCounts could be null if there was an exception
int sz = shardCounts == null ? 0 : shardCounts.size();
int numReceived = sz;
FixedBitSet terms = new FixedBitSet(termNum + sz);
long last = 0;
for (int i = 0; i < sz; i++) {
String name = shardCounts.getName(i);
long count = ((Number) shardCounts.getVal(i)).longValue();
if (name == null) {
missingCount += count;
numReceived--;
} else {
ShardFacetCount sfc = counts.get(name);
if (sfc == null) {
sfc = new ShardFacetCount();
sfc.name = name;
if (ftype == null) {
sfc.indexed = null;
} else if (ftype.isPointField()) {
sfc.indexed = ((PointField)ftype).toInternalByteRef(sfc.name);
} else {
sfc.indexed = new BytesRef(ftype.toInternal(sfc.name));
}
sfc.termNum = termNum++;
counts.put(name, sfc);
}
incCount(sfc, count);
terms.set(sfc.termNum);
last = count;
}
}
// the largest possible count for a missing term is (initialMincount - 1) if we received
// fewer than the number requested.
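// e.g. with initialMincount=5, a term this shard omitted could still have
// had a count of up to 4 on this shard.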
if (numRequested < 0 || numRequested != 0 && numReceived < numRequested) {
last = Math.max(0, initialMincount - 1);
}
missingMaxPossible += last;
missingMax[shardNum] = last;
counted[shardNum] = terms;
}
protected void incCount(ShardFacetCount sfc, long count) {
sfc.count += count;
}
public ShardFacetCount[] getLexSorted() {
ShardFacetCount[] arr
= counts.values().toArray(new ShardFacetCount[counts.size()]);
Arrays.sort(arr, (o1, o2) -> o1.indexed.compareTo(o2.indexed));
countSorted = arr;
return arr;
}
public ShardFacetCount[] getCountSorted() {
ShardFacetCount[] arr
= counts.values().toArray(new ShardFacetCount[counts.size()]);
Arrays.sort(arr, (o1, o2) -> {
if (o2.count < o1.count) return -1;
else if (o1.count < o2.count) return 1;
return o1.indexed.compareTo(o2.indexed);
});
countSorted = arr;
return arr;
}
// returns the max possible value this ShardFacetCount could have for this shard
// (assumes the shard did not report a count for this value)
long maxPossible(int shardNum) {
return missingMax[shardNum];
// TODO: could store the last term in the shard to tell if this term
// comes before or after it. If it comes before, we could subtract 1
}
public void respectMinCount(long minCount) {
HashMap<String, ShardFacetCount> newOne = new HashMap<>();
boolean replace = false;
for (Map.Entry<String, ShardFacetCount> ent : counts.entrySet()) {
if (ent.getValue().count >= minCount) {
newOne.put(ent.getKey(), ent.getValue());
} else {
if (log.isTraceEnabled()) {
log.trace("Removing facet/key: {}/{} mincount={}", ent.getKey(), ent.getValue(), minCount);
}
replace = true;
}
}
if (replace) {
counts = newOne;
}
}
}
/**
* <b>This API is experimental and subject to change</b>
*/
public static class ShardFacetCount {
public String name;
// the indexed form of the name... used for comparisons
public BytesRef indexed;
public long count;
public int termNum; // term number starting at 0 (used in bit arrays)
@Override
public String toString() {
return "{term=" + name + ",termNum=" + termNum + ",count=" + count + "}";
}
}
private static final class DistribFacetExistsField extends DistribFieldFacet {
private DistribFacetExistsField(ResponseBuilder rb, String facetStr) {
super(rb, facetStr);
SimpleFacets.checkMincountOnExists(field, minCount);
}
@Override
protected void incCount(ShardFacetCount sfc, long count) {
if (count>0) {
sfc.count = 1;
}
}
}
}