blob: 9b738410edb4808de0b91ce221062f68a8a10c98 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.handler.component;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import org.apache.solr.common.params.FacetParams;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.util.NamedList;
/**
* Models a single field somewhere in a hierarchy of fields as part of a pivot facet.
* This pivot field contains {@link PivotFacetValue}s which may each contain a nested
* {@link PivotFacetField} child. This <code>PivotFacetField</code> may itself
* be a child of a {@link PivotFacetValue} parent.
*
* @see PivotFacetValue
* @see PivotFacetFieldValueCollection
*/
@SuppressWarnings("rawtypes")
public class PivotFacetField {
public final String field;
// null if this is a top level pivot,
// otherwise the value of the parent pivot we are nested under
public final PivotFacetValue parentValue;
public final PivotFacetFieldValueCollection valueCollection;
// Facet parameters relating to this field
private final int facetFieldLimit;
private final int facetFieldMinimumCount;
private final int facetFieldOffset;
private final String facetFieldSort;
private final Map<Integer, Integer> numberOfValuesContributedByShard = new HashMap<>();
private final Map<Integer, Integer> shardLowestCount = new HashMap<>();
private boolean needRefinementAtThisLevel = true;
private PivotFacetField(ResponseBuilder rb, PivotFacetValue parent, String fieldName) {
field = fieldName;
parentValue = parent;
// facet params
SolrParams parameters = rb.req.getParams();
facetFieldMinimumCount = parameters.getFieldInt(field, FacetParams.FACET_PIVOT_MINCOUNT, 1);
facetFieldOffset = parameters.getFieldInt(field, FacetParams.FACET_OFFSET, 0);
facetFieldLimit = parameters.getFieldInt(field, FacetParams.FACET_LIMIT, 100);
String defaultSort = (facetFieldLimit > 0) ? FacetParams.FACET_SORT_COUNT : FacetParams.FACET_SORT_INDEX;
facetFieldSort = parameters.getFieldParam(field, FacetParams.FACET_SORT, defaultSort);
valueCollection = new PivotFacetFieldValueCollection(facetFieldMinimumCount, facetFieldOffset, facetFieldLimit, facetFieldSort);
if ( (facetFieldLimit < 0) ||
// TODO: possible refinement issue if limit=0 & mincount=0 & missing=true
// (ie: we only want the missing count for this field)
(facetFieldLimit <= 0 && facetFieldMinimumCount == 0) ||
(facetFieldSort.equals(FacetParams.FACET_SORT_INDEX) && facetFieldMinimumCount <= 0)
) {
// in any of these cases, there's no need to refine this level of the pivot
needRefinementAtThisLevel = false;
}
}
/**
* A recursive method that walks up the tree of pivot fields/values to build
* a list of String representations of the values that lead down to this
* PivotFacetField.
*
* @return A mutable List of the pivot values leading down to this pivot field,
* will never be null but may contain nulls and may be empty if this is a top
* level pivot field
* @see PivotFacetValue#getValuePath
*/
public List<String> getValuePath() {
if (null != parentValue) {
return parentValue.getValuePath();
}
return new ArrayList<String>(3);
}
/**
* A recursive method to construct a new <code>PivotFacetField</code> object from
* the contents of the {@link NamedList}s provided by the specified shard, relative
* to a parent value (if this is not the top field in the pivot hierarchy)
*
* The associated child {@link PivotFacetValue}s will be recursively built as well.
*
* @see PivotFacetValue#createFromNamedList
* @param shardNumber the id of the shard that provided this data
* @param rb The response builder of the current request
* @param owner the parent value in the current pivot (may be null)
* @param pivotValues the data from the specified shard for this pivot field, may be null or empty
* @return the new PivotFacetField, null if pivotValues is null or empty.
*/
public static PivotFacetField createFromListOfNamedLists(int shardNumber, ResponseBuilder rb, PivotFacetValue owner, List<NamedList<Object>> pivotValues) {
if (null == pivotValues || pivotValues.size() <= 0) return null;
NamedList<Object> firstValue = pivotValues.get(0);
PivotFacetField createdPivotFacetField
= new PivotFacetField(rb, owner, PivotFacetHelper.getField(firstValue));
int lowestCount = Integer.MAX_VALUE;
for (NamedList<Object> pivotValue : pivotValues) {
lowestCount = Math.min(lowestCount, PivotFacetHelper.getCount(pivotValue));
PivotFacetValue newValue = PivotFacetValue.createFromNamedList
(shardNumber, rb, createdPivotFacetField, pivotValue);
createdPivotFacetField.valueCollection.add(newValue);
}
createdPivotFacetField.shardLowestCount.put(shardNumber, lowestCount);
createdPivotFacetField.numberOfValuesContributedByShard.put(shardNumber, pivotValues.size());
return createdPivotFacetField;
}
/**
* Destructive method that recursively prunes values from the data structure
* based on the counts for those values and the effective sort, mincount, limit,
* and offset being used for each field.
* <p>
* This method should only be called after all refinement is completed just prior
* calling {@link #convertToListOfNamedLists}
* </p>
*
* @see PivotFacet#getTrimmedPivotsAsListOfNamedLists
* @see PivotFacetFieldValueCollection#trim
*/
public void trim() {
// SOLR-6331...
//
// we can probably optimize the memory usage by trimming each level of the pivot once
// we know we've fully refined the values at that level
// (ie: fold this logic into refineNextLevelOfFacets)
this.valueCollection.trim();
}
/**
* Recursively sorts the collection of values associated with this field, and
* any sub-pivots those values have.
*
* @see FacetParams#FACET_SORT
* @see PivotFacetFieldValueCollection#sort
*/
public void sort() {
this.valueCollection.sort();
}
/**
* A recursive method for generating <code>NamedLists</code> from this field
* suitable for including in a pivot facet response to the original distributed request.
*/
public List<NamedList<Object>> convertToListOfNamedLists() {
List<NamedList<Object>> convertedPivotList = null;
if (valueCollection.size() > 0) {
convertedPivotList = new LinkedList<>();
for (PivotFacetValue pivot : valueCollection)
convertedPivotList.add(pivot.convertToNamedList());
}
return convertedPivotList;
}
/**
* A recursive method for determining which {@link PivotFacetValue}s need to be
* refined for this pivot.
*
* @see PivotFacet#queuePivotRefinementRequests
*/
public void queuePivotRefinementRequests(PivotFacet pf) {
if (needRefinementAtThisLevel) {
if (0 < facetFieldMinimumCount) {
// missing is always a candidate for refinement if at least one shard met the minimum
PivotFacetValue missing = valueCollection.getMissingValue();
if (null != missing) {
processDefiniteCandidateElement(pf, valueCollection.getMissingValue());
}
}
if (! valueCollection.getExplicitValuesList().isEmpty()) {
if (FacetParams.FACET_SORT_COUNT.equals(facetFieldSort)) {
// we only need to things that are currently in our limit,
// or might be in our limit if we get increased counts from shards that
// didn't include this value the first time
final int indexOfCountThreshold
= Math.min(valueCollection.getExplicitValuesListSize(),
facetFieldOffset + facetFieldLimit) - 1;
final int countThreshold = valueCollection.getAt(indexOfCountThreshold).getCount();
int positionInResults = 0;
for (PivotFacetValue value : valueCollection.getExplicitValuesList()) {
if (positionInResults <= indexOfCountThreshold) {
// This element is within the top results, so we need to get information
// from all of the shards.
processDefiniteCandidateElement(pf, value);
} else {
// This element is not within the top results, but may still need to be refined.
processPossibleCandidateElement(pf, value, countThreshold);
}
positionInResults++;
}
} else { // FACET_SORT_INDEX
// everything needs refined to see what the per-shard mincount excluded
for (PivotFacetValue value : valueCollection.getExplicitValuesList()) {
processDefiniteCandidateElement(pf, value);
}
}
}
needRefinementAtThisLevel = false;
}
if ( pf.isRefinementsRequired() ) {
// if any refinements are needed, then we need to stop and wait to
// see how the picture may change before drilling down to child pivot fields
return;
} else {
// Since outstanding requests have been filled, then we can drill down
// to the next deeper level and check it.
refineNextLevelOfFacets(pf);
}
}
/**
* Adds refinement requests for the value for each shard that has not already contributed
* a count for this value.
*/
private void processDefiniteCandidateElement(PivotFacet pf, PivotFacetValue value) {
for (int shard = pf.knownShards.nextSetBit(0);
0 <= shard;
shard = pf.knownShards.nextSetBit(shard+1)) {
if ( ! value.shardHasContributed(shard) ) {
if ( // if we're doing index order, we need to refine anything
// (mincount may have excluded from a shard)
FacetParams.FACET_SORT_INDEX.equals(facetFieldSort)
|| (// 'missing' value isn't affected by limit, needs refined if shard didn't provide
null == value.getValue() ||
// if we are doing count order, we need to refine if the limit was hit
// (if not, the shard doesn't have the value or it would have returned already)
numberOfValuesContributedByShardWasLimitedByFacetFieldLimit(shard))) {
pf.addRefinement(shard, value);
}
}
}
}
private boolean numberOfValuesContributedByShardWasLimitedByFacetFieldLimit(int shardNumber) {
return facetFieldLimit <= numberOfValuesContributedByShard(shardNumber);
}
private int numberOfValuesContributedByShard(final int shardNumber) {
return numberOfValuesContributedByShard.containsKey(shardNumber)
? numberOfValuesContributedByShard.get(shardNumber)
: 0;
}
/**
* Checks the {@link #lowestCountContributedbyShard} for each shard, combined with the
* counts we already know, to see if this value is a viable candidate --
* <b>Does not make sense when using {@link FacetParams#FACET_SORT_INDEX}</b>
*
* @see #processDefiniteCandidateElement
*/
private void processPossibleCandidateElement(PivotFacet pf, PivotFacetValue value,
final int refinementThreshold) {
assert FacetParams.FACET_SORT_COUNT.equals(facetFieldSort)
: "Method only makes sense when sorting by count";
int maxPossibleCountAfterRefinement = value.getCount();
for (int shard = pf.knownShards.nextSetBit(0);
0 <= shard;
shard = pf.knownShards.nextSetBit(shard+1)) {
if ( ! value.shardHasContributed(shard) ) {
maxPossibleCountAfterRefinement += lowestCountContributedbyShard(shard);
}
}
if (refinementThreshold <= maxPossibleCountAfterRefinement) {
processDefiniteCandidateElement(pf, value);
}
}
private int lowestCountContributedbyShard(int shardNumber) {
return (shardLowestCount.containsKey(shardNumber))
? shardLowestCount.get(shardNumber)
: 0;
}
private void refineNextLevelOfFacets(PivotFacet pf) {
List<PivotFacetValue> explicitValsToRefine
= valueCollection.getNextLevelValuesToRefine();
for (PivotFacetValue value : explicitValsToRefine) {
if (null != value.getChildPivot()) {
value.getChildPivot().queuePivotRefinementRequests(pf);
}
}
PivotFacetValue missing = this.valueCollection.getMissingValue();
if(null != missing && null != missing.getChildPivot()) {
missing.getChildPivot().queuePivotRefinementRequests(pf);
}
}
private void incrementShardValueCount(int shardNumber) {
if (!numberOfValuesContributedByShard.containsKey(shardNumber)) {
numberOfValuesContributedByShard.put(shardNumber, 1);
} else {
numberOfValuesContributedByShard.put(shardNumber, numberOfValuesContributedByShard.get(shardNumber)+1);
}
}
private void contributeValueFromShard(int shardNumber, ResponseBuilder rb, NamedList<Object> shardValue) {
incrementShardValueCount(shardNumber);
Comparable value = PivotFacetHelper.getValue(shardValue);
int count = PivotFacetHelper.getCount(shardValue);
// We're changing values so we most mark the collection as dirty
valueCollection.markDirty();
if ( ( !shardLowestCount.containsKey(shardNumber) )
|| shardLowestCount.get(shardNumber) > count) {
shardLowestCount.put(shardNumber, count);
}
PivotFacetValue facetValue = valueCollection.get(value);
if (null == facetValue) {
// never seen before, we need to create it from scratch
facetValue = PivotFacetValue.createFromNamedList(shardNumber, rb, this, shardValue);
this.valueCollection.add(facetValue);
} else {
facetValue.mergeContributionFromShard(shardNumber, rb, shardValue);
}
}
/**
* Recursively merges the contributions from the specified shard for each
* {@link PivotFacetValue} represended in the <code>response</code>.
*
* @see PivotFacetValue#mergeContributionFromShard
* @param shardNumber the id of the shard that provided this data
* @param rb The response builder of the current request
* @param response the data from the specified shard for this pivot field, may be null
*/
public void contributeFromShard(int shardNumber, ResponseBuilder rb, List<NamedList<Object>> response) {
if (null == response) return;
for (NamedList<Object> responseValue : response) {
contributeValueFromShard(shardNumber, rb, responseValue);
}
}
public String toString(){
return String.format(Locale.ROOT, "P:%s F:%s V:%s",
parentValue, field, valueCollection);
}
}