| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| package org.apache.solr.search.facet; |
| |
| import java.io.IOException; |
| import java.util.Arrays; |
| import java.util.ArrayList; |
| import java.util.Collections; |
| import java.util.Comparator; |
| import java.util.Date; |
| import java.util.HashMap; |
| import java.util.LinkedHashMap; |
| import java.util.List; |
| import java.util.Map; |
| import java.util.function.BiPredicate; |
| import java.util.function.Function; |
| import java.util.function.IntFunction; |
| |
| import org.apache.lucene.index.LeafReaderContext; |
| import org.apache.lucene.search.Query; |
| import org.apache.lucene.util.ArrayUtil; |
| import org.apache.lucene.util.PriorityQueue; |
| import org.apache.solr.common.SolrException; |
| import org.apache.solr.common.util.SimpleOrderedMap; |
| import org.apache.solr.schema.FieldType; |
| import org.apache.solr.schema.SchemaField; |
| import org.apache.solr.search.DocSet; |
| import org.apache.solr.search.facet.SlotAcc.SlotContext; |
| import org.apache.solr.search.facet.SlotAcc.SweepableSlotAcc; |
| import org.apache.solr.search.facet.SlotAcc.SweepingCountSlotAcc; |
| |
| import static org.apache.solr.search.facet.FacetContext.SKIP_FACET; |
| |
| /** |
| * Facet processing based on field values. (not range nor by query) |
| * @see FacetField |
| */ |
| abstract class FacetFieldProcessor extends FacetProcessor<FacetField> { |
| SchemaField sf; |
| SlotAcc indexOrderAcc; |
| int effectiveMincount; |
| final boolean singlePassSlotAccCollection; |
| final FacetRequest.FacetSort sort; // never null (may be the user's requested sort, or the prelim_sort) |
| final FacetRequest.FacetSort resort; // typically null (unless the user specified a prelim_sort) |
| |
| final Map<String,AggValueSource> deferredAggs = new HashMap<String,AggValueSource>(); |
| |
| // TODO: push any of this down to base class? |
| |
| // |
| // For sort="x desc", collectAcc would point to "x", and sortAcc would also point to "x". |
| // collectAcc would be used to accumulate all buckets, and sortAcc would be used to sort those buckets. |
| // |
| SlotAcc collectAcc; // Accumulator to collect across entire domain (in addition to the countAcc). May be null. |
| SlotAcc sortAcc; // Accumulator to use for sorting *only* (i.e. not used for collection). May be an alias of countAcc, collectAcc, or indexOrderAcc |
| SlotAcc[] otherAccs; // Accumulators that do not need to be calculated across all buckets. |
| |
| SpecialSlotAcc allBucketsAcc; // this can internally refer to otherAccs and/or collectAcc. setNextReader should be called on otherAccs directly if they exist. |
| |
  FacetFieldProcessor(FacetContext fcontext, FacetField freq, SchemaField sf) {
    super(fcontext, freq);
    this.sf = sf;
    // Shard requests use an effective mincount of at most 1 -- presumably so the merger sees
    // every bucket with a hit and can apply the user's full mincount globally (TODO confirm).
    this.effectiveMincount = (int)(fcontext.isShard() ? Math.min(1 , freq.mincount) : freq.mincount);
    // A single collection pass suffices when all buckets are returned (limit==-1) and no
    // sub-facets exist (which would need a per-bucket domain).
    this.singlePassSlotAccCollection = (freq.limit == -1 && freq.subFacets.size() == 0);

    if ( null == freq.prelim_sort ) {
      // If the user has not specified any preliminary sort, then things are very simple.
      // Just use the "sort" as is w/o needing any re-sorting
      this.sort = freq.sort;
      this.resort = null;
    } else {
      assert null != freq.prelim_sort;

      if ( fcontext.isShard() ) {
        // for a shard request, we can ignore the users requested "sort" and focus solely on the prelim_sort
        // the merger will worry about the final sorting -- we don't need to resort anything...
        this.sort = freq.prelim_sort;
        this.resort = null;

      } else { // non shard...
        if ( singlePassSlotAccCollection ) { // special case situation...
          // when we can do a single pass SlotAcc collection on non-shard request, there is
          // no point re-sorting. Ignore the freq.prelim_sort and use the freq.sort option as is...
          this.sort = freq.sort;
          this.resort = null;
        } else {
          // for a non-shard request, we will use the prelim_sort as our initial sort option if it exists
          // then later we will re-sort on the final desired sort...
          this.sort = freq.prelim_sort;
          this.resort = freq.sort;
        }
      }
    }
    assert null != this.sort;
  }
| |
  /**
   * This is used to create accs for second phase (or to create accs for all aggs).
   * Reuses (reset + resize) any existing {@link #accs}; otherwise builds one SlotAcc per
   * requested stat, consulting {@link #accMap} as a cache in the single-slot case.
   */
  @Override
  protected void createAccs(int docCount, int slotCount) throws IOException {
    if (accMap == null) {
      accMap = new LinkedHashMap<>();
    }

    // allow a custom count acc to be used
    if (countAcc == null) {
      countAcc = new SlotAcc.CountSlotArrAcc(fcontext, slotCount);
    }

    if (accs != null) {
      // reuse these accs, but reset them first and resize since size could be different
      for (SlotAcc acc : accs) {
        acc.reset();
        acc.resize(new FlatteningResizer(slotCount));
      }
      return;
    } else {
      accs = new SlotAcc[ freq.getFacetStats().size() ];
    }

    int accIdx = 0;
    for (Map.Entry<String,AggValueSource> entry : freq.getFacetStats().entrySet()) {
      SlotAcc acc = null;
      if (slotCount == 1) {
        // single-slot case: an acc previously cached in accMap can simply be reset and reused
        acc = accMap.get(entry.getKey());
        if (acc != null) {
          acc.reset();
        }
      }
      if (acc == null) {
        acc = entry.getValue().createSlotAcc(fcontext, docCount, slotCount);
        acc.key = entry.getKey();
        accMap.put(acc.key, acc);
      }
      accs[accIdx++] = acc;
    }
  }
| |
  /**
   * Simple helper for checking if a {@link FacetRequest.FacetSort} is on "count" or "index" and picking
   * the existing SlotAcc
   * @return an existing SlotAcc for sorting, else null if it should be built from the Aggs
   */
  private SlotAcc getTrivialSortingSlotAcc(FacetRequest.FacetSort fsort) {
    if ("count".equals(fsort.sortVariable)) {
      assert null != countAcc;
      return countAcc;
    } else if ("index".equals(fsort.sortVariable)) {
      // allow subclass to set indexOrderAcc first
      if (indexOrderAcc == null) {
        // This sorting accumulator just goes by the slot number, so does not need to be collected
        // and hence does not need to find its way into the accMap or accs array.
        indexOrderAcc = new SlotAcc.SortSlotAcc(fcontext);
      }
      return indexOrderAcc;
    }
    return null;
  }
| |
  /**
   * Creates the count / collect / sort accumulators for the first collection phase.
   * All requested stats start out in {@link #deferredAggs}; anything that must be collected
   * across the whole domain (every stat in the single-pass case, otherwise just the agg needed
   * for sorting) is promoted into {@link #collectAcc} and removed from the deferred set.
   */
  void createCollectAcc(int numDocs, int numSlots) throws IOException {
    accMap = new LinkedHashMap<>();

    // start with the assumption that we're going to defer the computation of all stats
    deferredAggs.putAll(freq.getFacetStats());

    // we always count...
    // allow a subclass to set a custom counter.
    if (countAcc == null) {
      countAcc = new SlotAcc.CountSlotArrAcc(fcontext, numSlots);
    }

    sortAcc = getTrivialSortingSlotAcc(this.sort);

    if (this.singlePassSlotAccCollection) {
      // If we are going to return all buckets, and if there are no subfacets (that would need a domain),
      // then don't defer any aggregation calculations to a second phase.
      // This way we can avoid calculating domains for each bucket, which can be expensive.

      // TODO: BEGIN: why can't we just call createAccs here ?
      accs = new SlotAcc[ freq.getFacetStats().size() ];
      int otherAccIdx = 0;
      for (Map.Entry<String,AggValueSource> entry : freq.getFacetStats().entrySet()) {
        AggValueSource agg = entry.getValue();
        SlotAcc acc = agg.createSlotAcc(fcontext, numDocs, numSlots);
        acc.key = entry.getKey();
        accMap.put(acc.key, acc);
        accs[otherAccIdx++] = acc;
      }
      // TODO: END: why can't we just call createAccs here ?
      if (accs.length == 1) {
        collectAcc = accs[0];
      } else {
        collectAcc = new MultiAcc(fcontext, accs);
      }

      if (sortAcc == null) {
        // not sorting on count/index, so the sort must refer to one of the accs created above
        sortAcc = accMap.get(sort.sortVariable);
        assert sortAcc != null;
      }

      deferredAggs.clear();
    }

    if (sortAcc == null) {
      // sorting on a stat: that one agg must be collected up front (i.e. not deferred)
      AggValueSource sortAgg = freq.getFacetStats().get(sort.sortVariable);
      if (sortAgg != null) {
        collectAcc = sortAgg.createSlotAcc(fcontext, numDocs, numSlots);
        collectAcc.key = sort.sortVariable; // TODO: improve this
      }
      sortAcc = collectAcc;
      deferredAggs.remove(sort.sortVariable);
    }

    boolean needOtherAccs = freq.allBuckets; // TODO: use for missing too...

    if (sortAcc == null) {
      // as sort is already validated, in what case sortAcc would be null?
      throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
          "Invalid sort '" + sort + "' for field '" + sf.getName() + "'");
    }

    if (!needOtherAccs) {
      // we may need them later, but we don't want to create them now
      // otherwise we won't know if we need to call setNextReader on them.
      return;
    }

    // create the deferred aggs up front for use by allBuckets
    createOtherAccs(numDocs, 1);
  }
| |
  /**
   * Creates {@link #otherAccs} for every stat in {@link #deferredAggs} (if any).
   * On re-entry the existing accumulators are simply reset and reused.
   */
  private void createOtherAccs(int numDocs, int numSlots) throws IOException {
    if (otherAccs != null) {
      // reuse existing accumulators
      for (SlotAcc acc : otherAccs) {
        acc.reset(); // todo - make reset take numDocs and numSlots?
      }
      return;
    }

    final int numDeferred = deferredAggs.size();
    if (numDeferred <= 0) return;

    otherAccs = new SlotAcc[ numDeferred ];

    int otherAccIdx = 0;
    for (Map.Entry<String,AggValueSource> entry : deferredAggs.entrySet()) {
      AggValueSource agg = entry.getValue();
      SlotAcc acc = agg.createSlotAcc(fcontext, numDocs, numSlots);
      acc.key = entry.getKey();
      accMap.put(acc.key, acc);
      otherAccs[otherAccIdx++] = acc;
    }

    if (numDeferred == freq.getFacetStats().size()) {
      // accs and otherAccs are the same...
      accs = otherAccs;
    }
  }
| |
| int collectFirstPhase(DocSet docs, int slot, IntFunction<SlotContext> slotContext) throws IOException { |
| int num = -1; |
| if (collectAcc != null) { |
| num = collectAcc.collect(docs, slot, slotContext); |
| } |
| if (allBucketsAcc != null) { |
| num = allBucketsAcc.collect(docs, slot, slotContext); |
| } |
| return num >= 0 ? num : docs.size(); |
| } |
| |
| void collectFirstPhase(int segDoc, int slot, IntFunction<SlotContext> slotContext) throws IOException { |
| if (collectAcc != null) { |
| collectAcc.collect(segDoc, slot, slotContext); |
| } |
| if (allBucketsAcc != null) { |
| allBucketsAcc.collect(segDoc, slot, slotContext); |
| } |
| } |
| |
  /** Processes the collected data to find the top slots, and composes it in the response NamedList.
   * @param numSlots the total number of slots that were collected
   * @param slotCardinality upper bound on the number of populated slots (caps the queue size)
   * @param bucketValFromSlotNumFunc maps a slot number back to its bucket value for the response
   * @param fieldQueryValFunc maps a bucket value to the String used to build the bucket's query
   */
  SimpleOrderedMap<Object> findTopSlots(final int numSlots, final int slotCardinality,
                                        @SuppressWarnings("rawtypes") IntFunction<Comparable> bucketValFromSlotNumFunc,
                                        @SuppressWarnings("rawtypes") Function<Comparable, String> fieldQueryValFunc) throws IOException {
    assert this.sortAcc != null;
    int numBuckets = 0;

    // shards return buckets from offset 0; the merger applies the real offset
    final int off = fcontext.isShard() ? 0 : (int) freq.offset;

    long effectiveLimit = Integer.MAX_VALUE; // use max-int instead of max-long to avoid overflow
    if (freq.limit >= 0) {
      effectiveLimit = freq.limit;
      if (fcontext.isShard()) {
        if (freq.overrequest == -1) {
          // add over-request if this is a shard request and if we have a small offset (large offsets will already be gathering many more buckets than needed)
          if (freq.offset < 10) {
            effectiveLimit = (long) (effectiveLimit * 1.1 + 4); // default: add 10% plus 4 (to overrequest for very small limits)
          }
        } else {
          effectiveLimit += freq.overrequest;
        }
      } else if (null != resort && 0 < freq.overrequest) {
        // in non-shard situations, if we have a 'resort' we check for explicit overrequest > 0
        effectiveLimit += freq.overrequest;
      }
    }

    final int sortMul = sort.sortDirection.getMultiplier();

    int maxTopVals = (int) (effectiveLimit >= 0 ? Math.min(freq.offset + effectiveLimit, Integer.MAX_VALUE - 1) : Integer.MAX_VALUE - 1);
    maxTopVals = Math.min(maxTopVals, slotCardinality);
    final SlotAcc sortAcc = this.sortAcc, indexOrderAcc = this.indexOrderAcc;
    // orderPredicate.test(a, b) == true means "a" sorts below "b"; used as PriorityQueue.lessThan
    final BiPredicate<Slot,Slot> orderPredicate;
    if (indexOrderAcc != null && indexOrderAcc != sortAcc) {
      // tie-break equal sort values on index order
      orderPredicate = (a, b) -> {
        int cmp = sortAcc.compare(a.slot, b.slot) * sortMul;
        return cmp == 0 ? (indexOrderAcc.compare(a.slot, b.slot) > 0) : cmp < 0;
      };
    } else {
      // tie-break equal sort values on the raw slot number
      orderPredicate = (a, b) -> {
        int cmp = sortAcc.compare(a.slot, b.slot) * sortMul;
        return cmp == 0 ? b.slot < a.slot : cmp < 0;
      };
    }
    final PriorityQueue<Slot> queue = new PriorityQueue<Slot>(maxTopVals) {
      @Override
      protected boolean lessThan(Slot a, Slot b) { return orderPredicate.test(a, b); }
    };

    // note: We avoid object allocation by having a Slot and re-using the 'bottom'.
    Slot bottom = null;
    Slot scratchSlot = new Slot();
    boolean shardHasMoreBuckets = false;  // This shard has more buckets than were returned
    for (int slotNum = 0; slotNum < numSlots; slotNum++) {

      // screen out buckets not matching mincount
      if (effectiveMincount > 0) {
        int count = countAcc.getCount(slotNum);
        if (count < effectiveMincount) {
          if (count > 0)
            numBuckets++;  // Still increment numBuckets as long as we have some count.  This is for consistency between distrib and non-distrib mode.
          continue;
        }
      }

      numBuckets++;

      if (bottom != null) {
        // the queue is full: only insert if this slot beats the current worst entry
        shardHasMoreBuckets = true;
        scratchSlot.slot = slotNum; // scratchSlot is only used to hold this slotNum for the following line
        if (orderPredicate.test(bottom, scratchSlot)) {
          bottom.slot = slotNum;
          bottom = queue.updateTop();
        }
      } else if (effectiveLimit > 0) {
        // queue not full
        Slot s = new Slot();
        s.slot = slotNum;
        queue.add(s);
        if (queue.size() >= maxTopVals) {
          bottom = queue.top();
        }
      }
    }

    assert queue.size() <= numBuckets;

    SimpleOrderedMap<Object> res = new SimpleOrderedMap<>();
    if (freq.numBuckets) {
      if (!fcontext.isShard()) {
        res.add("numBuckets", numBuckets);
      } else {
        // shards report a mergeable cardinality estimate instead of a raw count
        calculateNumBuckets(res);
      }
    }

    FacetDebugInfo fdebug = fcontext.getDebugInfo();
    if (fdebug != null) fdebug.putInfoItem("numBuckets", (long) numBuckets);

    if (freq.allBuckets) {
      SimpleOrderedMap<Object> allBuckets = new SimpleOrderedMap<>();
      // countAcc.setValues(allBuckets, allBucketsSlot);
      allBuckets.add("count", allBucketsAcc.getSpecialCount());
      allBucketsAcc.setValues(allBuckets, -1); // -1 slotNum is unused for SpecialSlotAcc
      // allBuckets currently doesn't execute sub-facets (because it doesn't change the domain?)
      res.add("allBuckets", allBuckets);
    }

    SimpleOrderedMap<Object> missingBucket = new SimpleOrderedMap<>();
    if (freq.missing) {
      res.add("missing", missingBucket);
      // moved missing fillBucket after we fill facet since it will reset all the accumulators.
    }

    // a per-bucket filter query is needed for deferred stats and/or sub-facet domains
    final boolean needFilter = (!deferredAggs.isEmpty()) || freq.getSubFacets().size() > 0;
    if (needFilter) {
      createOtherAccs(-1, 1);
    }

    // if we are deep paging, we don't have to order the highest "offset" counts...
    // ...unless we need to resort.
    int collectCount = Math.max(0, queue.size() - (null == this.resort ? off : 0));
    //
    assert collectCount <= maxTopVals;
    Slot[] sortedSlots = new Slot[collectCount];
    // popping the (worst-first) queue in reverse leaves sortedSlots in best-first order
    for (int i = collectCount - 1; i >= 0; i--) {
      Slot slot = sortedSlots[i] = queue.pop();
      // At this point we know we're either returning this Slot as a Bucket, or resorting it,
      // so definitely fill in the bucket value -- we'll need it either way
      slot.bucketVal = bucketValFromSlotNumFunc.apply(slot.slot);

      if (needFilter || null != this.resort) {
        slot.bucketFilter = makeBucketQuery(fieldQueryValFunc.apply(slot.bucketVal));
      }
    }

    final SlotAcc resortAccForFill = resortSlots(sortedSlots); // No-Op if not needed

    if (null != this.resort) {
      // now that we've completely resorted, throw away extra docs from possible offset/overrequest...
      final int endOffset = (int)Math.min((long) sortedSlots.length,
                                          // NOTE: freq.limit is long, so no risk of overflow here
                                          off + (freq.limit < 0 ? Integer.MAX_VALUE : freq.limit));
      if (0 < off || endOffset < sortedSlots.length) {
        sortedSlots = Arrays.copyOfRange(sortedSlots, off, endOffset);
      }
    }
    @SuppressWarnings({"rawtypes"})
    List<SimpleOrderedMap> bucketList = new ArrayList<>(sortedSlots.length);

    for (Slot slot : sortedSlots) {
      SimpleOrderedMap<Object> bucket = new SimpleOrderedMap<>();
      bucket.add("val", slot.bucketVal);

      fillBucketFromSlot(bucket, slot, resortAccForFill);

      bucketList.add(bucket);
    }

    res.add("buckets", bucketList);


    if (fcontext.isShard() && shardHasMoreBuckets) {
      // Currently, "more" is an internal implementation detail and only returned for distributed sub-requests
      res.add("more", true);
    }

    if (freq.missing) {
      // TODO: it would be more efficient to build up a missing DocSet if we need it here anyway.
      fillBucket(missingBucket, getFieldMissingQuery(fcontext.searcher, freq.field), null, false, null);
    }

    return res;
  }
| |
  /**
   * Trivial helper method for building up a bucket query given the (Stringified) bucket value
   * @param bucketValue string representation of the bucket's value
   * @return a query matching the docs in that bucket of {@link #sf}
   */
  protected Query makeBucketQuery(final String bucketValue) {
    // TODO: this isn't viable for things like text fields w/ analyzers that are non-idempotent (ie: stemmers)
    // TODO: but changing it to just use TermQuery isn't safe for things like numerics, dates, etc...
    return sf.getType().getFieldQuery(null, sf, bucketValue);
  }
| |
  /**
   * Adds a "numBuckets" cardinality estimate for this shard to {@code target}, computed by
   * running an {@link HLLAgg} over the field on the (possibly prefix-filtered) base domain
   * so the merger can combine estimates across shards.
   */
  private void calculateNumBuckets(SimpleOrderedMap<Object> target) throws IOException {
    DocSet domain = fcontext.base;
    if (freq.prefix != null) {
      // restrict the domain to docs matching the requested term prefix
      Query prefixFilter = sf.getType().getPrefixQuery(null, sf, freq.prefix);
      domain = fcontext.searcher.getDocSet(prefixFilter, domain);
    }

    HLLAgg agg = new HLLAgg(freq.field);
    SlotAcc acc = agg.createSlotAcc(fcontext, domain.size(), 1);
    acc.collect(domain, 0, null); // we know HLL doesn't care about the bucket query
    acc.key = "numBuckets";
    acc.setValues(target, 0);
  }
| |
  /** Lightweight record of a single candidate bucket, used while finding/sorting the top slots. */
  private static class Slot {
    /** The Slot number used during collection */
    int slot;

    /** filled in only once we know the bucket will either be involved in resorting, or returned */
    @SuppressWarnings({"rawtypes"})
    Comparable bucketVal;

    /** Filled in if and only if needed for resorting, deferred stats, or subfacets */
    Query bucketFilter;
    // TODO: we could potentially store the bucket's (DocSet)subDomain as well,
    // but that's a much bigger object to hang onto for every slot at the same time
    // Probably best to just trust the filterCache to do its job

    /** The Slot number used during resorting */
    int resortSlotNum;
  }
| |
  /** Helper method used solely when looping over buckets to be returned in findTopSlots.
   * Emits the count, any first-phase collected values, then (if needed) computes the bucket's
   * domain to collect deferred stats and process sub-facets.
   * @param resortAcc an acc already collected during resorting (read via resortSlotNum), may be null
   */
  private void fillBucketFromSlot(SimpleOrderedMap<Object> target, Slot slot,
                                  SlotAcc resortAcc) throws IOException {
    final int slotOrd = slot.slot;
    countAcc.setValues(target, slotOrd);
    if (countAcc.getCount(slotOrd) <= 0 && !freq.processEmpty) return;

    if (slotOrd >= 0 && collectAcc != null) {
      collectAcc.setValues(target, slotOrd);
    }

    // nothing else to do unless deferred stats or sub-facets require the bucket's domain
    if (otherAccs == null && freq.subFacets.isEmpty()) return;

    assert null != slot.bucketFilter;
    final Query filter = slot.bucketFilter;
    final DocSet subDomain = fcontext.searcher.getDocSet(filter, fcontext.base);

    // if no subFacets, we only need a DocSet
    // otherwise we need more?
    // TODO: save something generic like "slotNum" in the context and use that to implement things like filter exclusion if necessary?
    // Hmmm, but we need to look up some stuff anyway (for the label?)
    // have a method like "DocSet applyConstraint(facet context, DocSet parent)"
    // that's needed for domain changing things like joins anyway???

    if (otherAccs != null) {
      // do acc at a time (traversing domain each time) or do all accs for each doc?
      for (SlotAcc acc : otherAccs) {
        if (acc == resortAcc) {
          // already collected during resorting, just need to get the value from the correct slot
          acc.setValues(target, slot.resortSlotNum);
        } else {
          acc.reset(); // TODO: only needed if we previously used for allBuckets or missing
          acc.collect(subDomain, 0, s -> { return new SlotContext(filter); });
          acc.setValues(target, 0);
        }
      }
    }

    processSubs(target, filter, subDomain, false, null);
  }
| |
| /** |
| * Helper method that resorts the slots (if needed). |
| * |
| * @return a SlotAcc that should be used {@link SlotAcc#setValues} on the final buckets via |
| * {@link Slot#resortSlotNum} or null if no special SlotAcc was needed (ie: no resorting, or resorting |
| * on something already known/collected) |
| */ |
| private SlotAcc resortSlots(Slot[] slots) throws IOException { |
| if (null == this.resort) { |
| return null; // Nothing to do. |
| } |
| assert ! fcontext.isShard(); |
| |
| // NOTE: getMultiplier() is confusing and weird and ment for use in PriorityQueue.lessThan, |
| // so it's backwards from what you'd expect in a Comparator... |
| final int resortMul = -1 * resort.sortDirection.getMultiplier(); |
| |
| SlotAcc resortAcc = getTrivialSortingSlotAcc(this.resort); |
| if (null != resortAcc) { |
| // resorting on count or index is rare (and not particularly useful) but if someone chooses to do |
| // either of these we don't need to re-collect ... instead just re-sort the slots based on |
| // the previously collected values using the originally collected slot numbers... |
| if (resortAcc.equals(countAcc)) { |
| final Comparator<Slot> comparator = null != indexOrderAcc ? |
| (new Comparator<Slot>() { |
| public int compare(Slot x, Slot y) { |
| final int cmp = resortMul * countAcc.compare(x.slot, y.slot); |
| return cmp != 0 ? cmp : indexOrderAcc.compare(x.slot, y.slot); |
| } |
| }) |
| : (new Comparator<Slot>() { |
| public int compare(Slot x, Slot y) { |
| final int cmp = resortMul * countAcc.compare(x.slot, y.slot); |
| return cmp != 0 ? cmp : Integer.compare(x.slot, y.slot); |
| } |
| }); |
| Arrays.sort(slots, comparator); |
| return null; |
| } |
| if (resortAcc.equals(indexOrderAcc)) { |
| // obviously indexOrderAcc is not null, and no need for a fancy tie breaker... |
| Arrays.sort(slots, new Comparator<Slot>() { |
| public int compare(Slot x, Slot y) { |
| return resortMul * indexOrderAcc.compare(x.slot, y.slot); |
| } |
| }); |
| return null; |
| } |
| // nothing else should be possible |
| assert false : "trivial resort isn't count or index: " + this.resort; |
| } |
| |
| assert null == resortAcc; |
| for (SlotAcc acc : otherAccs) { |
| if (acc.key.equals(this.resort.sortVariable)) { |
| resortAcc = acc; |
| break; |
| } |
| } |
| // TODO: what if resortAcc is still null, ie: bad input? ... throw an error? (see SOLR-13022) |
| // looks like equivilent sort code path silently ignores sorting if sortVariable isn't in accMap... |
| // ...and we get a deffered NPE when trying to collect. |
| assert null != resortAcc; |
| |
| final SlotAcc acc = resortAcc; |
| |
| // reset resortAcc to be (just) big enough for all the slots we care about... |
| acc.reset(); |
| acc.resize(new FlatteningResizer(slots.length)); |
| |
| // give each existing Slot a new resortSlotNum and let the resortAcc collect it... |
| for (int slotNum = 0; slotNum < slots.length; slotNum++) { |
| Slot slot = slots[slotNum]; |
| slot.resortSlotNum = slotNum; |
| |
| assert null != slot.bucketFilter : "null filter for slot=" +slot.bucketVal; |
| |
| final DocSet subDomain = fcontext.searcher.getDocSet(slot.bucketFilter, fcontext.base); |
| acc.collect(subDomain, slotNum, s -> { return new SlotContext(slot.bucketFilter); } ); |
| } |
| |
| // now resort all the Slots according to the new collected values... |
| final Comparator<Slot> comparator = null != indexOrderAcc ? |
| (new Comparator<Slot>() { |
| public int compare(Slot x, Slot y) { |
| final int cmp = resortMul * acc.compare(x.resortSlotNum, y.resortSlotNum); |
| return cmp != 0 ? cmp : indexOrderAcc.compare(x.slot, y.slot); |
| } |
| }) |
| : (new Comparator<Slot>() { |
| public int compare(Slot x, Slot y) { |
| final int cmp = resortMul * acc.compare(x.resortSlotNum, y.resortSlotNum); |
| return cmp != 0 ? cmp : Integer.compare(x.slot, y.slot); |
| } |
| }); |
| Arrays.sort(slots, comparator); |
| return acc; |
| } |
| |
| @Override |
| protected void processStats(SimpleOrderedMap<Object> bucket, Query bucketQ, DocSet docs, int docCount) throws IOException { |
| if (docCount == 0 && !freq.processEmpty || freq.getFacetStats().size() == 0) { |
| bucket.add("count", docCount); |
| return; |
| } |
| createAccs(docCount, 1); |
| assert null != bucketQ; |
| int collected = collect(docs, 0, slotNum -> { return new SlotContext(bucketQ); }); |
| |
| // countAcc.incrementCount(0, collected); // should we set the counton the acc instead of just passing it? |
| |
| assert collected == docCount; |
| addStats(bucket, collected, 0); |
| } |
| |
| // overrides but with different signature! |
| private void addStats(SimpleOrderedMap<Object> target, int count, int slotNum) throws IOException { |
| target.add("count", count); |
| if (count > 0 || freq.processEmpty) { |
| for (SlotAcc acc : accs) { |
| acc.setValues(target, slotNum); |
| } |
| } |
| } |
| |
  @Override
  void setNextReader(LeafReaderContext ctx) throws IOException {
    // base class calls this (for missing bucket...) ... go over accs[] in that case
    // (first-phase collection uses setNextReaderFirstPhase instead)
    super.setNextReader(ctx);
  }
| |
| void setNextReaderFirstPhase(LeafReaderContext ctx) throws IOException { |
| if (collectAcc != null) { |
| collectAcc.setNextReader(ctx); |
| } |
| if (otherAccs != null) { |
| for (SlotAcc acc : otherAccs) { |
| acc.setNextReader(ctx); |
| } |
| } |
| } |
| |
  /**
   * Composite SlotAcc that fans every operation out to an array of sub-accumulators so multiple
   * aggs can be collected in a single pass.  Not usable for sorting or direct value retrieval
   * (compare/getValue are unsupported); values are emitted per sub-acc via {@link #setValues}.
   */
  static class MultiAcc extends SlotAcc implements SweepableSlotAcc<SlotAcc> {
    final SlotAcc[] subAccs;

    MultiAcc(FacetContext fcontext, SlotAcc[] subAccs) {
      super(fcontext);
      this.subAccs = subAccs;
    }

    @Override
    public void setNextReader(LeafReaderContext ctx) throws IOException {
      for (SlotAcc acc : subAccs) {
        acc.setNextReader(ctx);
      }
    }

    @Override
    public void collect(int doc, int slot, IntFunction<SlotContext> slotContext) throws IOException {
      for (SlotAcc acc : subAccs) {
        acc.collect(doc, slot, slotContext);
      }
    }

    @Override
    public int compare(int slotA, int slotB) {
      throw new UnsupportedOperationException();
    }

    @Override
    public Object getValue(int slotNum) throws IOException {
      throw new UnsupportedOperationException();
    }

    @Override
    public void reset() throws IOException {
      for (SlotAcc acc : subAccs) {
        acc.reset();
      }
    }

    @Override
    public void resize(Resizer resizer) {
      for (SlotAcc acc : subAccs) {
        acc.resize(resizer);
      }
    }

    @Override
    public void setValues(SimpleOrderedMap<Object> bucket, int slotNum) throws IOException {
      for (SlotAcc acc : subAccs) {
        acc.setValues(bucket, slotNum);
      }
    }

    @Override
    public SlotAcc registerSweepingAccs(SweepingCountSlotAcc baseSweepingAcc) {
      // NOTE(review): 'p' appears unused below -- presumably leftover; the cast would at least
      // fail fast if the processor were of an unexpected type. TODO confirm before removing.
      final FacetFieldProcessor p = (FacetFieldProcessor) fcontext.processor;
      // Compact subAccs in place: sweepable accs may be replaced by their sweeping equivalent,
      // or dropped entirely (when sweeping fully subsumes them); "j" counts the accs retained.
      int j = 0;
      for (int i = 0; i < subAccs.length; i++) {
        final SlotAcc acc = subAccs[i];
        if (acc instanceof SweepableSlotAcc) {
          SlotAcc replacement = ((SweepableSlotAcc<?>)acc).registerSweepingAccs(baseSweepingAcc);
          if (replacement == null) {
            // drop acc, do not increment j
            continue;
          } else if (replacement != acc || j < i) {
            subAccs[j] = replacement;
          }
        } else if (j < i) {
          // shift a non-sweepable acc down to fill the gap left by dropped accs
          subAccs[j] = acc;
        }
        j++;
      }
      // return the simplest acc that covers whatever remains after compaction
      switch (j) {
        case 0:
          return null;
        case 1:
          return subAccs[0];
        default:
          if (j == subAccs.length) {
            return this;
          } else {
            // must resize final field subAccs
            return new MultiAcc(fcontext, ArrayUtil.copyOfSubArray(subAccs, 0, j));
          }
      }
    }
  }
| |
  /**
   * Helper method that subclasses can use to indicate they wish to use sweeping.
   * If {@link #countAcc} and {@link #collectAcc} support sweeping, then this method will:
   * <ul>
   * <li>replace {@link #collectAcc} with its sweeping equivalent</li>
   * <li>update {@link #allBucketsAcc}'s reference to {@link #collectAcc} (if it exists)</li>
   * </ul>
   *
   * @return true if the above actions were taken
   * @see SweepableSlotAcc
   * @see SweepingCountSlotAcc
   */
  protected boolean registerSweepingAccIfSupportedByCollectAcc() {
    if (countAcc instanceof SweepingCountSlotAcc && collectAcc instanceof SweepableSlotAcc) {
      final SweepingCountSlotAcc sweepingCountAcc = (SweepingCountSlotAcc)countAcc;
      // NOTE: registerSweepingAccs may return a replacement acc (or null if fully subsumed)
      collectAcc = ((SweepableSlotAcc<?>)collectAcc).registerSweepingAccs(sweepingCountAcc);
      if (allBucketsAcc != null) {
        allBucketsAcc.collectAcc = collectAcc;
        allBucketsAcc.sweepingCountAcc = sweepingCountAcc;
      }
      return true;
    }
    return false;
  }
| |
| private static final SlotContext ALL_BUCKETS_SLOT_CONTEXT = new SlotContext(null) { |
| @Override |
| public Query getSlotQuery() { |
| throw new IllegalStateException("getSlotQuery() is mutually exclusive with isAllBuckets==true"); |
| } |
| @Override |
| public boolean isAllBuckets() { |
| return true; |
| } |
| }; |
| private static final IntFunction<SlotContext> ALL_BUCKETS_SLOT_FUNCTION = new IntFunction<SlotContext>() { |
| @Override |
| public SlotContext apply(int value) { |
| return ALL_BUCKETS_SLOT_CONTEXT; |
| } |
| }; |
| |
  /**
   * SlotAcc used for the special "allBuckets" bucket: every collected doc is funneled into a
   * single fixed slot (collectAccSlot / otherAccsSlot) of the wrapped accumulators, while a
   * simple running count is tracked locally.  compare()/getValue() are unsupported; results
   * are emitted via {@link #setValues}.
   */
  static class SpecialSlotAcc extends SlotAcc {
    SlotAcc collectAcc;
    SlotAcc[] otherAccs;
    int collectAccSlot;
    int otherAccsSlot;
    long count;
    SweepingCountSlotAcc sweepingCountAcc; // null unless/until sweeping is initialized

    SpecialSlotAcc(FacetContext fcontext, SlotAcc collectAcc, int collectAccSlot, SlotAcc[] otherAccs, int otherAccsSlot) {
      super(fcontext);
      this.collectAcc = collectAcc;
      this.collectAccSlot = collectAccSlot;
      this.otherAccs = otherAccs;
      this.otherAccsSlot = otherAccsSlot;
    }

    public int getCollectAccSlot() { return collectAccSlot; }
    public int getOtherAccSlot() { return otherAccsSlot; }

    /** total number of docs collected into this special acc */
    long getSpecialCount() {
      return count;
    }

    @Override
    public void collect(int doc, int slot, IntFunction<SlotContext> slotContext) throws IOException {
      // the passed-in slot is ignored for delegation; everything lands in the fixed special slots
      assert slot != collectAccSlot || slot < 0;
      count++;
      if (collectAcc != null) {
        collectAcc.collect(doc, collectAccSlot, ALL_BUCKETS_SLOT_FUNCTION);
      }
      if (otherAccs != null) {
        for (SlotAcc otherAcc : otherAccs) {
          otherAcc.collect(doc, otherAccsSlot, ALL_BUCKETS_SLOT_FUNCTION);
        }
      }
    }

    @Override
    public void setNextReader(LeafReaderContext readerContext) throws IOException {
      // collectAcc and otherAccs will normally have setNextReader called directly on them.
      // This, however, will be used when collect(DocSet,slot) variant is used on this Acc.
      if (collectAcc != null) {
        collectAcc.setNextReader(readerContext);
      }
      if (otherAccs != null) {
        for (SlotAcc otherAcc : otherAccs) {
          otherAcc.setNextReader(readerContext);
        }
      }
    }

    @Override
    public int compare(int slotA, int slotB) {
      throw new UnsupportedOperationException();
    }

    @Override
    public Object getValue(int slotNum) throws IOException {
      throw new UnsupportedOperationException();
    }

    @Override
    public void setValues(SimpleOrderedMap<Object> bucket, int slotNum) throws IOException {
      // NOTE: the passed-in slotNum is ignored; values always come from the fixed special slots
      if (sweepingCountAcc != null) {
        sweepingCountAcc.setSweepValues(bucket, collectAccSlot);
      }
      if (collectAcc != null) {
        collectAcc.setValues(bucket, collectAccSlot);
      }
      if (otherAccs != null) {
        for (SlotAcc otherAcc : otherAccs) {
          otherAcc.setValues(bucket, otherAccsSlot);
        }
      }
    }

    @Override
    public void reset() {
      // reset should be called on underlying accs
      // TODO: but in case something does need to be done here, should we require this method to be called but do nothing for now?
      throw new UnsupportedOperationException();
    }

    @Override
    public void resize(Resizer resizer) {
      // someone else will call resize on collectAcc directly
      if (collectAccSlot >= 0) {
        collectAccSlot = resizer.getNewSlot(collectAccSlot);
      }
    }
  }
| |
| |
| /* |
| "qfacet":{"cat2":{"_l":["A"]}}, |
| "all":{"_s":[[ |
| "all", |
| {"cat3":{"_l":["A"]}}]]}, |
| "cat1":{"_l":["A"]}}} |
| */ |
| |
| @SuppressWarnings({"unchecked"}) |
| static <T> List<T> asList(Object list) { |
| return list != null ? (List<T>)list : Collections.emptyList(); |
| } |
| |
/**
 * Handles a distributed refinement request for this facet. Instead of the normal top-N
 * collection, only the buckets named in {@code fcontext.facetInfo} are computed:
 * "_l" (leaf) buckets get full faceting, "_s" (skip) buckets skip their own stats and
 * refine only the listed sub-facets, and "_p" (partial) buckets get full stats but only
 * the listed sub-facet buckets. Also refines the "missing" bucket and numBuckets when
 * requested and not skipped.
 *
 * @return the refined facet response ("buckets" list plus optional "missing"/"numBuckets")
 * @throws IOException on index access error
 */
@SuppressWarnings({"rawtypes", "unchecked"})
protected SimpleOrderedMap<Object> refineFacets() throws IOException {
// if the whole facet is marked SKIP, our own stats were already computed in a prior phase
boolean skipThisFacet = (fcontext.flags & SKIP_FACET) != 0;


List leaves = asList(fcontext.facetInfo.get("_l")); // We have not seen this bucket: do full faceting for this bucket, including all sub-facets
List<List> skip = asList(fcontext.facetInfo.get("_s")); // We have seen this bucket, so skip stats on it, and skip sub-facets except for the specified sub-facets that should calculate specified buckets.
List<List> partial = asList(fcontext.facetInfo.get("_p")); // We have not seen this bucket, do full faceting for this bucket, and most sub-facets... but some sub-facets are partial and should only visit specified buckets.

// For leaf refinements, we do full faceting for each leaf bucket. Any sub-facets of these buckets will be fully evaluated. Because of this, we should never
// encounter leaf refinements that have sub-facets that return partial results.

SimpleOrderedMap<Object> res = new SimpleOrderedMap<>();
List<SimpleOrderedMap> bucketList = new ArrayList<>( leaves.size() + skip.size() + partial.size() );
res.add("buckets", bucketList);

// TODO: an alternate implementations can fill all accs at once
createAccs(-1, 1);

// "_l": full refinement, no sub-facet info to pass down
for (Object bucketVal : leaves) {
bucketList.add( refineBucket(bucketVal, false, null) );
}

// "_s": each entry is [bucketValue, facetInfoForSubFacets]; skip this bucket's own stats
for (List bucketAndFacetInfo : skip) {
assert bucketAndFacetInfo.size() == 2;
Object bucketVal = bucketAndFacetInfo.get(0);
Map<String,Object> facetInfo = (Map<String, Object>) bucketAndFacetInfo.get(1);

bucketList.add( refineBucket(bucketVal, true, facetInfo ) );
}

// The only difference between skip and missing is the value of "skip" passed to refineBucket

// "_p": same [bucketValue, facetInfo] shape as "_s", but this bucket's stats ARE computed
for (List bucketAndFacetInfo : partial) {
assert bucketAndFacetInfo.size() == 2;
Object bucketVal = bucketAndFacetInfo.get(0);
Map<String,Object> facetInfo = (Map<String, Object>) bucketAndFacetInfo.get(1);

bucketList.add( refineBucket(bucketVal, false, facetInfo ) );
}

if (freq.missing) {
Map<String,Object> bucketFacetInfo = (Map<String,Object>)fcontext.facetInfo.get("missing");

// refine "missing" if it has its own refinement info, or if this facet isn't skipped
if (bucketFacetInfo != null || !skipThisFacet) {
SimpleOrderedMap<Object> missingBucket = new SimpleOrderedMap<>();
fillBucket(missingBucket, getFieldMissingQuery(fcontext.searcher, freq.field), null, skipThisFacet, bucketFacetInfo);
res.add("missing", missingBucket);
}
}

if (freq.numBuckets && !skipThisFacet) {
calculateNumBuckets(res);
}

// If there are just a couple of leaves, and if the domain is large, then
// going by term is likely the most efficient?
// If the domain is small, or if the number of leaves is large, then doing
// the normal collection method may be best.

return res;
}
| |
| private SimpleOrderedMap<Object> refineBucket(Object bucketVal, boolean skip, Map<String,Object> facetInfo) throws IOException { |
| SimpleOrderedMap<Object> bucket = new SimpleOrderedMap<>(); |
| FieldType ft = sf.getType(); |
| bucketVal = ft.toNativeType(bucketVal); // refinement info passed in as JSON will cause int->long and float->double |
| bucket.add("val", bucketVal); |
| |
| // fieldQuery currently relies on a string input of the value... |
| String bucketStr = bucketVal instanceof Date ? ((Date)bucketVal).toInstant().toString() : bucketVal.toString(); |
| Query domainQ = ft.getFieldQuery(null, sf, bucketStr); |
| |
| fillBucket(bucket, domainQ, null, skip, facetInfo); |
| |
| return bucket; |
| } |
| |
| /** Resizes to the specified size, remapping all existing slots to slot 0 */ |
| private static final class FlatteningResizer extends SlotAcc.Resizer { |
| private final int slotCount; |
| public FlatteningResizer(int slotCount) { |
| this.slotCount = slotCount; |
| } |
| @Override |
| public int getNewSize() { |
| return slotCount; |
| } |
| |
| @Override |
| public int getNewSlot(int oldSlot) { |
| return 0; |
| } |
| } |
| } |