/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.solr.search.facet;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.IdentityHashMap;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.function.IntFunction;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.Query;
import org.apache.solr.common.MapWriter;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.util.SimpleOrderedMap;
import org.apache.solr.handler.component.ResponseBuilder;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.request.SolrRequestInfo;
import org.apache.solr.schema.SchemaField;
import org.apache.solr.search.BitDocSet;
import org.apache.solr.search.DocIterator;
import org.apache.solr.search.DocSet;
import org.apache.solr.search.QParser;
import org.apache.solr.search.SolrIndexSearcher;
import org.apache.solr.search.SyntaxError;
import org.apache.solr.search.facet.SlotAcc.SlotContext;
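
// For orientation: the kind of request this processor serves looks roughly like the
// following sketch (field and facet names here are illustrative, not from this file):
//
//   json.facet = {
//     categories : {
//       type  : terms,
//       field : cat,
//       facet : { avg_price : "avg(price)" }
//     }
//   }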
/** Base abstraction for a class that computes facets. This is fairly internal to the module. */
public abstract class FacetProcessor<FacetRequestT extends FacetRequest> {
SimpleOrderedMap<Object> response;
FacetContext fcontext;
FacetRequestT freq;
DocSet filter; // additional filters specified by "filter"
// TODO: do these need to be on the context to support recomputing during multi-select?
LinkedHashMap<String,SlotAcc> accMap;
SlotAcc[] accs;
SlotAcc.CountSlotAcc countAcc;
FacetProcessor(FacetContext fcontext, FacetRequestT freq) {
this.fcontext = fcontext;
this.freq = freq;
fcontext.processor = this;
}
public MapWriter getResponse() {
return response;
}
public void process() throws IOException {
handleDomainChanges();
}
private void evalFilters() throws IOException {
if (freq.domain.filters == null || freq.domain.filters.isEmpty()) return;
this.filter = fcontext.searcher.getDocSet(evalJSONFilterQueryStruct(fcontext, freq.domain.filters));
}
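// The "filters" list consumed below mirrors the "filter" key of a facet domain in a
// JSON Facet request. Each entry is either a raw query string or a {"param":...}
// reference, e.g. (illustrative values):
//
//   domain : { filter : [ "inStock:true", { param : "myfilter" } ] }
//
// where "myfilter" names a (possibly multivalued, possibly absent) request parameter.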
private static List<Query> evalJSONFilterQueryStruct(FacetContext fcontext, List<Object> filters) throws IOException {
List<Query> qlist = new ArrayList<>(filters.size());
// TODO: prevent parsing filters each time!
for (Object rawFilter : filters) {
if (rawFilter instanceof String) {
qlist.add(parserFilter((String) rawFilter, fcontext.req));
} else if (rawFilter instanceof Map) {
@SuppressWarnings({"unchecked"})
Map<String,Object> m = (Map<String, Object>) rawFilter;
String type;
Object args;
if (m.size() == 1) {
Map.Entry<String, Object> entry = m.entrySet().iterator().next();
type = entry.getKey();
args = entry.getValue();
} else {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Can't convert map to query:" + rawFilter);
}
if (!"param".equals(type)) {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Unknown type. Can't convert map to query:" + rawFilter);
}
String tag;
if (!(args instanceof String)) {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Can't retrieve non-string param:" + args);
}
tag = (String)args;
String[] qstrings = fcontext.req.getParams().getParams(tag);
// The idea is to support a multivalued parameter, i.e. 0 or more values;
// when no value is specified, the reference is ignored rather than throwing an exception.
if (qstrings != null) {
for (String qstring : qstrings) {
qlist.add(parserFilter(qstring, fcontext.req));
}
}
} else {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Bad query (expected a string):" + rawFilter);
}
}
return qlist;
}
private static Query parserFilter(String rawFilter, SolrQueryRequest req) {
QParser parser = null;
try {
parser = QParser.getParser(rawFilter, req);
parser.setIsFilter(true);
Query symbolicFilter = parser.getQuery();
if (symbolicFilter == null) {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
"QParser yields null, perhaps unresolved parameter reference in: "+rawFilter);
}
return symbolicFilter;
} catch (SyntaxError syntaxError) {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, syntaxError);
}
}
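// A facet's domain can be replaced or modified in several ways; the JSON Facet API
// expresses these as keys under "domain", roughly (an illustrative, non-exhaustive sketch):
//
//   domain : {
//     query       : "...",     // replace the domain outright (explicitQueries)
//     excludeTags : "...",     // recompute the domain without the tagged filters
//     filter      : [ ... ],   // restrict the domain with additional filters
//     join / graph / blockParent / blockChildren : ...   // map to related documents
//   }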
private void handleDomainChanges() throws IOException {
if (freq.domain == null) return;
if (null != freq.domain.explicitQueries) {
try {
final List<Query> domainQs = evalJSONFilterQueryStruct(fcontext, freq.domain.explicitQueries);
if (domainQs.isEmpty()) {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
"'query' domain must not evaluate to an empty list of queries");
}
fcontext.base = fcontext.searcher.getDocSet(domainQs);
} catch (SolrException e) {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
"Unable to parse domain 'query': " + freq.domain.explicitQueries +
" -- reason: " + e.getMessage(),
e);
}
} else {
// mutually exclusive with freq.domain.explicitQueries
handleFilterExclusions();
}
// Check filters... if we do have filters, they apply after domain changes.
// We still evaluate them first because they can be used (as acceptDocs) in a parent->child domain change.
evalFilters();
handleJoinField();
handleGraphField();
boolean appliedFilters = handleBlockJoin();
if (this.filter != null && !appliedFilters) {
fcontext.base = fcontext.base.intersection( filter );
}
}
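// Typical multi-select faceting use of excludeTags (an assumed request sketch):
//
//   fq = {!tag=COLOR}color:blue
//   json.facet = { colors : { type:terms, field:color, domain:{ excludeTags:COLOR } } }
//
// The "colors" facet is then computed as if the tagged filter had not been applied.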
private void handleFilterExclusions() throws IOException {
List<String> excludeTags = freq.domain.excludeTags;
if (excludeTags == null || excludeTags.size() == 0) {
return;
}
@SuppressWarnings({"rawtypes"})
Map tagMap = (Map) fcontext.req.getContext().get("tags");
if (tagMap == null) {
// no filters were tagged
return;
}
IdentityHashMap<Query,Boolean> excludeSet = new IdentityHashMap<>();
for (String excludeTag : excludeTags) {
Object olst = tagMap.get(excludeTag);
// tagMap conceptually has entries of Map<String,List<QParser>>, but this is subject to change in the future
if (!(olst instanceof Collection)) continue;
for (Object o : (Collection<?>)olst) {
if (!(o instanceof QParser)) continue;
QParser qp = (QParser)o;
try {
excludeSet.put(qp.getQuery(), Boolean.TRUE);
} catch (SyntaxError syntaxError) {
// This should not happen since we should only be retrieving a previously parsed query
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, syntaxError);
}
}
}
if (excludeSet.size() == 0) return;
List<Query> qlist = new ArrayList<>();
// TODO: somehow remove the ResponseBuilder dependency
ResponseBuilder rb = SolrRequestInfo.getRequestInfo().getResponseBuilder();
// add the base query
if (!excludeSet.containsKey(rb.getQuery())) {
qlist.add(rb.getQuery());
}
// add the filters
if (rb.getFilters() != null) {
for (Query q : rb.getFilters()) {
if (!excludeSet.containsKey(q)) {
qlist.add(q);
}
}
}
// now walk back up the context tree
// TODO: we lose parent exclusions...
for (FacetContext curr = fcontext; curr != null; curr = curr.parent) {
if (curr.filter != null) {
qlist.add( curr.filter );
}
}
// recompute the base domain
fcontext.base = fcontext.searcher.getDocSet(qlist);
}
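// A "join" domain change maps the current domain to related documents, e.g.
// (an illustrative request fragment):  domain : { join : { from:"manu_id", to:"id" } }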
/** modifies the context base if there is a join field domain change */
private void handleJoinField() throws IOException {
if (null == freq.domain.joinField) return;
final Query domainQuery = freq.domain.joinField.createDomainQuery(fcontext);
fcontext.base = fcontext.searcher.getDocSet(domainQuery);
}
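// A "graph" domain change transitively walks from->to edges, e.g.
// (an illustrative request fragment):  domain : { graph : { from:"node_id", to:"edge_id" } }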
/** modifies the context base if there is a graph field domain change */
private void handleGraphField() throws IOException {
if (null == freq.domain.graphField) return;
final Query domainQuery = freq.domain.graphField.createDomainQuery(fcontext);
fcontext.base = fcontext.searcher.getDocSet(domainQuery);
}
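// Block join domain changes map between parents and children in a block-indexed
// (nested documents) collection, e.g. (illustrative request fragments):
//   domain : { blockChildren : "doc_type:parent" }   // parent domain -> child docs
//   domain : { blockParent   : "doc_type:parent" }   // child domain -> parent docs
// In both cases the query identifies the set of all parent documents.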
/** Returns true if filters have already been applied to fcontext.base. */
private boolean handleBlockJoin() throws IOException {
boolean appliedFilters = false;
if (!(freq.domain.toChildren || freq.domain.toParent)) return appliedFilters;
// TODO: avoid query parsing per-bucket somehow...
String parentStr = freq.domain.parents;
Query parentQuery;
try {
QParser parser = QParser.getParser(parentStr, fcontext.req);
parser.setIsFilter(true);
parentQuery = parser.getQuery();
} catch (SyntaxError err) {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Error parsing block join parent specification: " + parentStr, err);
}
BitDocSet parents = fcontext.searcher.getDocSetBits(parentQuery);
DocSet input = fcontext.base;
DocSet result;
if (freq.domain.toChildren) {
// If there are filters on this facet, then use them as acceptDocs when executing toChildren.
// We need to remember to not redundantly re-apply these filters after.
DocSet acceptDocs = this.filter;
if (acceptDocs == null) {
acceptDocs = fcontext.searcher.getLiveDocSet();
} else {
appliedFilters = true;
}
result = BlockJoin.toChildren(input, parents, acceptDocs, fcontext.qcontext);
} else {
result = BlockJoin.toParents(input, parents, fcontext.qcontext);
}
fcontext.base = result;
return appliedFilters;
}
protected void processStats(SimpleOrderedMap<Object> bucket, Query bucketQ, DocSet docs, int docCount) throws IOException {
if ((docCount == 0 && !freq.processEmpty) || freq.getFacetStats().size() == 0) {
bucket.add("count", docCount);
return;
}
createAccs(docCount, 1);
int collected = collect(docs, 0, slotNum -> new SlotContext(bucketQ));
countAcc.incrementCount(0, collected);
assert collected == docCount;
addStats(bucket, 0);
}
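// Each entry of freq.getFacetStats() is an aggregation from the request, e.g.
// facet : { x : "avg(price)" } (illustrative); createAccs turns each one into a
// SlotAcc keyed by its request name ("x"), alongside a count accumulator.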
protected void createAccs(int docCount, int slotCount) throws IOException {
accMap = new LinkedHashMap<>();
// allow a custom count acc to be used
if (countAcc == null) {
countAcc = new SlotAcc.CountSlotArrAcc(fcontext, slotCount);
}
for (Map.Entry<String,AggValueSource> entry : freq.getFacetStats().entrySet()) {
SlotAcc acc = entry.getValue().createSlotAcc(fcontext, docCount, slotCount);
acc.key = entry.getKey();
accMap.put(acc.key, acc);
}
accs = new SlotAcc[accMap.size()];
int i=0;
for (SlotAcc acc : accMap.values()) {
accs[i++] = acc;
}
}
// note: only called by enum/stream prior to collect
void resetStats() throws IOException {
countAcc.reset();
for (SlotAcc acc : accs) {
acc.reset();
}
}
int collect(DocSet docs, int slot, IntFunction<SlotContext> slotContext) throws IOException {
int count = 0;
SolrIndexSearcher searcher = fcontext.searcher;
if (0 == docs.size()) {
// we may be in a "processEmpty" type situation where the client still cares about this bucket
// either way, we should let our accumulators know about the empty set, so they can collect &
// compute the slot (i.e. let them decide if they care even when its size == 0)
if (accs != null) {
for (SlotAcc acc : accs) {
acc.collect(docs, slot, slotContext); // NOT per-seg collectors
}
}
return count;
}
final List<LeafReaderContext> leaves = searcher.getIndexReader().leaves();
final Iterator<LeafReaderContext> ctxIt = leaves.iterator();
LeafReaderContext ctx = null;
int segBase = 0;
int segMax;
int adjustedMax = 0;
for (DocIterator docsIt = docs.iterator(); docsIt.hasNext(); ) {
final int doc = docsIt.nextDoc();
if (doc >= adjustedMax) {
do {
ctx = ctxIt.next();
if (ctx == null) {
// should be impossible
throw new RuntimeException("INTERNAL FACET ERROR");
}
segBase = ctx.docBase;
segMax = ctx.reader().maxDoc();
adjustedMax = segBase + segMax;
} while (doc >= adjustedMax);
assert doc >= ctx.docBase;
setNextReader(ctx);
}
count++;
collect(doc - segBase, slot, slotContext); // per-seg collectors
}
return count;
}
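// Per-segment collection: collect(DocSet,...) above translates each global doc id to a
// segment-local id (doc - segBase) before handing it to the per-segment method below.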
void collect(int segDoc, int slot, IntFunction<SlotContext> slotContext) throws IOException {
if (accs != null) {
for (SlotAcc acc : accs) {
acc.collect(segDoc, slot, slotContext);
}
}
}
void setNextReader(LeafReaderContext ctx) throws IOException {
// countAcc.setNextReader is a no-op
for (SlotAcc acc : accs) {
acc.setNextReader(ctx);
}
}
void addStats(SimpleOrderedMap<Object> target, int slotNum) throws IOException {
int count = countAcc.getCount(slotNum);
target.add("count", count);
if (count > 0 || freq.processEmpty) {
for (SlotAcc acc : accs) {
acc.setValues(target, slotNum);
}
}
}
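// A filled bucket comes out shaped roughly like (an illustrative sketch):
//   { count : 42, avg_price : 12.3, mySubFacet : { ... } }
// i.e. the count, one entry per stat, and one entry per sub-facet.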
void fillBucket(SimpleOrderedMap<Object> bucket, Query q, DocSet result, boolean skip, Map<String,Object> facetInfo) throws IOException {
boolean needDocSet = (!skip && freq.getFacetStats().size() > 0) || freq.getSubFacets().size() > 0;
int count;
if (result != null) {
count = result.size();
} else if (needDocSet) {
if (q == null) {
result = fcontext.base;
// result.incref(); // OFF-HEAP
} else {
result = fcontext.searcher.getDocSet(q, fcontext.base);
}
count = result.size(); // don't really need this if we are skipping, but it's free.
} else {
if (q == null) {
count = fcontext.base.size();
} else {
count = fcontext.searcher.numDocs(q, fcontext.base);
}
}
try {
if (!skip) {
processStats(bucket, q, result, count);
}
processSubs(bucket, q, result, skip, facetInfo);
} finally {
if (result != null) {
// result.decref(); // OFF-HEAP
result = null;
}
}
}
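// "skip" and "facetInfo" relate to distributed refinement: when this bucket is being
// skipped, only sub-facets that carry refinement info are descended into.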
@SuppressWarnings({"unchecked"})
void processSubs(SimpleOrderedMap<Object> response, Query filter, DocSet domain, boolean skip, Map<String,Object> facetInfo) throws IOException {
boolean emptyDomain = domain == null || domain.size() == 0;
for (Map.Entry<String,FacetRequest> sub : freq.getSubFacets().entrySet()) {
FacetRequest subRequest = sub.getValue();
// This includes a static check if a sub-facet can possibly produce something from
// an empty domain. Should this be changed to a dynamic check as well? That would
// probably require actually executing the facet anyway, and dropping it at the
// end if it was unproductive.
if (emptyDomain && !freq.processEmpty && !subRequest.canProduceFromEmpty()) {
continue;
}
Map<String,Object> facetInfoSub = null;
if (facetInfo != null) {
facetInfoSub = (Map<String,Object>)facetInfo.get(sub.getKey());
}
// If we're skipping this node, then we only need to process sub-facets that have facet info specified.
if (skip && facetInfoSub == null) continue;
// make a new context for each sub-facet since they can change the domain
FacetContext subContext = fcontext.sub(filter, domain);
subContext.facetInfo = facetInfoSub;
if (!skip) subContext.flags &= ~FacetContext.SKIP_FACET; // turn off the skip flag if we're not skipping this bucket
if (fcontext.getDebugInfo() != null) { // if fcontext.debugInfo != null, it means rb.debug() == true
FacetDebugInfo fdebug = new FacetDebugInfo();
subContext.setDebugInfo(fdebug);
fcontext.getDebugInfo().addChild(fdebug);
}
Object result = subRequest.process(subContext);
response.add( sub.getKey(), result);
}
}
@SuppressWarnings("unused")
static DocSet getFieldMissing(SolrIndexSearcher searcher, DocSet docs, String fieldName) throws IOException {
SchemaField sf = searcher.getSchema().getField(fieldName);
DocSet hasVal = searcher.getDocSet(sf.getType().getRangeQuery(null, sf, null, null, false, false));
DocSet answer = docs.andNot(hasVal);
// hasVal.decref(); // OFF-HEAP
return answer;
}
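// The query built below matches documents with no indexed value in the field, i.e. the
// equivalent of the classic lucene-syntax filter -field:[* TO *].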
static Query getFieldMissingQuery(SolrIndexSearcher searcher, String fieldName) throws IOException {
SchemaField sf = searcher.getSchema().getField(fieldName);
Query hasVal = sf.getType().getRangeQuery(null, sf, null, null, false, false);
BooleanQuery.Builder noVal = new BooleanQuery.Builder();
noVal.add(hasVal, BooleanClause.Occur.MUST_NOT);
return noVal.build();
}
}