blob: bbaadd4861e5728abdf1b29e97ff981a6b676ae2 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.search;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.apache.commons.lang3.StringUtils;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.core.StopFilterFactory;
import org.apache.lucene.analysis.util.TokenFilterFactory;
import org.apache.lucene.index.Term;
import org.apache.lucene.queries.function.FunctionQuery;
import org.apache.lucene.queries.function.FunctionScoreQuery;
import org.apache.lucene.queries.function.ValueSource;
import org.apache.lucene.queries.function.valuesource.ProductFloatFunction;
import org.apache.lucene.queries.function.valuesource.QueryValueSource;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.BoostQuery;
import org.apache.lucene.search.DisjunctionMaxQuery;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.MultiPhraseQuery;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.spans.SpanQuery;
import org.apache.lucene.util.Version;
import org.apache.solr.analysis.TokenizerChain;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrException.ErrorCode;
import org.apache.solr.common.params.DisMaxParams;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.parser.QueryParser;
import org.apache.solr.parser.SolrQueryParserBase.MagicFieldName;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.schema.FieldType;
import org.apache.solr.schema.IndexSchema;
import org.apache.solr.search.ExtendedDismaxQParser.ExtendedSolrQueryParser.Alias;
import org.apache.solr.util.SolrPluginUtils;
import com.google.common.collect.Multimap;
import com.google.common.collect.Multimaps;
/**
* Query parser that generates DisjunctionMaxQueries based on user configuration.
* See Wiki page http://wiki.apache.org/solr/ExtendedDisMax
*/
public class ExtendedDismaxQParser extends QParser {
/**
* A field we can't ever find in any schema, so we can safely tell
* DisjunctionMaxQueryParser to use it as our defaultField, and
* map aliases from it to any field in our schema.
* <p>
* In this class it is handed to {@link #createEdismaxQueryParser} as the
* default field and registered as an alias for the configured query fields
* (main query) or phrase fields (phrase boosts).
* NOTE(review): nothing visible here reassigns it; it looks like it could be
* declared final - confirm against the rest of the file.
*/
private static String IMPOSSIBLE_FIELD_NAME = "\uFFFC\uFFFC\uFFFC";
/**
* Empty subclass of {@link SolrPluginUtils} whose only purpose is to give the
* utility class a shorter name inside this file.
*/
private static class U extends SolrPluginUtils {
/* :NOOP */
}
/** shorten the class references for utilities */
private static interface DMP extends DisMaxParams {
/**
* User fields. The fields that can be used by the end user to create field-specific queries.
*/
public static String UF = "uf";
/**
* Lowercase Operators. If set to true, 'or' and 'and' will be considered OR and AND, otherwise
* lowercase operators will be considered terms to search for.
*/
public static String LOWERCASE_OPS = "lowercaseOperators";
/**
* Multiplicative boost. Boost functions which scores are going to be multiplied to the score
* of the main query (instead of just added, like with bf)
*/
public static String MULT_BOOST = "boost";
/**
* If set to true, stopwords are removed from the query.
*/
public static String STOPWORDS = "stopwords";
}
/** Parse settings; assigned in the constructor from createConfiguration(). */
private ExtendedDismaxConfiguration config;
/** Result of parsing the user's query string in parse(); null when the query string is blank. */
private Query parsedUserQuery;
/** Query parsed from config.altQ when the user's query string is blank and an alternate is configured; null otherwise. */
private Query altUserQuery;
/** Boost queries returned by getBoostQueries() during parse(); null until parse() reaches that step. */
private List<Query> boostQueries;
/** Set to true as soon as parse() is entered, so getHighlightQuery() does not trigger another parse. */
private boolean parsed = false;
/**
 * Creates the parser and builds its configuration through
 * {@link #createConfiguration}.
 *
 * @param qstr the query string to parse
 * @param localParams local parameters of the query
 * @param params request parameters
 * @param req the request being served
 */
public ExtendedDismaxQParser(String qstr, SolrParams localParams, SolrParams params, SolrQueryRequest req) {
  super(qstr, localParams, params, req);
  this.config = createConfiguration(qstr, localParams, params, req);
}
/**
* Builds the complete edismax query for this request.
* <p>
* The user's query string (or, when it is blank, the alternate query from
* config.altQ) becomes a MUST clause. Phrase-field boosts, boost queries and
* boost functions are added as SHOULD clauses, and multiplicative boosts, if
* any, wrap the finished query in a FunctionScoreQuery.
*
* @return the assembled query, or null when the query string is blank and no
* alternate query is configured
* @throws SyntaxError propagated from field validation and the sub-parsers
*/
@Override
public Query parse() throws SyntaxError {
// remember that parsing was attempted so getHighlightQuery() does not repeat it
parsed = true;
/* Container for the main query: the user (or alternate) query as a MUST
* clause plus the optional boosting clauses added further down.
*/
BooleanQuery.Builder query = new BooleanQuery.Builder();
/* * * Main User Query * * */
parsedUserQuery = null;
String userQuery = getString();
altUserQuery = null;
if (StringUtils.isBlank(userQuery)) {
// If no query is specified, we may have an alternate
if (config.altQ != null) {
QParser altQParser = subQuery(config.altQ, null);
altUserQuery = altQParser.getQuery();
query.add( altUserQuery , BooleanClause.Occur.MUST );
} else {
// no query string and no alternate: callers receive null instead of an exception
return null;
// throw new SyntaxError("missing query string" );
}
} else {
// There is a valid query string
ExtendedSolrQueryParser up = createEdismaxQueryParser(this, IMPOSSIBLE_FIELD_NAME);
// the impossible default field is an alias for all configured query fields
up.addAlias(IMPOSSIBLE_FIELD_NAME, config.tiebreaker, config.queryFields);
addAliasesFromRequest(up, config.tiebreaker);
validateQueryFields(up);
up.setPhraseSlop(config.qslop); // slop for explicit user phrase queries
up.setAllowLeadingWildcard(true);
// nested query parsing is only allowed when the user fields permit the magic query field
up.setAllowSubQueryParsing(config.userFields.isAllowed(MagicFieldName.QUERY.field));
// Escaping is deferred: it is only applied (via escapeUserQuery below) when
// parsing the query as written fails. The clauses are needed anyway for the
// sloppy phrase queries.
List<Clause> clauses = splitIntoClauses(userQuery, false);
// Always rebuild mainUserQuery from clauses to catch modifications from splitIntoClauses
// This was necessary for userFields modifications to get propagated into the query.
// Convert lower or mixed case operators to uppercase if we saw them.
// only do this for the lucene query part and not for phrase query boosting
// since some fields might not be case insensitive.
// We don't use a regex for this because it might change an AND or OR in
// a phrase query in a case sensitive field.
String mainUserQuery = rebuildUserQuery(clauses, config.lowercaseOperators);
// but always for unstructured implicit bqs created by getFieldQuery
up.minShouldMatch = config.minShouldMatch;
up.setSplitOnWhitespace(config.splitOnWhitespace);
// first try the query as written; null means that attempt failed
parsedUserQuery = parseOriginalQuery(up, mainUserQuery, clauses, config);
if (parsedUserQuery == null) {
// fall back to a version with the special syntax escaped
parsedUserQuery = parseEscapedQuery(up, escapeUserQuery(clauses), config);
}
query.add(parsedUserQuery, BooleanClause.Occur.MUST);
addPhraseFieldQueries(query, clauses, config);
}
/* * * Boosting Query * * */
boostQueries = getBoostQueries();
for(Query f : boostQueries) {
query.add(f, BooleanClause.Occur.SHOULD);
}
/* * * Boosting Functions * * */
List<Query> boostFunctions = getBoostFunctions();
for(Query f : boostFunctions) {
query.add(f, BooleanClause.Occur.SHOULD);
}
//
// create a boosted query (scores multiplied by boosts)
//
Query topQuery = QueryUtils.build(query, this);
List<ValueSource> boosts = getMultiplicativeBoosts();
if (boosts.size()>1) {
// several boosts: multiply them together first, then apply the product
ValueSource prod = new ProductFloatFunction(boosts.toArray(new ValueSource[boosts.size()]));
topQuery = FunctionScoreQuery.boostByValue(topQuery, prod.asDoubleValuesSource());
} else if (boosts.size() == 1) {
topQuery = FunctionScoreQuery.boostByValue(topQuery, boosts.get(0).asDoubleValuesSource());
}
return topQuery;
}
/**
 * Validates the configured query field names. Each name is first resolved
 * through its alias (if any) to the underlying source fields, and every source
 * field is then checked against the schema.
 *
 * @param up parser used
 * @throws SyntaxError for invalid field name
 */
protected void validateQueryFields(ExtendedSolrQueryParser up) throws SyntaxError {
  final Set<String> queryFieldNames = config.queryFields.keySet();
  final List<String> sourceFields = new ArrayList<>(queryFieldNames.size());
  for (String queryField : queryFieldNames) {
    buildQueryFieldList(queryField, up.getAlias(queryField), sourceFields, up);
  }
  checkFieldsInSchema(sourceFields);
}
/**
 * Collects the source (non-alias) field names behind a query field name,
 * recursing through aliases.
 *
 * @param fieldName query field name
 * @param alias alias registered for {@code fieldName}, or null if it is a plain field
 * @param flds list that receives the source field names
 * @param up parser used
 * @throws SyntaxError for invalid field name
 */
private void buildQueryFieldList(String fieldName, Alias alias, List<String> flds, ExtendedSolrQueryParser up) throws SyntaxError {
  if (alias != null) {
    // reject alias loops before descending into the alias members
    up.validateCyclicAliasing(fieldName);
    flds.addAll(getFieldsFromAlias(up, alias));
  } else {
    flds.add(fieldName);
  }
}
/**
 * Resolves an alias to the source (non-alias) field names it stands for.
 *
 * @param up parser used
 * @param a field alias
 * @return list of source fields
 * @throws SyntaxError for invalid field name
 */
private List<String> getFieldsFromAlias(ExtendedSolrQueryParser up, Alias a) throws SyntaxError {
  final List<String> resolved = new ArrayList<>();
  final Iterator<String> members = a.fields.keySet().iterator();
  while (members.hasNext()) {
    final String member = members.next();
    // a member may itself be an alias, so resolve it recursively
    buildQueryFieldList(member, up.getAlias(member), resolved, up);
  }
  return resolved;
}
/**
 * Verifies that a field name can be looked up in the schema.
 *
 * @param fieldName source field name to verify
 * @throws SyntaxError when the schema lookup fails for {@code fieldName}
 */
private void checkFieldInSchema(String fieldName) throws SyntaxError {
  try {
    // the lookup result is not needed; only whether it throws matters
    config.schema.getField(fieldName);
  } catch (SolrException lookupFailure) {
    final String message = "Query Field '" + fieldName + "' is not a valid field name";
    throw new SyntaxError(message, lookupFailure);
  }
}
/**
 * Verifies every name in a list of source field names, stopping at the first
 * invalid one.
 *
 * @param flds list of source field names to verify
 * @throws SyntaxError for invalid field name
 */
private void checkFieldsInSchema(List<String> flds) throws SyntaxError {
  final Iterator<String> names = flds.iterator();
  while (names.hasNext()) {
    checkFieldInSchema(names.next());
  }
}
/**
 * Adds shingled phrase queries to all the fields specified in the pf, pf2 and
 * pf3 parameters.
 *
 * @param query builder of the main query that receives the phrase boosts
 * @param clauses clauses of the user query
 * @param config configuration supplying the phrase fields and tiebreaker
 * @throws SyntaxError propagated from building the shingled phrase queries
 */
protected void addPhraseFieldQueries(BooleanQuery.Builder query, List<Clause> clauses,
    ExtendedDismaxConfiguration config) throws SyntaxError {
  // sloppy phrase queries for proximity
  final List<FieldParams> phraseFields = config.getAllPhraseFields();
  if (phraseFields.isEmpty()) {
    return;
  }

  // keep only clauses without a field that are not already phrases
  final List<Clause> phraseCandidates = new ArrayList<>(clauses.size());
  for (Clause candidate : clauses) {
    final boolean fielded = candidate.field != null;
    if (fielded || candidate.isPhrase) {
      continue;
    }
    if (candidate.isBareWord()) {
      // avoid putting explicit operators in the phrase query
      final String word = candidate.val;
      final boolean operatorKeyword =
          "OR".equals(word) || "AND".equals(word) || "NOT".equals(word) || "TO".equals(word);
      if (operatorKeyword) {
        continue;
      }
    }
    phraseCandidates.add(candidate);
  }

  // {wordGram -> [phraseField]}
  final Multimap<Integer, FieldParams> byWordGram = Multimaps.index(phraseFields, FieldParams::getWordGrams);
  for (Map.Entry<Integer, Collection<FieldParams>> gramEntry : byWordGram.asMap().entrySet()) {
    // Within one wordGram size, group the fields by slop: the same field may
    // appear several times with different slop values, and those phrase
    // queries should be summed rather than maxed.
    final Multimap<Integer, FieldParams> bySlop = Multimaps.index(gramEntry.getValue(), FieldParams::getSlop);
    for (Map.Entry<Integer, Collection<FieldParams>> slopEntry : bySlop.asMap().entrySet()) {
      final Collection<FieldParams> fieldsForSlop = slopEntry.getValue();
      addShingledPhraseQueries(query, phraseCandidates, fieldsForSlop,
          gramEntry.getKey(), config.tiebreaker, slopEntry.getKey());
    }
  }
}
/**
 * Factory for the {@code ExtendedDismaxConfiguration} that carries the
 * parameters needed to parse the query. Subclasses may override it to supply
 * their own configuration.
 *
 * @param qstr the query string (not used by this implementation)
 * @param localParams local parameters of the query
 * @param params request parameters
 * @param req the request being served
 * @return a new configuration built from the given parameters and request
 */
protected ExtendedDismaxConfiguration createConfiguration(String qstr,
    SolrParams localParams, SolrParams params, SolrQueryRequest req) {
  final ExtendedDismaxConfiguration configuration =
      new ExtendedDismaxConfiguration(localParams, params, req);
  return configuration;
}
/**
 * Factory for the {@code ExtendedSolrQueryParser} that will parse the query.
 *
 * @param qParser the owning QParser
 * @param field default field for the new parser
 * @return a new parser instance
 */
protected ExtendedSolrQueryParser createEdismaxQueryParser(QParser qParser, String field) {
  final ExtendedSolrQueryParser queryParser = new ExtendedSolrQueryParser(qParser, field);
  return queryParser;
}
/**
 * Parses an escaped version of the user's query. Called when parsing the
 * original query did not produce a result.
 *
 * @param up parser used
 * @param escapedUserQuery query that is parsed, should already be escaped so that no trivial parse errors are encountered
 * @param config Configuration options for this parse request
 * @return the resulting query (flattened if needed) with "min should match" rules applied as specified in the config.
 * @see #parseOriginalQuery
 * @see SolrPluginUtils#flattenBooleanQuery
 */
protected Query parseEscapedQuery(ExtendedSolrQueryParser up,
    String escapedUserQuery, ExtendedDismaxConfiguration config) throws SyntaxError {
  final Query parsedQuery = up.parse(escapedUserQuery);
  if (!(parsedQuery instanceof BooleanQuery)) {
    // nothing to flatten (this also covers a null result)
    return parsedQuery;
  }
  final BooleanQuery.Builder flattened = new BooleanQuery.Builder();
  SolrPluginUtils.flattenBooleanQuery(flattened, (BooleanQuery) parsedQuery);
  SolrPluginUtils.setMinShouldMatch(flattened, config.minShouldMatch, config.mmAutoRelax);
  return QueryUtils.build(flattened, this);
}
/**
 * Parses the user's original query. The query string is parsed as written; any
 * exception raised while doing so is swallowed, which normally results in null
 * being returned so the caller can retry with an escaped query.
 *
 * @param up parser used
 * @param mainUserQuery query string that is parsed
 * @param clauses used to dictate "min should match" logic
 * @param config Configuration options for this parse request
 * @return the resulting query with "min should match" rules applied as specified in the config.
 * @see #parseEscapedQuery
 */
protected Query parseOriginalQuery(ExtendedSolrQueryParser up,
    String mainUserQuery, List<Clause> clauses, ExtendedDismaxConfiguration config) {
  Query parsedQuery = null;
  try {
    up.setRemoveStopFilter(!config.stopwords);
    up.exceptions = true;
    parsedQuery = up.parse(mainUserQuery);
    if (shouldRemoveStopFilter(config, parsedQuery)) {
      // the query consisted only of stop words: parse again keeping all of them
      up.setRemoveStopFilter(true);
      parsedQuery = up.parse(mainUserQuery);
    }
  } catch (Exception ignored) {
    // ignore failure and reparse later after escaping reserved chars
    up.exceptions = false;
  }

  if (!(parsedQuery instanceof BooleanQuery)) {
    // null (failed parse) and non-boolean queries get no mm processing
    return parsedQuery;
  }

  // config.minShouldMatch holds the value of mm which MIGHT have come from the
  // user, but could also have been derived from q.op. When the query contains
  // explicit operators (except for AND), use the provided mm spec if present
  // and otherwise turn off mm processing.
  final String mmSpec = foundOperators(clauses, config.lowercaseOperators)
      ? config.solrParams.get(DisMaxParams.MM, "0%")
      : config.minShouldMatch;
  return SolrPluginUtils.setMinShouldMatch((BooleanQuery) parsedQuery, mmSpec, config.mmAutoRelax);
}
/**
 * Decides whether the query should be parsed again with the stop filter removed.
 *
 * @param config configuration for this parse request
 * @param query the query produced by the first parse
 * @return true if stopword handling is enabled and the parsed query was empty,
 *         false in any other case.
 */
protected boolean shouldRemoveStopFilter(ExtendedDismaxConfiguration config,
    Query query) {
  if (!config.stopwords) {
    return false;
  }
  return isEmpty(query);
}
/**
 * Rebuilds a query string from the clauses using their already-escaped values.
 * Phrases and the bare operator words OR, AND and NOT are wrapped in double
 * quotes, and fielded clauses get the default user-field boost appended when
 * one is configured.
 *
 * @param clauses clauses of the user query
 * @return the escaped query string, each clause followed by a space
 */
private String escapeUserQuery(List<Clause> clauses) {
  final StringBuilder escaped = new StringBuilder();
  for (Clause c : clauses) {
    final String text = c.val;
    final boolean operatorWord = "OR".equals(text) || "AND".equals(text) || "NOT".equals(text);
    final boolean quoted = c.isPhrase || operatorWord;
    final boolean fielded = c.field != null;

    if (c.must != 0) {
      escaped.append(c.must);
    }
    if (fielded) {
      escaped.append(c.field).append(':');
    }
    if (quoted) {
      escaped.append('"').append(text).append('"');
    } else {
      escaped.append(text);
    }
    if (fielded) {
      // Add the default user field boost, if any
      final Float defaultBoost = config.userFields.getBoost(c.field);
      if (defaultBoost != null) {
        escaped.append("^").append(defaultBoost);
      }
    }
    escaped.append(' ');
  }
  return escaped.toString();
}
/**
 * Reports whether any clause is, or carries, an explicit operator other than
 * AND: a '+' or '-' prefix, or a bare OR / NOT word ('or' too when lowercase
 * operators are enabled).
 *
 * @param clauses clauses of the user query
 * @param lowercaseOperators whether a lowercase 'or' counts as an operator
 * @return true if at least one such operator was found
 */
private boolean foundOperators(List<Clause> clauses, boolean lowercaseOperators) {
  for (Clause c : clauses) {
    if (c.must == '+' || c.must == '-') {
      return true;
    }
    if (!c.isBareWord()) {
      continue;
    }
    final String word = c.val;
    if ("OR".equals(word) || "NOT".equals(word) || (lowercaseOperators && "or".equals(word))) {
      return true;
    }
  }
  return false;
}
/**
 * Produces a query string from the raw clauses, uppercasing 'and' and 'or'
 * where they can act as operators.
 *
 * @param clauses the clauses of the query string to be rebuilt
 * @param lowercaseOperators if true, lowercase 'and' and 'or' clauses will
 *        be recognized as operators and uppercased in the final query string.
 * @return the generated query string, each clause followed by a space.
 */
protected String rebuildUserQuery(List<Clause> clauses, boolean lowercaseOperators) {
  final StringBuilder rebuilt = new StringBuilder();
  final int lastIndex = clauses.size() - 1;
  int index = 0;
  for (Clause c : clauses) {
    String token = c.raw;
    // and / or can't be operators in the first or last position
    final boolean interior = index > 0 && index < lastIndex;
    if (lowercaseOperators && interior) {
      if ("AND".equalsIgnoreCase(token)) {
        token = "AND";
      } else if ("OR".equalsIgnoreCase(token)) {
        token = "OR";
      }
    }
    rebuilt.append(token).append(' ');
    index++;
  }
  return rebuilt.toString();
}
/**
 * Parses all multiplicative boosts into value sources. A boost that parses to
 * a FunctionQuery contributes its value source directly; any other query is
 * wrapped in a QueryValueSource with a default value of 1.0.
 *
 * @return the value sources, empty when no multiplicative boost is configured
 * @throws SyntaxError if a boost cannot be parsed
 */
protected List<ValueSource> getMultiplicativeBoosts() throws SyntaxError {
  final List<ValueSource> sources = new ArrayList<>();
  if (!config.hasMultiplicativeBoosts()) {
    return sources;
  }
  for (String spec : config.multBoosts) {
    if (spec == null || spec.isEmpty()) {
      continue;
    }
    final Query boostQuery = subQuery(spec, FunctionQParserPlugin.NAME).getQuery();
    final ValueSource source = (boostQuery instanceof FunctionQuery)
        ? ((FunctionQuery) boostQuery).getValueSource()
        : new QueryValueSource(boostQuery, 1.0f);
    sources.add(source);
  }
  return sources;
}
/**
 * Parses all boost functions. Each configured string is split into
 * function-to-boost pairs; every function is parsed as a function query and
 * wrapped in a BoostQuery when its boost is present and differs from 1.
 *
 * @return the function queries, empty when no boost function is configured
 * @throws SyntaxError if a function cannot be parsed
 */
protected List<Query> getBoostFunctions() throws SyntaxError {
  final List<Query> functionQueries = new LinkedList<>();
  if (!config.hasBoostFunctions()) {
    return functionQueries;
  }
  for (String spec : config.boostFuncs) {
    if (spec == null || spec.isEmpty()) {
      continue;
    }
    final Map<String, Float> weightedFunctions = SolrPluginUtils.parseFieldBoosts(spec);
    for (Map.Entry<String, Float> weighted : weightedFunctions.entrySet()) {
      Query functionQuery = subQuery(weighted.getKey(), FunctionQParserPlugin.NAME).getQuery();
      final Float weight = weighted.getValue();
      if (weight != null && weight.floatValue() != 1f) {
        functionQuery = new BoostQuery(functionQuery, weight);
      }
      functionQueries.add(functionQuery);
    }
  }
  return functionQueries;
}
/**
 * Parses all boost queries.
 * <p>
 * Null and whitespace-only entries are skipped, in line with how
 * {@link #getMultiplicativeBoosts()} and {@link #getBoostFunctions()} treat
 * missing values.
 *
 * @return the parsed boost queries, empty when no boost query is configured
 * @throws SyntaxError if a boost query cannot be parsed
 */
protected List<Query> getBoostQueries() throws SyntaxError {
  // local deliberately not named like the boostQueries field, to avoid shadowing it
  List<Query> parsedBoosts = new LinkedList<>();
  if (config.hasBoostParams()) {
    for (String qs : config.boostParams) {
      // a null entry used to cause a NullPointerException on trim()
      if (qs == null || qs.trim().isEmpty()) continue;
      Query q = subQuery(qs, null).getQuery();
      parsedBoosts.add(q);
    }
  }
  return parsedBoosts;
}
/**
 * Extracts all the aliased fields from the request and adds them to up.
 * <p>
 * An alias is declared by a request parameter named {@code f.<name>.qf}, whose
 * value is a field-boost list. Parameters whose value yields no fields are
 * skipped individually, so one empty alias declaration does not prevent the
 * remaining aliases from being registered.
 *
 * @param up parser that receives the aliases
 * @param tiebreaker tiebreaker assigned to every alias
 */
private void addAliasesFromRequest(ExtendedSolrQueryParser up, float tiebreaker) {
  final String prefix = "f.";
  final String suffix = ".qf";
  Iterator<String> it = config.solrParams.getParameterNamesIterator();
  while (it.hasNext()) {
    String param = it.next();
    // The length check rejects names such as "f.qf", where prefix and suffix
    // overlap: both startsWith and endsWith match there, but the substring
    // below would throw StringIndexOutOfBoundsException.
    if (param.length() < prefix.length() + suffix.length()
        || !param.startsWith(prefix) || !param.endsWith(suffix)) {
      continue;
    }
    // Add the alias
    String fname = param.substring(prefix.length(), param.length() - suffix.length());
    String qfReplacement = config.solrParams.get(param);
    Map<String,Float> parsedQf = SolrPluginUtils.parseFieldBoosts(qfReplacement);
    if (parsedQf.isEmpty()) {
      // skip only this alias; returning here would drop every alias iterated after it
      continue;
    }
    up.addAlias(fname, tiebreaker, parsedQf);
  }
}
/**
 * Modifies the main query by adding a new optional Query consisting of
 * shingled phrase queries across the specified clauses, using the specified
 * field to boost mappings.
 *
 * @param mainQuery Where the phrase boosting queries will be added
 * @param clauses Clauses that will be used to construct the phrases
 * @param fields Field to boost mappings for the phrase queries
 * @param shingleSize how big the phrases should be, 0 means a single phrase
 * @param tiebreaker tie breaker value for the DisjunctionMaxQueries
 * @param slop phrase slop applied to the generated phrase queries
 * @throws SyntaxError if the generated phrase query string cannot be parsed
 */
protected void addShingledPhraseQueries(final BooleanQuery.Builder mainQuery,
    final List<Clause> clauses,
    final Collection<FieldParams> fields,
    int shingleSize,
    final float tiebreaker,
    final int slop)
    throws SyntaxError {
  if (fields == null || fields.isEmpty()) {
    return;
  }
  if (clauses == null || clauses.size() < shingleSize) {
    return;
  }

  // a shingle size of 0 means one phrase spanning every clause
  final int gramSize = (shingleSize == 0) ? clauses.size() : shingleSize;
  final int lastOffset = gramSize - 1;
  final int windowCount = clauses.size() - lastOffset;

  // one quoted phrase per window of gramSize consecutive clauses
  final StringBuilder phraseText = new StringBuilder();
  for (int first = 0; first < windowCount; first++) {
    phraseText.append('"');
    for (int offset = 0; offset <= lastOffset; offset++) {
      phraseText.append(clauses.get(first + offset).val).append(' ');
    }
    phraseText.append("\" ");
  }

  /* for parsing sloppy phrases using DisjunctionMaxQueries */
  final ExtendedSolrQueryParser phraseParser = createEdismaxQueryParser(this, IMPOSSIBLE_FIELD_NAME);
  phraseParser.addAlias(IMPOSSIBLE_FIELD_NAME, tiebreaker, getFieldBoosts(fields));
  phraseParser.setPhraseSlop(slop);
  phraseParser.setRemoveStopFilter(true); // remove stop filter and keep stopwords
  phraseParser.setSplitOnWhitespace(config.splitOnWhitespace);

  /* :TODO: reevaluate using makeDismax=true vs false...
   *
   * The DismaxQueryParser always used DisjunctionMaxQueries for the pf boost,
   * for the same reasons it used them for the qf fields. When the
   * ExtendedDismaxQParserPlugin was first written, a "makeDismax=false"
   * property was added to use BooleanQueries instead, and the reason for it
   * was not recalled when asked later:
   *
   * https://issues.apache.org/jira/browse/SOLR-1553?focusedCommentId=12793813#action_12793813
   *
   * For now dismax style queries are kept because they seem the most logical
   * and are back compatible, but the original motivation should be worked out.
   */
  phraseParser.makeDismax = true;

  // minClauseSize is independent of the shingleSize because of stop words
  // (if they are removed from the middle, so be it, but we need at least
  // two or there shouldn't be a boost)
  phraseParser.minClauseSize = 2;

  // TODO: perhaps we shouldn't use synonyms either...
  final Query phraseBoost = phraseParser.parse(phraseText.toString());
  if (phraseBoost != null) {
    mainQuery.add(phraseBoost, BooleanClause.Occur.SHOULD);
  }
}
/**
 * Converts field parameters into a map from field name to boost, in the
 * iteration order of the given collection.
 *
 * @param fields the field parameters to convert
 * @return a {fieldName, fieldBoost} map for the given fields.
 */
private Map<String, Float> getFieldBoosts(Collection<FieldParams> fields) {
  final Map<String, Float> boostsByField = new LinkedHashMap<>(fields.size());
  fields.forEach(params -> boostsByField.put(params.getField(), params.getBoost()));
  return boostsByField;
}
/**
 * @return the names of the configured query fields
 */
@Override
public String[] getDefaultHighlightFields() {
  final Set<String> queryFieldNames = config.queryFields.keySet();
  return queryFieldNames.toArray(new String[0]);
}
/**
 * Returns the query to highlight, parsing first if that has not happened yet.
 *
 * @return the parsed user query, or the alternate query when there is no user query
 * @throws SyntaxError if parsing fails
 */
@Override
public Query getHighlightQuery() throws SyntaxError {
  if (!parsed) {
    parse();
  }
  if (parsedUserQuery != null) {
    return parsedUserQuery;
  }
  return altUserQuery;
}
/**
 * Adds the alternate query, the boost queries (raw and parsed, once they have
 * been computed) and the raw boost function parameters to the debug output.
 *
 * @param debugInfo the list receiving the debug entries
 */
@Override
public void addDebugInfo(NamedList<Object> debugInfo) {
  super.addDebugInfo(debugInfo);
  debugInfo.add("altquerystring", altUserQuery);
  if (boostQueries != null) {
    debugInfo.add("boost_queries", config.boostParams);
    debugInfo.add("parsed_boost_queries",
        QueryParsing.toString(boostQueries, getReq().getSchema()));
  }
  final String[] boostFunctionParams = getReq().getParams().getParams(DisMaxParams.BF);
  debugInfo.add("boostfuncs", boostFunctionParams);
}
/**
 * One piece of the user's query string as produced by splitIntoClauses.
 */
protected static class Clause {
  /** field name the clause is restricted to, or null */
  protected String field;
  /** if the clause is +(foo:bar) then rawField=(foo */
  protected String rawField;
  /** true when the clause was written as a quoted phrase */
  protected boolean isPhrase;
  /** true when whitespace was seen while scanning the clause */
  protected boolean hasWhitespace;
  /** true when the clause contained characters that had to be escaped */
  protected boolean hasSpecialSyntax;
  /** true when the clause turned out to be empty */
  protected boolean syntaxError;
  /** '+' or '-', or 0 when the clause has no such prefix */
  protected char must;
  /** the field value (minus the field name, +/-, quotes) */
  protected String val;
  /** the raw clause w/o leading/trailing whitespace */
  protected String raw;

  /**
   * @return true when the clause has no +/- prefix and is not a phrase
   */
  boolean isBareWord() {
    return !(must != 0 || isPhrase);
  }
}
/**
* Splits a query string into clauses.
* <p>
* Each clause records an optional leading '+' or '-', an optional field name
* accepted by getFieldName and allowed by the user fields, whether it is a
* quoted phrase, its value with special characters backslash-escaped, and the
* raw text it was taken from.
* If a quoted phrase is never closed, the whole string is split again with
* quotes treated like any other character.
*
* @param s the query string to split
* @param ignoreQuote when true, double quotes do not start or end phrases
* @return the clauses in the order they appear in {@code s}
*/
public List<Clause> splitIntoClauses(String s, boolean ignoreQuote) {
ArrayList<Clause> lst = new ArrayList<>(4);
Clause clause;
int pos=0;
int end=s.length();
char ch=0;
int start;
// true while the current clause has no accepted field name
boolean disallowUserField;
while (pos < end) {
clause = new Clause();
disallowUserField = true;
ch = s.charAt(pos);
// skip whitespace in front of the clause
while (Character.isWhitespace(ch)) {
if (++pos >= end) break;
ch = s.charAt(pos);
}
// remember where the raw clause text begins
start = pos;
// a leading + or - only counts when something follows it
if ((ch=='+' || ch=='-') && (pos+1)<end) {
clause.must = ch;
pos++;
}
clause.field = getFieldName(s, pos, end);
// a recognized field is dropped again if the user fields do not allow it
if(clause.field != null && !config.userFields.isAllowed(clause.field)) {
clause.field = null;
}
if (clause.field != null) {
disallowUserField = false;
int colon = s.indexOf(':',pos);
clause.rawField = s.substring(pos, colon);
pos += colon - pos; // skip the field name
pos++; // skip the ':'
}
// nothing left to read: stop without adding this clause
if (pos>=end) break;
// the quote character we are currently inside of, or 0
char inString=0;
ch = s.charAt(pos);
if (!ignoreQuote && ch=='"') {
clause.isPhrase = true;
inString = '"';
pos++;
}
// collects the clause value
StringBuilder sb = new StringBuilder();
while (pos < end) {
ch = s.charAt(pos++);
if (ch=='\\') { // skip escaped chars, but leave escaped
sb.append(ch);
if (pos >= end) {
sb.append(ch); // double backslash if we are at the end of the string
break;
}
// copy the escaped character unchanged
ch = s.charAt(pos++);
sb.append(ch);
continue;
} else if (inString != 0 && ch == inString) {
// closing quote: the phrase is complete
inString=0;
break;
} else if (Character.isWhitespace(ch)) {
clause.hasWhitespace=true;
if (inString == 0) {
// end of the token if we aren't in a string, backing
// up the position.
pos--;
break;
}
}
if (inString == 0) {
if (!ignoreQuote && ch == '"') {
// end of the token if we aren't in a string, backing
// up the position.
pos--;
break;
}
// outside of a string these characters are escaped with a backslash in the value
switch (ch) {
case '!':
case '(':
case ')':
case ':':
case '^':
case '[':
case ']':
case '{':
case '}':
case '~':
case '*':
case '?':
case '"':
case '+':
case '-':
case '\\':
case '|':
case '&':
case '/':
clause.hasSpecialSyntax = true;
sb.append('\\');
}
} else if (ch=='"') {
// only char we need to escape in a string is double quote
sb.append('\\');
}
sb.append(ch);
}
clause.val = sb.toString();
if (clause.isPhrase) {
if (inString != 0) {
// detected bad quote balancing... retry
// parsing with quotes like any other char
return splitIntoClauses(s, true);
}
// special syntax in a string isn't special
clause.hasSpecialSyntax = false;
} else {
// an empty clause... must be just a + or - on its own
if (clause.val.length() == 0) {
clause.syntaxError = true;
if (clause.must != 0) {
// turn the lone operator into an escaped literal value
clause.val="\\"+clause.must;
clause.must = 0;
clause.hasSpecialSyntax = true;
} else {
// uh.. this shouldn't happen.
clause=null;
}
}
}
if (clause != null) {
if(disallowUserField) {
clause.raw = s.substring(start, pos);
// escape colons, except for "match all" query
if(!"*:*".equals(clause.raw)) {
// only colons that follow a non-backslash character are escaped
clause.raw = clause.raw.replaceAll("([^\\\\]):", "$1\\\\:");
}
} else {
clause.raw = s.substring(start, pos);
// Add default userField boost if no explicit boost exists
if(config.userFields.isAllowed(clause.field) && !clause.raw.contains("^")) {
Float boost = config.userFields.getBoost(clause.field);
if(boost != null)
clause.raw += "^" + boost;
}
}
lst.add(clause);
}
}
return lst;
}
/**
* Returns a field name or legal field alias from the current
* position of the string.
* <p>
* The candidate is the text between {@code pos} (after skipping any leading
* '(', '+' or '-') and the next ':'. It is returned only when the schema has a
* field type for it, a request parameter {@code f.<name>.qf} exists, or it is a
* magic field name.
*
* @param s the query string
* @param pos position at which the candidate name starts
* @param end end of the region to consider
* @return the field name, or null when there is no acceptable name at {@code pos}
*/
public String getFieldName(String s, int pos, int end) {
if (pos >= end) return null;
int p=pos;
int colon = s.indexOf(':',pos);
// make sure there is space after the colon, but not whitespace
// (a colon at pos itself, or no colon at all, is rejected here too)
if (colon<=pos || colon+1>=end || Character.isWhitespace(s.charAt(colon+1))) return null;
char ch = s.charAt(p++);
// skip leading '(', '+' and '-'; pos is advanced along so the name starts after them
while ((ch=='(' || ch=='+' || ch=='-') && (pos<end)) {
ch = s.charAt(p++);
pos++;
}
// the first character of the name must be a Java identifier part
if (!Character.isJavaIdentifierPart(ch)) return null;
// the remaining characters up to the colon may also be '-' or '.'
while (p<colon) {
ch = s.charAt(p++);
if (!(Character.isJavaIdentifierPart(ch) || ch=='-' || ch=='.')) return null;
}
String fname = s.substring(pos, p);
boolean isInSchema = getReq().getSchema().getFieldTypeNoEx(fname) != null;
boolean isAlias = config.solrParams.get("f."+fname+".qf") != null;
boolean isMagic = (null != MagicFieldName.get(fname));
return (isInSchema || isAlias || isMagic) ? fname : null;
}
/**
 * Splits a string on whitespace, keeping double-quoted sections together.
 * A backslash protects the character after it. A double quote directly
 * preceded by a letter or digit does not open a quoted section. When a quoted
 * section is never closed, the string is split again with quotes ignored.
 *
 * @param s the string to split
 * @param ignoreQuote when true, double quotes are treated like any other character
 * @return the pieces in order; consecutive whitespace yields empty pieces
 */
public static List<String> split(String s, boolean ignoreQuote) {
  final List<String> pieces = new ArrayList<>(4);
  final int length = s.length();
  int pieceStart = 0;
  int index = 0;
  char openQuote = 0;
  char current = 0;

  while (index < length) {
    final char previous = current;
    current = s.charAt(index);
    index++;

    if (current == '\\') {
      // skip the escaped character
      index++;
      continue;
    }
    if (openQuote != 0 && current == openQuote) {
      openQuote = 0;
      continue;
    }
    if (!ignoreQuote && current == '"') {
      // a quote directly preceded by a number or letter does not start a string
      if (!Character.isLetterOrDigit(previous)) {
        openQuote = current;
      }
      continue;
    }
    if (openQuote == 0 && Character.isWhitespace(current)) {
      pieces.add(s.substring(pieceStart, index - 1));
      pieceStart = index;
    }
  }

  if (pieceStart < length) {
    pieces.add(s.substring(pieceStart, length));
  }
  if (openQuote != 0) {
    // unbalanced quote... ignore them
    return split(s, true);
  }
  return pieces;
}
/**
* Kind of query most recently requested from ExtendedSolrQueryParser. The
* parser records it in its {@code type} field before resolving aliases.
*/
enum QType {
// set by getFieldQuery for unquoted text and for lists of terms
FIELD,
// set by getFieldQuery for quoted text or when an explicit slop is given
PHRASE,
// set by getPrefixQuery
PREFIX,
// NOTE(review): presumably set by getWildcardQuery - not visible in this part of the file
WILDCARD,
// NOTE(review): presumably set by a fuzzy-query override - not visible in this part of the file
FUZZY,
// set by getRangeQuery
RANGE
}
/**
* Single shared exception instance with the message "UnknownField".
* NOTE(review): presumably thrown when a query references an unknown field so
* that no new exception has to be allocated each time - the throw site is not
* visible in this part of the file.
*/
static final RuntimeException unknownField = new RuntimeException("UnknownField");
static {
// record the stack trace once, at class initialization
unknownField.fillInStackTrace();
}
/**
* A subclass of SolrQueryParser that supports aliasing fields for
* constructing DisjunctionMaxQueries.
*/
public static class ExtendedSolrQueryParser extends SolrQueryParser {
/**
* A simple container for storing alias info.
*/
protected static class Alias {
/** tiebreaker given to addAlias for this alias */
public float tie;
/** the field name to boost mapping given to addAlias for this alias */
public Map<String,Float> fields;
}
// NOTE(review): presumably selects DisjunctionMaxQuery over BooleanQuery when
// alias fields are combined - the code that reads it is not visible here
boolean makeDismax=true;
// NOTE(review): not read or written in the visible part of the file
boolean allowWildcard=true;
int minClauseSize = 0; // minimum number of clauses per phrase query...
// used when constructing boosting part of query via sloppy phrases
boolean exceptions; // allow exceptions to be thrown (for example on a missing field)
// analyzers keyed by field name, consulted by newFieldQuery when the stop
// filter is to be removed; created lazily there
private Map<String, Analyzer> nonStopFilterAnalyzerPerField;
// when true, newFieldQuery uses an analyzer obtained from noStopwordFilterAnalyzer
private boolean removeStopFilter;
String minShouldMatch; // for inner boolean queries produced from a single fieldQuery
/**
* Where we store a map from field name we expect to see in our query
* string, to Alias object containing the fields to use in our
* DisjunctionMaxQuery and the tiebreaker to use.
*/
protected Map<String,Alias> aliases = new HashMap<>(3);
// The fields below hold the arguments of the query currently being built. The
// get*Query overrides fill them in before handing over to getAliasedQuery() /
// getAliasedMultiTermQuery().
private QType type;
private String field;
// the query text; for range queries the lower bound
private String val;
// upper bound of a range query
private String val2;
// query terms when a list of terms is passed to getFieldQuery
private List<String> vals;
// range queries: whether the lower bound is inclusive
private boolean bool;
// range queries: whether the upper bound is inclusive
private boolean bool2;
// NOTE(review): not assigned in the visible part of the file
private float flt;
// phrase slop for the current query
private int slop;
/**
 * Creates the parser and sets its default operator from the q.op parameter,
 * where local params take precedence over request params.
 *
 * @param parser the owning QParser
 * @param defaultField the default field
 */
public ExtendedSolrQueryParser(QParser parser, String defaultField) {
  super(parser, defaultField);
  // Respect the q.op parameter before mm will be applied later
  final SolrParams effectiveParams = SolrParams.wrapDefaults(parser.getLocalParams(), parser.getParams());
  final String opParam = effectiveParams.get(QueryParsing.OP);
  setDefaultOperator(QueryParsing.parseOP(opParam));
}
/**
 * Sets whether field queries are analyzed with the stop filter removed.
 *
 * @param remove true to use the analyzer without the stop filter
 */
public void setRemoveStopFilter(boolean remove) {
  this.removeStopFilter = remove;
}
/**
 * Builds the boolean query through the superclass and, when one was produced,
 * passes it through QueryUtils.makeQueryable.
 *
 * @param clauses the clauses to combine
 * @return the resulting query, or null when the superclass produced none
 * @throws SyntaxError propagated from the superclass
 */
@Override
protected Query getBooleanQuery(List<BooleanClause> clauses) throws SyntaxError {
  final Query built = super.getBooleanQuery(clauses);
  return (built == null) ? null : QueryUtils.makeQueryable(built);
}
/**
 * Registers an alias with this query parser, replacing any alias already
 * registered under the same name.
 *
 * @param field the field name that should trigger alias mapping
 * @param tiebreaker the tiebreaker to be used in the DisjunctionMaxQuery
 * @param fieldBoosts the mapping from fieldname to boost value that
 *        should be used to build up the clauses of the DisjunctionMaxQuery.
 * @see SolrPluginUtils#parseFieldBoosts
 */
public void addAlias(String field, float tiebreaker,
    Map<String,Float> fieldBoosts) {
  final Alias alias = new Alias();
  alias.fields = fieldBoosts;
  alias.tie = tiebreaker;
  aliases.put(field, alias);
}
/**
 * Looks up the alias registered for a field.
 *
 * @param field the field name to look up
 * @return the Alias, or null if no alias is registered for the field
 */
protected Alias getAlias(String field) {
  final Alias registered = aliases.get(field);
  return registered;
}
/**
 * Records a field (or, when quoted, phrase) query request with the parser's
 * current phrase slop and resolves it through the aliases.
 */
@Override
protected Query getFieldQuery(String field, String val, boolean quoted, boolean raw) throws SyntaxError {
  if (quoted) {
    this.type = QType.PHRASE;
  } else {
    this.type = QType.FIELD;
  }
  this.field = field;
  this.vals = null;
  this.val = val;
  this.slop = getPhraseSlop(); // unspecified
  return getAliasedQuery();
}
/**
 * Records a phrase clause carrying an explicit slop in the scratch fields and
 * resolves it through {@link #getAliasedQuery()}.
 */
@Override
protected Query getFieldQuery(String field, String val, int slop) throws SyntaxError {
  // Clause description first ...
  this.field = field;
  this.type = QType.PHRASE;
  this.slop = slop;
  // ... then its single text value; the multi-term list does not apply here.
  this.vals = null;
  this.val = val;
  return getAliasedQuery();
}
/**
 * Records a multi-term field clause in the scratch fields and resolves it through
 * {@link #getAliasedMultiTermQuery()}. The {@code raw} flag is not consulted here.
 */
@Override
protected Query getFieldQuery(String field, List<String> queryTerms, boolean raw) throws SyntaxError {
  // Multi-term clause: the term list is set and the single-value slot is cleared.
  this.vals = queryTerms;
  this.val = null;
  this.field = field;
  this.type = QType.FIELD;
  this.slop = getPhraseSlop();
  return getAliasedMultiTermQuery();
}
/**
 * Handles a prefix clause. An empty prefix on the field {@code "*"} matches all
 * documents; anything else is recorded in the scratch fields and resolved through
 * {@link #getAliasedQuery()}.
 */
@Override
protected Query getPrefixQuery(String field, String val) throws SyntaxError {
  final boolean everything = val.isEmpty() && field.equals("*");
  if (everything) {
    return new MatchAllDocsQuery();
  }
  this.vals = null;
  this.val = val;
  this.field = field;
  this.type = QType.PREFIX;
  return getAliasedQuery();
}
/**
 * Builds the field query with an analyzer chosen by this parser rather than the one
 * passed in: the {@code analyzer} argument is ignored and the analyzer is looked up
 * from the request schema for {@code field}.
 *
 * <p>When stop filter removal is enabled (see {@code setRemoveStopFilter}), the analyzer
 * returned by {@code noStopwordFilterAnalyzer} is used and remembered per field in
 * {@code nonStopFilterAnalyzerPerField}, so it is built only once per field for this
 * parser instance. Otherwise the field type's regular query analyzer is used.
 *
 * @throws SyntaxError if the superclass fails to build the query
 */
@Override
protected Query newFieldQuery(Analyzer analyzer, String field, String queryText,
    boolean quoted, boolean fieldAutoGenPhraseQueries, boolean enableGraphQueries,
    SynonymQueryStyle synonymQueryStyle)
    throws SyntaxError {
  Analyzer actualAnalyzer;
  if (removeStopFilter) {
    if (nonStopFilterAnalyzerPerField == null) {
      nonStopFilterAnalyzerPerField = new HashMap<>();
    }
    actualAnalyzer = nonStopFilterAnalyzerPerField.get(field);
    if (actualAnalyzer == null) {
      actualAnalyzer = noStopwordFilterAnalyzer(field);
      // Store the result; previously the cache was consulted but never filled, so the
      // analyzer chain was rebuilt on every call for the same field.
      nonStopFilterAnalyzerPerField.put(field, actualAnalyzer);
    }
  } else {
    actualAnalyzer = parser.getReq().getSchema().getFieldType(field).getQueryAnalyzer();
  }
  return super.newFieldQuery(actualAnalyzer, field, queryText, quoted, fieldAutoGenPhraseQueries, enableGraphQueries, synonymQueryStyle);
}
/**
 * Records a range clause (both endpoints and their inclusiveness flags) in the
 * scratch fields and resolves it through {@link #getAliasedQuery()}.
 */
@Override
protected Query getRangeQuery(String field, String a, String b, boolean startInclusive, boolean endInclusive) throws SyntaxError {
  this.field = field;
  this.type = QType.RANGE;
  // First endpoint and its inclusiveness.
  this.val = a;
  this.bool = startInclusive;
  // Second endpoint and its inclusiveness.
  this.val2 = b;
  this.bool2 = endInclusive;
  this.vals = null;
  return getAliasedQuery();
}
/**
 * Handles a wildcard clause. A bare {@code "*"} pattern matches all documents when the
 * field is {@code "*"} or no explicit field was given, and otherwise becomes an
 * empty-prefix query on the field. Any other pattern is recorded in the scratch
 * fields and resolved through {@link #getAliasedQuery()}.
 */
@Override
protected Query getWildcardQuery(String field, String val) throws SyntaxError {
  if (val.equals("*")) {
    final boolean everything = field.equals("*") || getExplicitField() == null;
    return everything ? new MatchAllDocsQuery() : getPrefixQuery(field, "");
  }
  this.vals = null;
  this.val = val;
  this.field = field;
  this.type = QType.WILDCARD;
  return getAliasedQuery();
}
/**
 * Records a fuzzy clause and its minimum similarity in the scratch fields and
 * resolves it through {@link #getAliasedQuery()}.
 */
@Override
protected Query getFuzzyQuery(String field, String val, float minSimilarity) throws SyntaxError {
  this.flt = minSimilarity;
  this.vals = null;
  this.val = val;
  this.field = field;
  this.type = QType.FUZZY;
  return getAliasedQuery();
}
/**
 * Builds the query for the clause currently held in the scratch fields.
 *
 * <p>If the current field is not an alias, this delegates to {@code getQuery()}
 * (after optionally verifying that the field exists). If it is an alias, the clause
 * is built once per aliased field -- recursively, so aliases that point at other
 * aliases work -- and the results are combined into a DisjunctionMaxQuery, or into a
 * BooleanQuery of SHOULD clauses when {@code makeDismax} is off.
 *
 * @throws SyntaxError if the aliases form a cycle, or if exceptions are enabled and
 *         the field is neither a schema field nor a magic field name
 */
protected Query getAliasedQuery() throws SyntaxError {
  final Alias alias = aliases.get(field);
  validateCyclicAliasing(field);

  if (alias == null) {
    // A fielded query must be on a field that actually exists. If it is not, throw to
    // get out of here; the input is then treated like a literal on the
    // escape+re-parse attempt.
    if (exceptions
        && schema.getFieldTypeNoEx(field) == null
        && MagicFieldName.get(field) == null) {
      throw unknownField;
    }
    return getQuery();
  }

  final List<Query> perFieldQueries = getQueries(alias);
  if (perFieldQueries == null || perFieldQueries.isEmpty()) {
    return getQuery();
  }

  // A DisjunctionMaxQuery is built even for a single sub-query: it keeps the "mm"
  // processing from making everything required when the query expanded to multiple
  // clauses. (Per the original note, DisMaxQuery.rewrite() drops the wrapper again
  // when there is only one clause.)
  if (makeDismax) {
    return new DisjunctionMaxQuery(perFieldQueries, alias.tie);
  }

  final BooleanQuery.Builder anyOf = new BooleanQuery.Builder();
  for (Query perField : perFieldQueries) {
    anyOf.add(perField, BooleanClause.Occur.SHOULD);
  }
  return QueryUtils.build(anyOf, parser);
}
/**
 * Multi-term counterpart of {@link #getAliasedQuery()}: builds the query for the term
 * list currently held in the scratch fields.
 *
 * <p>Delegates to {@code getQuery()} unless the field has been specified as an alias --
 * in which case we recurse on each of the aliased fields (so aliases which point at
 * other aliases should work) and combine the per-field results:
 * <ul>
 *   <li>with {@code makeDismax} on, and all per-field results being (optionally boosted)
 *       BooleanQuery-s of identical structure: a BooleanQuery holding one SHOULD
 *       DisjunctionMaxQuery per clause position, each spanning all fields;</li>
 *   <li>with {@code makeDismax} on otherwise: a single DisjunctionMaxQuery over the
 *       per-field results;</li>
 *   <li>with {@code makeDismax} off: a BooleanQuery of SHOULD clauses.</li>
 * </ul>
 *
 * @throws SyntaxError if the aliases form a cycle, or if exceptions are enabled and
 *         the field is neither a schema field nor a magic field name
 */
protected Query getAliasedMultiTermQuery() throws SyntaxError {
Alias a = aliases.get(field);
this.validateCyclicAliasing(field);
if (a != null) {
List<Query> lst = getMultiTermQueries(a);
if (lst == null || lst.size() == 0) {
return getQuery();
}
// make a DisjunctionMaxQuery in this case too... it will stop
// the "mm" processing from making everything required in the case
// that the query expanded to multiple clauses.
// DisMaxQuery.rewrite() removes itself if there is just a single clause anyway.
// if (lst.size()==1) return lst.get(0);
if (makeDismax) {
Query firstQuery = lst.get(0);
// Only regroup per clause position when every per-field query is a BooleanQuery
// (possibly wrapped in a BoostQuery) with the same shape as the first one.
if ((firstQuery instanceof BooleanQuery
|| (firstQuery instanceof BoostQuery && ((BoostQuery)firstQuery).getQuery() instanceof BooleanQuery))
&& allSameQueryStructure(lst)) {
BooleanQuery.Builder q = new BooleanQuery.Builder();
List<Query> subs = new ArrayList<>(lst.size());
BooleanQuery firstBooleanQuery = firstQuery instanceof BoostQuery
? (BooleanQuery)((BoostQuery)firstQuery).getQuery() : (BooleanQuery)firstQuery;
for (int c = 0 ; c < firstBooleanQuery.clauses().size() ; ++c) {
subs.clear();
// Make a dismax query for each clause position in the boolean per-field queries.
for (int n = 0 ; n < lst.size() ; ++n) {
if (lst.get(n) instanceof BoostQuery) {
// Push the field-level boost down onto the clause taken from this field's query.
BoostQuery boostQuery = (BoostQuery)lst.get(n);
BooleanQuery booleanQuery = (BooleanQuery)boostQuery.getQuery();
subs.add(new BoostQuery(booleanQuery.clauses().get(c).getQuery(), boostQuery.getBoost()));
} else {
subs.add(((BooleanQuery)lst.get(n)).clauses().get(c).getQuery());
}
}
q.add(newBooleanClause(new DisjunctionMaxQuery(subs, a.tie), BooleanClause.Occur.SHOULD));
}
return QueryUtils.build(q, parser);
} else {
return new DisjunctionMaxQuery(lst, a.tie);
}
} else {
BooleanQuery.Builder q = new BooleanQuery.Builder();
for (Query sub : lst) {
q.add(sub, BooleanClause.Occur.SHOULD);
}
return QueryUtils.build(q, parser);
}
} else {
// verify that a fielded query is actually on a field that exists... if not,
// then throw an exception to get us out of here, and we'll treat it like a
// literal when we try the escape+re-parse.
if (exceptions) {
FieldType ft = schema.getFieldTypeNoEx(field);
if (ft == null && null == MagicFieldName.get(field)) {
throw unknownField;
}
}
return getQuery();
}
}
/**
 * Recursively examines the given query list for identical structure in all queries.
 * Boosts on top-level BoostQuery-s are ignored, and the contained queries are instead
 * used as the basis for comparison.
 *
 * <p>Every query after the first is compared with the first: the classes must match,
 * and for BooleanQuery-s the clause counts, the class and Occur of each clause, and
 * (recursively) the structure of nested BooleanQuery clauses must match too.
 *
 * @param lst the queries to compare; must contain at least one element
 * @return true if all queries share the structure of the first one
 **/
private boolean allSameQueryStructure(List<Query> lst) {
  Query firstQuery = lst.get(0);
  if (firstQuery instanceof BoostQuery) {
    firstQuery = ((BoostQuery)firstQuery).getQuery(); // ignore boost; compare contained query
  }
  for (int n = 1 ; n < lst.size(); ++n) {
    Query nthQuery = lst.get(n);
    if (nthQuery instanceof BoostQuery) {
      nthQuery = ((BoostQuery)nthQuery).getQuery();
    }
    if (nthQuery.getClass() != firstQuery.getClass()) {
      return false;
    }
    if (firstQuery instanceof BooleanQuery) {
      List<BooleanClause> firstBooleanClauses = ((BooleanQuery)firstQuery).clauses();
      List<BooleanClause> nthBooleanClauses = ((BooleanQuery)nthQuery).clauses();
      if (firstBooleanClauses.size() != nthBooleanClauses.size()) {
        return false;
      }
      for (int c = 0 ; c < firstBooleanClauses.size() ; ++c) {
        BooleanClause firstClause = firstBooleanClauses.get(c);
        BooleanClause nthClause = nthBooleanClauses.get(c);
        // Return as soon as a mismatch is found; a break here would only leave this
        // clause loop and let the outer loop keep comparing for nothing.
        if (nthClause.getQuery().getClass() != firstClause.getQuery().getClass()
            || nthClause.getOccur() != firstClause.getOccur()) {
          return false;
        }
        if (firstClause.getQuery() instanceof BooleanQuery
            && ! allSameQueryStructure(Arrays.asList(firstClause.getQuery(), nthClause.getQuery()))) {
          return false;
        }
      }
    }
  }
  return true;
}
/**
 * Appends the given multi-term query to {@code clauses}, as MUST when the default
 * operator is AND and as SHOULD otherwise. A null query is ignored. A BooleanQuery
 * made up solely of SHOULD DisjunctionMaxQuery clauses is unwrapped so that each
 * DisjunctionMaxQuery becomes a clause of its own.
 */
@Override
protected void addMultiTermClause(List<BooleanClause> clauses, Query q) {
  if (q == null) {
    // The analyzer may have filtered every term away, leaving nothing to add.
    return;
  }
  final BooleanClause.Occur occur =
      (operator == AND_OPERATOR) ? BooleanClause.Occur.MUST : BooleanClause.Occur.SHOULD;

  if (q instanceof BooleanQuery) {
    final List<BooleanClause> inner = ((BooleanQuery) q).clauses();
    boolean onlyOptionalDisMax = true;
    for (int i = 0; onlyOptionalDisMax && i < inner.size(); i++) {
      final BooleanClause candidate = inner.get(i);
      onlyOptionalDisMax = candidate.getOccur() == BooleanClause.Occur.SHOULD
          && candidate.getQuery() instanceof DisjunctionMaxQuery;
    }
    if (onlyOptionalDisMax) {
      // This is the shape getAliasedMultiTermQuery() produces: a BooleanQuery of SHOULD
      // DisjunctionMaxQuery-s. Add one clause per contained DisMax query instead of the wrapper.
      for (BooleanClause dismaxClause : inner) {
        clauses.add(newBooleanClause(dismaxClause.getQuery(), occur));
      }
      return;
    }
  }
  clauses.add(newBooleanClause(q, occur));
}
/**
 * Checks that following the aliases reachable from {@code field} never leads back to
 * a field already on the current path.
 *
 * @param field the field name to start from
 * @throws SyntaxError if a cycle is found
 */
private void validateCyclicAliasing(String field) throws SyntaxError {
  final Set<String> path = new HashSet<>();
  path.add(field);
  final boolean cyclic = validateField(field, path);
  if (cyclic) {
    throw new SyntaxError("Field aliases lead to a cycle");
  }
}
/**
 * Depth-first walk over the alias targets of {@code field}.
 *
 * @param field the field whose alias targets are examined
 * @param set   the field names on the current path; each target is added while it is
 *              being explored and removed again afterwards
 * @return true if any target (directly or transitively) is already on the path
 */
private boolean validateField(String field, Set<String> set) {
  final Alias alias = this.getAlias(field);
  if (alias == null) {
    return false;
  }
  boolean cycleFound = false;
  for (String target : alias.fields.keySet()) {
    if (set.add(target)) {
      // Not on the path yet: explore below it, then take it off the path again.
      cycleFound |= validateField(target, set);
      set.remove(target);
    } else {
      // Already on the path, so this alias chain loops back on itself.
      cycleFound = true;
    }
  }
  return cycleFound;
}
/**
 * Builds the current clause once for every field of the alias, via
 * {@link #getAliasedQuery()}, wrapping each result in a BoostQuery when the alias
 * gives that field a boost other than 1. Overwrites the {@code field} scratch value.
 *
 * @param a the alias to expand; may be null
 * @return the non-null per-field queries, or null if {@code a} is null or has no fields
 */
protected List<Query> getQueries(Alias a) throws SyntaxError {
  if (a == null || a.fields.isEmpty()) {
    return null;
  }
  final List<Query> collected = new ArrayList<>(4);
  for (Map.Entry<String,Float> target : a.fields.entrySet()) {
    this.field = target.getKey();
    Query perField = getAliasedQuery();
    if (perField == null) {
      continue;
    }
    final Float boost = target.getValue();
    if (boost != null && boost.floatValue() != 1f) {
      perField = new BoostQuery(perField, boost);
    }
    collected.add(perField);
  }
  return collected;
}
/**
 * Builds the current multi-term clause once for every field of the alias, via
 * {@link #getAliasedMultiTermQuery()}, wrapping each result in a BoostQuery when the
 * alias gives that field a boost other than 1. Overwrites the {@code field} scratch value.
 *
 * @param a the alias to expand; may be null
 * @return the non-null per-field queries, or null if {@code a} is null or has no fields
 */
protected List<Query> getMultiTermQueries(Alias a) throws SyntaxError {
  if (a == null || a.fields.isEmpty()) {
    return null;
  }
  final List<Query> collected = new ArrayList<>(4);
  for (Map.Entry<String,Float> target : a.fields.entrySet()) {
    this.field = target.getKey();
    Query perField = getAliasedMultiTermQuery();
    if (perField == null) {
      continue;
    }
    final Float boost = target.getValue();
    if (boost != null && boost.floatValue() != 1f) {
      perField = new BoostQuery(perField, boost);
    }
    collected.add(perField);
  }
  return collected;
}
/**
 * Builds the query for the clause currently held in the scratch fields
 * ({@code type}, {@code field}, {@code val}, {@code val2}, {@code vals}, {@code bool},
 * {@code bool2}, {@code flt}, {@code slop}) by delegating to the matching superclass
 * method.
 *
 * @return the query, or null when the clause type is not handled, when a phrase-type
 *         query has fewer terms than {@code minClauseSize}, when {@code minClauseSize > 1}
 *         and the result is not a phrase-type query, or when building the query threw
 *         any exception
 */
private Query getQuery() {
try {
switch (type) {
case FIELD: // fallthrough
case PHRASE:
Query query;
// val == null marks a multi-term clause (see the List-based getFieldQuery override).
if (val == null) {
query = super.getFieldQuery(field, vals, false);
} else {
query = super.getFieldQuery(field, val, type == QType.PHRASE, false);
}
// Boolean query on a whitespace-separated string
// If these were synonyms we would have a SynonymQuery
if (query instanceof BooleanQuery) {
if (type == QType.FIELD) { // Don't set mm for boolean query containing phrase queries
BooleanQuery bq = (BooleanQuery) query;
query = SolrPluginUtils.setMinShouldMatch(bq, minShouldMatch, false);
}
} else if (query instanceof PhraseQuery) {
PhraseQuery pq = (PhraseQuery)query;
if (minClauseSize > 1 && pq.getTerms().length < minClauseSize) return null;
// Rebuild the phrase with the same terms and positions but with the recorded slop.
PhraseQuery.Builder builder = new PhraseQuery.Builder();
Term[] terms = pq.getTerms();
int[] positions = pq.getPositions();
for (int i = 0; i < terms.length; ++i) {
builder.add(terms[i], positions[i]);
}
builder.setSlop(slop);
query = builder.build();
} else if (query instanceof MultiPhraseQuery) {
MultiPhraseQuery mpq = (MultiPhraseQuery)query;
if (minClauseSize > 1 && mpq.getTermArrays().length < minClauseSize) return null;
// Only rebuild when the recorded slop differs from the one already on the query.
if (slop != mpq.getSlop()) {
query = new MultiPhraseQuery.Builder(mpq).setSlop(slop).build();
}
} else if (query instanceof SpanQuery) {
// Span queries are returned as-is, without the minClauseSize check below.
return query;
} else if (minClauseSize > 1) {
// if it's not a type of phrase query, it doesn't meet the minClauseSize requirements
return null;
}
return query;
case PREFIX: return super.getPrefixQuery(field, val);
case WILDCARD: return super.getWildcardQuery(field, val);
case FUZZY: return super.getFuzzyQuery(field, val, flt);
case RANGE: return super.getRangeQuery(field, val, val2, bool, bool2);
}
// No case matched the clause type.
return null;
} catch (Exception e) {
// an exception here is due to the field query not being compatible with the input text
// for example, passing a string to a numeric field.
return null;
}
}
/**
 * Returns a query analyzer for the field with its first StopFilterFactory removed,
 * or the field's unmodified query analyzer when removal does not apply.
 *
 * <p>The unmodified query analyzer is returned when: it is not a TokenizerChain; the
 * index analyzer is the same object or is not a TokenizerChain; the index analyzer
 * contains a StopFilterFactory; or the query analyzer has no StopFilterFactory.
 *
 * @param fieldName the schema field whose analyzers are inspected
 */
private Analyzer noStopwordFilterAnalyzer(String fieldName) {
  final FieldType fieldType = parser.getReq().getSchema().getFieldType(fieldName);
  final Analyzer queryAnalyzer = fieldType.getQueryAnalyzer();
  if (!(queryAnalyzer instanceof TokenizerChain)) {
    return queryAnalyzer;
  }
  final Analyzer indexAnalyzer = fieldType.getIndexAnalyzer();
  if (indexAnalyzer == queryAnalyzer || !(indexAnalyzer instanceof TokenizerChain)) {
    return queryAnalyzer;
  }

  // Leave the query analyzer alone if the index side filters stop words as well.
  for (TokenFilterFactory indexFilter : ((TokenizerChain) indexAnalyzer).getTokenFilterFactories()) {
    if (indexFilter instanceof StopFilterFactory) {
      return queryAnalyzer;
    }
  }

  // Find the first stop filter on the query side, if there is one.
  final TokenizerChain queryChain = (TokenizerChain) queryAnalyzer;
  final TokenFilterFactory[] queryFilters = queryChain.getTokenFilterFactories();
  int stopAt = 0;
  while (stopAt < queryFilters.length && !(queryFilters[stopAt] instanceof StopFilterFactory)) {
    stopAt++;
  }
  if (stopAt == queryFilters.length) {
    // no stop filter exists
    return queryAnalyzer;
  }

  // Copy every filter except the one at stopAt, keeping the original order.
  final TokenFilterFactory[] remaining = new TokenFilterFactory[queryFilters.length - 1];
  System.arraycopy(queryFilters, 0, remaining, 0, stopAt);
  System.arraycopy(queryFilters, stopAt + 1, remaining, stopAt, queryFilters.length - stopAt - 1);

  final TokenizerChain stripped = new TokenizerChain(
      queryChain.getCharFilterFactories(), queryChain.getTokenizerFactory(), remaining);
  stripped.setPositionIncrementGap(queryChain.getPositionIncrementGap(fieldName));
  return stripped;
}
}
/**
 * Tells whether a query carries nothing to search for.
 *
 * @param q the query to inspect; may be null
 * @return true if {@code q} is null or is a BooleanQuery without clauses
 */
static boolean isEmpty(Query q) {
  return q == null
      || (q instanceof BooleanQuery && ((BooleanQuery) q).clauses().isEmpty());
}
/**
 * Holds the parsed userFields ("uf") specification and answers whether a field may be
 * used as a fielded query in the query string, and with which boost.
 *
 * <p>Keys of the map are field names or wildcard patterns; a leading "-" marks a
 * forbidden field or pattern. The map passed to the constructor is kept and modified
 * in place.
 */
static class UserFields {
  private Map<String,Float> userFieldsMap;
  private DynamicField[] dynamicUserFields;
  private DynamicField[] negativeDynamicUserFields;

  /**
   * @param ufm the field/pattern to boost mapping; an empty map is turned into "allow everything"
   * @param forbidSubQueryByDefault when true, the magic query field is forbidden unless
   *        the map explicitly contains it
   */
  UserFields(Map<String, Float> ufm, boolean forbidSubQueryByDefault) {
    userFieldsMap = ufm;
    if (userFieldsMap.isEmpty()) {
      userFieldsMap.put("*", null);
    }

    // Split the wildcard entries into allowed and forbidden ("-" prefixed) patterns.
    final List<DynamicField> allowedPatterns = new ArrayList<>();
    final List<DynamicField> forbiddenPatterns = new ArrayList<>();
    for (String spec : userFieldsMap.keySet()) {
      if (!spec.contains("*")) {
        continue;
      }
      if (spec.startsWith("-")) {
        forbiddenPatterns.add(new DynamicField(spec.substring(1)));
      } else {
        allowedPatterns.add(new DynamicField(spec));
      }
    }

    // unless "_query_" was expressly allowed, we forbid it.
    if (forbidSubQueryByDefault && !userFieldsMap.containsKey(MagicFieldName.QUERY.field)) {
      userFieldsMap.put("-" + MagicFieldName.QUERY.field, null);
    }

    Collections.sort(allowedPatterns);
    dynamicUserFields = allowedPatterns.toArray(new DynamicField[allowedPatterns.size()]);
    Collections.sort(forbiddenPatterns);
    negativeDynamicUserFields = forbiddenPatterns.toArray(new DynamicField[forbiddenPatterns.size()]);
  }

  /**
   * Is the given field name allowed according to UserFields spec given in the uf parameter?
   * @param fname the field name to examine
   * @return true if the fielded queries are allowed on this field
   */
  public boolean isAllowed(String fname) {
    final boolean listed = userFieldsMap.containsKey(fname) || isDynField(fname, false);
    if (!listed) {
      return false;
    }
    if (userFieldsMap.containsKey("-" + fname)) {
      return false;
    }
    return !isDynField(fname, true);
  }

  /** True if some allowed (or, with {@code neg}, forbidden) pattern matches the field. */
  private boolean isDynField(String field, boolean neg) {
    return getDynFieldForName(field, neg) != null;
  }

  /**
   * Returns the wildcard text of the first allowed (or, with {@code neg}, forbidden)
   * pattern matching {@code f}, or null if none matches.
   */
  private String getDynFieldForName(String f, boolean neg) {
    final DynamicField[] candidates = neg ? negativeDynamicUserFields : dynamicUserFields;
    for (DynamicField candidate : candidates) {
      if (candidate.matches(f)) {
        return candidate.wildcard;
      }
    }
    return null;
  }

  /**
   * Finds the default user field boost associated with the given field.
   * This is parsed from the uf parameter, and may be specified as wildcards, e.g. *name^2.0 or *^3.0
   * @param field the field to find boost for
   * @return the float boost value associated with the given field or a wildcard matching the field
   */
  public Float getBoost(String field) {
    if (userFieldsMap.containsKey(field)) {
      return userFieldsMap.get(field); // Exact field
    }
    return userFieldsMap.get(getDynFieldForName(field, false)); // Dynamic field
  }
}
/* A wildcard field pattern ("*", "*suffix" or "prefix*"), for easier matching; inspired by the same class in IndexSchema */
static class DynamicField implements Comparable<DynamicField> {
  final static int STARTS_WITH=1;
  final static int ENDS_WITH=2;
  final static int CATCHALL=3;

  final String wildcard;
  final int type;
  final String str;

  /*
   * Classifies the pattern by where its "*" sits and keeps the literal part in str.
   * A pattern that neither starts nor ends with "*" is rejected with BAD_REQUEST.
   */
  protected DynamicField(String wildcard) {
    this.wildcard = wildcard;
    final int kind;
    final String literal;
    if (wildcard.equals("*")) {
      kind = CATCHALL;
      literal = null;
    } else if (wildcard.startsWith("*")) {
      kind = ENDS_WITH;
      literal = wildcard.substring(1);
    } else if (wildcard.endsWith("*")) {
      kind = STARTS_WITH;
      literal = wildcard.substring(0, wildcard.length() - 1);
    } else {
      throw new SolrException(ErrorCode.BAD_REQUEST, "dynamic field name must start or end with *");
    }
    this.type = kind;
    this.str = literal;
  }

  /*
   * Returns true if the wildcard pattern of this DynamicField matches the input field name
   */
  public boolean matches(String name) {
    switch (type) {
      case CATCHALL:
        return true;
      case STARTS_WITH:
        return name.startsWith(str);
      case ENDS_WITH:
        return name.endsWith(str);
      default:
        return false;
    }
  }

  /**
   * Sort order is based on length of the wildcard pattern. Longest comes first.
   * @param other The object to compare to.
   * @return a negative integer, zero, or a positive integer
   * as this object is less than, equal to, or greater than
   * the specified object.
   */
  @Override
  public int compareTo(DynamicField other) {
    final int theirLength = other.wildcard.length();
    final int ownLength = wildcard.length();
    return theirLength - ownLength;
  }

  @Override
  public String toString() {
    return wildcard;
  }
}
/**
 * Plain holder for the settings read from the request when parsing a query.
 * All values are read once, in the constructor, from the local params layered over
 * the request params.
 */
public static class ExtendedDismaxConfiguration {
  /**
   * The field names specified by 'qf' that (most) clauses will
   * be queried against
   */
  protected Map<String,Float> queryFields;

  /**
   * The field names specified by 'uf' that users are
   * allowed to include literally in their query string. The Float
   * boost values will be applied automatically to any clause using that
   * field name. '*' will be treated as an alias for any
   * field that exists in the schema. Wildcards are allowed to
   * express dynamicFields.
   */
  protected UserFields userFields;

  protected String[] boostParams;
  protected String[] multBoosts;
  protected SolrParams solrParams;
  protected String minShouldMatch;
  protected List<FieldParams> allPhraseFields;
  protected float tiebreaker;
  protected int qslop;
  protected boolean stopwords;
  protected boolean mmAutoRelax;
  protected String altQ;
  protected boolean lowercaseOperators;
  protected String[] boostFuncs;
  protected boolean splitOnWhitespace;
  protected IndexSchema schema;

  /**
   * @param localParams the local params; they take precedence over {@code params}
   * @param params      the request params, used as defaults
   * @param req         the request, source of the schema and the luceneMatchVersion
   * @throws RuntimeException wrapping the SyntaxError if the query fields cannot be parsed
   */
  public ExtendedDismaxConfiguration(SolrParams localParams,
      SolrParams params, SolrQueryRequest req) {
    solrParams = SolrParams.wrapDefaults(localParams, params);
    schema = req.getSchema();
    // The schema comes from the request: req.getSearcher() here causes searcher refcount imbalance.
    minShouldMatch = DisMaxQParser.parseMinShouldMatch(schema, solrParams);

    final boolean forbidSubQueryByDefault =
        req.getCore().getSolrConfig().luceneMatchVersion.onOrAfter(Version.LUCENE_7_2_0);
    final Map<String,Float> userFieldBoosts = U.parseFieldBoosts(solrParams.getParams(DMP.UF));
    userFields = new UserFields(userFieldBoosts, forbidSubQueryByDefault);

    try {
      // Same as above: req.getSearcher() here causes searcher refcount imbalance.
      queryFields = DisMaxQParser.parseQueryFields(schema, solrParams);
    } catch (SyntaxError e) {
      throw new RuntimeException(e);
    }

    // Phrase slops: PS2 and PS3 both default to the value of PS.
    final int slopPf = solrParams.getInt(DisMaxParams.PS, 0);
    final int slopPf2 = solrParams.getInt(DisMaxParams.PS2, slopPf);
    final int slopPf3 = solrParams.getInt(DisMaxParams.PS3, slopPf);

    final List<FieldParams> phraseFields =
        U.parseFieldBoostsAndSlop(solrParams.getParams(DMP.PF), 0, slopPf);
    final List<FieldParams> phraseFields2 =
        U.parseFieldBoostsAndSlop(solrParams.getParams(DMP.PF2), 2, slopPf2);
    final List<FieldParams> phraseFields3 =
        U.parseFieldBoostsAndSlop(solrParams.getParams(DMP.PF3), 3, slopPf3);

    final int phraseFieldCount = phraseFields.size() + phraseFields2.size() + phraseFields3.size();
    allPhraseFields = new ArrayList<>(phraseFieldCount);
    allPhraseFields.addAll(phraseFields);
    allPhraseFields.addAll(phraseFields2);
    allPhraseFields.addAll(phraseFields3);

    tiebreaker = solrParams.getFloat(DisMaxParams.TIE, 0.0f);
    qslop = solrParams.getInt(DisMaxParams.QS, 0);
    stopwords = solrParams.getBool(DMP.STOPWORDS, true);
    mmAutoRelax = solrParams.getBool(DMP.MM_AUTORELAX, false);
    altQ = solrParams.get(DisMaxParams.ALTQ);

    // lowercaseOperators defaults to true for luceneMatchVersion < 7.0 and to false for >= 7.0
    final boolean lowercaseOpsDefault =
        !req.getCore().getSolrConfig().luceneMatchVersion.onOrAfter(Version.LUCENE_7_0_0);
    lowercaseOperators = solrParams.getBool(DMP.LOWERCASE_OPS, lowercaseOpsDefault);

    /* * * Boosting Query * * */
    boostParams = solrParams.getParams(DisMaxParams.BQ);
    boostFuncs = solrParams.getParams(DisMaxParams.BF);
    multBoosts = solrParams.getParams(DMP.MULT_BOOST);

    splitOnWhitespace = solrParams.getBool(
        QueryParsing.SPLIT_ON_WHITESPACE, SolrQueryParser.DEFAULT_SPLIT_ON_WHITESPACE);
  }

  /**
   * @return true if at least one multiplicative boost parameter was supplied
   */
  public boolean hasMultiplicativeBoosts() {
    return !(multBoosts == null || multBoosts.length == 0);
  }

  /**
   * @return true if at least one boost function parameter was supplied
   */
  public boolean hasBoostFunctions() {
    return !(boostFuncs == null || boostFuncs.length == 0);
  }

  /**
   * @return true if at least one boost query parameter was supplied
   */
  public boolean hasBoostParams() {
    return !(boostParams == null || boostParams.length == 0);
  }

  /**
   * @return the phrase field settings parsed from the PF, PF2 and PF3 parameters, in that order
   */
  public List<FieldParams> getAllPhraseFields() {
    return allPhraseFields;
  }
}
}