blob: 4798745ef67f529ac91c954e847ec16d5fe2da06 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.search;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.EnumMap;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
import java.util.function.Predicate;
import org.apache.lucene.index.TermStates;
import org.apache.lucene.search.BooleanClause.Occur;
/**
* A Query that matches documents matching boolean combinations of other queries, e.g. {@link
* TermQuery}s, {@link PhraseQuery}s or other BooleanQuerys.
*/
public class BooleanQuery extends Query implements Iterable<BooleanClause> {
/** A builder for boolean queries. */
public static class Builder {
private int minimumNumberShouldMatch;
private final List<BooleanClause> clauses = new ArrayList<>();
/** Sole constructor. */
public Builder() {}
/**
* Specifies a minimum number of the optional BooleanClauses which must be satisfied.
*
* <p>By default no optional clauses are necessary for a match (unless there are no required
* clauses). If this method is used, then the specified number of clauses is required.
*
* <p>Use of this method is totally independent of specifying that any specific clauses are
* required (or prohibited). This number will only be compared against the number of matching
* optional clauses.
*
* @param min the number of optional clauses that must match
*/
public Builder setMinimumNumberShouldMatch(int min) {
this.minimumNumberShouldMatch = min;
return this;
}
/**
* Add a new clause to this {@link Builder}. Note that the order in which clauses are added does
* not have any impact on matching documents or query performance.
*
* @throws IndexSearcher.TooManyClauses if the new number of clauses exceeds the maximum clause
* number
*/
public Builder add(BooleanClause clause) {
// We do the final deep check for max clauses count limit during
// <code>IndexSearcher.rewrite</code> but do this check to short
// circuit in case a single query holds more than numClauses
//
// NOTE: this is not just an early check for optimization -- it's
// neccessary to prevent run-away 'rewriting' of bad queries from
// creating BQ objects that might eat up all the Heap.
if (clauses.size() >= IndexSearcher.maxClauseCount) {
throw new IndexSearcher.TooManyClauses();
}
clauses.add(clause);
return this;
}
/**
* Add a new clause to this {@link Builder}. Note that the order in which clauses are added does
* not have any impact on matching documents or query performance.
*
* @throws IndexSearcher.TooManyClauses if the new number of clauses exceeds the maximum clause
* number
*/
public Builder add(Query query, Occur occur) {
return add(new BooleanClause(query, occur));
}
/**
* Create a new {@link BooleanQuery} based on the parameters that have been set on this builder.
*/
public BooleanQuery build() {
return new BooleanQuery(minimumNumberShouldMatch, clauses.toArray(new BooleanClause[0]));
}
}
private final int minimumNumberShouldMatch;
private final List<BooleanClause> clauses; // used for toString() and getClauses()
private final Map<Occur, Collection<Query>> clauseSets; // used for equals/hashcode
private BooleanQuery(int minimumNumberShouldMatch, BooleanClause[] clauses) {
this.minimumNumberShouldMatch = minimumNumberShouldMatch;
this.clauses = Collections.unmodifiableList(Arrays.asList(clauses));
clauseSets = new EnumMap<>(Occur.class);
// duplicates matter for SHOULD and MUST
clauseSets.put(Occur.SHOULD, new Multiset<>());
clauseSets.put(Occur.MUST, new Multiset<>());
// but not for FILTER and MUST_NOT
clauseSets.put(Occur.FILTER, new HashSet<>());
clauseSets.put(Occur.MUST_NOT, new HashSet<>());
for (BooleanClause clause : clauses) {
clauseSets.get(clause.occur()).add(clause.query());
}
}
/** Gets the minimum number of the optional BooleanClauses which must be satisfied. */
public int getMinimumNumberShouldMatch() {
return minimumNumberShouldMatch;
}
/** Return a list of the clauses of this {@link BooleanQuery}. */
public List<BooleanClause> clauses() {
return clauses;
}
/** Return the collection of queries for the given {@link Occur}. */
Collection<Query> getClauses(Occur occur) {
return clauseSets.get(occur);
}
/**
* Whether this query is a pure disjunction, ie. it only has SHOULD clauses and it is enough for a
* single clause to match for this boolean query to match.
*/
boolean isPureDisjunction() {
return clauses.size() == getClauses(Occur.SHOULD).size() && minimumNumberShouldMatch <= 1;
}
/** Whether this query is a two clause disjunction with two term query clauses. */
boolean isTwoClausePureDisjunctionWithTerms() {
return clauses.size() == 2
&& isPureDisjunction()
&& clauses.get(0).query() instanceof TermQuery
&& clauses.get(1).query() instanceof TermQuery;
}
/**
* Rewrite a single two clause disjunction query with terms to two term queries and a conjunction
* query using the inclusion–exclusion principle.
*/
Query[] rewriteTwoClauseDisjunctionWithTermsForCount(IndexSearcher indexSearcher)
throws IOException {
BooleanQuery.Builder newQuery = new BooleanQuery.Builder();
Query[] queries = new Query[3];
for (int i = 0; i < clauses.size(); i++) {
TermQuery termQuery = (TermQuery) clauses.get(i).query();
// Optimization will count term query several times so use cache to avoid multiple terms
// dictionary lookups
if (termQuery.getTermStates() == null) {
termQuery =
new TermQuery(
termQuery.getTerm(), TermStates.build(indexSearcher, termQuery.getTerm(), false));
}
newQuery.add(termQuery, Occur.MUST);
queries[i] = termQuery;
}
queries[2] = newQuery.build();
return queries;
}
/**
* Returns an iterator on the clauses in this query. It implements the {@link Iterable} interface
* to make it possible to do:
*
* <pre class="prettyprint">for (BooleanClause clause : booleanQuery) {}</pre>
*/
@Override
public final Iterator<BooleanClause> iterator() {
return clauses.iterator();
}
// Utility method for rewriting BooleanQuery when scores are not needed.
// This is called from ConstantScoreQuery#rewrite
BooleanQuery rewriteNoScoring() {
boolean actuallyRewritten = false;
BooleanQuery.Builder newQuery =
new BooleanQuery.Builder().setMinimumNumberShouldMatch(getMinimumNumberShouldMatch());
final boolean keepShould =
getMinimumNumberShouldMatch() > 0
|| (clauseSets.get(Occur.MUST).size() + clauseSets.get(Occur.FILTER).size() == 0);
for (BooleanClause clause : clauses) {
Query query = clause.query();
// NOTE: rewritingNoScoring() should not call rewrite(), otherwise this
// method could run in exponential time with the depth of the query as
// every new level would rewrite 2x more than its parent level.
Query rewritten = query;
if (rewritten instanceof BoostQuery) {
rewritten = ((BoostQuery) rewritten).getQuery();
}
if (rewritten instanceof ConstantScoreQuery) {
rewritten = ((ConstantScoreQuery) rewritten).getQuery();
}
if (rewritten instanceof BooleanQuery) {
rewritten = ((BooleanQuery) rewritten).rewriteNoScoring();
}
BooleanClause.Occur occur = clause.occur();
if (occur == Occur.SHOULD && keepShould == false) {
// ignore clause
actuallyRewritten = true;
} else if (occur == Occur.MUST) {
// replace MUST clauses with FILTER clauses
newQuery.add(rewritten, Occur.FILTER);
actuallyRewritten = true;
} else if (query != rewritten) {
newQuery.add(rewritten, occur);
actuallyRewritten = true;
} else {
newQuery.add(clause);
}
}
if (actuallyRewritten == false) {
return this;
}
return newQuery.build();
}
@Override
public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost)
throws IOException {
return new BooleanWeight(this, searcher, scoreMode, boost);
}
@Override
public Query rewrite(IndexSearcher indexSearcher) throws IOException {
if (clauses.size() == 0) {
return new MatchNoDocsQuery("empty BooleanQuery");
}
// optimize 1-clause queries
if (clauses.size() == 1) {
BooleanClause c = clauses.get(0);
Query query = c.query();
if (minimumNumberShouldMatch == 1 && c.occur() == Occur.SHOULD) {
return query;
} else if (minimumNumberShouldMatch == 0) {
switch (c.occur()) {
case SHOULD:
case MUST:
return query;
case FILTER:
// no scoring clauses, so return a score of 0
return new BoostQuery(new ConstantScoreQuery(query), 0);
case MUST_NOT:
// no positive clauses
return new MatchNoDocsQuery("pure negative BooleanQuery");
default:
throw new AssertionError();
}
}
}
// recursively rewrite
{
BooleanQuery.Builder builder = new BooleanQuery.Builder();
builder.setMinimumNumberShouldMatch(getMinimumNumberShouldMatch());
boolean actuallyRewritten = false;
for (BooleanClause clause : this) {
Query query = clause.query();
BooleanClause.Occur occur = clause.occur();
Query rewritten;
if (occur == Occur.FILTER || occur == Occur.MUST_NOT) {
// Clauses that are not involved in scoring can get some extra simplifications
rewritten = new ConstantScoreQuery(query).rewrite(indexSearcher);
if (rewritten instanceof ConstantScoreQuery) {
rewritten = ((ConstantScoreQuery) rewritten).getQuery();
}
} else {
rewritten = query.rewrite(indexSearcher);
}
if (rewritten != query || query.getClass() == MatchNoDocsQuery.class) {
// rewrite clause
actuallyRewritten = true;
if (rewritten.getClass() == MatchNoDocsQuery.class) {
switch (occur) {
case SHOULD:
case MUST_NOT:
// the clause can be safely ignored
break;
case MUST:
case FILTER:
return rewritten;
}
} else {
builder.add(rewritten, occur);
}
} else {
// leave as-is
builder.add(clause);
}
}
if (actuallyRewritten) {
return builder.build();
}
}
// remove duplicate FILTER and MUST_NOT clauses
{
int clauseCount = 0;
for (Collection<Query> queries : clauseSets.values()) {
clauseCount += queries.size();
}
if (clauseCount != clauses.size()) {
// since clauseSets implicitly deduplicates FILTER and MUST_NOT
// clauses, this means there were duplicates
BooleanQuery.Builder rewritten = new BooleanQuery.Builder();
rewritten.setMinimumNumberShouldMatch(minimumNumberShouldMatch);
for (Map.Entry<Occur, Collection<Query>> entry : clauseSets.entrySet()) {
final Occur occur = entry.getKey();
for (Query query : entry.getValue()) {
rewritten.add(query, occur);
}
}
return rewritten.build();
}
}
// Check whether some clauses are both required and excluded
final Collection<Query> mustNotClauses = clauseSets.get(Occur.MUST_NOT);
if (!mustNotClauses.isEmpty()) {
final Predicate<Query> p = clauseSets.get(Occur.MUST)::contains;
if (mustNotClauses.stream().anyMatch(p.or(clauseSets.get(Occur.FILTER)::contains))) {
return new MatchNoDocsQuery("FILTER or MUST clause also in MUST_NOT");
}
if (mustNotClauses.contains(new MatchAllDocsQuery())) {
return new MatchNoDocsQuery("MUST_NOT clause is MatchAllDocsQuery");
}
}
// remove FILTER clauses that are also MUST clauses or that match all documents
if (clauseSets.get(Occur.FILTER).size() > 0) {
final Set<Query> filters = new HashSet<>(clauseSets.get(Occur.FILTER));
boolean modified = false;
if (filters.size() > 1 || clauseSets.get(Occur.MUST).isEmpty() == false) {
modified = filters.remove(new MatchAllDocsQuery());
}
modified |= filters.removeAll(clauseSets.get(Occur.MUST));
if (modified) {
BooleanQuery.Builder builder = new BooleanQuery.Builder();
builder.setMinimumNumberShouldMatch(getMinimumNumberShouldMatch());
for (BooleanClause clause : clauses) {
if (clause.occur() != Occur.FILTER) {
builder.add(clause);
}
}
for (Query filter : filters) {
builder.add(filter, Occur.FILTER);
}
return builder.build();
}
}
// convert FILTER clauses that are also SHOULD clauses to MUST clauses
if (clauseSets.get(Occur.SHOULD).size() > 0 && clauseSets.get(Occur.FILTER).size() > 0) {
final Collection<Query> filters = clauseSets.get(Occur.FILTER);
final Collection<Query> shoulds = clauseSets.get(Occur.SHOULD);
Set<Query> intersection = new HashSet<>(filters);
intersection.retainAll(shoulds);
if (intersection.isEmpty() == false) {
BooleanQuery.Builder builder = new BooleanQuery.Builder();
int minShouldMatch = getMinimumNumberShouldMatch();
for (BooleanClause clause : clauses) {
if (intersection.contains(clause.query())) {
if (clause.occur() == Occur.SHOULD) {
builder.add(new BooleanClause(clause.query(), Occur.MUST));
minShouldMatch--;
}
} else {
builder.add(clause);
}
}
builder.setMinimumNumberShouldMatch(Math.max(0, minShouldMatch));
return builder.build();
}
}
// Deduplicate SHOULD clauses by summing up their boosts
if (clauseSets.get(Occur.SHOULD).size() > 0 && minimumNumberShouldMatch <= 1) {
Map<Query, Double> shouldClauses = new HashMap<>();
for (Query query : clauseSets.get(Occur.SHOULD)) {
double boost = 1;
while (query instanceof BoostQuery) {
BoostQuery bq = (BoostQuery) query;
boost *= bq.getBoost();
query = bq.getQuery();
}
shouldClauses.put(query, shouldClauses.getOrDefault(query, 0d) + boost);
}
if (shouldClauses.size() != clauseSets.get(Occur.SHOULD).size()) {
BooleanQuery.Builder builder =
new BooleanQuery.Builder().setMinimumNumberShouldMatch(minimumNumberShouldMatch);
for (Map.Entry<Query, Double> entry : shouldClauses.entrySet()) {
Query query = entry.getKey();
float boost = entry.getValue().floatValue();
if (boost != 1f) {
query = new BoostQuery(query, boost);
}
builder.add(query, Occur.SHOULD);
}
for (BooleanClause clause : clauses) {
if (clause.occur() != Occur.SHOULD) {
builder.add(clause);
}
}
return builder.build();
}
}
// Deduplicate MUST clauses by summing up their boosts
if (clauseSets.get(Occur.MUST).size() > 0) {
Map<Query, Double> mustClauses = new HashMap<>();
for (Query query : clauseSets.get(Occur.MUST)) {
double boost = 1;
while (query instanceof BoostQuery) {
BoostQuery bq = (BoostQuery) query;
boost *= bq.getBoost();
query = bq.getQuery();
}
mustClauses.put(query, mustClauses.getOrDefault(query, 0d) + boost);
}
if (mustClauses.size() != clauseSets.get(Occur.MUST).size()) {
BooleanQuery.Builder builder =
new BooleanQuery.Builder().setMinimumNumberShouldMatch(minimumNumberShouldMatch);
for (Map.Entry<Query, Double> entry : mustClauses.entrySet()) {
Query query = entry.getKey();
float boost = entry.getValue().floatValue();
if (boost != 1f) {
query = new BoostQuery(query, boost);
}
builder.add(query, Occur.MUST);
}
for (BooleanClause clause : clauses) {
if (clause.occur() != Occur.MUST) {
builder.add(clause);
}
}
return builder.build();
}
}
// Rewrite queries whose single scoring clause is a MUST clause on a
// MatchAllDocsQuery to a ConstantScoreQuery
{
final Collection<Query> musts = clauseSets.get(Occur.MUST);
final Collection<Query> filters = clauseSets.get(Occur.FILTER);
if (musts.size() == 1 && filters.size() > 0) {
Query must = musts.iterator().next();
float boost = 1f;
if (must instanceof BoostQuery) {
BoostQuery boostQuery = (BoostQuery) must;
must = boostQuery.getQuery();
boost = boostQuery.getBoost();
}
if (must.getClass() == MatchAllDocsQuery.class) {
// our single scoring clause matches everything: rewrite to a CSQ on the filter
// ignore SHOULD clause for now
BooleanQuery.Builder builder = new BooleanQuery.Builder();
for (BooleanClause clause : clauses) {
switch (clause.occur()) {
case FILTER:
case MUST_NOT:
builder.add(clause);
break;
case MUST:
case SHOULD:
default:
// ignore
break;
}
}
Query rewritten = builder.build();
rewritten = new ConstantScoreQuery(rewritten);
if (boost != 1f) {
rewritten = new BoostQuery(rewritten, boost);
}
// now add back the SHOULD clauses
builder =
new BooleanQuery.Builder()
.setMinimumNumberShouldMatch(getMinimumNumberShouldMatch())
.add(rewritten, Occur.MUST);
for (Query query : clauseSets.get(Occur.SHOULD)) {
builder.add(query, Occur.SHOULD);
}
rewritten = builder.build();
return rewritten;
}
}
}
// Flatten nested disjunctions, this is important for block-max WAND to perform well
if (minimumNumberShouldMatch <= 1) {
BooleanQuery.Builder builder = new BooleanQuery.Builder();
builder.setMinimumNumberShouldMatch(minimumNumberShouldMatch);
boolean actuallyRewritten = false;
for (BooleanClause clause : clauses) {
if (clause.occur() == Occur.SHOULD && clause.query() instanceof BooleanQuery) {
BooleanQuery innerQuery = (BooleanQuery) clause.query();
if (innerQuery.isPureDisjunction()) {
actuallyRewritten = true;
for (BooleanClause innerClause : innerQuery.clauses()) {
builder.add(innerClause);
}
} else {
builder.add(clause);
}
} else {
builder.add(clause);
}
}
if (actuallyRewritten) {
return builder.build();
}
}
// SHOULD clause count less than or equal to minimumNumberShouldMatch
// Important(this can only be processed after nested clauses have been flattened)
{
final Collection<Query> shoulds = clauseSets.get(Occur.SHOULD);
if (shoulds.size() > 0) {
if (shoulds.size() < minimumNumberShouldMatch) {
return new MatchNoDocsQuery("SHOULD clause count less than minimumNumberShouldMatch");
}
if (shoulds.size() == minimumNumberShouldMatch) {
BooleanQuery.Builder builder = new BooleanQuery.Builder();
for (BooleanClause clause : clauses) {
if (clause.occur() == Occur.SHOULD) {
builder.add(clause.query(), Occur.MUST);
} else {
builder.add(clause);
}
}
return builder.build();
}
}
}
return super.rewrite(indexSearcher);
}
@Override
public void visit(QueryVisitor visitor) {
QueryVisitor sub = visitor.getSubVisitor(Occur.MUST, this);
for (BooleanClause.Occur occur : clauseSets.keySet()) {
if (clauseSets.get(occur).size() > 0) {
if (occur == Occur.MUST) {
for (Query q : clauseSets.get(occur)) {
q.visit(sub);
}
} else {
QueryVisitor v = sub.getSubVisitor(occur, this);
for (Query q : clauseSets.get(occur)) {
q.visit(v);
}
}
}
}
}
/** Prints a user-readable version of this query. */
@Override
public String toString(String field) {
StringBuilder buffer = new StringBuilder();
boolean needParens = getMinimumNumberShouldMatch() > 0;
if (needParens) {
buffer.append("(");
}
int i = 0;
for (BooleanClause c : this) {
buffer.append(c.occur().toString());
Query subQuery = c.query();
if (subQuery instanceof BooleanQuery) { // wrap sub-bools in parens
buffer.append("(");
buffer.append(subQuery.toString(field));
buffer.append(")");
} else {
buffer.append(subQuery.toString(field));
}
if (i != clauses.size() - 1) {
buffer.append(" ");
}
i += 1;
}
if (needParens) {
buffer.append(")");
}
if (getMinimumNumberShouldMatch() > 0) {
buffer.append('~');
buffer.append(getMinimumNumberShouldMatch());
}
return buffer.toString();
}
/**
* Compares the specified object with this boolean query for equality. Returns true if and only if
* the provided object
*
* <ul>
* <li>is also a {@link BooleanQuery},
* <li>has the same value of {@link #getMinimumNumberShouldMatch()}
* <li>has the same {@link Occur#SHOULD} clauses, regardless of the order
* <li>has the same {@link Occur#MUST} clauses, regardless of the order
* <li>has the same set of {@link Occur#FILTER} clauses, regardless of the order and regardless
* of duplicates
* <li>has the same set of {@link Occur#MUST_NOT} clauses, regardless of the order and
* regardless of duplicates
* </ul>
*/
@Override
public boolean equals(Object o) {
return sameClassAs(o) && equalsTo(getClass().cast(o));
}
private boolean equalsTo(BooleanQuery other) {
return getMinimumNumberShouldMatch() == other.getMinimumNumberShouldMatch()
&& clauseSets.equals(other.clauseSets);
}
private int computeHashCode() {
int hashCode = Objects.hash(minimumNumberShouldMatch, clauseSets);
if (hashCode == 0) {
hashCode = 1;
}
return hashCode;
}
// cached hash code is ok since boolean queries are immutable
private int hashCode;
@Override
public int hashCode() {
// no need for synchronization, in the worst case we would just compute the hash several times.
if (hashCode == 0) {
hashCode = computeHashCode();
assert hashCode != 0;
}
assert hashCode == computeHashCode();
return hashCode;
}
}