blob: 12ba8b1a0369a0000556aece7c92e9920c1ad7ac [file] [log] [blame]
package org.apache.lucene.search;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import java.util.Objects;
import java.util.Set;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.ToStringUtils;
/**
* A query that applies a filter to the results of another query.
*
* <p>Note: the bits are retrieved from the filter each time this
* query is used in a search - use a CachingWrapperFilter to avoid
* regenerating the bits every time.
* @since 1.4
* @see CachingWrapperQuery
* @deprecated FilteredQuery will be removed in Lucene 6.0. It should
* be replaced with a {@link BooleanQuery} with one
* {@link Occur#MUST} clause for the query and one
* {@link Occur#FILTER} clause for the filter.
*/
@Deprecated
public class FilteredQuery extends Query {
private final Query query;
private final Filter filter;
private final FilterStrategy strategy;
/**
* Constructs a new query which applies a filter to the results of the original query.
* {@link Filter#getDocIdSet} will be called every time this query is used in a search.
* @param query Query to be filtered, cannot be <code>null</code>.
* @param filter Filter to apply to query results, cannot be <code>null</code>.
*/
public FilteredQuery(Query query, Filter filter) {
this(query, filter, RANDOM_ACCESS_FILTER_STRATEGY);
}
/**
* Expert: Constructs a new query which applies a filter to the results of the original query.
* {@link Filter#getDocIdSet} will be called every time this query is used in a search.
* @param query Query to be filtered, cannot be <code>null</code>.
* @param filter Filter to apply to query results, cannot be <code>null</code>.
* @param strategy a filter strategy used to create a filtered scorer.
*
* @see FilterStrategy
*/
public FilteredQuery(Query query, Filter filter, FilterStrategy strategy) {
this.strategy = Objects.requireNonNull(strategy, "FilterStrategy must not be null");
this.query = Objects.requireNonNull(query, "Query must not be null");
this.filter = Objects.requireNonNull(filter, "Filter must not be null");
}
@Override
public Query rewrite(IndexReader reader) throws IOException {
if (getBoost() != 1f) {
return super.rewrite(reader);
}
BooleanQuery.Builder builder = new BooleanQuery.Builder();
builder.add(query, Occur.MUST);
builder.add(strategy.rewrite(filter), Occur.FILTER);
return builder.build();
}
/** Returns this FilteredQuery's (unfiltered) Query */
public final Query getQuery() {
return query;
}
/** Returns this FilteredQuery's filter */
public final Filter getFilter() {
return filter;
}
/** Returns this FilteredQuery's {@link FilterStrategy} */
public FilterStrategy getFilterStrategy() {
return this.strategy;
}
/** Prints a user-readable version of this query. */
@Override
public String toString (String s) {
StringBuilder buffer = new StringBuilder();
buffer.append("filtered(");
buffer.append(query.toString(s));
buffer.append(")->");
buffer.append(filter);
buffer.append(ToStringUtils.boost(getBoost()));
return buffer.toString();
}
/** Returns true iff <code>o</code> is equal to this. */
@Override
public boolean equals(Object o) {
if (o == this)
return true;
if (!super.equals(o))
return false;
assert o instanceof FilteredQuery;
final FilteredQuery fq = (FilteredQuery) o;
return fq.query.equals(this.query) && fq.filter.equals(this.filter) && fq.strategy.equals(this.strategy);
}
/** Returns a hash code value for this object. */
@Override
public int hashCode() {
int hash = super.hashCode();
hash = hash * 31 + strategy.hashCode();
hash = hash * 31 + query.hashCode();
hash = hash * 31 + filter.hashCode();
return hash;
}
/**
* A {@link FilterStrategy} that conditionally uses a random access filter if
* the given {@link DocIdSet} supports random access (returns a non-null value
* from {@link DocIdSet#bits()}) and
* {@link RandomAccessFilterStrategy#useRandomAccess(Bits, long)} returns
* <code>true</code>. Otherwise this strategy falls back to a "zig-zag join" (
* {@link FilteredQuery#LEAP_FROG_FILTER_FIRST_STRATEGY}) strategy.
*
* <p>
* Note: this strategy is the default strategy in {@link FilteredQuery}
* </p>
*/
public static final FilterStrategy RANDOM_ACCESS_FILTER_STRATEGY = new RandomAccessFilterStrategy();
/**
* A filter strategy that uses a "leap-frog" approach (also called "zig-zag join").
* In spite of the name of this constant, which one will be iterated first depends
* on the {@link DocIdSetIterator#cost() cost} of the filter compared to the query.
*/
public static final FilterStrategy LEAP_FROG_FILTER_FIRST_STRATEGY = new RandomAccessFilterStrategy() {
protected boolean useRandomAccess(Bits bits, long filterCost) {
return false;
}
};
/**
* A filter strategy that uses a "leap-frog" approach (also called "zig-zag join").
* In spite of the name of this constant, which one will be iterated first depends
* on the {@link DocIdSetIterator#cost() cost} of the filter compared to the query.
*/
public static final FilterStrategy LEAP_FROG_QUERY_FIRST_STRATEGY = LEAP_FROG_FILTER_FIRST_STRATEGY;
/**
* A filter strategy that advances the Query or rather its {@link Scorer} first and consults the
* filter {@link DocIdSet} for each matched document.
* <p>
* Note: this strategy requires a {@link DocIdSet#bits()} to return a non-null value. Otherwise
* this strategy falls back to {@link FilteredQuery#LEAP_FROG_QUERY_FIRST_STRATEGY}
* </p>
* <p>
* Use this strategy if the filter computation is more expensive than document
* scoring or if the filter has a linear running time to compute the next
* matching doc like exact geo distances.
* </p>
*/
public static final FilterStrategy QUERY_FIRST_FILTER_STRATEGY = new RandomAccessFilterStrategy() {
@Override
boolean alwaysUseRandomAccess() {
return true;
}
};
/** Abstract class that defines how the filter ({@link DocIdSet}) applied during document collection. */
public static abstract class FilterStrategy {
/** Rewrite the filter. */
public abstract Query rewrite(Filter filter);
}
/**
* A {@link FilterStrategy} that conditionally uses a random access filter if
* the given {@link DocIdSet} supports random access (returns a non-null value
* from {@link DocIdSet#bits()}) and
* {@link RandomAccessFilterStrategy#useRandomAccess(Bits, long)} returns
* <code>true</code>. Otherwise this strategy falls back to a "zig-zag join" (
* {@link FilteredQuery#LEAP_FROG_FILTER_FIRST_STRATEGY}) strategy .
*/
public static class RandomAccessFilterStrategy extends FilterStrategy {
@Override
public Query rewrite(Filter filter) {
return new RandomAccessFilterWrapperQuery(filter, this);
}
/**
* Expert: decides if a filter should be executed as "random-access" or not.
* random-access means the filter "filters" in a similar way as deleted docs are filtered
* in Lucene. This is faster when the filter accepts many documents.
* However, when the filter is very sparse, it can be faster to execute the query+filter
* as a conjunction in some cases.
*
* The default implementation returns <code>true</code> if the filter matches more than 1%
* of documents
*
* @lucene.internal
*/
protected boolean useRandomAccess(Bits bits, long filterCost) {
// if the filter matches more than 1% of documents, we use random-access
return filterCost * 100 > bits.length();
}
// back door for QUERY_FIRST_FILTER_STRATEGY, when this returns true we
// will try to use the random-access API regardless of the iterator
boolean alwaysUseRandomAccess() {
return false;
}
}
private static class RandomAccessFilterWrapperQuery extends Query {
final Filter filter;
final RandomAccessFilterStrategy strategy;
private RandomAccessFilterWrapperQuery(Filter filter, RandomAccessFilterStrategy strategy) {
this.filter = Objects.requireNonNull(filter);
this.strategy = Objects.requireNonNull(strategy);
}
@Override
public boolean equals(Object obj) {
if (super.equals(obj) == false) {
return false;
}
RandomAccessFilterWrapperQuery that = (RandomAccessFilterWrapperQuery) obj;
return filter.equals(that.filter) && strategy.equals(that.strategy);
}
@Override
public int hashCode() {
return 31 * super.hashCode() + Objects.hash(filter, strategy);
}
@Override
public String toString(String field) {
return filter.toString(field);
}
@Override
public Weight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException {
return new Weight(this) {
@Override
public void extractTerms(Set<Term> terms) {}
@Override
public float getValueForNormalization() throws IOException {
return 0f;
}
@Override
public void normalize(float norm, float topLevelBoost) {}
@Override
public Explanation explain(LeafReaderContext context, int doc) throws IOException {
final Scorer s = scorer(context);
final boolean match;
if (s == null) {
match = false;
} else {
final TwoPhaseIterator twoPhase = s.asTwoPhaseIterator();
if (twoPhase == null) {
match = s.advance(doc) == doc;
} else {
match = twoPhase.approximation().advance(doc) == doc && twoPhase.matches();
}
}
if (match) {
assert s.score() == 0f;
return Explanation.match(0f, "Match on id " + doc);
} else {
return Explanation.match(0f, "No match on id " + doc);
}
}
@Override
public Scorer scorer(LeafReaderContext context) throws IOException {
final DocIdSet set = filter.getDocIdSet(context, null);
if (set == null) {
return null;
}
final Bits bits = set.bits();
boolean useRandomAccess = bits != null && strategy.alwaysUseRandomAccess();
final DocIdSetIterator iterator;
if (useRandomAccess) {
// we don't need the iterator
iterator = null;
} else {
iterator = set.iterator();
if (iterator == null) {
return null;
}
if (bits != null) {
useRandomAccess = strategy.useRandomAccess(bits, iterator.cost());
}
}
if (useRandomAccess) {
// use the random-access API
final DocIdSetIterator approximation = DocIdSetIterator.all(context.reader().maxDoc());
final TwoPhaseIterator twoPhase = new TwoPhaseIterator(approximation) {
@Override
public boolean matches() throws IOException {
final int doc = approximation.docID();
return bits.get(doc);
}
@Override
public float matchCost() {
return 10; // TODO use cost of bits.get()
}
};
return new ConstantScoreScorer(this, 0f, twoPhase);
} else {
// use the iterator API
return new ConstantScoreScorer(this, 0f, iterator);
}
}
};
}
}
}