blob: 0b03546733c609b621fe2c4aef9b7463657bf121 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.search;
import java.io.IOException;
import java.util.Set;
import org.apache.lucene.document.LongPoint;
import org.apache.lucene.document.SortedNumericDocValuesField;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.Term;
/**
* A query that uses either an index structure (points or terms) or doc values
* in order to run a query, depending on which one is more efficient. This is
* typically useful for range queries, whose {@link Weight#scorer} is costly
* to create since it usually needs to sort large lists of doc ids. For
* instance, for a field that indexed both {@link LongPoint}s and
* {@link SortedNumericDocValuesField}s with the same values, an efficient
* range query could be created by doing:
* <pre class="prettyprint">
* String field;
* long minValue, maxValue;
* Query pointQuery = LongPoint.newRangeQuery(field, minValue, maxValue);
* Query dvQuery = SortedNumericDocValuesField.newSlowRangeQuery(field, minValue, maxValue);
* Query query = new IndexOrDocValuesQuery(pointQuery, dvQuery);
* </pre>
* The above query will be efficient as it will use points when they
* perform better, i.e. when we need a good lead iterator that will be almost
* entirely consumed; and doc values otherwise, i.e. when another
* part of the query is already leading iteration but we still need the ability
* to verify that some documents match.
* <p><b>NOTE</b>: This query currently only works well with point range/exact
* queries and their equivalent doc values queries.
* @lucene.experimental
*/
public final class IndexOrDocValuesQuery extends Query {
private final Query indexQuery, dvQuery;
/**
* Create an {@link IndexOrDocValuesQuery}. Both provided queries must match
* the same documents and give the same scores.
* @param indexQuery a query that has a good iterator but whose scorer may be costly to create
* @param dvQuery a query whose scorer is cheap to create that can quickly check whether a given document matches
*/
public IndexOrDocValuesQuery(Query indexQuery, Query dvQuery) {
this.indexQuery = indexQuery;
this.dvQuery = dvQuery;
}
/** Return the wrapped query that may be costly to initialize but has a good
* iterator. */
public Query getIndexQuery() {
return indexQuery;
}
/** Return the wrapped query that may be slow at identifying all matching
* documents, but which is cheap to initialize and can efficiently
* verify that some documents match. */
public Query getRandomAccessQuery() {
return dvQuery;
}
@Override
public String toString(String field) {
return indexQuery.toString(field);
}
@Override
public boolean equals(Object obj) {
if (sameClassAs(obj) == false) {
return false;
}
IndexOrDocValuesQuery that = (IndexOrDocValuesQuery) obj;
return indexQuery.equals(that.indexQuery) && dvQuery.equals(that.dvQuery);
}
@Override
public int hashCode() {
int h = classHash();
h = 31 * h + indexQuery.hashCode();
h = 31 * h + dvQuery.hashCode();
return h;
}
@Override
public Query rewrite(IndexReader reader) throws IOException {
Query indexRewrite = indexQuery.rewrite(reader);
Query dvRewrite = dvQuery.rewrite(reader);
if (indexQuery != indexRewrite || dvQuery != dvRewrite) {
return new IndexOrDocValuesQuery(indexRewrite, dvRewrite);
}
return this;
}
@Override
public void visit(QueryVisitor visitor) {
QueryVisitor v = visitor.getSubVisitor(BooleanClause.Occur.MUST, this);
indexQuery.visit(v);
dvQuery.visit(v);
}
@Override
public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException {
final Weight indexWeight = indexQuery.createWeight(searcher, scoreMode, boost);
final Weight dvWeight = dvQuery.createWeight(searcher, scoreMode, boost);
return new Weight(this) {
@Override
public void extractTerms(Set<Term> terms) {
indexWeight.extractTerms(terms);
}
@Override
public Matches matches(LeafReaderContext context, int doc) throws IOException {
// We need to check a single doc, so the dv query should perform better
return dvWeight.matches(context, doc);
}
@Override
public Explanation explain(LeafReaderContext context, int doc) throws IOException {
// We need to check a single doc, so the dv query should perform better
return dvWeight.explain(context, doc);
}
@Override
public BulkScorer bulkScorer(LeafReaderContext context) throws IOException {
// Bulk scorers need to consume the entire set of docs, so using an
// index structure should perform better
return indexWeight.bulkScorer(context);
}
@Override
public ScorerSupplier scorerSupplier(LeafReaderContext context) throws IOException {
final ScorerSupplier indexScorerSupplier = indexWeight.scorerSupplier(context);
final ScorerSupplier dvScorerSupplier = dvWeight.scorerSupplier(context);
if (indexScorerSupplier == null || dvScorerSupplier == null) {
return null;
}
return new ScorerSupplier() {
@Override
public Scorer get(long leadCost) throws IOException {
// At equal costs, doc values tend to be worse than points since they
// still need to perform one comparison per document while points can
// do much better than that given how values are organized. So we give
// an arbitrary 8x penalty to doc values.
final long threshold = cost() >>> 3;
if (threshold <= leadCost) {
return indexScorerSupplier.get(leadCost);
} else {
return dvScorerSupplier.get(leadCost);
}
}
@Override
public long cost() {
return indexScorerSupplier.cost();
}
};
}
@Override
public Scorer scorer(LeafReaderContext context) throws IOException {
ScorerSupplier scorerSupplier = scorerSupplier(context);
if (scorerSupplier == null) {
return null;
}
return scorerSupplier.get(Long.MAX_VALUE);
}
@Override
public boolean isCacheable(LeafReaderContext ctx) {
// Both index and dv query should return the same values, so we can use
// the index query's cachehelper here
return indexWeight.isCacheable(ctx);
}
};
}
}