| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| package org.apache.lucene.search; |
| |
| import java.io.IOException; |
| import java.util.Set; |
| |
| import org.apache.lucene.document.LongPoint; |
| import org.apache.lucene.document.SortedNumericDocValuesField; |
| import org.apache.lucene.index.IndexReader; |
| import org.apache.lucene.index.LeafReaderContext; |
| import org.apache.lucene.index.Term; |
| |
| /** |
| * A query that uses either an index structure (points or terms) or doc values |
| * in order to run a query, depending which one is more efficient. This is |
| * typically useful for range queries, whose {@link Weight#scorer} is costly |
| * to create since it usually needs to sort large lists of doc ids. For |
| * instance, for a field that both indexed {@link LongPoint}s and |
| * {@link SortedNumericDocValuesField}s with the same values, an efficient |
| * range query could be created by doing: |
| * <pre class="prettyprint"> |
| * String field; |
| * long minValue, maxValue; |
| * Query pointQuery = LongPoint.newRangeQuery(field, minValue, maxValue); |
| * Query dvQuery = SortedNumericDocValuesField.newSlowRangeQuery(field, minValue, maxValue); |
| * Query query = new IndexOrDocValuesQuery(pointQuery, dvQuery); |
| * </pre> |
| * The above query will be efficient as it will use points in the case that they |
| * perform better, ie. when we need a good lead iterator that will be almost |
| * entirely consumed; and doc values otherwise, ie. in the case that another |
| * part of the query is already leading iteration but we still need the ability |
| * to verify that some documents match. |
| * <p><b>NOTE</b>This query currently only works well with point range/exact |
| * queries and their equivalent doc values queries. |
| * @lucene.experimental |
| */ |
| public final class IndexOrDocValuesQuery extends Query { |
| |
| private final Query indexQuery, dvQuery; |
| |
| /** |
| * Create an {@link IndexOrDocValuesQuery}. Both provided queries must match |
| * the same documents and give the same scores. |
| * @param indexQuery a query that has a good iterator but whose scorer may be costly to create |
| * @param dvQuery a query whose scorer is cheap to create that can quickly check whether a given document matches |
| */ |
| public IndexOrDocValuesQuery(Query indexQuery, Query dvQuery) { |
| this.indexQuery = indexQuery; |
| this.dvQuery = dvQuery; |
| } |
| |
| /** Return the wrapped query that may be costly to initialize but has a good |
| * iterator. */ |
| public Query getIndexQuery() { |
| return indexQuery; |
| } |
| |
| /** Return the wrapped query that may be slow at identifying all matching |
| * documents, but which is cheap to initialize and can efficiently |
| * verify that some documents match. */ |
| public Query getRandomAccessQuery() { |
| return dvQuery; |
| } |
| |
| @Override |
| public String toString(String field) { |
| return indexQuery.toString(field); |
| } |
| |
| @Override |
| public boolean equals(Object obj) { |
| if (sameClassAs(obj) == false) { |
| return false; |
| } |
| IndexOrDocValuesQuery that = (IndexOrDocValuesQuery) obj; |
| return indexQuery.equals(that.indexQuery) && dvQuery.equals(that.dvQuery); |
| } |
| |
| @Override |
| public int hashCode() { |
| int h = classHash(); |
| h = 31 * h + indexQuery.hashCode(); |
| h = 31 * h + dvQuery.hashCode(); |
| return h; |
| } |
| |
| @Override |
| public Query rewrite(IndexReader reader) throws IOException { |
| Query indexRewrite = indexQuery.rewrite(reader); |
| Query dvRewrite = dvQuery.rewrite(reader); |
| if (indexQuery != indexRewrite || dvQuery != dvRewrite) { |
| return new IndexOrDocValuesQuery(indexRewrite, dvRewrite); |
| } |
| return this; |
| } |
| |
| @Override |
| public void visit(QueryVisitor visitor) { |
| QueryVisitor v = visitor.getSubVisitor(BooleanClause.Occur.MUST, this); |
| indexQuery.visit(v); |
| dvQuery.visit(v); |
| } |
| |
| @Override |
| public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { |
| final Weight indexWeight = indexQuery.createWeight(searcher, scoreMode, boost); |
| final Weight dvWeight = dvQuery.createWeight(searcher, scoreMode, boost); |
| return new Weight(this) { |
| @Override |
| public void extractTerms(Set<Term> terms) { |
| indexWeight.extractTerms(terms); |
| } |
| |
| @Override |
| public Matches matches(LeafReaderContext context, int doc) throws IOException { |
| // We need to check a single doc, so the dv query should perform better |
| return dvWeight.matches(context, doc); |
| } |
| |
| @Override |
| public Explanation explain(LeafReaderContext context, int doc) throws IOException { |
| // We need to check a single doc, so the dv query should perform better |
| return dvWeight.explain(context, doc); |
| } |
| |
| @Override |
| public BulkScorer bulkScorer(LeafReaderContext context) throws IOException { |
| // Bulk scorers need to consume the entire set of docs, so using an |
| // index structure should perform better |
| return indexWeight.bulkScorer(context); |
| } |
| |
| @Override |
| public ScorerSupplier scorerSupplier(LeafReaderContext context) throws IOException { |
| final ScorerSupplier indexScorerSupplier = indexWeight.scorerSupplier(context); |
| final ScorerSupplier dvScorerSupplier = dvWeight.scorerSupplier(context); |
| if (indexScorerSupplier == null || dvScorerSupplier == null) { |
| return null; |
| } |
| return new ScorerSupplier() { |
| @Override |
| public Scorer get(long leadCost) throws IOException { |
| // At equal costs, doc values tend to be worse than points since they |
| // still need to perform one comparison per document while points can |
| // do much better than that given how values are organized. So we give |
| // an arbitrary 8x penalty to doc values. |
| final long threshold = cost() >>> 3; |
| if (threshold <= leadCost) { |
| return indexScorerSupplier.get(leadCost); |
| } else { |
| return dvScorerSupplier.get(leadCost); |
| } |
| } |
| |
| @Override |
| public long cost() { |
| return indexScorerSupplier.cost(); |
| } |
| }; |
| } |
| |
| @Override |
| public Scorer scorer(LeafReaderContext context) throws IOException { |
| ScorerSupplier scorerSupplier = scorerSupplier(context); |
| if (scorerSupplier == null) { |
| return null; |
| } |
| return scorerSupplier.get(Long.MAX_VALUE); |
| } |
| |
| @Override |
| public boolean isCacheable(LeafReaderContext ctx) { |
| // Both index and dv query should return the same values, so we can use |
| // the index query's cachehelper here |
| return indexWeight.isCacheable(ctx); |
| } |
| |
| }; |
| } |
| |
| } |