lucene/core/src/java/org/apache/lucene/search/IndexOrDocValuesQuery.java - lucene-solr - Git at Google

 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
  * this work for additional information regarding copyright ownership.
  * The ASF licenses this file to You under the Apache License, Version 2.0
  * (the "License"); you may not use this file except in compliance with
  * the License.  You may obtain a copy of the License at
  *
  *     http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
 package org.apache.lucene.search;

 import java.io.IOException;
 import java.util.Set;

 import org.apache.lucene.document.LongPoint;
 import org.apache.lucene.document.SortedNumericDocValuesField;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.LeafReaderContext;
 import org.apache.lucene.index.Term;

 /**
  * A query that uses either an index structure (points or terms) or doc values
  * in order to run a query, depending which one is more efficient. This is
  * typically useful for range queries, whose {@link Weight#scorer} is costly
  * to create since it usually needs to sort large lists of doc ids. For
  * instance, for a field that both indexed {@link LongPoint}s and
  * {@link SortedNumericDocValuesField}s with the same values, an efficient
  * range query could be created by doing:
  * <pre class="prettyprint">
  *   String field;
  *   long minValue, maxValue;
  *   Query pointQuery = LongPoint.newRangeQuery(field, minValue, maxValue);
  *   Query dvQuery = SortedNumericDocValuesField.newSlowRangeQuery(field, minValue, maxValue);
  *   Query query = new IndexOrDocValuesQuery(pointQuery, dvQuery);
  * </pre>
  * The above query will be efficient as it will use points in the case that they
  * perform better, ie. when we need a good lead iterator that will be almost
  * entirely consumed; and doc values otherwise, ie. in the case that another
  * part of the query is already leading iteration but we still need the ability
  * to verify that some documents match.
  * <p><b>NOTE</b>This query currently only works well with point range/exact
  * queries and their equivalent doc values queries.
  * @lucene.experimental
  */
 public final class IndexOrDocValuesQuery extends Query {

   private final Query indexQuery, dvQuery;

   /**
    * Create an {@link IndexOrDocValuesQuery}. Both provided queries must match
    * the same documents and give the same scores.
    * @param indexQuery a query that has a good iterator but whose scorer may be costly to create
    * @param dvQuery a query whose scorer is cheap to create that can quickly check whether a given document matches
    */
   public IndexOrDocValuesQuery(Query indexQuery, Query dvQuery) {
     this.indexQuery = indexQuery;
     this.dvQuery = dvQuery;
   }

   /** Return the wrapped query that may be costly to initialize but has a good
    *  iterator. */
   public Query getIndexQuery() {
     return indexQuery;
   }

   /** Return the wrapped query that may be slow at identifying all matching
    *  documents, but which is cheap to initialize and can efficiently
    *  verify that some documents match. */
   public Query getRandomAccessQuery() {
     return dvQuery;
   }

   @Override
   public String toString(String field) {
     return indexQuery.toString(field);
   }

   @Override
   public boolean equals(Object obj) {
     if (sameClassAs(obj) == false) {
       return false;
     }
     IndexOrDocValuesQuery that = (IndexOrDocValuesQuery) obj;
     return indexQuery.equals(that.indexQuery) && dvQuery.equals(that.dvQuery);
   }

   @Override
   public int hashCode() {
     int h = classHash();
     h = 31 * h + indexQuery.hashCode();
     h = 31 * h + dvQuery.hashCode();
     return h;
   }

   @Override
   public Query rewrite(IndexReader reader) throws IOException {
     Query indexRewrite = indexQuery.rewrite(reader);
     Query dvRewrite = dvQuery.rewrite(reader);
     if (indexQuery != indexRewrite || dvQuery != dvRewrite) {
       return new IndexOrDocValuesQuery(indexRewrite, dvRewrite);
     }
     return this;
   }

   @Override
   public void visit(QueryVisitor visitor) {
     QueryVisitor v = visitor.getSubVisitor(BooleanClause.Occur.MUST, this);
     indexQuery.visit(v);
     dvQuery.visit(v);
   }

   @Override
   public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException {
     final Weight indexWeight = indexQuery.createWeight(searcher, scoreMode, boost);
     final Weight dvWeight = dvQuery.createWeight(searcher, scoreMode, boost);
     return new Weight(this) {
       @Override
       public void extractTerms(Set<Term> terms) {
         indexWeight.extractTerms(terms);
       }

       @Override
       public Matches matches(LeafReaderContext context, int doc) throws IOException {
         // We need to check a single doc, so the dv query should perform better
         return dvWeight.matches(context, doc);
       }

       @Override
       public Explanation explain(LeafReaderContext context, int doc) throws IOException {
         // We need to check a single doc, so the dv query should perform better
         return dvWeight.explain(context, doc);
       }

       @Override
       public BulkScorer bulkScorer(LeafReaderContext context) throws IOException {
         // Bulk scorers need to consume the entire set of docs, so using an
         // index structure should perform better
         return indexWeight.bulkScorer(context);
       }

       @Override
       public ScorerSupplier scorerSupplier(LeafReaderContext context) throws IOException {
         final ScorerSupplier indexScorerSupplier = indexWeight.scorerSupplier(context);
         final ScorerSupplier dvScorerSupplier = dvWeight.scorerSupplier(context);
         if (indexScorerSupplier == null || dvScorerSupplier == null) {
           return null;
         }
         return new ScorerSupplier() {
           @Override
           public Scorer get(long leadCost) throws IOException {
             // At equal costs, doc values tend to be worse than points since they
             // still need to perform one comparison per document while points can
             // do much better than that given how values are organized. So we give
             // an arbitrary 8x penalty to doc values.
             final long threshold = cost() >>> 3;
             if (threshold <= leadCost) {
               return indexScorerSupplier.get(leadCost);
             } else {
               return dvScorerSupplier.get(leadCost);
             }
           }

           @Override
           public long cost() {
             return indexScorerSupplier.cost();
           }
         };
       }

       @Override
       public Scorer scorer(LeafReaderContext context) throws IOException {
         ScorerSupplier scorerSupplier = scorerSupplier(context);
         if (scorerSupplier == null) {
           return null;
         }
         return scorerSupplier.get(Long.MAX_VALUE);
       }

       @Override
       public boolean isCacheable(LeafReaderContext ctx) {
         // Both index and dv query should return the same values, so we can use
         // the index query's cachehelper here
         return indexWeight.isCacheable(ctx);
       }

     };
   }

 }
	/*
	* Licensed to the Apache Software Foundation (ASF) under one or more
	* contributor license agreements. See the NOTICE file distributed with
	* this work for additional information regarding copyright ownership.
	* The ASF licenses this file to You under the Apache License, Version 2.0
	* (the "License"); you may not use this file except in compliance with
	* the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/
	package org.apache.lucene.search;

	import java.io.IOException;
	import java.util.Set;

	import org.apache.lucene.document.LongPoint;
	import org.apache.lucene.document.SortedNumericDocValuesField;
	import org.apache.lucene.index.IndexReader;
	import org.apache.lucene.index.LeafReaderContext;
	import org.apache.lucene.index.Term;

	/**
	* A query that uses either an index structure (points or terms) or doc values
	* in order to run a query, depending which one is more efficient. This is
	* typically useful for range queries, whose {@link Weight#scorer} is costly
	* to create since it usually needs to sort large lists of doc ids. For
	* instance, for a field that both indexed {@link LongPoint}s and
	* {@link SortedNumericDocValuesField}s with the same values, an efficient
	* range query could be created by doing:
	* <pre class="prettyprint">
	* String field;
	* long minValue, maxValue;
	* Query pointQuery = LongPoint.newRangeQuery(field, minValue, maxValue);
	* Query dvQuery = SortedNumericDocValuesField.newSlowRangeQuery(field, minValue, maxValue);
	* Query query = new IndexOrDocValuesQuery(pointQuery, dvQuery);
	* </pre>
	* The above query will be efficient as it will use points in the case that they
	* perform better, ie. when we need a good lead iterator that will be almost
	* entirely consumed; and doc values otherwise, ie. in the case that another
	* part of the query is already leading iteration but we still need the ability
	* to verify that some documents match.
	* <p><b>NOTE</b>This query currently only works well with point range/exact
	* queries and their equivalent doc values queries.
	* @lucene.experimental
	*/
	public final class IndexOrDocValuesQuery extends Query {

	private final Query indexQuery, dvQuery;

	/**
	* Create an {@link IndexOrDocValuesQuery}. Both provided queries must match
	* the same documents and give the same scores.
	* @param indexQuery a query that has a good iterator but whose scorer may be costly to create
	* @param dvQuery a query whose scorer is cheap to create that can quickly check whether a given document matches
	*/
	public IndexOrDocValuesQuery(Query indexQuery, Query dvQuery) {
	this.indexQuery = indexQuery;
	this.dvQuery = dvQuery;
	}

	/** Return the wrapped query that may be costly to initialize but has a good
	* iterator. */
	public Query getIndexQuery() {
	return indexQuery;
	}

	/** Return the wrapped query that may be slow at identifying all matching
	* documents, but which is cheap to initialize and can efficiently
	* verify that some documents match. */
	public Query getRandomAccessQuery() {
	return dvQuery;
	}

	@Override
	public String toString(String field) {
	return indexQuery.toString(field);
	}

	@Override
	public boolean equals(Object obj) {
	if (sameClassAs(obj) == false) {
	return false;
	}
	IndexOrDocValuesQuery that = (IndexOrDocValuesQuery) obj;
	return indexQuery.equals(that.indexQuery) && dvQuery.equals(that.dvQuery);
	}

	@Override
	public int hashCode() {
	int h = classHash();
	h = 31 * h + indexQuery.hashCode();
	h = 31 * h + dvQuery.hashCode();
	return h;
	}

	@Override
	public Query rewrite(IndexReader reader) throws IOException {
	Query indexRewrite = indexQuery.rewrite(reader);
	Query dvRewrite = dvQuery.rewrite(reader);
	if (indexQuery != indexRewrite \|\| dvQuery != dvRewrite) {
	return new IndexOrDocValuesQuery(indexRewrite, dvRewrite);
	}
	return this;
	}

	@Override
	public void visit(QueryVisitor visitor) {
	QueryVisitor v = visitor.getSubVisitor(BooleanClause.Occur.MUST, this);
	indexQuery.visit(v);
	dvQuery.visit(v);
	}

	@Override
	public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException {
	final Weight indexWeight = indexQuery.createWeight(searcher, scoreMode, boost);
	final Weight dvWeight = dvQuery.createWeight(searcher, scoreMode, boost);
	return new Weight(this) {
	@Override
	public void extractTerms(Set<Term> terms) {
	indexWeight.extractTerms(terms);
	}

	@Override
	public Matches matches(LeafReaderContext context, int doc) throws IOException {
	// We need to check a single doc, so the dv query should perform better
	return dvWeight.matches(context, doc);
	}

	@Override
	public Explanation explain(LeafReaderContext context, int doc) throws IOException {
	// We need to check a single doc, so the dv query should perform better
	return dvWeight.explain(context, doc);
	}

	@Override
	public BulkScorer bulkScorer(LeafReaderContext context) throws IOException {
	// Bulk scorers need to consume the entire set of docs, so using an
	// index structure should perform better
	return indexWeight.bulkScorer(context);
	}

	@Override
	public ScorerSupplier scorerSupplier(LeafReaderContext context) throws IOException {
	final ScorerSupplier indexScorerSupplier = indexWeight.scorerSupplier(context);
	final ScorerSupplier dvScorerSupplier = dvWeight.scorerSupplier(context);
	if (indexScorerSupplier == null \|\| dvScorerSupplier == null) {
	return null;
	}
	return new ScorerSupplier() {
	@Override
	public Scorer get(long leadCost) throws IOException {
	// At equal costs, doc values tend to be worse than points since they
	// still need to perform one comparison per document while points can
	// do much better than that given how values are organized. So we give
	// an arbitrary 8x penalty to doc values.
	final long threshold = cost() >>> 3;
	if (threshold <= leadCost) {
	return indexScorerSupplier.get(leadCost);
	} else {
	return dvScorerSupplier.get(leadCost);
	}
	}

	@Override
	public long cost() {
	return indexScorerSupplier.cost();
	}
	};
	}

	@Override
	public Scorer scorer(LeafReaderContext context) throws IOException {
	ScorerSupplier scorerSupplier = scorerSupplier(context);
	if (scorerSupplier == null) {
	return null;
	}
	return scorerSupplier.get(Long.MAX_VALUE);
	}

	@Override
	public boolean isCacheable(LeafReaderContext ctx) {
	// Both index and dv query should return the same values, so we can use
	// the index query's cachehelper here
	return indexWeight.isCacheable(ctx);
	}

	};
	}

	}