blob: 445988d7e14208521539344e4dcd31c1eb2ffded [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.queries.intervals;
import java.io.IOException;
import java.util.Objects;
import java.util.Set;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.Explanation;
import org.apache.lucene.search.FilterMatchesIterator;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Matches;
import org.apache.lucene.search.MatchesIterator;
import org.apache.lucene.search.MatchesUtils;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.QueryVisitor;
import org.apache.lucene.search.ScoreMode;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.Weight;
/**
 * A query that retrieves documents containing intervals returned from an
 * {@link IntervalsSource}.
 *
 * <p>Static constructor functions for various different sources can be found in the
 * {@link Intervals} class.
 *
 * <p>Scores for this query are computed as a function of the sloppy frequency of
 * intervals appearing in a particular document. Sloppy frequency is calculated
 * from the number of matching intervals, and their width, with wider intervals
 * contributing lower values. The scores can be adjusted with two optional
 * parameters:
 * <ul>
 * <li>pivot - the sloppy frequency value at which the overall score of the
 * document will equal 0.5. The default value is 1</li>
 * <li>exp - higher values of this parameter make the function grow more slowly
 * below the pivot and faster above the pivot. The default value is 1</li>
 * </ul>
 *
 * <p>Optimal values for both pivot and exp depend on the type of queries and corpus of
 * documents being queried.
 *
 * <p>Scores are bounded to between 0 and 1. For higher contributions, wrap the query
 * in a {@link org.apache.lucene.search.BoostQuery}.
 */
public final class IntervalQuery extends Query {

  /** Name of the field the intervals are read from. */
  private final String field;

  /** Source producing the intervals that a document must contain to match. */
  private final IntervalsSource intervalsSource;

  /** Function turning the sloppy frequency of matching intervals into a score. */
  private final IntervalScoreFunction scoreFunction;

  /**
   * Create a new IntervalQuery using a saturation score function with a pivot of 1
   *
   * @param field the field to query
   * @param intervalsSource an {@link IntervalsSource} to retrieve intervals from
   */
  public IntervalQuery(String field, IntervalsSource intervalsSource) {
    this(field, intervalsSource, IntervalScoreFunction.saturationFunction(1));
  }

  /**
   * Create a new IntervalQuery with a scoring pivot
   *
   * @param field the field to query
   * @param intervalsSource an {@link IntervalsSource} to retrieve intervals from
   * @param pivot the sloppy frequency value at which the score will be 0.5, must be within
   *     (0, +Infinity)
   */
  public IntervalQuery(String field, IntervalsSource intervalsSource, float pivot) {
    this(field, intervalsSource, IntervalScoreFunction.saturationFunction(pivot));
  }

  /**
   * Create a new IntervalQuery with a scoring pivot and exponent
   *
   * @param field the field to query
   * @param intervalsSource an {@link IntervalsSource} to retrieve intervals from
   * @param pivot the sloppy frequency value at which the score will be 0.5, must be within
   *     (0, +Infinity)
   * @param exp exponent, higher values make the function grow slower before 'pivot' and faster
   *     after 'pivot', must be in (0, +Infinity)
   */
  public IntervalQuery(String field, IntervalsSource intervalsSource, float pivot, float exp) {
    this(field, intervalsSource, IntervalScoreFunction.sigmoidFunction(pivot, exp));
  }

  /**
   * Shared constructor that all public constructors delegate to.
   *
   * @throws NullPointerException if any argument is {@code null}
   */
  private IntervalQuery(
      String field, IntervalsSource intervalsSource, IntervalScoreFunction scoreFunction) {
    this.field = Objects.requireNonNull(field, "null field aren't accepted");
    this.intervalsSource =
        Objects.requireNonNull(intervalsSource, "null intervalsSource aren't accepted");
    this.scoreFunction =
        Objects.requireNonNull(scoreFunction, "null scoreFunction aren't accepted");
  }

  /**
   * The field to query
   */
  public String getField() {
    return field;
  }

  /**
   * Renders the intervals source, prefixed with {@code "<field>:"} unless this query's field is
   * the one passed in as the default field.
   */
  @Override
  public String toString(String field) {
    String prefix = this.field.equals(field) ? "" : this.field + ":";
    return prefix + intervalsSource.toString();
  }

  @Override
  public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost)
      throws IOException {
    return new IntervalWeight(this, boost, scoreMode);
  }

  /** Forwards the visitor to the intervals source, provided the visitor accepts this field. */
  @Override
  public void visit(QueryVisitor visitor) {
    if (visitor.acceptField(field)) {
      intervalsSource.visit(field, visitor);
    }
  }

  /**
   * Two interval queries are equal when they target the same field, use equal interval sources
   * and use equal score functions.
   */
  @Override
  public boolean equals(Object o) {
    if (this == o) {
      return true;
    }
    if (o == null || getClass() != o.getClass()) {
      return false;
    }
    IntervalQuery that = (IntervalQuery) o;
    // The score function (pivot / exp) changes the scores this query produces, so it has to be
    // part of the query's identity.
    // NOTE(review): relies on IntervalScoreFunction providing value-based equals/hashCode.
    return Objects.equals(field, that.field)
        && Objects.equals(intervalsSource, that.intervalsSource)
        && Objects.equals(scoreFunction, that.scoreFunction);
  }

  @Override
  public int hashCode() {
    // Must hash exactly the fields compared in equals().
    return Objects.hash(field, intervalsSource, scoreFunction);
  }

  /** Weight for an {@link IntervalQuery}; reads the field, source and score function of its query. */
  private class IntervalWeight extends Weight {

    final ScoreMode scoreMode;
    final float boost;

    public IntervalWeight(Query query, float boost, ScoreMode scoreMode) {
      super(query);
      this.scoreMode = scoreMode;
      this.boost = boost;
    }

    /** Collects the terms of the intervals source into {@code terms}. */
    @Override
    public void extractTerms(Set<Term> terms) {
      intervalsSource.visit(field, QueryVisitor.termCollector(terms));
    }

    /**
     * Explains the score of {@code doc}, or returns a no-match explanation when there is no scorer
     * for the segment or the scorer's iterator does not land on {@code doc}.
     */
    @Override
    public Explanation explain(LeafReaderContext context, int doc) throws IOException {
      IntervalScorer scorer = (IntervalScorer) scorer(context);
      if (scorer != null) {
        int newDoc = scorer.iterator().advance(doc);
        // advance() may skip past the requested document; only an exact hit is a match.
        if (newDoc == doc) {
          float freq = scorer.freq();
          return scoreFunction.explain(IntervalQuery.this.toString(), boost, freq);
        }
      }
      return Explanation.noMatch("no matching intervals");
    }

    /**
     * Exposes the matches of the intervals source for {@code doc}, reporting the enclosing
     * {@link IntervalQuery} as the query responsible for each match.
     */
    @Override
    public Matches matches(LeafReaderContext context, int doc) throws IOException {
      return MatchesUtils.forField(field, () -> {
        MatchesIterator mi = intervalsSource.matches(field, context, doc);
        if (mi == null) {
          return null;
        }
        return new FilterMatchesIterator(mi) {
          @Override
          public Query getQuery() {
            // Report the executing query itself rather than a freshly built copy: a copy made
            // with the two-argument constructor would lose the configured score function and
            // costs an allocation on every call.
            return IntervalQuery.this;
          }
        };
      });
    }

    /**
     * Builds a scorer over the intervals of this segment, or returns {@code null} when the source
     * provides no interval iterator for it.
     */
    @Override
    public Scorer scorer(LeafReaderContext context) throws IOException {
      IntervalIterator intervals = intervalsSource.intervals(field, context);
      if (intervals == null) {
        return null;
      }
      return new IntervalScorer(this, intervals, intervalsSource.minExtent(), boost, scoreFunction);
    }

    /** Always reports this weight as cacheable. */
    @Override
    public boolean isCacheable(LeafReaderContext ctx) {
      return true;
    }
  }
}