blob: f9787fd1e4fbb5907197f8e2322c47ff3a8ae2a6 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.search.spans;
import java.io.IOException;
import java.util.Arrays;
import java.util.Comparator;
import java.util.Locale;
import java.util.Map;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermStates;
import org.apache.lucene.search.CollectionStatistics;
import org.apache.lucene.search.Explanation;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.LeafSimScorer;
import org.apache.lucene.search.Matches;
import org.apache.lucene.search.MatchesIterator;
import org.apache.lucene.search.MatchesUtils;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TermStatistics;
import org.apache.lucene.search.Weight;
import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.util.ArrayUtil;
/**
 * Expert-only base {@link Weight} for span queries. Public so that other weight
 * implementations can use it.
 */
public abstract class SpanWeight extends Weight {
/**
 * Levels of postings information that can be requested from the index when
 * obtaining a {@link Spans}.
 *
 * <p>{@link #atLeast(Postings)} compares constants by declaration order, so the
 * order of the constants below is significant.
 */
public enum Postings {
  /** Requests {@link PostingsEnum#POSITIONS}. */
  POSITIONS {
    @Override
    public int getRequiredPostings() {
      return PostingsEnum.POSITIONS;
    }
  },
  /** Requests {@link PostingsEnum#PAYLOADS}. */
  PAYLOADS {
    @Override
    public int getRequiredPostings() {
      return PostingsEnum.PAYLOADS;
    }
  },
  /** Requests both {@link PostingsEnum#OFFSETS} and {@link PostingsEnum#PAYLOADS}. */
  OFFSETS {
    @Override
    public int getRequiredPostings() {
      return PostingsEnum.OFFSETS | PostingsEnum.PAYLOADS;
    }
  };

  /**
   * Returns the {@link PostingsEnum} flag value corresponding to this level.
   *
   * @return the flags to request from the index
   */
  public abstract int getRequiredPostings();

  /**
   * Picks the later-declared of this level and the given one.
   *
   * @param postings the other level to compare against
   * @return {@code postings} if it is declared after this constant, otherwise this constant
   */
  public Postings atLeast(Postings postings) {
    return compareTo(postings) < 0 ? postings : this;
  }
}
/** Similarity obtained from the {@link IndexSearcher} at construction time. */
protected final Similarity similarity;
/**
 * Scorer built from term and collection statistics at construction time; {@code null} when no
 * term states were supplied, the query has no field, or none of the terms exists in the index.
 */
protected final Similarity.SimScorer simScorer;
/** Field of the parent query, as returned by its {@code getField()} at construction time. */
protected final String field;
/**
 * Creates a new SpanWeight, capturing the query's field and the searcher's similarity and
 * building the similarity scorer from the supplied term states.
 *
 * @param query the parent query
 * @param searcher the IndexSearcher to query against
 * @param termStates a map of terms to {@link TermStates} for use in building the similarity. May
 *     be null if scores are not required
 * @param boost the boost handed to the similarity when the scorer is built
 * @throws IOException on error
 */
public SpanWeight(SpanQuery query, IndexSearcher searcher, Map<Term, TermStates> termStates, float boost) throws IOException {
  super(query);
  this.similarity = searcher.getSimilarity();
  this.field = query.getField();
  // similarity has to be assigned before this call: buildSimWeight reads it
  this.simScorer = buildSimWeight(query, searcher, termStates, boost);
}
/**
 * Builds the similarity scorer for this weight from the statistics of every supplied term that
 * actually occurs in the index.
 *
 * @param query the parent query; its field selects the collection statistics
 * @param searcher source of term and collection statistics
 * @param termStates terms and their states; may be null or empty
 * @param boost the boost handed to the similarity
 * @return the scorer, or {@code null} when there are no term states, the query has no field, or
 *     no term has a positive document frequency
 * @throws IOException on error
 */
private Similarity.SimScorer buildSimWeight(SpanQuery query, IndexSearcher searcher, Map<Term, TermStates> termStates, float boost) throws IOException {
  final boolean canScore = termStates != null && !termStates.isEmpty() && query.getField() != null;
  if (!canScore) {
    return null;
  }

  // Sized for the worst case; only the first 'found' slots end up populated.
  final TermStatistics[] collected = new TermStatistics[termStates.size()];
  int found = 0;
  for (Map.Entry<Term, TermStates> entry : termStates.entrySet()) {
    final TermStates states = entry.getValue();
    if (states.docFreq() <= 0) {
      continue; // term does not occur in the index
    }
    collected[found] = searcher.termStatistics(entry.getKey(), states.docFreq(), states.totalTermFreq());
    found++;
  }

  final CollectionStatistics collectionStats = searcher.collectionStatistics(query.getField());
  if (found == 0) {
    return null; // no terms at all exist, we won't use similarity
  }
  return similarity.scorer(boost, collectionStats, ArrayUtil.copyOfSubArray(collected, 0, found));
}
/**
 * Collects all {@link TermStates} used by this Weight into the supplied map.
 *
 * @param contexts a map to add the TermStates to
 */
public abstract void extractTermStates(Map<Term, TermStates> contexts);
/**
 * Expert: Return a Spans object iterating over matches from this Weight
 *
 * @param ctx a LeafReaderContext for this Spans
 * @param requiredPostings the level of postings information requested for the returned Spans
 * @return a Spans; callers in this class treat a {@code null} result as "no matches in this leaf"
 * @throws IOException on error
 */
public abstract Spans getSpans(LeafReaderContext ctx, Postings requiredPostings) throws IOException;
/**
 * Creates a scorer for the given leaf from spans requested at {@link Postings#POSITIONS} level.
 *
 * @param context the leaf to score
 * @return a {@link SpanScorer}, or {@code null} when {@link #getSpans} returns no Spans
 * @throws IOException on error
 */
@Override
public SpanScorer scorer(LeafReaderContext context) throws IOException {
  final Spans leafSpans = getSpans(context, Postings.POSITIONS);
  return leafSpans == null ? null : new SpanScorer(this, leafSpans, getSimScorer(context));
}
/**
 * Return a LeafSimScorer for this context
 *
 * @param context the LeafReaderContext
 * @return a LeafSimScorer wrapping this weight's SimScorer, or {@code null} if this weight has
 *     no SimScorer
 * @throws IOException on error
 */
public LeafSimScorer getSimScorer(LeafReaderContext context) throws IOException {
  if (simScorer == null) {
    return null;
  }
  return new LeafSimScorer(simScorer, context.reader(), field, true);
}
/**
 * Explains the score of {@code doc} in the given leaf.
 *
 * <p>Produces a no-match explanation when there is no scorer or the scorer does not land on
 * {@code doc}; a zero-valued match when this weight has no SimScorer; otherwise a match that
 * wraps the similarity's explanation of the sloppy frequency.
 *
 * @param context the leaf containing the document
 * @param doc the leaf-relative document id
 * @return the explanation
 * @throws IOException on error
 */
@Override
public Explanation explain(LeafReaderContext context, int doc) throws IOException {
  final SpanScorer spanScorer = scorer(context);
  final boolean matched = spanScorer != null && spanScorer.iterator().advance(doc) == doc;
  if (!matched) {
    return Explanation.noMatch("no matching term");
  }

  if (simScorer == null) {
    // simScorer won't be set when scoring isn't needed
    return Explanation.match(0f, String.format(Locale.ROOT,
        "match %s in %s without score", getQuery(), doc));
  }

  final float freq = spanScorer.sloppyFreq();
  final LeafSimScorer leafScorer = new LeafSimScorer(simScorer, context.reader(), field, true);
  final Explanation freqDetail = Explanation.match(freq, "phraseFreq=" + freq);
  final Explanation scoreDetail = leafScorer.explain(doc, freqDetail);
  return Explanation.match(scoreDetail.getValue(),
      "weight(" + getQuery() + " in " + doc + ") [" + similarity.getClass().getSimpleName() + "], result of:",
      scoreDetail);
}
/**
 * Mutable record of one leaf term collected for the current span match; instances are reused
 * across span positions by {@link #matches(LeafReaderContext, int)}.
 */
private static final class TermMatch {
  /** The term reported to the collector. */
  Term term;
  /** The position reported to the collector. */
  int position;
  /** Start offset read from the postings when the term was collected. */
  int startOffset;
  /** End offset read from the postings when the term was collected. */
  int endOffset;
}
/**
 * Returns the matches of this weight's query within a single document.
 *
 * <p>The matches are produced from a {@link Spans} requested at {@link Postings#OFFSETS} level.
 * Each span position becomes one match; the leaf terms that make up the span are collected
 * lazily (on the first call to an offset accessor or to {@code getSubMatches()}) and sorted by
 * position.
 *
 * @param context the leaf containing the document
 * @param doc the leaf-relative document id
 * @return the result of {@link MatchesUtils#forField} for this weight's field; the supplied
 *     iterator factory yields {@code null} when there are no spans or they do not match
 *     {@code doc}
 * @throws IOException on error
 */
@Override
public Matches matches(LeafReaderContext context, int doc) throws IOException {
  return MatchesUtils.forField(field, () -> {
    Spans spans = getSpans(context, Postings.OFFSETS);
    if (spans == null || spans.advance(doc) != doc) {
      return null;
    }
    return new MatchesIterator() {

      // Number of valid entries in innerTerms for the current span position;
      // 0 doubles as the "not collected yet" marker and is reset by next().
      int innerTermCount = 0;
      // Reusable buffer: TermMatch instances are allocated once and overwritten
      // for every subsequent span position.
      TermMatch[] innerTerms = new TermMatch[0];

      SpanCollector termCollector = new SpanCollector() {
        @Override
        public void collectLeaf(PostingsEnum postings, int position, Term term) throws IOException {
          innerTermCount++;
          if (innerTermCount > innerTerms.length) {
            // Grow by exactly one slot; existing TermMatch instances are carried over.
            innerTerms = Arrays.copyOf(innerTerms, innerTermCount);
            innerTerms[innerTermCount - 1] = new TermMatch();
          }
          TermMatch match = innerTerms[innerTermCount - 1];
          match.term = term;
          match.position = position;
          match.startOffset = postings.startOffset();
          match.endOffset = postings.endOffset();
        }

        @Override
        public void reset() {
          innerTermCount = 0;
        }
      };

      @Override
      public boolean next() throws IOException {
        // Invalidate the terms collected for the previous span position.
        innerTermCount = 0;
        return spans.nextStartPosition() != Spans.NO_MORE_POSITIONS;
      }

      @Override
      public int startPosition() {
        return spans.startPosition();
      }

      @Override
      public int endPosition() {
        // NOTE(review): presumably converts an exclusive Spans end position into an
        // inclusive one; confirm against the Spans and MatchesIterator contracts.
        return spans.endPosition() - 1;
      }

      @Override
      public int startOffset() throws IOException {
        if (innerTermCount == 0) {
          collectInnerTerms();
        }
        // Terms are sorted by position, so the first one starts the match.
        return innerTerms[0].startOffset;
      }

      @Override
      public int endOffset() throws IOException {
        if (innerTermCount == 0) {
          collectInnerTerms();
        }
        // Terms are sorted by position, so the last one ends the match.
        return innerTerms[innerTermCount - 1].endOffset;
      }

      @Override
      public MatchesIterator getSubMatches() throws IOException {
        if (innerTermCount == 0) {
          collectInnerTerms();
        }
        // The returned iterator reads the enclosing iterator's term buffer directly, so it
        // reflects whatever span position the enclosing iterator is currently on.
        return new MatchesIterator() {

          // Index of the current term; -1 before the first call to next().
          int upto = -1;

          @Override
          public boolean next() throws IOException {
            upto++;
            return upto < innerTermCount;
          }

          @Override
          public int startPosition() {
            return innerTerms[upto].position;
          }

          @Override
          public int endPosition() {
            return innerTerms[upto].position;
          }

          @Override
          public int startOffset() throws IOException {
            return innerTerms[upto].startOffset;
          }

          @Override
          public int endOffset() throws IOException {
            return innerTerms[upto].endOffset;
          }

          @Override
          public MatchesIterator getSubMatches() throws IOException {
            return null;
          }

          @Override
          public Query getQuery() {
            return new TermQuery(innerTerms[upto].term);
          }
        };
      }

      @Override
      public Query getQuery() {
        return SpanWeight.this.getQuery();
      }

      /** Collects the leaf terms of the current span position and orders them by position. */
      void collectInnerTerms() throws IOException {
        termCollector.reset();
        spans.collect(termCollector);
        // comparingInt compares the primitive keys directly instead of boxing each position.
        Arrays.sort(innerTerms, 0, innerTermCount, Comparator.comparingInt(a -> a.position));
      }
    };
  });
}
}