| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| package org.apache.lucene.search.spans; |
| |
| |
| import java.io.IOException; |
| import java.util.Collections; |
| import java.util.Map; |
| import java.util.Objects; |
| import java.util.Set; |
| |
| import org.apache.lucene.index.IndexReaderContext; |
| import org.apache.lucene.index.LeafReaderContext; |
| import org.apache.lucene.index.PostingsEnum; |
| import org.apache.lucene.index.ReaderUtil; |
| import org.apache.lucene.index.Term; |
| import org.apache.lucene.index.TermState; |
| import org.apache.lucene.index.TermStates; |
| import org.apache.lucene.index.Terms; |
| import org.apache.lucene.index.TermsEnum; |
| import org.apache.lucene.search.IndexSearcher; |
| import org.apache.lucene.search.QueryVisitor; |
| import org.apache.lucene.search.ScoreMode; |
| |
| /** Matches spans containing a term. |
| * This should not be used for terms that are indexed at position Integer.MAX_VALUE. |
| */ |
| public class SpanTermQuery extends SpanQuery { |
| |
| protected final Term term; |
| protected final TermStates termStates; |
| |
| /** Construct a SpanTermQuery matching the named term's spans. */ |
| public SpanTermQuery(Term term) { |
| this.term = Objects.requireNonNull(term); |
| this.termStates = null; |
| } |
| |
| /** |
| * Expert: Construct a SpanTermQuery matching the named term's spans, using |
| * the provided TermStates |
| */ |
| public SpanTermQuery(Term term, TermStates termStates) { |
| this.term = Objects.requireNonNull(term); |
| this.termStates = termStates; |
| } |
| |
| /** Return the term whose spans are matched. */ |
| public Term getTerm() { return term; } |
| |
| /** Returns the {@link TermStates} passed to the constructor, or null if it was not passed. |
| * |
| * @lucene.experimental */ |
| public TermStates getTermStates() { |
| return termStates; |
| } |
| |
| @Override |
| public String getField() { return term.field(); } |
| |
| @Override |
| public SpanWeight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { |
| final TermStates context; |
| final IndexReaderContext topContext = searcher.getTopReaderContext(); |
| if (termStates == null || termStates.wasBuiltFor(topContext) == false) { |
| context = TermStates.build(topContext, term, scoreMode.needsScores()); |
| } |
| else { |
| context = termStates; |
| } |
| return new SpanTermWeight(context, searcher, scoreMode.needsScores() ? Collections.singletonMap(term, context) : null, boost); |
| } |
| |
| @Override |
| public void visit(QueryVisitor visitor) { |
| if (visitor.acceptField(term.field())) { |
| visitor.consumeTerms(this, term); |
| } |
| } |
| |
| public class SpanTermWeight extends SpanWeight { |
| |
| final TermStates termStates; |
| |
| public SpanTermWeight(TermStates termStates, IndexSearcher searcher, Map<Term, TermStates> terms, float boost) throws IOException { |
| super(SpanTermQuery.this, searcher, terms, boost); |
| this.termStates = termStates; |
| assert termStates != null : "TermStates must not be null"; |
| } |
| |
| @Override |
| public void extractTerms(Set<Term> terms) { |
| terms.add(term); |
| } |
| |
| @Override |
| public boolean isCacheable(LeafReaderContext ctx) { |
| return true; |
| } |
| |
| @Override |
| public void extractTermStates(Map<Term, TermStates> contexts) { |
| contexts.put(term, termStates); |
| } |
| |
| @Override |
| public Spans getSpans(final LeafReaderContext context, Postings requiredPostings) throws IOException { |
| |
| assert termStates.wasBuiltFor(ReaderUtil.getTopLevelContext(context)) : "The top-reader used to create Weight is not the same as the current reader's top-reader (" + ReaderUtil.getTopLevelContext(context); |
| |
| final TermState state = termStates.get(context); |
| if (state == null) { // term is not present in that reader |
| assert context.reader().docFreq(term) == 0 : "no termstate found but term exists in reader term=" + term; |
| return null; |
| } |
| |
| final Terms terms = context.reader().terms(term.field()); |
| if (terms == null) |
| return null; |
| if (terms.hasPositions() == false) |
| throw new IllegalStateException("field \"" + term.field() + "\" was indexed without position data; cannot run SpanTermQuery (term=" + term.text() + ")"); |
| |
| final TermsEnum termsEnum = terms.iterator(); |
| termsEnum.seekExact(term.bytes(), state); |
| |
| final PostingsEnum postings = termsEnum.postings(null, requiredPostings.getRequiredPostings()); |
| float positionsCost = termPositionsCost(termsEnum) * PHRASE_TO_SPAN_TERM_POSITIONS_COST; |
| return new TermSpans(getSimScorer(context), postings, term, positionsCost); |
| } |
| } |
| |
| /** A guess of |
| * the relative cost of dealing with the term positions |
| * when using a SpanNearQuery instead of a PhraseQuery. |
| */ |
| private static final float PHRASE_TO_SPAN_TERM_POSITIONS_COST = 4.0f; |
| |
| private static final int TERM_POSNS_SEEK_OPS_PER_DOC = 128; |
| |
| private static final int TERM_OPS_PER_POS = 7; |
| |
| /** Returns an expected cost in simple operations |
| * of processing the occurrences of a term |
| * in a document that contains the term. |
| * @param termsEnum The term is the term at which this TermsEnum is positioned. |
| * <p> |
| * This is a copy of org.apache.lucene.search.PhraseQuery.termPositionsCost(). |
| * <br> |
| * TODO: keep only a single copy of this method and the constants used in it |
| * when SpanTermQuery moves to the o.a.l.search package. |
| */ |
| static float termPositionsCost(TermsEnum termsEnum) throws IOException { |
| int docFreq = termsEnum.docFreq(); |
| assert docFreq > 0; |
| long totalTermFreq = termsEnum.totalTermFreq(); |
| assert totalTermFreq > 0; |
| float expOccurrencesInMatchingDoc = totalTermFreq / (float) docFreq; |
| return TERM_POSNS_SEEK_OPS_PER_DOC + expOccurrencesInMatchingDoc * TERM_OPS_PER_POS; |
| } |
| |
| @Override |
| public String toString(String field) { |
| StringBuilder buffer = new StringBuilder(); |
| if (term.field().equals(field)) |
| buffer.append(term.text()); |
| else |
| buffer.append(term.toString()); |
| return buffer.toString(); |
| } |
| |
| @Override |
| public int hashCode() { |
| return classHash() ^ term.hashCode(); |
| } |
| |
| @Override |
| public boolean equals(Object other) { |
| return sameClassAs(other) && |
| term.equals(((SpanTermQuery) other).term); |
| } |
| |
| } |