lucene/core/src/java/org/apache/lucene/search/spans/SpanWeight.java - lucene-solr - Git at Google

 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
  * this work for additional information regarding copyright ownership.
  * The ASF licenses this file to You under the Apache License, Version 2.0
  * (the "License"); you may not use this file except in compliance with
  * the License.  You may obtain a copy of the License at
  *
  *     http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
 package org.apache.lucene.search.spans;


 import java.io.IOException;
 import java.util.Arrays;
 import java.util.Comparator;
 import java.util.Locale;
 import java.util.Map;

 import org.apache.lucene.index.LeafReaderContext;
 import org.apache.lucene.index.PostingsEnum;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.index.TermStates;
 import org.apache.lucene.search.CollectionStatistics;
 import org.apache.lucene.search.Explanation;
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.LeafSimScorer;
 import org.apache.lucene.search.Matches;
 import org.apache.lucene.search.MatchesIterator;
 import org.apache.lucene.search.MatchesUtils;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.search.TermQuery;
 import org.apache.lucene.search.TermStatistics;
 import org.apache.lucene.search.Weight;
 import org.apache.lucene.search.similarities.Similarity;
 import org.apache.lucene.util.ArrayUtil;

 /**
  * Expert-only.  Public for use by other weight implementations.
  *
  * <p>Base {@link Weight} for span queries.  Subclasses supply the {@link Spans} for a
  * segment through {@link #getSpans(LeafReaderContext, Postings)}; this class builds the
  * scorer, the score explanation and the match iterator on top of them.
  */
 public abstract class SpanWeight extends Weight {

   /**
    * Enumeration defining what postings information should be retrieved from the
    * index for a given Spans
    */
   public enum Postings {
     /** Requests {@link PostingsEnum#POSITIONS}. */
     POSITIONS {
       @Override
       public int getRequiredPostings() {
         return PostingsEnum.POSITIONS;
       }
     },
     /** Requests {@link PostingsEnum#PAYLOADS}. */
     PAYLOADS {
       @Override
       public int getRequiredPostings() {
         return PostingsEnum.PAYLOADS;
       }
     },
     /** Requests both {@link PostingsEnum#PAYLOADS} and {@link PostingsEnum#OFFSETS}. */
     OFFSETS {
       @Override
       public int getRequiredPostings() {
         return PostingsEnum.PAYLOADS | PostingsEnum.OFFSETS;
       }
     };

     /**
      * @return the {@link PostingsEnum} flag bits corresponding to this constant
      */
     public abstract int getRequiredPostings();

     /**
      * Returns whichever of this constant and {@code postings} is declared later in
      * this enum (the comparison is the enum's natural, declaration-order one).
      * @param postings the level to compare against
      * @return {@code postings} if it is declared after this constant, otherwise this constant
      */
     public Postings atLeast(Postings postings) {
       if (postings.compareTo(this) > 0) {
         return postings;
       }
       return this;
     }
   }

   /** The similarity obtained from the searcher when this weight was created. */
   protected final Similarity similarity;
   /**
    * Scorer built from the term and collection statistics, or {@code null} when no
    * term states were supplied, the query has no field, or no supplied term has a
    * positive document frequency.
    */
   protected final Similarity.SimScorer simScorer;
   /** The field of the parent query. */
   protected final String field;

   /**
    * Create a new SpanWeight
    * @param query the parent query
    * @param searcher the IndexSearcher to query against
    * @param termStates a map of terms to {@link TermStates} for use in building the similarity.  May
    *                     be null if scores are not required
    * @param boost the boost handed to {@link Similarity#scorer} when the scorer is built
    * @throws IOException on error
    */
   public SpanWeight(SpanQuery query, IndexSearcher searcher, Map<Term, TermStates> termStates, float boost) throws IOException {
     super(query);
     this.field = query.getField();
     this.similarity = searcher.getSimilarity();
     this.simScorer = buildSimWeight(query, searcher, termStates, boost);
   }

   /**
    * Builds the {@link Similarity.SimScorer} from the statistics of every supplied term
    * whose document frequency is positive.
    * @return the scorer, or {@code null} if there is nothing to score with
    */
   private Similarity.SimScorer buildSimWeight(SpanQuery query, IndexSearcher searcher, Map<Term, TermStates> termStates, float boost) throws IOException {
     if (termStates == null || termStates.isEmpty() || query.getField() == null) {
       return null;
     }
     TermStatistics[] termStats = new TermStatistics[termStates.size()];
     int termUpTo = 0;
     for (Map.Entry<Term, TermStates> entry : termStates.entrySet()) {
       TermStates ts = entry.getValue();
       // terms that occur in no document contribute no statistics
       if (ts.docFreq() > 0) {
         termStats[termUpTo++] = searcher.termStatistics(entry.getKey(), ts.docFreq(), ts.totalTermFreq());
       }
     }
     CollectionStatistics collectionStats = searcher.collectionStatistics(query.getField());
     if (termUpTo > 0) {
       // only the filled prefix of termStats is handed to the similarity
       return similarity.scorer(boost, collectionStats, ArrayUtil.copyOfSubArray(termStats, 0, termUpTo));
     } else {
       return null; // no terms at all exist, we won't use similarity
     }
   }

   /**
    * Collect all TermStates used by this Weight
    * @param contexts a map to add the TermStates to
    */
   public abstract void extractTermStates(Map<Term, TermStates> contexts);

   /**
    * Expert: Return a Spans object iterating over matches from this Weight
    * @param ctx a LeafReaderContext for this Spans
    * @param requiredPostings the postings information that should be retrieved for the Spans
    * @return a Spans; callers in this class treat {@code null} as "no matches in this segment"
    * @throws IOException on error
    */
   public abstract Spans getSpans(LeafReaderContext ctx, Postings requiredPostings) throws IOException;

   /**
    * Creates a {@link SpanScorer} over the position-level Spans of the given segment.
    * @return the scorer, or {@code null} if {@link #getSpans} returned {@code null}
    */
   @Override
   public SpanScorer scorer(LeafReaderContext context) throws IOException {
     final Spans spans = getSpans(context, Postings.POSITIONS);
     if (spans == null) {
       return null;
     }
     final LeafSimScorer docScorer = getSimScorer(context);
     return new SpanScorer(this, spans, docScorer);
   }

   /**
    * Return a LeafSimScorer for this context
    * @param context the LeafReaderContext
    * @return a LeafSimScorer, or {@code null} if this weight has no {@link #simScorer}
    * @throws IOException on error
    */
   public LeafSimScorer getSimScorer(LeafReaderContext context) throws IOException {
     return simScorer == null ? null : new LeafSimScorer(simScorer, context.reader(), field, true);
   }

   /**
    * Explains the score of {@code doc}: a match explanation wrapping the similarity's
    * explanation when a scorer is available, a zero-valued match when the document
    * matches but no scorer was built, and a no-match explanation otherwise.
    */
   @Override
   public Explanation explain(LeafReaderContext context, int doc) throws IOException {
     SpanScorer scorer = scorer(context);
     if (scorer != null) {
       int newDoc = scorer.iterator().advance(doc);
       if (newDoc == doc) {
         if (simScorer != null) {
           float freq = scorer.sloppyFreq();
           LeafSimScorer docScorer = new LeafSimScorer(simScorer, context.reader(), field, true);
           Explanation freqExplanation = Explanation.match(freq, "phraseFreq=" + freq);
           Explanation scoreExplanation = docScorer.explain(doc, freqExplanation);
           return Explanation.match(scoreExplanation.getValue(),
               "weight("+getQuery()+" in "+doc+") [" + similarity.getClass().getSimpleName() + "], result of:",
               scoreExplanation);
         } else {
           // simScorer won't be set when scoring isn't needed
           return Explanation.match(0f, String.format(Locale.ROOT,
               "match %s in %s without score", getQuery(), doc));
         }
       }
     }

     return Explanation.noMatch("no matching term");
   }

   /** Mutable record of one leaf term collected from the current span. */
   private static class TermMatch {
     Term term;
     int position;
     int startOffset;
     int endOffset;
   }

   /**
    * Returns the matches of this weight in {@code doc}.  Each span is one match; the
    * leaf terms collected from the span are exposed as its sub-matches, ordered by position.
    */
   @Override
   public Matches matches(LeafReaderContext context, int doc) throws IOException {
     return MatchesUtils.forField(field, () -> {
       Spans spans = getSpans(context, Postings.OFFSETS);
       if (spans == null || spans.advance(doc) != doc) {
         return null;
       }
       return new MatchesIterator() {

         // Number of valid entries in innerTerms for the current span; 0 also means
         // "not collected yet" and is restored on every call to next().
         int innerTermCount = 0;
         // Reusable buffer; slots at or beyond innerTermCount may be null or stale.
         TermMatch[] innerTerms = new TermMatch[0];

         SpanCollector termCollector = new SpanCollector() {
           @Override
           public void collectLeaf(PostingsEnum postings, int position, Term term) throws IOException {
             if (innerTermCount == innerTerms.length) {
               // grow geometrically so collecting n terms does not reallocate n times
               innerTerms = Arrays.copyOf(innerTerms, Math.max(4, innerTerms.length * 2));
             }
             // slots are allocated lazily and reused across spans
             TermMatch match = innerTerms[innerTermCount];
             if (match == null) {
               match = new TermMatch();
               innerTerms[innerTermCount] = match;
             }
             match.term = term;
             match.position = position;
             match.startOffset = postings.startOffset();
             match.endOffset = postings.endOffset();
             innerTermCount++;
           }

           @Override
           public void reset() {
             innerTermCount = 0;
           }
         };

         @Override
         public boolean next() throws IOException {
           // forget the terms of the previous span
           innerTermCount = 0;
           return spans.nextStartPosition() != Spans.NO_MORE_POSITIONS;
         }

         @Override
         public int startPosition() {
           return spans.startPosition();
         }

         @Override
         public int endPosition() {
           return spans.endPosition() - 1;
         }

         @Override
         public int startOffset() throws IOException {
           ensureInnerTermsCollected();
           // no leaf term collected: report "offsets not available" rather than
           // reading a stale or missing slot
           return innerTermCount == 0 ? -1 : innerTerms[0].startOffset;
         }

         @Override
         public int endOffset() throws IOException {
           ensureInnerTermsCollected();
           return innerTermCount == 0 ? -1 : innerTerms[innerTermCount - 1].endOffset;
         }

         @Override
         public MatchesIterator getSubMatches() throws IOException {
           ensureInnerTermsCollected();
           return new MatchesIterator() {

             // index into innerTerms of the current sub-match; -1 before the first next()
             int upto = -1;

             @Override
             public boolean next() throws IOException {
               upto++;
               return upto < innerTermCount;
             }

             @Override
             public int startPosition() {
               return innerTerms[upto].position;
             }

             @Override
             public int endPosition() {
               return innerTerms[upto].position;
             }

             @Override
             public int startOffset() throws IOException {
               return innerTerms[upto].startOffset;
             }

             @Override
             public int endOffset() throws IOException {
               return innerTerms[upto].endOffset;
             }

             @Override
             public MatchesIterator getSubMatches() throws IOException {
               return null;
             }

             @Override
             public Query getQuery() {
               return new TermQuery(innerTerms[upto].term);
             }
           };
         }

         @Override
         public Query getQuery() {
           return SpanWeight.this.getQuery();
         }

         /** Collects the leaf terms of the current span unless some are already held. */
         void ensureInnerTermsCollected() throws IOException {
           if (innerTermCount == 0) {
             collectInnerTerms();
           }
         }

         /** Re-collects the leaf terms of the current span and orders them by position. */
         void collectInnerTerms() throws IOException {
           termCollector.reset();
           spans.collect(termCollector);
           Arrays.sort(innerTerms, 0, innerTermCount, Comparator.comparingInt((TermMatch a) -> a.position));
         }
       };
     });
   }
 }
	/*
	* Licensed to the Apache Software Foundation (ASF) under one or more
	* contributor license agreements. See the NOTICE file distributed with
	* this work for additional information regarding copyright ownership.
	* The ASF licenses this file to You under the Apache License, Version 2.0
	* (the "License"); you may not use this file except in compliance with
	* the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/
	package org.apache.lucene.search.spans;


	import java.io.IOException;
	import java.util.Arrays;
	import java.util.Comparator;
	import java.util.Locale;
	import java.util.Map;

	import org.apache.lucene.index.LeafReaderContext;
	import org.apache.lucene.index.PostingsEnum;
	import org.apache.lucene.index.Term;
	import org.apache.lucene.index.TermStates;
	import org.apache.lucene.search.CollectionStatistics;
	import org.apache.lucene.search.Explanation;
	import org.apache.lucene.search.IndexSearcher;
	import org.apache.lucene.search.LeafSimScorer;
	import org.apache.lucene.search.Matches;
	import org.apache.lucene.search.MatchesIterator;
	import org.apache.lucene.search.MatchesUtils;
	import org.apache.lucene.search.Query;
	import org.apache.lucene.search.TermQuery;
	import org.apache.lucene.search.TermStatistics;
	import org.apache.lucene.search.Weight;
	import org.apache.lucene.search.similarities.Similarity;
	import org.apache.lucene.util.ArrayUtil;

	/**
	* Expert-only. Public for use by other weight implementations
	*/
	public abstract class SpanWeight extends Weight {

	/**
	* Enumeration defining what postings information should be retrieved from the
	* index for a given Spans
	*/
	public enum Postings {
	POSITIONS {
	@Override
	public int getRequiredPostings() {
	return PostingsEnum.POSITIONS;
	}
	},
	PAYLOADS {
	@Override
	public int getRequiredPostings() {
	return PostingsEnum.PAYLOADS;
	}
	},
	OFFSETS {
	@Override
	public int getRequiredPostings() {
	return PostingsEnum.PAYLOADS \| PostingsEnum.OFFSETS;
	}
	};

	public abstract int getRequiredPostings();

	public Postings atLeast(Postings postings) {
	if (postings.compareTo(this) > 0)
	return postings;
	return this;
	}
	}

	protected final Similarity similarity;
	protected final Similarity.SimScorer simScorer;
	protected final String field;

	/**
	* Create a new SpanWeight
	* @param query the parent query
	* @param searcher the IndexSearcher to query against
	* @param termStates a map of terms to {@link TermStates} for use in building the similarity. May
	* be null if scores are not required
	* @throws IOException on error
	*/
	public SpanWeight(SpanQuery query, IndexSearcher searcher, Map<Term, TermStates> termStates, float boost) throws IOException {
	super(query);
	this.field = query.getField();
	this.similarity = searcher.getSimilarity();
	this.simScorer = buildSimWeight(query, searcher, termStates, boost);
	}

	private Similarity.SimScorer buildSimWeight(SpanQuery query, IndexSearcher searcher, Map<Term, TermStates> termStates, float boost) throws IOException {
	if (termStates == null \|\| termStates.size() == 0 \|\| query.getField() == null)
	return null;
	TermStatistics[] termStats = new TermStatistics[termStates.size()];
	int termUpTo = 0;
	for (Map.Entry<Term, TermStates> entry : termStates.entrySet()) {
	TermStates ts = entry.getValue();
	if (ts.docFreq() > 0) {
	termStats[termUpTo++] = searcher.termStatistics(entry.getKey(), ts.docFreq(), ts.totalTermFreq());
	}
	}
	CollectionStatistics collectionStats = searcher.collectionStatistics(query.getField());
	if (termUpTo > 0) {
	return similarity.scorer(boost, collectionStats, ArrayUtil.copyOfSubArray(termStats, 0, termUpTo));
	} else {
	return null; // no terms at all exist, we won't use similarity
	}
	}

	/**
	* Collect all TermStates used by this Weight
	* @param contexts a map to add the TermStates to
	*/
	public abstract void extractTermStates(Map<Term, TermStates> contexts);

	/**
	* Expert: Return a Spans object iterating over matches from this Weight
	* @param ctx a LeafReaderContext for this Spans
	* @return a Spans
	* @throws IOException on error
	*/
	public abstract Spans getSpans(LeafReaderContext ctx, Postings requiredPostings) throws IOException;

	@Override
	public SpanScorer scorer(LeafReaderContext context) throws IOException {
	final Spans spans = getSpans(context, Postings.POSITIONS);
	if (spans == null) {
	return null;
	}
	final LeafSimScorer docScorer = getSimScorer(context);
	return new SpanScorer(this, spans, docScorer);
	}

	/**
	* Return a LeafSimScorer for this context
	* @param context the LeafReaderContext
	* @return a SimWeight
	* @throws IOException on error
	*/
	public LeafSimScorer getSimScorer(LeafReaderContext context) throws IOException {
	return simScorer == null ? null : new LeafSimScorer(simScorer, context.reader(), field, true);
	}

	@Override
	public Explanation explain(LeafReaderContext context, int doc) throws IOException {
	SpanScorer scorer = scorer(context);
	if (scorer != null) {
	int newDoc = scorer.iterator().advance(doc);
	if (newDoc == doc) {
	if (simScorer != null) {
	float freq = scorer.sloppyFreq();
	LeafSimScorer docScorer = new LeafSimScorer(simScorer, context.reader(), field, true);
	Explanation freqExplanation = Explanation.match(freq, "phraseFreq=" + freq);
	Explanation scoreExplanation = docScorer.explain(doc, freqExplanation);
	return Explanation.match(scoreExplanation.getValue(),
	"weight("+getQuery()+" in "+doc+") [" + similarity.getClass().getSimpleName() + "], result of:",
	scoreExplanation);
	} else {
	// simScorer won't be set when scoring isn't needed
	return Explanation.match(0f, String.format(Locale.ROOT,
	"match %s in %s without score", getQuery(), doc));
	}
	}
	}

	return Explanation.noMatch("no matching term");
	}

	private static class TermMatch {
	Term term;
	int position;
	int startOffset;
	int endOffset;
	}

	@Override
	public Matches matches(LeafReaderContext context, int doc) throws IOException {
	return MatchesUtils.forField(field, () -> {
	Spans spans = getSpans(context, Postings.OFFSETS);
	if (spans == null \|\| spans.advance(doc) != doc) {
	return null;
	}
	return new MatchesIterator() {

	int innerTermCount = 0;
	TermMatch[] innerTerms = new TermMatch[0];

	SpanCollector termCollector = new SpanCollector() {
	@Override
	public void collectLeaf(PostingsEnum postings, int position, Term term) throws IOException {
	innerTermCount++;
	if (innerTermCount > innerTerms.length) {
	TermMatch[] temp = new TermMatch[innerTermCount];
	System.arraycopy(innerTerms, 0, temp, 0, innerTermCount - 1);
	innerTerms = temp;
	innerTerms[innerTermCount - 1] = new TermMatch();
	}
	innerTerms[innerTermCount - 1].term = term;
	innerTerms[innerTermCount - 1].position = position;
	innerTerms[innerTermCount - 1].startOffset = postings.startOffset();
	innerTerms[innerTermCount - 1].endOffset = postings.endOffset();
	}

	@Override
	public void reset() {
	innerTermCount = 0;
	}
	};

	@Override
	public boolean next() throws IOException {
	innerTermCount = 0;
	return spans.nextStartPosition() != Spans.NO_MORE_POSITIONS;
	}

	@Override
	public int startPosition() {
	return spans.startPosition();
	}

	@Override
	public int endPosition() {
	return spans.endPosition() - 1;
	}

	@Override
	public int startOffset() throws IOException {
	if (innerTermCount == 0) {
	collectInnerTerms();
	}
	return innerTerms[0].startOffset;
	}

	@Override
	public int endOffset() throws IOException {
	if (innerTermCount == 0) {
	collectInnerTerms();
	}
	return innerTerms[innerTermCount - 1].endOffset;
	}

	@Override
	public MatchesIterator getSubMatches() throws IOException {
	if (innerTermCount == 0) {
	collectInnerTerms();
	}
	return new MatchesIterator() {

	int upto = -1;

	@Override
	public boolean next() throws IOException {
	upto++;
	return upto < innerTermCount;
	}

	@Override
	public int startPosition() {
	return innerTerms[upto].position;
	}

	@Override
	public int endPosition() {
	return innerTerms[upto].position;
	}

	@Override
	public int startOffset() throws IOException {
	return innerTerms[upto].startOffset;
	}

	@Override
	public int endOffset() throws IOException {
	return innerTerms[upto].endOffset;
	}

	@Override
	public MatchesIterator getSubMatches() throws IOException {
	return null;
	}

	@Override
	public Query getQuery() {
	return new TermQuery(innerTerms[upto].term);
	}
	};
	}

	@Override
	public Query getQuery() {
	return SpanWeight.this.getQuery();
	}

	void collectInnerTerms() throws IOException {
	termCollector.reset();
	spans.collect(termCollector);
	Arrays.sort(innerTerms, 0, innerTermCount, Comparator.comparing(a -> a.position));
	}
	};
	});
	}
	}