| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| package org.apache.lucene.search.uhighlight; |
| |
| |
| import org.apache.lucene.util.ArrayUtil; |
| import org.apache.lucene.util.BytesRef; |
| import org.apache.lucene.util.RamUsageEstimator; |
| |
| /** |
| * Represents a passage (typically a sentence of the document). |
| * <p> |
| * A passage contains {@link #getNumMatches} highlights from the query, |
| * and the offsets and query terms that correspond with each match. |
| * |
| * @lucene.experimental |
| */ |
| public class Passage { |
| private int startOffset = -1; |
| private int endOffset = -1; |
| private float score = 0.0f; |
| |
| private int[] matchStarts = new int[8]; |
| private int[] matchEnds = new int[8]; |
| private BytesRef[] matchTerms = new BytesRef[8]; |
| private int[] matchTermFreqInDoc = new int[8]; |
| private int numMatches = 0; |
| |
| /** @lucene.internal */ |
| public void addMatch(int startOffset, int endOffset, BytesRef term, int termFreqInDoc) { |
| assert startOffset >= this.startOffset && startOffset <= this.endOffset; |
| if (numMatches == matchStarts.length) { |
| int newLength = ArrayUtil.oversize(numMatches + 1, RamUsageEstimator.NUM_BYTES_OBJECT_REF); |
| int newMatchStarts[] = new int[newLength]; |
| int newMatchEnds[] = new int[newLength]; |
| int newMatchTermFreqInDoc[] = new int[newLength]; |
| BytesRef newMatchTerms[] = new BytesRef[newLength]; |
| System.arraycopy(matchStarts, 0, newMatchStarts, 0, numMatches); |
| System.arraycopy(matchEnds, 0, newMatchEnds, 0, numMatches); |
| System.arraycopy(matchTerms, 0, newMatchTerms, 0, numMatches); |
| System.arraycopy(matchTermFreqInDoc, 0, newMatchTermFreqInDoc, 0, numMatches); |
| matchStarts = newMatchStarts; |
| matchEnds = newMatchEnds; |
| matchTerms = newMatchTerms; |
| matchTermFreqInDoc = newMatchTermFreqInDoc; |
| } |
| assert matchStarts.length == matchEnds.length && matchEnds.length == matchTerms.length; |
| matchStarts[numMatches] = startOffset; |
| matchEnds[numMatches] = endOffset; |
| matchTerms[numMatches] = term; |
| matchTermFreqInDoc[numMatches] = termFreqInDoc; |
| numMatches++; |
| } |
| |
| /** @lucene.internal */ |
| public void reset() { |
| startOffset = endOffset = -1; |
| score = 0.0f; |
| numMatches = 0; |
| } |
| |
| /** For debugging. ex: Passage[0-22]{yin[0-3],yang[4-8],yin[10-13]}score=2.4964213 */ |
| @Override |
| public String toString() { |
| StringBuilder buf = new StringBuilder(); |
| buf.append("Passage[").append(startOffset).append('-').append(endOffset).append(']'); |
| buf.append('{'); |
| for (int i = 0; i < numMatches; i++) { |
| if (i != 0) { |
| buf.append(','); |
| } |
| buf.append(matchTerms[i].utf8ToString()); |
| buf.append('[').append(matchStarts[i] - startOffset).append('-').append(matchEnds[i] - startOffset).append(']'); |
| } |
| buf.append('}'); |
| buf.append("score=").append(score); |
| return buf.toString(); |
| } |
| |
| /** |
| * Start offset of this passage. |
| * |
| * @return start index (inclusive) of the passage in the |
| * original content: always >= 0. |
| */ |
| public int getStartOffset() { |
| return startOffset; |
| } |
| |
| /** |
| * End offset of this passage. |
| * |
| * @return end index (exclusive) of the passage in the |
| * original content: always >= {@link #getStartOffset()} |
| */ |
| public int getEndOffset() { |
| return endOffset; |
| } |
| |
| public int getLength() { |
| return endOffset - startOffset; |
| } |
| |
| /** |
| * Passage's score. |
| */ |
| public float getScore() { |
| return score; |
| } |
| |
| public void setScore(float score) { |
| this.score = score; |
| } |
| |
| /** |
| * Number of term matches available in |
| * {@link #getMatchStarts}, {@link #getMatchEnds}, |
| * {@link #getMatchTerms} |
| */ |
| public int getNumMatches() { |
| return numMatches; |
| } |
| |
| /** |
| * Start offsets of the term matches, in increasing order. |
| * <p> |
| * Only {@link #getNumMatches} are valid. Note that these |
| * offsets are absolute (not relative to {@link #getStartOffset()}). |
| */ |
| public int[] getMatchStarts() { |
| return matchStarts; |
| } |
| |
| /** |
| * End offsets of the term matches, corresponding with {@link #getMatchStarts}. |
| * <p> |
| * Only {@link #getNumMatches} are valid. Note that its possible that an end offset |
| * could exceed beyond the bounds of the passage ({@link #getEndOffset()}), if the |
| * Analyzer produced a term which spans a passage boundary. |
| */ |
| public int[] getMatchEnds() { |
| return matchEnds; |
| } |
| |
| /** |
| * BytesRef (term text) of the matches, corresponding with {@link #getMatchStarts()}. The primary purpose of this |
| * method is to expose the number of unique terms per passage for use in passage scoring. |
| * The actual term byte content is not well defined by this highlighter, and thus use of it is more subject to |
| * change. |
| * <p> |
| * The term might be simply the analyzed term at this position. |
| * Depending on the highlighter's configuration, the match term may be a phrase (instead of a word), and in such |
| * a case might be a series of space-separated analyzed terms. |
| * If the match is from a {@link org.apache.lucene.search.MultiTermQuery} then the match term may be the toString() of |
| * that query. |
| * <p> |
| * Only {@link #getNumMatches()} are valid. |
| */ |
| public BytesRef[] getMatchTerms() { |
| return matchTerms; |
| } |
| |
| public int[] getMatchTermFreqsInDoc() { |
| return matchTermFreqInDoc; |
| } |
| |
| /** @lucene.internal */ |
| public void setStartOffset(int startOffset) { |
| this.startOffset = startOffset; |
| } |
| |
| /** @lucene.internal */ |
| public void setEndOffset(int endOffset) { |
| assert startOffset <= endOffset; |
| this.endOffset = endOffset; |
| } |
| |
| } |