blob: 73997ae7d27adcaacd7330d8c4488e0deaf655fe [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.search.vectorhighlight;
import java.io.IOException;
import java.util.Iterator;
import java.util.Set;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.highlight.Encoder;
/**
 * Another highlighter implementation.
 *
 * <p>For each request a {@link FieldTermStack} is built from the reader, document id, field name
 * and {@link FieldQuery}; it is turned into a {@link FieldPhraseList} (bounded by the configured
 * phrase limit), handed to a {@link FragListBuilder} to obtain a {@link FieldFragList}, and finally
 * rendered into snippet strings by a {@link FragmentsBuilder}.
 */
public class FastVectorHighlighter {
  /** Phrase-highlight setting used by the no-argument constructor. */
  public static final boolean DEFAULT_PHRASE_HIGHLIGHT = true;

  /** Field-match setting used by the no-argument constructor. */
  public static final boolean DEFAULT_FIELD_MATCH = true;

  /** Whether phrase highlighting is enabled; forwarded to every created {@link FieldQuery}. */
  protected final boolean phraseHighlight;

  /** Whether field matching is enabled; forwarded to every created {@link FieldQuery}. */
  protected final boolean fieldMatch;

  /** Builder used by the overloads that do not take an explicit {@link FragListBuilder}. */
  private final FragListBuilder fragListBuilder;

  /** Builder used by the overloads that do not take an explicit {@link FragmentsBuilder}. */
  private final FragmentsBuilder fragmentsBuilder;

  /** Upper bound handed to each {@link FieldPhraseList}; unlimited unless changed. */
  private int phraseLimit = Integer.MAX_VALUE;

  /**
   * the default constructor. Uses {@link #DEFAULT_PHRASE_HIGHLIGHT} and {@link
   * #DEFAULT_FIELD_MATCH}.
   */
  public FastVectorHighlighter() {
    this(DEFAULT_PHRASE_HIGHLIGHT, DEFAULT_FIELD_MATCH);
  }

  /**
   * a constructor. Using {@link SimpleFragListBuilder} and {@link ScoreOrderFragmentsBuilder}.
   *
   * @param phraseHighlight true or false for phrase highlighting
   * @param fieldMatch true or false for field matching
   */
  public FastVectorHighlighter(boolean phraseHighlight, boolean fieldMatch) {
    this(
        phraseHighlight, fieldMatch, new SimpleFragListBuilder(), new ScoreOrderFragmentsBuilder());
  }

  /**
   * a constructor. A {@link FragListBuilder} and a {@link FragmentsBuilder} can be specified
   * (plugins).
   *
   * @param phraseHighlight true or false for phrase highlighting
   * @param fieldMatch true or false for field matching
   * @param fragListBuilder an instance of {@link FragListBuilder}
   * @param fragmentsBuilder an instance of {@link FragmentsBuilder}
   */
  public FastVectorHighlighter(
      boolean phraseHighlight,
      boolean fieldMatch,
      FragListBuilder fragListBuilder,
      FragmentsBuilder fragmentsBuilder) {
    this.phraseHighlight = phraseHighlight;
    this.fieldMatch = fieldMatch;
    this.fragListBuilder = fragListBuilder;
    this.fragmentsBuilder = fragmentsBuilder;
  }

  /**
   * create a {@link FieldQuery} object without an {@link IndexReader}.
   *
   * <p>Equivalent to calling {@link #getFieldQuery(Query, IndexReader)} with a {@code null} reader.
   *
   * @param query a query
   * @return the created {@link FieldQuery} object
   * @throws RuntimeException wrapping an {@link IOException} if one is unexpectedly raised
   */
  public FieldQuery getFieldQuery(Query query) {
    // TODO: should we deprecate this?
    // because if there is no reader, then we cannot rewrite MTQ.
    try {
      return getFieldQuery(query, null);
    } catch (IOException e) {
      // should never be thrown when reader is null
      throw new RuntimeException(e);
    }
  }

  /**
   * create a {@link FieldQuery} object.
   *
   * @param query a query
   * @param reader {@link IndexReader} handed to the {@link FieldQuery}; the single-argument
   *     overload passes {@code null} here, in which case (per the note in that overload) MTQ cannot
   *     be rewritten
   * @return the created {@link FieldQuery} object
   * @throws IOException If there is a low-level I/O error
   */
  public FieldQuery getFieldQuery(Query query, IndexReader reader) throws IOException {
    return new FieldQuery(query, reader, phraseHighlight, fieldMatch);
  }

  /**
   * return the best fragment, using the builders this highlighter was constructed with.
   *
   * @param fieldQuery {@link FieldQuery} object
   * @param reader {@link IndexReader} of the index
   * @param docId document id to be highlighted
   * @param fieldName field of the document to be highlighted
   * @param fragCharSize the length (number of chars) of a fragment
   * @return the best fragment (snippet) string
   * @throws IOException If there is a low-level I/O error
   */
  public final String getBestFragment(
      final FieldQuery fieldQuery,
      IndexReader reader,
      int docId,
      String fieldName,
      int fragCharSize)
      throws IOException {
    FieldFragList fieldFragList =
        getFieldFragList(fragListBuilder, fieldQuery, reader, docId, fieldName, fragCharSize);
    return fragmentsBuilder.createFragment(reader, docId, fieldName, fieldFragList);
  }

  /**
   * return the best fragments, using the builders this highlighter was constructed with.
   *
   * @param fieldQuery {@link FieldQuery} object
   * @param reader {@link IndexReader} of the index
   * @param docId document id to be highlighted
   * @param fieldName field of the document to be highlighted
   * @param fragCharSize the length (number of chars) of a fragment
   * @param maxNumFragments maximum number of fragments
   * @return created fragments or null when no fragments created. size of the array can be less than
   *     maxNumFragments
   * @throws IOException If there is a low-level I/O error
   */
  public final String[] getBestFragments(
      final FieldQuery fieldQuery,
      IndexReader reader,
      int docId,
      String fieldName,
      int fragCharSize,
      int maxNumFragments)
      throws IOException {
    FieldFragList fieldFragList =
        getFieldFragList(fragListBuilder, fieldQuery, reader, docId, fieldName, fragCharSize);
    return fragmentsBuilder.createFragments(
        reader, docId, fieldName, fieldFragList, maxNumFragments);
  }

  /**
   * return the best fragment. The builders passed in are used instead of the ones this highlighter
   * was constructed with.
   *
   * @param fieldQuery {@link FieldQuery} object
   * @param reader {@link IndexReader} of the index
   * @param docId document id to be highlighted
   * @param fieldName field of the document to be highlighted
   * @param fragCharSize the length (number of chars) of a fragment
   * @param fragListBuilder {@link FragListBuilder} object
   * @param fragmentsBuilder {@link FragmentsBuilder} object
   * @param preTags pre-tags to be used to highlight terms
   * @param postTags post-tags to be used to highlight terms
   * @param encoder an encoder that generates encoded text
   * @return the best fragment (snippet) string
   * @throws IOException If there is a low-level I/O error
   */
  public final String getBestFragment(
      final FieldQuery fieldQuery,
      IndexReader reader,
      int docId,
      String fieldName,
      int fragCharSize,
      FragListBuilder fragListBuilder,
      FragmentsBuilder fragmentsBuilder,
      String[] preTags,
      String[] postTags,
      Encoder encoder)
      throws IOException {
    FieldFragList fieldFragList =
        getFieldFragList(fragListBuilder, fieldQuery, reader, docId, fieldName, fragCharSize);
    return fragmentsBuilder.createFragment(
        reader, docId, fieldName, fieldFragList, preTags, postTags, encoder);
  }

  /**
   * return the best fragments. The builders passed in are used instead of the ones this highlighter
   * was constructed with.
   *
   * @param fieldQuery {@link FieldQuery} object
   * @param reader {@link IndexReader} of the index
   * @param docId document id to be highlighted
   * @param fieldName field of the document to be highlighted
   * @param fragCharSize the length (number of chars) of a fragment
   * @param maxNumFragments maximum number of fragments
   * @param fragListBuilder {@link FragListBuilder} object
   * @param fragmentsBuilder {@link FragmentsBuilder} object
   * @param preTags pre-tags to be used to highlight terms
   * @param postTags post-tags to be used to highlight terms
   * @param encoder an encoder that generates encoded text
   * @return created fragments or null when no fragments created. size of the array can be less than
   *     maxNumFragments
   * @throws IOException If there is a low-level I/O error
   */
  public final String[] getBestFragments(
      final FieldQuery fieldQuery,
      IndexReader reader,
      int docId,
      String fieldName,
      int fragCharSize,
      int maxNumFragments,
      FragListBuilder fragListBuilder,
      FragmentsBuilder fragmentsBuilder,
      String[] preTags,
      String[] postTags,
      Encoder encoder)
      throws IOException {
    FieldFragList fieldFragList =
        getFieldFragList(fragListBuilder, fieldQuery, reader, docId, fieldName, fragCharSize);
    return fragmentsBuilder.createFragments(
        reader, docId, fieldName, fieldFragList, maxNumFragments, preTags, postTags, encoder);
  }

  /**
   * Return the best fragments. Matches are scanned from matchedFields and turned into fragments
   * against storedField. The highlighting may not make sense if matchedFields has matches with
   * offsets that don't correspond to features in storedField. It will outright throw a {@code
   * StringIndexOutOfBoundsException} if matchedFields produces offsets outside of storedField. As
   * such it is advisable that all matchedFields share the same source as storedField or are at
   * least a prefix of it.
   *
   * @param fieldQuery {@link FieldQuery} object
   * @param reader {@link IndexReader} of the index
   * @param docId document id to be highlighted
   * @param storedField field of the document that stores the text
   * @param matchedFields fields of the document to scan for matches; must not be empty
   * @param fragCharSize the length (number of chars) of a fragment
   * @param maxNumFragments maximum number of fragments
   * @param fragListBuilder {@link FragListBuilder} object
   * @param fragmentsBuilder {@link FragmentsBuilder} object
   * @param preTags pre-tags to be used to highlight terms
   * @param postTags post-tags to be used to highlight terms
   * @param encoder an encoder that generates encoded text
   * @return created fragments or null when no fragments created. size of the array can be less than
   *     maxNumFragments
   * @throws IOException If there is a low-level I/O error
   * @throws IllegalArgumentException if matchedFields is empty
   */
  public final String[] getBestFragments(
      final FieldQuery fieldQuery,
      IndexReader reader,
      int docId,
      String storedField,
      Set<String> matchedFields,
      int fragCharSize,
      int maxNumFragments,
      FragListBuilder fragListBuilder,
      FragmentsBuilder fragmentsBuilder,
      String[] preTags,
      String[] postTags,
      Encoder encoder)
      throws IOException {
    FieldFragList fieldFragList =
        getFieldFragList(fragListBuilder, fieldQuery, reader, docId, matchedFields, fragCharSize);
    return fragmentsBuilder.createFragments(
        reader, docId, storedField, fieldFragList, maxNumFragments, preTags, postTags, encoder);
  }

  /**
   * Build a FieldFragList for one field.
   *
   * <p>Creates a {@link FieldTermStack} for the field, derives a {@link FieldPhraseList} from it
   * (limited by the current phrase limit) and lets the given builder create the frag list.
   */
  private FieldFragList getFieldFragList(
      FragListBuilder fragListBuilder,
      final FieldQuery fieldQuery,
      IndexReader reader,
      int docId,
      String matchedField,
      int fragCharSize)
      throws IOException {
    FieldTermStack fieldTermStack = new FieldTermStack(reader, docId, matchedField, fieldQuery);
    FieldPhraseList fieldPhraseList = new FieldPhraseList(fieldTermStack, fieldQuery, phraseLimit);
    return fragListBuilder.createFieldFragList(fieldPhraseList, fragCharSize);
  }

  /**
   * Build a FieldFragList for more than one field.
   *
   * <p>One {@link FieldPhraseList} is built per matched field, in the set's iteration order; the
   * lists are then combined through the merging {@link FieldPhraseList} constructor before the
   * given builder creates the frag list.
   *
   * @throws IllegalArgumentException if matchedFields is empty
   */
  private FieldFragList getFieldFragList(
      FragListBuilder fragListBuilder,
      final FieldQuery fieldQuery,
      IndexReader reader,
      int docId,
      Set<String> matchedFields,
      int fragCharSize)
      throws IOException {
    Iterator<String> matchedFieldsItr = matchedFields.iterator();
    if (!matchedFieldsItr.hasNext()) {
      throw new IllegalArgumentException("matchedFields must contain at least one field name.");
    }
    FieldPhraseList[] toMerge = new FieldPhraseList[matchedFields.size()];
    int i = 0;
    while (matchedFieldsItr.hasNext()) {
      FieldTermStack stack = new FieldTermStack(reader, docId, matchedFieldsItr.next(), fieldQuery);
      toMerge[i++] = new FieldPhraseList(stack, fieldQuery, phraseLimit);
    }
    return fragListBuilder.createFieldFragList(new FieldPhraseList(toMerge), fragCharSize);
  }

  /**
   * return whether phraseHighlight or not.
   *
   * @return whether phraseHighlight or not
   */
  public boolean isPhraseHighlight() {
    return phraseHighlight;
  }

  /**
   * return whether fieldMatch or not.
   *
   * @return whether fieldMatch or not
   */
  public boolean isFieldMatch() {
    return fieldMatch;
  }

  /**
   * return the phrase limit.
   *
   * @return the maximum number of phrases to analyze when searching for the highest-scoring phrase.
   */
  public int getPhraseLimit() {
    return phraseLimit;
  }

  /**
   * set the maximum number of phrases to analyze when searching for the highest-scoring phrase. The
   * default is unlimited (Integer.MAX_VALUE).
   *
   * @param phraseLimit the new limit; stored as given, without validation
   */
  public void setPhraseLimit(int phraseLimit) {
    this.phraseLimit = phraseLimit;
  }
}