blob: ac7e9d0a3c4a4bbb26866de7a4b16d2702c54731 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.search.vectorhighlight;
import java.io.IOException;
import java.util.Iterator;
import java.util.Set;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.highlight.Encoder;
/**
 * Another highlighter implementation.
 *
 * <p>For each request a {@link FieldTermStack} is built for the field to scan, wrapped into a
 * {@link FieldPhraseList}, handed to a {@link FragListBuilder} to obtain a {@link FieldFragList},
 * and finally passed to a {@link FragmentsBuilder} which creates the fragment strings.
 */
public class FastVectorHighlighter {

  /** Value of {@code phraseHighlight} used by the no-argument constructor. */
  public static final boolean DEFAULT_PHRASE_HIGHLIGHT = true;

  /** Value of {@code fieldMatch} used by the no-argument constructor. */
  public static final boolean DEFAULT_FIELD_MATCH = true;

  /** Phrase highlighting flag; forwarded to every {@link FieldQuery} created by this instance. */
  protected final boolean phraseHighlight;

  /** Field matching flag; forwarded to every {@link FieldQuery} created by this instance. */
  protected final boolean fieldMatch;

  // Builders used by the overloads that do not receive their own builders.
  private final FragListBuilder fragListBuilder;
  private final FragmentsBuilder fragmentsBuilder;

  // Upper bound handed to each FieldPhraseList; unlimited unless changed via setPhraseLimit.
  private int phraseLimit = Integer.MAX_VALUE;

  /**
   * Creates a highlighter with {@link #DEFAULT_PHRASE_HIGHLIGHT} and {@link #DEFAULT_FIELD_MATCH}.
   */
  public FastVectorHighlighter() {
    this( DEFAULT_PHRASE_HIGHLIGHT, DEFAULT_FIELD_MATCH );
  }

  /**
   * Creates a highlighter that uses a new {@link SimpleFragListBuilder} and a new
   * {@link ScoreOrderFragmentsBuilder}.
   *
   * @param phraseHighlight true or false for phrase highlighting
   * @param fieldMatch true or false for field matching
   */
  public FastVectorHighlighter( boolean phraseHighlight, boolean fieldMatch ) {
    this( phraseHighlight, fieldMatch, new SimpleFragListBuilder(), new ScoreOrderFragmentsBuilder() );
  }

  /**
   * Creates a highlighter with caller-supplied (pluggable) builders.
   *
   * @param phraseHighlight true or false for phrase highlighting
   * @param fieldMatch true or false for field matching
   * @param fragListBuilder an instance of {@link FragListBuilder}
   * @param fragmentsBuilder an instance of {@link FragmentsBuilder}
   */
  public FastVectorHighlighter( boolean phraseHighlight, boolean fieldMatch,
      FragListBuilder fragListBuilder, FragmentsBuilder fragmentsBuilder ) {
    this.phraseHighlight = phraseHighlight;
    this.fieldMatch = fieldMatch;
    this.fragListBuilder = fragListBuilder;
    this.fragmentsBuilder = fragmentsBuilder;
  }

  /**
   * Creates a {@link FieldQuery} without an index reader. Delegates to
   * {@link #getFieldQuery(Query, IndexReader)} with a {@code null} reader; an {@link IOException}
   * raised by that call is rethrown wrapped in a {@link RuntimeException}.
   *
   * @param query a query
   * @return the created {@link FieldQuery} object
   */
  public FieldQuery getFieldQuery( Query query ) {
    // TODO: should we deprecate this?
    // because if there is no reader, then we cannot rewrite MTQ.
    try {
      return getFieldQuery( query, null );
    } catch ( IOException ioe ) {
      // should never be thrown when reader is null
      throw new RuntimeException( ioe );
    }
  }

  /**
   * Creates a {@link FieldQuery} from the query, the reader and this highlighter's
   * {@code phraseHighlight} and {@code fieldMatch} settings.
   *
   * @param query a query
   * @param reader {@link IndexReader} handed to the {@link FieldQuery}; the one-argument overload
   *        passes {@code null} here
   * @return the created {@link FieldQuery} object
   * @throws IOException If there is a low-level I/O error
   */
  public FieldQuery getFieldQuery( Query query, IndexReader reader ) throws IOException {
    return new FieldQuery( query, reader, phraseHighlight, fieldMatch );
  }

  /**
   * Returns the best fragment, using the builders this highlighter was constructed with.
   *
   * @param fieldQuery {@link FieldQuery} object
   * @param reader {@link IndexReader} of the index
   * @param docId document id to be highlighted
   * @param fieldName field of the document to be highlighted
   * @param fragCharSize the length (number of chars) of a fragment
   * @return the best fragment (snippet) string
   * @throws IOException If there is a low-level I/O error
   */
  public final String getBestFragment( final FieldQuery fieldQuery, IndexReader reader, int docId,
      String fieldName, int fragCharSize ) throws IOException {
    FieldFragList fragList =
        getFieldFragList( fragListBuilder, fieldQuery, reader, docId, fieldName, fragCharSize );
    return fragmentsBuilder.createFragment( reader, docId, fieldName, fragList );
  }

  /**
   * Returns the best fragments, using the builders this highlighter was constructed with.
   *
   * @param fieldQuery {@link FieldQuery} object
   * @param reader {@link IndexReader} of the index
   * @param docId document id to be highlighted
   * @param fieldName field of the document to be highlighted
   * @param fragCharSize the length (number of chars) of a fragment
   * @param maxNumFragments maximum number of fragments
   * @return created fragments or null when no fragments created.
   *         size of the array can be less than maxNumFragments
   * @throws IOException If there is a low-level I/O error
   */
  public final String[] getBestFragments( final FieldQuery fieldQuery, IndexReader reader, int docId,
      String fieldName, int fragCharSize, int maxNumFragments ) throws IOException {
    FieldFragList fragList =
        getFieldFragList( fragListBuilder, fieldQuery, reader, docId, fieldName, fragCharSize );
    return fragmentsBuilder.createFragments( reader, docId, fieldName, fragList, maxNumFragments );
  }

  /**
   * Returns the best fragment, using the builders, tags and encoder supplied by the caller
   * instead of the ones held by this highlighter.
   *
   * @param fieldQuery {@link FieldQuery} object
   * @param reader {@link IndexReader} of the index
   * @param docId document id to be highlighted
   * @param fieldName field of the document to be highlighted
   * @param fragCharSize the length (number of chars) of a fragment
   * @param fragListBuilder {@link FragListBuilder} object
   * @param fragmentsBuilder {@link FragmentsBuilder} object
   * @param preTags pre-tags to be used to highlight terms
   * @param postTags post-tags to be used to highlight terms
   * @param encoder an encoder that generates encoded text
   * @return the best fragment (snippet) string
   * @throws IOException If there is a low-level I/O error
   */
  public final String getBestFragment( final FieldQuery fieldQuery, IndexReader reader, int docId,
      String fieldName, int fragCharSize,
      FragListBuilder fragListBuilder, FragmentsBuilder fragmentsBuilder,
      String[] preTags, String[] postTags, Encoder encoder ) throws IOException {
    FieldFragList fragList =
        getFieldFragList( fragListBuilder, fieldQuery, reader, docId, fieldName, fragCharSize );
    return fragmentsBuilder.createFragment(
        reader, docId, fieldName, fragList, preTags, postTags, encoder );
  }

  /**
   * Returns the best fragments, using the builders, tags and encoder supplied by the caller
   * instead of the ones held by this highlighter.
   *
   * @param fieldQuery {@link FieldQuery} object
   * @param reader {@link IndexReader} of the index
   * @param docId document id to be highlighted
   * @param fieldName field of the document to be highlighted
   * @param fragCharSize the length (number of chars) of a fragment
   * @param maxNumFragments maximum number of fragments
   * @param fragListBuilder {@link FragListBuilder} object
   * @param fragmentsBuilder {@link FragmentsBuilder} object
   * @param preTags pre-tags to be used to highlight terms
   * @param postTags post-tags to be used to highlight terms
   * @param encoder an encoder that generates encoded text
   * @return created fragments or null when no fragments created.
   *         size of the array can be less than maxNumFragments
   * @throws IOException If there is a low-level I/O error
   */
  public final String[] getBestFragments( final FieldQuery fieldQuery, IndexReader reader, int docId,
      String fieldName, int fragCharSize, int maxNumFragments,
      FragListBuilder fragListBuilder, FragmentsBuilder fragmentsBuilder,
      String[] preTags, String[] postTags, Encoder encoder ) throws IOException {
    FieldFragList fragList =
        getFieldFragList( fragListBuilder, fieldQuery, reader, docId, fieldName, fragCharSize );
    return fragmentsBuilder.createFragments(
        reader, docId, fieldName, fragList, maxNumFragments, preTags, postTags, encoder );
  }

  /**
   * Returns the best fragments. Matches are scanned from matchedFields and turned into fragments
   * against storedField. The highlighting may not make sense if matchedFields has matches with
   * offsets that don't correspond to features in storedField. It will outright throw a
   * {@code StringIndexOutOfBoundsException} if matchedFields produces offsets outside of
   * storedField. As such it is advisable that all matchedFields share the same source as
   * storedField or are at least a prefix of it.
   *
   * @param fieldQuery {@link FieldQuery} object
   * @param reader {@link IndexReader} of the index
   * @param docId document id to be highlighted
   * @param storedField field of the document that stores the text
   * @param matchedFields fields of the document to scan for matches; must contain at least one
   *        field name, otherwise an {@link IllegalArgumentException} is thrown
   * @param fragCharSize the length (number of chars) of a fragment
   * @param maxNumFragments maximum number of fragments
   * @param fragListBuilder {@link FragListBuilder} object
   * @param fragmentsBuilder {@link FragmentsBuilder} object
   * @param preTags pre-tags to be used to highlight terms
   * @param postTags post-tags to be used to highlight terms
   * @param encoder an encoder that generates encoded text
   * @return created fragments or null when no fragments created.
   *         size of the array can be less than maxNumFragments
   * @throws IOException If there is a low-level I/O error
   */
  public final String[] getBestFragments( final FieldQuery fieldQuery, IndexReader reader, int docId,
      String storedField, Set< String > matchedFields, int fragCharSize, int maxNumFragments,
      FragListBuilder fragListBuilder, FragmentsBuilder fragmentsBuilder,
      String[] preTags, String[] postTags, Encoder encoder ) throws IOException {
    FieldFragList fragList =
        getFieldFragList( fragListBuilder, fieldQuery, reader, docId, matchedFields, fragCharSize );
    return fragmentsBuilder.createFragments(
        reader, docId, storedField, fragList, maxNumFragments, preTags, postTags, encoder );
  }

  /**
   * Builds the {@link FieldPhraseList} of a single field: a {@link FieldTermStack} is created for
   * the field and wrapped together with the current phrase limit.
   */
  private FieldPhraseList buildPhraseList( IndexReader reader, int docId, String field,
      FieldQuery fieldQuery ) throws IOException {
    FieldTermStack termStack = new FieldTermStack( reader, docId, field, fieldQuery );
    return new FieldPhraseList( termStack, fieldQuery, phraseLimit );
  }

  /**
   * Build a FieldFragList for one field.
   */
  private FieldFragList getFieldFragList( FragListBuilder fragListBuilder,
      final FieldQuery fieldQuery, IndexReader reader, int docId,
      String matchedField, int fragCharSize ) throws IOException {
    FieldPhraseList phrases = buildPhraseList( reader, docId, matchedField, fieldQuery );
    return fragListBuilder.createFieldFragList( phrases, fragCharSize );
  }

  /**
   * Build a FieldFragList for more than one field. One FieldPhraseList is built per field, in the
   * set's iteration order, and the lists are merged before the FragListBuilder is invoked.
   */
  private FieldFragList getFieldFragList( FragListBuilder fragListBuilder,
      final FieldQuery fieldQuery, IndexReader reader, int docId,
      Set< String > matchedFields, int fragCharSize ) throws IOException {
    Iterator< String > fieldNames = matchedFields.iterator();
    if ( !fieldNames.hasNext() ) {
      throw new IllegalArgumentException( "matchedFields must contain at least on field name." );
    }
    FieldPhraseList[] perField = new FieldPhraseList[ matchedFields.size() ];
    for ( int slot = 0; fieldNames.hasNext(); slot++ ) {
      perField[ slot ] = buildPhraseList( reader, docId, fieldNames.next(), fieldQuery );
    }
    FieldPhraseList merged = new FieldPhraseList( perField );
    return fragListBuilder.createFieldFragList( merged, fragCharSize );
  }

  /**
   * return whether phraseHighlight or not.
   *
   * @return the {@code phraseHighlight} flag given at construction time
   */
  public boolean isPhraseHighlight() {
    return phraseHighlight;
  }

  /**
   * return whether fieldMatch or not.
   *
   * @return the {@code fieldMatch} flag given at construction time
   */
  public boolean isFieldMatch() {
    return fieldMatch;
  }

  /**
   * @return the maximum number of phrases to analyze when searching for the highest-scoring phrase.
   */
  public int getPhraseLimit() {
    return phraseLimit;
  }

  /**
   * set the maximum number of phrases to analyze when searching for the highest-scoring phrase.
   * The default is unlimited (Integer.MAX_VALUE).
   *
   * @param phraseLimit the new limit; stored as given, without validation
   */
  public void setPhraseLimit( int phraseLimit ) {
    this.phraseLimit = phraseLimit;
  }
}