// blob: 2ae421b6bba4f5c4ec6a6105d544984c3ca8fbea [file] [log] [blame]
using Lucene.Net.Index;
using Lucene.Net.Search.Highlight;
using System;
using System.Collections.Generic;
using System.IO;
namespace Lucene.Net.Search.VectorHighlight
{
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/// <summary>
/// Another highlighter implementation.
/// <para/>
/// Fragments are produced in stages: a <see cref="FieldTermStack"/> is built for the document and field,
/// converted to a <see cref="FieldPhraseList"/>, handed to an <see cref="IFragListBuilder"/> to obtain a
/// <see cref="FieldFragList"/>, and finally rendered to strings by an <see cref="IFragmentsBuilder"/>.
/// </summary>
public class FastVectorHighlighter
{
    /// <summary>
    /// Phrase-highlighting setting used by the parameterless constructor.
    /// </summary>
    public static readonly bool DEFAULT_PHRASE_HIGHLIGHT = true;

    /// <summary>
    /// Field-matching setting used by the parameterless constructor.
    /// </summary>
    public static readonly bool DEFAULT_FIELD_MATCH = true;

    private readonly bool phraseHighlight;
    private readonly bool fieldMatch;
    private readonly IFragListBuilder fragListBuilder;
    private readonly IFragmentsBuilder fragmentsBuilder;
    private int phraseLimit = int.MaxValue;

    /// <summary>
    /// The default constructor. Uses <see cref="DEFAULT_PHRASE_HIGHLIGHT"/> and <see cref="DEFAULT_FIELD_MATCH"/>.
    /// </summary>
    public FastVectorHighlighter()
        : this(DEFAULT_PHRASE_HIGHLIGHT, DEFAULT_FIELD_MATCH)
    {
    }

    /// <summary>
    /// A constructor. Uses <see cref="SimpleFragListBuilder"/> and <see cref="ScoreOrderFragmentsBuilder"/>.
    /// </summary>
    /// <param name="phraseHighlight">true or false for phrase highlighting</param>
    /// <param name="fieldMatch">true or false for field matching</param>
    public FastVectorHighlighter(bool phraseHighlight, bool fieldMatch)
        : this(phraseHighlight, fieldMatch, new SimpleFragListBuilder(), new ScoreOrderFragmentsBuilder())
    {
    }

    /// <summary>
    /// A constructor. An <see cref="IFragListBuilder"/> and an <see cref="IFragmentsBuilder"/> can be specified (plugins).
    /// </summary>
    /// <param name="phraseHighlight">true or false for phrase highlighting</param>
    /// <param name="fieldMatch">true or false for field matching</param>
    /// <param name="fragListBuilder">an instance of <see cref="IFragListBuilder"/></param>
    /// <param name="fragmentsBuilder">an instance of <see cref="IFragmentsBuilder"/></param>
    public FastVectorHighlighter(bool phraseHighlight, bool fieldMatch,
        IFragListBuilder fragListBuilder, IFragmentsBuilder fragmentsBuilder)
    {
        // The builders are stored as given; they are only used by the overloads
        // that do not take explicit builder arguments.
        this.phraseHighlight = phraseHighlight;
        this.fieldMatch = fieldMatch;
        this.fragListBuilder = fragListBuilder;
        this.fragmentsBuilder = fragmentsBuilder;
    }

    /// <summary>
    /// Create a <see cref="FieldQuery"/> object without an <see cref="IndexReader"/>.
    /// </summary>
    /// <param name="query">a query</param>
    /// <returns>the created <see cref="FieldQuery"/> object</returns>
    /// <exception cref="InvalidOperationException">
    /// If creating the <see cref="FieldQuery"/> unexpectedly raises an <see cref="IOException"/>;
    /// the original exception is available as the inner exception.
    /// </exception>
    public virtual FieldQuery GetFieldQuery(Query query)
    {
        // TODO: should we deprecate this?
        // because if there is no reader, then we cannot rewrite MTQ.
        try
        {
            return new FieldQuery(query, null, phraseHighlight, fieldMatch);
        }
        catch (IOException e)
        {
            // Should never be thrown when reader is null. Surface it as a specific
            // exception type (not the bare System.Exception) and keep the cause.
            throw new InvalidOperationException(e.Message, e);
        }
    }

    /// <summary>
    /// Create a <see cref="FieldQuery"/> object.
    /// </summary>
    /// <param name="query">a query</param>
    /// <param name="reader">the <see cref="IndexReader"/> passed to the <see cref="FieldQuery"/> constructor</param>
    /// <returns>the created <see cref="FieldQuery"/> object</returns>
    public virtual FieldQuery GetFieldQuery(Query query, IndexReader reader)
    {
        return new FieldQuery(query, reader, phraseHighlight, fieldMatch);
    }

    /// <summary>
    /// Return the best fragment, using the builders supplied to the constructor.
    /// </summary>
    /// <param name="fieldQuery"><see cref="FieldQuery"/> object</param>
    /// <param name="reader"><see cref="IndexReader"/> of the index</param>
    /// <param name="docId">document id to be highlighted</param>
    /// <param name="fieldName">field of the document to be highlighted</param>
    /// <param name="fragCharSize">the length (number of chars) of a fragment</param>
    /// <returns>the best fragment (snippet) string</returns>
    /// <exception cref="IOException">If there is a low-level I/O error</exception>
    public string GetBestFragment(FieldQuery fieldQuery, IndexReader reader, int docId,
        string fieldName, int fragCharSize)
    {
        FieldFragList fieldFragList =
            GetFieldFragList(fragListBuilder, fieldQuery, reader, docId, fieldName, fragCharSize);
        return fragmentsBuilder.CreateFragment(reader, docId, fieldName, fieldFragList);
    }

    /// <summary>
    /// Return the best fragments, using the builders supplied to the constructor.
    /// </summary>
    /// <param name="fieldQuery"><see cref="FieldQuery"/> object</param>
    /// <param name="reader"><see cref="IndexReader"/> of the index</param>
    /// <param name="docId">document id to be highlighted</param>
    /// <param name="fieldName">field of the document to be highlighted</param>
    /// <param name="fragCharSize">the length (number of chars) of a fragment</param>
    /// <param name="maxNumFragments">maximum number of fragments</param>
    /// <returns>
    /// created fragments or null when no fragments created.
    /// size of the array can be less than <paramref name="maxNumFragments"/>
    /// </returns>
    /// <exception cref="IOException">If there is a low-level I/O error</exception>
    public string[] GetBestFragments(FieldQuery fieldQuery, IndexReader reader, int docId,
        string fieldName, int fragCharSize, int maxNumFragments)
    {
        FieldFragList fieldFragList =
            GetFieldFragList(fragListBuilder, fieldQuery, reader, docId, fieldName, fragCharSize);
        return fragmentsBuilder.CreateFragments(reader, docId, fieldName, fieldFragList, maxNumFragments);
    }

    /// <summary>
    /// Return the best fragment, using the builders, tags and encoder passed to this call.
    /// </summary>
    /// <param name="fieldQuery"><see cref="FieldQuery"/> object</param>
    /// <param name="reader"><see cref="IndexReader"/> of the index</param>
    /// <param name="docId">document id to be highlighted</param>
    /// <param name="fieldName">field of the document to be highlighted</param>
    /// <param name="fragCharSize">the length (number of chars) of a fragment</param>
    /// <param name="fragListBuilder"><see cref="IFragListBuilder"/> object</param>
    /// <param name="fragmentsBuilder"><see cref="IFragmentsBuilder"/> object</param>
    /// <param name="preTags">pre-tags to be used to highlight terms</param>
    /// <param name="postTags">post-tags to be used to highlight terms</param>
    /// <param name="encoder">an encoder that generates encoded text</param>
    /// <returns>the best fragment (snippet) string</returns>
    /// <exception cref="IOException">If there is a low-level I/O error</exception>
    public string GetBestFragment(FieldQuery fieldQuery, IndexReader reader, int docId,
        string fieldName, int fragCharSize,
        IFragListBuilder fragListBuilder, IFragmentsBuilder fragmentsBuilder,
        string[] preTags, string[] postTags, IEncoder encoder)
    {
        FieldFragList fieldFragList =
            GetFieldFragList(fragListBuilder, fieldQuery, reader, docId, fieldName, fragCharSize);
        return fragmentsBuilder.CreateFragment(reader, docId, fieldName, fieldFragList, preTags, postTags, encoder);
    }

    /// <summary>
    /// Return the best fragments, using the builders, tags and encoder passed to this call.
    /// </summary>
    /// <param name="fieldQuery"><see cref="FieldQuery"/> object</param>
    /// <param name="reader"><see cref="IndexReader"/> of the index</param>
    /// <param name="docId">document id to be highlighted</param>
    /// <param name="fieldName">field of the document to be highlighted</param>
    /// <param name="fragCharSize">the length (number of chars) of a fragment</param>
    /// <param name="maxNumFragments">maximum number of fragments</param>
    /// <param name="fragListBuilder"><see cref="IFragListBuilder"/> object</param>
    /// <param name="fragmentsBuilder"><see cref="IFragmentsBuilder"/> object</param>
    /// <param name="preTags">pre-tags to be used to highlight terms</param>
    /// <param name="postTags">post-tags to be used to highlight terms</param>
    /// <param name="encoder">an encoder that generates encoded text</param>
    /// <returns>
    /// created fragments or null when no fragments created.
    /// size of the array can be less than <paramref name="maxNumFragments"/>
    /// </returns>
    /// <exception cref="IOException">If there is a low-level I/O error</exception>
    public string[] GetBestFragments(FieldQuery fieldQuery, IndexReader reader, int docId,
        string fieldName, int fragCharSize, int maxNumFragments,
        IFragListBuilder fragListBuilder, IFragmentsBuilder fragmentsBuilder,
        string[] preTags, string[] postTags, IEncoder encoder)
    {
        FieldFragList fieldFragList =
            GetFieldFragList(fragListBuilder, fieldQuery, reader, docId, fieldName, fragCharSize);
        return fragmentsBuilder.CreateFragments(reader, docId, fieldName, fieldFragList, maxNumFragments,
            preTags, postTags, encoder);
    }

    /// <summary>
    /// Return the best fragments. Matches are scanned from <paramref name="matchedFields"/> and turned into fragments against
    /// <paramref name="storedField"/>. The highlighting may not make sense if <paramref name="matchedFields"/> has matches with offsets that don't
    /// correspond to features in <paramref name="storedField"/>. It will outright throw a <see cref="IndexOutOfRangeException"/>
    /// if <paramref name="matchedFields"/> produces offsets outside of <paramref name="storedField"/>. As such it is advisable that all
    /// <paramref name="matchedFields"/> share the same source as <paramref name="storedField"/> or are at least a prefix of it.
    /// </summary>
    /// <param name="fieldQuery"><see cref="FieldQuery"/> object</param>
    /// <param name="reader"><see cref="IndexReader"/> of the index</param>
    /// <param name="docId">document id to be highlighted</param>
    /// <param name="storedField">field of the document that stores the text</param>
    /// <param name="matchedFields">fields of the document to scan for matches</param>
    /// <param name="fragCharSize">the length (number of chars) of a fragment</param>
    /// <param name="maxNumFragments">maximum number of fragments</param>
    /// <param name="fragListBuilder"><see cref="IFragListBuilder"/> object</param>
    /// <param name="fragmentsBuilder"><see cref="IFragmentsBuilder"/> object</param>
    /// <param name="preTags">pre-tags to be used to highlight terms</param>
    /// <param name="postTags">post-tags to be used to highlight terms</param>
    /// <param name="encoder">an encoder that generates encoded text</param>
    /// <returns>
    /// created fragments or null when no fragments created.
    /// size of the array can be less than <paramref name="maxNumFragments"/>
    /// </returns>
    /// <exception cref="IOException">If there is a low-level I/O error</exception>
    /// <exception cref="ArgumentNullException">If <paramref name="matchedFields"/> is null</exception>
    /// <exception cref="ArgumentException">If <paramref name="matchedFields"/> is empty</exception>
    public string[] GetBestFragments(FieldQuery fieldQuery, IndexReader reader, int docId,
        string storedField, ISet<string> matchedFields, int fragCharSize, int maxNumFragments,
        IFragListBuilder fragListBuilder, IFragmentsBuilder fragmentsBuilder,
        string[] preTags, string[] postTags, IEncoder encoder)
    {
        FieldFragList fieldFragList =
            GetFieldFragList(fragListBuilder, fieldQuery, reader, docId, matchedFields, fragCharSize);
        return fragmentsBuilder.CreateFragments(reader, docId, storedField, fieldFragList, maxNumFragments,
            preTags, postTags, encoder);
    }

    /// <summary>
    /// Build a <see cref="FieldFragList"/> for one field.
    /// </summary>
    private FieldFragList GetFieldFragList(IFragListBuilder fragListBuilder,
        FieldQuery fieldQuery, IndexReader reader, int docId,
        string matchedField, int fragCharSize)
    {
        FieldTermStack fieldTermStack = new FieldTermStack(reader, docId, matchedField, fieldQuery);
        FieldPhraseList fieldPhraseList = new FieldPhraseList(fieldTermStack, fieldQuery, phraseLimit);
        return fragListBuilder.CreateFieldFragList(fieldPhraseList, fragCharSize);
    }

    /// <summary>
    /// Build a <see cref="FieldFragList"/> for more than one field.
    /// One <see cref="FieldPhraseList"/> is created per matched field and the results are merged.
    /// </summary>
    /// <exception cref="ArgumentNullException">If <paramref name="matchedFields"/> is null</exception>
    /// <exception cref="ArgumentException">If <paramref name="matchedFields"/> is empty</exception>
    private FieldFragList GetFieldFragList(IFragListBuilder fragListBuilder,
        FieldQuery fieldQuery, IndexReader reader, int docId,
        ISet<string> matchedFields, int fragCharSize)
    {
        // Report a missing set as an argument error rather than a NullReferenceException.
        if (matchedFields is null)
        {
            throw new ArgumentNullException(nameof(matchedFields));
        }
        if (matchedFields.Count == 0)
        {
            throw new ArgumentException("matchedFields must contain at least one field name.", nameof(matchedFields));
        }

        FieldPhraseList[] toMerge = new FieldPhraseList[matchedFields.Count];
        int i = 0;
        foreach (string matchedField in matchedFields)
        {
            FieldTermStack stack = new FieldTermStack(reader, docId, matchedField, fieldQuery);
            toMerge[i++] = new FieldPhraseList(stack, fieldQuery, phraseLimit);
        }
        return fragListBuilder.CreateFieldFragList(new FieldPhraseList(toMerge), fragCharSize);
    }

    /// <summary>
    /// Return whether phraseHighlight or not.
    /// </summary>
    public virtual bool IsPhraseHighlight => phraseHighlight;

    /// <summary>
    /// Return whether fieldMatch or not.
    /// </summary>
    public virtual bool IsFieldMatch => fieldMatch;

    /// <summary>
    /// Gets or Sets the maximum number of phrases to analyze when searching for the highest-scoring phrase.
    /// The default is unlimited (int.MaxValue).
    /// </summary>
    public virtual int PhraseLimit
    {
        get => phraseLimit;
        set => phraseLimit = value;
    }
}
}