blob: 944bf43fab9e5fe6a94ddcffbc44a676cc388514 [file] [log] [blame]
using Lucene.Net.Diagnostics;
using System;
using System.Collections.Generic;
using System.Diagnostics;
using WeightedPhraseInfo = Lucene.Net.Search.VectorHighlight.FieldPhraseList.WeightedPhraseInfo;
namespace Lucene.Net.Search.VectorHighlight
{
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/// <summary>
/// A abstract implementation of <see cref="IFragListBuilder"/>.
/// </summary>
public abstract class BaseFragListBuilder : IFragListBuilder
{
public static readonly int MARGIN_DEFAULT = 6;
public static readonly int MIN_FRAG_CHAR_SIZE_FACTOR = 3;
internal readonly int margin;
internal readonly int minFragCharSize;
protected BaseFragListBuilder(int margin) // LUCENENET: CA1012: Abstract types should not have constructors (marked protected)
{
if (margin < 0)
throw new ArgumentException("margin(" + margin + ") is too small. It must be 0 or higher.");
this.margin = margin;
this.minFragCharSize = Math.Max(1, margin * MIN_FRAG_CHAR_SIZE_FACTOR);
}
protected BaseFragListBuilder() // LUCENENET: CA1012: Abstract types should not have constructors (marked protected)
: this(MARGIN_DEFAULT)
{
}
// LUCENENET specific - need to make this overload of CreateFieldFragList abstract so it satisfies
// the interface contract.
public abstract FieldFragList CreateFieldFragList(FieldPhraseList fieldPhraseList, int fragCharSize);
protected virtual FieldFragList CreateFieldFragList(FieldPhraseList fieldPhraseList, FieldFragList fieldFragList, int fragCharSize)
{
if (fragCharSize < minFragCharSize)
throw new ArgumentException("fragCharSize(" + fragCharSize + ") is too small. It must be " + minFragCharSize + " or higher.");
List<WeightedPhraseInfo> wpil = new List<WeightedPhraseInfo>();
using (IteratorQueue<WeightedPhraseInfo> queue = new IteratorQueue<WeightedPhraseInfo>(fieldPhraseList.PhraseList.GetEnumerator()))
{
WeightedPhraseInfo phraseInfo = null;
int startOffset = 0;
while ((phraseInfo = queue.Top()) != null)
{
// if the phrase violates the border of previous fragment, discard it and try next phrase
if (phraseInfo.StartOffset < startOffset)
{
queue.RemoveTop();
continue;
}
wpil.Clear();
int currentPhraseStartOffset = phraseInfo.StartOffset;
int currentPhraseEndOffset = phraseInfo.EndOffset;
int spanStart = Math.Max(currentPhraseStartOffset - margin, startOffset);
int spanEnd = Math.Max(currentPhraseEndOffset, spanStart + fragCharSize);
if (AcceptPhrase(queue.RemoveTop(), currentPhraseEndOffset - currentPhraseStartOffset, fragCharSize))
{
wpil.Add(phraseInfo);
}
while ((phraseInfo = queue.Top()) != null)
{ // pull until we crossed the current spanEnd
if (phraseInfo.EndOffset <= spanEnd)
{
currentPhraseEndOffset = phraseInfo.EndOffset;
if (AcceptPhrase(queue.RemoveTop(), currentPhraseEndOffset - currentPhraseStartOffset, fragCharSize))
{
wpil.Add(phraseInfo);
}
}
else
{
break;
}
}
if (wpil.Count == 0)
{
continue;
}
int matchLen = currentPhraseEndOffset - currentPhraseStartOffset;
// now recalculate the start and end position to "center" the result
int newMargin = Math.Max(0, (fragCharSize - matchLen) / 2); // matchLen can be > fragCharSize prevent IAOOB here
spanStart = currentPhraseStartOffset - newMargin;
if (spanStart < startOffset)
{
spanStart = startOffset;
}
// whatever is bigger here we grow this out
spanEnd = spanStart + Math.Max(matchLen, fragCharSize);
startOffset = spanEnd;
fieldFragList.Add(spanStart, spanEnd, wpil);
}
}
return fieldFragList;
}
/// <summary>
/// A predicate to decide if the given <see cref="WeightedPhraseInfo"/> should be
/// accepted as a highlighted phrase or if it should be discarded.
/// <para/>
/// The default implementation discards phrases that are composed of more than one term
/// and where the matchLength exceeds the fragment character size.
/// </summary>
/// <param name="info">the phrase info to accept</param>
/// <param name="matchLength">the match length of the current phrase</param>
/// <param name="fragCharSize">the configured fragment character size</param>
/// <returns><c>true</c> if this phrase info should be accepted as a highligh phrase</returns>
protected virtual bool AcceptPhrase(WeightedPhraseInfo info, int matchLength, int fragCharSize)
{
return info.TermsOffsets.Count <= 1 || matchLength <= fragCharSize;
}
private sealed class IteratorQueue<T> : IDisposable // LUCENENET specific - implemented IDisposable to dispose the IEnumerator<T>
{
private readonly IEnumerator<T> iter;
private T top;
public IteratorQueue(IEnumerator<T> iter)
{
this.iter = iter;
T removeTop = RemoveTop();
if (Debugging.AssertsEnabled) Debugging.Assert(removeTop == null);
}
public T Top()
{
return top;
}
public T RemoveTop()
{
T currentTop = top;
if (iter.MoveNext())
{
top = iter.Current;
}
else
{
top = default;
}
return currentTop;
}
public void Dispose()
{
iter.Dispose();
}
}
}
}