blob: 3c262f8c837025c708c7fe95acd9128a323eb621 [file] [log] [blame]
using System;
using System.Collections.Generic;
using JCG = J2N.Collections.Generic;
using SubInfo = Lucene.Net.Search.VectorHighlight.FieldFragList.WeightedFragInfo.SubInfo;
using TermInfo = Lucene.Net.Search.VectorHighlight.FieldTermStack.TermInfo;
using WeightedPhraseInfo = Lucene.Net.Search.VectorHighlight.FieldPhraseList.WeightedPhraseInfo;
namespace Lucene.Net.Search.VectorHighlight
{
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/// <summary>
/// A weighted implementation of <see cref="FieldFragList"/>.
/// </summary>
public class WeightedFieldFragList : FieldFragList
{
/// <summary>
/// a constructor.
/// </summary>
/// <param name="fragCharSize">the length (number of chars) of a fragment</param>
public WeightedFieldFragList(int fragCharSize)
: base(fragCharSize)
{
}
/// <summary>
/// <seealso cref="FieldFragList.Add(int, int, IList{WeightedPhraseInfo})"/>.
/// </summary>
public override void Add(int startOffset, int endOffset, IList<WeightedPhraseInfo> phraseInfoList)
{
IList<SubInfo> tempSubInfos = new List<SubInfo>();
IList<SubInfo> realSubInfos = new List<SubInfo>();
ISet<string> distinctTerms = new JCG.HashSet<string>();
int length = 0;
foreach (WeightedPhraseInfo phraseInfo in phraseInfoList)
{
float phraseTotalBoost = 0;
foreach (TermInfo ti in phraseInfo.TermsInfos)
{
if (distinctTerms.Add(ti.Text))
phraseTotalBoost += ti.Weight * phraseInfo.Boost;
length++;
}
tempSubInfos.Add(new SubInfo(phraseInfo.GetText(), phraseInfo.TermsOffsets,
phraseInfo.Seqnum, phraseTotalBoost));
}
// We want that terms per fragment (length) is included into the weight. Otherwise a one-word-query
// would cause an equal weight for all fragments regardless of how much words they contain.
// To avoid that fragments containing a high number of words possibly "outrank" more relevant fragments
// we "bend" the length with a standard-normalization a little bit.
float norm = length * (1 / (float)Math.Sqrt(length));
float totalBoost = 0;
foreach (SubInfo tempSubInfo in tempSubInfos)
{
float subInfoBoost = tempSubInfo.Boost * norm;
realSubInfos.Add(new SubInfo(tempSubInfo.Text, tempSubInfo.TermsOffsets,
tempSubInfo.Seqnum, subInfoBoost));
totalBoost += subInfoBoost;
}
FragInfos.Add(new WeightedFragInfo(startOffset, endOffset, realSubInfos, totalBoost));
}
}
}