// blob: c4682a0eaccdd2b545c5a2cd5014d1a273b7e6f3 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
using System;
using System.Collections.Generic;
using Lucene.Net.Analysis;
using Lucene.Net.Analysis.Tokenattributes;
using Lucene.Net.Index;
using Lucene.Net.Index.Memory;
using Lucene.Net.Search.Spans;
using Lucene.Net.Support;
using Lucene.Net.Util;
namespace Lucene.Net.Search.Highlight
{
/// <summary>
/// <see cref="IScorer"/> implementation which scores text fragments by the number of
/// unique query terms found. This class converts appropriate <see cref="Query"/>s to
/// <see cref="SpanQuery"/>s and attempts to score only those terms that participated in
/// generating the 'hit' on the document.
/// </summary>
/// <remarks>
/// Expected call order for one document: <see cref="Init(TokenStream)"/>, then
/// <see cref="StartFragment"/> for every fragment, then <see cref="GetTokenScore"/> for
/// every token of that fragment. <see cref="GetTokenScore"/> dereferences state that is
/// only assigned by the two former methods.
/// </remarks>
public class QueryScorer : IScorer
{
    // Sum of the weights of the distinct query terms seen in the current fragment.
    private float totalScore;
    // Terms already counted for the current fragment; re-created by StartFragment.
    private ISet<String> foundTerms;
    // Term text -> weighted term; supplied directly or produced by InitExtractor.
    private IDictionary<String, WeightedSpanTerm> fieldWeightedSpanTerms;
    // Only updated by the WeightedSpanTerm[] constructor.
    private float maxTermWeight;
    // Position of the current token; reset to -1 by Init(TokenStream).
    private int position = -1;
    private String defaultField;
    private ITermAttribute termAtt;
    private IPositionIncrementAttribute posIncAtt;
    private bool expandMultiTermQuery = true;
    private Query query;
    private String field;
    private IndexReader reader;
    // True when the terms were handed in pre-built, so no extraction is needed.
    private bool skipInitExtractor;
    private bool wrapToCaching = true;

    /// <summary>
    /// Constructs a new QueryScorer instance
    /// </summary>
    /// <param name="query">Query to use for highlighting</param>
    public QueryScorer(Query query)
    {
        Init(query, null, null, true);
    }

    /// <summary>
    /// Constructs a new QueryScorer instance
    /// </summary>
    /// <param name="query">Query to use for highlighting</param>
    /// <param name="field">Field to highlight - pass null to ignore fields</param>
    public QueryScorer(Query query, String field)
    {
        Init(query, field, null, true);
    }

    /// <summary>
    /// Constructs a new QueryScorer instance
    /// </summary>
    /// <param name="query">Query to use for highlighting</param>
    /// <param name="reader"><see cref="IndexReader"/> to use for quasi tf/idf scoring</param>
    /// <param name="field">Field to highlight - pass null to ignore fields</param>
    public QueryScorer(Query query, IndexReader reader, String field)
    {
        Init(query, field, reader, true);
    }

    /// <summary>
    /// Constructs a new QueryScorer instance
    /// </summary>
    /// <param name="query">Query to use for highlighting</param>
    /// <param name="reader"><see cref="IndexReader"/> to use for quasi tf/idf scoring</param>
    /// <param name="field">Field to highlight - pass null to ignore fields</param>
    /// <param name="defaultField">The default field for queries with the field name unspecified</param>
    public QueryScorer(Query query, IndexReader reader, String field, String defaultField)
    {
        this.defaultField = StringHelper.Intern(defaultField);
        Init(query, field, reader, true);
    }

    /// <summary>
    /// Constructs a new QueryScorer instance
    /// </summary>
    /// <param name="query">Query to use for highlighting</param>
    /// <param name="field">Field to highlight - pass null to ignore fields</param>
    /// <param name="defaultField">The default field for queries with the field name unspecified</param>
    public QueryScorer(Query query, String field, String defaultField)
    {
        this.defaultField = StringHelper.Intern(defaultField);
        Init(query, field, null, true);
    }

    /// <summary>
    /// Constructs a new QueryScorer instance from pre-built terms. No term extraction is
    /// performed later on by <see cref="Init(TokenStream)"/> for instances created this way.
    /// </summary>
    /// <param name="weightedTerms">an array of pre-created <see cref="WeightedSpanTerm"/>s</param>
    /// <exception cref="ArgumentNullException"><paramref name="weightedTerms"/> is null</exception>
    public QueryScorer(WeightedSpanTerm[] weightedTerms)
    {
        if (weightedTerms == null)
        {
            throw new ArgumentNullException("weightedTerms");
        }

        this.fieldWeightedSpanTerms = new HashMap<String, WeightedSpanTerm>(weightedTerms.Length);
        foreach (WeightedSpanTerm t in weightedTerms)
        {
            // TryGetValue leaves existingTerm null when the term has not been seen yet,
            // without depending on how the dictionary's indexer treats missing keys.
            WeightedSpanTerm existingTerm;
            fieldWeightedSpanTerms.TryGetValue(t.Term, out existingTerm);

            if ((existingTerm == null) ||
                (existingTerm.Weight < t.Weight))
            {
                // if a term is defined more than once, always use the highest
                // scoring Weight
                fieldWeightedSpanTerms[t.Term] = t;
                maxTermWeight = Math.Max(maxTermWeight, t.Weight);
            }
        }
        skipInitExtractor = true;
    }

    /// <summary>
    /// Score accumulated for the current fragment: the summed weight of every distinct
    /// query term seen since the last call to <see cref="StartFragment"/>.
    /// </summary>
    /// <seealso cref="IScorer.FragmentScore"/>
    public float FragmentScore
    {
        get { return totalScore; }
    }

    /// <summary>
    /// The highest weighted term (useful for passing to GradientFormatter to set top end of coloring scale).
    /// Within this class the value is only updated by the
    /// <see cref="QueryScorer(WeightedSpanTerm[])"/> constructor.
    /// </summary>
    public float MaxTermWeight
    {
        get { return maxTermWeight; }
    }

    /// <summary>
    /// Scores the token currently exposed by the attributes captured in
    /// <see cref="Init(TokenStream)"/> and advances the tracked token position.
    /// </summary>
    /// <returns>
    /// The weight of the matching query term, or 0 when the token is not a query term or
    /// is a position-sensitive term found at a non-matching position.
    /// </returns>
    /// <seealso cref="IScorer.GetTokenScore"/>
    public float GetTokenScore()
    {
        position += posIncAtt.PositionIncrement;
        String termText = termAtt.Term;

        // A missing key and a null entry both mean "not a query term".
        WeightedSpanTerm weightedSpanTerm;
        if (!fieldWeightedSpanTerms.TryGetValue(termText, out weightedSpanTerm) ||
            weightedSpanTerm == null)
        {
            return 0;
        }

        if (weightedSpanTerm.IsPositionSensitive() &&
            !weightedSpanTerm.CheckPosition(position))
        {
            return 0;
        }

        float score = weightedSpanTerm.Weight;

        // found a query term - is it unique in this doc?
        if (!foundTerms.Contains(termText))
        {
            totalScore += score;
            foundTerms.Add(termText);
        }

        return score;
    }

    /// <summary>
    /// Prepares the scorer for a new token stream: resets the token position, captures
    /// the term and position-increment attributes and, unless the terms were supplied
    /// through the <see cref="QueryScorer(WeightedSpanTerm[])"/> constructor, extracts
    /// the weighted terms from the query.
    /// </summary>
    /// <param name="tokenStream">the stream that is about to be scored</param>
    /// <returns>
    /// The token stream held by the term extractor when it reports a cached token stream;
    /// otherwise null.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="tokenStream"/> is null</exception>
    /// <seealso cref="IScorer.Init"/>
    public TokenStream Init(TokenStream tokenStream)
    {
        if (tokenStream == null)
        {
            throw new ArgumentNullException("tokenStream");
        }

        position = -1;
        termAtt = tokenStream.AddAttribute<ITermAttribute>();
        posIncAtt = tokenStream.AddAttribute<IPositionIncrementAttribute>();

        if (!skipInitExtractor)
        {
            if (fieldWeightedSpanTerms != null)
            {
                fieldWeightedSpanTerms.Clear();
            }
            return InitExtractor(tokenStream);
        }

        return null;
    }

    /// <summary>
    /// Retrieve the <see cref="WeightedSpanTerm"/> for the specified token. Useful for passing
    /// Span information to a <see cref="IFragmenter"/>.
    /// </summary>
    /// <param name="token">token to get <see cref="WeightedSpanTerm"/> for</param>
    /// <returns>WeightedSpanTerm for token, or null when the token is not a known term</returns>
    public WeightedSpanTerm GetWeightedSpanTerm(String token)
    {
        WeightedSpanTerm weightedSpanTerm;
        fieldWeightedSpanTerms.TryGetValue(token, out weightedSpanTerm);
        return weightedSpanTerm;
    }

    /// <summary>
    /// Stores the settings shared by all query-based constructors.
    /// </summary>
    private void Init(Query query, String field, IndexReader reader, bool expandMultiTermQuery)
    {
        this.reader = reader;
        this.expandMultiTermQuery = expandMultiTermQuery;
        this.query = query;
        this.field = field;
    }

    /// <summary>
    /// Runs a <see cref="WeightedSpanTermExtractor"/> over the query and stores the
    /// resulting terms; the score-aware extraction is used when a reader was supplied.
    /// </summary>
    /// <returns>
    /// The extractor's token stream when it reports a cached token stream; otherwise null.
    /// </returns>
    private TokenStream InitExtractor(TokenStream tokenStream)
    {
        WeightedSpanTermExtractor qse = defaultField == null
                                            ? new WeightedSpanTermExtractor()
                                            : new WeightedSpanTermExtractor(defaultField);

        qse.ExpandMultiTermQuery = expandMultiTermQuery;
        qse.SetWrapIfNotCachingTokenFilter(wrapToCaching);

        if (reader == null)
        {
            this.fieldWeightedSpanTerms = qse.GetWeightedSpanTerms(query,
                                                                   tokenStream, field);
        }
        else
        {
            this.fieldWeightedSpanTerms = qse.GetWeightedSpanTermsWithScores(query,
                                                                             tokenStream, field, reader);
        }

        if (qse.IsCachedTokenStream)
        {
            return qse.TokenStream;
        }

        return null;
    }

    /// <summary>
    /// Starts scoring a new fragment: forgets the terms found so far and resets the
    /// fragment score to zero.
    /// </summary>
    /// <seealso cref="IScorer.StartFragment"/>
    public void StartFragment(TextFragment newFragment)
    {
        foundTerms = Support.Compatibility.SetFactory.CreateHashSet<string>();
        totalScore = 0;
    }

    /// <summary>
    /// Controls whether or not multi-term queries are expanded
    /// against a <see cref="MemoryIndex"/> <see cref="IndexReader"/>.
    /// The value is handed to the term extractor on each call to
    /// <see cref="Init(TokenStream)"/> that performs extraction.
    /// </summary>
    public bool IsExpandMultiTermQuery
    {
        get { return expandMultiTermQuery; }
        set { this.expandMultiTermQuery = value; }
    }

    /// <summary>
    /// By default, <see cref="TokenStream"/>s that are not of the type
    /// <see cref="CachingTokenFilter"/> are wrapped in a <see cref="CachingTokenFilter"/> to
    /// ensure an efficient reset - if you are already using a different caching
    /// <see cref="TokenStream"/> impl and you don't want it to be wrapped, set this to
    /// false.
    /// </summary>
    /// <param name="wrap">false to leave non-caching token streams unwrapped</param>
    public void SetWrapIfNotCachingTokenFilter(bool wrap)
    {
        this.wrapToCaching = wrap;
    }
}
}