using Lucene.Net.Analysis;
using Lucene.Net.Analysis.NGram;
using Lucene.Net.Analysis.TokenAttributes;
using Lucene.Net.Codecs.Lucene46;
using Lucene.Net.Documents;
using Lucene.Net.Index;
using Lucene.Net.Index.Sorter;
using Lucene.Net.Store;
using Lucene.Net.Util;
using System;
using System.Collections.Generic;
using System.IO;
using System.Text;
using JCG = J2N.Collections.Generic;
using Directory = Lucene.Net.Store.Directory;
namespace Lucene.Net.Search.Suggest.Analyzing
{
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
// TODO:
// - a PostingsFormat that stores super-high-freq terms as
// a bitset should be a win for the prefix terms?
// (LUCENE-5052)
// - we could offer a better integration with
// DocumentDictionary and NRT? so that your suggester
// "automatically" keeps in sync w/ your index
/// <summary>
/// Analyzes the input text and then suggests matches based
/// on prefix matches to any tokens in the indexed text.
/// This also highlights the tokens that match.
///
/// <para>This suggester supports payloads. Matches are sorted only
/// by the suggest weight; it would be nice to support
/// blended score + weight sort in the future. This means
/// this suggester best applies when there is a strong
/// a priori ranking of all the suggestions.
///
/// </para>
/// <para>This suggester supports contexts; however, the
/// contexts must be valid UTF-8 (arbitrary binary terms will
/// not work).
///
/// @lucene.experimental
/// </para>
/// </summary>
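/// <example>
/// A minimal usage sketch. The analyzer choice and <c>myEnumerator</c> (any
/// <see cref="IInputEnumerator"/> over your suggestion data) are assumptions
/// for illustration, not requirements:
/// <code>
/// var dir = FSDirectory.Open(new DirectoryInfo("suggest-index"));
/// var analyzer = new StandardAnalyzer(LuceneVersion.LUCENE_48);
/// using (var suggester = new AnalyzingInfixSuggester(LuceneVersion.LUCENE_48, dir, analyzer))
/// {
///     suggester.Build(myEnumerator);
///     var results = suggester.DoLookup("ban", 5, allTermsRequired: true, doHighlight: true);
/// }
/// </code>
/// Note that disposing the suggester also disposes the provided directory.
/// </example>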
public class AnalyzingInfixSuggester : Lookup, IDisposable
{
private readonly object syncLock = new object(); // used as a substitute for Java's synchronized (method) keyword
/// <summary>
/// Field name used for the indexed text. </summary>
protected const string TEXT_FIELD_NAME = "text";
/// <summary>
/// Field name used for the indexed text, as a
/// <see cref="StringField"/>, for exact lookup.
/// </summary>
protected const string EXACT_TEXT_FIELD_NAME = "exacttext";
/// <summary>
/// Field name used for the indexed context, as a
/// <see cref="StringField"/> and a <see cref="SortedSetDocValuesField"/>, for filtering.
/// </summary>
protected const string CONTEXTS_FIELD_NAME = "contexts";
/// <summary>
/// Analyzer used at search time </summary>
protected readonly Analyzer m_queryAnalyzer;
/// <summary>
/// Analyzer used at index time </summary>
protected readonly Analyzer m_indexAnalyzer;
internal readonly LuceneVersion matchVersion;
private readonly Directory dir;
internal readonly int minPrefixChars;
private readonly bool commitOnBuild;
/// <summary>
/// Used for ongoing NRT additions/updates. </summary>
private IndexWriter writer;
/// <summary>
/// <see cref="IndexSearcher"/> used for lookups. </summary>
protected SearcherManager m_searcherMgr;
/// <summary>
/// Default minimum number of leading characters before
/// PrefixQuery is used (4).
/// </summary>
public const int DEFAULT_MIN_PREFIX_CHARS = 4;
/// <summary>
/// How we sort the postings and search results. </summary>
private static readonly Sort SORT = new Sort(new SortField("weight", SortFieldType.INT64, true));
/// <summary>
/// Create a new instance, loading from a previously built
/// <see cref="AnalyzingInfixSuggester"/> directory, if it exists.
/// This directory must be
/// private to the infix suggester (i.e., not an external
/// Lucene index). Note that <see cref="Dispose()"/>
/// will also dispose the provided directory.
/// </summary>
public AnalyzingInfixSuggester(LuceneVersion matchVersion, Directory dir, Analyzer analyzer)
: this(matchVersion, dir, analyzer, analyzer, DEFAULT_MIN_PREFIX_CHARS)
{
}
/// <summary>
/// Create a new instance, loading from a previously built
/// <see cref="AnalyzingInfixSuggester"/> directory, if it exists. This directory must be
/// private to the infix suggester (i.e., not an external
/// Lucene index). Note that <see cref="Dispose()"/>
/// will also dispose the provided directory.
/// </summary>
/// <param name="minPrefixChars"> Minimum number of leading characters
/// before <see cref="PrefixQuery"/> is used (default 4).
/// Prefixes shorter than this are indexed as character
/// ngrams (increasing index size but making lookups
/// faster). </param>
// LUCENENET specific - LUCENE-5889, a 4.11.0 feature. Calls the new constructor with an extra param.
// LUCENENET TODO: Remove method at version 4.11.0. Was retained for perfect 4.8 compatibility.
public AnalyzingInfixSuggester(LuceneVersion matchVersion, Directory dir, Analyzer indexAnalyzer,
Analyzer queryAnalyzer, int minPrefixChars)
: this(matchVersion, dir, indexAnalyzer, queryAnalyzer, minPrefixChars, commitOnBuild: false)
{
}
/// <summary>
/// Create a new instance, loading from a previously built
/// <see cref="AnalyzingInfixSuggester"/> directory, if it exists. This directory must be
/// private to the infix suggester (i.e., not an external
/// Lucene index). Note that <see cref="Dispose()"/>
/// will also dispose the provided directory.
/// </summary>
/// <param name="minPrefixChars"> Minimum number of leading characters
/// before <see cref="PrefixQuery"/> is used (default 4).
/// Prefixes shorter than this are indexed as character
/// ngrams (increasing index size but making lookups
/// faster). </param>
/// <param name="commitOnBuild"> Call commit after the index has finished building. This
/// would persist the suggester index to disk and future instances of this suggester can
/// use this pre-built dictionary. </param>
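/// <example>
/// A construction sketch (<c>dirInfo</c> and the two analyzers are assumed to exist):
/// <code>
/// var suggester = new AnalyzingInfixSuggester(LuceneVersion.LUCENE_48,
///     FSDirectory.Open(dirInfo), indexAnalyzer, queryAnalyzer,
///     AnalyzingInfixSuggester.DEFAULT_MIN_PREFIX_CHARS, commitOnBuild: true);
/// </code>
/// </example>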
// LUCENENET specific - LUCENE-5889, a 4.11.0 feature. (Code moved from the other constructor to here.)
public AnalyzingInfixSuggester(LuceneVersion matchVersion, Directory dir, Analyzer indexAnalyzer,
Analyzer queryAnalyzer, int minPrefixChars, bool commitOnBuild)
{
if (minPrefixChars < 0)
{
throw new ArgumentException("minPrefixChars must be >= 0; got: " + minPrefixChars);
}
this.m_queryAnalyzer = queryAnalyzer;
this.m_indexAnalyzer = indexAnalyzer;
this.matchVersion = matchVersion;
this.dir = dir;
this.minPrefixChars = minPrefixChars;
this.commitOnBuild = commitOnBuild;
if (DirectoryReader.IndexExists(dir))
{
// Already built; open it:
writer = new IndexWriter(dir, GetIndexWriterConfig(matchVersion, GetGramAnalyzer(), OpenMode.APPEND));
m_searcherMgr = new SearcherManager(writer, true, null);
}
}
/// <summary>
/// Override this to customize index settings, e.g. which
/// codec to use.
/// </summary>
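/// <example>
/// A minimal override sketch: it reuses the base configuration (codec and
/// sorting merge policy) and only enlarges the RAM buffer, an illustrative choice:
/// <code>
/// protected internal override IndexWriterConfig GetIndexWriterConfig(
///     LuceneVersion matchVersion, Analyzer indexAnalyzer, OpenMode openMode)
/// {
///     var iwc = base.GetIndexWriterConfig(matchVersion, indexAnalyzer, openMode);
///     iwc.RAMBufferSizeMB = 64; // larger buffer for faster initial builds
///     return iwc;
/// }
/// </code>
/// </example>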
protected internal virtual IndexWriterConfig GetIndexWriterConfig(LuceneVersion matchVersion,
Analyzer indexAnalyzer, OpenMode openMode)
{
IndexWriterConfig iwc = new IndexWriterConfig(matchVersion, indexAnalyzer)
{
Codec = new Lucene46Codec(),
OpenMode = openMode
};
// This way all merged segments will be sorted at
// merge time, allowing for per-segment early termination
// when those segments are searched:
iwc.MergePolicy = new SortingMergePolicy(iwc.MergePolicy, SORT);
return iwc;
}
/// <summary>
/// Subclass can override to choose a specific
/// <see cref="Directory"/> implementation.
/// </summary>
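/// <example>
/// For example, to keep the suggester index entirely in memory (a sketch):
/// <code>
/// protected internal override Directory GetDirectory(DirectoryInfo path)
/// {
///     return new RAMDirectory(); // ignores the path; in-memory only
/// }
/// </code>
/// </example>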
protected internal virtual Directory GetDirectory(DirectoryInfo path)
{
return FSDirectory.Open(path);
}
public override void Build(IInputEnumerator enumerator)
{
if (m_searcherMgr != null)
{
m_searcherMgr.Dispose();
m_searcherMgr = null;
}
if (writer != null)
{
writer.Dispose();
writer = null;
}
AtomicReader r = null;
bool success = false;
try
{
// First pass: build a temporary normal Lucene index,
// just indexing the suggestions as they iterate:
writer = new IndexWriter(dir, GetIndexWriterConfig(matchVersion, GetGramAnalyzer(), OpenMode.CREATE));
//long t0 = System.nanoTime();
// TODO: use threads?
BytesRef text;
while (enumerator.MoveNext())
{
text = enumerator.Current;
BytesRef payload;
if (enumerator.HasPayloads)
{
payload = enumerator.Payload;
}
else
{
payload = null;
}
Add(text, enumerator.Contexts, enumerator.Weight, payload);
}
//System.out.println("initial indexing time: " + ((System.nanoTime()-t0)/1000000) + " msec");
if (commitOnBuild) // LUCENENET specific - Support for LUCENE-5889.
{
Commit();
}
m_searcherMgr = new SearcherManager(writer, true, null);
success = true;
}
finally
{
if (success)
{
IOUtils.Dispose(r);
}
else
{
IOUtils.DisposeWhileHandlingException(writer, r);
writer = null;
}
}
}
// LUCENENET specific - Support for LUCENE-5889.
public void Commit()
{
if (writer == null)
{
throw new InvalidOperationException("Cannot commit on an closed writer. Add documents first");
}
writer.Commit();
}
private Analyzer GetGramAnalyzer()
=> new AnalyzerWrapperAnonymousClass(this, Analyzer.PER_FIELD_REUSE_STRATEGY);
private class AnalyzerWrapperAnonymousClass : AnalyzerWrapper
{
private readonly AnalyzingInfixSuggester outerInstance;
public AnalyzerWrapperAnonymousClass(AnalyzingInfixSuggester outerInstance, ReuseStrategy reuseStrategy)
: base(reuseStrategy)
{
this.outerInstance = outerInstance;
}
protected override Analyzer GetWrappedAnalyzer(string fieldName)
{
return outerInstance.m_indexAnalyzer;
}
protected override TokenStreamComponents WrapComponents(string fieldName, TokenStreamComponents components)
{
if (fieldName.Equals("textgrams", StringComparison.Ordinal) && outerInstance.minPrefixChars > 0)
{
return new TokenStreamComponents(components.Tokenizer,
new EdgeNGramTokenFilter(
outerInstance.matchVersion,
components.TokenStream,
1,
outerInstance.minPrefixChars));
}
else
{
return components;
}
}
}
// LUCENENET specific - Support for LUCENE-5889.
private void EnsureOpen()
{
if (writer != null)
return;
lock (syncLock)
{
if (writer == null)
{
if (m_searcherMgr != null)
{
m_searcherMgr.Dispose();
m_searcherMgr = null;
}
writer = new IndexWriter(dir, GetIndexWriterConfig(matchVersion, GetGramAnalyzer(), OpenMode.CREATE));
m_searcherMgr = new SearcherManager(writer, true, null);
}
}
}
/// <summary>
/// Adds a new suggestion. Be sure to use <see cref="Update"/>
/// instead if you want to replace a previous suggestion.
/// After adding or updating a batch of new suggestions,
/// you must call <see cref="Refresh()"/> at the end in order to
/// see the suggestions in <see cref="DoLookup(string, IEnumerable{BytesRef}, int, bool, bool)"/>
/// </summary>
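/// <example>
/// A sketch of the batch-then-refresh pattern (texts and weights are illustrative):
/// <code>
/// suggester.Add(new BytesRef("apple pie"), null, 5, null);
/// suggester.Add(new BytesRef("apple juice"), null, 3, null);
/// suggester.Refresh(); // make the new suggestions visible to lookups
/// </code>
/// </example>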
public virtual void Add(BytesRef text, IEnumerable<BytesRef> contexts, long weight, BytesRef payload)
{
EnsureOpen(); // LUCENENET specific - Support for LUCENE-5889.
writer.AddDocument(BuildDocument(text, contexts, weight, payload));
}
/// <summary>
/// Updates a previous suggestion, matching the exact same
/// text as before. Use this to change the weight or
/// payload of an already added suggestion. If you know
/// this text is not already present you can use <see cref="Add"/>
/// instead. After adding or updating a batch of
/// new suggestions, you must call <see cref="Refresh()"/> at the
/// end in order to see the suggestions in <see cref="DoLookup(string, IEnumerable{BytesRef}, int, bool, bool)"/>
/// </summary>
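/// <example>
/// <code>
/// // Sketch: raise the weight of an existing suggestion, then refresh:
/// suggester.Update(new BytesRef("apple pie"), null, 10, null);
/// suggester.Refresh();
/// </code>
/// </example>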
public virtual void Update(BytesRef text, IEnumerable<BytesRef> contexts, long weight, BytesRef payload)
{
writer.UpdateDocument(new Term(EXACT_TEXT_FIELD_NAME, text.Utf8ToString()), BuildDocument(text, contexts, weight, payload));
}
private Document BuildDocument(BytesRef text, IEnumerable<BytesRef> contexts, long weight, BytesRef payload)
{
string textString = text.Utf8ToString();
var ft = GetTextFieldType();
var doc = new Document
{
new Field(TEXT_FIELD_NAME, textString, ft),
new Field("textgrams", textString, ft),
new StringField(EXACT_TEXT_FIELD_NAME, textString, Field.Store.NO),
new BinaryDocValuesField(TEXT_FIELD_NAME, text),
new NumericDocValuesField("weight", weight)
};
if (payload != null)
{
doc.Add(new BinaryDocValuesField("payloads", payload));
}
if (contexts != null)
{
foreach (BytesRef context in contexts)
{
// TODO: if we had a BinaryTermField we could fix
// this "must be valid ut8f" limitation:
doc.Add(new StringField(CONTEXTS_FIELD_NAME, context.Utf8ToString(), Field.Store.NO));
doc.Add(new SortedSetDocValuesField(CONTEXTS_FIELD_NAME, context));
}
}
return doc;
}
/// <summary>
/// Reopens the underlying searcher; it's best to "batch
/// up" many additions/updates, and then call refresh
/// once at the end.
/// </summary>
public virtual void Refresh()
{
if (m_searcherMgr == null)
{
throw new InvalidOperationException("suggester was not built");
}
m_searcherMgr.MaybeRefreshBlocking();
}
/// <summary>
/// Subclass can override this method to change the field type of the text field
/// e.g. to change the index options
/// </summary>
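/// <example>
/// A sketch of an override that also indexes term frequencies (an illustrative
/// choice; the default indexes docs only):
/// <code>
/// protected override FieldType GetTextFieldType()
/// {
///     var ft = new FieldType(TextField.TYPE_NOT_STORED);
///     ft.IndexOptions = IndexOptions.DOCS_AND_FREQS;
///     ft.OmitNorms = true;
///     return ft;
/// }
/// </code>
/// </example>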
protected virtual FieldType GetTextFieldType()
{
var ft = new FieldType(TextField.TYPE_NOT_STORED);
ft.IndexOptions = IndexOptions.DOCS_ONLY;
ft.OmitNorms = true;
return ft;
}
public override IList<LookupResult> DoLookup(string key, IEnumerable<BytesRef> contexts, bool onlyMorePopular, int num)
{
return DoLookup(key, contexts, num, true, true);
}
/// <summary>
/// Lookup, without any context.
/// </summary>
public virtual IList<LookupResult> DoLookup(string key, int num, bool allTermsRequired, bool doHighlight)
{
return DoLookup(key, null, num, allTermsRequired, doHighlight);
}
/// <summary>
/// This is called if the last token isn't ended
/// (e.g. the user did not type a space after it). Return an
/// appropriate <see cref="Query"/> clause to add to the <see cref="BooleanQuery"/>.
/// </summary>
protected internal virtual Query GetLastTokenQuery(string token)
{
if (token.Length < minPrefixChars)
{
// The leading ngram was directly indexed:
return new TermQuery(new Term("textgrams", token));
}
return new PrefixQuery(new Term(TEXT_FIELD_NAME, token));
}
/// <summary>
/// Retrieve suggestions, specifying whether all terms
/// must match (<paramref name="allTermsRequired"/>) and whether the hits
/// should be highlighted (<paramref name="doHighlight"/>).
/// </summary>
public virtual IList<LookupResult> DoLookup(string key, IEnumerable<BytesRef> contexts, int num, bool allTermsRequired, bool doHighlight)
{
if (m_searcherMgr == null)
{
throw new InvalidOperationException("suggester was not built");
}
Occur occur;
if (allTermsRequired)
{
occur = Occur.MUST;
}
else
{
occur = Occur.SHOULD;
}
TokenStream ts = null;
BooleanQuery query;
var matchedTokens = new JCG.HashSet<string>();
string prefixToken = null;
try
{
ts = m_queryAnalyzer.GetTokenStream("", new StringReader(key));
//long t0 = System.currentTimeMillis();
ts.Reset();
var termAtt = ts.AddAttribute<ICharTermAttribute>();
var offsetAtt = ts.AddAttribute<IOffsetAttribute>();
string lastToken = null;
query = new BooleanQuery();
int maxEndOffset = -1;
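// Consume tokens one-behind so the final token can be handled
// specially below (prefix query vs. exact term query):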
while (ts.IncrementToken())
{
if (lastToken != null)
{
matchedTokens.Add(lastToken);
query.Add(new TermQuery(new Term(TEXT_FIELD_NAME, lastToken)), occur);
}
lastToken = termAtt.ToString();
if (lastToken != null)
{
maxEndOffset = Math.Max(maxEndOffset, offsetAtt.EndOffset);
}
}
ts.End();
if (lastToken != null)
{
Query lastQuery;
if (maxEndOffset == offsetAtt.EndOffset)
{
// Use PrefixQuery (or the ngram equivalent) when
// there were no trailing discarded chars in the
// string (e.g. whitespace), so that if the query does
// not end with a space we show prefix matches for
// that token:
lastQuery = GetLastTokenQuery(lastToken);
prefixToken = lastToken;
}
else
{
// Use TermQuery for an exact match if there were
// trailing discarded chars (e.g. whitespace), so
// that if the query ends with a space we only show
// exact matches for that term:
matchedTokens.Add(lastToken);
lastQuery = new TermQuery(new Term(TEXT_FIELD_NAME, lastToken));
}
if (lastQuery != null)
{
query.Add(lastQuery, occur);
}
}
if (contexts != null)
{
BooleanQuery sub = new BooleanQuery();
query.Add(sub, Occur.MUST);
foreach (BytesRef context in contexts)
{
// NOTE: we "should" wrap this in
// ConstantScoreQuery, or maybe send this as a
// Filter instead to search, but since all of
// these are MUST'd, the change to the score won't
// affect the overall ranking. Since we indexed
// as DOCS_ONLY, the perf should be the same
// either way (no freq int[] blocks to decode):
// TODO: if we had a BinaryTermField we could fix
// this "must be valid ut8f" limitation:
sub.Add(new TermQuery(new Term(CONTEXTS_FIELD_NAME, context.Utf8ToString())), Occur.SHOULD);
}
}
}
finally
{
IOUtils.DisposeWhileHandlingException(ts);
}
// TODO: we could allow blended sort here, combining
// weight w/ score. Now we ignore score and sort only
// by weight:
Query finalQuery = FinishQuery(query, allTermsRequired);
//System.out.println("finalQuery=" + query);
// Sort by weight, descending:
TopFieldCollector c = TopFieldCollector.Create(SORT, num, true, false, false, false);
// We sorted postings by weight during indexing, so we
// only retrieve the first num hits now:
ICollector c2 = new EarlyTerminatingSortingCollector(c, SORT, num);
IndexSearcher searcher = m_searcherMgr.Acquire();
IList<LookupResult> results = null;
try
{
//System.out.println("got searcher=" + searcher);
searcher.Search(finalQuery, c2);
TopFieldDocs hits = (TopFieldDocs)c.GetTopDocs();
// Slower way if postings are not pre-sorted by weight:
// hits = searcher.search(query, null, num, SORT);
results = CreateResults(searcher, hits, num, key, doHighlight, matchedTokens, prefixToken);
}
finally
{
m_searcherMgr.Release(searcher);
}
//System.out.println((System.currentTimeMillis() - t0) + " msec for infix suggest");
//System.out.println(results);
return results;
}
/// <summary>
/// Create the results based on the search hits.
/// Can be overridden by subclass to add particular behavior (e.g. weight transformation) </summary>
/// <exception cref="IOException"> If there are problems reading fields from the underlying Lucene index. </exception>
protected internal virtual IList<LookupResult> CreateResults(IndexSearcher searcher, TopFieldDocs hits, int num, string charSequence, bool doHighlight, ICollection<string> matchedTokens, string prefixToken)
{
BinaryDocValues textDV = MultiDocValues.GetBinaryValues(searcher.IndexReader, TEXT_FIELD_NAME);
// This will just be null if the app didn't pass payloads to Build():
// TODO: maybe just stored fields? they compress...
BinaryDocValues payloadsDV = MultiDocValues.GetBinaryValues(searcher.IndexReader, "payloads");
IList<AtomicReaderContext> leaves = searcher.IndexReader.Leaves;
List<LookupResult> results = new List<LookupResult>();
BytesRef scratch = new BytesRef();
for (int i = 0; i < hits.ScoreDocs.Length; i++)
{
FieldDoc fd = (FieldDoc)hits.ScoreDocs[i];
textDV.Get(fd.Doc, scratch);
string text = scratch.Utf8ToString();
long score = (long)fd.Fields[0];
BytesRef payload;
if (payloadsDV != null)
{
payload = new BytesRef();
payloadsDV.Get(fd.Doc, payload);
}
else
{
payload = null;
}
// Must look up sorted-set by segment:
int segment = ReaderUtil.SubIndex(fd.Doc, leaves);
SortedSetDocValues contextsDV = leaves[segment].AtomicReader.GetSortedSetDocValues(CONTEXTS_FIELD_NAME);
ISet<BytesRef> contexts;
if (contextsDV != null)
{
contexts = new JCG.HashSet<BytesRef>();
contextsDV.SetDocument(fd.Doc - leaves[segment].DocBase);
long ord;
while ((ord = contextsDV.NextOrd()) != SortedSetDocValues.NO_MORE_ORDS)
{
BytesRef context = new BytesRef();
contextsDV.LookupOrd(ord, context);
contexts.Add(context);
}
}
else
{
contexts = null;
}
LookupResult result;
if (doHighlight)
{
object highlightKey = Highlight(text, matchedTokens, prefixToken);
result = new LookupResult(highlightKey.ToString(), highlightKey, score, payload, contexts);
}
else
{
result = new LookupResult(text, score, payload, contexts);
}
results.Add(result);
}
return results;
}
/// <summary>
/// Subclass can override this to tweak the Query before
/// searching.
/// </summary>
protected internal virtual Query FinishQuery(BooleanQuery bq, bool allTermsRequired)
{
return bq;
}
/// <summary>
/// Override this method to customize the Object
/// representing a single highlighted suggestion; the
/// result is set on each <see cref="Lookup.LookupResult.HighlightKey"/>
/// member.
/// </summary>
protected internal virtual object Highlight(string text, ICollection<string> matchedTokens, string prefixToken)
{
TokenStream ts = m_queryAnalyzer.GetTokenStream("text", new StringReader(text));
try
{
var termAtt = ts.AddAttribute<ICharTermAttribute>();
var offsetAtt = ts.AddAttribute<IOffsetAttribute>();
ts.Reset();
var sb = new StringBuilder();
int upto = 0;
while (ts.IncrementToken())
{
string token = termAtt.ToString();
int startOffset = offsetAtt.StartOffset;
int endOffset = offsetAtt.EndOffset;
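// Emit any gap (e.g. whitespace) between the previous token and
// this one unhighlighted; skip tokens that overlap an already
// emitted region (e.g. stacked tokens from synonyms/ngrams):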
if (upto < startOffset)
{
AddNonMatch(sb, text.Substring(upto, startOffset - upto));
upto = startOffset;
}
else if (upto > startOffset)
{
continue;
}
if (matchedTokens.Contains(token))
{
// Token matches.
AddWholeMatch(sb, text.Substring(startOffset, endOffset - startOffset), token);
upto = endOffset;
}
else if (prefixToken != null && token.StartsWith(prefixToken, StringComparison.Ordinal))
{
AddPrefixMatch(sb, text.Substring(startOffset, endOffset - startOffset), token, prefixToken);
upto = endOffset;
}
}
ts.End();
int endOffset2 = offsetAtt.EndOffset;
if (upto < endOffset2)
{
AddNonMatch(sb, text.Substring(upto));
}
return sb.ToString();
}
finally
{
IOUtils.DisposeWhileHandlingException(ts);
}
}
/// <summary>
/// Called while highlighting a single result, to append a
/// non-matching chunk of text from the suggestion to the
/// provided fragments list. </summary>
/// <param name="sb"> The <see cref="StringBuilder"/> to append to </param>
/// <param name="text"> The text chunk to add </param>
protected internal virtual void AddNonMatch(StringBuilder sb, string text)
{
sb.Append(text);
}
/// <summary>
/// Called while highlighting a single result, to append
/// the whole matched token to the provided fragments list. </summary>
/// <param name="sb"> The <see cref="StringBuilder"/> to append to </param>
/// <param name="surface"> The surface form (original) text </param>
/// <param name="analyzed"> The analyzed token corresponding to the surface form text </param>
protected internal virtual void AddWholeMatch(StringBuilder sb, string surface, string analyzed)
{
sb.Append("<b>");
sb.Append(surface);
sb.Append("</b>");
}
/// <summary>
/// Called while highlighting a single result, to append a
/// matched prefix token to the provided fragments list. </summary>
/// <param name="sb"> The <see cref="StringBuilder"/> to append to </param>
/// <param name="surface"> The fragment of the surface form
/// (indexed during <see cref="Build(IInputEnumerator)"/>), corresponding to
/// this match </param>
/// <param name="analyzed"> The analyzed token that matched </param>
/// <param name="prefixToken"> The prefix of the token that matched </param>
protected internal virtual void AddPrefixMatch(StringBuilder sb, string surface, string analyzed, string prefixToken)
{
// TODO: apps can try to invert their analysis logic
// here, e.g. downcase the two before checking prefix:
sb.Append("<b>");
sb.Append(surface.Substring(0, prefixToken.Length));
sb.Append("</b>");
if (prefixToken.Length < surface.Length)
{
sb.Append(surface.Substring(prefixToken.Length));
}
}
public override bool Store(DataOutput output)
{
return false;
}
public override bool Load(DataInput input)
{
return false;
}
public void Dispose()
{
Dispose(true);
GC.SuppressFinalize(this);
}
protected virtual void Dispose(bool disposing) // LUCENENET specific - implemented proper dispose pattern
{
if (disposing)
{
if (m_searcherMgr != null)
{
m_searcherMgr.Dispose();
m_searcherMgr = null;
}
if (writer != null)
{
writer.Dispose();
dir.Dispose();
writer = null;
}
}
}
public override long GetSizeInBytes()
{
long mem = RamUsageEstimator.ShallowSizeOf(this);
try
{
if (m_searcherMgr != null)
{
IndexSearcher searcher = m_searcherMgr.Acquire();
try
{
foreach (AtomicReaderContext context in searcher.IndexReader.Leaves)
{
AtomicReader reader = FilterAtomicReader.Unwrap(context.AtomicReader);
if (reader is SegmentReader)
{
mem += ((SegmentReader)reader).RamBytesUsed();
}
}
}
finally
{
m_searcherMgr.Release(searcher);
}
}
return mem;
}
catch (IOException ioe)
{
throw new Exception(ioe.ToString(), ioe);
}
}
public override long Count
{
get
{
if (m_searcherMgr == null)
{
return 0;
}
IndexSearcher searcher = m_searcherMgr.Acquire();
try
{
return searcher.IndexReader.NumDocs;
}
finally
{
m_searcherMgr.Release(searcher);
}
}
}
}
}