| using System; |
| using System.Collections.Generic; |
| using System.IO; |
| |
| using Lucene.Net.Documents; |
| using Lucene.Net.Index; |
| using Lucene.Net.Store; |
| using Lucene.Net.Util; |
| using Version = Lucene.Net.Util.Version; |
| using Directory = Lucene.Net.Store.Directory; |
| |
| namespace Lucene.Net.Search.Spell |
| { |
| |
| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| /// <summary> |
| /// <para> |
| /// Spell Checker class (Main class) <br/> |
| /// (initially inspired by the David Spencer code). |
| /// </para> |
| /// |
| /// <para>Example Usage: |
| /// |
| /// <pre class="prettyprint"> |
| /// SpellChecker spellchecker = new SpellChecker(spellIndexDirectory); |
| /// // To index a field of a user index: |
| /// spellchecker.indexDictionary(new LuceneDictionary(my_lucene_reader, a_field)); |
| /// // To index a file containing words: |
| /// spellchecker.indexDictionary(new PlainTextDictionary(new File("myfile.txt"))); |
| /// String[] suggestions = spellchecker.suggestSimilar("misspelt", 5); |
| /// </pre> |
| /// |
| /// |
| /// </para> |
| /// </summary> |
| public class SpellChecker : IDisposable |
| { |
| |
| /// <summary> |
| /// The default minimum score to use, if not specified by calling <seealso cref="#setAccuracy(float)"/> . |
| /// </summary> |
| public const float DEFAULT_ACCURACY = 0.5f; |
| |
| /// <summary> |
| /// Field name for each word in the ngram index. |
| /// </summary> |
| public const string F_WORD = "word"; |
| |
| /// <summary> |
| /// the spell index |
| /// </summary> |
| // don't modify the directory directly - see #swapSearcher() |
| // TODO: why is this package private? |
| internal Directory spellIndex; |
| /// <summary> |
| /// Boost value for start and end grams |
| /// </summary> |
| private float bStart = 2.0f; |
| |
| private float bEnd = 1.0f; |
| // don't use this searcher directly - see #swapSearcher() |
| |
| private IndexSearcher searcher; |
| /* |
| * this locks all modifications to the current searcher. |
| */ |
| |
| private readonly object searcherLock = new object(); |
| /* |
| * this lock synchronizes all possible modifications to the |
| * current index directory. It should not be possible to try modifying |
| * the same index concurrently. Note: Do not acquire the searcher lock |
| * before acquiring this lock! |
| */ |
| private readonly object modifyCurrentIndexLock = new object(); |
| |
| private volatile bool closed = false; |
| // minimum score for hits generated by the spell checker query |
| |
| private float accuracy = DEFAULT_ACCURACY; |
| |
| private StringDistance sd; |
| private IComparer<SuggestWord> comparator; |
| |
| /// <summary> |
| /// Use the given directory as a spell checker index. The directory |
| /// is created if it doesn't exist yet. </summary> |
| /// <param name="spellIndex"> the spell index directory </param> |
| /// <param name="sd"> the <seealso cref="StringDistance"/> measurement to use </param> |
| /// <exception cref="IOException"> if Spellchecker can not open the directory </exception> |
| public SpellChecker(Directory spellIndex, StringDistance sd) |
| : this(spellIndex, sd, SuggestWordQueue.DEFAULT_COMPARATOR) |
| { |
| } |
| /// <summary> |
| /// Use the given directory as a spell checker index with a |
| /// <seealso cref="LevensteinDistance"/> as the default <seealso cref="StringDistance"/>. The |
| /// directory is created if it doesn't exist yet. |
| /// </summary> |
| /// <param name="spellIndex"> |
| /// the spell index directory </param> |
| /// <exception cref="IOException"> |
| /// if spellchecker can not open the directory </exception> |
| public SpellChecker(Directory spellIndex) |
| : this(spellIndex, new LevensteinDistance()) |
| { |
| } |
| |
| /// <summary> |
| /// Use the given directory as a spell checker index with the given <seealso cref="Lucene.Net.Search.Spell.StringDistance"/> measure |
| /// and the given <seealso cref="java.util.Comparator"/> for sorting the results. </summary> |
| /// <param name="spellIndex"> The spelling index </param> |
| /// <param name="sd"> The distance </param> |
| /// <param name="comparator"> The comparator </param> |
| /// <exception cref="IOException"> if there is a problem opening the index </exception> |
| public SpellChecker(Directory spellIndex, StringDistance sd, IComparer<SuggestWord> comparator) |
| { |
| SpellIndex = spellIndex; |
| StringDistance = sd; |
| this.comparator = comparator; |
| } |
| |
| /// <summary> |
| /// Use a different index as the spell checker index or re-open |
| /// the existing index if <code>spellIndex</code> is the same value |
| /// as given in the constructor. </summary> |
| /// <param name="spellIndexDir"> the spell directory to use </param> |
| /// <exception cref="AlreadyClosedException"> if the Spellchecker is already closed </exception> |
| /// <exception cref="System.IO.IOException"> if spellchecker can not open the directory </exception> |
| // TODO: we should make this final as it is called in the constructor |
| public virtual Directory SpellIndex |
| { |
| set |
| { |
| // this could be the same directory as the current spellIndex |
| // modifications to the directory should be synchronized |
| lock (modifyCurrentIndexLock) |
| { |
| EnsureOpen(); |
| if (!DirectoryReader.IndexExists(value)) |
| { |
| using (var writer = new IndexWriter(value, new IndexWriterConfig(Version.LUCENE_CURRENT, null))) |
| { |
| } |
| } |
| SwapSearcher(value); |
| } |
| } |
| } |
| |
| /// <summary> |
| /// Sets the <seealso cref="java.util.Comparator"/> for the <seealso cref="SuggestWordQueue"/>. </summary> |
| /// <param name="comparator"> the comparator </param> |
| public virtual IComparer<SuggestWord> Comparator |
| { |
| set |
| { |
| this.comparator = value; |
| } |
| get |
| { |
| return comparator; |
| } |
| } |
| |
| |
| /// <summary> |
| /// Sets the <seealso cref="StringDistance"/> implementation for this |
| /// <seealso cref="SpellChecker"/> instance. |
| /// </summary> |
| /// <param name="sd"> the <seealso cref="StringDistance"/> implementation for this |
| /// <seealso cref="SpellChecker"/> instance </param> |
| public virtual StringDistance StringDistance |
| { |
| set |
| { |
| this.sd = value; |
| } |
| get |
| { |
| return sd; |
| } |
| } |
| |
| /// <summary> |
| /// Sets the accuracy 0 < minScore < 1; default <seealso cref="#DEFAULT_ACCURACY"/> </summary> |
| /// <param name="acc"> The new accuracy </param> |
| public virtual float Accuracy |
| { |
| set |
| { |
| this.accuracy = value; |
| } |
| get |
| { |
| return accuracy; |
| } |
| } |
| |
| |
| /// <summary> |
| /// Suggest similar words. |
| /// |
| /// <para>As the Lucene similarity that is used to fetch the most relevant n-grammed terms |
| /// is not the same as the edit distance strategy used to calculate the best |
| /// matching spell-checked word from the hits that Lucene found, one usually has |
| /// to retrieve a couple of numSug's in order to get the true best match. |
| /// |
| /// </para> |
| /// <para>I.e. if numSug == 1, don't count on that suggestion being the best one. |
| /// Thus, you should set this value to <b>at least</b> 5 for a good suggestion. |
| /// |
| /// </para> |
| /// </summary> |
| /// <param name="word"> the word you want a spell check done on </param> |
| /// <param name="numSug"> the number of suggested words </param> |
| /// <exception cref="IOException"> if the underlying index throws an <seealso cref="IOException"/> </exception> |
| /// <exception cref="AlreadyClosedException"> if the Spellchecker is already closed </exception> |
| /// <returns> String[] |
| /// </returns> |
| /// <seealso cref= #suggestSimilar(String, int, IndexReader, String, SuggestMode, float) </seealso> |
| public virtual string[] SuggestSimilar(string word, int numSug) |
| { |
| return this.SuggestSimilar(word, numSug, null, null, SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX); |
| } |
| |
| /// <summary> |
| /// Suggest similar words. |
| /// |
| /// <para>As the Lucene similarity that is used to fetch the most relevant n-grammed terms |
| /// is not the same as the edit distance strategy used to calculate the best |
| /// matching spell-checked word from the hits that Lucene found, one usually has |
| /// to retrieve a couple of numSug's in order to get the true best match. |
| /// |
| /// </para> |
| /// <para>I.e. if numSug == 1, don't count on that suggestion being the best one. |
| /// Thus, you should set this value to <b>at least</b> 5 for a good suggestion. |
| /// |
| /// </para> |
| /// </summary> |
| /// <param name="word"> the word you want a spell check done on </param> |
| /// <param name="numSug"> the number of suggested words </param> |
| /// <param name="accuracy"> The minimum score a suggestion must have in order to qualify for inclusion in the results </param> |
| /// <exception cref="IOException"> if the underlying index throws an <seealso cref="IOException"/> </exception> |
| /// <exception cref="AlreadyClosedException"> if the Spellchecker is already closed </exception> |
| /// <returns> String[] |
| /// </returns> |
| /// <seealso cref= #suggestSimilar(String, int, IndexReader, String, SuggestMode, float) </seealso> |
| public virtual string[] SuggestSimilar(string word, int numSug, float accuracy) |
| { |
| return this.SuggestSimilar(word, numSug, null, null, SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX, accuracy); |
| } |
| |
| /// <summary> |
| /// Calls {@link #suggestSimilar(String, int, IndexReader, String, SuggestMode, float) |
| /// suggestSimilar(word, numSug, ir, suggestMode, field, this.accuracy)} |
| /// |
| /// </summary> |
| public virtual string[] SuggestSimilar(string word, int numSug, IndexReader ir, string field, SuggestMode suggestMode) |
| { |
| return SuggestSimilar(word, numSug, ir, field, suggestMode, this.accuracy); |
| } |
| |
| /// <summary> |
| /// Suggest similar words (optionally restricted to a field of an index). |
| /// |
| /// <para>As the Lucene similarity that is used to fetch the most relevant n-grammed terms |
| /// is not the same as the edit distance strategy used to calculate the best |
| /// matching spell-checked word from the hits that Lucene found, one usually has |
| /// to retrieve a couple of numSug's in order to get the true best match. |
| /// |
| /// </para> |
| /// <para>I.e. if numSug == 1, don't count on that suggestion being the best one. |
| /// Thus, you should set this value to <b>at least</b> 5 for a good suggestion. |
| /// |
| /// </para> |
| /// </summary> |
| /// <param name="word"> the word you want a spell check done on </param> |
| /// <param name="numSug"> the number of suggested words </param> |
| /// <param name="ir"> the indexReader of the user index (can be null see field param) </param> |
| /// <param name="field"> the field of the user index: if field is not null, the suggested |
| /// words are restricted to the words present in this field. </param> |
| /// <param name="suggestMode"> |
| /// (NOTE: if indexReader==null and/or field==null, then this is overridden with SuggestMode.SUGGEST_ALWAYS) </param> |
| /// <param name="accuracy"> The minimum score a suggestion must have in order to qualify for inclusion in the results </param> |
| /// <exception cref="IOException"> if the underlying index throws an <seealso cref="IOException"/> </exception> |
| /// <exception cref="AlreadyClosedException"> if the Spellchecker is already closed </exception> |
| /// <returns> String[] the sorted list of the suggest words with these 2 criteria: |
| /// first criteria: the edit distance, second criteria (only if restricted mode): the popularity |
| /// of the suggest words in the field of the user index |
| /// </returns> |
| public virtual string[] SuggestSimilar(string word, int numSug, IndexReader ir, string field, SuggestMode suggestMode, float accuracy) |
| { |
| // obtainSearcher calls ensureOpen |
| IndexSearcher indexSearcher = ObtainSearcher(); |
| try |
| { |
| if (ir == null || field == null) |
| { |
| suggestMode = SuggestMode.SUGGEST_ALWAYS; |
| } |
| if (suggestMode == SuggestMode.SUGGEST_ALWAYS) |
| { |
| ir = null; |
| field = null; |
| } |
| |
| int lengthWord = word.Length; |
| |
| int freq = (ir != null && field != null) ? ir.DocFreq(new Term(field, word)) : 0; |
| int goalFreq = suggestMode == SuggestMode.SUGGEST_MORE_POPULAR ? freq : 0; |
| // if the word exists in the real index and we don't care for word frequency, return the word itself |
| if (suggestMode == SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX && freq > 0) |
| { |
| return new string[] { word }; |
| } |
| |
| BooleanQuery query = new BooleanQuery(); |
| string[] grams; |
| string key; |
| |
| for (int ng = GetMin(lengthWord); ng <= GetMax(lengthWord); ng++) |
| { |
| |
| key = "gram" + ng; // form key |
| |
| grams = FormGrams(word, ng); // form word into ngrams (allow dups too) |
| |
| if (grams.Length == 0) |
| { |
| continue; // hmm |
| } |
| |
| if (bStart > 0) // should we boost prefixes? |
| { |
| Add(query, "start" + ng, grams[0], bStart); // matches start of word |
| |
| } |
| if (bEnd > 0) // should we boost suffixes |
| { |
| Add(query, "end" + ng, grams[grams.Length - 1], bEnd); // matches end of word |
| |
| } |
| for (int i = 0; i < grams.Length; i++) |
| { |
| Add(query, key, grams[i]); |
| } |
| } |
| |
| int maxHits = 10 * numSug; |
| |
| // System.out.println("Q: " + query); |
| ScoreDoc[] hits = indexSearcher.Search(query, null, maxHits).ScoreDocs; |
| // System.out.println("HITS: " + hits.length()); |
| SuggestWordQueue sugQueue = new SuggestWordQueue(numSug, comparator); |
| |
| // go thru more than 'maxr' matches in case the distance filter triggers |
| int stop = Math.Min(hits.Length, maxHits); |
| SuggestWord sugWord = new SuggestWord(); |
| for (int i = 0; i < stop; i++) |
| { |
| |
| sugWord.@string = indexSearcher.Doc(hits[i].Doc).Get(F_WORD); // get orig word |
| |
| // don't suggest a word for itself, that would be silly |
| if (sugWord.@string.Equals(word)) |
| { |
| continue; |
| } |
| |
| // edit distance |
| sugWord.score = sd.GetDistance(word, sugWord.@string); |
| if (sugWord.score < accuracy) |
| { |
| continue; |
| } |
| |
| if (ir != null && field != null) // use the user index |
| { |
| sugWord.freq = ir.DocFreq(new Term(field, sugWord.@string)); // freq in the index |
| // don't suggest a word that is not present in the field |
| if ((suggestMode == SuggestMode.SUGGEST_MORE_POPULAR && goalFreq > sugWord.freq) || sugWord.freq < 1) |
| { |
| continue; |
| } |
| } |
| sugQueue.InsertWithOverflow(sugWord); |
| if (sugQueue.Size() == numSug) |
| { |
| // if queue full, maintain the minScore score |
| accuracy = sugQueue.Top().score; |
| } |
| sugWord = new SuggestWord(); |
| } |
| |
| // convert to array string |
| string[] list = new string[sugQueue.Size()]; |
| for (int i = sugQueue.Size() - 1; i >= 0; i--) |
| { |
| list[i] = sugQueue.Pop().@string; |
| } |
| |
| return list; |
| } |
| finally |
| { |
| ReleaseSearcher(indexSearcher); |
| } |
| } |
| /// <summary> |
| /// Add a clause to a boolean query. |
| /// </summary> |
| private static void Add(BooleanQuery q, string name, string value, float boost) |
| { |
| Query tq = new TermQuery(new Term(name, value)); |
| tq.Boost = boost; |
| q.Add(new BooleanClause(tq, BooleanClause.Occur.SHOULD)); |
| } |
| |
| /// <summary> |
| /// Add a clause to a boolean query. |
| /// </summary> |
| private static void Add(BooleanQuery q, string name, string value) |
| { |
| q.Add(new BooleanClause(new TermQuery(new Term(name, value)), BooleanClause.Occur.SHOULD)); |
| } |
| |
| /// <summary> |
| /// Form all ngrams for a given word. </summary> |
| /// <param name="text"> the word to parse </param> |
| /// <param name="ng"> the ngram length e.g. 3 </param> |
| /// <returns> an array of all ngrams in the word and note that duplicates are not removed </returns> |
| private static string[] FormGrams(string text, int ng) |
| { |
| int len = text.Length; |
| string[] res = new string[len - ng + 1]; |
| for (int i = 0; i < len - ng + 1; i++) |
| { |
| res[i] = text.Substring(i, ng); |
| } |
| return res; |
| } |
| |
| /// <summary> |
| /// Removes all terms from the spell check index. </summary> |
| /// <exception cref="IOException"> If there is a low-level I/O error. </exception> |
| /// <exception cref="AlreadyClosedException"> if the Spellchecker is already closed </exception> |
| public virtual void ClearIndex() |
| { |
| lock (modifyCurrentIndexLock) |
| { |
| EnsureOpen(); |
| var dir = this.spellIndex; |
| using (var writer = new IndexWriter(dir, new IndexWriterConfig(Version.LUCENE_CURRENT, null) |
| .SetOpenMode(IndexWriterConfig.OpenMode.CREATE))) { } |
| SwapSearcher(dir); |
| } |
| } |
| |
| /// <summary> |
| /// Check whether the word exists in the index. </summary> |
| /// <param name="word"> word to check </param> |
| /// <exception cref="IOException"> If there is a low-level I/O error. </exception> |
| /// <exception cref="AlreadyClosedException"> if the Spellchecker is already closed </exception> |
| /// <returns> true if the word exists in the index </returns> |
| public virtual bool Exist(string word) |
| { |
| // obtainSearcher calls ensureOpen |
| IndexSearcher indexSearcher = ObtainSearcher(); |
| try |
| { |
| // TODO: we should use ReaderUtil+seekExact, we dont care about the docFreq |
| // this is just an existence check |
| return indexSearcher.IndexReader.DocFreq(new Term(F_WORD, word)) > 0; |
| } |
| finally |
| { |
| ReleaseSearcher(indexSearcher); |
| } |
| } |
| |
| /// <summary> |
| /// Indexes the data from the given <seealso cref="Dictionary"/>. </summary> |
| /// <param name="dict"> Dictionary to index </param> |
| /// <param name="config"> <seealso cref="IndexWriterConfig"/> to use </param> |
| /// <param name="fullMerge"> whether or not the spellcheck index should be fully merged </param> |
| /// <exception cref="AlreadyClosedException"> if the Spellchecker is already closed </exception> |
| /// <exception cref="IOException"> If there is a low-level I/O error. </exception> |
| public void IndexDictionary(Dictionary dict, IndexWriterConfig config, bool fullMerge) |
| { |
| lock (modifyCurrentIndexLock) |
| { |
| EnsureOpen(); |
| Directory dir = this.spellIndex; |
| using (var writer = new IndexWriter(dir, config)) |
| { |
| IndexSearcher indexSearcher = ObtainSearcher(); |
| IList<TermsEnum> termsEnums = new List<TermsEnum>(); |
| |
| IndexReader reader = searcher.IndexReader; |
| if (reader.MaxDoc() > 0) |
| { |
| foreach (AtomicReaderContext ctx in reader.Leaves()) |
| { |
| Terms terms = ctx.Reader().Terms(F_WORD); |
| if (terms != null) |
| { |
| termsEnums.Add(terms.Iterator(null)); |
| } |
| } |
| } |
| |
| bool isEmpty = termsEnums.Count == 0; |
| |
| try |
| { |
| BytesRefIterator iter = dict.EntryIterator; |
| BytesRef currentTerm; |
| |
| while ((currentTerm = iter.Next()) != null) |
| { |
| |
| string word = currentTerm.Utf8ToString(); |
| int len = word.Length; |
| if (len < 3) |
| { |
| continue; // too short we bail but "too long" is fine... |
| } |
| |
| if (!isEmpty) |
| { |
| foreach (TermsEnum te in termsEnums) |
| { |
| if (te.SeekExact(currentTerm)) |
| { |
| goto termsContinue; |
| } |
| } |
| } |
| |
| // ok index the word |
| var doc = CreateDocument(word, GetMin(len), GetMax(len)); |
| writer.AddDocument(doc); |
| termsContinue: |
| ; |
| } |
| termsBreak: |
| ; |
| } |
| finally |
| { |
| ReleaseSearcher(indexSearcher); |
| } |
| if (fullMerge) |
| { |
| writer.ForceMerge(1); |
| } |
| } |
| // TODO: this isn't that great, maybe in the future SpellChecker should take |
| // IWC in its ctor / keep its writer open? |
| |
| // also re-open the spell index to see our own changes when the next suggestion |
| // is fetched: |
| SwapSearcher(dir); |
| } |
| } |
| |
| private static int GetMin(int l) |
| { |
| if (l > 5) |
| { |
| return 3; |
| } |
| if (l == 5) |
| { |
| return 2; |
| } |
| return 1; |
| } |
| |
| private static int GetMax(int l) |
| { |
| if (l > 5) |
| { |
| return 4; |
| } |
| if (l == 5) |
| { |
| return 3; |
| } |
| return 2; |
| } |
| |
| private static Document CreateDocument(string text, int ng1, int ng2) |
| { |
| var doc = new Document(); |
| // the word field is never queried on... its indexed so it can be quickly |
| // checked for rebuild (and stored for retrieval). Doesn't need norms or TF/pos |
| Field f = new StringField(F_WORD, text, Field.Store.YES); |
| doc.Add(f); // orig term |
| AddGram(text, doc, ng1, ng2); |
| return doc; |
| } |
| |
| private static void AddGram(string text, Document doc, int ng1, int ng2) |
| { |
| int len = text.Length; |
| for (int ng = ng1; ng <= ng2; ng++) |
| { |
| string key = "gram" + ng; |
| string end = null; |
| for (int i = 0; i < len - ng + 1; i++) |
| { |
| string gram = text.Substring(i, ng); |
| FieldType ft = new FieldType(StringField.TYPE_NOT_STORED); |
| ft.IndexOptions = FieldInfo.IndexOptions.DOCS_AND_FREQS; |
| Field ngramField = new Field(key, gram, ft); |
| // spellchecker does not use positional queries, but we want freqs |
| // for scoring these multivalued n-gram fields. |
| doc.Add(ngramField); |
| if (i == 0) |
| { |
| // only one term possible in the startXXField, TF/pos and norms aren't needed. |
| Field startField = new StringField("start" + ng, gram, Field.Store.NO); |
| doc.Add(startField); |
| } |
| end = gram; |
| } |
| if (end != null) // may not be present if len==ng1 |
| { |
| // only one term possible in the endXXField, TF/pos and norms aren't needed. |
| Field endField = new StringField("end" + ng, end, Field.Store.NO); |
| doc.Add(endField); |
| } |
| } |
| } |
| |
| private IndexSearcher ObtainSearcher() |
| { |
| lock (searcherLock) |
| { |
| EnsureOpen(); |
| searcher.IndexReader.IncRef(); |
| return searcher; |
| } |
| } |
| |
| private void ReleaseSearcher(IndexSearcher aSearcher) |
| { |
| // don't check if open - always decRef |
| // don't decrement the private searcher - could have been swapped |
| aSearcher.IndexReader.DecRef(); |
| } |
| |
| private void EnsureOpen() |
| { |
| if (closed) |
| { |
| throw new AlreadyClosedException("Spellchecker has been closed"); |
| } |
| } |
| |
| /// <summary> |
| /// Close the IndexSearcher used by this SpellChecker </summary> |
| /// <exception cref="IOException"> if the close operation causes an <seealso cref="IOException"/> </exception> |
| /// <exception cref="AlreadyClosedException"> if the <seealso cref="SpellChecker"/> is already closed </exception> |
| public void Dispose() |
| { |
| lock (searcherLock) |
| { |
| EnsureOpen(); |
| closed = true; |
| if (searcher != null) |
| { |
| searcher.IndexReader.Dispose(); |
| } |
| searcher = null; |
| } |
| } |
| |
| private void SwapSearcher(Directory dir) |
| { |
| /* |
| * opening a searcher is possibly very expensive. |
| * We rather close it again if the Spellchecker was closed during |
| * this operation than block access to the current searcher while opening. |
| */ |
| IndexSearcher indexSearcher = CreateSearcher(dir); |
| lock (searcherLock) |
| { |
| if (closed) |
| { |
| indexSearcher.IndexReader.Dispose(); |
| throw new AlreadyClosedException("Spellchecker has been closed"); |
| } |
| if (searcher != null) |
| { |
| searcher.IndexReader.Dispose(); |
| } |
| // set the spellindex in the sync block - ensure consistency. |
| searcher = indexSearcher; |
| this.spellIndex = dir; |
| } |
| } |
| |
| /// <summary> |
| /// Creates a new read-only IndexSearcher </summary> |
| /// <param name="dir"> the directory used to open the searcher </param> |
| /// <returns> a new read-only IndexSearcher </returns> |
| /// <exception cref="IOException"> f there is a low-level IO error </exception> |
| // for testing purposes |
| internal virtual IndexSearcher CreateSearcher(Directory dir) |
| { |
| return new IndexSearcher(DirectoryReader.Open(dir)); |
| } |
| |
| /// <summary> |
| /// Returns <code>true</code> if and only if the <seealso cref="SpellChecker"/> is |
| /// closed, otherwise <code>false</code>. |
| /// </summary> |
| /// <returns> <code>true</code> if and only if the <seealso cref="SpellChecker"/> is |
| /// closed, otherwise <code>false</code>. </returns> |
| internal virtual bool Closed |
| { |
| get |
| { |
| return closed; |
| } |
| } |
| |
| } |
| } |