// Source blob: 92c323d6473d09747f59ec7bead5aeb3cf69a3f5
using J2N.Threading;
using J2N.Threading.Atomic;
using Lucene.Net.Analysis;
using Lucene.Net.Documents;
using Lucene.Net.Index;
using Lucene.Net.Store;
using Lucene.Net.Support;
using Lucene.Net.Util;
using NUnit.Framework;
using System;
using System.Collections.Concurrent;
using System.Collections.Generic;
using System.Globalization;
using System.Linq;
using System.Threading;
namespace Lucene.Net.Search.Spell
{
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/// <summary>
/// Spell checker test case
/// </summary>
public class TestSpellChecker : LuceneTestCase
{
// Spell checker under test; re-created in SetUp on top of spellindex.
private SpellCheckerMock spellChecker;
// userindex holds the documents written in SetUp; spellindex is the directory handed to the spell checker.
private Directory userindex, spellindex;
// Every IndexSearcher returned by SpellCheckerMock.CreateSearcher is added here so that tests can
// count the searchers created and inspect their readers' ref counts. Replaced with a fresh bag in SetUp.
internal static ConcurrentBag<IndexSearcher> searchers;
/// <summary>
/// Builds the per-test fixture: a "user" index with 1000 generated documents plus three
/// extra documents that repeat selected words, a fresh searcher bag, and a new
/// <see cref="SpellCheckerMock"/> over an empty spell index.
/// </summary>
public override void SetUp()
{
    base.SetUp();

    // create a user index
    userindex = NewDirectory();

    // The writer is scoped with 'using' so it is released even if indexing throws part-way through.
    using (IndexWriter writer = new IndexWriter(userindex, new IndexWriterConfig(
        TEST_VERSION_CURRENT, new MockAnalyzer(Random))))
    {
        for (int i = 0; i < 1000; i++)
        {
            Document doc = new Document();
            doc.Add(NewTextField("field1", English.Int32ToEnglish(i), Field.Store.YES));
            doc.Add(NewTextField("field2", English.Int32ToEnglish(i + 1), Field.Store.YES)); // + word thousand
            doc.Add(NewTextField("field3", "fvei" + (i % 2 == 0 ? " five" : ""), Field.Store.YES)); // "fvei" always, "five" only for even i
            writer.AddDocument(doc);
        }

        {
            Document doc = new Document();
            doc.Add(NewTextField("field1", "eight", Field.Store.YES)); // "eight" in the index twice
            writer.AddDocument(doc);
        }

        {
            Document doc = new Document();
            doc.Add(NewTextField("field1", "twenty-one twenty-one", Field.Store.YES)); // "twenty-one" in the index thrice
            writer.AddDocument(doc);
        }

        {
            Document doc = new Document();
            doc.Add(NewTextField("field1", "twenty", Field.Store.YES)); // "twenty" in the index twice
            writer.AddDocument(doc);
        }
    }

    // Must be reset before the spell checker is constructed: SpellCheckerMock.CreateSearcher
    // adds to this bag (TestConcurrentAccess expects one searcher right after setup).
    searchers = new ConcurrentBag<IndexSearcher>();

    // create the spellChecker
    spellindex = NewDirectory();
    spellChecker = new SpellCheckerMock(spellindex);
}
/// <summary>
/// Releases the fixture created in <c>SetUp</c>: the user index, the spell checker
/// (unless the test already disposed it) and the spell index, then runs the base teardown.
/// </summary>
public override void TearDown()
{
    userindex.Dispose();

    // Some tests (e.g. TestClose) dispose the spell checker themselves.
    bool stillOpen = !spellChecker.IsDisposed;
    if (stillOpen)
    {
        spellChecker.Dispose();
    }

    spellindex.Dispose();
    base.TearDown();
}
/// <summary>
/// Builds the spell index from "field1" and "field2" and verifies the suggestions produced
/// with the default distance, then with <see cref="JaroWinklerDistance"/> and
/// <see cref="NGramDistance"/>.
/// </summary>
[Test]
public void TestBuild()
{
    using (IndexReader r = DirectoryReader.Open(userindex))
    {
        spellChecker.ClearIndex();

        Addwords(r, spellChecker, "field1");
        int num_field1 = this.NumDoc();

        Addwords(r, spellChecker, "field2");
        int num_field2 = this.NumDoc();

        assertEquals(num_field2, num_field1 + 1);

        AssertLastSearcherOpen(4);

        CheckCommonSuggestions(r);
        CheckLevenshteinSuggestions(r);

        spellChecker.StringDistance = new JaroWinklerDistance();
        spellChecker.Accuracy = 0.8f;
        CheckCommonSuggestions(r);
        CheckJaroWinklerSuggestions();

        // An explicit accuracy is passed below; per the original note the best result for
        // "fvie" has a score of 0.925, so 0.93 filters it out while 0.92 lets it through.
        string[] similar = spellChecker.SuggestSimilar("fvie", 2, 0.93f);
        assertEquals(0, similar.Length);
        similar = spellChecker.SuggestSimilar("fvie", 2, 0.92f);
        assertEquals(1, similar.Length);

        similar = spellChecker.SuggestSimilar("fiv", 2);
        assertTrue(similar.Length > 0);
        assertEquals("five", similar[0]);

        spellChecker.StringDistance = new NGramDistance(2);
        spellChecker.Accuracy = 0.5f;
        CheckCommonSuggestions(r);
        CheckNGramSuggestions();
    }
}
/// <summary>
/// Verifies that a spell checker created with <see cref="SuggestWordFrequencyComparer"/>
/// returns both candidates for "fvie" from "field3" in the expected order.
/// </summary>
[Test]
public void TestComparer()
{
    using (Directory compIdx = NewDirectory())
    {
        SpellChecker compareSP = new SpellCheckerMock(compIdx, new LevensteinDistance(), new SuggestWordFrequencyComparer());
        try
        {
            using (IndexReader r = DirectoryReader.Open(userindex))
            {
                Addwords(r, compareSP, "field3");

                string[] similar = compareSP.SuggestSimilar("fvie", 2, r, "field3",
                    SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX);

                // assertEquals (rather than assertTrue on a comparison) reports the actual length on failure.
                assertEquals(2, similar.Length);

                // five and fvei have the same score, but different frequencies.
                assertEquals("fvei", similar[0]);
                assertEquals("five", similar[1]);
            }
        }
        finally
        {
            if (!compareSP.IsDisposed)
            {
                compareSP.Dispose();
            }
        }
    }
}
/// <summary>
/// Verifies that asking for suggestions restricted to a field name that is not used
/// by the fixture ("bogusFieldBogusField") yields no suggestions.
/// </summary>
[Test]
public void TestBogusField()
{
    using (Directory comparerIndex = NewDirectory())
    {
        IStringDistance distance = new LevensteinDistance();
        IComparer<SuggestWord> byFrequency = new SuggestWordFrequencyComparer();
        SpellChecker checker = new SpellCheckerMock(comparerIndex, distance, byFrequency);
        try
        {
            using (IndexReader reader = DirectoryReader.Open(userindex))
            {
                Addwords(reader, checker, "field3");

                string[] suggestions = checker.SuggestSimilar(
                    "fvie",
                    2,
                    reader,
                    "bogusFieldBogusField",
                    SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX);

                assertEquals(0, suggestions.Length);
            }
        }
        finally
        {
            bool stillOpen = !checker.IsDisposed;
            if (stillOpen)
            {
                checker.Dispose();
            }
        }
    }
}
/// <summary>
/// Exercises each <see cref="SuggestMode"/> against "field1" and checks the number of
/// suggestions returned and, where any are expected, the first suggestion.
/// </summary>
[Test]
public void TestSuggestModes()
{
    using (IndexReader reader = DirectoryReader.Open(userindex))
    {
        spellChecker.ClearIndex();
        Addwords(reader, spellChecker, "field1");

        string[] suggestions;

        // SUGGEST_WHEN_NOT_IN_INDEX: both words come back as their own single suggestion.
        suggestions = spellChecker.SuggestSimilar("eighty", 2, reader, "field1", SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX);
        assertEquals(1, suggestions.Length);
        assertEquals("eighty", suggestions[0]);

        suggestions = spellChecker.SuggestSimilar("eight", 2, reader, "field1", SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX);
        assertEquals(1, suggestions.Length);
        assertEquals("eight", suggestions[0]);

        // SUGGEST_MORE_POPULAR
        suggestions = spellChecker.SuggestSimilar("eighty", 5, reader, "field1", SuggestMode.SUGGEST_MORE_POPULAR);
        assertEquals(5, suggestions.Length);
        assertEquals("eight", suggestions[0]);

        suggestions = spellChecker.SuggestSimilar("twenty", 5, reader, "field1", SuggestMode.SUGGEST_MORE_POPULAR);
        assertEquals(1, suggestions.Length);
        assertEquals("twenty-one", suggestions[0]);

        suggestions = spellChecker.SuggestSimilar("eight", 5, reader, "field1", SuggestMode.SUGGEST_MORE_POPULAR);
        assertEquals(0, suggestions.Length);

        // SUGGEST_ALWAYS
        suggestions = spellChecker.SuggestSimilar("eighty", 5, reader, "field1", SuggestMode.SUGGEST_ALWAYS);
        assertEquals(5, suggestions.Length);
        assertEquals("eight", suggestions[0]);

        suggestions = spellChecker.SuggestSimilar("eight", 5, reader, "field1", SuggestMode.SUGGEST_ALWAYS);
        assertEquals(5, suggestions.Length);
        assertEquals("eighty", suggestions[0]);
    }
}
/// <summary>
/// Checks the suggestions that every test in this fixture expects from the shared
/// <c>spellChecker</c>, whichever string distance is configured: misspellings of "five"
/// resolve to "five", and "tousand" only has a suggestion when restricted to "field2".
/// Also invoked from <c>SpellCheckWorker</c> threads.
/// </summary>
/// <param name="r">Reader over the user index, used for the field-restricted lookups.</param>
private void CheckCommonSuggestions(IndexReader r)
{
    string[] similar = spellChecker.SuggestSimilar("fvie", 2);
    assertTrue(similar.Length > 0);
    assertEquals("five", similar[0]);

    similar = spellChecker.SuggestSimilar("five", 2);
    if (similar.Length > 0)
    {
        assertFalse(similar[0].Equals("five", StringComparison.Ordinal)); // don't suggest a word for itself
    }

    similar = spellChecker.SuggestSimilar("fiv", 2);
    assertTrue(similar.Length > 0);
    assertEquals("five", similar[0]);

    similar = spellChecker.SuggestSimilar("fives", 2);
    assertTrue(similar.Length > 0);
    assertEquals("five", similar[0]);

    // The length guard must come AFTER the lookup it protects; otherwise an empty result
    // surfaces as an IndexOutOfRangeException instead of an assertion failure.
    similar = spellChecker.SuggestSimilar("fie", 2);
    assertTrue(similar.Length > 0);
    assertEquals("five", similar[0]);

    // test restraint to a field
    similar = spellChecker.SuggestSimilar("tousand", 10, r, "field1",
        SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX);
    assertEquals(0, similar.Length); // there isn't the term thousand in the field field1

    similar = spellChecker.SuggestSimilar("tousand", 10, r, "field2",
        SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX);
    assertEquals(1, similar.Length); // there is the term thousand in the field field2
}
/// <summary>
/// Checks the exact suggestion lists expected while the spell checker is still using its
/// initial string distance (called from <c>TestBuild</c> before the distance is changed).
/// </summary>
/// <param name="r">Reader over the user index, used for the field-restricted lookups.</param>
private void CheckLevenshteinSuggestions(IndexReader r)
{
    // test small word
    string[] similar = spellChecker.SuggestSimilar("fvie", 2);
    assertEquals(1, similar.Length);
    assertEquals("five", similar[0]);

    similar = spellChecker.SuggestSimilar("five", 2);
    assertEquals(1, similar.Length);
    assertEquals("nine", similar[0]); // don't suggest a word for itself

    similar = spellChecker.SuggestSimilar("fiv", 2);
    assertEquals(1, similar.Length);
    assertEquals("five", similar[0]);

    similar = spellChecker.SuggestSimilar("ive", 2);
    assertEquals(2, similar.Length);
    assertEquals("five", similar[0]);
    assertEquals("nine", similar[1]);

    similar = spellChecker.SuggestSimilar("fives", 2);
    assertEquals(1, similar.Length);
    assertEquals("five", similar[0]);

    similar = spellChecker.SuggestSimilar("fie", 2);
    assertEquals(2, similar.Length);
    assertEquals("five", similar[0]);
    assertEquals("nine", similar[1]);

    similar = spellChecker.SuggestSimilar("fi", 2);
    assertEquals(1, similar.Length);
    assertEquals("five", similar[0]);

    // test restraint to a field
    similar = spellChecker.SuggestSimilar("tousand", 10, r, "field1",
        SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX);
    assertEquals(0, similar.Length); // there isn't the term thousand in the field field1

    similar = spellChecker.SuggestSimilar("tousand", 10, r, "field2",
        SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX);
    assertEquals(1, similar.Length); // there is the term thousand in the field field2

    similar = spellChecker.SuggestSimilar("onety", 2);
    assertEquals(2, similar.Length);
    assertEquals("ninety", similar[0]);
    assertEquals("one", similar[1]);

    // A null field name must be tolerated; only the absence of a NullReferenceException
    // is checked here, the returned suggestions are not inspected.
    try
    {
        spellChecker.SuggestSimilar("tousand", 10, r, null,
            SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX);
    }
    catch (NullReferenceException /*e*/)
    {
        fail("threw an NPE, and it shouldn't have");
    }
}
/// <summary>
/// Checks the two suggestions expected for "onety" after <c>TestBuild</c> has switched the
/// spell checker to <see cref="JaroWinklerDistance"/>: "one" first, then "ninety".
/// </summary>
private void CheckJaroWinklerSuggestions()
{
    string[] similar = spellChecker.SuggestSimilar("onety", 2);
    assertEquals(2, similar.Length);
    // Expected value first, so a failure message labels the values correctly.
    assertEquals("one", similar[0]);
    assertEquals("ninety", similar[1]);
}
/// <summary>
/// Checks the two suggestions expected for "onety" after <c>TestBuild</c> has switched the
/// spell checker to <see cref="NGramDistance"/>: "one" first, then "ninety".
/// </summary>
private void CheckNGramSuggestions()
{
    string[] similar = spellChecker.SuggestSimilar("onety", 2);
    assertEquals(2, similar.Length);
    // Expected value first, so a failure message labels the values correctly.
    assertEquals("one", similar[0]);
    assertEquals("ninety", similar[1]);
}
/// <summary>
/// Feeds every term of <paramref name="field"/> (read through <paramref name="r"/>) into
/// the spell index of <paramref name="sc"/> via a <see cref="LuceneDictionary"/>.
/// </summary>
/// <param name="r">Reader over the index that supplies the words.</param>
/// <param name="sc">Spell checker whose index is extended.</param>
/// <param name="field">Name of the field whose terms are added.</param>
private void Addwords(IndexReader r, SpellChecker sc, string field)
{
    // The original timed this call but never used the measurement; the dead locals were removed.
    // NOTE(review): the trailing 'false' is presumably the fullMerge flag — confirm against SpellChecker.IndexDictionary.
    sc.IndexDictionary(new LuceneDictionary(r, field), NewIndexWriterConfig(TEST_VERSION_CURRENT, null), false);
}
/// <summary>
/// Opens a reader on the spell index, asserts that it contains at least one document
/// and returns the document count.
/// </summary>
/// <returns>The number of documents currently in the spell index (never 0).</returns>
private int NumDoc()
{
    // 'using' guarantees the reader is released even when the assertion below fails.
    using (IndexReader rs = DirectoryReader.Open(spellindex))
    {
        int num = rs.NumDocs;
        assertTrue(num != 0);
        return num;
    }
}
/// <summary>
/// Verifies that disposing the spell checker closes all searchers it created and that
/// every subsequent operation on it throws <see cref="ObjectDisposedException"/>.
/// </summary>
[Test]
public void TestClose()
{
    using (IndexReader reader = DirectoryReader.Open(userindex))
    {
        string field = "field1";

        spellChecker.ClearIndex();
        Addwords(reader, spellChecker, "field1");
        int countAfterField1 = this.NumDoc();
        Addwords(reader, spellChecker, "field2");
        int countAfterField2 = this.NumDoc();
        assertEquals(countAfterField2, countAfterField1 + 1);

        CheckCommonSuggestions(reader);
        AssertLastSearcherOpen(4);

        spellChecker.Dispose();
        AssertSearchersClosed();

        // LUCENENET NOTE: a second Dispose() is deliberately NOT expected to throw here.
        // Per MSDN, calling Dispose() multiple times should be a safe operation
        // (http://stackoverflow.com/a/5306896/181087), so that check is intentionally omitted.

        bool rejected;

        // Suggesting on a disposed spell checker must be rejected.
        rejected = false;
        try
        {
            CheckCommonSuggestions(reader);
        }
        catch (ObjectDisposedException)
        {
            rejected = true; // expected
        }
        if (!rejected)
        {
            fail("spellchecker was already closed");
        }

        // Clearing the index must be rejected.
        rejected = false;
        try
        {
            spellChecker.ClearIndex();
        }
        catch (ObjectDisposedException)
        {
            rejected = true; // expected
        }
        if (!rejected)
        {
            fail("spellchecker was already closed");
        }

        // Indexing a dictionary must be rejected.
        rejected = false;
        try
        {
            spellChecker.IndexDictionary(new LuceneDictionary(reader, field), NewIndexWriterConfig(TEST_VERSION_CURRENT, null), false);
        }
        catch (ObjectDisposedException)
        {
            rejected = true; // expected
        }
        if (!rejected)
        {
            fail("spellchecker was already closed");
        }

        // Swapping the spell index must be rejected.
        rejected = false;
        try
        {
            spellChecker.SetSpellIndex(spellindex);
        }
        catch (ObjectDisposedException)
        {
            rejected = true; // expected
        }
        if (!rejected)
        {
            fail("spellchecker was already closed");
        }

        // None of the rejected calls may have created another searcher.
        assertEquals(4, searchers.Count);
        AssertSearchersClosed();
    }
}
/// <summary>
/// Tests if the internally shared index searcher is correctly closed
/// when the spell checker is concurrently accessed and closed.
/// </summary>
[Test]
public void TestConcurrentAccess()
{
    assertEquals(1, searchers.Count);
    using (IndexReader r = DirectoryReader.Open(userindex))
    {
        spellChecker.ClearIndex();
        assertEquals(2, searchers.Count);
        Addwords(r, spellChecker, "field1");
        assertEquals(3, searchers.Count);
        int num_field1 = this.NumDoc();
        Addwords(r, spellChecker, "field2");
        assertEquals(4, searchers.Count);
        int num_field2 = this.NumDoc();
        assertEquals(num_field2, num_field1 + 1);

        int numThreads = 5 + Random.nextInt(5);
        SpellCheckWorker[] workers = new SpellCheckWorker[numThreads];
        var stop = new AtomicBoolean(false);
        for (int i = 0; i < numThreads; i++)
        {
            SpellCheckWorker spellCheckWorker = new SpellCheckWorker(this, r, stop);
            workers[i] = spellCheckWorker;
            spellCheckWorker.Start();
        }

        int iterations = 5 + Random.nextInt(5);
        for (int i = 0; i < iterations; i++)
        {
            Thread.Sleep(100);
            // concurrently reset the spell index
            spellChecker.SetSpellIndex(this.spellindex);
            // for debug - prints the internal open searchers
            // showSearchersOpen();
        }

        // Dispose while the workers are still running, then ask them to stop.
        spellChecker.Dispose();
        stop.Value = true;

        // wait for 60 seconds - usually this is very fast but coverage runs could take quite long
        foreach (SpellCheckWorker worker in workers)
        {
            worker.Join((long)TimeSpan.FromSeconds(60).TotalMilliseconds);
        }

        for (int i = 0; i < workers.Length; i++)
        {
            // The worker's exception is passed as a format ARGUMENT rather than concatenated into the
            // format string: exception text may contain '{' or '}', which would make string.Format
            // throw a FormatException and hide the real failure.
            assertFalse(string.Format(CultureInfo.InvariantCulture, "worker thread {0} failed \n{1}", i, workers[i].Error), workers[i].Error != null);
            assertTrue(string.Format(CultureInfo.InvariantCulture, "worker thread {0} is still running but should be terminated", i), workers[i].terminated);
        }

        // 4 searchers more than iterations
        // 1. at creation
        // 2. clearIndex()
        // 3. and 4. during addwords
        assertEquals(iterations + 4, searchers.Count);
        AssertSearchersClosed();
    }
}
/// <summary>
/// Asserts that exactly <paramref name="numSearchers"/> searchers have been recorded and
/// that only the most recently created one still has a reader with a positive ref count.
/// </summary>
/// <param name="numSearchers">Expected number of searchers created so far.</param>
private void AssertLastSearcherOpen(int numSearchers)
{
    assertEquals(numSearchers, searchers.Count);

    // LUCENENET NOTE: The ConcurrentBag.Add() method adds each item to the
    // beginning of the list, so we end up with a reverse order array.
    // We can correct that here, since this is the only part of the
    // test that cares about the order.
    IndexSearcher[] inCreationOrder = searchers.Reverse().ToArray();
    int newest = inCreationOrder.Length - 1;

    // Everything before the newest searcher must already be closed...
    for (int idx = 0; idx < newest; idx++)
    {
        bool open = inCreationOrder[idx].IndexReader.RefCount > 0;
        assertFalse("expected closed searcher but was open - Index: " + idx, open);
    }

    // ...and the newest one (if any) must still be open.
    if (newest >= 0)
    {
        bool open = inCreationOrder[newest].IndexReader.RefCount > 0;
        assertTrue("expected last searcher open but was closed", open);
    }
}
/// <summary>
/// Asserts that the reader of every recorded searcher has a ref count of zero.
/// </summary>
private void AssertSearchersClosed()
{
    foreach (IndexSearcher recorded in searchers)
    {
        int remainingRefs = recorded.IndexReader.RefCount;
        assertEquals(0, remainingRefs);
    }
}
// For debug
// private void showSearchersOpen() {
// int count = 0;
// for (IndexSearcher searcher : searchers) {
// if(searcher.getIndexReader().getRefCount() > 0)
// ++count;
// }
// System.out.println(count);
// }
/// <summary>
/// Background job that repeatedly runs <c>CheckCommonSuggestions</c> on the owning test
/// until the shared stop flag is set or the spell checker has been disposed. Any other
/// exception is recorded in <see cref="Error"/>; <c>terminated</c> is set when the job exits.
/// </summary>
private class SpellCheckWorker : ThreadJob
{
    private readonly TestSpellChecker outerInstance;
    private readonly IndexReader reader;
    private readonly AtomicBoolean stop;

    // Written by the worker thread, read by the test thread after Join.
    private volatile Exception error;
    internal volatile bool terminated = false;

    public SpellCheckWorker(TestSpellChecker outerInstance, IndexReader reader, AtomicBoolean stop)
    {
        this.stop = stop;
        this.reader = reader;
        this.outerInstance = outerInstance;
    }

    /// <summary>The unexpected exception that ended this worker, or <c>null</c> if there was none.</summary>
    public Exception Error
    {
        get { return error; }
    }

    public override void Run()
    {
#if FEATURE_THREAD_PRIORITY
        Priority += 1;
#endif
        try
        {
            while (!stop.Value)
            {
                try
                {
                    outerInstance.CheckCommonSuggestions(reader);
                    Thread.Sleep(10); // don't starve refresh()'s CPU, which sleeps every 50 bytes for 1 ms
                }
                catch (ObjectDisposedException)
                {
                    // The spell checker was disposed underneath us: a normal way for this job to end.
                    return;
                }
                catch (Exception unexpected)
                {
                    unexpected.printStackTrace();
                    error = unexpected;
                    return;
                }
            }
        }
        finally
        {
            // Set on every exit path so the test can tell the job really finished.
            terminated = true;
        }
    }
}
/// <summary>
/// Returns the stack trace of an exception raised on a worker thread as a string, so that an
/// assertion made outside that thread can include it. The trace is also printed.
/// </summary>
/// <param name="error">The exception to describe; may be <c>null</c>.</param>
/// <returns>The exception's stack trace, or an empty string when <paramref name="error"/> is <c>null</c>.</returns>
private string stackTraceStr(Exception error)
{
    if (error != null)
    {
        error.printStackTrace();
        return error.StackTrace;
    }

    return "";
}
/// <summary>
/// <see cref="SpellChecker"/> subclass that records every searcher it creates in
/// <see cref="TestSpellChecker.searchers"/> so the tests can inspect them afterwards.
/// </summary>
internal class SpellCheckerMock : SpellChecker
{
    public SpellCheckerMock(Directory spellIndex)
        : base(spellIndex)
    {
    }

    public SpellCheckerMock(Directory spellIndex, IStringDistance sd)
        : base(spellIndex, sd)
    {
    }

    public SpellCheckerMock(Directory spellIndex, IStringDistance sd, IComparer<SuggestWord> comparer)
        : base(spellIndex, sd, comparer)
    {
    }

    /// <summary>
    /// Delegates to the base implementation, then adds the new searcher to the shared bag
    /// before handing it back.
    /// </summary>
    internal override IndexSearcher CreateSearcher(Directory dir)
    {
        IndexSearcher created = base.CreateSearcher(dir);
        searchers.Add(created);
        return created;
    }
}
}
}