// blob: a144d333ded19230730831b40c92ed9fd70be3e1 [file] [log] [blame]
using Lucene.Net.Analysis;
using Lucene.Net.Documents;
using Lucene.Net.Index;
using Lucene.Net.Store;
using Lucene.Net.Util;
using NUnit.Framework;
using System;
namespace Lucene.Net.Search.Spell
{
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
public class TestDirectSpellChecker : LuceneTestCase
{
/// <summary>
/// Indexes a handful of near-identical terms, asks for suggestions for a
/// misspelled term, and checks that every suggestion's score equals the value
/// produced by the spell checker's own distance measure, in both argument orders.
/// </summary>
[Test]
public void TestInternalLevenshteinDistance()
{
    DirectSpellChecker spellchecker = new DirectSpellChecker();

    // The using blocks release reader, writer and directory (in that order)
    // even when one of the assertions below fails.
    using (Directory dir = NewDirectory())
    using (RandomIndexWriter writer = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
        this,
#endif
        Random, dir,
        new MockAnalyzer(Random, MockTokenizer.KEYWORD, true)))
    {
        // The last term ends in a character outside the ASCII range
        // (presumably to exercise surrogate-pair handling -- TODO confirm).
        string[] termsToAdd = { "metanoia", "metanoian", "metanoiai", "metanoias", "metanoi𐑍" };
        foreach (string term in termsToAdd)
        {
            Document doc = new Document();
            doc.Add(NewTextField("repentance", term, Field.Store.NO));
            writer.AddDocument(doc);
        }

        using (IndexReader ir = writer.GetReader())
        {
            string misspelled = "metanoix";
            SuggestWord[] similar = spellchecker.SuggestSimilar(
                new Term("repentance", misspelled), 4, ir, SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX);
            assertEquals(4, similar.Length);

            IStringDistance sd = spellchecker.Distance;
            assertTrue(sd is LuceneLevenshteinDistance);

            foreach (SuggestWord word in similar)
            {
                // Exact float equality is intended: the score must be the very value
                // the distance implementation returns, regardless of argument order.
                assertTrue(word.Score == sd.GetDistance(word.String, misspelled));
                assertTrue(word.Score == sd.GetDistance(misspelled, word.String)); // LUCENENET TODO: Perhaps change this to word.ToString()?
            }
        }
    }
}
/// <summary>
/// Indexes the English words for 0..19 and checks that several misspellings of
/// "five" are corrected to "five", that a correctly spelled term is not suggested
/// for itself, and that terms added after the first reader was opened are found
/// through a freshly opened reader.
/// </summary>
[Test]
public void TestSimpleExamples()
{
    DirectSpellChecker spellChecker = new DirectSpellChecker();
    spellChecker.MinQueryLength = 0;

    // The using blocks release readers, writer and directory even when an
    // assertion fails part-way through the test.
    using (Directory dir = NewDirectory())
    using (RandomIndexWriter writer = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
        this,
#endif
        Random, dir,
        new MockAnalyzer(Random, MockTokenizer.SIMPLE, true)))
    {
        for (int i = 0; i < 20; i++)
        {
            Document doc = new Document();
            doc.Add(NewTextField("numbers", English.Int32ToEnglish(i), Field.Store.NO));
            writer.AddDocument(doc);
        }

        SuggestWord[] similar;

        using (IndexReader ir = writer.GetReader())
        {
            similar = spellChecker.SuggestSimilar(new Term("numbers", "fvie"), 2, ir,
                SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX);
            assertTrue(similar.Length > 0);
            assertEquals("five", similar[0].String);

            similar = spellChecker.SuggestSimilar(new Term("numbers", "five"), 2, ir,
                SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX);
            if (similar.Length > 0)
            {
                assertFalse(similar[0].String.Equals("five", StringComparison.Ordinal)); // don't suggest a word for itself
            }

            similar = spellChecker.SuggestSimilar(new Term("numbers", "fvie"), 2, ir,
                SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX);
            assertTrue(similar.Length > 0);
            assertEquals("five", similar[0].String);

            similar = spellChecker.SuggestSimilar(new Term("numbers", "fiv"), 2, ir,
                SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX);
            assertTrue(similar.Length > 0);
            assertEquals("five", similar[0].String);

            similar = spellChecker.SuggestSimilar(new Term("numbers", "fives"), 2, ir,
                SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX);
            assertTrue(similar.Length > 0);
            assertEquals("five", similar[0].String);

            // The length check must follow the query it guards; previously it ran
            // before the "fie" lookup and therefore re-checked the "fives" result.
            similar = spellChecker.SuggestSimilar(new Term("numbers", "fie"), 2, ir,
                SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX);
            assertTrue(similar.Length > 0);
            assertEquals("five", similar[0].String);

            // add some more documents (while the first reader is still open)
            for (int i = 1000; i < 1100; i++)
            {
                Document doc = new Document();
                doc.Add(NewTextField("numbers", English.Int32ToEnglish(i), Field.Store.NO));
                writer.AddDocument(doc);
            }
        }

        // look ma, no spellcheck index rebuild
        using (IndexReader ir = writer.GetReader())
        {
            similar = spellChecker.SuggestSimilar(new Term("numbers", "tousand"), 10,
                ir, SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX);
            assertTrue(similar.Length > 0);
            assertEquals("thousand", similar[0].String);
        }
    }
}
/// <summary>
/// Exercises the individual <see cref="DirectSpellChecker"/> options
/// (MaxQueryFrequency, MinQueryLength, MaxEdits, Accuracy, MinPrefix) one at a
/// time against a tiny index, using a fresh spell checker for each option so
/// that every other setting stays at its default.
/// </summary>
[Test]
public void TestOptions()
{
    // The using blocks release reader, writer and directory even when an
    // assertion fails.
    using (Directory dir = NewDirectory())
    using (RandomIndexWriter writer = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
        this,
#endif
        Random, dir,
        new MockAnalyzer(Random, MockTokenizer.SIMPLE, true)))
    {
        // NOTE: one Document instance is reused, so every AddDocument call indexes
        // all fields added so far (1, 2, 3, then 4 "text" fields). This is kept
        // as-is because it determines the document frequencies seen below.
        Document doc = new Document();
        doc.Add(NewTextField("text", "foobar", Field.Store.NO));
        writer.AddDocument(doc);
        doc.Add(NewTextField("text", "foobar", Field.Store.NO));
        writer.AddDocument(doc);
        doc.Add(NewTextField("text", "foobaz", Field.Store.NO));
        writer.AddDocument(doc);
        doc.Add(NewTextField("text", "fobar", Field.Store.NO));
        writer.AddDocument(doc);

        using (IndexReader ir = writer.GetReader())
        {
            // MaxQueryFrequency = 0: "fobar" is present in the index; no suggestion expected.
            DirectSpellChecker spellChecker = new DirectSpellChecker();
            spellChecker.MaxQueryFrequency = 0F;
            SuggestWord[] similar = spellChecker.SuggestSimilar(new Term("text", "fobar"), 1, ir,
                SuggestMode.SUGGEST_MORE_POPULAR);
            assertEquals(0, similar.Length);

            // MinQueryLength = 5: the 4-character query "foba" is too short.
            spellChecker = new DirectSpellChecker(); // reset defaults
            spellChecker.MinQueryLength = 5;
            similar = spellChecker.SuggestSimilar(new Term("text", "foba"), 1, ir,
                SuggestMode.SUGGEST_MORE_POPULAR);
            assertEquals(0, similar.Length);

            // MaxEdits = 1: "foobazzz" is two edits away from "foobaz".
            spellChecker = new DirectSpellChecker(); // reset defaults
            spellChecker.MaxEdits = 1;
            similar = spellChecker.SuggestSimilar(new Term("text", "foobazzz"), 1, ir,
                SuggestMode.SUGGEST_MORE_POPULAR);
            assertEquals(0, similar.Length);

            // Accuracy = 0.9: no candidate for "foobazzz" is expected to score that high.
            spellChecker = new DirectSpellChecker(); // reset defaults
            spellChecker.Accuracy = 0.9F;
            similar = spellChecker.SuggestSimilar(new Term("text", "foobazzz"), 1, ir,
                SuggestMode.SUGGEST_MORE_POPULAR);
            assertEquals(0, similar.Length);

            // MinPrefix = 0: "roobaz" differs from the indexed terms in its first
            // character, which is allowed when no common prefix is required.
            spellChecker = new DirectSpellChecker(); // reset defaults
            spellChecker.MinPrefix = 0;
            similar = spellChecker.SuggestSimilar(new Term("text", "roobaz"), 1, ir,
                SuggestMode.SUGGEST_MORE_POPULAR);
            assertEquals(1, similar.Length);
            // Repeat the identical query; its result used to be discarded unchecked.
            similar = spellChecker.SuggestSimilar(new Term("text", "roobaz"), 1, ir,
                SuggestMode.SUGGEST_MORE_POPULAR);
            assertEquals(1, similar.Length);

            // MinPrefix = 1: the first character must now match, so nothing is suggested.
            spellChecker = new DirectSpellChecker(); // reset defaults
            spellChecker.MinPrefix = 1;
            similar = spellChecker.SuggestSimilar(new Term("text", "roobaz"), 1, ir,
                SuggestMode.SUGGEST_MORE_POPULAR);
            assertEquals(0, similar.Length);

            // MaxEdits = 2 with SUGGEST_ALWAYS: two suggestions are expected for "fobar"
            // even though the term itself is in the index.
            spellChecker = new DirectSpellChecker(); // reset defaults
            spellChecker.MaxEdits = 2;
            similar = spellChecker.SuggestSimilar(new Term("text", "fobar"), 2, ir,
                SuggestMode.SUGGEST_ALWAYS);
            assertEquals(2, similar.Length);
        }
    }
}
/// <summary>
/// Asks for suggestions on a field name that was never indexed and checks that
/// an empty result is returned.
/// </summary>
[Test]
public void TestBogusField()
{
    DirectSpellChecker spellChecker = new DirectSpellChecker();

    // The using blocks release reader, writer and directory even when the
    // assertion fails.
    using (Directory dir = NewDirectory())
    using (RandomIndexWriter writer = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
        this,
#endif
        Random, dir,
        new MockAnalyzer(Random, MockTokenizer.SIMPLE, true)))
    {
        // Only the "numbers" field is populated.
        for (int i = 0; i < 20; i++)
        {
            Document doc = new Document();
            doc.Add(NewTextField("numbers", English.Int32ToEnglish(i), Field.Store.NO));
            writer.AddDocument(doc);
        }

        using (IndexReader ir = writer.GetReader())
        {
            SuggestWord[] similar = spellChecker.SuggestSimilar(
                new Term("bogusFieldBogusField", "fvie"), 2, ir,
                SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX);
            assertEquals(0, similar.Length);
        }
    }
}
/// <summary>
/// Simple test that transpositions work: with a single suggestion requested,
/// "five" is suggested for "fvie" (ed=1).
/// </summary>
[Test]
public void TestTransposition()
{
    DirectSpellChecker spellChecker = new DirectSpellChecker();

    // The using blocks release reader, writer and directory even when an
    // assertion fails.
    using (Directory dir = NewDirectory())
    using (RandomIndexWriter writer = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
        this,
#endif
        Random, dir,
        new MockAnalyzer(Random, MockTokenizer.SIMPLE, true)))
    {
        // Index the English words for 0..19.
        for (int i = 0; i < 20; i++)
        {
            Document doc = new Document();
            doc.Add(NewTextField("numbers", English.Int32ToEnglish(i), Field.Store.NO));
            writer.AddDocument(doc);
        }

        using (IndexReader ir = writer.GetReader())
        {
            SuggestWord[] similar = spellChecker.SuggestSimilar(
                new Term("numbers", "fvie"), 1, ir,
                SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX);
            assertEquals(1, similar.Length);
            assertEquals("five", similar[0].String);
        }
    }
}
/// <summary>
/// Simple test that transpositions work: "seventeen" is the only suggestion
/// for "seevntene" (ed=2), even though two suggestions are requested.
/// </summary>
[Test]
public void TestTransposition2()
{
    DirectSpellChecker spellChecker = new DirectSpellChecker();

    // The using blocks release reader, writer and directory even when an
    // assertion fails.
    using (Directory dir = NewDirectory())
    using (RandomIndexWriter writer = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
        this,
#endif
        Random, dir,
        new MockAnalyzer(Random, MockTokenizer.SIMPLE, true)))
    {
        // Index the English words for 0..19.
        for (int i = 0; i < 20; i++)
        {
            Document doc = new Document();
            doc.Add(NewTextField("numbers", English.Int32ToEnglish(i), Field.Store.NO));
            writer.AddDocument(doc);
        }

        using (IndexReader ir = writer.GetReader())
        {
            SuggestWord[] similar = spellChecker.SuggestSimilar(
                new Term("numbers", "seevntene"), 2, ir,
                SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX);
            assertEquals(1, similar.Length);
            assertEquals("seventeen", similar[0].String);
        }
    }
}
}
}