// blob: 47450f1ca85bdd9b0734b5d5fb04fed53ff52725 [file] [log] [blame]
using J2N;
using J2N.Text;
using Lucene.Net.Analysis.Core;
using Lucene.Net.Analysis.En;
using Lucene.Net.Analysis.Miscellaneous;
using Lucene.Net.Util;
using NUnit.Framework;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Text;
namespace Lucene.Net.Analysis.Miscellaneous
{
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
public class TestStemmerOverrideFilter : BaseTokenStreamTestCase
{
/// <summary>
/// Checks that a single dictionary entry ("booked" -> "books") replaces the
/// token and that the downstream Porter stemmer leaves the replacement alone.
/// </summary>
[Test]
public virtual void TestOverride()
{
    // Register the one override under test. The override filter rewrites
    // "booked" to "books" and flags the token as a keyword, so the Porter
    // stemmer that follows must not stem it any further.
    var overrides = new StemmerOverrideFilter.Builder();
    overrides.Add("booked", "books");

    // Chain: keyword tokenizer -> override filter -> Porter stemmer.
    TokenStream chain = new PorterStemFilter(
        new StemmerOverrideFilter(
            new KeywordTokenizer(new StringReader("booked")),
            overrides.Build()));

    string[] expected = { "books" };
    AssertTokenStreamContents(chain, expected);
}
/// <summary>
/// Checks that an override registered as "boOkEd" still applies to the
/// differently-cased input "BooKeD" when the builder is created with
/// <c>true</c>, and that the Porter stemmer does not alter the replacement.
/// </summary>
[Test]
public virtual void TestIgnoreCase()
{
    // The builder is created with 'true' (presumably the ignore-case switch,
    // as the test name indicates), so key and input differ only in casing.
    var overrides = new StemmerOverrideFilter.Builder(true);
    overrides.Add("boOkEd", "books");

    // Chain: keyword tokenizer -> override filter -> Porter stemmer.
    TokenStream chain = new PorterStemFilter(
        new StemmerOverrideFilter(
            new KeywordTokenizer(new StringReader("BooKeD")),
            overrides.Build()));

    string[] expected = { "books" };
    AssertTokenStreamContents(chain, expected);
}
/// <summary>
/// Checks that a filter built from an empty override dictionary passes the
/// token "book" through the chain and it comes out as "book".
/// </summary>
[Test]
public virtual void TestNoOverrides()
{
    // Nothing is added to the builder on purpose: the map is empty.
    var emptyOverrides = new StemmerOverrideFilter.Builder(true);

    TokenStream chain = new PorterStemFilter(
        new StemmerOverrideFilter(
            new KeywordTokenizer(new StringReader("book")),
            emptyOverrides.Build()));

    string[] expected = { "book" };
    AssertTokenStreamContents(chain, expected);
}
/// <summary>
/// Randomized test: builds a table of random whitespace-free keys mapped to
/// random simple replacement strings, feeds a random subset of the keys
/// (space separated) through a whitespace tokenizer, the override filter and
/// the Porter stemmer, and expects exactly the mapped replacements in order.
/// </summary>
[Test]
public virtual void TestRandomRealisticWhiteSpace()
{
    IDictionary<string, string> map = new Dictionary<string, string>();
    int numTerms = AtLeast(50);
    for (int i = 0; i < numTerms; i++)
    {
        string randomRealisticUnicodeString = TestUtil.RandomRealisticUnicodeString(Random);
        char[] charArray = randomRealisticUnicodeString.ToCharArray();

        // Copy the random string code point by code point, dropping whitespace
        // so that the key cannot be split apart by the whitespace tokenizer.
        StringBuilder sb = new StringBuilder();
        for (int j = 0; j < charArray.Length;)
        {
            int cp = Character.CodePointAt(charArray, j, charArray.Length);

            // Classify the full code point. Casting it to char first would
            // truncate supplementary code points to their low 16 bits and
            // classify an unrelated BMP character instead (e.g. U+12000
            // would be treated as U+2000 and wrongly discarded).
            if (!Character.IsWhiteSpace(cp))
            {
                sb.AppendCodePoint(cp);
            }
            j += Character.CharCount(cp);
        }

        if (sb.Length > 0)
        {
            // Never map to an empty replacement; fall back to "a".
            string value = TestUtil.RandomSimpleString(Random);
            map[sb.ToString()] = value.Length == 0 ? "a" : value;
        }
    }

    // Guarantee at least one entry so the assertion below is meaningful.
    if (map.Count == 0)
    {
        map["booked"] = "books";
    }

    StemmerOverrideFilter.Builder builder = new StemmerOverrideFilter.Builder(Random.nextBoolean());
    StringBuilder input = new StringBuilder();
    IList<string> output = new List<string>();
    foreach (KeyValuePair<string, string> entry in map)
    {
        // Every entry goes into the override map, but only a random subset
        // (always at least the first one) is placed in the analyzed text.
        builder.Add(entry.Key, entry.Value);
        if (Random.nextBoolean() || output.Count == 0)
        {
            input.Append(entry.Key).Append(" ");
            output.Add(entry.Value);
        }
    }

    Tokenizer tokenizer = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(input.ToString()));
    TokenStream stream = new PorterStemFilter(new StemmerOverrideFilter(tokenizer, builder.Build()));
    AssertTokenStreamContents(stream, output.ToArray());
}
/// <summary>
/// Randomized test: builds a table of random non-empty keys mapped to random
/// simple replacement strings, then for a random subset of the entries runs
/// the whole key as a single keyword token through the override filter and
/// the Porter stemmer, expecting exactly the mapped replacement.
/// </summary>
[Test]
public virtual void TestRandomRealisticKeyword()
{
    var overrides = new Dictionary<string, string>();
    for (int remaining = AtLeast(50); remaining > 0; remaining--)
    {
        string key = TestUtil.RandomRealisticUnicodeString(Random);
        if (key.Length == 0)
        {
            // Empty keys are skipped; no replacement is drawn for them.
            continue;
        }

        // Never map to an empty replacement; fall back to "a".
        string replacement = TestUtil.RandomSimpleString(Random);
        if (replacement.Length == 0)
        {
            replacement = "a";
        }
        overrides[key] = replacement;
    }

    // Guarantee at least one entry.
    if (overrides.Count == 0)
    {
        overrides["booked"] = "books";
    }

    var builder = new StemmerOverrideFilter.Builder(Random.nextBoolean());
    foreach (KeyValuePair<string, string> pair in overrides)
    {
        builder.Add(pair.Key, pair.Value);
    }

    // Build the map once and share it across all per-entry token streams.
    StemmerOverrideFilter.StemmerOverrideMap overrideMap = builder.Build();
    foreach (KeyValuePair<string, string> pair in overrides)
    {
        if (!Random.nextBoolean())
        {
            continue;
        }

        TokenStream chain = new PorterStemFilter(
            new StemmerOverrideFilter(
                new KeywordTokenizer(new StringReader(pair.Key)),
                overrideMap));
        AssertTokenStreamContents(chain, new string[] { pair.Value });
    }
}
}
}