// blob: dab7c1b284ed85baa128537afa47065de27833c3 [file] [log] [blame]
using Lucene.Net.Analysis.CharFilters;
using Lucene.Net.Analysis.CommonGrams;
using Lucene.Net.Analysis.Miscellaneous;
using Lucene.Net.Analysis.NGram;
using Lucene.Net.Analysis.Shingle;
using Lucene.Net.Analysis.Util;
using Lucene.Net.Analysis.Wikipedia;
using NUnit.Framework;
using System;
using System.IO;
using Console = Lucene.Net.Util.SystemConsole;
using JCG = J2N.Collections.Generic;
namespace Lucene.Net.Analysis.Core
{
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
// Port of Lucene's TestBugInSomething: a grab-bag of regression tests, each
// pinning a specific failure originally found by randomized analysis-chain
// testing. The odd-looking constants and strings below are the minimized
// reproducers from those failures — do not "clean them up".
[SuppressCodecs("Direct")]
public class TestBugInSomething : BaseTokenStreamTestCase
{
// Regression test: a fixed input is run through an analyzer whose
// initReader chain is MockCharFilter -> MappingCharFilter and whose
// token chain is MockTokenizer -> CommonGramsFilter. The
// CheckThatYouDidntReadAnythingReaderWrapper fails the test if the
// tokenizer's construction reads ahead from the underlying reader.
[Test]
public virtual void Test()
{
// Common-grams vocabulary. Note "jjp" is also a mapping output below,
// so the char-filter rewrites feed directly into this filter.
CharArraySet cas = new CharArraySet(TEST_VERSION_CURRENT, 3, false);
cas.add("jjp");
cas.add("wlmwoknt");
cas.add("tcgyreo");
// Character mappings applied by MappingCharFilter before tokenization.
NormalizeCharMap.Builder builder = new NormalizeCharMap.Builder();
builder.Add("mtqlpi", "");
builder.Add("mwoknt", "jjp");
builder.Add("tcgyreo", "zpfpajyws");
NormalizeCharMap map = builder.Build();
Analyzer a = Analyzer.NewAnonymous(createComponents: (fieldName, reader) =>
{
// -65 is presumably the max-token-length argument from the original
// randomized failure seed — TODO confirm against the MockTokenizer ctor.
Tokenizer t = new MockTokenizer(new TestRandomChains.CheckThatYouDidntReadAnythingReaderWrapper(reader), MockTokenFilter.ENGLISH_STOPSET, false, -65);
TokenFilter f = new CommonGramsFilter(TEST_VERSION_CURRENT, t, cas);
return new TokenStreamComponents(t, f);
}, initReader: (fieldName, reader) =>
{
reader = new MockCharFilter(reader, 0);
reader = new MappingCharFilter(map, reader);
return reader;
});
CheckAnalysisConsistency(Random, a, false, "wmgddzunizdomqyj");
}
// A CharFilter whose every member throws NotSupportedException with a
// message naming that member. TestWrapping wraps it and checks that each
// call on the wrapper reaches the underlying stream — the thrown message
// identifies exactly which member was invoked.
internal CharFilter wrappedStream = new CharFilterAnonymousClass(new StringReader("bogus"));
private sealed class CharFilterAnonymousClass : CharFilter
{
public CharFilterAnonymousClass(StringReader java) : base(java)
{
}
public override void Mark(int readAheadLimit)
{
throw new NotSupportedException("Mark(int)");
}
public override bool IsMarkSupported => throw new NotSupportedException("IsMarkSupported");
public override int Read()
{
throw new NotSupportedException("Read()");
}
// LUCENENET: We don't support these overloads in .NET
// public override int Read(char[] cbuf)
// {
//throw new NotSupportedException("Read(char[])");
// }
//public override int read(CharBuffer target)
//{
// throw new NotSupportedException("Read(CharBuffer)");
//}
public override bool IsReady => throw new NotSupportedException("Ready()");
public override void Reset()
{
throw new NotSupportedException("Reset()");
}
// NOTE: messages below intentionally keep the original Java member names
// ("Skip(long)", "Close()") even where the .NET signatures differ, so the
// assertions in TestWrapping stay aligned with the upstream Java test.
public override long Skip(int n)
{
throw new NotSupportedException("Skip(long)");
}
protected override int Correct(int currentOff)
{
throw new NotSupportedException("Correct(int)");
}
protected override void Dispose(bool disposing)
{
throw new NotSupportedException("Close()");
}
public override int Read(char[] arg0, int arg1, int arg2)
{
throw new NotSupportedException("Read(char[], int, int)");
}
}
// Verifies that CheckThatYouDidntReadAnythingReaderWrapper is a pure
// delegating wrapper: every member must forward to the wrapped stream.
// Each call is expected to surface the sentinel exception thrown by
// CharFilterAnonymousClass above, with the message naming the member.
[Test]
public virtual void TestWrapping()
{
CharFilter cs = new TestRandomChains.CheckThatYouDidntReadAnythingReaderWrapper(wrappedStream);
try
{
cs.Mark(1);
fail();
}
catch (Exception e)
{
assertEquals("Mark(int)", e.Message);
}
try
{
var supported = cs.IsMarkSupported;
fail();
}
catch (Exception e)
{
assertEquals("IsMarkSupported", e.Message);
}
try
{
cs.Read();
fail();
}
catch (Exception e)
{
assertEquals("Read()", e.Message);
}
try
{
// lowercase read(char[]) is a Java-compatibility extension method in
// the test environment; it cascades to Read(char[], int, int).
cs.read(new char[0]);
fail();
}
catch (Exception e)
{
// LUCENENET NOTE: TextReader doesn't support an overload that doesn't supply
// index and count. We have an extension method that does in test environment,
// but the error will be for the cascaded overload
//assertEquals("Read(char[])", e.Message);
assertEquals("Read(char[], int, int)", e.Message);
}
// LUCENENET NOTE: We don't have a CharBuffer type in Lucene.Net,
// nor do we have an overload that accepts it.
//try
//{
// cs.read(CharBuffer.wrap(new char[0]));
// fail();
//}
//catch (Exception e)
//{
// assertEquals("Read(CharBuffer)", e.Message);
//}
try
{
cs.Reset();
fail();
}
catch (Exception e)
{
assertEquals("Reset()", e.Message);
}
try
{
cs.Skip(1);
fail();
}
catch (Exception e)
{
assertEquals("Skip(long)", e.Message);
}
try
{
// CorrectOffset on the wrapper is expected to reach the wrapped
// stream's protected Correct(int).
cs.CorrectOffset(1);
fail();
}
catch (Exception e)
{
assertEquals("Correct(int)", e.Message);
}
try
{
cs.Dispose();
fail();
}
catch (Exception e)
{
assertEquals("Close()", e.Message);
}
try
{
cs.Read(new char[0], 0, 0);
fail();
}
catch (Exception e)
{
assertEquals("Read(char[], int, int)", e.Message);
}
}
// todo: test framework?
// Debug-only pass-through filter ("Sop" = System.out.println in the Java
// original): logs each token, plus End/Dispose/Reset lifecycle calls, to
// the console while delegating everything to the wrapped stream.
internal sealed class SopTokenFilter : TokenFilter
{
internal SopTokenFilter(TokenStream input) : base(input)
{
}
public override bool IncrementToken()
{
if (m_input.IncrementToken())
{
// Dump the full attribute state of the current token for tracing.
Console.WriteLine(m_input.GetType().Name + "->" + this.ReflectAsString(false));
return true;
}
else
{
return false;
}
}
public override void End()
{
base.End();
Console.WriteLine(m_input.GetType().Name + ".end()");
}
protected override void Dispose(bool disposing)
{
base.Dispose(disposing);
if (disposing)
{
Console.WriteLine(m_input.GetType().Name + ".close()");
}
}
public override void Reset()
{
base.Reset();
Console.WriteLine(m_input.GetType().Name + ".reset()");
}
}
// LUCENE-5269
// Regression: random data through EdgeNGramTokenizer -> ShingleFilter ->
// NGramTokenFilter. The commented-out SopTokenFilter wrappers were the
// tracing aids used while minimizing the original failure.
[Test]
[Slow]
public virtual void TestUnicodeShinglesAndNgrams()
{
Analyzer analyzer = Analyzer.NewAnonymous(createComponents: (fieldName, reader) =>
{
Tokenizer tokenizer = new EdgeNGramTokenizer(TEST_VERSION_CURRENT, reader, 2, 94);
//TokenStream stream = new SopTokenFilter(tokenizer);
TokenStream stream = new ShingleFilter(tokenizer, 5);
//stream = new SopTokenFilter(stream);
stream = new NGramTokenFilter(TEST_VERSION_CURRENT, stream, 55, 83);
//stream = new SopTokenFilter(stream);
return new TokenStreamComponents(tokenizer, stream);
});
CheckRandomData(Random, analyzer, 2000);
}
// Regression: a specific string (containing a supplementary character as
// the surrogate pair \ud800\udfc2 and wiki markup) run through
// WikipediaTokenizer -> WordDelimiterFilter with randomized parameters
// minimized from the original failure.
[Test]
public virtual void TestCuriousWikipediaString()
{
CharArraySet protWords = new CharArraySet(TEST_VERSION_CURRENT, new JCG.HashSet<string> { "rrdpafa", "pupmmlu", "xlq", "dyy", "zqrxrrck", "o", "hsrlfvcha" }, false);
// The Java test uses signed byte literals; the sbyte[]->byte[] cast
// preserves their bit patterns. Presumably this is the filter's
// char-type table argument — TODO confirm against WordDelimiterFilter.
byte[] table = (byte[])(Array)new sbyte[] { -57, 26, 1, 48, 63, -23, 55, -84, 18, 120, -97, 103, 58, 13, 84, 89, 57, -13, -63, 5, 28, 97, -54, -94, 102, -108, -5, 5, 46, 40, 43, 78, 43, -72, 36, 29, 124, -106, -22, -51, 65, 5, 31, -42, 6, -99, 97, 14, 81, -128, 74, 100, 54, -55, -25, 53, -71, -98, 44, 33, 86, 106, -42, 47, 115, -89, -18, -26, 22, -95, -43, 83, -125, 105, -104, -24, 106, -16, 126, 115, -105, 97, 65, -33, 57, 44, -1, 123, -68, 100, 13, -41, -64, -119, 0, 92, 94, -36, 53, -9, -102, -18, 90, 94, -26, 31, 71, -20 };
Analyzer a = Analyzer.NewAnonymous(createComponents: (fieldName, reader) =>
{
Tokenizer tokenizer = new WikipediaTokenizer(reader);
TokenStream stream = new SopTokenFilter(tokenizer);
// The double cast forces the raw Java flags value -50 (an out-of-range
// bit pattern from the randomized seed) into the .NET flags enum.
stream = new WordDelimiterFilter(TEST_VERSION_CURRENT, stream, table, (WordDelimiterFlags)(object)-50, protWords);
stream = new SopTokenFilter(stream);
return new TokenStreamComponents(tokenizer, stream);
});
CheckAnalysisConsistency(Random, a, false, "B\u28c3\ue0f8[ \ud800\udfc2 </p> jb");
}
}
}