// blob: 656dba59f2cf8228f43b69ea77f83843ba726258 [file] [log] [blame]
using J2N.Runtime.CompilerServices;
using J2N.Text;
using Lucene.Net.Analysis.CharFilters;
using Lucene.Net.Analysis.Cjk;
using Lucene.Net.Analysis.CommonGrams;
using Lucene.Net.Analysis.Compound;
using Lucene.Net.Analysis.Compound.Hyphenation;
using Lucene.Net.Analysis.Hunspell;
using Lucene.Net.Analysis.Miscellaneous;
using Lucene.Net.Analysis.NGram;
using Lucene.Net.Analysis.No;
using Lucene.Net.Analysis.Path;
using Lucene.Net.Analysis.Payloads;
using Lucene.Net.Analysis.Snowball;
using Lucene.Net.Analysis.Standard;
using Lucene.Net.Analysis.Synonym;
using Lucene.Net.Analysis.Util;
using Lucene.Net.Analysis.Wikipedia;
using Lucene.Net.Diagnostics;
using Lucene.Net.Support;
using Lucene.Net.Tartarus.Snowball;
using Lucene.Net.TestFramework.Analysis;
using Lucene.Net.Util;
using Lucene.Net.Util.Automaton;
using NUnit.Framework;
using System;
using System.Collections.Generic;
using System.Globalization;
using System.IO;
using System.Linq;
using System.Reflection;
using System.Text;
using System.Text.RegularExpressions;
using Console = Lucene.Net.Util.SystemConsole;
using JCG = J2N.Collections.Generic;
namespace Lucene.Net.Analysis.Core
{
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/// <summary>
/// tests random analysis chains </summary>
public class TestRandomChains : BaseTokenStreamTestCase
{
// Candidate constructors for each component kind. They are filled in
// BeforeClass, sorted by their string form, and set back to null in AfterClass.
internal static List<ConstructorInfo> tokenizers;
internal static List<ConstructorInfo> tokenfilters;
internal static List<ConstructorInfo> charfilters;
/// <summary>
/// A boolean test over a single value. Within this class it is closed over
/// <c>object[]</c> so that a constructor's argument list can be tested.
/// </summary>
private interface IPredicate<T>
{
/// <summary>Evaluates the predicate against <paramref name="o"/>.</summary>
bool Apply(T o);
}
/// <summary>Shared predicate instance that matches every argument list.</summary>
private static readonly IPredicate<object[]> ALWAYS = new PredicateAnonymousClass();

/// <summary>Predicate whose <see cref="Apply"/> is unconditionally <c>true</c>.</summary>
private class PredicateAnonymousClass : IPredicate<object[]>
{
    /// <summary>Returns <c>true</c> regardless of <paramref name="args"/>.</summary>
    public virtual bool Apply(object[] args) => true;
}
// Constructors that must not be used, keyed by constructor. The predicate decides,
// for a concrete argument list, whether that particular invocation is broken;
// constructors mapped to ALWAYS are dropped from the candidate lists in BeforeClass.
private static readonly IDictionary<ConstructorInfo, IPredicate<object[]>> brokenConstructors = new Dictionary<ConstructorInfo, IPredicate<object[]>>();
// TODO: also fix these and remove (maybe):
// Classes/options that don't produce consistent graph offsets:
private static readonly IDictionary<ConstructorInfo, IPredicate<object[]>> brokenOffsetsConstructors = new Dictionary<ConstructorInfo, IPredicate<object[]>>();
// Parameter types a constructor may declare for each component kind; populated in
// the static constructor and checked against every candidate in BeforeClass.
internal static readonly ISet<Type> allowedTokenizerArgs, allowedTokenFilterArgs, allowedCharFilterArgs;
/// <summary>
/// Registers the known-broken constructors, the constructors known to produce
/// inconsistent offsets, and the parameter types allowed per component kind.
/// </summary>
[System.Diagnostics.CodeAnalysis.SuppressMessage("Performance", "CA1810:Initialize reference type static fields inline", Justification = "Complexity")]
static TestRandomChains()
{
    try
    {
        Type[] streamAndLimit = new Type[] { typeof(TokenStream), typeof(int) };
        Type[] streamLimitAndFlag = new Type[] { typeof(TokenStream), typeof(int), typeof(bool) };

        brokenConstructors[typeof(LimitTokenCountFilter).GetConstructor(streamAndLimit)] = ALWAYS;
        brokenConstructors[typeof(LimitTokenCountFilter).GetConstructor(streamLimitAndFlag)] = new PredicateAnonymousClass2();
        brokenConstructors[typeof(LimitTokenPositionFilter).GetConstructor(streamAndLimit)] = ALWAYS;
        brokenConstructors[typeof(LimitTokenPositionFilter).GetConstructor(streamLimitAndFlag)] = new PredicateAnonymousClass3();

        Type[] neverPicked = new Type[]
        {
            // TODO: can we promote some of these to be only
            // offsets offenders?
            // doesn't actually reset itself:
            typeof(CachingTokenFilter),
            // Not broken, simulates brokenness:
            typeof(CrankyTokenFilter),
            // Not broken: we forcefully add this, so we shouldn't
            // also randomly pick it:
            typeof(ValidatingTokenFilter)
        };
        foreach (ConstructorInfo ctor in neverPicked.SelectMany(type => type.GetConstructors()))
        {
            brokenConstructors[ctor] = ALWAYS;
        }
    }
    catch (Exception e)
    {
        throw new Exception(e.Message, e);
    }

    try
    {
        Type[] offsetOffenders = new Type[]
        {
            typeof(ReversePathHierarchyTokenizer),
            typeof(PathHierarchyTokenizer),
            // TODO: it seems to mess up offsets!?
            typeof(WikipediaTokenizer),
            // TODO: doesn't handle graph inputs
            typeof(CJKBigramFilter),
            // TODO: doesn't handle graph inputs (or even look at positionIncrement)
            typeof(HyphenatedWordsFilter),
            // TODO: LUCENE-4983
            typeof(CommonGramsFilter),
            // TODO: doesn't handle graph inputs
            typeof(CommonGramsQueryFilter),
            // TODO: probably doesn't handle graph inputs, too afraid to try
            typeof(WordDelimiterFilter)
        };
        foreach (ConstructorInfo ctor in offsetOffenders.SelectMany(type => type.GetConstructors()))
        {
            brokenOffsetsConstructors[ctor] = ALWAYS;
        }
    }
    catch (Exception e)
    {
        throw new Exception(e.Message, e);
    }

    // Every set starts from the types we have a random producer for, plus the
    // types that the New*Args methods supply by hand.
    ISet<Type> tokenizerArgs = new JCG.HashSet<Type>(IdentityEqualityComparer<Type>.Default);
    tokenizerArgs.addAll(argProducers.Keys);
    tokenizerArgs.Add(typeof(TextReader));
    tokenizerArgs.Add(typeof(AttributeSource.AttributeFactory));
    tokenizerArgs.Add(typeof(AttributeSource));
    allowedTokenizerArgs = tokenizerArgs;

    ISet<Type> filterArgs = new JCG.HashSet<Type>(IdentityEqualityComparer<Type>.Default);
    filterArgs.addAll(argProducers.Keys);
    filterArgs.Add(typeof(TokenStream));
    // TODO: fix this one, that's broken:
    filterArgs.Add(typeof(CommonGramsFilter));
    allowedTokenFilterArgs = filterArgs;

    ISet<Type> charFilterArgs = new JCG.HashSet<Type>(IdentityEqualityComparer<Type>.Default);
    charFilterArgs.addAll(argProducers.Keys);
    charFilterArgs.Add(typeof(TextReader));
    allowedCharFilterArgs = charFilterArgs;
}
/// <summary>
/// Predicate registered for the three-argument LimitTokenCountFilter constructor:
/// the invocation counts as broken when its third (boolean) argument is <c>false</c>.
/// </summary>
private class PredicateAnonymousClass2 : IPredicate<object[]>
{
    public virtual bool Apply(object[] args)
    {
        if (Debugging.AssertsEnabled)
        {
            Debugging.Assert(args.Length == 3);
        }

        // args are broken if consumeAllTokens is false
        bool consumeAllTokens = (bool)args[2];
        return !consumeAllTokens;
    }
}
/// <summary>
/// Predicate registered for the three-argument LimitTokenPositionFilter constructor:
/// the invocation counts as broken when its third (boolean) argument is <c>false</c>.
/// </summary>
private class PredicateAnonymousClass3 : IPredicate<object[]>
{
    public virtual bool Apply(object[] args)
    {
        if (Debugging.AssertsEnabled)
        {
            Debugging.Assert(args.Length == 3);
        }

        // args are broken if consumeAllTokens is false
        bool consumeAllTokens = (bool)args[2];
        return !consumeAllTokens;
    }
}
/// <summary>
/// Scans the assembly that contains <see cref="StandardAnalyzer"/> for public, concrete,
/// non-obsolete tokenizers, token filters and char filters, and collects their usable
/// constructors into <see cref="tokenizers"/>, <see cref="tokenfilters"/> and
/// <see cref="charfilters"/>. Constructors marked obsolete or registered as always
/// broken are skipped; every remaining constructor must only use allowed parameter types.
/// </summary>
[OneTimeSetUp]
public override void BeforeClass()
{
    base.BeforeClass();

    Type[] analysisClasses = typeof(StandardAnalyzer).Assembly.GetTypes()
        .Where(c => !c.IsAbstract && c.IsPublic && !c.IsInterface
            && c.IsClass && (c.GetCustomAttribute<ObsoleteAttribute>() == null)
            && (c.IsSubclassOf(typeof(Tokenizer)) || c.IsSubclassOf(typeof(TokenFilter)) || c.IsSubclassOf(typeof(CharFilter))))
        .ToArray();

    tokenizers = new List<ConstructorInfo>();
    tokenfilters = new List<ConstructorInfo>();
    charfilters = new List<ConstructorInfo>();

    foreach (Type c in analysisClasses)
    {
        foreach (ConstructorInfo ctor in c.GetConstructors())
        {
            // Single lookup instead of ContainsKey + indexer.
            if (ctor.GetCustomAttribute<ObsoleteAttribute>() != null
                || (brokenConstructors.TryGetValue(ctor, out IPredicate<object[]> pred) && pred == ALWAYS))
            {
                continue;
            }

            Type[] paramTypes = ctor.GetParameters().Select(p => p.ParameterType).ToArray();
            if (c.IsSubclassOf(typeof(Tokenizer)))
            {
                assertTrue(ctor.ToString() + " has unsupported parameter types",
                    allowedTokenizerArgs.containsAll(paramTypes));
                tokenizers.Add(ctor);
            }
            else if (c.IsSubclassOf(typeof(TokenFilter)))
            {
                assertTrue(ctor.ToString() + " has unsupported parameter types",
                    allowedTokenFilterArgs.containsAll(paramTypes));
                tokenfilters.Add(ctor);
            }
            else if (c.IsSubclassOf(typeof(CharFilter)))
            {
                assertTrue(ctor.ToString() + " has unsupported parameter types",
                    allowedCharFilterArgs.containsAll(paramTypes));
                charfilters.Add(ctor);
            }
            else
            {
                fail("Cannot get here");
            }
        }
    }

    // Sort by string form so the candidate order does not depend on reflection order.
    IComparer<ConstructorInfo> ctorComp = Comparer<ConstructorInfo>.Create((arg0, arg1) => arg0.ToString().CompareToOrdinal(arg1.ToString()));
    tokenizers.Sort(ctorComp);
    tokenfilters.Sort(ctorComp);
    charfilters.Sort(ctorComp);

    if (Verbose)
    {
        // Concatenating a List<T> only prints its type name, so join the elements instead.
        Console.WriteLine("tokenizers = " + string.Join(", ", tokenizers));
        Console.WriteLine("tokenfilters = " + string.Join(", ", tokenfilters));
        Console.WriteLine("charfilters = " + string.Join(", ", charfilters));
    }
}
/// <summary>Drops the constructor lists built in <see cref="BeforeClass"/>, then runs the base teardown.</summary>
[OneTimeTearDown]
public override void AfterClass()
{
    tokenizers = tokenfilters = charfilters = null;
    base.AfterClass();
}
/// <summary>
/// Factory for one constructor argument. Implementations are registered per
/// parameter type in <c>argProducers</c> and invoked from <c>NewRandomArg</c>.
/// </summary>
private interface IArgProducer
{
/// <summary>Produces an argument value, drawing any randomness from <paramref name="random"/>.</summary>
object Create(Random random);
}
/// <summary>
/// Maps a constructor parameter type to the producer that generates a value for it.
/// Keys are compared by identity.
/// </summary>
private static readonly IDictionary<Type, IArgProducer> argProducers = new JCG.Dictionary<Type, IArgProducer>(IdentityEqualityComparer<Type>.Default)
{
    // primitives and simple values
    { typeof(int), new IntArgProducer() },
    { typeof(char), new CharArgProducer() },
    { typeof(float), new FloatArgProducer() },
    { typeof(bool), new BooleanArgProducer() },
    { typeof(byte), new ByteArgProducer() },
    { typeof(byte[]), new ByteArrayArgProducer() },
    { typeof(sbyte[]), new SByteArrayArgProducer() },
    { typeof(Random), new RandomArgProducer() },
    { typeof(LuceneVersion), new VersionArgProducer() },

    // collections
    { typeof(IEnumerable<string>), new StringEnumerableArgProducer() },
    { typeof(ICollection<string>), new StringEnumerableArgProducer() },
    { typeof(ICollection<char[]>), new CharArrayCollectionArgProducer() },// CapitalizationFilter
    { typeof(CharArraySet), new CharArraySetArgProducer() },

    // analysis helper types
    { typeof(Regex), new RegexArgProducer() },
    { typeof(Regex[]), new RegexArrayArgProducer() },
    { typeof(IPayloadEncoder), new PayloadEncoderArgProducer() },
    { typeof(Dictionary), new DictionaryArgProducer() },
#pragma warning disable 612, 618
    { typeof(Lucene43EdgeNGramTokenizer.Side), new Lucene43SideArgProducer() },
#pragma warning restore 612, 618
    { typeof(EdgeNGramTokenFilter.Side), new SideArgProducer() },
    { typeof(HyphenationTree), new HyphenationTreeArgProducer() },
    { typeof(SnowballProgram), new SnowballProgramArgProducer() },
    { typeof(string), new StringArgProducer() },
    { typeof(NormalizeCharMap), new NormalizeCharMapArgProducer() },
    { typeof(CharacterRunAutomaton), new CharacterRunAutomatonArgProducer() },
    { typeof(CharArrayMap<string>), new StringCharArrayMapArgProducer() },
    { typeof(StemmerOverrideFilter.StemmerOverrideMap), new StemmerOverrideMapArgProducer() },
    { typeof(SynonymMap), new SynonymMapArgProducer() },

    // enums: pick any integer from 0 up to the sum of all declared member values
    { typeof(WordDelimiterFlags), new AnonymousProducer(
        random => (WordDelimiterFlags)random.Next(0, Enum.GetValues(typeof(WordDelimiterFlags)).Cast<int>().Sum() + 1)) }, // WordDelimiterFilter
    { typeof(NorwegianStandard), new AnonymousProducer(
        random => (NorwegianStandard)random.Next(0, Enum.GetValues(typeof(NorwegianStandard)).Cast<int>().Sum() + 1)) },
    { typeof(CJKScript), new AnonymousProducer(
        random => (CJKScript)random.Next(0, Enum.GetValues(typeof(CJKScript)).Cast<int>().Sum() + 1)) },
    { typeof(CultureInfo), new AnonymousProducer(random => LuceneTestCase.RandomCulture(random)) },
};
/// <summary>Produces an <see cref="int"/> obtained from <c>TestUtil.NextInt32(random, -100, 100)</c>.</summary>
private class IntArgProducer : IArgProducer
{
    // TODO: could cause huge ram usage to use full int range for some filters
    // (e.g. allocate enormous arrays)
    public object Create(Random random) => TestUtil.NextInt32(random, -100, 100);
}
/// <summary>Adapts a delegate to <see cref="IArgProducer"/> so one-off producers can be written inline.</summary>
private class AnonymousProducer : IArgProducer
{
    private readonly Func<Random, object> factory;

    /// <exception cref="ArgumentNullException"><paramref name="create"/> is <c>null</c>.</exception>
    public AnonymousProducer(Func<Random, object> create)
    {
        if (create is null)
        {
            throw new ArgumentNullException(nameof(create));
        }
        factory = create;
    }

    public object Create(Random random) => factory(random);
}
/// <summary>Produces a random <see cref="char"/> that is not a UTF-16 surrogate code unit.</summary>
private class CharArgProducer : IArgProducer
{
    public object Create(Random random)
    {
        // TODO: fix any filters that care to throw IAE instead.
        // also add a unicode validating filter to validate termAtt?
        char candidate;
        do
        {
            candidate = (char)random.nextInt(65536);
        }
        while (candidate >= '\uD800' && candidate <= '\uDFFF'); // redraw surrogates
        return candidate;
    }
}
/// <summary>Produces a <see cref="float"/> by narrowing <see cref="Random.NextDouble"/>.</summary>
private class FloatArgProducer : IArgProducer
{
    public object Create(Random random) => (float)random.NextDouble();
}
/// <summary>Produces a random <see cref="bool"/>.</summary>
private class BooleanArgProducer : IArgProducer
{
    public object Create(Random random) => random.nextBoolean();
}
/// <summary>Produces a random <see cref="byte"/>.</summary>
private class ByteArgProducer : IArgProducer
{
    // nextInt(256) is cast straight to byte.
    public object Create(Random random) => (byte)random.nextInt(256);
}
/// <summary>Produces a randomly sized <c>byte[]</c> (length drawn from <c>nextInt(256)</c>) filled with random bytes.</summary>
private class ByteArrayArgProducer : IArgProducer
{
    public object Create(Random random)
    {
        int length = random.nextInt(256);
        byte[] payload = new byte[length];
        random.NextBytes(payload);
        return payload;
    }
}
/// <summary>Produces a randomly sized array of random bytes, returned through a cast to <c>sbyte[]</c>.</summary>
private class SByteArrayArgProducer : IArgProducer
{
    public object Create(Random random)
    {
        int length = random.nextInt(256);
        byte[] payload = new byte[length];
        random.NextBytes(payload);
        // Same array instance, cast via Array to sbyte[].
        return (sbyte[])(Array)payload;
    }
}
/// <summary>Produces a new <see cref="Random"/> seeded from the supplied one.</summary>
private class RandomArgProducer : IArgProducer
{
    public object Create(Random random) => new Random(random.Next());
}
/// <summary>Always produces <c>TEST_VERSION_CURRENT</c>; the random source is not consulted.</summary>
private class VersionArgProducer : IArgProducer
{
    // we expect bugs in emulating old versions
    public object Create(Random random) => TEST_VERSION_CURRENT;
}
/// <summary>Produces a set built from up to four random draws out of <c>StandardTokenizer.TOKEN_TYPES</c>.</summary>
private class StringEnumerableArgProducer : IArgProducer
{
    public object Create(Random random)
    {
        // TypeTokenFilter
        ISet<string> tokenTypes = new JCG.HashSet<string>();
        for (int remaining = random.nextInt(5); remaining > 0; remaining--)
        {
            int pick = random.nextInt(StandardTokenizer.TOKEN_TYPES.Length);
            tokenTypes.Add(StandardTokenizer.TOKEN_TYPES[pick]);
        }
        return tokenTypes;
    }
}
/// <summary>Produces a list of up to four random simple strings, each converted to a <c>char[]</c>.</summary>
private class CharArrayCollectionArgProducer : IArgProducer
{
    public object Create(Random random)
    {
        // CapitalizationFilter
        ICollection<char[]> words = new List<char[]>();
        for (int remaining = random.nextInt(5); remaining > 0; remaining--)
        {
            string word = TestUtil.RandomSimpleString(random);
            words.Add(word.toCharArray());
        }
        return words;
    }
}
/// <summary>Produces a <c>CharArraySet</c> holding up to nine random simple strings; the boolean constructor argument is random.</summary>
private class CharArraySetArgProducer : IArgProducer
{
    public object Create(Random random)
    {
        int count = random.nextInt(10);
        CharArraySet words = new CharArraySet(TEST_VERSION_CURRENT, count, random.nextBoolean());
        for (int added = 0; added < count; added++)
        {
            // TODO: make nastier
            words.add(TestUtil.RandomSimpleString(random));
        }
        return words;
    }
}
/// <summary>Produces a fixed compiled regex that matches the letter "a".</summary>
private class RegexArgProducer : IArgProducer
{
    // TODO: don't want to make the exponentially slow ones Dawid documents
    // in TestPatternReplaceFilter, so don't use truly random patterns (for now)
    public object Create(Random random) => new Regex("a", RegexOptions.Compiled);
}
/// <summary>Produces a fixed pair of compiled regexes: lowercase letter runs and digit runs.</summary>
private class RegexArrayArgProducer : IArgProducer
{
    public object Create(Random random)
    {
        Regex letters = new Regex("([a-z]+)", RegexOptions.Compiled);
        Regex digits = new Regex("([0-9]+)", RegexOptions.Compiled);
        return new Regex[] { letters, digits };
    }
}
/// <summary>Always produces an <c>IdentityEncoder</c>.</summary>
private class PayloadEncoderArgProducer : IArgProducer
{
    // the other encoders will throw exceptions if tokens aren't numbers?
    public object Create(Random random) => new IdentityEncoder();
}
/// <summary>
/// Produces a Hunspell <c>Dictionary</c> loaded from the "simple.aff" / "simple.dic"
/// resources located via <c>TestHunspellStemFilter</c>. Any exception propagates unchanged.
/// </summary>
private class DictionaryArgProducer : IArgProducer
{
    public object Create(Random random)
    {
        // TODO: make nastier
        using Stream affixStream = typeof(TestHunspellStemFilter).getResourceAsStream("simple.aff");
        using Stream dictStream = typeof(TestHunspellStemFilter).getResourceAsStream("simple.dic");
        return new Dictionary(affixStream, dictStream);
    }
}
/// <summary>Produces <c>FRONT</c> or <c>BACK</c> of <c>Lucene43EdgeNGramTokenizer.Side</c>, chosen by a random boolean.</summary>
private class Lucene43SideArgProducer : IArgProducer
{
    public object Create(Random random)
    {
#pragma warning disable 612, 618
        if (random.nextBoolean())
        {
            return Lucene43EdgeNGramTokenizer.Side.FRONT;
        }
        return Lucene43EdgeNGramTokenizer.Side.BACK;
#pragma warning restore 612, 618
    }
}
/// <summary>Produces <c>FRONT</c> or <c>BACK</c> of <c>EdgeNGramTokenFilter.Side</c>, chosen by a random boolean.</summary>
private class SideArgProducer : IArgProducer
{
    public object Create(Random random)
    {
        if (random.nextBoolean())
        {
            return EdgeNGramTokenFilter.Side.FRONT;
        }
#pragma warning disable 612, 618
        return EdgeNGramTokenFilter.Side.BACK;
#pragma warning restore 612, 618
    }
}
/// <summary>
/// Produces a <c>HyphenationTree</c> read from the "da_UTF8.xml" resource located via
/// <c>TestCompoundWordTokenFilter</c>. Any exception propagates unchanged.
/// </summary>
private class HyphenationTreeArgProducer : IArgProducer
{
    public object Create(Random random)
    {
        // TODO: make nastier
        using Stream grammar = typeof(TestCompoundWordTokenFilter).getResourceAsStream("da_UTF8.xml");
        return HyphenationCompoundWordTokenFilter.GetHyphenationTree(grammar);
    }
}
/// <summary>
/// Produces a Snowball stemmer instance for a randomly chosen entry of
/// <c>TestSnowball.SNOWBALL_LANGS</c>, created reflectively through its
/// parameterless constructor.
/// </summary>
private class SnowballProgramArgProducer : IArgProducer
{
    public object Create(Random random)
    {
        string lang = TestSnowball.SNOWBALL_LANGS[random.nextInt(TestSnowball.SNOWBALL_LANGS.Length)];
        string typeName = "Lucene.Net.Tartarus.Snowball.Ext." + lang + "Stemmer, Lucene.Net.Analysis.Common";

        // throwOnError: an unknown stemmer name now fails with an exception that names the
        // missing type, instead of a NullReferenceException on the next line.
        Type clazz = Type.GetType(typeName, throwOnError: true);
        return clazz.GetConstructor(Type.EmptyTypes).Invoke(new object[0]);
    }
}
/// <summary>Produces either a random entry of <c>StandardTokenizer.TOKEN_TYPES</c> or a random simple string.</summary>
private class StringArgProducer : IArgProducer
{
    public object Create(Random random)
    {
        // TODO: make nastier
        bool useTokenType = random.nextBoolean();
        return useTokenType
            ? StandardTokenizer.TOKEN_TYPES[random.nextInt(StandardTokenizer.TOKEN_TYPES.Length)] // a token type
            : TestUtil.RandomSimpleString(random);
    }
}
/// <summary>
/// Produces a <c>NormalizeCharMap</c> with up to four random mappings. Empty keys and
/// keys that were already added are skipped.
/// </summary>
private class NormalizeCharMapArgProducer : IArgProducer
{
    public object Create(Random random)
    {
        NormalizeCharMap.Builder mapBuilder = new NormalizeCharMap.Builder();
        // we can't add duplicate keys, or NormalizeCharMap gets angry
        ISet<string> seenKeys = new JCG.HashSet<string>();
        for (int remaining = random.nextInt(5); remaining > 0; remaining--)
        {
            string key = TestUtil.RandomSimpleString(random);
            // Add returns false for a key we have already used.
            if (key.Length == 0 || !seenKeys.Add(key))
            {
                continue;
            }
            string replacement = TestUtil.RandomSimpleString(random);
            mapBuilder.Add(key, replacement);
        }
        return mapBuilder.Build();
    }
}
/// <summary>Produces one of five predefined automata from <c>MockTokenizer</c> / <c>MockTokenFilter</c>, chosen at random.</summary>
private class CharacterRunAutomatonArgProducer : IArgProducer
{
    public object Create(Random random)
    {
        // TODO: could probably use a purely random automaton
        int pick = random.nextInt(5);
        if (pick == 0)
        {
            return MockTokenizer.KEYWORD;
        }
        if (pick == 1)
        {
            return MockTokenizer.SIMPLE;
        }
        if (pick == 2)
        {
            return MockTokenizer.WHITESPACE;
        }
        if (pick == 3)
        {
            return MockTokenFilter.EMPTY_STOPSET;
        }
        return MockTokenFilter.ENGLISH_STOPSET;
    }
}
/// <summary>Produces a <c>CharArrayMap&lt;string&gt;</c> filled with up to nine random simple key/value pairs.</summary>
private class StringCharArrayMapArgProducer : IArgProducer
{
    public object Create(Random random)
    {
        int count = random.nextInt(10);
        CharArrayMap<string> entries = new CharArrayMap<string>(TEST_VERSION_CURRENT, count, random.nextBoolean());
        for (int added = 0; added < count; added++)
        {
            // TODO: make nastier
            string key = TestUtil.RandomSimpleString(random);
            string value = TestUtil.RandomSimpleString(random);
            entries.Put(key, value);
        }
        return entries;
    }
}
/// <summary>
/// Produces a <c>StemmerOverrideMap</c> with up to nine entries whose input and output
/// are non-empty random "realistic unicode" strings. Any exception from the build
/// propagates unchanged.
/// </summary>
private class StemmerOverrideMapArgProducer : IArgProducer
{
    public object Create(Random random)
    {
        int count = random.nextInt(10);
        StemmerOverrideFilter.Builder overrides = new StemmerOverrideFilter.Builder(random.nextBoolean());
        for (int added = 0; added < count; added++)
        {
            string input;
            do
            {
                input = TestUtil.RandomRealisticUnicodeString(random);
            }
            while (input.Length == 0); // LUCENENET: CA1820: Test for empty strings using string length

            // NOTE(review): the result is discarded, but the call still draws from
            // `random`; kept so the sequence of random values is unchanged.
            TestUtil.RandomSimpleString(random);

            string output;
            do
            {
                output = TestUtil.RandomRealisticUnicodeString(random);
            }
            while (output.Length == 0); // LUCENENET: CA1820: Test for empty strings using string length

            overrides.Add(input, output);
        }
        return overrides.Build();
    }
}
/// <summary>
/// Produces a <c>SynonymMap</c> built from at least ten random input/output pairs.
/// Any exception from the build propagates unchanged.
/// </summary>
private class SynonymMapArgProducer : IArgProducer
{
    public object Create(Random random)
    {
        SynonymMap.Builder b = new SynonymMap.Builder(random.nextBoolean());
        int numEntries = AtLeast(10);
        for (int j = 0; j < numEntries; j++)
        {
            AddSyn(b, RandomNonEmptyString(random), RandomNonEmptyString(random), random.nextBoolean());
        }
        return b.Build();
    }

    /// <summary>
    /// Adds one rule after converting every run of spaces in <paramref name="input"/> and
    /// <paramref name="output"/> into a single U+0000 character.
    /// </summary>
    private static void AddSyn(SynonymMap.Builder b, string input, string output, bool keepOrig)
    {
        // " +" is a regular expression (one or more spaces). string.Replace would only
        // match the literal two-character text " +", leaving the spaces in place.
        b.Add(new CharsRef(Regex.Replace(input, " +", "\u0000")),
            new CharsRef(Regex.Replace(output, " +", "\u0000")),
            keepOrig);
    }

    /// <summary>Draws random unicode strings until one is non-empty after trimming and contains no U+0000.</summary>
    private static string RandomNonEmptyString(Random random)
    {
        while (true)
        {
            string s = TestUtil.RandomUnicodeString(random).Trim();
            if (s.Length != 0 && s.IndexOf('\u0000') == -1)
            {
                return s;
            }
        }
    }
}
/// <summary>
/// Creates a random argument of <paramref name="paramType"/> using the producer
/// registered in <c>argProducers</c>.
/// </summary>
/// <typeparam name="T">Type the produced value is cast to.</typeparam>
/// <param name="random">Source of randomness handed to the producer.</param>
/// <param name="paramType">Constructor parameter type to produce a value for.</param>
/// <returns>The produced value, cast to <typeparamref name="T"/>.</returns>
internal static T NewRandomArg<T>(Random random, Type paramType)
{
    // TryGetValue leaves producer null for an unregistered type, so the assertion below
    // reports it; the indexer would throw KeyNotFoundException before the assertion runs.
    argProducers.TryGetValue(paramType, out IArgProducer producer);
    assertNotNull("No producer for arguments of type " + paramType + " found", producer);
    return (T)producer.Create(random);
}
/// <summary>
/// Builds the argument array for a tokenizer constructor: <paramref name="reader"/> for
/// <see cref="TextReader"/> parameters, the default attribute factory for
/// <c>AttributeFactory</c> parameters, <c>null</c> for <c>AttributeSource</c> parameters,
/// and a random value for everything else.
/// </summary>
internal static object[] NewTokenizerArgs(Random random, TextReader reader, Type[] paramTypes)
{
    object[] args = new object[paramTypes.Length];
    int slot = 0;
    foreach (Type paramType in paramTypes)
    {
        object value;
        if (paramType == typeof(TextReader))
        {
            value = reader;
        }
        else if (paramType == typeof(AttributeSource.AttributeFactory))
        {
            // TODO: maybe the collator one...???
            value = AttributeSource.AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY;
        }
        else if (paramType == typeof(AttributeSource))
        {
            // TODO: value = new AttributeSource();
            // this is currently too scary to deal with!
            value = null; // force IAE
        }
        else
        {
            value = NewRandomArg<object>(random, paramType);
        }
        args[slot++] = value;
    }
    return args;
}
/// <summary>
/// Builds the argument array for a char filter constructor: <paramref name="reader"/> for
/// <see cref="TextReader"/> parameters and a random value for everything else.
/// </summary>
internal static object[] NewCharFilterArgs(Random random, TextReader reader, Type[] paramTypes)
{
    object[] args = new object[paramTypes.Length];
    int slot = 0;
    foreach (Type paramType in paramTypes)
    {
        args[slot++] = paramType == typeof(TextReader)
            ? (object)reader
            : NewRandomArg<object>(random, paramType);
    }
    return args;
}
/// <summary>
/// Builds the argument array for a token filter constructor: <paramref name="stream"/> for
/// <c>TokenStream</c> parameters, a freshly built <c>CommonGramsFilter</c> around
/// <paramref name="stream"/> for <c>CommonGramsFilter</c> parameters, and a random value
/// for everything else.
/// </summary>
static object[] NewFilterArgs(Random random, TokenStream stream, Type[] paramTypes)
{
    object[] args = new object[paramTypes.Length];
    int slot = 0;
    foreach (Type paramType in paramTypes)
    {
        object value;
        if (paramType == typeof(TokenStream))
        {
            value = stream;
        }
        else if (paramType == typeof(CommonGramsFilter))
        {
            // TODO: fix this one, that's broken: CommonGramsQueryFilter takes this one explicitly
            CharArraySet commonWords = NewRandomArg<CharArraySet>(random, typeof(CharArraySet));
            value = new CommonGramsFilter(TEST_VERSION_CURRENT, stream, commonWords);
        }
        else
        {
            value = NewRandomArg<object>(random, paramType);
        }
        args[slot++] = value;
    }
    return args;
}
private class MockRandomAnalyzer : Analyzer
{
// Seed from which every random decision of this analyzer is derived.
internal readonly int seed;

/// <summary>Creates an analyzer whose chain is derived from <paramref name="seed"/>.</summary>
public MockRandomAnalyzer(int seed) => this.seed = seed;
/// <summary>
/// Rebuilds the tokenizer and filter chain from the seed (over an empty reader) and
/// reports the resulting <c>offsetsAreCorrect</c> flag.
/// </summary>
public bool OffsetsAreCorrect
{
    get
    {
        // TODO: can we not do the full chain here!?
        Random seeded = new Random(seed);
        TokenizerSpec tokenizerPart = NewTokenizer(seeded, new StringReader(""));
        return NewFilterChain(seeded, tokenizerPart.tokenizer, tokenizerPart.offsetsAreCorrect).offsetsAreCorrect;
    }
}
/// <summary>
/// Builds the tokenizer and the filter chain from a <see cref="Random"/> seeded with
/// <c>seed</c>, reading from <paramref name="reader"/>.
/// </summary>
protected internal override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
{
    Random seeded = new Random(seed);
    TokenizerSpec tokenizerPart = NewTokenizer(seeded, reader);
    TokenFilterSpec filterPart = NewFilterChain(seeded, tokenizerPart.tokenizer, tokenizerPart.offsetsAreCorrect);
    return new TokenStreamComponents(tokenizerPart.tokenizer, filterPart.stream);
}
/// <summary>Wraps <paramref name="reader"/> in a char filter chain built from a fresh <see cref="Random"/> seeded with <c>seed</c>.</summary>
protected internal override TextReader InitReader(string fieldName, TextReader reader)
    => NewCharFilterChain(new Random(seed), reader).reader;
/// <summary>
/// Rebuilds the whole chain from the seed (over an empty reader) and returns a
/// multi-line description of the char filters, tokenizer, filters and offsets flag.
/// </summary>
public override string ToString()
{
    StringBuilder text = new StringBuilder();

    CharFilterSpec charFilterPart = NewCharFilterChain(new Random(seed), new StringReader(""));
    text.Append("\ncharfilters=").Append(charFilterPart.toString);

    // intentional: initReader gets its own separate random
    Random chainRandom = new Random(seed);

    TokenizerSpec tokenizerPart = NewTokenizer(chainRandom, charFilterPart.reader);
    text.Append("\n").Append("tokenizer=").Append(tokenizerPart.toString);

    TokenFilterSpec filterPart = NewFilterChain(chainRandom, tokenizerPart.tokenizer, tokenizerPart.offsetsAreCorrect);
    text.Append("\n").Append("filters=").Append(filterPart.toString);

    text.Append("\n");
    text.Append("offsetsAreCorrect=" + filterPart.offsetsAreCorrect);
    return text.ToString();
}
/// <summary>
/// Invokes <paramref name="ctor"/> with <paramref name="args"/>. On success the call is
/// appended to <paramref name="descr"/> and the instance returned. If the constructor
/// throws exactly <see cref="ArgumentException"/>, <see cref="ArgumentOutOfRangeException"/>
/// or <see cref="NotSupportedException"/>, the failure is ignored and <c>default</c> is
/// returned; any other constructor failure propagates.
/// </summary>
private T CreateComponent<T>(ConstructorInfo ctor, object[] args, StringBuilder descr)
{
    try
    {
        T created = (T)ctor.Invoke(args);
        descr.append("\n ");
        descr.append(ctor.DeclaringType.Name);
        string renderedArgs = Arrays.ToString(args);
        descr.append("(").append(renderedArgs).append(")");
        return created;
    }
    catch (TargetInvocationException ite) when (IsIgnorableCtorFailure(ite.InnerException))
    {
        // that's ok, ignore
        if (Verbose)
        {
            Console.WriteLine("Ignoring IAE/UOE from ctor:");
        }
    }
    return default; // no success
}

// True when the constructor failure is one of the exact exception types tolerated above.
// The comparison is on the exact runtime type, so subclasses are not matched.
private static bool IsIgnorableCtorFailure(Exception cause)
{
    if (cause == null)
    {
        return false;
    }
    Type causeType = cause.GetType();
    return causeType.Equals(typeof(ArgumentException))
        || causeType.Equals(typeof(ArgumentOutOfRangeException))
        || causeType.Equals(typeof(NotSupportedException));
}
/// <summary>True when <paramref name="ctor"/> is registered in <c>brokenConstructors</c> with a non-null predicate that accepts <paramref name="args"/>.</summary>
private static bool Broken(ConstructorInfo ctor, object[] args) // LUCENENET: CA1822: Mark members as static
{
    if (!brokenConstructors.TryGetValue(ctor, out IPredicate<object[]> rule) || rule == null)
    {
        return false;
    }
    return rule.Apply(args);
}
/// <summary>True when <paramref name="ctor"/> is registered in <c>brokenOffsetsConstructors</c> with a non-null predicate that accepts <paramref name="args"/>.</summary>
private static bool BrokenOffsets(ConstructorInfo ctor, object[] args) // LUCENENET: CA1822: Mark members as static
{
    if (!brokenOffsetsConstructors.TryGetValue(ctor, out IPredicate<object[]> rule) || rule == null)
    {
        return false;
    }
    return rule.Apply(args);
}
/// <summary>
/// Picks random tokenizer constructors until one can be instantiated over
/// <paramref name="reader"/>. A constructor that fails with a tolerated exception must
/// not have read from the reader.
/// </summary>
private TokenizerSpec NewTokenizer(Random random, TextReader reader)
{
    TokenizerSpec result = new TokenizerSpec();
    do
    {
        ConstructorInfo candidate = tokenizers[random.nextInt(tokenizers.size())];
        StringBuilder description = new StringBuilder();
        CheckThatYouDidntReadAnythingReaderWrapper guard = new CheckThatYouDidntReadAnythingReaderWrapper(reader);
        Type[] paramTypes = candidate.GetParameters().Select(p => p.ParameterType).ToArray();
        object[] ctorArgs = NewTokenizerArgs(random, guard, paramTypes);
        if (Broken(candidate, ctorArgs))
        {
            continue;
        }

        Tokenizer created = CreateComponent<Tokenizer>(candidate, ctorArgs, description);
        if (created == null)
        {
            assertFalse(candidate.DeclaringType.Name + " has read something in ctor but failed with UOE/IAE", guard.readSomething);
            continue;
        }

        result.tokenizer = created;
        result.offsetsAreCorrect &= !BrokenOffsets(candidate, ctorArgs);
        result.toString = description.toString();
    }
    while (result.tokenizer == null);
    return result;
}
/// <summary>
/// Wraps <paramref name="reader"/> in zero to two randomly chosen char filters. For each
/// stage, constructors are retried until one is neither broken nor fails to instantiate.
/// </summary>
private CharFilterSpec NewCharFilterChain(Random random, TextReader reader)
{
    CharFilterSpec result = new CharFilterSpec();
    result.reader = reader;
    StringBuilder description = new StringBuilder();

    for (int remaining = random.nextInt(3); remaining > 0; remaining--)
    {
        TextReader wrapped = null;
        while (wrapped == null)
        {
            ConstructorInfo candidate = charfilters[random.nextInt(charfilters.size())];
            Type[] paramTypes = candidate.GetParameters().Select(p => p.ParameterType).ToArray();
            object[] ctorArgs = NewCharFilterArgs(random, result.reader, paramTypes);
            if (!Broken(candidate, ctorArgs))
            {
                wrapped = CreateComponent<TextReader>(candidate, ctorArgs, description);
            }
        }
        result.reader = wrapped;
    }

    result.toString = description.toString();
    return result;
}
/// <summary>
/// Stacks zero to four randomly chosen token filters on top of <paramref name="tokenizer"/>,
/// inserting a <c>ValidatingTokenFilter</c> before each stage and one at the end.
/// </summary>
/// <param name="random">Source of all random choices.</param>
/// <param name="tokenizer">The tokenizer at the bottom of the chain.</param>
/// <param name="offsetsAreCorrect">Whether the tokenizer itself produces consistent offsets.</param>
/// <returns>The final stream, its description, and whether offsets are still considered correct.</returns>
private TokenFilterSpec NewFilterChain(Random random, Tokenizer tokenizer, bool offsetsAreCorrect)
{
    TokenFilterSpec spec = new TokenFilterSpec();
    spec.offsetsAreCorrect = offsetsAreCorrect;
    spec.stream = tokenizer;
    StringBuilder descr = new StringBuilder();
    int numFilters = random.nextInt(5);
    for (int i = 0; i < numFilters; i++)
    {
        // Insert ValidatingTF after each stage so we can
        // catch problems right after the TF that "caused"
        // them:
        spec.stream = new ValidatingTokenFilter(spec.stream, "stage " + i, spec.offsetsAreCorrect);
        while (true)
        {
            ConstructorInfo ctor = tokenfilters[random.nextInt(tokenfilters.size())];
            // hack: MockGraph/MockLookahead has assertions that will trip if they follow
            // an offsets violator. so we can't use them after e.g. wikipediatokenizer
            //
            // Both type checks belong under the offsets condition. With the closing
            // parenthesis after the first check, the expression parsed as
            // (!offsetsAreCorrect && MockGraph) || MockRandomLookahead, which skipped
            // MockRandomLookaheadTokenFilter unconditionally.
            if (!spec.offsetsAreCorrect
                && (ctor.DeclaringType.Equals(typeof(MockGraphTokenFilter))
                    || ctor.DeclaringType.Equals(typeof(MockRandomLookaheadTokenFilter))))
            {
                continue;
            }
            object[] args = NewFilterArgs(random, spec.stream, ctor.GetParameters().Select(p => p.ParameterType).ToArray());
            if (Broken(ctor, args))
            {
                continue;
            }
            TokenFilter flt = CreateComponent<TokenFilter>(ctor, args, descr);
            if (flt != null)
            {
                spec.offsetsAreCorrect &= !BrokenOffsets(ctor, args);
                spec.stream = flt;
                break;
            }
        }
    }
    // Insert ValidatingTF after each stage so we can
    // catch problems right after the TF that "caused"
    // them:
    spec.stream = new ValidatingTokenFilter(spec.stream, "last stage", spec.offsetsAreCorrect);
    spec.toString = descr.toString();
    return spec;
}
}
/// <summary>
/// Pass-through char filter that records, in <see cref="readSomething"/>, whether any
/// read or skip was performed on it. Offsets are not changed.
/// </summary>
internal class CheckThatYouDidntReadAnythingReaderWrapper : CharFilter
{
    // Set to true by Read and Skip; never reset by this class.
    internal bool readSomething;

    public CheckThatYouDidntReadAnythingReaderWrapper(TextReader @in)
        : base(@in)
    {
    }

    // NOTE(review): this cast assumes the wrapped reader is itself a CharFilter — confirm
    // that callers never hand in a plain TextReader before relying on Skip/Mark/Reset.
    private CharFilter Input
    {
        get { return (CharFilter)this.m_input; }
    }

    // we don't change any offsets
    protected override int Correct(int currentOff) => currentOff;

    public override int Read(char[] cbuf, int off, int len)
    {
        readSomething = true;
        return m_input.Read(cbuf, off, len);
    }

    public override int Read()
    {
        readSomething = true;
        return m_input.Read();
    }

    // LUCENENET: TextReader doesn't support this overload
    //public int read(char[] cbuf)
    //{
    // readSomething = true;
    // return input.read(cbuf);
    //}

    public override long Skip(int n)
    {
        readSomething = true;
        return Input.Skip(n);
    }

    public override void Mark(int readAheadLimit) => Input.Mark(readAheadLimit);

    public override bool IsMarkSupported
    {
        get { return Input.IsMarkSupported; }
    }

    public override bool IsReady
    {
        get { return Input.IsReady; }
    }

    public override void Reset() => Input.Reset();
}
/// <summary>Result holder filled in by <c>NewTokenizer</c>.</summary>
internal class TokenizerSpec
{
// The tokenizer that was successfully constructed (null until then).
internal Tokenizer tokenizer;
// Description of the constructor call, as accumulated by CreateComponent.
internal string toString;
// Cleared when the chosen constructor is flagged by BrokenOffsets.
internal bool offsetsAreCorrect = true;
}
/// <summary>Result holder filled in by <c>NewFilterChain</c>.</summary>
internal class TokenFilterSpec
{
// Outermost stream of the chain (tokenizer wrapped by the chosen filters).
internal TokenStream stream;
// Description of the constructor calls, as accumulated by CreateComponent.
internal string toString;
// Cleared once any constructor in the chain is flagged by BrokenOffsets.
internal bool offsetsAreCorrect = true;
}
/// <summary>Result holder filled in by <c>NewCharFilterChain</c>.</summary>
internal class CharFilterSpec
{
// Outermost reader of the chain (the original reader when no filter was added).
internal TextReader reader;
// Description of the constructor calls, as accumulated by CreateComponent.
internal string toString;
}
/// <summary>
/// Builds at least 20 randomly seeded analyzers and runs <c>CheckRandomData</c> against
/// each one; on failure the offending analyzer is printed before the exception propagates.
/// </summary>
[Test]
[Slow]
[AwaitsFix(BugUrl = "https://github.com/apache/lucenenet/issues/269")] // LUCENENET TODO: this test occasionally fails
public void TestRandomChains_()
{
    int rounds = AtLeast(20);
    Random rnd = Random;
    for (int round = 0; round < rounds; round++)
    {
        MockRandomAnalyzer analyzer = new MockRandomAnalyzer(rnd.Next());
        if (Verbose)
        {
            Console.WriteLine("Creating random analyzer:" + analyzer);
        }

        try
        {
            // The last argument is false because we already validate our own offsets.
            CheckRandomData(rnd, analyzer, 500 * RandomMultiplier, 20, false, false);
        }
        catch (Exception)
        {
            Console.WriteLine("Exception from random analyzer: " + analyzer);
            throw; // LUCENENET: CA2200: Rethrow to preserve stack details
        }
    }
}
// we might regret this decision...
/// <summary>
/// Same as <c>TestRandomChains_</c>, but with fewer iterations per analyzer
/// (50 * RandomMultiplier) and a larger maximum word length (128).
/// </summary>
[Test]
[Slow]
[AwaitsFix(BugUrl = "https://github.com/apache/lucenenet/issues/269")] // LUCENENET TODO: this test occasionally fails
public void TestRandomChainsWithLargeStrings()
{
    int rounds = AtLeast(20);
    Random rnd = Random;
    for (int round = 0; round < rounds; round++)
    {
        MockRandomAnalyzer analyzer = new MockRandomAnalyzer(rnd.Next());
        if (Verbose)
        {
            Console.WriteLine("Creating random analyzer:" + analyzer);
        }

        try
        {
            // The last argument is false because we already validate our own offsets.
            CheckRandomData(rnd, analyzer, 50 * RandomMultiplier, 128, false, false);
        }
        catch (Exception)
        {
            Console.WriteLine("Exception from random analyzer: " + analyzer);
            throw; // LUCENENET: CA2200: Rethrow to preserve stack details
        }
    }
}
}
}