using J2N.Threading;
using Lucene.Net.Analysis.TokenAttributes;
using Lucene.Net.Documents;
using Lucene.Net.Index;
using Lucene.Net.Support;
using Lucene.Net.TestFramework;
using Lucene.Net.Util;
using System;
using System.Collections.Generic;
using System.Globalization;
using System.IO;
using System.Linq;
using System.Text;
using System.Threading;
using AttributeFactory = Lucene.Net.Util.AttributeSource.AttributeFactory;
using Assert = Lucene.Net.TestFramework.Assert;
using AssertionError = Lucene.Net.Diagnostics.AssertionException;
using Attribute = Lucene.Net.Util.Attribute;
using Directory = Lucene.Net.Store.Directory;
using Console = Lucene.Net.Support.SystemConsole;
namespace Lucene.Net.Analysis
{
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/// <summary>
/// Attribute that records if it was cleared or not. This is used
/// for testing that <see cref="Lucene.Net.Util.AttributeSource.ClearAttributes()"/> was called correctly.
/// </summary>
public interface ICheckClearAttributesAttribute : IAttribute
{
bool GetAndResetClearCalled();
}
/// <summary>
/// Attribute that records if it was cleared or not. This is used
/// for testing that <see cref="Lucene.Net.Util.AttributeSource.ClearAttributes()"/> was called correctly.
/// </summary>
public sealed class CheckClearAttributesAttribute : Attribute, ICheckClearAttributesAttribute
{
private bool clearCalled = false;
public bool GetAndResetClearCalled()
{
bool old = clearCalled;
clearCalled = false;
return old;
}
public override void Clear()
{
clearCalled = true;
}
public override bool Equals(object other)
{
return (other is CheckClearAttributesAttribute && ((CheckClearAttributesAttribute)other).clearCalled == this.clearCalled);
}
public override int GetHashCode()
{
return 76137213 ^ clearCalled.GetHashCode();
}
public override void CopyTo(IAttribute target)
{
((CheckClearAttributesAttribute)target).Clear();
}
}
/// <summary>
/// Base class for all Lucene unit tests that use <see cref="TokenStream"/>s.
/// <para/>
/// When writing unit tests for analysis components, it's highly recommended
/// to use the helper methods here (especially in conjunction with <see cref="MockAnalyzer"/> or
/// <see cref="MockTokenizer"/>), as they contain many assertions and checks to
/// catch bugs.
/// </summary>
/// <seealso cref="MockAnalyzer"/>
/// <seealso cref="MockTokenizer"/>
public abstract class BaseTokenStreamTestCase : LuceneTestCase
#if TESTFRAMEWORK_XUNIT
, Xunit.IClassFixture<BeforeAfterClass>
{
public BaseTokenStreamTestCase(BeforeAfterClass beforeAfter)
: base(beforeAfter)
{
}
#else
{
#endif
//#if TESTFRAMEWORK_MSTEST
// [Microsoft.VisualStudio.TestTools.UnitTesting.ClassInitializeAttribute(Microsoft.VisualStudio.TestTools.UnitTesting.InheritanceBehavior.BeforeEachDerivedClass)]
// new public static void BeforeClass(Microsoft.VisualStudio.TestTools.UnitTesting.TestContext context)
// {
// Lucene.Net.Util.LuceneTestCase.BeforeClass(context);
// }
// [Microsoft.VisualStudio.TestTools.UnitTesting.ClassCleanupAttribute(Microsoft.VisualStudio.TestTools.UnitTesting.InheritanceBehavior.BeforeEachDerivedClass)]
// new public static void AfterClass()
// {
// Lucene.Net.Util.LuceneTestCase.AfterClass();
// }
//#endif
// some helpers to test Analyzers and TokenStreams:
// LUCENENET specific - de-nested ICheckClearAttributesAttribute
// LUCENENET specific - de-nested CheckClearAttributesAttribute
// offsetsAreCorrect also validates:
// - graph offsets are correct (all tokens leaving from
// pos X have the same startOffset; all tokens
// arriving to pos Y have the same endOffset)
// - offsets only move forwards (startOffset >=
// lastStartOffset)
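// For illustration (a hypothetical example, not from a specific test): analyzing
// "wi fi network" with a synonym filter that injects "wifi" across the first two
// positions, "wi" (posLength=1) and "wifi" (posLength=2) both leave position 0 and
// must share startOffset=0, while "fi" and "wifi" both arrive at position 2 and
// must share endOffset=5.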
public static void AssertTokenStreamContents(TokenStream ts, string[] output, int[] startOffsets, int[] endOffsets, string[] types, int[] posIncrements, int[] posLengths, int? finalOffset, int? finalPosInc, bool[] keywordAtts, bool offsetsAreCorrect, byte[][] payloads)
{
// LUCENENET: Bug fix: NUnit throws an exception when something fails.
// This causes Dispose() to be skipped and it pollutes other tests indicating false negatives.
// Added this try-finally block to fix this.
try
{
Assert.IsNotNull(output);
var checkClearAtt = ts.AddAttribute<ICheckClearAttributesAttribute>();
ICharTermAttribute termAtt = null;
if (output.Length > 0)
{
Assert.IsTrue(ts.HasAttribute<ICharTermAttribute>(), "has no CharTermAttribute");
termAtt = ts.GetAttribute<ICharTermAttribute>();
}
IOffsetAttribute offsetAtt = null;
if (startOffsets != null || endOffsets != null || finalOffset != null)
{
Assert.IsTrue(ts.HasAttribute<IOffsetAttribute>(), "has no OffsetAttribute");
offsetAtt = ts.GetAttribute<IOffsetAttribute>();
}
ITypeAttribute typeAtt = null;
if (types != null)
{
Assert.IsTrue(ts.HasAttribute<ITypeAttribute>(), "has no TypeAttribute");
typeAtt = ts.GetAttribute<ITypeAttribute>();
}
IPositionIncrementAttribute posIncrAtt = null;
if (posIncrements != null || finalPosInc != null)
{
Assert.IsTrue(ts.HasAttribute<IPositionIncrementAttribute>(), "has no PositionIncrementAttribute");
posIncrAtt = ts.GetAttribute<IPositionIncrementAttribute>();
}
IPositionLengthAttribute posLengthAtt = null;
if (posLengths != null)
{
Assert.IsTrue(ts.HasAttribute<IPositionLengthAttribute>(), "has no PositionLengthAttribute");
posLengthAtt = ts.GetAttribute<IPositionLengthAttribute>();
}
IKeywordAttribute keywordAtt = null;
if (keywordAtts != null)
{
Assert.IsTrue(ts.HasAttribute<IKeywordAttribute>(), "has no KeywordAttribute");
keywordAtt = ts.GetAttribute<IKeywordAttribute>();
}
// *********** From Lucene 8.2.0 **************
IPayloadAttribute payloadAtt = null;
if (payloads != null)
{
Assert.IsTrue(ts.HasAttribute<IPayloadAttribute>(), "has no PayloadAttribute");
payloadAtt = ts.GetAttribute<IPayloadAttribute>();
}
// *********** End From Lucene 8.2.0 **************
// Maps position to the start/end offset:
IDictionary<int?, int?> posToStartOffset = new Dictionary<int?, int?>();
IDictionary<int?, int?> posToEndOffset = new Dictionary<int?, int?>();
ts.Reset();
int pos = -1;
int lastStartOffset = 0;
for (int i = 0; i < output.Length; i++)
{
// extra safety to enforce that the state is not preserved and also to assign bogus values
ts.ClearAttributes();
termAtt.SetEmpty().Append("bogusTerm");
if (offsetAtt != null)
{
offsetAtt.SetOffset(14584724, 24683243);
}
if (typeAtt != null)
{
typeAtt.Type = "bogusType";
}
if (posIncrAtt != null)
{
posIncrAtt.PositionIncrement = 45987657;
}
if (posLengthAtt != null)
{
posLengthAtt.PositionLength = 45987653;
}
if (keywordAtt != null)
{
keywordAtt.IsKeyword = (i & 1) == 0;
}
// *********** From Lucene 8.2.0 **************
if (payloadAtt != null)
{
payloadAtt.Payload = new BytesRef(new byte[] { 0x00, unchecked((byte)-0x21), 0x12, unchecked((byte)-0x43), 0x24 });
}
// *********** End From Lucene 8.2.0 **************
bool reset = checkClearAtt.GetAndResetClearCalled(); // reset it, because we called ClearAttributes() before
Assert.IsTrue(ts.IncrementToken(), "token " + i + " does not exist");
Assert.IsTrue(reset, "ClearAttributes() was not called correctly in TokenStream chain");
Assert.AreEqual(output[i], termAtt.ToString(), "term " + i + ", output[i] = " + output[i] + ", termAtt = " + termAtt.ToString());
if (startOffsets != null)
{
Assert.AreEqual(startOffsets[i], offsetAtt.StartOffset, "startOffset " + i);
}
if (endOffsets != null)
{
Assert.AreEqual(endOffsets[i], offsetAtt.EndOffset, "endOffset " + i);
}
if (types != null)
{
Assert.AreEqual(types[i], typeAtt.Type, "type " + i);
}
if (posIncrements != null)
{
Assert.AreEqual(posIncrements[i], posIncrAtt.PositionIncrement, "posIncrement " + i);
}
if (posLengths != null)
{
Assert.AreEqual(posLengths[i], posLengthAtt.PositionLength, "posLength " + i);
}
if (keywordAtts != null)
{
Assert.AreEqual(keywordAtts[i], keywordAtt.IsKeyword, "keywordAtt " + i);
}
// *********** From Lucene 8.2.0 **************
if (payloads != null)
{
if (payloads[i] != null)
{
Assert.AreEqual(new BytesRef(payloads[i]), payloadAtt.Payload, "payloads " + i);
}
else
{
Assert.IsNull(payloads[i], "payloads " + i);
}
}
// *********** End From Lucene 8.2.0 **************
// we can enforce some basic things about a few attributes even if the caller doesn't check:
if (offsetAtt != null)
{
int startOffset = offsetAtt.StartOffset;
int endOffset = offsetAtt.EndOffset;
if (finalOffset != null)
{
Assert.IsTrue(startOffset <= (int)finalOffset, "startOffset must be <= finalOffset");
Assert.IsTrue(endOffset <= (int)finalOffset, "endOffset must be <= finalOffset: got endOffset=" + endOffset + " vs finalOffset=" + (int)finalOffset);
}
if (offsetsAreCorrect)
{
Assert.IsTrue(offsetAtt.StartOffset >= lastStartOffset, "offsets must not go backwards startOffset=" + startOffset + " is < lastStartOffset=" + lastStartOffset);
lastStartOffset = offsetAtt.StartOffset;
}
if (offsetsAreCorrect && posLengthAtt != null && posIncrAtt != null)
{
// Validate offset consistency in the graph, ie
// all tokens leaving from a certain pos have the
// same startOffset, and all tokens arriving to a
// certain pos have the same endOffset:
int posInc = posIncrAtt.PositionIncrement;
pos += posInc;
int posLength = posLengthAtt.PositionLength;
if (!posToStartOffset.TryGetValue(pos, out int? oldStartOffset))
{
// First time we've seen a token leaving from this position:
posToStartOffset[pos] = startOffset;
//System.out.println(" + s " + pos + " -> " + startOffset);
}
else
{
// We've seen a token leaving from this position
// before; verify the startOffset is the same:
//System.out.println(" + vs " + pos + " -> " + startOffset);
Assert.AreEqual(oldStartOffset.GetValueOrDefault(), startOffset, "pos=" + pos + " posLen=" + posLength + " token=" + termAtt);
}
int endPos = pos + posLength;
if (!posToEndOffset.TryGetValue(endPos, out int? oldEndOffset))
{
// First time we've seen a token arriving to this position:
posToEndOffset[endPos] = endOffset;
//System.out.println(" + e " + endPos + " -> " + endOffset);
}
else
{
// We've seen a token arriving to this position
// before; verify the endOffset is the same:
//System.out.println(" + ve " + endPos + " -> " + endOffset);
Assert.AreEqual(oldEndOffset.GetValueOrDefault(), endOffset, "pos=" + pos + " posLen=" + posLength + " token=" + termAtt);
}
}
}
if (posIncrAtt != null)
{
if (i == 0)
{
Assert.IsTrue(posIncrAtt.PositionIncrement >= 1, "first posIncrement must be >= 1");
}
else
{
Assert.IsTrue(posIncrAtt.PositionIncrement >= 0, "posIncrement must be >= 0");
}
}
if (posLengthAtt != null)
{
Assert.IsTrue(posLengthAtt.PositionLength >= 1, "posLength must be >= 1");
}
}
if (ts.IncrementToken())
{
Assert.Fail("TokenStream has more tokens than expected (expected count=" + output.Length + "); extra token=" + termAtt);
}
// repeat our extra safety checks for End()
ts.ClearAttributes();
if (termAtt != null)
{
termAtt.SetEmpty().Append("bogusTerm");
}
if (offsetAtt != null)
{
offsetAtt.SetOffset(14584724, 24683243);
}
if (typeAtt != null)
{
typeAtt.Type = "bogusType";
}
if (posIncrAtt != null)
{
posIncrAtt.PositionIncrement = 45987657;
}
if (posLengthAtt != null)
{
posLengthAtt.PositionLength = 45987653;
}
checkClearAtt.GetAndResetClearCalled(); // reset it, because we called ClearAttributes() before
ts.End();
Assert.IsTrue(checkClearAtt.GetAndResetClearCalled(), "base.End()/ClearAttributes() was not called correctly in End()");
if (finalOffset != null)
{
Assert.AreEqual((int)finalOffset, offsetAtt.EndOffset, "finalOffset");
}
if (offsetAtt != null)
{
Assert.IsTrue(offsetAtt.EndOffset >= 0, "finalOffset must be >= 0");
}
if (finalPosInc != null)
{
Assert.AreEqual((int)finalPosInc, posIncrAtt.PositionIncrement, "finalPosInc");
}
//ts.Dispose();
}
catch (Exception)
{
//ts.Reset();
ts.ClearAttributes();
ts.End();
throw;
}
finally
{
ts.Dispose();
}
}
public static void AssertTokenStreamContents(TokenStream ts, string[] output, int[] startOffsets, int[] endOffsets, string[] types, int[] posIncrements, int[] posLengths, int? finalOffset, bool[] keywordAtts, bool offsetsAreCorrect)
{
AssertTokenStreamContents(ts, output, startOffsets, endOffsets, types, posIncrements, posLengths, finalOffset, null, keywordAtts, offsetsAreCorrect, null);
}
public static void AssertTokenStreamContents(TokenStream ts, string[] output, int[] startOffsets, int[] endOffsets, string[] types, int[] posIncrements, int[] posLengths, int? finalOffset, bool offsetsAreCorrect)
{
AssertTokenStreamContents(ts, output, startOffsets, endOffsets, types, posIncrements, posLengths, finalOffset, null, offsetsAreCorrect);
}
public static void AssertTokenStreamContents(TokenStream ts, string[] output, int[] startOffsets, int[] endOffsets, string[] types, int[] posIncrements, int[] posLengths, int? finalOffset)
{
AssertTokenStreamContents(ts, output, startOffsets, endOffsets, types, posIncrements, posLengths, finalOffset, true);
}
public static void AssertTokenStreamContents(TokenStream ts, string[] output, int[] startOffsets, int[] endOffsets, string[] types, int[] posIncrements, int? finalOffset)
{
AssertTokenStreamContents(ts, output, startOffsets, endOffsets, types, posIncrements, null, finalOffset);
}
public static void AssertTokenStreamContents(TokenStream ts, string[] output, int[] startOffsets, int[] endOffsets, string[] types, int[] posIncrements)
{
AssertTokenStreamContents(ts, output, startOffsets, endOffsets, types, posIncrements, null, null);
}
public static void AssertTokenStreamContents(TokenStream ts, string[] output)
{
AssertTokenStreamContents(ts, output, null, null, null, null, null, null);
}
public static void AssertTokenStreamContents(TokenStream ts, string[] output, string[] types)
{
AssertTokenStreamContents(ts, output, null, null, types, null, null, null);
}
public static void AssertTokenStreamContents(TokenStream ts, string[] output, int[] posIncrements)
{
AssertTokenStreamContents(ts, output, null, null, null, posIncrements, null, null);
}
public static void AssertTokenStreamContents(TokenStream ts, string[] output, int[] startOffsets, int[] endOffsets)
{
AssertTokenStreamContents(ts, output, startOffsets, endOffsets, null, null, null, null);
}
public static void AssertTokenStreamContents(TokenStream ts, string[] output, int[] startOffsets, int[] endOffsets, int? finalOffset)
{
AssertTokenStreamContents(ts, output, startOffsets, endOffsets, null, null, null, finalOffset);
}
public static void AssertTokenStreamContents(TokenStream ts, string[] output, int[] startOffsets, int[] endOffsets, int[] posIncrements)
{
AssertTokenStreamContents(ts, output, startOffsets, endOffsets, null, posIncrements, null, null);
}
public static void AssertTokenStreamContents(TokenStream ts, string[] output, int[] startOffsets, int[] endOffsets, int[] posIncrements, int? finalOffset)
{
AssertTokenStreamContents(ts, output, startOffsets, endOffsets, null, posIncrements, null, finalOffset);
}
public static void AssertTokenStreamContents(TokenStream ts, string[] output, int[] startOffsets, int[] endOffsets, int[] posIncrements, int[] posLengths, int? finalOffset)
{
AssertTokenStreamContents(ts, output, startOffsets, endOffsets, null, posIncrements, posLengths, finalOffset);
}
public static void AssertAnalyzesTo(Analyzer a, string input, string[] output, int[] startOffsets, int[] endOffsets, string[] types, int[] posIncrements)
{
CheckResetException(a, input);
AssertTokenStreamContents(a.GetTokenStream("dummy", new StringReader(input)), output, startOffsets, endOffsets, types, posIncrements, null, input.Length);
}
public static void AssertAnalyzesTo(Analyzer a, string input, string[] output, int[] startOffsets, int[] endOffsets, string[] types, int[] posIncrements, int[] posLengths)
{
CheckResetException(a, input);
AssertTokenStreamContents(a.GetTokenStream("dummy", new StringReader(input)), output, startOffsets, endOffsets, types, posIncrements, posLengths, input.Length);
}
public static void AssertAnalyzesTo(Analyzer a, string input, string[] output, int[] startOffsets, int[] endOffsets, string[] types, int[] posIncrements, int[] posLengths, bool offsetsAreCorrect)
{
CheckResetException(a, input);
AssertTokenStreamContents(a.GetTokenStream("dummy", new StringReader(input)), output, startOffsets, endOffsets, types, posIncrements, posLengths, input.Length, offsetsAreCorrect);
}
// LUCENENET: Overload from Lucene 8.2.0
public static void AssertAnalyzesTo(Analyzer a, string input, string[] output, int[] startOffsets, int[] endOffsets, string[] types, int[] posIncrements, int[] posLengths, bool graphOffsetsAreCorrect, byte[][] payloads)
{
CheckResetException(a, input);
AssertTokenStreamContents(a.GetTokenStream("dummy", input), output, startOffsets, endOffsets, types, posIncrements, posLengths, input.Length, null, null, graphOffsetsAreCorrect, payloads);
}
public static void AssertAnalyzesTo(Analyzer a, string input, string[] output)
{
AssertAnalyzesTo(a, input, output, null, null, null, null, null);
}
public static void AssertAnalyzesTo(Analyzer a, string input, string[] output, string[] types)
{
AssertAnalyzesTo(a, input, output, null, null, types, null, null);
}
public static void AssertAnalyzesTo(Analyzer a, string input, string[] output, int[] posIncrements)
{
AssertAnalyzesTo(a, input, output, null, null, null, posIncrements, null);
}
public static void AssertAnalyzesToPositions(Analyzer a, string input, string[] output, int[] posIncrements, int[] posLengths)
{
AssertAnalyzesTo(a, input, output, null, null, null, posIncrements, posLengths);
}
public static void AssertAnalyzesTo(Analyzer a, string input, string[] output, int[] startOffsets, int[] endOffsets)
{
AssertAnalyzesTo(a, input, output, startOffsets, endOffsets, null, null, null);
}
public static void AssertAnalyzesTo(Analyzer a, string input, string[] output, int[] startOffsets, int[] endOffsets, int[] posIncrements)
{
AssertAnalyzesTo(a, input, output, startOffsets, endOffsets, null, posIncrements, null);
}
internal static void CheckResetException(Analyzer a, string input)
{
TokenStream ts = a.GetTokenStream("bogus", new StringReader(input));
try
{
if (ts.IncrementToken())
{
ts.ReflectAsString(false);
Assert.Fail("didn't get expected exception when reset() not called");
}
}
#pragma warning disable 168
catch (InvalidOperationException expected)
#pragma warning restore 168
{
//ok
}
catch (AssertionError expected) // LUCENENET: Actual AssertionError type is Lucene.Net.Diagnostics.AssertionException
{
// ok: MockTokenizer
Assert.IsTrue(expected.Message != null && expected.Message.Contains("wrong state"), expected.Message);
}
catch (Exception unexpected)
{
//unexpected.printStackTrace(System.err);
Console.Error.WriteLine(unexpected.StackTrace);
Assert.Fail("Got wrong exception when Reset() not called: " + unexpected);
}
finally
{
// consume correctly
ts.Reset();
while (ts.IncrementToken())
{
}
ts.End();
ts.Dispose();
}
// check for a missing Dispose()
ts = a.GetTokenStream("bogus", new StringReader(input));
ts.Reset();
while (ts.IncrementToken())
{
}
ts.End();
try
{
ts = a.GetTokenStream("bogus", new StringReader(input));
Assert.Fail("Didn't get expected exception when Dispose() not called");
}
catch (Exception)
{
// ok
}
finally
{
ts.Dispose();
}
}
/// <summary>
/// Simple utility method for testing stemmers
/// </summary>
public static void CheckOneTerm(Analyzer a, string input, string expected)
{
AssertAnalyzesTo(a, input, new string[] { expected });
}
#if !FEATURE_INSTANCE_TESTDATA_INITIALIZATION
/// <summary>
/// Utility method for blasting tokenstreams with data to make sure they don't do anything crazy
/// </summary>
public static void CheckRandomData(Random random, Analyzer a, int iterations)
{
CheckRandomData(random, a, iterations, 20, false, true);
}
/// <summary>
/// Utility method for blasting tokenstreams with data to make sure they don't do anything crazy
/// </summary>
public static void CheckRandomData(Random random, Analyzer a, int iterations, int maxWordLength)
{
CheckRandomData(random, a, iterations, maxWordLength, false, true);
}
/// <summary>
/// Utility method for blasting tokenstreams with data to make sure they don't do anything crazy
/// </summary>
/// <param name="simple"> true if only ascii strings will be used (try to avoid)</param>
public static void CheckRandomData(Random random, Analyzer a, int iterations, bool simple)
{
CheckRandomData(random, a, iterations, 20, simple, true);
}
#else
/// <summary>
/// Utility method for blasting tokenstreams with data to make sure they don't do anything crazy
/// <para/>
/// LUCENENET specific
/// Non-static to reduce the inter-class dependencies due to use of
/// static variables
/// </summary>
public void CheckRandomData(Random random, Analyzer a, int iterations)
{
CheckRandomData(random, a, iterations, 20, false, true);
}
/// <summary>
/// Utility method for blasting tokenstreams with data to make sure they don't do anything crazy
/// <para/>
/// LUCENENET specific:
/// Non-static to reduce the inter-class dependencies due to use of
/// static variables
/// </summary>
public void CheckRandomData(Random random, Analyzer a, int iterations, int maxWordLength)
{
CheckRandomData(random, a, iterations, maxWordLength, false, true);
}
/// <summary>
/// Utility method for blasting tokenstreams with data to make sure they don't do anything crazy
/// <para/>
/// LUCENENET specific:
/// Non-static to reduce the inter-class dependencies due to use of
/// static variables
/// </summary>
/// <param name="simple"> true if only ascii strings will be used (try to avoid)</param>
public void CheckRandomData(Random random, Analyzer a, int iterations, bool simple)
{
CheckRandomData(random, a, iterations, 20, simple, true);
}
#endif
internal class AnalysisThread : ThreadJob
{
internal readonly int iterations;
internal readonly int maxWordLength;
internal readonly long seed;
internal readonly Analyzer a;
internal readonly bool useCharFilter;
internal readonly bool simple;
internal readonly bool offsetsAreCorrect;
internal readonly RandomIndexWriter iw;
private readonly CountdownEvent latch;
// NOTE: not volatile because we don't want the tests to
// add memory barriers (ie alter how threads
// interact)... so this is just "best effort":
public bool Failed { get; set; }
public Exception FirstException { get; set; } = null;
internal AnalysisThread(long seed, CountdownEvent latch, Analyzer a, int iterations, int maxWordLength,
bool useCharFilter, bool simple, bool offsetsAreCorrect, RandomIndexWriter iw)
{
this.seed = seed;
this.a = a;
this.iterations = iterations;
this.maxWordLength = maxWordLength;
this.useCharFilter = useCharFilter;
this.simple = simple;
this.offsetsAreCorrect = offsetsAreCorrect;
this.iw = iw;
this.latch = latch;
}
public override void Run()
{
bool success = false;
try
{
if (latch != null) latch.Wait();
// see the part in CheckRandomData where it replays the same text again
// to verify reproducibility/reuse: hopefully this would catch thread hazards.
CheckRandomData(new Random((int)seed), a, iterations, maxWordLength, useCharFilter, simple, offsetsAreCorrect, iw);
success = true;
}
catch (Exception e)
{
//Console.WriteLine("Exception in Thread: " + e);
//throw;
// LUCENENET: Throwing an exception on another thread
// is pointless, so we set it to a variable so we can read
// it from our main thread (for debugging).
if (FirstException == null)
{
FirstException = e;
}
}
finally
{
Failed = !success;
}
}
}
#if !FEATURE_INSTANCE_TESTDATA_INITIALIZATION
public static void CheckRandomData(Random random, Analyzer a, int iterations, int maxWordLength, bool simple)
{
CheckRandomData(random, a, iterations, maxWordLength, simple, true);
}
public static void CheckRandomData(Random random, Analyzer a, int iterations, int maxWordLength, bool simple, bool offsetsAreCorrect)
#else
public void CheckRandomData(Random random, Analyzer a, int iterations, int maxWordLength, bool simple)
{
CheckRandomData(random, a, iterations, maxWordLength, simple, true);
}
public void CheckRandomData(Random random, Analyzer a, int iterations, int maxWordLength, bool simple, bool offsetsAreCorrect)
#endif
{
CheckResetException(a, "best effort");
long seed = random.Next();
bool useCharFilter = random.NextBoolean();
Directory dir = null;
RandomIndexWriter iw = null;
string postingsFormat = TestUtil.GetPostingsFormat("dummy");
bool codecOk = iterations * maxWordLength < 100000
|| !(postingsFormat.Equals("Memory", StringComparison.Ordinal)
|| postingsFormat.Equals("SimpleText", StringComparison.Ordinal));
if (Rarely(random) && codecOk)
{
dir = NewFSDirectory(CreateTempDir("bttc"));
iw = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
this,
#endif
new Random((int)seed), dir, a);
}
bool success = false;
try
{
CheckRandomData(new Random((int)seed), a, iterations, maxWordLength, useCharFilter, simple, offsetsAreCorrect, iw);
// now test with multiple threads: note we do the EXACT same thing we did before in each thread,
// so this should only really fail from another thread if it's an actual thread problem
int numThreads = TestUtil.NextInt32(random, 2, 4);
var startingGun = new CountdownEvent(1);
var threads = new AnalysisThread[numThreads];
for (int i = 0; i < threads.Length; i++)
{
threads[i] = new AnalysisThread(seed, startingGun, a, iterations, maxWordLength, useCharFilter, simple, offsetsAreCorrect, iw);
}
foreach (AnalysisThread thread in threads)
{
thread.Start();
}
startingGun.Signal();
foreach (var t in threads)
{
#if !NETSTANDARD1_6
try
{
#endif
t.Join();
#if !NETSTANDARD1_6
}
#pragma warning disable 168
catch (ThreadInterruptedException e)
#pragma warning restore 168
{
fail("Thread interrupted");
}
#endif
}
//if (threads.Any(x => x.Failed))
// Fail("Thread threw exception");
foreach (var t in threads)
{
if (t.Failed)
{
fail("Thread threw exception: " + t.FirstException.ToString());
}
}
success = true;
}
finally
{
if (success)
{
IOUtils.Dispose(iw, dir);
}
else
{
IOUtils.DisposeWhileHandlingException(iw, dir); // checkindex
}
}
}
private static void CheckRandomData(Random random, Analyzer a, int iterations, int maxWordLength, bool useCharFilter, bool simple, bool offsetsAreCorrect, RandomIndexWriter iw)
{
LineFileDocs docs = new LineFileDocs(random);
Document doc = null;
Field field = null, currentField = null;
StringReader bogus = new StringReader("");
if (iw != null)
{
doc = new Document();
FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
if (random.NextBoolean())
{
ft.StoreTermVectors = true;
ft.StoreTermVectorOffsets = random.NextBoolean();
ft.StoreTermVectorPositions = random.NextBoolean();
if (ft.StoreTermVectorPositions && !OldFormatImpersonationIsActive)
{
ft.StoreTermVectorPayloads = random.NextBoolean();
}
}
if (random.NextBoolean())
{
ft.OmitNorms = true;
}
string pf = TestUtil.GetPostingsFormat("dummy");
bool supportsOffsets = !DoesntSupportOffsets.Contains(pf);
switch (random.Next(4))
{
case 0:
ft.IndexOptions = IndexOptions.DOCS_ONLY;
break;
case 1:
ft.IndexOptions = IndexOptions.DOCS_AND_FREQS;
break;
case 2:
ft.IndexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
break;
default:
if (supportsOffsets && offsetsAreCorrect)
{
ft.IndexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS;
}
else
{
ft.IndexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
}
break;
}
currentField = field = new Field("dummy", bogus, ft);
doc.Add(currentField);
}
try
{
for (int i = 0; i < iterations; i++)
{
string text;
if (random.Next(10) == 7)
{
// real data from linedocs
text = docs.NextDoc().Get("body");
if (text.Length > maxWordLength)
{
// Take a random slice from the text...:
int startPos = random.Next(text.Length - maxWordLength);
if (startPos > 0 && char.IsLowSurrogate(text[startPos]))
{
// Take care not to split up a surrogate pair:
startPos--;
Assert.True(char.IsHighSurrogate(text[startPos]));
}
int endPos = startPos + maxWordLength - 1;
if (char.IsHighSurrogate(text[endPos]))
{
// Take care not to split up a surrogate pair:
endPos--;
}
text = text.Substring(startPos, 1 + endPos - startPos);
}
}
else
{
// synthetic
text = TestUtil.RandomAnalysisString(random, maxWordLength, simple);
}
try
{
CheckAnalysisConsistency(random, a, useCharFilter, text, offsetsAreCorrect, currentField);
if (iw != null)
{
if (random.Next(7) == 0)
{
// pile up a multivalued field
var ft = field.FieldType;
currentField = new Field("dummy", bogus, ft);
doc.Add(currentField);
}
else
{
iw.AddDocument(doc);
if (doc.Fields.Count > 1)
{
// back to 1 field
currentField = field;
doc.RemoveFields("dummy");
doc.Add(currentField);
}
}
}
}
#pragma warning disable 168
catch (Exception t)
#pragma warning restore 168
{
// TODO: really we should pass a random seed to
// checkAnalysisConsistency then print it here too:
Console.Error.WriteLine("TEST FAIL: useCharFilter=" + useCharFilter + " text='" + Escape(text) + "'");
throw;
}
}
}
finally
{
IOUtils.DisposeWhileHandlingException(docs);
}
}
public static string Escape(string s)
{
int charUpto = 0;
StringBuilder sb = new StringBuilder();
while (charUpto < s.Length)
{
int c = s[charUpto];
if (c == 0xa)
{
// Strangely, you cannot put \ u000A into Java
// sources (not in a comment nor a string
// constant)...:
sb.Append("\\n");
}
else if (c == 0xd)
{
// ... nor \ u000D:
sb.Append("\\r");
}
else if (c == '"')
{
sb.Append("\\\"");
}
else if (c == '\\')
{
sb.Append("\\\\");
}
else if (c >= 0x20 && c < 0x80)
{
sb.Append((char)c);
}
else
{
// TODO: we can make ascii easier to read if we
// don't escape...
sb.AppendFormat(CultureInfo.InvariantCulture, "\\u{0:x4}", c);
}
charUpto++;
}
return sb.ToString();
}
public static void CheckAnalysisConsistency(Random random, Analyzer a, bool useCharFilter, string text)
{
CheckAnalysisConsistency(random, a, useCharFilter, text, true);
}
public static void CheckAnalysisConsistency(Random random, Analyzer a, bool useCharFilter, string text, bool offsetsAreCorrect)
{
CheckAnalysisConsistency(random, a, useCharFilter, text, offsetsAreCorrect, null);
}
private static void CheckAnalysisConsistency(Random random, Analyzer a, bool useCharFilter, string text, bool offsetsAreCorrect, Field field)
{
if (VERBOSE)
{
Console.WriteLine(Thread.CurrentThread.Name + ": NOTE: baseTokenStreamTestCase: get first token stream now text=" + text);
}
ICharTermAttribute termAtt;
IOffsetAttribute offsetAtt;
IPositionIncrementAttribute posIncAtt;
IPositionLengthAttribute posLengthAtt;
ITypeAttribute typeAtt;
IList<string> tokens = new List<string>();
IList<string> types = new List<string>();
IList<int> positions = new List<int>();
IList<int> positionLengths = new List<int>();
IList<int> startOffsets = new List<int>();
IList<int> endOffsets = new List<int>();
int remainder = random.Next(10);
TextReader reader = new StringReader(text);
TokenStream ts;
using (ts = a.GetTokenStream("dummy", useCharFilter ? new MockCharFilter(reader, remainder) : reader))
{
bool isReset = false;
try
{
termAtt = ts.HasAttribute<ICharTermAttribute>() ? ts.GetAttribute<ICharTermAttribute>() : null;
offsetAtt = ts.HasAttribute<IOffsetAttribute>() ? ts.GetAttribute<IOffsetAttribute>() : null;
posIncAtt = ts.HasAttribute<IPositionIncrementAttribute>() ? ts.GetAttribute<IPositionIncrementAttribute>() : null;
posLengthAtt = ts.HasAttribute<IPositionLengthAttribute>() ? ts.GetAttribute<IPositionLengthAttribute>() : null;
typeAtt = ts.HasAttribute<ITypeAttribute>() ? ts.GetAttribute<ITypeAttribute>() : null;
ts.Reset();
isReset = true;
// First pass: save away "correct" tokens
while (ts.IncrementToken())
{
Assert.IsNotNull(termAtt, "has no CharTermAttribute");
tokens.Add(termAtt.ToString());
if (typeAtt != null)
{
types.Add(typeAtt.Type);
}
if (posIncAtt != null)
{
positions.Add(posIncAtt.PositionIncrement);
}
if (posLengthAtt != null)
{
positionLengths.Add(posLengthAtt.PositionLength);
}
if (offsetAtt != null)
{
startOffsets.Add(offsetAtt.StartOffset);
endOffsets.Add(offsetAtt.EndOffset);
}
}
// LUCENENET: We are doing this in the finally block to ensure it happens
// when there are exceptions thrown (such as when the assert fails).
//ts.End();
//ts.Dispose();
}
finally
{
if (!isReset)
{
try
{
// consume correctly
ts.Reset();
while (ts.IncrementToken());
//ts.End();
//ts.Dispose();
}
#pragma warning disable 168
catch (Exception ex)
#pragma warning restore 168
{
// ignore
}
}
ts.End(); // ts.end();
}
} // ts.close();
// verify reusing is "reproducible" and also get the normal tokenstream sanity checks
if (tokens.Count > 0)
{
// KWTokenizer (for example) can produce a token
// even when input is length 0:
if (text.Length != 0)
{
// (Optional) second pass: do something evil:
int evilness = random.Next(50);
if (evilness == 17)
{
if (VERBOSE)
{
Console.WriteLine(Thread.CurrentThread.Name + ": NOTE: baseTokenStreamTestCase: re-run analysis w/ exception");
}
// Throw an errant exception from the Reader:
MockReaderWrapper evilReader = new MockReaderWrapper(random, new StringReader(text));
evilReader.ThrowExcAfterChar(random.Next(text.Length)); // LUCENENET note, Next() is exclusive, so we don't need +1
reader = evilReader;
try
{
// NOTE: some Tokenizers go and read characters
// when you call .SetReader(TextReader), eg
// PatternTokenizer. this is a bit
// iffy... (really, they should only
// pull from the TextReader when you call
// .IncrementToken(), I think?), but we
// currently allow it, so, we must call
// a.TokenStream inside the try since we may
// hit the exc on init:
ts = a.GetTokenStream("dummy", useCharFilter ? (TextReader)new MockCharFilter(evilReader, remainder) : evilReader);
ts.Reset();
while (ts.IncrementToken()) ;
Assert.Fail("did not hit exception");
}
catch (Exception re)
{
Assert.IsTrue(MockReaderWrapper.IsMyEvilException(re));
}
try
{
ts.End();
}
// LUCENENET: Actual AssertionError type is Lucene.Net.Diagnostics.AssertionException
catch (AssertionError ae) when (ae.Message.Contains("End() called before IncrementToken() returned false!"))
{
// Catch & ignore MockTokenizer's
// anger...
// OK
}
finally
{
ts.Dispose();
}
}
else if (evilness == 7)
{
// Only consume a subset of the tokens:
int numTokensToRead = random.Next(tokens.Count);
if (VERBOSE)
{
Console.WriteLine(Thread.CurrentThread.Name + ": NOTE: baseTokenStreamTestCase: re-run analysis, only consuming " + numTokensToRead + " of " + tokens.Count + " tokens");
}
reader = new StringReader(text);
ts = a.GetTokenStream("dummy", useCharFilter ? (TextReader)new MockCharFilter(reader, remainder) : reader);
ts.Reset();
for (int tokenCount = 0; tokenCount < numTokensToRead; tokenCount++)
{
Assert.IsTrue(ts.IncrementToken());
}
try
{
ts.End();
}
// LUCENENET: Actual AssertionError type is Lucene.Net.Diagnostics.AssertionException
catch (AssertionError ae) when (ae.Message.Contains("End() called before IncrementToken() returned false!"))
{
// Catch & ignore MockTokenizer's
// anger...
// OK
}
finally
{
ts.Dispose();
}
}
}
}
// Final pass: verify clean tokenization matches
// results from first pass:
if (VERBOSE)
{
Console.WriteLine(Thread.CurrentThread.Name + ": NOTE: baseTokenStreamTestCase: re-run analysis; " + tokens.Count + " tokens");
}
reader = new StringReader(text);
long seed = random.Next();
random = new Random((int)seed);
if (random.Next(30) == 7)
{
if (VERBOSE)
{
Console.WriteLine(Thread.CurrentThread.Name + ": NOTE: baseTokenStreamTestCase: using spoon-feed reader");
}
reader = new MockReaderWrapper(random, reader);
}
ts = a.GetTokenStream("dummy", useCharFilter ? (TextReader)new MockCharFilter(reader, remainder) : reader);
if (typeAtt != null && posIncAtt != null && posLengthAtt != null && offsetAtt != null)
{
// offset + pos + posLength + type
AssertTokenStreamContents(ts, tokens.ToArray(), ToIntArray(startOffsets), ToIntArray(endOffsets), types.ToArray(), ToIntArray(positions), ToIntArray(positionLengths), text.Length, offsetsAreCorrect);
}
else if (typeAtt != null && posIncAtt != null && offsetAtt != null)
{
// offset + pos + type
AssertTokenStreamContents(ts, tokens.ToArray(), ToIntArray(startOffsets), ToIntArray(endOffsets), types.ToArray(), ToIntArray(positions), null, text.Length, offsetsAreCorrect);
}
else if (posIncAtt != null && posLengthAtt != null && offsetAtt != null)
{
// offset + pos + posLength
AssertTokenStreamContents(ts, tokens.ToArray(), ToIntArray(startOffsets), ToIntArray(endOffsets), null, ToIntArray(positions), ToIntArray(positionLengths), text.Length, offsetsAreCorrect);
}
else if (posIncAtt != null && offsetAtt != null)
{
// offset + pos
AssertTokenStreamContents(ts, tokens.ToArray(), ToIntArray(startOffsets), ToIntArray(endOffsets), null, ToIntArray(positions), null, text.Length, offsetsAreCorrect);
}
else if (offsetAtt != null)
{
// offset
AssertTokenStreamContents(ts, tokens.ToArray(), ToIntArray(startOffsets), ToIntArray(endOffsets), null, null, null, text.Length, offsetsAreCorrect);
}
else
{
// terms only
AssertTokenStreamContents(ts, tokens.ToArray());
}
if (field != null)
{
reader = new StringReader(text);
random = new Random((int)seed);
if (random.Next(30) == 7)
{
if (VERBOSE)
{
Console.WriteLine(Thread.CurrentThread.Name + ": NOTE: baseTokenStreamTestCase: indexing using spoon-feed reader");
}
reader = new MockReaderWrapper(random, reader);
}
field.SetReaderValue(useCharFilter ? (TextReader)new MockCharFilter(reader, remainder) : reader);
}
}
protected internal virtual string ToDot(Analyzer a, string inputText)
{
StringWriter sw = new StringWriter();
TokenStream ts = a.GetTokenStream("field", new StringReader(inputText));
ts.Reset();
(new TokenStreamToDot(inputText, ts, /*new StreamWriter(*/(TextWriter)sw/*)*/)).ToDot();
return sw.ToString();
}
protected internal virtual void ToDotFile(Analyzer a, string inputText, string localFileName)
{
using (StreamWriter w = new StreamWriter(new FileStream(localFileName, FileMode.Create), Encoding.UTF8)) // create or truncate the output file
{
TokenStream ts = a.GetTokenStream("field", new StringReader(inputText));
ts.Reset();
(new TokenStreamToDot(inputText, ts,/* new PrintWriter(*/w/*)*/)).ToDot();
}
}
[ExceptionToNetNumericConvention] // LUCENENET: Private API, keeping as-is
internal static int[] ToIntArray(IList<int> list)
{
int[] ret = new int[list.Count];
int offset = 0;
foreach (int i in list)
{
ret[offset++] = i;
}
return ret;
}
// *********** From Lucene 8.2.0 **************
/// <summary>Returns a random <see cref="AttributeFactory"/> impl</summary>
public static AttributeFactory NewAttributeFactory(Random random)
{
switch (random.Next(2))
{
case 0:
return Token.TOKEN_ATTRIBUTE_FACTORY;
case 1:
return AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY;
default:
throw new AssertionError("Please fix the Random.Next() call above");
}
//switch (random.nextInt(3))
//{
// case 0:
// return TokenStream.DEFAULT_TOKEN_ATTRIBUTE_FACTORY;
// case 1:
// return Token.TOKEN_ATTRIBUTE_FACTORY;
// case 2:
// return AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY;
// default:
// throw new AssertionError("Please fix the Random.nextInt() call above");
//}
}
/// <summary>Returns a random <see cref="AttributeFactory"/> impl</summary>
public static AttributeFactory NewAttributeFactory()
{
return NewAttributeFactory(Random);
}
// *********** End From Lucene 8.2.0 **************
}
}