| using J2N.Threading; |
| using Lucene.Net.Analysis.TokenAttributes; |
| using Lucene.Net.Documents; |
| using Lucene.Net.Index; |
| using Lucene.Net.Support; |
| using Lucene.Net.TestFramework; |
| using Lucene.Net.Util; |
| using System; |
| using System.Collections.Generic; |
| using System.Globalization; |
| using System.IO; |
| using System.Linq; |
| using System.Text; |
| using System.Threading; |
| using AttributeFactory = Lucene.Net.Util.AttributeSource.AttributeFactory; |
| using Assert = Lucene.Net.TestFramework.Assert; |
| using AssertionError = Lucene.Net.Diagnostics.AssertionException; |
| using Attribute = Lucene.Net.Util.Attribute; |
| using Directory = Lucene.Net.Store.Directory; |
| using Console = Lucene.Net.Support.SystemConsole; |
| |
| namespace Lucene.Net.Analysis |
| { |
| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
    /// <summary>
    /// Attribute that records if it was cleared or not. This is used
    /// for testing that <see cref="Lucene.Net.Util.AttributeSource.ClearAttributes()"/> was called correctly.
    /// </summary>
    public interface ICheckClearAttributesAttribute : IAttribute
    {
        /// <summary>
        /// Returns <c>true</c> if <c>Clear()</c> has been called since the last
        /// invocation of this method, and resets the recorded flag.
        /// </summary>
        bool GetAndResetClearCalled();
    }
| |
| /// <summary> |
| /// Attribute that records if it was cleared or not. this is used |
| /// for testing that <see cref="Lucene.Net.Util.AttributeSource.ClearAttributes()"/> was called correctly. |
| /// </summary> |
| public sealed class CheckClearAttributesAttribute : Attribute, ICheckClearAttributesAttribute |
| { |
| private bool clearCalled = false; |
| |
| public bool GetAndResetClearCalled() |
| { |
| bool old = clearCalled; |
| clearCalled = false; |
| return old; |
| } |
| |
| public override void Clear() |
| { |
| clearCalled = true; |
| } |
| |
| public override bool Equals(object other) |
| { |
| return (other is CheckClearAttributesAttribute && ((CheckClearAttributesAttribute)other).clearCalled == this.clearCalled); |
| } |
| |
| public override int GetHashCode() |
| { |
| return 76137213 ^ clearCalled.GetHashCode(); |
| } |
| |
| public override void CopyTo(IAttribute target) |
| { |
| ((CheckClearAttributesAttribute)target).Clear(); |
| } |
| } |
| |
| /// <summary> |
| /// Base class for all Lucene unit tests that use <see cref="TokenStream"/>s. |
| /// <para/> |
| /// When writing unit tests for analysis components, its highly recommended |
| /// to use the helper methods here (especially in conjunction with <see cref="MockAnalyzer"/> or |
| /// <see cref="MockTokenizer"/>), as they contain many assertions and checks to |
| /// catch bugs. |
| /// </summary> |
| /// <seealso cref="MockAnalyzer"/> |
| /// <seealso cref="MockTokenizer"/> |
| public abstract class BaseTokenStreamTestCase : LuceneTestCase |
| #if TESTFRAMEWORK_XUNIT |
| , Xunit.IClassFixture<BeforeAfterClass> |
| { |
| public BaseTokenStreamTestCase(BeforeAfterClass beforeAfter) |
| : base(beforeAfter) |
| { |
| } |
| #else |
| { |
| #endif |
| //#if TESTFRAMEWORK_MSTEST |
| // [Microsoft.VisualStudio.TestTools.UnitTesting.ClassInitializeAttribute(Microsoft.VisualStudio.TestTools.UnitTesting.InheritanceBehavior.BeforeEachDerivedClass)] |
| // new public static void BeforeClass(Microsoft.VisualStudio.TestTools.UnitTesting.TestContext context) |
| // { |
| // Lucene.Net.Util.LuceneTestCase.BeforeClass(context); |
| // } |
| |
| // [Microsoft.VisualStudio.TestTools.UnitTesting.ClassCleanupAttribute(Microsoft.VisualStudio.TestTools.UnitTesting.InheritanceBehavior.BeforeEachDerivedClass)] |
| // new public static void AfterClass() |
| // { |
| // Lucene.Net.Util.LuceneTestCase.AfterClass(); |
| // } |
| //#endif |
| |
| // some helpers to test Analyzers and TokenStreams: |
| |
| // LUCENENET specific - de-nested ICheckClearAttributesAttribute |
| |
| // LUCENENET specific - de-nested CheckClearAttributesAttribute |
| |
| // offsetsAreCorrect also validates: |
| // - graph offsets are correct (all tokens leaving from |
| // pos X have the same startOffset; all tokens |
| // arriving to pos Y have the same endOffset) |
| // - offsets only move forwards (startOffset >= |
| // lastStartOffset) |
| public static void AssertTokenStreamContents(TokenStream ts, string[] output, int[] startOffsets, int[] endOffsets, string[] types, int[] posIncrements, int[] posLengths, int? finalOffset, int? finalPosInc, bool[] keywordAtts, bool offsetsAreCorrect, byte[][] payloads) |
| { |
| // LUCENENET: Bug fix: NUnit throws an exception when something fails. |
| // This causes Dispose() to be skipped and it pollutes other tests indicating false negatives. |
| // Added this try-finally block to fix this. |
| try |
| { |
| |
| Assert.IsNotNull(output); |
| var checkClearAtt = ts.AddAttribute<ICheckClearAttributesAttribute>(); |
| |
| ICharTermAttribute termAtt = null; |
| if (output.Length > 0) |
| { |
| Assert.IsTrue(ts.HasAttribute<ICharTermAttribute>(), "has no CharTermAttribute"); |
| termAtt = ts.GetAttribute<ICharTermAttribute>(); |
| } |
| |
| IOffsetAttribute offsetAtt = null; |
| if (startOffsets != null || endOffsets != null || finalOffset != null) |
| { |
| Assert.IsTrue(ts.HasAttribute<IOffsetAttribute>(), "has no OffsetAttribute"); |
| offsetAtt = ts.GetAttribute<IOffsetAttribute>(); |
| } |
| |
| ITypeAttribute typeAtt = null; |
| if (types != null) |
| { |
| Assert.IsTrue(ts.HasAttribute<ITypeAttribute>(), "has no TypeAttribute"); |
| typeAtt = ts.GetAttribute<ITypeAttribute>(); |
| } |
| |
| IPositionIncrementAttribute posIncrAtt = null; |
| if (posIncrements != null || finalPosInc != null) |
| { |
| Assert.IsTrue(ts.HasAttribute<IPositionIncrementAttribute>(), "has no PositionIncrementAttribute"); |
| posIncrAtt = ts.GetAttribute<IPositionIncrementAttribute>(); |
| } |
| |
| IPositionLengthAttribute posLengthAtt = null; |
| if (posLengths != null) |
| { |
| Assert.IsTrue(ts.HasAttribute<IPositionLengthAttribute>(), "has no PositionLengthAttribute"); |
| posLengthAtt = ts.GetAttribute<IPositionLengthAttribute>(); |
| } |
| |
| IKeywordAttribute keywordAtt = null; |
| if (keywordAtts != null) |
| { |
| Assert.IsTrue(ts.HasAttribute<IKeywordAttribute>(), "has no KeywordAttribute"); |
| keywordAtt = ts.GetAttribute<IKeywordAttribute>(); |
| } |
| |
| // *********** From Lucene 8.2.0 ************** |
| |
| IPayloadAttribute payloadAtt = null; |
| if (payloads != null) |
| { |
| Assert.IsTrue(ts.HasAttribute<IPayloadAttribute>(), "has no PayloadAttribute"); |
| payloadAtt = ts.GetAttribute<IPayloadAttribute>(); |
| } |
| |
| // *********** End From Lucene 8.2.0 ************** |
| |
| // Maps position to the start/end offset: |
| IDictionary<int?, int?> posToStartOffset = new Dictionary<int?, int?>(); |
| IDictionary<int?, int?> posToEndOffset = new Dictionary<int?, int?>(); |
| |
| ts.Reset(); |
| int pos = -1; |
| int lastStartOffset = 0; |
| for (int i = 0; i < output.Length; i++) |
| { |
| // extra safety to enforce, that the state is not preserved and also assign bogus values |
| ts.ClearAttributes(); |
| termAtt.SetEmpty().Append("bogusTerm"); |
| if (offsetAtt != null) |
| { |
| offsetAtt.SetOffset(14584724, 24683243); |
| } |
| if (typeAtt != null) |
| { |
| typeAtt.Type = "bogusType"; |
| } |
| if (posIncrAtt != null) |
| { |
| posIncrAtt.PositionIncrement = 45987657; |
| } |
| if (posLengthAtt != null) |
| { |
| posLengthAtt.PositionLength = 45987653; |
| } |
| if (keywordAtt != null) |
| { |
| keywordAtt.IsKeyword = (i & 1) == 0; |
| } |
| // *********** From Lucene 8.2.0 ************** |
| if (payloadAtt != null) |
| { |
| payloadAtt.Payload = new BytesRef(new byte[] { 0x00, unchecked((byte)-0x21), 0x12, unchecked((byte)-0x43), 0x24 }); |
| } |
| // *********** End From Lucene 8.2.0 ************** |
| |
| bool reset = checkClearAtt.GetAndResetClearCalled(); // reset it, because we called clearAttribute() before |
| Assert.IsTrue(ts.IncrementToken(), "token " + i + " does not exist"); |
| Assert.IsTrue(reset, "ClearAttributes() was not called correctly in TokenStream chain"); |
| |
| Assert.AreEqual(output[i], termAtt.ToString(), "term " + i + ", output[i] = " + output[i] + ", termAtt = " + termAtt.ToString()); |
| if (startOffsets != null) |
| { |
| Assert.AreEqual(startOffsets[i], offsetAtt.StartOffset, "startOffset " + i); |
| } |
| if (endOffsets != null) |
| { |
| Assert.AreEqual(endOffsets[i], offsetAtt.EndOffset, "endOffset " + i); |
| } |
| if (types != null) |
| { |
| Assert.AreEqual(types[i], typeAtt.Type, "type " + i); |
| } |
| if (posIncrements != null) |
| { |
| Assert.AreEqual(posIncrements[i], posIncrAtt.PositionIncrement, "posIncrement " + i); |
| } |
| if (posLengths != null) |
| { |
| Assert.AreEqual(posLengths[i], posLengthAtt.PositionLength, "posLength " + i); |
| } |
| if (keywordAtts != null) |
| { |
| Assert.AreEqual(keywordAtts[i], keywordAtt.IsKeyword, "keywordAtt " + i); |
| } |
| // *********** From Lucene 8.2.0 ************** |
| if (payloads != null) |
| { |
| if (payloads[i] != null) |
| { |
| Assert.AreEqual(new BytesRef(payloads[i]), payloadAtt.Payload, "payloads " + i); |
| } |
| else |
| { |
| Assert.IsNull(payloads[i], "payloads " + i); |
| } |
| } |
| // *********** End From Lucene 8.2.0 ************** |
| |
| |
| // we can enforce some basic things about a few attributes even if the caller doesn't check: |
| if (offsetAtt != null) |
| { |
| int startOffset = offsetAtt.StartOffset; |
| int endOffset = offsetAtt.EndOffset; |
| if (finalOffset != null) |
| { |
| Assert.IsTrue(startOffset <= (int)finalOffset, "startOffset must be <= finalOffset"); |
| Assert.IsTrue(endOffset <= (int)finalOffset, "endOffset must be <= finalOffset: got endOffset=" + endOffset + " vs finalOffset=" + (int)finalOffset); |
| } |
| |
| if (offsetsAreCorrect) |
| { |
| Assert.IsTrue(offsetAtt.StartOffset >= lastStartOffset, "offsets must not go backwards startOffset=" + startOffset + " is < lastStartOffset=" + lastStartOffset); |
| lastStartOffset = offsetAtt.StartOffset; |
| } |
| |
| if (offsetsAreCorrect && posLengthAtt != null && posIncrAtt != null) |
| { |
| // Validate offset consistency in the graph, ie |
| // all tokens leaving from a certain pos have the |
| // same startOffset, and all tokens arriving to a |
| // certain pos have the same endOffset: |
| int posInc = posIncrAtt.PositionIncrement; |
| pos += posInc; |
| |
| int posLength = posLengthAtt.PositionLength; |
| |
| if (!posToStartOffset.TryGetValue(pos, out int? oldStartOffset)) |
| { |
| // First time we've seen a token leaving from this position: |
| posToStartOffset[pos] = startOffset; |
| //System.out.println(" + s " + pos + " -> " + startOffset); |
| } |
| else |
| { |
| // We've seen a token leaving from this position |
| // before; verify the startOffset is the same: |
| //System.out.println(" + vs " + pos + " -> " + startOffset); |
| Assert.AreEqual(oldStartOffset.GetValueOrDefault(), startOffset, "pos=" + pos + " posLen=" + posLength + " token=" + termAtt); |
| } |
| |
| int endPos = pos + posLength; |
| |
| if (!posToEndOffset.TryGetValue(endPos, out int? oldEndOffset)) |
| { |
| // First time we've seen a token arriving to this position: |
| posToEndOffset[endPos] = endOffset; |
| //System.out.println(" + e " + endPos + " -> " + endOffset); |
| } |
| else |
| { |
| // We've seen a token arriving to this position |
| // before; verify the endOffset is the same: |
| //System.out.println(" + ve " + endPos + " -> " + endOffset); |
| Assert.AreEqual(oldEndOffset.GetValueOrDefault(), endOffset, "pos=" + pos + " posLen=" + posLength + " token=" + termAtt); |
| } |
| } |
| } |
| if (posIncrAtt != null) |
| { |
| if (i == 0) |
| { |
| Assert.IsTrue(posIncrAtt.PositionIncrement >= 1, "first posIncrement must be >= 1"); |
| } |
| else |
| { |
| Assert.IsTrue(posIncrAtt.PositionIncrement >= 0, "posIncrement must be >= 0"); |
| } |
| } |
| if (posLengthAtt != null) |
| { |
| Assert.IsTrue(posLengthAtt.PositionLength >= 1, "posLength must be >= 1"); |
| } |
| } |
| |
| if (ts.IncrementToken()) |
| { |
| Assert.Fail("TokenStream has more tokens than expected (expected count=" + output.Length + "); extra token=" + termAtt); |
| } |
| |
| // repeat our extra safety checks for End() |
| ts.ClearAttributes(); |
| if (termAtt != null) |
| { |
| termAtt.SetEmpty().Append("bogusTerm"); |
| } |
| if (offsetAtt != null) |
| { |
| offsetAtt.SetOffset(14584724, 24683243); |
| } |
| if (typeAtt != null) |
| { |
| typeAtt.Type = "bogusType"; |
| } |
| if (posIncrAtt != null) |
| { |
| posIncrAtt.PositionIncrement = 45987657; |
| } |
| if (posLengthAtt != null) |
| { |
| posLengthAtt.PositionLength = 45987653; |
| } |
| |
| var reset_ = checkClearAtt.GetAndResetClearCalled(); // reset it, because we called clearAttribute() before |
| |
| ts.End(); |
| Assert.IsTrue(checkClearAtt.GetAndResetClearCalled(), "base.End()/ClearAttributes() was not called correctly in End()"); |
| |
| if (finalOffset != null) |
| { |
| Assert.AreEqual((int)finalOffset, offsetAtt.EndOffset, "finalOffset"); |
| } |
| if (offsetAtt != null) |
| { |
| Assert.IsTrue(offsetAtt.EndOffset >= 0, "finalOffset must be >= 0"); |
| } |
| if (finalPosInc != null) |
| { |
| Assert.AreEqual((int)finalPosInc, posIncrAtt.PositionIncrement, "finalPosInc"); |
| } |
| |
| //ts.Dispose(); |
| } |
| catch (Exception) |
| { |
| //ts.Reset(); |
| ts.ClearAttributes(); |
| ts.End(); |
| throw; |
| } |
| finally |
| { |
| ts.Dispose(); |
| } |
| } |
| |
| public static void AssertTokenStreamContents(TokenStream ts, string[] output, int[] startOffsets, int[] endOffsets, string[] types, int[] posIncrements, int[] posLengths, int? finalOffset, bool[] keywordAtts, bool offsetsAreCorrect) |
| { |
| AssertTokenStreamContents(ts, output, startOffsets, endOffsets, types, posIncrements, posLengths, finalOffset, null, null, offsetsAreCorrect, null); |
| } |
| |
| public static void AssertTokenStreamContents(TokenStream ts, string[] output, int[] startOffsets, int[] endOffsets, string[] types, int[] posIncrements, int[] posLengths, int? finalOffset, bool offsetsAreCorrect) |
| { |
| AssertTokenStreamContents(ts, output, startOffsets, endOffsets, types, posIncrements, posLengths, finalOffset, null, offsetsAreCorrect); |
| } |
| |
| public static void AssertTokenStreamContents(TokenStream ts, string[] output, int[] startOffsets, int[] endOffsets, string[] types, int[] posIncrements, int[] posLengths, int? finalOffset) |
| { |
| AssertTokenStreamContents(ts, output, startOffsets, endOffsets, types, posIncrements, posLengths, finalOffset, true); |
| } |
| |
| public static void AssertTokenStreamContents(TokenStream ts, string[] output, int[] startOffsets, int[] endOffsets, string[] types, int[] posIncrements, int? finalOffset) |
| { |
| AssertTokenStreamContents(ts, output, startOffsets, endOffsets, types, posIncrements, null, finalOffset); |
| } |
| |
| public static void AssertTokenStreamContents(TokenStream ts, string[] output, int[] startOffsets, int[] endOffsets, string[] types, int[] posIncrements) |
| { |
| AssertTokenStreamContents(ts, output, startOffsets, endOffsets, types, posIncrements, null, null); |
| } |
| |
| public static void AssertTokenStreamContents(TokenStream ts, string[] output) |
| { |
| AssertTokenStreamContents(ts, output, null, null, null, null, null, null); |
| } |
| |
| public static void AssertTokenStreamContents(TokenStream ts, string[] output, string[] types) |
| { |
| AssertTokenStreamContents(ts, output, null, null, types, null, null, null); |
| } |
| |
| public static void AssertTokenStreamContents(TokenStream ts, string[] output, int[] posIncrements) |
| { |
| AssertTokenStreamContents(ts, output, null, null, null, posIncrements, null, null); |
| } |
| |
| public static void AssertTokenStreamContents(TokenStream ts, string[] output, int[] startOffsets, int[] endOffsets) |
| { |
| AssertTokenStreamContents(ts, output, startOffsets, endOffsets, null, null, null, null); |
| } |
| |
| public static void AssertTokenStreamContents(TokenStream ts, string[] output, int[] startOffsets, int[] endOffsets, int? finalOffset) |
| { |
| AssertTokenStreamContents(ts, output, startOffsets, endOffsets, null, null, null, finalOffset); |
| } |
| |
| public static void AssertTokenStreamContents(TokenStream ts, string[] output, int[] startOffsets, int[] endOffsets, int[] posIncrements) |
| { |
| AssertTokenStreamContents(ts, output, startOffsets, endOffsets, null, posIncrements, null, null); |
| } |
| |
| public static void AssertTokenStreamContents(TokenStream ts, string[] output, int[] startOffsets, int[] endOffsets, int[] posIncrements, int? finalOffset) |
| { |
| AssertTokenStreamContents(ts, output, startOffsets, endOffsets, null, posIncrements, null, finalOffset); |
| } |
| |
| public static void AssertTokenStreamContents(TokenStream ts, string[] output, int[] startOffsets, int[] endOffsets, int[] posIncrements, int[] posLengths, int? finalOffset) |
| { |
| AssertTokenStreamContents(ts, output, startOffsets, endOffsets, null, posIncrements, posLengths, finalOffset); |
| } |
| |
| public static void AssertAnalyzesTo(Analyzer a, string input, string[] output, int[] startOffsets, int[] endOffsets, string[] types, int[] posIncrements) |
| { |
| CheckResetException(a, input); |
| AssertTokenStreamContents(a.GetTokenStream("dummy", new StringReader(input)), output, startOffsets, endOffsets, types, posIncrements, null, input.Length); |
| } |
| |
| public static void AssertAnalyzesTo(Analyzer a, string input, string[] output, int[] startOffsets, int[] endOffsets, string[] types, int[] posIncrements, int[] posLengths) |
| { |
| CheckResetException(a, input); |
| AssertTokenStreamContents(a.GetTokenStream("dummy", new StringReader(input)), output, startOffsets, endOffsets, types, posIncrements, posLengths, input.Length); |
| } |
| |
| public static void AssertAnalyzesTo(Analyzer a, string input, string[] output, int[] startOffsets, int[] endOffsets, string[] types, int[] posIncrements, int[] posLengths, bool offsetsAreCorrect) |
| { |
| CheckResetException(a, input); |
| AssertTokenStreamContents(a.GetTokenStream("dummy", new StringReader(input)), output, startOffsets, endOffsets, types, posIncrements, posLengths, input.Length, offsetsAreCorrect); |
| } |
| |
| // LUCENENET: Overload from Lucene 8.2.0 |
| public static void AssertAnalyzesTo(Analyzer a, string input, string[] output, int[] startOffsets, int[] endOffsets, string[] types, int[] posIncrements, int[] posLengths, bool graphOffsetsAreCorrect, byte[][] payloads) |
| { |
| CheckResetException(a, input); |
| AssertTokenStreamContents(a.GetTokenStream("dummy", input), output, startOffsets, endOffsets, types, posIncrements, posLengths, input.Length, null, null, graphOffsetsAreCorrect, payloads); |
| } |
| |
| public static void AssertAnalyzesTo(Analyzer a, string input, string[] output) |
| { |
| AssertAnalyzesTo(a, input, output, null, null, null, null, null); |
| } |
| |
| public static void AssertAnalyzesTo(Analyzer a, string input, string[] output, string[] types) |
| { |
| AssertAnalyzesTo(a, input, output, null, null, types, null, null); |
| } |
| |
| public static void AssertAnalyzesTo(Analyzer a, string input, string[] output, int[] posIncrements) |
| { |
| AssertAnalyzesTo(a, input, output, null, null, null, posIncrements, null); |
| } |
| |
| public static void AssertAnalyzesToPositions(Analyzer a, string input, string[] output, int[] posIncrements, int[] posLengths) |
| { |
| AssertAnalyzesTo(a, input, output, null, null, null, posIncrements, posLengths); |
| } |
| |
| public static void AssertAnalyzesTo(Analyzer a, string input, string[] output, int[] startOffsets, int[] endOffsets) |
| { |
| AssertAnalyzesTo(a, input, output, startOffsets, endOffsets, null, null, null); |
| } |
| |
| public static void AssertAnalyzesTo(Analyzer a, string input, string[] output, int[] startOffsets, int[] endOffsets, int[] posIncrements) |
| { |
| AssertAnalyzesTo(a, input, output, startOffsets, endOffsets, null, posIncrements, null); |
| } |
| |
        /// <summary>
        /// Verifies that the analyzer's token streams enforce the workflow contract:
        /// (1) calling <see cref="TokenStream.IncrementToken()"/> before <see cref="TokenStream.Reset()"/>
        /// must throw, and (2) requesting a second stream without disposing the first must throw.
        /// </summary>
        internal static void CheckResetException(Analyzer a, string input)
        {
            TokenStream ts = a.GetTokenStream("bogus", new StringReader(input));
            try
            {
                if (ts.IncrementToken())
                {
                    // IncrementToken() succeeded without Reset(): dump the state and fail.
                    ts.ReflectAsString(false);
                    Assert.Fail("didn't get expected exception when reset() not called");
                }
            }
#pragma warning disable 168
            catch (InvalidOperationException expected)
#pragma warning restore 168
            {
                //ok
            }
            catch (AssertionError expected) // LUCENENET: Actual AssertionError type is Lucene.Net.Diagnostics.AssertionException
            {
                // ok: MockTokenizer
                Assert.IsTrue(expected.Message != null && expected.Message.Contains("wrong state"), expected.Message);
            }
            catch (Exception unexpected)
            {
                // Any other exception type means the stream's state checking is broken.
                //unexpected.printStackTrace(System.err);
                Console.Error.WriteLine(unexpected.StackTrace);
                Assert.Fail("Got wrong exception when Reset() not called: " + unexpected);
            }
            finally
            {
                // consume correctly
                ts.Reset();
                while (ts.IncrementToken())
                {
                }
                ts.End();
                ts.Dispose();
            }

            // check for a missing Close()
            ts = a.GetTokenStream("bogus", new StringReader(input));
            ts.Reset();
            while (ts.IncrementToken())
            {
            }
            ts.End();
            try
            {
                // The previous stream was never disposed, so this must throw.
                ts = a.GetTokenStream("bogus", new StringReader(input));
                Assert.Fail("Didn't get expected exception when Dispose() not called");
            }
            catch (Exception)
            {
                // ok
            }
            finally
            {
                ts.Dispose();
            }
        }
| |
| /// <summary> |
| /// Simple utility method for testing stemmers |
| /// </summary> |
| public static void CheckOneTerm(Analyzer a, string input, string expected) |
| { |
| AssertAnalyzesTo(a, input, new string[] { expected }); |
| } |
| |
| #if !FEATURE_INSTANCE_TESTDATA_INITIALIZATION |
| /// <summary> |
| /// Utility method for blasting tokenstreams with data to make sure they don't do anything crazy |
| /// </summary> |
| public static void CheckRandomData(Random random, Analyzer a, int iterations) |
| { |
| CheckRandomData(random, a, iterations, 20, false, true); |
| } |
| |
| /// <summary> |
| /// Utility method for blasting tokenstreams with data to make sure they don't do anything crazy |
| /// </summary> |
| public static void CheckRandomData(Random random, Analyzer a, int iterations, int maxWordLength) |
| { |
| CheckRandomData(random, a, iterations, maxWordLength, false, true); |
| } |
| |
| /// <summary> |
| /// Utility method for blasting tokenstreams with data to make sure they don't do anything crazy |
| /// </summary> |
| /// <param name="simple"> true if only ascii strings will be used (try to avoid)</param> |
| public static void CheckRandomData(Random random, Analyzer a, int iterations, bool simple) |
| { |
| CheckRandomData(random, a, iterations, 20, simple, true); |
| } |
| #else |
| /// <summary> |
| /// Utility method for blasting tokenstreams with data to make sure they don't do anything crazy |
| /// <para/> |
| /// LUCENENET specific |
| /// Non-static to reduce the inter-class dependencies due to use of |
| /// static variables |
| /// </summary> |
| public void CheckRandomData(Random random, Analyzer a, int iterations) |
| { |
| CheckRandomData(random, a, iterations, 20, false, true); |
| } |
| |
| /// <summary> |
| /// Utility method for blasting tokenstreams with data to make sure they don't do anything crazy |
| /// <para/> |
| /// LUCENENET specific: |
| /// Non-static to reduce the inter-class dependencies due to use of |
| /// static variables |
| /// </summary> |
| public void CheckRandomData(Random random, Analyzer a, int iterations, int maxWordLength) |
| { |
| CheckRandomData(random, a, iterations, maxWordLength, false, true); |
| } |
| |
| /// <summary> |
| /// Utility method for blasting tokenstreams with data to make sure they don't do anything crazy |
| /// <para/> |
| /// LUCENENET specific: |
| /// Non-static to reduce the inter-class dependencies due to use of |
| /// static variables |
| /// </summary> |
| /// <param name="simple"> true if only ascii strings will be used (try to avoid)</param> |
| public void CheckRandomData(Random random, Analyzer a, int iterations, bool simple) |
| { |
| CheckRandomData(random, a, iterations, 20, simple, true); |
| } |
| #endif |
| |
        /// <summary>
        /// Worker thread used by <c>CheckRandomData</c>: replays the exact same analysis
        /// (same seed, same parameters) concurrently, hoping to catch thread hazards.
        /// Failures are recorded in <see cref="Failed"/>/<see cref="FirstException"/>
        /// rather than thrown, so the coordinating thread can report them.
        /// </summary>
        internal class AnalysisThread : ThreadJob
        {
            internal readonly int iterations;
            internal readonly int maxWordLength;
            internal readonly long seed; // shared across threads so all threads analyze identical text
            internal readonly Analyzer a;
            internal readonly bool useCharFilter;
            internal readonly bool simple;
            internal readonly bool offsetsAreCorrect;
            internal readonly RandomIndexWriter iw;
            private readonly CountdownEvent latch; // starting gun; may be null to start immediately

            // NOTE: not volatile because we don't want the tests to
            // add memory barriers (ie alter how threads
            // interact)... so this is just "best effort":
            public bool Failed { get; set; }
            public Exception FirstException { get; set; } = null;

            internal AnalysisThread(long seed, CountdownEvent latch, Analyzer a, int iterations, int maxWordLength,
                bool useCharFilter, bool simple, bool offsetsAreCorrect, RandomIndexWriter iw)
            {
                this.seed = seed;
                this.a = a;
                this.iterations = iterations;
                this.maxWordLength = maxWordLength;
                this.useCharFilter = useCharFilter;
                this.simple = simple;
                this.offsetsAreCorrect = offsetsAreCorrect;
                this.iw = iw;
                this.latch = latch;
            }

            public override void Run()
            {
                bool success = false;
                try
                {
                    // Wait for the coordinating thread's starting gun so all workers race together.
                    if (latch != null) latch.Wait();
                    // see the part in checkRandomData where it replays the same text again
                    // to verify reproducability/reuse: hopefully this would catch thread hazards.
                    CheckRandomData(new Random((int)seed), a, iterations, maxWordLength, useCharFilter, simple, offsetsAreCorrect, iw);
                    success = true;
                }
                catch (Exception e)
                {
                    //Console.WriteLine("Exception in Thread: " + e);
                    //throw;
                    // LUCENENET: Throwing an exception on another thread
                    // is pointless, so we set it to a variable so we can read
                    // it from our main thread (for debugging).
                    if (FirstException == null)
                    {
                        FirstException = e;
                    }
                }
                finally
                {
                    Failed = !success;
                }
            }
        }
| |
#if !FEATURE_INSTANCE_TESTDATA_INITIALIZATION
        /// <summary>
        /// Utility method for blasting tokenstreams with data to make sure they don't do
        /// anything crazy; assumes offsets are correct.
        /// </summary>
        public static void CheckRandomData(Random random, Analyzer a, int iterations, int maxWordLength, bool simple)
        {
            CheckRandomData(random, a, iterations, maxWordLength, simple, true);
        }

        /// <summary>
        /// Utility method for blasting tokenstreams with data to make sure they don't do
        /// anything crazy: runs the analysis once on the current thread, then replays the
        /// EXACT same analysis from several worker threads to flush out thread hazards.
        /// Occasionally also indexes the analyzed text via a <c>RandomIndexWriter</c>.
        /// </summary>
        public static void CheckRandomData(Random random, Analyzer a, int iterations, int maxWordLength, bool simple, bool offsetsAreCorrect)
#else
        /// <summary>
        /// Utility method for blasting tokenstreams with data to make sure they don't do
        /// anything crazy; assumes offsets are correct.
        /// </summary>
        public void CheckRandomData(Random random, Analyzer a, int iterations, int maxWordLength, bool simple)
        {
            CheckRandomData(random, a, iterations, maxWordLength, simple, true);
        }

        /// <summary>
        /// Utility method for blasting tokenstreams with data to make sure they don't do
        /// anything crazy: runs the analysis once on the current thread, then replays the
        /// EXACT same analysis from several worker threads to flush out thread hazards.
        /// Occasionally also indexes the analyzed text via a <c>RandomIndexWriter</c>.
        /// </summary>
        public void CheckRandomData(Random random, Analyzer a, int iterations, int maxWordLength, bool simple, bool offsetsAreCorrect)
#endif
        {
            CheckResetException(a, "best effort");
            // Capture one seed so the single-threaded run and every worker thread analyze
            // the exact same sequence of random text.
            long seed = random.Next();
            bool useCharFilter = random.NextBoolean();
            Directory dir = null;
            RandomIndexWriter iw = null;
            string postingsFormat = TestUtil.GetPostingsFormat("dummy");
            // Avoid pathological slowness/memory use: skip indexing large volumes with the
            // Memory/SimpleText postings formats.
            bool codecOk = iterations * maxWordLength < 100000
                || !(postingsFormat.Equals("Memory", StringComparison.Ordinal)
                || postingsFormat.Equals("SimpleText", StringComparison.Ordinal));
            if (Rarely(random) && codecOk)
            {
                dir = NewFSDirectory(CreateTempDir("bttc"));
                iw = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
                    this,
#endif
                    new Random((int)seed), dir, a);
            }

            bool success = false;
            try
            {
                CheckRandomData(new Random((int)seed), a, iterations, maxWordLength, useCharFilter, simple, offsetsAreCorrect, iw);
                // now test with multiple threads: note we do the EXACT same thing we did before in each thread,
                // so this should only really fail from another thread if its an actual thread problem
                int numThreads = TestUtil.NextInt32(random, 2, 4);
                var startingGun = new CountdownEvent(1);
                var threads = new AnalysisThread[numThreads];
                for (int i = 0; i < threads.Length; i++)
                {
                    threads[i] = new AnalysisThread(seed, startingGun, a, iterations, maxWordLength, useCharFilter, simple, offsetsAreCorrect, iw);
                }

                foreach (AnalysisThread thread in threads)
                {
                    thread.Start();
                }

                // Release all workers at once, then wait for them to finish.
                startingGun.Signal();
                foreach (var t in threads)
                {
#if !NETSTANDARD1_6
                    try
                    {
#endif
                        t.Join();
#if !NETSTANDARD1_6
                    }
#pragma warning disable 168
                    catch (ThreadInterruptedException e)
#pragma warning restore 168
                    {
                        fail("Thread interrupted");
                    }
#endif
                }

                //if (threads.Any(x => x.Failed))
                //    Fail("Thread threw exception");
                // Workers record failures instead of throwing (see AnalysisThread); surface them here.
                foreach (var t in threads)
                {
                    if (t.Failed)
                    {
                        fail("Thread threw exception: " + t.FirstException.ToString());
                    }
                }

                success = true;
            }
            finally
            {
                if (success)
                {
                    IOUtils.Dispose(iw, dir);
                }
                else
                {
                    IOUtils.DisposeWhileHandlingException(iw, dir); // checkindex
                }
            }
        }
| |
        /// <summary>
        /// Core of the random blast: for each iteration, picks text (either a slice of a
        /// real line-file document or a synthetic random analysis string), checks analysis
        /// consistency on it, and — when <paramref name="iw"/> is non-null — indexes the
        /// analyzed field with a randomized <c>FieldType</c> configuration.
        /// </summary>
        private static void CheckRandomData(Random random, Analyzer a, int iterations, int maxWordLength, bool useCharFilter, bool simple, bool offsetsAreCorrect, RandomIndexWriter iw)
        {
            LineFileDocs docs = new LineFileDocs(random);
            Document doc = null;
            Field field = null, currentField = null;
            StringReader bogus = new StringReader("");
            if (iw != null)
            {
                // Build a document with one "dummy" field whose FieldType options
                // (term vectors, norms, index options) are chosen at random.
                doc = new Document();
                FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
                if (random.NextBoolean())
                {
                    ft.StoreTermVectors = true;
                    ft.StoreTermVectorOffsets = random.NextBoolean();
                    ft.StoreTermVectorPositions = random.NextBoolean();
                    if (ft.StoreTermVectorPositions && !OldFormatImpersonationIsActive)
                    {
                        ft.StoreTermVectorPayloads = random.NextBoolean();
                    }
                }
                if (random.NextBoolean())
                {
                    ft.OmitNorms = true;
                }
                string pf = TestUtil.GetPostingsFormat("dummy");
                bool supportsOffsets = !DoesntSupportOffsets.Contains(pf);
                switch (random.Next(4))
                {
                    case 0:
                        ft.IndexOptions = IndexOptions.DOCS_ONLY;
                        break;

                    case 1:
                        ft.IndexOptions = IndexOptions.DOCS_AND_FREQS;
                        break;

                    case 2:
                        ft.IndexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
                        break;

                    default:
                        // Only index offsets when the postings format supports them and
                        // the analyzer is known to produce correct offsets.
                        if (supportsOffsets && offsetsAreCorrect)
                        {
                            ft.IndexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS;
                        }
                        else
                        {
                            ft.IndexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
                        }
                        break;
                }
                currentField = field = new Field("dummy", bogus, ft);
                doc.Add(currentField);
            }

            try
            {
                for (int i = 0; i < iterations; i++)
                {
                    string text;

                    if (random.Next(10) == 7)
                    {
                        // real data from linedocs
                        text = docs.NextDoc().Get("body");
                        if (text.Length > maxWordLength)
                        {
                            // Take a random slice from the text...:
                            int startPos = random.Next(text.Length - maxWordLength);
                            if (startPos > 0 && char.IsLowSurrogate(text[startPos]))
                            {
                                // Take care not to split up a surrogate pair:
                                startPos--;
                                Assert.True(char.IsHighSurrogate(text[startPos]));
                            }
                            int endPos = startPos + maxWordLength - 1;
                            if (char.IsHighSurrogate(text[endPos]))
                            {
                                // Take care not to split up a surrogate pair:
                                endPos--;
                            }
                            text = text.Substring(startPos, 1 + endPos - startPos);
                        }
                    }
                    else
                    {
                        // synthetic
                        text = TestUtil.RandomAnalysisString(random, maxWordLength, simple);
                    }

                    try
                    {
                        CheckAnalysisConsistency(random, a, useCharFilter, text, offsetsAreCorrect, currentField);
                        if (iw != null)
                        {
                            if (random.Next(7) == 0)
                            {
                                // pile up a multivalued field
                                var ft = field.FieldType;
                                currentField = new Field("dummy", bogus, ft);
                                doc.Add(currentField);
                            }
                            else
                            {
                                iw.AddDocument(doc);
                                if (doc.Fields.Count > 1)
                                {
                                    // back to 1 field
                                    currentField = field;
                                    doc.RemoveFields("dummy");
                                    doc.Add(currentField);
                                }
                            }
                        }
                    }
#pragma warning disable 168
                    catch (Exception t)
#pragma warning restore 168
                    {
                        // TODO: really we should pass a random seed to
                        // checkAnalysisConsistency then print it here too:
                        Console.Error.WriteLine("TEST FAIL: useCharFilter=" + useCharFilter + " text='" + Escape(text) + "'");
                        throw;
                    }
                }
            }
            finally
            {
                IOUtils.DisposeWhileHandlingException(docs);
            }
        }
| |
| public static string Escape(string s) |
| { |
| int charUpto = 0; |
| StringBuilder sb = new StringBuilder(); |
| while (charUpto < s.Length) |
| { |
| int c = s[charUpto]; |
| if (c == 0xa) |
| { |
| // Strangely, you cannot put \ u000A into Java |
| // sources (not in a comment nor a string |
| // constant)...: |
| sb.Append("\\n"); |
| } |
| else if (c == 0xd) |
| { |
| // ... nor \ u000D: |
| sb.Append("\\r"); |
| } |
| else if (c == '"') |
| { |
| sb.Append("\\\""); |
| } |
| else if (c == '\\') |
| { |
| sb.Append("\\\\"); |
| } |
| else if (c >= 0x20 && c < 0x80) |
| { |
| sb.Append((char)c); |
| } |
| else |
| { |
| // TODO: we can make ascii easier to read if we |
| // don't escape... |
| sb.AppendFormat(CultureInfo.InvariantCulture, "\\u{0:x4}", c); |
| } |
| charUpto++; |
| } |
| return sb.ToString(); |
| } |
| |
| public static void CheckAnalysisConsistency(Random random, Analyzer a, bool useCharFilter, string text) |
| { |
| CheckAnalysisConsistency(random, a, useCharFilter, text, true); |
| } |
| |
| public static void CheckAnalysisConsistency(Random random, Analyzer a, bool useCharFilter, string text, bool offsetsAreCorrect) |
| { |
| CheckAnalysisConsistency(random, a, useCharFilter, text, offsetsAreCorrect, null); |
| } |
| |
        /// <summary>
        /// Core consistency check: analyzes <paramref name="text"/> once to capture the
        /// "expected" tokens (terms plus whichever of offsets / position increments /
        /// position lengths / types the stream exposes), optionally runs a randomized
        /// "evil" pass (a reader that throws mid-stream, or only partial consumption),
        /// then re-analyzes and asserts the final pass reproduces the first pass exactly.
        /// If <paramref name="field"/> is non-<c>null</c>, the text is finally handed to it
        /// via <see cref="Field.SetReaderValue(TextReader)"/> so the caller can index it.
        /// </summary>
        private static void CheckAnalysisConsistency(Random random, Analyzer a, bool useCharFilter, string text, bool offsetsAreCorrect, Field field)
        {
            if (VERBOSE)
            {
                Console.WriteLine(Thread.CurrentThread.Name + ": NOTE: baseTokenStreamTestCase: get first token stream now text=" + text);
            }

            ICharTermAttribute termAtt;
            IOffsetAttribute offsetAtt;
            IPositionIncrementAttribute posIncAtt;
            IPositionLengthAttribute posLengthAtt;
            ITypeAttribute typeAtt;

            // Captured during the first pass; used as the expected values for the final pass.
            IList<string> tokens = new List<string>();
            IList<string> types = new List<string>();
            IList<int> positions = new List<int>();
            IList<int> positionLengths = new List<int>();
            IList<int> startOffsets = new List<int>();
            IList<int> endOffsets = new List<int>();

            int remainder = random.Next(10);
            TextReader reader = new StringReader(text);

            TokenStream ts;
            using (ts = a.GetTokenStream("dummy", useCharFilter ? new MockCharFilter(reader, remainder) : reader))
            {
                bool isReset = false;
                try
                {
                    // Only fetch the attributes the stream actually exposes; a missing
                    // attribute simply disables the corresponding checks below.
                    termAtt = ts.HasAttribute<ICharTermAttribute>() ? ts.GetAttribute<ICharTermAttribute>() : null;
                    offsetAtt = ts.HasAttribute<IOffsetAttribute>() ? ts.GetAttribute<IOffsetAttribute>() : null;
                    posIncAtt = ts.HasAttribute<IPositionIncrementAttribute>() ? ts.GetAttribute<IPositionIncrementAttribute>() : null;
                    posLengthAtt = ts.HasAttribute<IPositionLengthAttribute>() ? ts.GetAttribute<IPositionLengthAttribute>() : null;
                    typeAtt = ts.HasAttribute<ITypeAttribute>() ? ts.GetAttribute<ITypeAttribute>() : null;

                    ts.Reset();
                    isReset = true;

                    // First pass: save away "correct" tokens
                    while (ts.IncrementToken())
                    {
                        Assert.IsNotNull(termAtt, "has no CharTermAttribute");
                        tokens.Add(termAtt.ToString());
                        if (typeAtt != null)
                        {
                            types.Add(typeAtt.Type);
                        }
                        if (posIncAtt != null)
                        {
                            positions.Add(posIncAtt.PositionIncrement);
                        }
                        if (posLengthAtt != null)
                        {
                            positionLengths.Add(posLengthAtt.PositionLength);
                        }
                        if (offsetAtt != null)
                        {
                            startOffsets.Add(offsetAtt.StartOffset);
                            endOffsets.Add(offsetAtt.EndOffset);
                        }
                    }
                    // LUCENENET: We are doing this in the finally block to ensure it happens
                    // when there are exceptions thrown (such as when the assert fails).
                    //ts.End();
                    //ts.Dispose();
                }
                finally
                {
                    // The stream must be fully consumed and ended before the enclosing
                    // using block disposes it, or MockTokenizer-style streams will complain.
                    if (!isReset)
                    {
                        try
                        {
                            // consume correctly
                            ts.Reset();
                            while (ts.IncrementToken());
                            //ts.End();
                            //ts.Dispose();
                        }
#pragma warning disable 168
                        catch (Exception ex)
#pragma warning restore 168
                        {
                            // ignore
                        }
                    }
                    ts.End(); // ts.end();
                }
            } // ts.close();

            // verify reusing is "reproducible" and also get the normal tokenstream sanity checks
            if (tokens.Count > 0)
            {
                // KWTokenizer (for example) can produce a token
                // even when input is length 0:
                if (text.Length != 0)
                {
                    // (Optional) second pass: do something evil:
                    int evilness = random.Next(50);
                    if (evilness == 17)
                    {
                        if (VERBOSE)
                        {
                            Console.WriteLine(Thread.CurrentThread.Name + ": NOTE: baseTokenStreamTestCase: re-run analysis w/ exception");
                        }
                        // Throw an errant exception from the Reader:

                        MockReaderWrapper evilReader = new MockReaderWrapper(random, new StringReader(text));
                        evilReader.ThrowExcAfterChar(random.Next(text.Length)); // LUCENENET note, Next() is exclusive, so we don't need +1
                        reader = evilReader;

                        try
                        {
                            // NOTE: some Tokenizers go and read characters
                            // when you call .SetReader(TextReader), eg
                            // PatternTokenizer. this is a bit
                            // iffy... (really, they should only
                            // pull from the TextReader when you call
                            // .IncremenToken(), I think?), but we
                            // currently allow it, so, we must call
                            // a.TokenStream inside the try since we may
                            // hit the exc on init:
                            ts = a.GetTokenStream("dummy", useCharFilter ? (TextReader)new MockCharFilter(evilReader, remainder) : evilReader);
                            ts.Reset();
                            while (ts.IncrementToken()) ;
                            Assert.Fail("did not hit exception");
                        }
                        catch (Exception re)
                        {
                            // Only the planted exception from MockReaderWrapper is acceptable here.
                            Assert.IsTrue(MockReaderWrapper.IsMyEvilException(re));
                        }

                        try
                        {
                            ts.End();
                        }
                        // LUCENENET: Actual AssertionError type is Lucene.Net.Diagnostics.AssertionException
                        catch (AssertionError ae) when (ae.Message.Contains("End() called before IncrementToken() returned false!"))
                        {
                            // Catch & ignore MockTokenizer's
                            // anger...
                            // OK
                        }
                        finally
                        {
                            ts.Dispose();
                        }
                    }
                    else if (evilness == 7)
                    {
                        // Only consume a subset of the tokens:
                        int numTokensToRead = random.Next(tokens.Count);
                        if (VERBOSE)
                        {
                            Console.WriteLine(Thread.CurrentThread.Name + ": NOTE: baseTokenStreamTestCase: re-run analysis, only consuming " + numTokensToRead + " of " + tokens.Count + " tokens");
                        }

                        reader = new StringReader(text);
                        ts = a.GetTokenStream("dummy", useCharFilter ? (TextReader)new MockCharFilter(reader, remainder) : reader);
                        ts.Reset();
                        for (int tokenCount = 0; tokenCount < numTokensToRead; tokenCount++)
                        {
                            Assert.IsTrue(ts.IncrementToken());
                        }

                        try
                        {
                            ts.End();
                        }
                        // LUCENENET: Actual AssertionError type is Lucene.Net.Diagnostics.AssertionException
                        catch (AssertionError ae) when (ae.Message.Contains("End() called before IncrementToken() returned false!"))
                        {
                            // Catch & ignore MockTokenizer's
                            // anger...
                            // OK
                        }
                        finally
                        {
                            ts.Dispose();
                        }
                    }
                }
            }

            // Final pass: verify clean tokenization matches
            // results from first pass:

            if (VERBOSE)
            {
                Console.WriteLine(Thread.CurrentThread.Name + ": NOTE: baseTokenStreamTestCase: re-run analysis; " + tokens.Count + " tokens");
            }
            reader = new StringReader(text);

            // Re-seed so the spoon-feed decision below can be replayed identically
            // for the field-indexing branch at the bottom.
            long seed = random.Next();
            random = new Random((int)seed);
            if (random.Next(30) == 7)
            {
                if (VERBOSE)
                {
                    Console.WriteLine(Thread.CurrentThread.Name + ": NOTE: baseTokenStreamTestCase: using spoon-feed reader");
                }

                reader = new MockReaderWrapper(random, reader);
            }

            ts = a.GetTokenStream("dummy", useCharFilter ? (TextReader)new MockCharFilter(reader, remainder) : reader);
            // Dispatch to the AssertTokenStreamContents overload matching the set of
            // attributes this stream exposes:
            if (typeAtt != null && posIncAtt != null && posLengthAtt != null && offsetAtt != null)
            {
                // offset + pos + posLength + type
                AssertTokenStreamContents(ts, tokens.ToArray(), ToIntArray(startOffsets), ToIntArray(endOffsets), types.ToArray(), ToIntArray(positions), ToIntArray(positionLengths), text.Length, offsetsAreCorrect);
            }
            else if (typeAtt != null && posIncAtt != null && offsetAtt != null)
            {
                // offset + pos + type
                AssertTokenStreamContents(ts, tokens.ToArray(), ToIntArray(startOffsets), ToIntArray(endOffsets), types.ToArray(), ToIntArray(positions), null, text.Length, offsetsAreCorrect);
            }
            else if (posIncAtt != null && posLengthAtt != null && offsetAtt != null)
            {
                // offset + pos + posLength
                AssertTokenStreamContents(ts, tokens.ToArray(), ToIntArray(startOffsets), ToIntArray(endOffsets), null, ToIntArray(positions), ToIntArray(positionLengths), text.Length, offsetsAreCorrect);
            }
            else if (posIncAtt != null && offsetAtt != null)
            {
                // offset + pos
                AssertTokenStreamContents(ts, tokens.ToArray(), ToIntArray(startOffsets), ToIntArray(endOffsets), null, ToIntArray(positions), null, text.Length, offsetsAreCorrect);
            }
            else if (offsetAtt != null)
            {
                // offset
                AssertTokenStreamContents(ts, tokens.ToArray(), ToIntArray(startOffsets), ToIntArray(endOffsets), null, null, null, text.Length, offsetsAreCorrect);
            }
            else
            {
                // terms only
                AssertTokenStreamContents(ts, tokens.ToArray());
            }

            if (field != null)
            {
                reader = new StringReader(text);
                // Replay the same seed so the field sees the same spoon-feed decision
                // as the final verification pass above.
                random = new Random((int)seed);
                if (random.Next(30) == 7)
                {
                    if (VERBOSE)
                    {
                        Console.WriteLine(Thread.CurrentThread.Name + ": NOTE: baseTokenStreamTestCase: indexing using spoon-feed reader");
                    }

                    reader = new MockReaderWrapper(random, reader);
                }

                field.SetReaderValue(useCharFilter ? (TextReader)new MockCharFilter(reader, remainder) : reader);
            }
        }
| |
| protected internal virtual string ToDot(Analyzer a, string inputText) |
| { |
| StringWriter sw = new StringWriter(); |
| TokenStream ts = a.GetTokenStream("field", new StringReader(inputText)); |
| ts.Reset(); |
| (new TokenStreamToDot(inputText, ts, /*new StreamWriter(*/(TextWriter)sw/*)*/)).ToDot(); |
| return sw.ToString(); |
| } |
| |
| protected internal virtual void ToDotFile(Analyzer a, string inputText, string localFileName) |
| { |
| using (StreamWriter w = new StreamWriter(new FileStream(localFileName, FileMode.Open), Encoding.UTF8)) |
| { |
| TokenStream ts = a.GetTokenStream("field", new StringReader(inputText)); |
| ts.Reset(); |
| (new TokenStreamToDot(inputText, ts,/* new PrintWriter(*/w/*)*/)).ToDot(); |
| } |
| } |
| |
| [ExceptionToNetNumericConvention] // LUCENENET: Private API, keeping as-is |
| internal static int[] ToIntArray(IList<int> list) |
| { |
| int[] ret = new int[list.Count]; |
| int offset = 0; |
| foreach (int i in list) |
| { |
| ret[offset++] = i; |
| } |
| return ret; |
| } |
| |
| // *********** From Lucene 8.2.0 ************** |
| |
| /// <summary>Returns a random <see cref="AttributeFactory"/> impl</summary> |
| public static AttributeFactory NewAttributeFactory(Random random) |
| { |
| switch (random.nextInt(2)) |
| { |
| case 0: |
| return Token.TOKEN_ATTRIBUTE_FACTORY; |
| case 1: |
| return AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY; |
| default: |
| throw new AssertionError("Please fix the Random.nextInt() call above"); |
| } |
| |
| //switch (random.nextInt(3)) |
| //{ |
| // case 0: |
| // return TokenStream.DEFAULT_TOKEN_ATTRIBUTE_FACTORY; |
| // case 1: |
| // return Token.TOKEN_ATTRIBUTE_FACTORY; |
| // case 2: |
| // return AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY; |
| // default: |
| // throw new AssertionError("Please fix the Random.nextInt() call above"); |
| //} |
| } |
| |
| /// <summary>Returns a random <see cref="AttributeFactory"/> impl</summary> |
| public static AttributeFactory NewAttributeFactory() |
| { |
| return NewAttributeFactory(Random); |
| } |
| |
| // *********** End From Lucene 8.2.0 ************** |
| } |
| } |