| using J2N.Threading; |
| using J2N.Threading.Atomic; |
| using Lucene.Net.Analysis; |
| using Lucene.Net.Analysis.TokenAttributes; |
| using Lucene.Net.Codecs; |
| using Lucene.Net.Documents; |
| using Lucene.Net.Randomized.Generators; |
| using Lucene.Net.Search; |
| using Lucene.Net.Store; |
| using Lucene.Net.Support; |
| using Lucene.Net.Util; |
| using System; |
| using System.Collections.Generic; |
| using System.Linq; |
| using System.Threading; |
| using JCG = J2N.Collections.Generic; |
| using static Lucene.Net.Index.TermsEnum; |
| using Assert = Lucene.Net.TestFramework.Assert; |
| using AssertionError = Lucene.Net.Diagnostics.AssertionException; |
| using Attribute = Lucene.Net.Util.Attribute; |
| using System.Diagnostics.CodeAnalysis; |
| |
| #if TESTFRAMEWORK_MSTEST |
| using Test = Microsoft.VisualStudio.TestTools.UnitTesting.TestMethodAttribute; |
| #elif TESTFRAMEWORK_NUNIT |
| using Test = NUnit.Framework.TestAttribute; |
| #elif TESTFRAMEWORK_XUNIT |
| using Test = Lucene.Net.TestFramework.SkippableFactAttribute; |
| #endif |
| |
| namespace Lucene.Net.Index |
| { |
| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| /// <summary> |
| /// Base class aiming at testing <see cref="TermVectorsFormat"/>. |
| /// To test a new format, all you need is to register a new <see cref="Codec"/> which |
| /// uses it and extend this class and override <see cref="BaseIndexFileFormatTestCase.GetCodec()"/>. |
| /// <para/> |
| /// @lucene.experimental |
| /// </summary> |
| public abstract class BaseTermVectorsFormatTestCase : BaseIndexFileFormatTestCase |
| #if TESTFRAMEWORK_XUNIT |
| , Xunit.IClassFixture<BeforeAfterClass> |
| { |
| public BaseTermVectorsFormatTestCase(BeforeAfterClass beforeAfter) |
| : base(beforeAfter) |
| { |
| } |
| #else |
| { |
| #endif |
| /// <summary> |
| /// A combination of term vectors options. |
| /// </summary> |
        protected internal enum Options
        {
            /// <summary>positions: false, offsets: false, payloads: false</summary>
            NONE,
            /// <summary>positions: true, offsets: false, payloads: false</summary>
            POSITIONS,
            /// <summary>positions: false, offsets: true, payloads: false</summary>
            OFFSETS,
            /// <summary>positions: true, offsets: true, payloads: false</summary>
            POSITIONS_AND_OFFSETS,
            /// <summary>positions: true, offsets: false, payloads: true</summary>
            POSITIONS_AND_PAYLOADS,
            /// <summary>positions: true, offsets: true, payloads: true</summary>
            POSITIONS_AND_OFFSETS_AND_PAYLOADS
            // NOTE: the boolean flags for each member are materialized by OptionsWrapper.
            // Member ordering matters: the range overload of ValidOptions compares ordinals.
        }
| |
| private class OptionsWrapper |
| { |
| internal bool positions, offsets, payloads; |
| |
| private void SetOptionsWrapper(bool positions, bool offsets, bool payloads) |
| { |
| this.positions = positions; |
| this.offsets = offsets; |
| this.payloads = payloads; |
| } |
| |
| public OptionsWrapper(Options opt) |
| { |
| switch (opt) |
| { |
| case Options.NONE: |
| SetOptionsWrapper(false, false, false); |
| break; |
| |
| case Options.POSITIONS: |
| SetOptionsWrapper(true, false, false); |
| break; |
| |
| case Options.OFFSETS: |
| SetOptionsWrapper(false, true, false); |
| break; |
| |
| case Options.POSITIONS_AND_OFFSETS: |
| SetOptionsWrapper(true, true, false); |
| break; |
| |
| case Options.POSITIONS_AND_PAYLOADS: |
| SetOptionsWrapper(true, false, true); |
| break; |
| |
| case Options.POSITIONS_AND_OFFSETS_AND_PAYLOADS: |
| SetOptionsWrapper(true, true, true); |
| break; |
| |
| default: |
| throw new InvalidOperationException("Invalid Options enum type"); |
| } |
| } |
| |
| public static IEnumerable<Options> GetAsEnumer() |
| { |
| return (Options[])Enum.GetValues(typeof(Options)); |
| } |
| |
| public static IEnumerable<Options> GetAsEnumer(Options startInc, Options endInc) |
| { |
| foreach (Options opt in Enum.GetValues(typeof(Options))) |
| { |
| if (opt >= startInc && opt <= endInc) |
| yield return opt; |
| } |
| } |
| } |
| |
        /// <summary>
        /// Returns all term vector option combinations supported by the format under test.
        /// Override to restrict the set for formats that do not support every combination.
        /// </summary>
        protected virtual IEnumerable<Options> ValidOptions()
        {
            return OptionsWrapper.GetAsEnumer();
        }
| |
        /// <summary>
        /// Returns the option combinations between <paramref name="startInc"/> and
        /// <paramref name="endInc"/> (both inclusive, by enum ordinal).
        /// </summary>
        protected virtual IEnumerable<Options> ValidOptions(Options startInc, Options endInc)
        {
            return OptionsWrapper.GetAsEnumer(startInc, endInc);
        }
| |
| protected virtual Options RandomOptions() |
| { |
| return RandomPicks.RandomFrom(Random, new List<Options>(ValidOptions())); |
| } |
| |
| protected virtual FieldType FieldType(Options options) |
| { |
| var ft = new FieldType(TextField.TYPE_NOT_STORED) |
| { |
| StoreTermVectors = true, |
| StoreTermVectorPositions = (new OptionsWrapper(options)).positions, |
| StoreTermVectorOffsets = (new OptionsWrapper(options)).offsets, |
| StoreTermVectorPayloads = (new OptionsWrapper(options)).payloads |
| }; |
| ft.Freeze(); |
| return ft; |
| } |
| |
| protected virtual BytesRef RandomPayload() |
| { |
| int len = Random.Next(5); |
| if (len == 0) |
| { |
| return null; |
| } |
| BytesRef payload = new BytesRef(len); |
| Random.NextBytes(payload.Bytes); |
| payload.Length = len; |
| return payload; |
| } |
| |
| protected override void AddRandomFields(Document doc) |
| { |
| foreach (Options opts in ValidOptions()) |
| { |
| FieldType ft = FieldType(opts); |
| int numFields = Random.Next(5); |
| for (int j = 0; j < numFields; ++j) |
| { |
| doc.Add(new Field("f_" + opts, TestUtil.RandomSimpleString(Random, 2), ft)); |
| } |
| } |
| } |
| |
| // custom impl to test cases that are forbidden by the default OffsetAttribute impl |
| private class PermissiveOffsetAttribute : Attribute, IOffsetAttribute // LUCENENET specific - renamed from PermissiveOffsetAttributeImpl |
| { |
| internal int start, end; |
| |
| public int StartOffset => start; |
| |
| public int EndOffset => end; |
| |
| public void SetOffset(int startOffset, int endOffset) |
| { |
| // no check! |
| start = startOffset; |
| end = endOffset; |
| } |
| |
| public override void Clear() |
| { |
| start = end = 0; |
| } |
| |
| public override bool Equals(object other) |
| { |
| if (other == this) |
| { |
| return true; |
| } |
| |
| if (other is PermissiveOffsetAttribute) |
| { |
| PermissiveOffsetAttribute o = (PermissiveOffsetAttribute)other; |
| return o.start == start && o.end == end; |
| } |
| |
| return false; |
| } |
| |
| public override int GetHashCode() |
| { |
| return start + 31 * end; |
| } |
| |
| public override void CopyTo(IAttribute target) |
| { |
| OffsetAttribute t = (OffsetAttribute)target; |
| t.SetOffset(start, end); |
| } |
| } |
| |
        // TODO: use CannedTokenStream?
        /// <summary>
        /// A <see cref="TokenStream"/> that emits a pre-generated random token sequence
        /// (terms, position increments, offsets and payloads) and records everything it
        /// will emit, so that <c>AssertEquals</c> can later verify what the codec wrote.
        /// </summary>
        protected internal class RandomTokenStream : TokenStream
        {
            private readonly BaseTermVectorsFormatTestCase outerInstance;

            internal readonly string[] terms;
            internal readonly BytesRef[] termBytes;
            internal readonly int[] positionsIncrements;
            internal readonly int[] positions;
            internal readonly int[] startOffsets, endOffsets;
            internal readonly BytesRef[] payloads;

            // term -> number of occurrences in this stream
            internal readonly IDictionary<string, int?> freqs;
            // absolute position -> indexes of tokens at that position
            internal readonly IDictionary<int?, ISet<int?>> positionToTerms;
            // start offset -> indexes of tokens starting there
            internal readonly IDictionary<int?, ISet<int?>> startOffsetToTerms;

            internal readonly ICharTermAttribute termAtt;
            internal readonly IPositionIncrementAttribute piAtt;
            internal readonly IOffsetAttribute oAtt;
            internal readonly IPayloadAttribute pAtt;
            // index of the next token to emit from IncrementToken
            internal int i = 0;

            protected internal RandomTokenStream(BaseTermVectorsFormatTestCase outerInstance, int len, string[] sampleTerms, BytesRef[] sampleTermBytes)
                : this(outerInstance, len, sampleTerms, sampleTermBytes, Rarely()) // rarely: generate invalid (backwards) offsets
            {
                this.outerInstance = outerInstance;
            }

            protected internal RandomTokenStream(BaseTermVectorsFormatTestCase outerInstance, int len, string[] sampleTerms, BytesRef[] sampleTermBytes, bool offsetsGoBackwards)
            {
                this.outerInstance = outerInstance;
                terms = new string[len];
                termBytes = new BytesRef[len];
                positionsIncrements = new int[len];
                positions = new int[len];
                startOffsets = new int[len];
                endOffsets = new int[len];
                payloads = new BytesRef[len];
                for (int i = 0; i < len; ++i)
                {
                    // pick a random sample term; the first position increment must be >= 1
                    int o = Random.Next(sampleTerms.Length);
                    terms[i] = sampleTerms[o];
                    termBytes[i] = sampleTermBytes[o];
                    positionsIncrements[i] = TestUtil.NextInt32(Random, i == 0 ? 1 : 0, 10);
                    if (offsetsGoBackwards)
                    {
                        // intentionally unordered offsets; only accepted because the
                        // PermissiveOffsetAttribute performs no validation
                        startOffsets[i] = Random.Next();
                        endOffsets[i] = Random.Next();
                    }
                    else
                    {
                        if (i == 0)
                        {
                            startOffsets[i] = TestUtil.NextInt32(Random, 0, 1 << 16);
                        }
                        else
                        {
                            startOffsets[i] = startOffsets[i - 1] + TestUtil.NextInt32(Random, 0, Rarely() ? 1 << 16 : 20);
                        }
                        endOffsets[i] = startOffsets[i] + TestUtil.NextInt32(Random, 0, Rarely() ? 1 << 10 : 20);
                    }
                }

                // derive absolute positions from the increments
                for (int i = 0; i < len; ++i)
                {
                    if (i == 0)
                    {
                        positions[i] = positionsIncrements[i] - 1;
                    }
                    else
                    {
                        positions[i] = positions[i - 1] + positionsIncrements[i];
                    }
                }
                if (Rarely())
                {
                    // rarely: every token shares the same payload instance
                    Arrays.Fill(payloads, outerInstance.RandomPayload());
                }
                else
                {
                    for (int i = 0; i < len; ++i)
                    {
                        payloads[i] = outerInstance.RandomPayload();
                    }
                }

                // build the reverse lookups used by the verification code
                positionToTerms = new Dictionary<int?, ISet<int?>>(len);
                startOffsetToTerms = new Dictionary<int?, ISet<int?>>(len);
                for (int i = 0; i < len; ++i)
                {
                    if (!positionToTerms.TryGetValue(positions[i], out ISet<int?> positionTerms))
                    {
                        positionToTerms[positions[i]] = positionTerms = new JCG.HashSet<int?>(1);
                    }
                    positionTerms.Add(i);
                    if (!startOffsetToTerms.TryGetValue(startOffsets[i], out ISet<int?> startOffsetTerms))
                    {
                        startOffsetToTerms[startOffsets[i]] = startOffsetTerms = new JCG.HashSet<int?>(1);
                    }
                    startOffsetTerms.Add(i);
                }

                // count term frequencies
                freqs = new Dictionary<string, int?>();
                foreach (string term in terms)
                {
                    if (freqs.TryGetValue(term, out int? freq))
                    {
                        freqs[term] = freq + 1;
                    }
                    else
                    {
                        freqs[term] = 1;
                    }
                }

                // register the permissive offset attribute first so backwards offsets are accepted
                AddAttributeImpl(new PermissiveOffsetAttribute());

                termAtt = AddAttribute<ICharTermAttribute>();
                piAtt = AddAttribute<IPositionIncrementAttribute>();
                oAtt = AddAttribute<IOffsetAttribute>();
                pAtt = AddAttribute<IPayloadAttribute>();
            }

            /// <summary>
            /// Returns <c>true</c> if at least one token carries a non-empty payload.
            /// </summary>
            public virtual bool HasPayloads()
            {
                foreach (BytesRef payload in payloads)
                {
                    if (payload != null && payload.Length > 0)
                    {
                        return true;
                    }
                }
                return false;
            }

            public sealed override bool IncrementToken()
            {
                if (i < terms.Length)
                {
                    // NOTE(review): all four attributes are overwritten wholesale each call,
                    // which is presumably why ClearAttributes() is not invoked here.
                    termAtt.SetLength(0).Append(terms[i]);
                    piAtt.PositionIncrement = positionsIncrements[i];
                    oAtt.SetOffset(startOffsets[i], endOffsets[i]);
                    pAtt.Payload = payloads[i];
                    ++i;
                    return true;
                }
                else
                {
                    return false;
                }
            }
        }
| |
| protected internal class RandomDocument |
| { |
| private readonly BaseTermVectorsFormatTestCase outerInstance; |
| |
| internal readonly string[] fieldNames; |
| internal readonly FieldType[] fieldTypes; |
| internal readonly RandomTokenStream[] tokenStreams; |
| |
| protected internal RandomDocument(BaseTermVectorsFormatTestCase outerInstance, int fieldCount, int maxTermCount, Options options, string[] fieldNames, string[] sampleTerms, BytesRef[] sampleTermBytes) |
| { |
| this.outerInstance = outerInstance; |
| if (fieldCount > fieldNames.Length) |
| { |
| throw new ArgumentException(); |
| } |
| this.fieldNames = new string[fieldCount]; |
| fieldTypes = new FieldType[fieldCount]; |
| tokenStreams = new RandomTokenStream[fieldCount]; |
| Arrays.Fill(fieldTypes, outerInstance.FieldType(options)); |
| ISet<string> usedFileNames = new JCG.HashSet<string>(); |
| for (int i = 0; i < fieldCount; ++i) |
| { |
| // LUCENENET NOTE: Using a simple Linq query to filter rather than using brute force makes this a lot |
| // faster (and won't infinitely retry due to poor random distribution). |
| this.fieldNames[i] = RandomPicks.RandomFrom(Random, fieldNames.Except(usedFileNames).ToArray()); |
| //do |
| //{ |
| // this.FieldNames[i] = RandomPicks.RandomFrom(Random(), fieldNames); |
| //} while (usedFileNames.Contains(this.FieldNames[i])); |
| |
| usedFileNames.Add(this.fieldNames[i]); |
| tokenStreams[i] = new RandomTokenStream(outerInstance, TestUtil.NextInt32(Random, 1, maxTermCount), sampleTerms, sampleTermBytes); |
| } |
| } |
| |
| public virtual Document ToDocument() |
| { |
| Document doc = new Document(); |
| for (int i = 0; i < fieldNames.Length; ++i) |
| { |
| doc.Add(new Field(fieldNames[i], tokenStreams[i], fieldTypes[i])); |
| } |
| return doc; |
| } |
| } |
| |
        /// <summary>
        /// Factory that pre-computes a pool of random field names and random terms
        /// (with their <see cref="BytesRef"/> forms) from which <see cref="RandomDocument"/>s
        /// are built.
        /// </summary>
        protected class RandomDocumentFactory
        {
            private readonly BaseTermVectorsFormatTestCase outerInstance;

            private readonly string[] fieldNames;
            private readonly string[] terms;
            private readonly BytesRef[] termBytes;

            protected internal RandomDocumentFactory(BaseTermVectorsFormatTestCase outerInstance, int distinctFieldNames, int disctinctTerms)
            {
                this.outerInstance = outerInstance;
                // generate distinct random field names; "id" is reserved for document lookup (see AddId)
                ISet<string> fieldNames = new JCG.HashSet<string>();
                while (fieldNames.Count < distinctFieldNames)
                {
                    fieldNames.Add(TestUtil.RandomSimpleString(Random));
                    fieldNames.Remove("id");
                }
                this.fieldNames = fieldNames.ToArray(/*new string[0]*/);
                // pre-compute the sample terms and their byte representations
                terms = new string[disctinctTerms];
                termBytes = new BytesRef[disctinctTerms];
                for (int i = 0; i < disctinctTerms; ++i)
                {
                    terms[i] = TestUtil.RandomRealisticUnicodeString(Random);
                    termBytes[i] = new BytesRef(terms[i]);
                }
            }

            /// <summary>
            /// Builds a new <see cref="RandomDocument"/> from this factory's pools.
            /// </summary>
            public virtual RandomDocument NewDocument(int fieldCount, int maxTermCount, Options options)
            {
                return new RandomDocument(outerInstance, fieldCount, maxTermCount, options, fieldNames, terms, termBytes);
            }
        }
| |
| protected virtual void AssertEquals(RandomDocument doc, Fields fields) |
| { |
| // compare field names |
| Assert.AreEqual(doc == null, fields == null); |
| Assert.AreEqual(doc.fieldNames.Length, fields.Count); |
| ISet<string> fields1 = new JCG.HashSet<string>(); |
| ISet<string> fields2 = new JCG.HashSet<string>(); |
| for (int i = 0; i < doc.fieldNames.Length; ++i) |
| { |
| fields1.Add(doc.fieldNames[i]); |
| } |
| foreach (string field in fields) |
| { |
| fields2.Add(field); |
| } |
| Assert.IsTrue(fields1.SetEquals(fields2)); |
| |
| for (int i = 0; i < doc.fieldNames.Length; ++i) |
| { |
| AssertEquals(doc.tokenStreams[i], doc.fieldTypes[i], fields.GetTerms(doc.fieldNames[i])); |
| } |
| } |
| |
| new protected internal static bool Equals(object o1, object o2) |
| { |
| if (o1 == null) |
| { |
| return o2 == null; |
| } |
| else |
| { |
| return o1.Equals(o2); |
| } |
| } |
| |
        // to test reuse: per-thread enum instances are randomly handed back to Lucene
        // so the enumerator-reuse code paths of the codec get exercised
        private readonly DisposableThreadLocal<TermsEnum> termsEnum = new DisposableThreadLocal<TermsEnum>();

        private readonly DisposableThreadLocal<DocsEnum> docsEnum = new DisposableThreadLocal<DocsEnum>();
        private readonly DisposableThreadLocal<DocsAndPositionsEnum> docsAndPositionsEnum = new DisposableThreadLocal<DocsAndPositionsEnum>();

        // LUCENENET specific - cleanup DisposableThreadLocal instances after running tests
        public override void AfterClass()
        {
            termsEnum.Dispose();
            docsEnum.Dispose();
            docsAndPositionsEnum.Dispose();
            base.AfterClass();
        }
| |
        /// <summary>
        /// Verifies that the term vector <see cref="Terms"/> read back from the index match the
        /// tokens originally produced by <paramref name="tk"/> under the indexing options of
        /// <paramref name="ft"/>. Randomly alternates between fresh and reused enumerators to
        /// exercise the reuse code paths.
        /// </summary>
        protected virtual void AssertEquals(RandomTokenStream tk, FieldType ft, Terms terms)
        {
            Assert.AreEqual(1, terms.DocCount);
            int termCount = new JCG.HashSet<string>(tk.terms).Count;
            Assert.AreEqual((long)termCount, terms.Count); // LUCENENET specific - cast required because types don't match (xUnit checks this)
            Assert.AreEqual((long)termCount, terms.SumDocFreq); // LUCENENET specific - cast required because types don't match (xUnit checks this)
            Assert.AreEqual(ft.StoreTermVectorPositions, terms.HasPositions);
            Assert.AreEqual(ft.StoreTermVectorOffsets, terms.HasOffsets);
            Assert.AreEqual(ft.StoreTermVectorPayloads && tk.HasPayloads(), terms.HasPayloads);
            // collect the distinct terms, sorted the same way the TermsEnum will return them
            ISet<BytesRef> uniqueTerms = new JCG.HashSet<BytesRef>();
            foreach (string term in tk.freqs.Keys)
            {
                uniqueTerms.Add(new BytesRef(term));
            }
            BytesRef[] sortedTerms = uniqueTerms.ToArray(/*new BytesRef[0]*/);
            Array.Sort(sortedTerms, terms.Comparer);
            // randomly reuse the thread-local enumerator to test the reuse path
            TermsEnum termsEnum = terms.GetEnumerator(Random.NextBoolean() ? null : this.termsEnum.Value);
            this.termsEnum.Value = termsEnum;
            for (int i = 0; i < sortedTerms.Length; ++i)
            {
                Assert.IsTrue(termsEnum.MoveNext());
                Assert.AreEqual(sortedTerms[i], termsEnum.Term);
                Assert.AreEqual(1, termsEnum.DocFreq);

                // with all live-docs bits cleared, the enum must be immediately exhausted
                FixedBitSet bits = new FixedBitSet(1);
                DocsEnum docsEnum = termsEnum.Docs(bits, Random.NextBoolean() ? null : this.docsEnum.Value);
                Assert.AreEqual(DocsEnum.NO_MORE_DOCS, docsEnum.NextDoc());
                bits.Set(0);

                docsEnum = termsEnum.Docs(Random.NextBoolean() ? bits : null, Random.NextBoolean() ? null : docsEnum);
                Assert.IsNotNull(docsEnum);
                Assert.AreEqual(0, docsEnum.NextDoc());
                Assert.AreEqual(0, docsEnum.DocID);
                Assert.AreEqual(tk.freqs[termsEnum.Term.Utf8ToString()], (int?)docsEnum.Freq);
                Assert.AreEqual(DocsEnum.NO_MORE_DOCS, docsEnum.NextDoc());
                this.docsEnum.Value = docsEnum;

                bits.Clear(0);
                // a positions enum is only available when positions or offsets were indexed
                DocsAndPositionsEnum docsAndPositionsEnum = termsEnum.DocsAndPositions(bits, Random.NextBoolean() ? null : this.docsAndPositionsEnum.Value);
                Assert.AreEqual(ft.StoreTermVectorOffsets || ft.StoreTermVectorPositions, docsAndPositionsEnum != null);
                if (docsAndPositionsEnum != null)
                {
                    Assert.AreEqual(DocsEnum.NO_MORE_DOCS, docsAndPositionsEnum.NextDoc());
                }
                bits.Set(0);

                docsAndPositionsEnum = termsEnum.DocsAndPositions(Random.NextBoolean() ? bits : null, Random.NextBoolean() ? null : docsAndPositionsEnum);
                Assert.AreEqual(ft.StoreTermVectorOffsets || ft.StoreTermVectorPositions, docsAndPositionsEnum != null);
                if (terms.HasPositions || terms.HasOffsets)
                {
                    Assert.AreEqual(0, docsAndPositionsEnum.NextDoc());
                    int freq = docsAndPositionsEnum.Freq;
                    Assert.AreEqual(tk.freqs[termsEnum.Term.Utf8ToString()], (int?)freq);
                    // NOTE(review): docsAndPositionsEnum was already dereferenced just above,
                    // so this null check is redundant (kept from the original port)
                    if (docsAndPositionsEnum != null)
                    {
                        for (int k = 0; k < freq; ++k)
                        {
                            int position = docsAndPositionsEnum.NextPosition();
                            // candidate token indexes for this position (or start offset when
                            // positions were not indexed)
                            ISet<int?> indexes;
                            if (terms.HasPositions)
                            {
                                indexes = tk.positionToTerms[position];
                                Assert.IsNotNull(indexes);
                            }
                            else
                            {
                                indexes = tk.startOffsetToTerms[docsAndPositionsEnum.StartOffset];
                                Assert.IsNotNull(indexes);
                            }
                            if (terms.HasPositions)
                            {
                                bool foundPosition = false;
                                foreach (int index in indexes)
                                {
                                    if (tk.termBytes[index].Equals(termsEnum.Term) && tk.positions[index] == position)
                                    {
                                        foundPosition = true;
                                        break;
                                    }
                                }
                                Assert.IsTrue(foundPosition);
                            }
                            if (terms.HasOffsets)
                            {
                                bool foundOffset = false;
                                foreach (int index in indexes)
                                {
                                    if (tk.termBytes[index].Equals(termsEnum.Term) && tk.startOffsets[index] == docsAndPositionsEnum.StartOffset && tk.endOffsets[index] == docsAndPositionsEnum.EndOffset)
                                    {
                                        foundOffset = true;
                                        break;
                                    }
                                }
                                Assert.IsTrue(foundOffset);
                            }
                            if (terms.HasPayloads)
                            {
                                bool foundPayload = false;
                                foreach (int index in indexes)
                                {
                                    if (tk.termBytes[index].Equals(termsEnum.Term) && Equals(tk.payloads[index], docsAndPositionsEnum.GetPayload()))
                                    {
                                        foundPayload = true;
                                        break;
                                    }
                                }
                                Assert.IsTrue(foundPayload);
                            }
                        }

                        // LUCENENET specific - In Lucene, there were assertions set up inside TVReaders which throw AssertionError
                        // (provided assertions are enabled), which in turn signaled this class to skip the check by catching AssertionError.
                        // In .NET, assertions are not included in the release and cannot be enabled, so there is nothing to catch.
                        // We have to explicitly exclude the types that rely on this behavior from the check. Otherwise, they would fall
                        // through to Assert.Fail().
                        //
                        // We also have a fake AssertionException for testing mocks. We cannot throw InvalidOperationException in those
                        // cases because that exception is expected in other contexts.
                        Assert.ThrowsAnyOf<InvalidOperationException, AssertionError>(() => docsAndPositionsEnum.NextPosition());
                    }
                    Assert.AreEqual(DocsEnum.NO_MORE_DOCS, docsAndPositionsEnum.NextDoc());
                }
                this.docsAndPositionsEnum.Value = docsAndPositionsEnum;
            }
            Assert.IsFalse(termsEnum.MoveNext());
            // random seeks must always find the indexed terms
            for (int i = 0; i < 5; ++i)
            {
                if (Random.NextBoolean())
                {
                    Assert.IsTrue(termsEnum.SeekExact(RandomPicks.RandomFrom(Random, tk.termBytes)));
                }
                else
                {
                    Assert.AreEqual(SeekStatus.FOUND, termsEnum.SeekCeil(RandomPicks.RandomFrom(Random, tk.termBytes)));
                }
            }
        }
| |
| protected virtual Document AddId(Document doc, string id) |
| { |
| doc.Add(new StringField("id", id, Field.Store.NO)); |
| return doc; |
| } |
| |
| protected virtual int DocID(IndexReader reader, string id) |
| { |
| return (new IndexSearcher(reader)).Search(new TermQuery(new Term("id", id)), 1).ScoreDocs[0].Doc; |
| } |
| |
        [Test]
        // only one doc with vectors
        public virtual void TestRareVectors()
        {
            RandomDocumentFactory docFactory = new RandomDocumentFactory(this, 10, 20);
            foreach (Options options in ValidOptions())
            {
                int numDocs = AtLeast(200);
                int docWithVectors = Random.Next(numDocs);
                Document emptyDoc = new Document();
                using (Directory dir = NewDirectory())
                using (RandomIndexWriter writer = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
                    this,
#endif
                    Random, dir))
                {
                    RandomDocument doc = docFactory.NewDocument(TestUtil.NextInt32(Random, 1, 3), 20, options);
                    for (int i = 0; i < numDocs; ++i)
                    {
                        if (i == docWithVectors)
                        {
                            writer.AddDocument(AddId(doc.ToDocument(), "42"));
                        }
                        else
                        {
                            writer.AddDocument(emptyDoc);
                        }
                    }
                    using (IndexReader reader = writer.GetReader())
                    {
                        int docWithVectorsID = DocID(reader, "42");
                        // random probes: only the single doc with vectors should return Fields
                        for (int i = 0; i < 10; ++i)
                        {
                            int docID = Random.Next(numDocs);
                            Fields fields = reader.GetTermVectors(docID);
                            if (docID == docWithVectorsID)
                            {
                                AssertEquals(doc, fields);
                            }
                            else
                            {
                                Assert.IsNull(fields);
                            }
                        }
                        Fields fields_ = reader.GetTermVectors(docWithVectorsID);
                        AssertEquals(doc, fields_);
                    } // reader.Dispose();
                } // writer.Dispose();, dir.Dispose();
            }
        }
| |
        [Test]
        public virtual void TestHighFreqs()
        {
            // few distinct terms combined with many tokens => very high term frequencies
            RandomDocumentFactory docFactory = new RandomDocumentFactory(this, 3, 5);
            foreach (Options options in ValidOptions())
            {
                if (options == Options.NONE)
                {
                    continue; // no term vectors to verify
                }
                using (Directory dir = NewDirectory())
                using (RandomIndexWriter writer = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
                    this,
#endif
                    Random, dir))
                {
                    RandomDocument doc = docFactory.NewDocument(TestUtil.NextInt32(Random, 1, 2), AtLeast(20000),
                        options);
                    writer.AddDocument(doc.ToDocument());
                    using (IndexReader reader = writer.GetReader())
                        AssertEquals(doc, reader.GetTermVectors(0));
                }
            }
        }
| |
        [Test]
        public virtual void TestLotsOfFields()
        {
            // at least 100 fields per document, drawn from a pool of 5000 distinct names
            RandomDocumentFactory docFactory = new RandomDocumentFactory(this, 5000, 10);
            foreach (Options options in ValidOptions())
            {
                using (Directory dir = NewDirectory())
                using (RandomIndexWriter writer = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
                    this,
#endif
                    Random, dir))
                {
                    RandomDocument doc = docFactory.NewDocument(AtLeast(100), 5, options);
                    writer.AddDocument(doc.ToDocument());
                    using (IndexReader reader = writer.GetReader())
                        AssertEquals(doc, reader.GetTermVectors(0));
                    //reader.Dispose();
                } // writer.Dispose();, dir.Dispose();
            }
        }
| |
        [Test]
        // different options for the same field
        public virtual void TestMixedOptions()
        {
            int numFields = TestUtil.NextInt32(Random, 1, 3);
            var docFactory = new RandomDocumentFactory(this, numFields, 10);
            // index two documents whose identically-named fields use different option combinations
            foreach (var options1 in ValidOptions())
            {
                foreach (var options2 in ValidOptions())
                {
                    if (options1 == options2)
                    {
                        continue;
                    }
                    using (Directory dir = NewDirectory())
                    using (var writer = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
                        this,
#endif
                        Random, dir))
                    {
                        RandomDocument doc1 = docFactory.NewDocument(numFields, 20, options1);
                        RandomDocument doc2 = docFactory.NewDocument(numFields, 20, options2);
                        writer.AddDocument(AddId(doc1.ToDocument(), "1"));
                        writer.AddDocument(AddId(doc2.ToDocument(), "2"));
                        using (IndexReader reader = writer.GetReader())
                        {
                            // each doc's vectors must reflect its own options, not the other doc's
                            int doc1ID = DocID(reader, "1");
                            AssertEquals(doc1, reader.GetTermVectors(doc1ID));
                            int doc2ID = DocID(reader, "2");
                            AssertEquals(doc2, reader.GetTermVectors(doc2ID));
                        }
                    }
                }
            }
        }
| |
        [Test]
        public virtual void TestRandom()
        {
            RandomDocumentFactory docFactory = new RandomDocumentFactory(this, 5, 20);
            int numDocs = AtLeast(100);
            RandomDocument[] docs = new RandomDocument[numDocs];
            for (int i = 0; i < numDocs; ++i)
            {
                docs[i] = docFactory.NewDocument(TestUtil.NextInt32(Random, 1, 3), TestUtil.NextInt32(Random, 10, 50), RandomOptions());
            }
            using (Directory dir = NewDirectory())
            using (RandomIndexWriter writer = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
                this,
#endif
                Random, dir))
            {
                // each doc gets an "id" field so it can be located after indexing
                for (int i = 0; i < numDocs; ++i)
                {
                    writer.AddDocument(AddId(docs[i].ToDocument(), "" + i));
                }
                using (IndexReader reader = writer.GetReader())
                {
                    for (int i = 0; i < numDocs; ++i)
                    {
                        int docID = DocID(reader, "" + i);
                        AssertEquals(docs[i], reader.GetTermVectors(docID));
                    }
                } // reader.Dispose();
            } // writer.Dispose();, dir.Dispose();
        }
| |
        [Test]
        public virtual void TestMerge()
        {
            RandomDocumentFactory docFactory = new RandomDocumentFactory(this, 5, 20);
            int numDocs = AtLeast(100);
            int numDeletes = Random.Next(numDocs);
            ISet<int?> deletes = new JCG.HashSet<int?>();
            while (deletes.Count < numDeletes)
            {
                deletes.Add(Random.Next(numDocs));
            }
            foreach (Options options in ValidOptions())
            {
                RandomDocument[] docs = new RandomDocument[numDocs];
                for (int i = 0; i < numDocs; ++i)
                {
                    docs[i] = docFactory.NewDocument(TestUtil.NextInt32(Random, 1, 3), AtLeast(10), options);
                }
                using (Directory dir = NewDirectory())
                using (RandomIndexWriter writer = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
                    this,
#endif
                    Random, dir))
                {
                    for (int i = 0; i < numDocs; ++i)
                    {
                        writer.AddDocument(AddId(docs[i].ToDocument(), "" + i));
                        if (Rarely())
                        {
                            writer.Commit(); // occasional commits produce multiple segments to merge
                        }
                    }
                    foreach (int delete in deletes)
                    {
                        writer.DeleteDocuments(new Term("id", "" + delete));
                    }
                    // merge with deletes
                    writer.ForceMerge(1);
                    using (IndexReader reader = writer.GetReader())
                    {
                        // surviving docs must still have intact term vectors after the merge
                        for (int i = 0; i < numDocs; ++i)
                        {
                            if (!deletes.Contains(i))
                            {
                                int docID = DocID(reader, "" + i);
                                AssertEquals(docs[i], reader.GetTermVectors(docID));
                            }
                        }
                    } // reader.Dispose();

                } // writer.Dispose();, dir.Dispose();
            }
        }
| |
        [Test]
        // run random tests from different threads to make sure the per-thread clones
        // don't share mutable data
        public virtual void TestClone()
        {
            RandomDocumentFactory docFactory = new RandomDocumentFactory(this, 5, 20);
            int numDocs = AtLeast(100);
            foreach (Options options in ValidOptions())
            {
                RandomDocument[] docs = new RandomDocument[numDocs];
                for (int i = 0; i < numDocs; ++i)
                {
                    docs[i] = docFactory.NewDocument(TestUtil.NextInt32(Random, 1, 3), AtLeast(10), options);
                }
                // records the first exception thrown by any reader thread (checked at the end)
                AtomicReference<Exception> exception = new AtomicReference<Exception>();
                using (Directory dir = NewDirectory())
                using (RandomIndexWriter writer = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
                    this,
#endif
                    Random, dir))
                {
                    for (int i = 0; i < numDocs; ++i)
                    {
                        writer.AddDocument(AddId(docs[i].ToDocument(), "" + i));
                    }
                    using (IndexReader reader = writer.GetReader())
                    {
                        // sanity check on the main thread first
                        for (int i = 0; i < numDocs; ++i)
                        {
                            int docID = DocID(reader, "" + i);
                            AssertEquals(docs[i], reader.GetTermVectors(docID));
                        }

                        ThreadJob[] threads = new ThreadJob[2];
                        for (int i = 0; i < threads.Length; ++i)
                        {
                            threads[i] = new ThreadAnonymousInnerClassHelper(this, numDocs, docs, reader, exception, i);
                        }
                        foreach (ThreadJob thread in threads)
                        {
                            thread.Start();
                        }
                        foreach (ThreadJob thread in threads)
                        {
                            thread.Join();
                        }
                    } // reader.Dispose();
                } // writer.Dispose();, dir.Dispose();
                Assert.IsNull(exception.Value, "One thread threw an exception");
            }
        }
| |
| private class ThreadAnonymousInnerClassHelper : ThreadJob |
| { |
| private readonly BaseTermVectorsFormatTestCase outerInstance; |
| |
| private readonly int numDocs; |
| private readonly RandomDocument[] docs; |
| private readonly IndexReader reader; |
| private readonly AtomicReference<Exception> exception; |
| private readonly int i; |
| |
| public ThreadAnonymousInnerClassHelper(BaseTermVectorsFormatTestCase outerInstance, int numDocs, RandomDocument[] docs, IndexReader reader, AtomicReference<Exception> exception, int i) |
| { |
| this.outerInstance = outerInstance; |
| this.numDocs = numDocs; |
| this.docs = docs; |
| this.reader = reader; |
| this.exception = exception; |
| this.i = i; |
| } |
| |
| public override void Run() |
| { |
| try |
| { |
| for (int i = 0; i < AtLeast(100); ++i) |
| { |
| int idx = Random.Next(numDocs); |
| int docID = outerInstance.DocID(reader, "" + idx); |
| outerInstance.AssertEquals(docs[idx], reader.GetTermVectors(docID)); |
| } |
| } |
| catch (Exception t) |
| { |
| this.exception.Value = t; |
| } |
| } |
| } |
| } |
| } |