| using J2N.Text; |
| using J2N.Threading; |
| using Lucene.Net.Analysis; |
| using Lucene.Net.Analysis.TokenAttributes; |
| using Lucene.Net.Codecs; |
| using Lucene.Net.Codecs.SimpleText; |
| using Lucene.Net.Diagnostics; |
| using Lucene.Net.Documents; |
| using Lucene.Net.Index.Extensions; |
| using Lucene.Net.Search; |
| using Lucene.Net.Support; |
| using Lucene.Net.Util; |
| using NUnit.Framework; |
| using System; |
| using System.Collections.Generic; |
| using System.IO; |
| using System.Text; |
| using System.Threading; |
| using Console = Lucene.Net.Util.SystemConsole; |
| using JCG = J2N.Collections.Generic; |
| |
| namespace Lucene.Net.Index |
| { |
| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| using Assert = Lucene.Net.TestFramework.Assert; |
| using Automaton = Lucene.Net.Util.Automaton.Automaton; |
| using BaseDirectoryWrapper = Lucene.Net.Store.BaseDirectoryWrapper; |
| using BasicAutomata = Lucene.Net.Util.Automaton.BasicAutomata; |
| using BinaryDocValuesField = BinaryDocValuesField; |
| using BytesRef = Lucene.Net.Util.BytesRef; |
| using CharacterRunAutomaton = Lucene.Net.Util.Automaton.CharacterRunAutomaton; |
| using Constants = Lucene.Net.Util.Constants; |
| using Directory = Lucene.Net.Store.Directory; |
| using DocIdSetIterator = Lucene.Net.Search.DocIdSetIterator; |
| using Document = Documents.Document; |
| using Field = Field; |
| using FieldType = FieldType; |
| using IBits = Lucene.Net.Util.IBits; |
| using IndexOutput = Lucene.Net.Store.IndexOutput; |
| using IndexSearcher = Lucene.Net.Search.IndexSearcher; |
| using IOContext = Lucene.Net.Store.IOContext; |
| using IOUtils = Lucene.Net.Util.IOUtils; |
| using Lock = Lucene.Net.Store.Lock; |
| using LockFactory = Lucene.Net.Store.LockFactory; |
| using LockObtainFailedException = Lucene.Net.Store.LockObtainFailedException; |
| using LuceneTestCase = Lucene.Net.Util.LuceneTestCase; |
| using MatchAllDocsQuery = Lucene.Net.Search.MatchAllDocsQuery; |
| using MockDirectoryWrapper = Lucene.Net.Store.MockDirectoryWrapper; |
| using NoLockFactory = Lucene.Net.Store.NoLockFactory; |
| using NumericDocValuesField = NumericDocValuesField; |
| using PackedInt32s = Lucene.Net.Util.Packed.PackedInt32s; |
| using PhraseQuery = Lucene.Net.Search.PhraseQuery; |
| using RAMDirectory = Lucene.Net.Store.RAMDirectory; |
| using ScoreDoc = Lucene.Net.Search.ScoreDoc; |
| using SimpleFSLockFactory = Lucene.Net.Store.SimpleFSLockFactory; |
| using SingleInstanceLockFactory = Lucene.Net.Store.SingleInstanceLockFactory; |
| using SortedDocValuesField = SortedDocValuesField; |
| using SortedSetDocValuesField = SortedSetDocValuesField; |
| using StoredField = StoredField; |
| using StringField = StringField; |
| using TermQuery = Lucene.Net.Search.TermQuery; |
| using TestUtil = Lucene.Net.Util.TestUtil; |
| using TextField = TextField; |
| |
| [TestFixture] |
| public class TestIndexWriter : LuceneTestCase |
| { |
| private static readonly FieldType storedTextType = new FieldType(TextField.TYPE_NOT_STORED); |
| |
| #if FEATURE_INDEXWRITER_TESTS |
| |
| [Test] |
| public virtual void TestDocCount() |
| { |
| Directory dir = NewDirectory(); |
| |
| IndexWriter writer = null; |
| IndexReader reader = null; |
| int i; |
| |
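| // Temporarily override the default write-lock timeout, verify it took |
| // effect, and restore the saved value so other tests are unaffected: |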
| long savedWriteLockTimeout = IndexWriterConfig.DefaultWriteLockTimeout; |
| try |
| { |
| IndexWriterConfig.DefaultWriteLockTimeout = 2000; |
| Assert.AreEqual(2000, IndexWriterConfig.DefaultWriteLockTimeout); |
| writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random))); |
| } |
| finally |
| { |
| IndexWriterConfig.DefaultWriteLockTimeout = savedWriteLockTimeout; |
| } |
| |
| // add 100 documents |
| for (i = 0; i < 100; i++) |
| { |
| AddDocWithIndex(writer, i); |
| } |
| Assert.AreEqual(100, writer.MaxDoc); |
| writer.Dispose(); |
| |
| // delete 40 documents |
| writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetMergePolicy(NoMergePolicy.NO_COMPOUND_FILES)); |
| for (i = 0; i < 40; i++) |
| { |
| writer.DeleteDocuments(new Term("id", "" + i)); |
| } |
| writer.Dispose(); |
| |
| reader = DirectoryReader.Open(dir); |
| Assert.AreEqual(60, reader.NumDocs); |
| reader.Dispose(); |
| |
| // merge the index down and check that the new doc count is correct |
| writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random))); |
| Assert.AreEqual(60, writer.NumDocs); |
| writer.ForceMerge(1); |
| Assert.AreEqual(60, writer.MaxDoc); |
| Assert.AreEqual(60, writer.NumDocs); |
| writer.Dispose(); |
| |
| // check that the index reader gives the same numbers. |
| reader = DirectoryReader.Open(dir); |
| Assert.AreEqual(60, reader.MaxDoc); |
| Assert.AreEqual(60, reader.NumDocs); |
| reader.Dispose(); |
| |
| // make sure opening a new index for create over |
| // this existing one works correctly: |
| writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetOpenMode(OpenMode.CREATE)); |
| Assert.AreEqual(0, writer.MaxDoc); |
| Assert.AreEqual(0, writer.NumDocs); |
| writer.Dispose(); |
| dir.Dispose(); |
| } |
| |
| /// <summary> |
| /// LUCENENET specific: |
| /// Changed from an internal static method to private to remove |
| /// inter-dependencies between the TestIndexWriter*.cs, TestAddIndexes.cs, |
| /// and TestDeletionPolicy.cs tests. |
| /// </summary> |
| private void AddDoc(IndexWriter writer) |
| { |
| Document doc = new Document(); |
| doc.Add(NewTextField("content", "aaa", Field.Store.NO)); |
| writer.AddDocument(doc); |
| } |
| |
| /// <summary> |
| /// LUCENENET specific: |
| /// Changed from an internal static method to private to remove |
| /// inter-dependencies between the TestIndexWriter*.cs, TestAddIndexes.cs, |
| /// and TestDeletionPolicy.cs tests. |
| /// </summary> |
| private void AddDocWithIndex(IndexWriter writer, int index) |
| { |
| Document doc = new Document(); |
| doc.Add(NewField("content", "aaa " + index, storedTextType)); |
| doc.Add(NewField("id", "" + index, storedTextType)); |
| writer.AddDocument(doc); |
| } |
| |
| #endif |
| |
| public static void AssertNoUnreferencedFiles(Directory dir, string message) |
| { |
| string[] startFiles = dir.ListAll(); |
| (new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)))).Rollback(); |
| string[] endFiles = dir.ListAll(); |
| |
| Array.Sort(startFiles); |
| Array.Sort(endFiles); |
| |
| if (!Arrays.Equals(startFiles, endFiles)) |
| { |
| Assert.Fail(message + ": before delete:\n " + ArrayToString(startFiles) + "\n after delete:\n " + ArrayToString(endFiles)); |
| } |
| } |
| |
| internal static string ArrayToString(string[] l) |
| { |
| // Join with newline + indent rather than concatenating in a loop |
| return string.Join("\n ", l); |
| } |
| |
| #if FEATURE_INDEXWRITER_TESTS |
| |
| // Make sure we can open an index for create even when a |
| // reader holds it open (this fails pre lock-less |
| // commits on Windows): |
| [Test] |
| public virtual void TestCreateWithReader() |
| { |
| Directory dir = NewDirectory(); |
| |
| // add one document & close writer |
| IndexWriter writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random))); |
| AddDoc(writer); |
| writer.Dispose(); |
| |
| // now open reader: |
| IndexReader reader = DirectoryReader.Open(dir); |
| Assert.AreEqual(1, reader.NumDocs, "should be one document"); |
| |
| // now open index for create: |
| writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetOpenMode(OpenMode.CREATE)); |
| Assert.AreEqual(0, writer.MaxDoc, "should be zero documents"); |
| AddDoc(writer); |
| writer.Dispose(); |
| |
| Assert.AreEqual(1, reader.NumDocs, "should be one document"); |
| IndexReader reader2 = DirectoryReader.Open(dir); |
| Assert.AreEqual(1, reader2.NumDocs, "should be one document"); |
| reader.Dispose(); |
| reader2.Dispose(); |
| |
| dir.Dispose(); |
| } |
| |
| [Test] |
| public virtual void TestChangesAfterClose([ValueSource(typeof(ConcurrentMergeSchedulerFactories), "Values")]Func<IConcurrentMergeScheduler> newScheduler) |
| { |
| Directory dir = NewDirectory(); |
| |
| IndexWriter writer = null; |
| |
| var config = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetMergeScheduler(newScheduler()); |
| writer = new IndexWriter(dir, config); |
| AddDoc(writer); |
| |
| // close |
| writer.Dispose(); |
| try |
| { |
| AddDoc(writer); |
| Assert.Fail("did not hit ObjectDisposedException"); |
| } |
| catch (ObjectDisposedException) |
| { |
| // expected |
| } |
| dir.Dispose(); |
| } |
| |
| [Test] |
| public virtual void TestIndexNoDocuments() |
| { |
| Directory dir = NewDirectory(); |
| IndexWriter writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random))); |
| writer.Commit(); |
| writer.Dispose(); |
| |
| IndexReader reader = DirectoryReader.Open(dir); |
| Assert.AreEqual(0, reader.MaxDoc); |
| Assert.AreEqual(0, reader.NumDocs); |
| reader.Dispose(); |
| |
| writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetOpenMode(OpenMode.APPEND)); |
| writer.Commit(); |
| writer.Dispose(); |
| |
| reader = DirectoryReader.Open(dir); |
| Assert.AreEqual(0, reader.MaxDoc); |
| Assert.AreEqual(0, reader.NumDocs); |
| reader.Dispose(); |
| dir.Dispose(); |
| } |
| |
| [Test] |
| public virtual void TestManyFields() |
| { |
| Directory dir = NewDirectory(); |
| IndexWriter writer = new IndexWriter(dir, (IndexWriterConfig)NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetMaxBufferedDocs(10)); |
| for (int j = 0; j < 100; j++) |
| { |
| Document doc = new Document(); |
| doc.Add(NewField("a" + j, "aaa" + j, storedTextType)); |
| doc.Add(NewField("b" + j, "aaa" + j, storedTextType)); |
| doc.Add(NewField("c" + j, "aaa" + j, storedTextType)); |
| doc.Add(NewField("d" + j, "aaa", storedTextType)); |
| doc.Add(NewField("e" + j, "aaa", storedTextType)); |
| doc.Add(NewField("f" + j, "aaa", storedTextType)); |
| writer.AddDocument(doc); |
| } |
| writer.Dispose(); |
| |
| IndexReader reader = DirectoryReader.Open(dir); |
| Assert.AreEqual(100, reader.MaxDoc); |
| Assert.AreEqual(100, reader.NumDocs); |
| for (int j = 0; j < 100; j++) |
| { |
| Assert.AreEqual(1, reader.DocFreq(new Term("a" + j, "aaa" + j))); |
| Assert.AreEqual(1, reader.DocFreq(new Term("b" + j, "aaa" + j))); |
| Assert.AreEqual(1, reader.DocFreq(new Term("c" + j, "aaa" + j))); |
| Assert.AreEqual(1, reader.DocFreq(new Term("d" + j, "aaa"))); |
| Assert.AreEqual(1, reader.DocFreq(new Term("e" + j, "aaa"))); |
| Assert.AreEqual(1, reader.DocFreq(new Term("f" + j, "aaa"))); |
| } |
| reader.Dispose(); |
| dir.Dispose(); |
| } |
| |
| [Test] |
| public virtual void TestSmallRAMBuffer() |
| { |
| |
| Directory dir = NewDirectory(); |
| IndexWriter writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetRAMBufferSizeMB(0.000001).SetMergePolicy(NewLogMergePolicy(10))); |
| int lastNumFile = dir.ListAll().Length; |
| for (int j = 0; j < 9; j++) |
| { |
| Document doc = new Document(); |
| doc.Add(NewField("field", "aaa" + j, storedTextType)); |
| writer.AddDocument(doc); |
| int numFile = dir.ListAll().Length; |
| // Verify that with a tiny RAM buffer we see a new |
| // segment after every doc |
| Assert.IsTrue(numFile > lastNumFile); |
| lastNumFile = numFile; |
| } |
| writer.Dispose(); |
| dir.Dispose(); |
| } |
| |
| // Make sure it's OK to change RAM buffer size and |
| // maxBufferedDocs in a write session |
| [Test] |
| public virtual void TestChangingRAMBuffer() |
| { |
| Directory dir = NewDirectory(); |
| IndexWriter writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random))); |
| writer.Config.SetMaxBufferedDocs(10); |
| writer.Config.SetRAMBufferSizeMB(IndexWriterConfig.DISABLE_AUTO_FLUSH); |
| |
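| // Add docs while toggling between doc-count-based and RAM-based flush |
| // triggers, verifying the flush count rises only when a trigger is active: |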
| int lastFlushCount = -1; |
| for (int j = 1; j < 52; j++) |
| { |
| Document doc = new Document(); |
| doc.Add(new Field("field", "aaa" + j, storedTextType)); |
| writer.AddDocument(doc); |
| TestUtil.SyncConcurrentMerges(writer); |
| int flushCount = writer.FlushCount; |
| if (j == 1) |
| { |
| lastFlushCount = flushCount; |
| } |
| else if (j < 10) |
| { |
| // No new files should be created |
| Assert.AreEqual(flushCount, lastFlushCount); |
| } |
| else if (10 == j) |
| { |
| Assert.IsTrue(flushCount > lastFlushCount); |
| lastFlushCount = flushCount; |
| writer.Config.SetRAMBufferSizeMB(0.000001); |
| writer.Config.SetMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH); |
| } |
| else if (j < 20) |
| { |
| Assert.IsTrue(flushCount > lastFlushCount); |
| lastFlushCount = flushCount; |
| } |
| else if (20 == j) |
| { |
| writer.Config.SetRAMBufferSizeMB(16); |
| writer.Config.SetMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH); |
| lastFlushCount = flushCount; |
| } |
| else if (j < 30) |
| { |
| Assert.AreEqual(flushCount, lastFlushCount); |
| } |
| else if (30 == j) |
| { |
| writer.Config.SetRAMBufferSizeMB(0.000001); |
| writer.Config.SetMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH); |
| } |
| else if (j < 40) |
| { |
| Assert.IsTrue(flushCount > lastFlushCount); |
| lastFlushCount = flushCount; |
| } |
| else if (40 == j) |
| { |
| writer.Config.SetMaxBufferedDocs(10); |
| writer.Config.SetRAMBufferSizeMB(IndexWriterConfig.DISABLE_AUTO_FLUSH); |
| lastFlushCount = flushCount; |
| } |
| else if (j < 50) |
| { |
| Assert.AreEqual(flushCount, lastFlushCount); |
| writer.Config.SetMaxBufferedDocs(10); |
| writer.Config.SetRAMBufferSizeMB(IndexWriterConfig.DISABLE_AUTO_FLUSH); |
| } |
| else if (50 == j) |
| { |
| Assert.IsTrue(flushCount > lastFlushCount); |
| } |
| } |
| writer.Dispose(); |
| dir.Dispose(); |
| } |
| |
| [Test] |
| public virtual void TestChangingRAMBuffer2() |
| { |
| Directory dir = NewDirectory(); |
| IndexWriter writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random))); |
| writer.Config.SetMaxBufferedDocs(10); |
| writer.Config.SetMaxBufferedDeleteTerms(10); |
| writer.Config.SetRAMBufferSizeMB(IndexWriterConfig.DISABLE_AUTO_FLUSH); |
| |
| for (int j = 1; j < 52; j++) |
| { |
| Document doc = new Document(); |
| doc.Add(new Field("field", "aaa" + j, storedTextType)); |
| writer.AddDocument(doc); |
| } |
| |
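| // Now delete by term while toggling the flush triggers, verifying that |
| // buffered delete terms force flushes just like buffered docs do: |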
| int lastFlushCount = -1; |
| for (int j = 1; j < 52; j++) |
| { |
| writer.DeleteDocuments(new Term("field", "aaa" + j)); |
| TestUtil.SyncConcurrentMerges(writer); |
| int flushCount = writer.FlushCount; |
| |
| if (j == 1) |
| { |
| lastFlushCount = flushCount; |
| } |
| else if (j < 10) |
| { |
| // No new files should be created |
| Assert.AreEqual(flushCount, lastFlushCount); |
| } |
| else if (10 == j) |
| { |
| Assert.IsTrue(flushCount > lastFlushCount, "" + j); |
| lastFlushCount = flushCount; |
| writer.Config.SetRAMBufferSizeMB(0.000001); |
| writer.Config.SetMaxBufferedDeleteTerms(1); |
| } |
| else if (j < 20) |
| { |
| Assert.IsTrue(flushCount > lastFlushCount); |
| lastFlushCount = flushCount; |
| } |
| else if (20 == j) |
| { |
| writer.Config.SetRAMBufferSizeMB(16); |
| writer.Config.SetMaxBufferedDeleteTerms(IndexWriterConfig.DISABLE_AUTO_FLUSH); |
| lastFlushCount = flushCount; |
| } |
| else if (j < 30) |
| { |
| Assert.AreEqual(flushCount, lastFlushCount); |
| } |
| else if (30 == j) |
| { |
| writer.Config.SetRAMBufferSizeMB(0.000001); |
| writer.Config.SetMaxBufferedDeleteTerms(IndexWriterConfig.DISABLE_AUTO_FLUSH); |
| writer.Config.SetMaxBufferedDeleteTerms(1); |
| } |
| else if (j < 40) |
| { |
| Assert.IsTrue(flushCount > lastFlushCount); |
| lastFlushCount = flushCount; |
| } |
| else if (40 == j) |
| { |
| writer.Config.SetMaxBufferedDeleteTerms(10); |
| writer.Config.SetRAMBufferSizeMB(IndexWriterConfig.DISABLE_AUTO_FLUSH); |
| lastFlushCount = flushCount; |
| } |
| else if (j < 50) |
| { |
| Assert.AreEqual(flushCount, lastFlushCount); |
| writer.Config.SetMaxBufferedDeleteTerms(10); |
| writer.Config.SetRAMBufferSizeMB(IndexWriterConfig.DISABLE_AUTO_FLUSH); |
| } |
| else if (50 == j) |
| { |
| Assert.IsTrue(flushCount > lastFlushCount); |
| } |
| } |
| writer.Dispose(); |
| dir.Dispose(); |
| } |
| |
| [Test] |
| public virtual void TestDiverseDocs() |
| { |
| Directory dir = NewDirectory(); |
| IndexWriter writer = new IndexWriter(dir, (IndexWriterConfig)NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetRAMBufferSizeMB(0.5)); |
| int n = AtLeast(1); |
| for (int i = 0; i < n; i++) |
| { |
| // First, docs where every term is unique (heavy on |
| // Posting instances) |
| for (int j = 0; j < 100; j++) |
| { |
| Document doc = new Document(); |
| for (int k = 0; k < 100; k++) |
| { |
| doc.Add(NewField("field", Convert.ToString(Random.Next()), storedTextType)); |
| } |
| writer.AddDocument(doc); |
| } |
| |
| // Next, many single term docs where only one term |
| // occurs (heavy on byte blocks) |
| for (int j = 0; j < 100; j++) |
| { |
| Document doc = new Document(); |
| doc.Add(NewField("field", "aaa aaa aaa aaa aaa aaa aaa aaa aaa aaa", storedTextType)); |
| writer.AddDocument(doc); |
| } |
| |
| // Next, many single term docs where only one term |
| // occurs but the terms are very long (heavy on |
| // char[] arrays) |
| for (int j = 0; j < 100; j++) |
| { |
| StringBuilder b = new StringBuilder(); |
| string x = Convert.ToString(j) + "."; |
| for (int k = 0; k < 1000; k++) |
| { |
| b.Append(x); |
| } |
| string longTerm = b.ToString(); |
| |
| Document doc = new Document(); |
| doc.Add(NewField("field", longTerm, storedTextType)); |
| writer.AddDocument(doc); |
| } |
| } |
| writer.Dispose(); |
| |
| IndexReader reader = DirectoryReader.Open(dir); |
| IndexSearcher searcher = NewSearcher(reader); |
| int totalHits = searcher.Search(new TermQuery(new Term("field", "aaa")), null, 1).TotalHits; |
| Assert.AreEqual(n * 100, totalHits); |
| reader.Dispose(); |
| |
| dir.Dispose(); |
| } |
| |
| [Test] |
| public virtual void TestEnablingNorms() |
| { |
| Directory dir = NewDirectory(); |
| IndexWriter writer = new IndexWriter(dir, (IndexWriterConfig)NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetMaxBufferedDocs(10)); |
| // Enable norms for only 1 doc, pre flush |
| FieldType customType = new FieldType(TextField.TYPE_STORED); |
| customType.OmitNorms = true; |
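| // Every doc except j == 8 uses the norms-omitting customType, so only |
| // one doc in the flushed segment carries norms for "field": |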
| for (int j = 0; j < 10; j++) |
| { |
| Document doc = new Document(); |
| Field f = null; |
| if (j != 8) |
| { |
| f = NewField("field", "aaa", customType); |
| } |
| else |
| { |
| f = NewField("field", "aaa", storedTextType); |
| } |
| doc.Add(f); |
| writer.AddDocument(doc); |
| } |
| writer.Dispose(); |
| |
| Term searchTerm = new Term("field", "aaa"); |
| |
| IndexReader reader = DirectoryReader.Open(dir); |
| IndexSearcher searcher = NewSearcher(reader); |
| ScoreDoc[] hits = searcher.Search(new TermQuery(searchTerm), null, 1000).ScoreDocs; |
| Assert.AreEqual(10, hits.Length); |
| reader.Dispose(); |
| |
| writer = new IndexWriter(dir, (IndexWriterConfig)NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetOpenMode(OpenMode.CREATE).SetMaxBufferedDocs(10)); |
| // Enable norms for only 1 doc, post flush |
| for (int j = 0; j < 27; j++) |
| { |
| Document doc = new Document(); |
| Field f = null; |
| if (j != 26) |
| { |
| f = NewField("field", "aaa", customType); |
| } |
| else |
| { |
| f = NewField("field", "aaa", storedTextType); |
| } |
| doc.Add(f); |
| writer.AddDocument(doc); |
| } |
| writer.Dispose(); |
| reader = DirectoryReader.Open(dir); |
| searcher = NewSearcher(reader); |
| hits = searcher.Search(new TermQuery(searchTerm), null, 1000).ScoreDocs; |
| Assert.AreEqual(27, hits.Length); |
| reader.Dispose(); |
| |
| reader = DirectoryReader.Open(dir); |
| reader.Dispose(); |
| |
| dir.Dispose(); |
| } |
| |
| [Test] |
| public virtual void TestHighFreqTerm() |
| { |
| Directory dir = NewDirectory(); |
| IndexWriter writer = new IndexWriter(dir, (IndexWriterConfig)NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetRAMBufferSizeMB(0.01)); |
| // Massive doc that has 128 K a's |
| StringBuilder b = new StringBuilder(1024 * 1024); |
| for (int i = 0; i < 4096; i++) |
| { |
| b.Append(" a a a a a a a a"); |
| b.Append(" a a a a a a a a"); |
| b.Append(" a a a a a a a a"); |
| b.Append(" a a a a a a a a"); |
| } |
| Document doc = new Document(); |
| FieldType customType = new FieldType(TextField.TYPE_STORED); |
| customType.StoreTermVectors = true; |
| customType.StoreTermVectorPositions = true; |
| customType.StoreTermVectorOffsets = true; |
| doc.Add(NewField("field", b.ToString(), customType)); |
| writer.AddDocument(doc); |
| writer.Dispose(); |
| |
| IndexReader reader = DirectoryReader.Open(dir); |
| Assert.AreEqual(1, reader.MaxDoc); |
| Assert.AreEqual(1, reader.NumDocs); |
| Term t = new Term("field", "a"); |
| Assert.AreEqual(1, reader.DocFreq(t)); |
| DocsEnum td = TestUtil.Docs(Random, reader, "field", new BytesRef("a"), MultiFields.GetLiveDocs(reader), null, DocsFlags.FREQS); |
| td.NextDoc(); |
| Assert.AreEqual(128 * 1024, td.Freq); |
| reader.Dispose(); |
| dir.Dispose(); |
| } |
| |
| // Helper class for TestNullLockFactory |
| public class MyRAMDirectory : MockDirectoryWrapper |
| { |
| private LockFactory myLockFactory; |
| |
| public MyRAMDirectory(Directory @delegate) |
| : base(Random, @delegate) |
| { |
| m_lockFactory = null; |
| myLockFactory = new SingleInstanceLockFactory(); |
| } |
| |
| public override Lock MakeLock(string name) |
| { |
| return myLockFactory.MakeLock(name); |
| } |
| } |
| |
| // Make sure that a Directory implementation that does |
| // not use LockFactory at all (i.e., overrides MakeLock and |
| // implements its own private locking) works OK. This |
| // was raised on java-dev as a loss of backwards |
| // compatibility. |
| [Test] |
| public virtual void TestNullLockFactory() |
| { |
| Directory dir = new MyRAMDirectory(new RAMDirectory()); |
| IndexWriter writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random))); |
| for (int i = 0; i < 100; i++) |
| { |
| AddDoc(writer); |
| } |
| writer.Dispose(); |
| Term searchTerm = new Term("content", "aaa"); |
| IndexReader reader = DirectoryReader.Open(dir); |
| IndexSearcher searcher = NewSearcher(reader); |
| ScoreDoc[] hits = searcher.Search(new TermQuery(searchTerm), null, 1000).ScoreDocs; |
| Assert.AreEqual(100, hits.Length, "did not get right number of hits"); |
| reader.Dispose(); |
| |
| writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetOpenMode(OpenMode.CREATE)); |
| writer.Dispose(); |
| dir.Dispose(); |
| } |
| |
| [Test] |
| public virtual void TestFlushWithNoMerging() |
| { |
| Directory dir = NewDirectory(); |
| IndexWriter writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetMaxBufferedDocs(2).SetMergePolicy(NewLogMergePolicy(10))); |
| Document doc = new Document(); |
| FieldType customType = new FieldType(TextField.TYPE_STORED); |
| customType.StoreTermVectors = true; |
| customType.StoreTermVectorPositions = true; |
| customType.StoreTermVectorOffsets = true; |
| doc.Add(NewField("field", "aaa", customType)); |
| for (int i = 0; i < 19; i++) |
| { |
| writer.AddDocument(doc); |
| } |
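| // Flush the buffered docs (applying deletes) without triggering a merge: |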
| writer.Flush(false, true); |
| writer.Dispose(); |
| SegmentInfos sis = new SegmentInfos(); |
| sis.Read(dir); |
| // Since we flushed w/o allowing merging we should now |
| // have 10 segments |
| Assert.AreEqual(10, sis.Count); |
| dir.Dispose(); |
| } |
| |
| // Make sure we can flush segment w/ norms, then add |
| // empty doc (no norms) and flush |
| [Test] |
| public virtual void TestEmptyDocAfterFlushingRealDoc() |
| { |
| Directory dir = NewDirectory(); |
| IndexWriter writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random))); |
| Document doc = new Document(); |
| FieldType customType = new FieldType(TextField.TYPE_STORED); |
| customType.StoreTermVectors = true; |
| customType.StoreTermVectorPositions = true; |
| customType.StoreTermVectorOffsets = true; |
| doc.Add(NewField("field", "aaa", customType)); |
| writer.AddDocument(doc); |
| writer.Commit(); |
| if (Verbose) |
| { |
| Console.WriteLine("\nTEST: now add empty doc"); |
| } |
| writer.AddDocument(new Document()); |
| writer.Dispose(); |
| IndexReader reader = DirectoryReader.Open(dir); |
| Assert.AreEqual(2, reader.NumDocs); |
| reader.Dispose(); |
| dir.Dispose(); |
| } |
| |
| /// <summary> |
| /// Test that no NullReferenceException will be raised |
| /// when adding one document with a single, empty field |
| /// and term vectors enabled. |
| /// </summary> |
| [Test] |
| public virtual void TestBadSegment() |
| { |
| Directory dir = NewDirectory(); |
| IndexWriter iw = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random))); |
| |
| Document document = new Document(); |
| FieldType customType = new FieldType(TextField.TYPE_NOT_STORED); |
| customType.StoreTermVectors = true; |
| document.Add(NewField("tvtest", "", customType)); |
| iw.AddDocument(document); |
| iw.Dispose(); |
| dir.Dispose(); |
| } |
| |
| #if FEATURE_THREAD_PRIORITY |
| // LUCENE-1036 |
| [Test] |
| public virtual void TestMaxThreadPriority() |
| { |
| ThreadPriority pri = ThreadJob.CurrentThread.Priority; |
| try |
| { |
| Directory dir = NewDirectory(); |
| IndexWriterConfig conf = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetMaxBufferedDocs(2).SetMergePolicy(NewLogMergePolicy()); |
| ((LogMergePolicy)conf.MergePolicy).MergeFactor = 2; |
| IndexWriter iw = new IndexWriter(dir, conf); |
| Document document = new Document(); |
| FieldType customType = new FieldType(TextField.TYPE_NOT_STORED); |
| customType.StoreTermVectors = true; |
| document.Add(NewField("tvtest", "a b c", customType)); |
| Thread.CurrentThread.Priority = ThreadPriority.Highest; |
| for (int i = 0; i < 4; i++) |
| { |
| iw.AddDocument(document); |
| } |
| iw.Dispose(); |
| dir.Dispose(); |
| } |
| finally |
| { |
| Thread.CurrentThread.Priority = pri; |
| } |
| } |
| #endif |
| |
| [Test] |
| public virtual void TestVariableSchema() |
| { |
| Directory dir = NewDirectory(); |
| for (int i = 0; i < 20; i++) |
| { |
| if (Verbose) |
| { |
| Console.WriteLine("TEST: iter=" + i); |
| } |
| IndexWriter writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetMaxBufferedDocs(2).SetMergePolicy(NewLogMergePolicy())); |
| //LogMergePolicy lmp = (LogMergePolicy) writer.getConfig().getMergePolicy(); |
| //lmp.setMergeFactor(2); |
| //lmp.setNoCFSRatio(0.0); |
| Document doc = new Document(); |
| string contents = "aa bb cc dd ee ff gg hh ii jj kk"; |
| |
| FieldType customType = new FieldType(TextField.TYPE_STORED); |
| FieldType type = null; |
| if (i == 7) |
| { |
| // Add empty docs here |
| doc.Add(NewTextField("content3", "", Field.Store.NO)); |
| } |
| else |
| { |
| if (i % 2 == 0) |
| { |
| doc.Add(NewField("content4", contents, customType)); |
| type = customType; |
| } |
| else |
| { |
| type = TextField.TYPE_NOT_STORED; |
| } |
| doc.Add(NewTextField("content1", contents, Field.Store.NO)); |
| doc.Add(NewField("content3", "", customType)); |
| doc.Add(NewField("content5", "", type)); |
| } |
| |
| for (int j = 0; j < 4; j++) |
| { |
| writer.AddDocument(doc); |
| } |
| |
| writer.Dispose(); |
| |
| if (0 == i % 4) |
| { |
| writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random))); |
| //LogMergePolicy lmp2 = (LogMergePolicy) writer.getConfig().getMergePolicy(); |
| //lmp2.setNoCFSRatio(0.0); |
| writer.ForceMerge(1); |
| writer.Dispose(); |
| } |
| } |
| dir.Dispose(); |
| } |
| |
| // LUCENE-1084: test unlimited field length |
| [Test] |
| public virtual void TestUnlimitedMaxFieldLength() |
| { |
| Directory dir = NewDirectory(); |
| |
| IndexWriter writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random))); |
| |
| Document doc = new Document(); |
| StringBuilder b = new StringBuilder(); |
| for (int i = 0; i < 10000; i++) |
| { |
| b.Append(" a"); |
| } |
| b.Append(" x"); |
| doc.Add(NewTextField("field", b.ToString(), Field.Store.NO)); |
| writer.AddDocument(doc); |
| writer.Dispose(); |
| |
| IndexReader reader = DirectoryReader.Open(dir); |
| Term t = new Term("field", "x"); |
| Assert.AreEqual(1, reader.DocFreq(t)); |
| reader.Dispose(); |
| dir.Dispose(); |
| } |
| |
| // LUCENE-1179 |
| [Test] |
| public virtual void TestEmptyFieldName() |
| { |
| Directory dir = NewDirectory(); |
| IndexWriter writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random))); |
| Document doc = new Document(); |
| doc.Add(NewTextField("", "a b c", Field.Store.NO)); |
| writer.AddDocument(doc); |
| writer.Dispose(); |
| dir.Dispose(); |
| } |
| |
| [Test] |
| public virtual void TestEmptyFieldNameTerms() |
| { |
| Directory dir = NewDirectory(); |
| IndexWriter writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random))); |
| Document doc = new Document(); |
| doc.Add(NewTextField("", "a b c", Field.Store.NO)); |
| writer.AddDocument(doc); |
| writer.Dispose(); |
| DirectoryReader reader = DirectoryReader.Open(dir); |
| AtomicReader subreader = GetOnlySegmentReader(reader); |
| TermsEnum te = subreader.Fields.GetTerms("").GetEnumerator(); |
| Assert.IsTrue(te.MoveNext()); |
| Assert.AreEqual(new BytesRef("a"), te.Term); |
| Assert.IsTrue(te.MoveNext()); |
| Assert.AreEqual(new BytesRef("b"), te.Term); |
| Assert.IsTrue(te.MoveNext()); |
| Assert.AreEqual(new BytesRef("c"), te.Term); |
| Assert.IsFalse(te.MoveNext()); |
| reader.Dispose(); |
| dir.Dispose(); |
| } |
| |
| [Test] |
| public virtual void TestEmptyFieldNameWithEmptyTerm() |
| { |
| Directory dir = NewDirectory(); |
| IndexWriter writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random))); |
| Document doc = new Document(); |
| doc.Add(NewStringField("", "", Field.Store.NO)); |
| doc.Add(NewStringField("", "a", Field.Store.NO)); |
| doc.Add(NewStringField("", "b", Field.Store.NO)); |
| doc.Add(NewStringField("", "c", Field.Store.NO)); |
| writer.AddDocument(doc); |
| writer.Dispose(); |
| DirectoryReader reader = DirectoryReader.Open(dir); |
| AtomicReader subreader = GetOnlySegmentReader(reader); |
| TermsEnum te = subreader.Fields.GetTerms("").GetEnumerator(); |
| Assert.IsTrue(te.MoveNext()); |
| Assert.AreEqual(new BytesRef(""), te.Term); |
| Assert.IsTrue(te.MoveNext()); |
| Assert.AreEqual(new BytesRef("a"), te.Term); |
| Assert.IsTrue(te.MoveNext()); |
| Assert.AreEqual(new BytesRef("b"), te.Term); |
| Assert.IsTrue(te.MoveNext()); |
| Assert.AreEqual(new BytesRef("c"), te.Term); |
| Assert.IsFalse(te.MoveNext()); |
| reader.Dispose(); |
| dir.Dispose(); |
| } |
| |
| private sealed class MockIndexWriter : IndexWriter |
| { |
| public MockIndexWriter(Directory dir, IndexWriterConfig conf) |
| : base(dir, conf) |
| { |
| } |
| |
| internal bool afterWasCalled; |
| internal bool beforeWasCalled; |
| |
| protected override void DoAfterFlush() |
| { |
| afterWasCalled = true; |
| } |
| |
| protected override void DoBeforeFlush() |
| { |
| beforeWasCalled = true; |
| } |
| } |
| |
| // LUCENE-1222 |
| [Test] |
| public virtual void TestDoBeforeAfterFlush() |
| { |
| Directory dir = NewDirectory(); |
| MockIndexWriter w = new MockIndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random))); |
| Document doc = new Document(); |
| FieldType customType = new FieldType(TextField.TYPE_STORED); |
| doc.Add(NewField("field", "a field", customType)); |
| w.AddDocument(doc); |
| w.Commit(); |
| Assert.IsTrue(w.beforeWasCalled); |
| Assert.IsTrue(w.afterWasCalled); |
| w.beforeWasCalled = false; |
| w.afterWasCalled = false; |
| w.DeleteDocuments(new Term("field", "field")); |
| w.Commit(); |
| Assert.IsTrue(w.beforeWasCalled); |
| Assert.IsTrue(w.afterWasCalled); |
| w.Dispose(); |
| |
| IndexReader ir = DirectoryReader.Open(dir); |
| Assert.AreEqual(0, ir.NumDocs); |
| ir.Dispose(); |
| |
| dir.Dispose(); |
| } |
| |
| // LUCENE-1255 |
| [Test] |
| public virtual void TestNegativePositions() |
| { |
| TokenStream tokens = new TokenStreamAnonymousInnerClassHelper(this); |
| |
| Directory dir = NewDirectory(); |
| IndexWriter w = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random))); |
| Document doc = new Document(); |
| doc.Add(new TextField("field", tokens)); |
| try |
| { |
| w.AddDocument(doc); |
| Assert.Fail("did not hit expected exception"); |
| } |
| catch (ArgumentException) |
| { |
| // expected |
| } |
| w.Dispose(); |
| dir.Dispose(); |
| } |
| |
| private class TokenStreamAnonymousInnerClassHelper : TokenStream |
| { |
| private readonly TestIndexWriter outerInstance; |
| |
| public TokenStreamAnonymousInnerClassHelper(TestIndexWriter outerInstance) |
| { |
| this.outerInstance = outerInstance; |
| termAtt = AddAttribute<ICharTermAttribute>(); |
| posIncrAtt = AddAttribute<IPositionIncrementAttribute>(); |
| terms = new List<string> { "a", "b", "c" }.GetEnumerator(); |
| first = true; |
| } |
| |
| internal readonly ICharTermAttribute termAtt; |
| internal readonly IPositionIncrementAttribute posIncrAtt; |
| |
| internal readonly IEnumerator<string> terms; |
| internal bool first; |
| |
| public sealed override bool IncrementToken() |
| { |
| if (!terms.MoveNext()) |
| { |
| return false; |
| } |
| ClearAttributes(); |
| termAtt.Append(terms.Current); |
| posIncrAtt.PositionIncrement = first ? 0 : 1; |
| first = false; |
| return true; |
| } |
| } |
| |
| // LUCENE-2529 |
| [Test] |
| public virtual void TestPositionIncrementGapEmptyField() |
| { |
| Directory dir = NewDirectory(); |
| MockAnalyzer analyzer = new MockAnalyzer(Random); |
| analyzer.SetPositionIncrementGap(100); |
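| // The analyzer now inserts a 100-position gap between values of the same |
| // field, so the tokens of f2 start at position 100 even though f is empty: |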
| IndexWriter w = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer)); |
| Document doc = new Document(); |
| FieldType customType = new FieldType(TextField.TYPE_NOT_STORED); |
| customType.StoreTermVectors = true; |
| customType.StoreTermVectorPositions = true; |
| Field f = NewField("field", "", customType); |
| Field f2 = NewField("field", "crunch man", customType); |
| doc.Add(f); |
| doc.Add(f2); |
| w.AddDocument(doc); |
| w.Dispose(); |
| |
| IndexReader r = DirectoryReader.Open(dir); |
| Terms tpv = r.GetTermVectors(0).GetTerms("field"); |
| TermsEnum termsEnum = tpv.GetEnumerator(); |
| Assert.IsTrue(termsEnum.MoveNext()); |
| DocsAndPositionsEnum dpEnum = termsEnum.DocsAndPositions(null, null); |
| Assert.IsNotNull(dpEnum); |
| Assert.IsTrue(dpEnum.NextDoc() != DocIdSetIterator.NO_MORE_DOCS); |
| Assert.AreEqual(1, dpEnum.Freq); |
| Assert.AreEqual(100, dpEnum.NextPosition()); |
| |
| Assert.IsTrue(termsEnum.MoveNext()); |
| dpEnum = termsEnum.DocsAndPositions(null, dpEnum); |
| Assert.IsNotNull(dpEnum); |
| Assert.IsTrue(dpEnum.NextDoc() != DocIdSetIterator.NO_MORE_DOCS); |
| Assert.AreEqual(1, dpEnum.Freq); |
| Assert.AreEqual(101, dpEnum.NextPosition()); |
| Assert.IsFalse(termsEnum.MoveNext()); |
| |
| r.Dispose(); |
| dir.Dispose(); |
| } |
| |
| [Test] |
| public virtual void TestDeadlock() |
| { |
| Directory dir = NewDirectory(); |
| IndexWriter writer = new IndexWriter(dir, (IndexWriterConfig)NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetMaxBufferedDocs(2)); |
| Document doc = new Document(); |
| |
| FieldType customType = new FieldType(TextField.TYPE_STORED); |
| customType.StoreTermVectors = true; |
| customType.StoreTermVectorPositions = true; |
| customType.StoreTermVectorOffsets = true; |
| |
| doc.Add(NewField("content", "aaa bbb ccc ddd eee fff ggg hhh iii", customType)); |
| writer.AddDocument(doc); |
| writer.AddDocument(doc); |
| writer.AddDocument(doc); |
| writer.Commit(); |
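| // maxBufferedDocs=2 flushed the first two docs into one segment and |
| // Commit() flushed the third into another: |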
| // index has 2 segments |
| |
| Directory dir2 = NewDirectory(); |
| IndexWriter writer2 = new IndexWriter(dir2, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random))); |
| writer2.AddDocument(doc); |
| writer2.Dispose(); |
| |
| IndexReader r1 = DirectoryReader.Open(dir2); |
| writer.AddIndexes(r1, r1); |
| writer.Dispose(); |
| |
| IndexReader r3 = DirectoryReader.Open(dir); |
| Assert.AreEqual(5, r3.NumDocs); |
| r3.Dispose(); |
| |
| r1.Dispose(); |
| |
| dir2.Dispose(); |
| dir.Dispose(); |
| } |
| |
| private class IndexerThreadInterrupt : ThreadJob |
| { |
| private readonly TestIndexWriter outerInstance; |
| |
| internal volatile bool failed; |
| internal volatile bool finish; |
| |
| internal volatile bool allowInterrupt = false; |
| internal readonly Random random; |
| internal readonly Directory adder; |
| |
| internal IndexerThreadInterrupt(TestIndexWriter outerInstance) |
| { |
| this.outerInstance = outerInstance; |
| this.IsDebug = true; // LUCENENET: Rethrow with the original stack trace to assist with debugging |
| this.random = new Random(LuceneTestCase.Random.Next()); |
| // make a little directory for addIndexes |
| // LUCENE-2239: won't work with NIOFS/MMAP |
| adder = new MockDirectoryWrapper(this.random, new RAMDirectory()); |
| IndexWriterConfig conf = NewIndexWriterConfig( |
| #if FEATURE_INSTANCE_TESTDATA_INITIALIZATION |
| outerInstance, |
| #endif |
| this.random, TEST_VERSION_CURRENT, new MockAnalyzer(this.random)); |
| using IndexWriter w = new IndexWriter(adder, conf); |
| Document doc = new Document(); |
| doc.Add(NewStringField(this.random, "id", "500", Field.Store.NO)); |
| doc.Add(NewField(this.random, "field", "some prepackaged text contents", storedTextType)); |
| if (DefaultCodecSupportsDocValues) |
| { |
| doc.Add(new BinaryDocValuesField("binarydv", new BytesRef("500"))); |
| doc.Add(new NumericDocValuesField("numericdv", 500)); |
| doc.Add(new SortedDocValuesField("sorteddv", new BytesRef("500"))); |
| } |
| if (DefaultCodecSupportsSortedSet) |
| { |
| doc.Add(new SortedSetDocValuesField("sortedsetdv", new BytesRef("one"))); |
| doc.Add(new SortedSetDocValuesField("sortedsetdv", new BytesRef("two"))); |
| } |
| w.AddDocument(doc); |
| doc = new Document(); |
| doc.Add(NewStringField(this.random, "id", "501", Field.Store.NO)); |
| doc.Add(NewField(this.random, "field", "some more contents", storedTextType)); |
| if (DefaultCodecSupportsDocValues) |
| { |
| doc.Add(new BinaryDocValuesField("binarydv", new BytesRef("501"))); |
| doc.Add(new NumericDocValuesField("numericdv", 501)); |
| doc.Add(new SortedDocValuesField("sorteddv", new BytesRef("501"))); |
| } |
| if (DefaultCodecSupportsSortedSet) |
| { |
| doc.Add(new SortedSetDocValuesField("sortedsetdv", new BytesRef("two"))); |
| doc.Add(new SortedSetDocValuesField("sortedsetdv", new BytesRef("three"))); |
| } |
| w.AddDocument(doc); |
| w.DeleteDocuments(new Term("id", "500")); |
| } |
| |
| public override void Run() |
| { |
| // LUCENE-2239: won't work with NIOFS/MMAP |
| MockDirectoryWrapper dir = new MockDirectoryWrapper(random, new RAMDirectory()); |
| //var dir = new RAMDirectory(); |
| |
| // If an interrupt arrives while w.Dispose() is |
| // writing liveDocs, it can lead to a double-write of |
| // _X_N.del: |
| //dir.setPreventDoubleWrite(false); |
| IndexWriter w = null; |
| while (!finish) |
| { |
| try |
| { |
| while (!finish) |
| { |
| if (w != null) |
| { |
| // If interrupt arrives inside here, it's |
| // fine: we will cycle back and the first |
| // thing we do is try to close again, |
| // i.e. we'll never try to open a new writer |
| // until this one successfully closes: |
| w.Dispose(); |
| w = null; |
| } |
| IndexWriterConfig conf = NewIndexWriterConfig( |
| #if FEATURE_INSTANCE_TESTDATA_INITIALIZATION |
| outerInstance, |
| #endif |
| random, TEST_VERSION_CURRENT, new MockAnalyzer(random)).SetMaxBufferedDocs(2); |
| w = new IndexWriter(dir, conf); |
| |
| Document doc = new Document(); |
| Field idField = NewStringField(random, "id", "", Field.Store.NO); |
| Field binaryDVField = null; |
| Field numericDVField = null; |
| Field sortedDVField = null; |
| Field sortedSetDVField = new SortedSetDocValuesField("sortedsetdv", new BytesRef()); |
| doc.Add(idField); |
| doc.Add(NewField(random, "field", "some text contents", storedTextType)); |
| if (DefaultCodecSupportsDocValues) |
| { |
| binaryDVField = new BinaryDocValuesField("binarydv", new BytesRef()); |
| numericDVField = new NumericDocValuesField("numericdv", 0); |
| sortedDVField = new SortedDocValuesField("sorteddv", new BytesRef()); |
| doc.Add(binaryDVField); |
| doc.Add(numericDVField); |
| doc.Add(sortedDVField); |
| } |
| if (DefaultCodecSupportsSortedSet) |
| { |
| doc.Add(sortedSetDVField); |
| } |
| for (int i = 0; i < 100; i++) |
| { |
| idField.SetStringValue(Convert.ToString(i)); |
| if (DefaultCodecSupportsDocValues) |
| { |
| binaryDVField.SetBytesValue(new BytesRef(idField.GetStringValue())); |
| numericDVField.SetInt64Value(i); |
| sortedDVField.SetBytesValue(new BytesRef(idField.GetStringValue())); |
| } |
| sortedSetDVField.SetBytesValue(new BytesRef(idField.GetStringValue())); |
| int action = random.Next(100); |
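| // Randomly mix operations: rarely AddIndexes (action == 17), occasionally |
| // DeleteAll (action % 30 == 0), otherwise alternate update and add: |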
| if (action == 17) |
| { |
| w.AddIndexes(adder); |
| } |
| else if (action % 30 == 0) |
| { |
| w.DeleteAll(); |
| } |
| else if (action % 2 == 0) |
| { |
| w.UpdateDocument(new Term("id", idField.GetStringValue()), doc); |
| } |
| else |
| { |
| w.AddDocument(doc); |
| } |
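| // Occasionally open an NRT reader and try deleting a random doc through it: |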
| if (random.Next(3) == 0) |
| { |
| IndexReader r = null; |
| try |
| { |
| r = DirectoryReader.Open(w, random.NextBoolean()); |
| if (random.NextBoolean() && r.MaxDoc > 0) |
| { |
| int docid = random.Next(r.MaxDoc); |
| w.TryDeleteDocument(r, docid); |
| } |
| } |
| finally |
| { |
| IOUtils.DisposeWhileHandlingException(r); |
| } |
| } |
| if (i % 10 == 0) |
| { |
| w.Commit(); |
| } |
| if (random.Next(50) == 0) |
| { |
| w.ForceMerge(1); |
| } |
| } |
| w.Dispose(); |
| w = null; |
| //DirectoryReader.Open(dir).Dispose(); |
| using (var reader = DirectoryReader.Open(dir)) { } |
| |
| // Strangely, if we interrupt a thread before |
| // all classes are loaded, the class loader |
| // seems to do scary things with the interrupt |
| // status. In Java 1.5, it'll throw an |
| // incorrect ClassNotFoundException. In Java |
| // 1.6, it'll silently clear the interrupt. |
| // So, on first iteration through here we |
| // don't open ourselves up for interrupts |
| // until we've done the above loop. |
| allowInterrupt = true; |
| } |
| } |
| #if FEATURE_THREAD_INTERRUPT |
| catch (ThreadInterruptedException re) |
| { |
| // NOTE: important to leave this verbosity/noise |
| // on!! this test doesn't repro easily so when |
| // Jenkins hits a fail we need to study where the |
| // interrupts struck! |
| Console.WriteLine("TEST: got interrupt"); |
| Console.WriteLine(re.ToString()); |
| |
| // LUCENENET NOTE: Since our original exception is ThreadInterruptException instead of InterruptException |
| // in .NET, our expectation is typically that the InnerException is null (but it doesn't have to be). |
| // So, this assertion is not needed in .NET. And if we get to this catch block, we already know we have |
| // the right exception type, so there is nothing to test here. |
| //Exception e = re.InnerException; |
| //Assert.IsTrue(e is ThreadInterruptedException); |
| if (finish) |
| { |
| break; |
| } |
| } |
| #endif |
| catch (Exception t) |
| { |
| Console.WriteLine("FAILED; unexpected exception"); |
| Console.WriteLine(t.ToString()); |
| failed = true; |
| break; |
| } |
| } |
| |
| if (!failed) |
| { |
| if (Verbose) |
| { |
| Console.WriteLine("TEST: now rollback"); |
| } |
| // clear interrupt state: |
| ThreadJob.Interrupted(); |
| if (w != null) |
| { |
| try |
| { |
| w.Rollback(); |
| } |
| #if FEATURE_THREAD_INTERRUPT |
| // LUCENENET specific - there is a chance that our thread will be |
| // interrupted here, so we need to catch and ignore that exception |
| // when our MockDirectoryWrapper throws it. |
| catch (ThreadInterruptedException) |
| { |
| // ignore |
| } |
| #endif |
| catch (IOException ioe) |
| { |
| throw new Exception(ioe.ToString(), ioe); |
| } |
| } |
| |
| try |
| { |
| TestUtil.CheckIndex(dir); |
| } |
| catch (Exception e) |
| { |
| failed = true; |
| Console.WriteLine("CheckIndex FAILED: unexpected exception"); |
| Console.WriteLine(e.ToString()); |
| } |
| try |
| { |
| using IndexReader r = DirectoryReader.Open(dir); |
| //Console.WriteLine("doc count=" + r.NumDocs); |
| } |
| catch (Exception e) |
| { |
| failed = true; |
| Console.WriteLine("DirectoryReader.open FAILED: unexpected exception"); |
| Console.WriteLine(e.ToString()); |
| } |
| } |
| try |
| { |
| IOUtils.Dispose(dir); |
| } |
| catch (IOException e) |
| { |
| throw new Exception(e.ToString(), e); |
| } |
| try |
| { |
| IOUtils.Dispose(adder); |
| } |
| catch (IOException e) |
| { |
| throw new Exception(e.ToString(), e); |
| } |
| } |
| } |
| |
| [Test] |
| [Slow] |
| [AwaitsFix(BugUrl = "https://github.com/apache/lucenenet/issues/269")] // LUCENENET TODO: this test occasionally fails |
| public virtual void TestThreadInterruptDeadlock() |
| { |
| IndexerThreadInterrupt t = new IndexerThreadInterrupt(this); |
| t.IsBackground = true; |
| t.Start(); |
| |
| // Force class loader to load ThreadInterruptedException |
| // up front... else we can see a false failure if 2nd |
| // interrupt arrives while class loader is trying to |
| // init this class (in servicing a first interrupt): |
| // C# does not have the late load problem. |
| //Assert.IsTrue((new ThreadInterruptedException(new Exception("Thread interrupted"))).InnerException is ThreadInterruptedException); |
| |
| // issue at least 300 interrupts to the child thread |
| int numInterrupts = AtLeast(300); |
| int i = 0; |
| while (i < numInterrupts) |
| { |
| // TODO: would be nice to also sometimes interrupt the |
| // CMS merge threads too ... |
| Thread.Sleep(10); |
| if (t.allowInterrupt) |
| { |
| i++; |
| t.Interrupt(); |
| } |
| if (!t.IsAlive) |
| { |
| break; |
| } |
| } |
| t.finish = true; |
| t.Join(); |
| |
| Assert.IsFalse(t.failed); |
| } |
| |
| /// <summary> |
| /// testThreadInterruptDeadlock but with 2 indexer threads </summary> |
| [Test] |
| [Slow] |
| [AwaitsFix(BugUrl = "https://github.com/apache/lucenenet/issues/269")] // LUCENENET TODO: this test occasionally fails |
| public virtual void TestTwoThreadsInterruptDeadlock() |
| { |
| IndexerThreadInterrupt t1 = new IndexerThreadInterrupt(this); |
| t1.IsBackground = true; |
| t1.Start(); |
| |
| IndexerThreadInterrupt t2 = new IndexerThreadInterrupt(this); |
| t2.IsBackground = true; |
| t2.Start(); |
| |
| // Force class loader to load ThreadInterruptedException |
| // up front... else we can see a false failure if 2nd |
| // interrupt arrives while class loader is trying to |
| // init this class (in servicing a first interrupt): |
| // C# does not have the late load problem. |
| //Assert.IsTrue((new ThreadInterruptedException(new Exception("Thread interrupted"))).InnerException is ThreadInterruptedException); |
| |
| // issue at least 300 interrupts to the child threads |
| int numInterrupts = AtLeast(300); |
| int i = 0; |
| while (i < numInterrupts) |
| { |
| // TODO: would be nice to also sometimes interrupt the |
| // CMS merge threads too ... |
| Thread.Sleep(10); |
| IndexerThreadInterrupt t = Random.NextBoolean() ? t1 : t2; |
| if (t.allowInterrupt) |
| { |
| i++; |
| t.Interrupt(); |
| } |
| if (!t1.IsAlive && !t2.IsAlive) |
| { |
| break; |
| } |
| } |
| t1.finish = true; |
| t2.finish = true; |
| t1.Join(); |
| t2.Join(); |
| |
| Assert.IsFalse(t1.failed); |
| Assert.IsFalse(t2.failed); |
| } |
| |
| [Test] |
| public virtual void TestIndexStoreCombos() |
| { |
| Directory dir = NewDirectory(); |
| IndexWriter w = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random))); |
| var b = new byte[50]; |
| for (int i = 0; i < 50; i++) |
| { |
| b[i] = (byte)(i + 77); |
| } |
| |
| Document doc = new Document(); |
| |
| FieldType customType = new FieldType(StoredField.TYPE); |
| customType.IsTokenized = true; |
| |
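| // Store a 17-byte slice of b starting at offset 10 (values 87..103): |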
| Field f = new Field("binary", b, 10, 17, customType); |
| customType.IsIndexed = true; |
| f.SetTokenStream(new MockTokenizer(new StringReader("doc1field1"), MockTokenizer.WHITESPACE, false)); |
| |
| FieldType customType2 = new FieldType(TextField.TYPE_STORED); |
| |
| Field f2 = NewField("string", "value", customType2); |
| f2.SetTokenStream(new MockTokenizer(new StringReader("doc1field2"), MockTokenizer.WHITESPACE, false)); |
| doc.Add(f); |
| doc.Add(f2); |
| w.AddDocument(doc); |
| |
| // add 2 docs to test in-memory merging |
| f.SetTokenStream(new MockTokenizer(new StringReader("doc2field1"), MockTokenizer.WHITESPACE, false)); |
| f2.SetTokenStream(new MockTokenizer(new StringReader("doc2field2"), MockTokenizer.WHITESPACE, false)); |
| w.AddDocument(doc); |
| |
| // force segment flush so we can force a segment merge with doc3 later. |
| w.Commit(); |
| |
| f.SetTokenStream(new MockTokenizer(new StringReader("doc3field1"), MockTokenizer.WHITESPACE, false)); |
| f2.SetTokenStream(new MockTokenizer(new StringReader("doc3field2"), MockTokenizer.WHITESPACE, false)); |
| |
| w.AddDocument(doc); |
| w.Commit(); |
| w.ForceMerge(1); // force segment merge. |
| w.Dispose(); |
| |
| IndexReader ir = DirectoryReader.Open(dir); |
| Document doc2 = ir.Document(0); |
| IIndexableField f3 = doc2.GetField("binary"); |
| b = f3.GetBinaryValue().Bytes; |
| Assert.IsNotNull(b); |
| Assert.AreEqual(17, b.Length); |
| Assert.AreEqual((byte)87, b[0]); |
| |
| Assert.IsTrue(ir.Document(0).GetField("binary").GetBinaryValue() != null); |
| Assert.IsTrue(ir.Document(1).GetField("binary").GetBinaryValue() != null); |
| Assert.IsTrue(ir.Document(2).GetField("binary").GetBinaryValue() != null); |
| |
| Assert.AreEqual("value", ir.Document(0).Get("string")); |
| Assert.AreEqual("value", ir.Document(1).Get("string")); |
| Assert.AreEqual("value", ir.Document(2).Get("string")); |
| |
| // test that the terms were indexed. |
| Assert.IsTrue(TestUtil.Docs(Random, ir, "binary", new BytesRef("doc1field1"), null, null, DocsFlags.NONE).NextDoc() != DocIdSetIterator.NO_MORE_DOCS); |
| Assert.IsTrue(TestUtil.Docs(Random, ir, "binary", new BytesRef("doc2field1"), null, null, DocsFlags.NONE).NextDoc() != DocIdSetIterator.NO_MORE_DOCS); |
| Assert.IsTrue(TestUtil.Docs(Random, ir, "binary", new BytesRef("doc3field1"), null, null, DocsFlags.NONE).NextDoc() != DocIdSetIterator.NO_MORE_DOCS); |
| Assert.IsTrue(TestUtil.Docs(Random, ir, "string", new BytesRef("doc1field2"), null, null, DocsFlags.NONE).NextDoc() != DocIdSetIterator.NO_MORE_DOCS); |
| Assert.IsTrue(TestUtil.Docs(Random, ir, "string", new BytesRef("doc2field2"), null, null, DocsFlags.NONE).NextDoc() != DocIdSetIterator.NO_MORE_DOCS); |
| Assert.IsTrue(TestUtil.Docs(Random, ir, "string", new BytesRef("doc3field2"), null, null, DocsFlags.NONE).NextDoc() != DocIdSetIterator.NO_MORE_DOCS); |
| |
| ir.Dispose(); |
| dir.Dispose(); |
| } |
| |
| [Test] |
| public virtual void TestNoDocsIndex() |
| { |
| Directory dir = NewDirectory(); |
| IndexWriter writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random))); |
| writer.AddDocument(new Document()); |
| writer.Dispose(); |
| |
| dir.Dispose(); |
| } |
| |
| [Test] |
| public virtual void TestIndexDivisor() |
| { |
| Directory dir = NewDirectory(); |
| IndexWriterConfig config = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)); |
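| // Put every 2nd term in the term index (the default interval is much |
| // larger) so term dictionary seeking is exercised densely: |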
| config.SetTermIndexInterval(2); |
| IndexWriter w = new IndexWriter(dir, config); |
| StringBuilder s = new StringBuilder(); |
| // must be > 256 |
| for (int i = 0; i < 300; i++) |
| { |
| s.Append(' ').Append(i); |
| } |
| Document d = new Document(); |
| Field f = NewTextField("field", s.ToString(), Field.Store.NO); |
| d.Add(f); |
| w.AddDocument(d); |
| |
| AtomicReader r = GetOnlySegmentReader(w.GetReader()); |
| TermsEnum t = r.Fields.GetTerms("field").GetEnumerator(); |
| int count = 0; |
| while (t.MoveNext()) |
| { |
| DocsEnum docs = TestUtil.Docs(Random, t, null, null, DocsFlags.NONE); |
| Assert.AreEqual(0, docs.NextDoc()); |
| Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, docs.NextDoc()); |
| count++; |
| } |
| Assert.AreEqual(300, count); |
| r.Dispose(); |
| w.Dispose(); |
| dir.Dispose(); |
| } |
| |
| [Test] |
| public virtual void TestDeleteUnusedFiles() |
| { |
| for (int iter = 0; iter < 2; iter++) |
| { |
| Directory dir = NewMockDirectory(); // relies on windows semantics |
| |
| MergePolicy mergePolicy = NewLogMergePolicy(true); |
| |
| // this test expects all of its segments to be in CFS |
| mergePolicy.NoCFSRatio = 1.0; |
| mergePolicy.MaxCFSSegmentSizeMB = double.PositiveInfinity; |
| |
| IndexWriter w = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetMergePolicy(mergePolicy).SetUseCompoundFile(true)); |
| Document doc = new Document(); |
| doc.Add(NewTextField("field", "go", Field.Store.NO)); |
| w.AddDocument(doc); |
| DirectoryReader r; |
| if (iter == 0) |
| { |
| // use NRT |
| r = w.GetReader(); |
| } |
| else |
| { |
| // don't use NRT |
| w.Commit(); |
| r = DirectoryReader.Open(dir); |
| } |
| |
| IList<string> files = new List<string>(dir.ListAll()); |
| |
| // RAMDir won't have a write.lock, but fs dirs will: |
| files.Remove("write.lock"); |
| |
| Assert.IsTrue(files.Contains("_0.cfs")); |
| Assert.IsTrue(files.Contains("_0.cfe")); |
| Assert.IsTrue(files.Contains("_0.si")); |
| if (iter == 1) |
| { |
| // we run a full commit so there should be a segments file etc. |
| Assert.IsTrue(files.Contains("segments_1")); |
| Assert.IsTrue(files.Contains("segments.gen")); |
| Assert.AreEqual(5, files.Count, string.Join(", ", files)); |
| } |
| else |
| { |
| // this is an NRT reopen - no segments files yet |
| |
| Assert.AreEqual(3, files.Count, string.Join(", ", files)); |
| } |
| w.AddDocument(doc); |
| w.ForceMerge(1); |
| if (iter == 1) |
| { |
| w.Commit(); |
| } |
| IndexReader r2 = DirectoryReader.OpenIfChanged(r); |
| Assert.IsNotNull(r2); |
| Assert.IsTrue(r != r2); |
| files = dir.ListAll(); |
| |
| // NOTE: here we rely on "Windows" behavior, ie, even |
| // though IW wanted to delete _0.cfs since it was |
| // merged away, because we have a reader open |
| // against this file, it should still be here: |
| Assert.IsTrue(files.Contains("_0.cfs")); |
| // forceMerge created this |
| //Assert.IsTrue(files.Contains("_2.cfs")); |
| w.DeleteUnusedFiles(); |
| |
| files = dir.ListAll(); |
| // r still holds this file open |
| Assert.IsTrue(files.Contains("_0.cfs")); |
| //Assert.IsTrue(files.Contains("_2.cfs")); |
| |
| r.Dispose(); |
| if (iter == 0) |
| { |
| // on closing NRT reader, it calls writer.deleteUnusedFiles |
| files = dir.ListAll(); |
| Assert.IsFalse(files.Contains("_0.cfs")); |
| } |
| else |
| { |
| // now writer can remove it |
| w.DeleteUnusedFiles(); |
| files = dir.ListAll(); |
| Assert.IsFalse(files.Contains("_0.cfs")); |
| } |
| //Assert.IsTrue(files.Contains("_2.cfs")); |
| |
| w.Dispose(); |
| r2.Dispose(); |
| |
| dir.Dispose(); |
| } |
| } |
| |
| [Test] |
| public virtual void TestDeleteUnusedFiles2() |
| { |
| // Validates that iw.DeleteUnusedFiles() also deletes unused index commits |
| // when a deletion policy that holds onto commits is used. |
| Directory dir = NewDirectory(); |
| IndexWriter writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetIndexDeletionPolicy(new SnapshotDeletionPolicy(new KeepOnlyLastCommitDeletionPolicy()))); |
| SnapshotDeletionPolicy sdp = (SnapshotDeletionPolicy)writer.Config.IndexDeletionPolicy; |
| |
| // First commit |
| Document doc = new Document(); |
| |
| FieldType customType = new FieldType(TextField.TYPE_STORED); |
| customType.StoreTermVectors = true; |
| customType.StoreTermVectorPositions = true; |
| customType.StoreTermVectorOffsets = true; |
| |
| doc.Add(NewField("c", "val", customType)); |
| writer.AddDocument(doc); |
| writer.Commit(); |
| Assert.AreEqual(1, DirectoryReader.ListCommits(dir).Count); |
| |
| // Keep that commit |
| IndexCommit id = sdp.Snapshot(); |
| |
| // Second commit - now KeepOnlyLastCommit cannot delete the prev commit. |
| doc = new Document(); |
| doc.Add(NewField("c", "val", customType)); |
| writer.AddDocument(doc); |
| writer.Commit(); |
| Assert.AreEqual(2, DirectoryReader.ListCommits(dir).Count); |
| |
| // Should delete the unreferenced commit |
| sdp.Release(id); |
| writer.DeleteUnusedFiles(); |
| Assert.AreEqual(1, DirectoryReader.ListCommits(dir).Count); |
| |
| writer.Dispose(); |
| dir.Dispose(); |
| } |
| |
| [Test] |
| public virtual void TestEmptyFSDirWithNoLock() |
| { |
| // Tests that if FSDir is opened w/ a NoLockFactory (or SingleInstanceLF), |
| // then IndexWriter ctor succeeds. Previously (LUCENE-2386) it failed |
| // when listAll() was called in IndexFileDeleter. |
| Directory dir = NewFSDirectory(CreateTempDir("emptyFSDirNoLock"), NoLockFactory.GetNoLockFactory()); |
| (new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)))).Dispose(); |
| dir.Dispose(); |
| } |
| |
| [Test] |
| public virtual void TestEmptyDirRollback() |
| { |
| // TODO: generalize this test |
| AssumeFalse("test makes assumptions about file counts", Codec.Default is SimpleTextCodec); |
| |
| // Tests that if IW is created over an empty Directory, some documents are |
| // indexed, flushed (but not committed) and then IW rolls back, then no |
| // files are left in the Directory. |
| Directory dir = NewDirectory(); |
| IndexWriter writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetMaxBufferedDocs(2).SetMergePolicy(NewLogMergePolicy()).SetUseCompoundFile(false)); |
| string[] files = dir.ListAll(); |
| |
| // Creating over empty dir should not create any files, |
| // or, at most the write.lock file |
| int extraFileCount; |
| if (files.Length == 1) |
| { |
| Assert.IsTrue(files[0].EndsWith("write.lock", StringComparison.Ordinal)); |
| extraFileCount = 1; |
| } |
| else |
| { |
| Assert.AreEqual(0, files.Length); |
| extraFileCount = 0; |
| } |
| |
| Document doc = new Document(); |
| FieldType customType = new FieldType(TextField.TYPE_STORED); |
| customType.StoreTermVectors = true; |
| customType.StoreTermVectorPositions = true; |
| customType.StoreTermVectorOffsets = true; |
| // create as many files as possible |
| doc.Add(NewField("c", "val", customType)); |
| writer.AddDocument(doc); |
| // Adding just one document does not call flush yet. |
| int computedExtraFileCount = 0; |
| foreach (string file in dir.ListAll()) |
| { |
| // don't count stored fields and term vector files |
| if (file.LastIndexOf('.') < 0 || !new List<string> { "fdx", "fdt", "tvx", "tvd", "tvf" }.Contains(file.Substring(file.LastIndexOf('.') + 1))) |
| { |
| ++computedExtraFileCount; |
| } |
| } |
| Assert.AreEqual(extraFileCount, computedExtraFileCount, "only the stored and term vector files should exist in the directory"); |
| |
| doc = new Document(); |
| doc.Add(NewField("c", "val", customType)); |
| writer.AddDocument(doc); |
| |
| // The second document should cause a flush. |
| Assert.IsTrue(dir.ListAll().Length > 5 + extraFileCount, "flush should have occurred and files should have been created"); |
| |
| // After rollback, IW should remove all files |
| writer.Rollback(); |
| string[] allFiles = dir.ListAll(); |
| Assert.IsTrue(allFiles.Length == 0 || Arrays.Equals(allFiles, new string[] { IndexWriter.WRITE_LOCK_NAME }), "no files should exist in the directory after rollback"); |
| |
| // Since we rolled-back above, that close should be a no-op |
| writer.Dispose(); |
| allFiles = dir.ListAll(); |
| Assert.IsTrue(allFiles.Length == 0 || Arrays.Equals(allFiles, new string[] { IndexWriter.WRITE_LOCK_NAME }), "expected a no-op close after IW.Rollback()"); |
| dir.Dispose(); |
| } |
| |
| [Test] |
| public virtual void TestNoSegmentFile() |
| { |
| BaseDirectoryWrapper dir = NewDirectory(); |
| dir.SetLockFactory(NoLockFactory.GetNoLockFactory()); |
| IndexWriter w = new IndexWriter(dir, (IndexWriterConfig)NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetMaxBufferedDocs(2)); |
| |
| Document doc = new Document(); |
| FieldType customType = new FieldType(TextField.TYPE_STORED); |
| customType.StoreTermVectors = true; |
| customType.StoreTermVectorPositions = true; |
| customType.StoreTermVectorOffsets = true; |
| doc.Add(NewField("c", "val", customType)); |
| w.AddDocument(doc); |
| w.AddDocument(doc); |
| IndexWriter w2 = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetMaxBufferedDocs(2).SetOpenMode(OpenMode.CREATE)); |
| |
| w2.Dispose(); |
| // Without this rollback, the test fails on Windows |
| w.Rollback(); |
| |
| // this test leaves only segments.gen, which causes |
| // DirectoryReader.indexExists to return true: |
| dir.CheckIndexOnDispose = false; |
| dir.Dispose(); |
| } |
| |
| [Test] |
| public virtual void TestNoUnwantedTVFiles() |
| { |
| Directory dir = NewDirectory(); |
| IndexWriter indexWriter = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetRAMBufferSizeMB(0.01).SetMergePolicy(NewLogMergePolicy())); |
| indexWriter.Config.MergePolicy.NoCFSRatio = 0.0; |
| |
| string BIG = "alskjhlaksjghlaksjfhalksvjepgjioefgjnsdfjgefgjhelkgjhqewlrkhgwlekgrhwelkgjhwelkgrhwlkejg"; |
| BIG = BIG + BIG + BIG + BIG; |
| |
| FieldType customType = new FieldType(TextField.TYPE_STORED); |
| customType.OmitNorms = true; |
| FieldType customType2 = new FieldType(TextField.TYPE_STORED); |
| customType2.IsTokenized = false; |
| FieldType customType3 = new FieldType(TextField.TYPE_STORED); |
| customType3.IsTokenized = false; |
| customType3.OmitNorms = true; |
| |
| for (int i = 0; i < 2; i++) |
| { |
| Document doc = new Document(); |
| doc.Add(new Field("id", Convert.ToString(i) + BIG, customType3)); |
| doc.Add(new Field("str", Convert.ToString(i) + BIG, customType2)); |
| doc.Add(new Field("str2", Convert.ToString(i) + BIG, storedTextType)); |
| doc.Add(new Field("str3", Convert.ToString(i) + BIG, customType)); |
| indexWriter.AddDocument(doc); |
| } |
| |
| indexWriter.Dispose(); |
| |
| TestUtil.CheckIndex(dir); |
| |
| AssertNoUnreferencedFiles(dir, "no tv files"); |
| DirectoryReader r0 = DirectoryReader.Open(dir); |
| foreach (AtomicReaderContext ctx in r0.Leaves) |
| { |
| SegmentReader sr = (SegmentReader)ctx.Reader; |
| Assert.IsFalse(sr.FieldInfos.HasVectors); |
| } |
| |
| r0.Dispose(); |
| dir.Dispose(); |
| } |
| |
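| // Analyzer that splits only on single spaces, so the wicked-long |
| // term in TestWickedLongTerm reaches the indexer intact. |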
| internal sealed class StringSplitAnalyzer : Analyzer |
| { |
| protected internal override TokenStreamComponents CreateComponents(string fieldName, TextReader reader) |
| { |
| return new TokenStreamComponents(new StringSplitTokenizer(reader)); |
| } |
| } |
| |
| private class StringSplitTokenizer : Tokenizer |
| { |
| private string[] tokens; |
| private int upto; |
| private readonly ICharTermAttribute termAtt; |
| |
| public StringSplitTokenizer(TextReader r) |
| : base(r) |
| { |
| termAtt = AddAttribute<ICharTermAttribute>(); |
| try |
| { |
| SetReader(r); |
| } |
| catch (IOException e) |
| { |
| throw new Exception(e.Message, e); |
| } |
| } |
| |
| public sealed override bool IncrementToken() |
| { |
| ClearAttributes(); |
| if (upto < tokens.Length) |
| { |
| termAtt.SetEmpty(); |
| termAtt.Append(tokens[upto]); |
| upto++; |
| return true; |
| } |
| else |
| { |
| return false; |
| } |
| } |
| |
| public override void Reset() |
| { |
| base.Reset(); |
| this.upto = 0; |
| StringBuilder b = new StringBuilder(); |
| char[] buffer = new char[1024]; |
| int n; |
| while ((n = m_input.Read(buffer, 0, buffer.Length)) > 0) |
| { |
| b.Append(buffer, 0, n); |
| } |
| this.tokens = b.ToString().Split(' ').TrimEnd(); |
| } |
| } |
| |
| /// <summary> |
| /// Make sure we skip wicked long terms. |
| /// </summary> |
| [Test] |
| public virtual void TestWickedLongTerm() |
| { |
| Directory dir = NewDirectory(); |
| RandomIndexWriter w = new RandomIndexWriter( |
| #if FEATURE_INSTANCE_TESTDATA_INITIALIZATION |
| this, |
| #endif |
| Random, dir, new StringSplitAnalyzer()); |
| |
| char[] chars = new char[DocumentsWriterPerThread.MAX_TERM_LENGTH_UTF8]; |
| Arrays.Fill(chars, 'x'); |
| Document doc = new Document(); |
| string bigTerm = new string(chars); |
| BytesRef bigTermBytesRef = new BytesRef(bigTerm); |
| |
| // this content produces a too-long term: |
| string contents = "abc xyz x" + bigTerm + " another term"; |
| doc.Add(new TextField("content", contents, Field.Store.NO)); |
| try |
| { |
| w.AddDocument(doc); |
| Assert.Fail("should have hit exception"); |
| } |
| #pragma warning disable 168 |
| catch (ArgumentException iae) |
| #pragma warning restore 168 |
| { |
| // expected |
| } |
| |
| // Make sure we can add another normal document |
| doc = new Document(); |
| doc.Add(new TextField("content", "abc bbb ccc", Field.Store.NO)); |
| w.AddDocument(doc); |
| |
| // So we remove the deleted doc: |
| w.ForceMerge(1); |
| |
| IndexReader reader = w.GetReader(); |
| w.Dispose(); |
| |
| // Make sure all terms < max size were indexed |
| Assert.AreEqual(1, reader.DocFreq(new Term("content", "abc"))); |
| Assert.AreEqual(1, reader.DocFreq(new Term("content", "bbb"))); |
| Assert.AreEqual(0, reader.DocFreq(new Term("content", "term"))); |
| |
| // Make sure the doc that has the massive term is NOT in |
| // the index: |
| Assert.AreEqual(1, reader.NumDocs, "document with wicked long term is in the index!"); |
| |
| reader.Dispose(); |
| dir.Dispose(); |
| dir = NewDirectory(); |
| |
| // Make sure we can add a document with exactly the |
| // maximum length term, and search on that term: |
| doc = new Document(); |
| FieldType customType = new FieldType(TextField.TYPE_NOT_STORED); |
| customType.IsTokenized = false; |
| Field contentField = new Field("content", "", customType); |
| doc.Add(contentField); |
| |
| w = new RandomIndexWriter( |
| #if FEATURE_INSTANCE_TESTDATA_INITIALIZATION |
| this, |
| #endif |
| Random, dir); |
| |
| contentField.SetStringValue("other"); |
| w.AddDocument(doc); |
| |
| contentField.SetStringValue("term"); |
| w.AddDocument(doc); |
| |
| contentField.SetStringValue(bigTerm); |
| w.AddDocument(doc); |
| |
| contentField.SetStringValue("zzz"); |
| w.AddDocument(doc); |
| |
| reader = w.GetReader(); |
| w.Dispose(); |
| Assert.AreEqual(1, reader.DocFreq(new Term("content", bigTerm))); |
| |
| SortedDocValues dti = FieldCache.DEFAULT.GetTermsIndex(SlowCompositeReaderWrapper.Wrap(reader), "content", (float)Random.NextDouble() * PackedInt32s.FAST); |
| Assert.AreEqual(4, dti.ValueCount); |
| BytesRef br = new BytesRef(); |
| dti.LookupOrd(2, br); |
| Assert.AreEqual(bigTermBytesRef, br); |
| reader.Dispose(); |
| dir.Dispose(); |
| } |
| |
| // LUCENE-3183 |
| [Test] |
| public virtual void TestEmptyFieldNameTIIOne() |
| { |
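| // Indexing into a field whose name is the empty string, with the |
| // smallest term index interval and divisor, must not throw. |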
| Directory dir = NewDirectory(); |
| IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)); |
| iwc.SetTermIndexInterval(1); |
| iwc.SetReaderTermsIndexDivisor(1); |
| IndexWriter writer = new IndexWriter(dir, iwc); |
| Document doc = new Document(); |
| doc.Add(NewTextField("", "a b c", Field.Store.NO)); |
| writer.AddDocument(doc); |
| writer.Dispose(); |
| dir.Dispose(); |
| } |
| |
| [Test] |
| public virtual void TestDeleteAllNRTLeftoverFiles() |
| { |
| Directory d = new MockDirectoryWrapper(Random, new RAMDirectory()); |
| IndexWriter w = new IndexWriter(d, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random))); |
| Document doc = new Document(); |
| for (int i = 0; i < 20; i++) |
| { |
| for (int j = 0; j < 100; ++j) |
| { |
| w.AddDocument(doc); |
| } |
| w.Commit(); |
| DirectoryReader.Open(w, true).Dispose(); |
| |
| w.DeleteAll(); |
| w.Commit(); |
| // Make sure we accumulate no files except for empty |
| // segments_N and segments.gen: |
| Assert.IsTrue(d.ListAll().Length <= 2); |
| } |
| |
| w.Dispose(); |
| d.Dispose(); |
| } |
| |
| [Test] |
| public virtual void TestNRTReaderVersion() |
| { |
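| // The NRT reader's Version must strictly increase after each add |
| // and after each delete. |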
| Directory d = new MockDirectoryWrapper(Random, new RAMDirectory()); |
| IndexWriter w = new IndexWriter(d, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random))); |
| Document doc = new Document(); |
| doc.Add(NewStringField("id", "0", Field.Store.YES)); |
| w.AddDocument(doc); |
| DirectoryReader r = w.GetReader(); |
| long version = r.Version; |
| r.Dispose(); |
| |
| w.AddDocument(doc); |
| r = w.GetReader(); |
| long version2 = r.Version; |
| r.Dispose(); |
| if (Debugging.AssertsEnabled) Debugging.Assert(version2 > version); |
| |
| w.DeleteDocuments(new Term("id", "0")); |
| r = w.GetReader(); |
| w.Dispose(); |
| long version3 = r.Version; |
| r.Dispose(); |
| if (Debugging.AssertsEnabled) Debugging.Assert(version3 > version2); |
| d.Dispose(); |
| } |
| |
| [Test] |
| public virtual void TestWhetherDeleteAllDeletesWriteLock() |
| { |
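| // DeleteAll() must not delete write.lock: a second writer must |
| // still fail to obtain the lock while the first is open. |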
| Directory d = NewFSDirectory(CreateTempDir("TestIndexWriter.testWhetherDeleteAllDeletesWriteLock")); |
| // Must use SimpleFSLockFactory... NativeFSLockFactory |
| // somehow "knows" a lock is held against write.lock |
| // even if you remove that file: |
| d.SetLockFactory(new SimpleFSLockFactory()); |
| RandomIndexWriter w1 = new RandomIndexWriter( |
| #if FEATURE_INSTANCE_TESTDATA_INITIALIZATION |
| this, |
| #endif |
| Random, d); |
| w1.DeleteAll(); |
| try |
| { |
| new RandomIndexWriter(Random, d, NewIndexWriterConfig(TEST_VERSION_CURRENT, null).SetWriteLockTimeout(100)); |
| Assert.Fail("should not be able to create another writer"); |
| } |
| #pragma warning disable 168 |
| catch (LockObtainFailedException lofe) |
| #pragma warning restore 168 |
| { |
| // expected |
| } |
| w1.Dispose(); |
| d.Dispose(); |
| } |
| |
| [Test] |
| public virtual void TestChangeIndexOptions() |
| { |
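| // Indexing the same field first with DOCS_AND_FREQS and then with |
| // DOCS_ONLY must be accepted without an exception (the writer |
| // reconciles the conflicting IndexOptions). |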
| Directory dir = NewDirectory(); |
| IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random))); |
| |
| FieldType docsAndFreqs = new FieldType(TextField.TYPE_NOT_STORED); |
| docsAndFreqs.IndexOptions = IndexOptions.DOCS_AND_FREQS; |
| |
| FieldType docsOnly = new FieldType(TextField.TYPE_NOT_STORED); |
| docsOnly.IndexOptions = IndexOptions.DOCS_ONLY; |
| |
| Document doc = new Document(); |
| doc.Add(new Field("field", "a b c", docsAndFreqs)); |
| w.AddDocument(doc); |
| w.AddDocument(doc); |
| |
| doc = new Document(); |
| doc.Add(new Field("field", "a b c", docsOnly)); |
| w.AddDocument(doc); |
| w.Dispose(); |
| dir.Dispose(); |
| } |
| |
| [Test] |
| public virtual void TestOnlyUpdateDocuments() |
| { |
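| // UpdateDocuments with a term that matches nothing should simply |
| // add the documents rather than throw. |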
| Directory dir = NewDirectory(); |
| IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random))); |
| |
| IList<Document> docs = new List<Document>(); |
| docs.Add(new Document()); |
| w.UpdateDocuments(new Term("foo", "bar"), docs); |
| w.Dispose(); |
| dir.Dispose(); |
| } |
| |
| // LUCENE-3872 |
| [Test] |
| public virtual void TestPrepareCommitThenClose() |
| { |
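| // Dispose() while a prepared commit is pending must throw; the |
| // pending commit must still be completable afterwards. |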
| Directory dir = NewDirectory(); |
| IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random))); |
| |
| w.PrepareCommit(); |
| try |
| { |
| w.Dispose(); |
| Assert.Fail("should have hit exception"); |
| } |
| #pragma warning disable 168 |
| catch (InvalidOperationException ise) |
| #pragma warning restore 168 |
| { |
| // expected |
| } |
| w.Commit(); |
| w.Dispose(); |
| IndexReader r = DirectoryReader.Open(dir); |
| Assert.AreEqual(0, r.MaxDoc); |
| r.Dispose(); |
| dir.Dispose(); |
| } |
| |
| // LUCENE-3872 |
| [Test] |
| public virtual void TestPrepareCommitThenRollback() |
| { |
| Directory dir = NewDirectory(); |
| IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random))); |
| |
| w.PrepareCommit(); |
| w.Rollback(); |
| Assert.IsFalse(DirectoryReader.IndexExists(dir)); |
| dir.Dispose(); |
| } |
| |
| // LUCENE-3872 |
| [Test] |
| public virtual void TestPrepareCommitThenRollback2() |
| { |
| Directory dir = NewDirectory(); |
| IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random))); |
| |
| w.Commit(); |
| w.AddDocument(new Document()); |
| w.PrepareCommit(); |
| w.Rollback(); |
| Assert.IsTrue(DirectoryReader.IndexExists(dir)); |
| IndexReader r = DirectoryReader.Open(dir); |
| Assert.AreEqual(0, r.MaxDoc); |
| r.Dispose(); |
| dir.Dispose(); |
| } |
| |
| [Test] |
| public virtual void TestDontInvokeAnalyzerForUnAnalyzedFields() |
| { |
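| // The analyzer below throws if it is ever invoked; adding only |
| // un-tokenized (StringField-based) fields must never touch it. |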
| Analyzer analyzer = new AnalyzerAnonymousInnerClassHelper(this); |
| Directory dir = NewDirectory(); |
| IndexWriter w = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer)); |
| Document doc = new Document(); |
| FieldType customType = new FieldType(StringField.TYPE_NOT_STORED); |
| customType.StoreTermVectors = true; |
| customType.StoreTermVectorPositions = true; |
| customType.StoreTermVectorOffsets = true; |
| Field f = NewField("field", "abcd", customType); |
| doc.Add(f); |
| doc.Add(f); |
| Field f2 = NewField("field", "", customType); |
| doc.Add(f2); |
| doc.Add(f); |
| w.AddDocument(doc); |
| w.Dispose(); |
| dir.Dispose(); |
| } |
| |
| private class AnalyzerAnonymousInnerClassHelper : Analyzer |
| { |
| private readonly TestIndexWriter outerInstance; |
| |
| public AnalyzerAnonymousInnerClassHelper(TestIndexWriter outerInstance) |
| { |
| this.outerInstance = outerInstance; |
| } |
| |
| protected internal override TokenStreamComponents CreateComponents(string fieldName, TextReader reader) |
| { |
| throw new InvalidOperationException("don't invoke me!"); |
| } |
| |
| public override int GetPositionIncrementGap(string fieldName) |
| { |
| throw new InvalidOperationException("don't invoke me!"); |
| } |
| |
| public override int GetOffsetGap(string fieldName) |
| { |
| throw new InvalidOperationException("don't invoke me!"); |
| } |
| } |
| |
| //LUCENE-1468 -- make sure opening an IndexWriter with |
| // create=true does not remove non-index files |
| |
| [Test] |
| public virtual void TestOtherFiles() |
| { |
| Directory dir = NewDirectory(); |
| var iw = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random))); |
| iw.AddDocument(new Document()); |
| iw.Dispose(); |
| try |
| { |
| // Create my own random file: |
| IndexOutput @out = dir.CreateOutput("myrandomfile", NewIOContext(Random)); |
| @out.WriteByte((byte)(sbyte)42); |
| @out.Dispose(); |
| |
| (new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)))).Dispose(); |
| |
| Assert.IsTrue(SlowFileExists(dir, "myrandomfile")); |
| } |
| finally |
| { |
| dir.Dispose(); |
| } |
| } |
| |
| // LUCENE-3849 |
| [Test] |
| public virtual void TestStopwordsPosIncHole() |
| { |
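| // "a" is a stopword here, so it leaves a position hole between |
| // "just" and "test"; the phrase query below must match across it. |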
| Directory dir = NewDirectory(); |
| Analyzer a = Analyzer.NewAnonymous(createComponents: (fieldName, reader) => |
| { |
| Tokenizer tokenizer = new MockTokenizer(reader); |
| TokenStream stream = new MockTokenFilter(tokenizer, MockTokenFilter.ENGLISH_STOPSET); |
| return new TokenStreamComponents(tokenizer, stream); |
| }); |
| RandomIndexWriter iw = new RandomIndexWriter( |
| #if FEATURE_INSTANCE_TESTDATA_INITIALIZATION |
| this, |
| #endif |
| Random, dir, a); |
| Document doc = new Document(); |
| doc.Add(new TextField("body", "just a", Field.Store.NO)); |
| doc.Add(new TextField("body", "test of gaps", Field.Store.NO)); |
| iw.AddDocument(doc); |
| IndexReader ir = iw.GetReader(); |
| iw.Dispose(); |
| IndexSearcher @is = NewSearcher(ir); |
| PhraseQuery pq = new PhraseQuery(); |
| pq.Add(new Term("body", "just"), 0); |
| pq.Add(new Term("body", "test"), 2); |
| // body:"just ? test" |
| Assert.AreEqual(1, @is.Search(pq, 5).TotalHits); |
| ir.Dispose(); |
| dir.Dispose(); |
| } |
| |
| // LUCENE-3849 |
| [Test] |
| public virtual void TestStopwordsPosIncHole2() |
| { |
| // use two stopfilters for testing here |
| Directory dir = NewDirectory(); |
| Automaton secondSet = BasicAutomata.MakeString("foobar"); |
| Analyzer a = Analyzer.NewAnonymous(createComponents: (fieldName, reader) => |
| { |
| Tokenizer tokenizer = new MockTokenizer(reader); |
| TokenStream stream = new MockTokenFilter(tokenizer, MockTokenFilter.ENGLISH_STOPSET); |
| stream = new MockTokenFilter(stream, new CharacterRunAutomaton(secondSet)); |
| return new TokenStreamComponents(tokenizer, stream); |
| }); |
| RandomIndexWriter iw = new RandomIndexWriter( |
| #if FEATURE_INSTANCE_TESTDATA_INITIALIZATION |
| this, |
| #endif |
| Random, dir, a); |
| Document doc = new Document(); |
| doc.Add(new TextField("body", "just a foobar", Field.Store.NO)); |
| doc.Add(new TextField("body", "test of gaps", Field.Store.NO)); |
| iw.AddDocument(doc); |
| IndexReader ir = iw.GetReader(); |
| iw.Dispose(); |
| IndexSearcher @is = NewSearcher(ir); |
| PhraseQuery pq = new PhraseQuery(); |
| pq.Add(new Term("body", "just"), 0); |
| pq.Add(new Term("body", "test"), 3); |
| // body:"just ? ? test" |
| Assert.AreEqual(1, @is.Search(pq, 5).TotalHits); |
| ir.Dispose(); |
| dir.Dispose(); |
| } |
| |
| // Here we do better: because there is no current segments file, we |
| // don't delete anything. However, once a commit actually happens, |
| // the next IndexWriter will delete the stray file. |
| [Test] |
| public virtual void TestOtherFiles2() |
| { |
| Directory dir = NewDirectory(); |
| try |
| { |
| // Create my own random file: |
| IndexOutput @out = dir.CreateOutput("_a.frq", NewIOContext(Random)); |
| @out.WriteByte((byte)(sbyte)42); |
| @out.Dispose(); |
| |
| (new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)))).Dispose(); |
| |
| Assert.IsTrue(SlowFileExists(dir, "_a.frq")); |
| |
| IndexWriter iw = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random))); |
| iw.AddDocument(new Document()); |
| iw.Dispose(); |
| |
| Assert.IsFalse(SlowFileExists(dir, "_a.frq")); |
| } |
| finally |
| { |
| dir.Dispose(); |
| } |
| } |
| |
| // LUCENE-4398 |
| [Test] |
| public virtual void TestRotatingFieldNames() |
| { |
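| // With a tiny RAM buffer and constantly rotating field names, each |
| // RAM-triggered flush should still hold roughly as many docs as the |
| // first (within 10%), i.e. per-field overhead must stay bounded. |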
| Directory dir = NewFSDirectory(CreateTempDir("TestIndexWriter.testChangingFields")); |
| IndexWriterConfig iwc = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)); |
| iwc.SetRAMBufferSizeMB(0.2); |
| iwc.SetMaxBufferedDocs(-1); |
| IndexWriter w = new IndexWriter(dir, iwc); |
| int upto = 0; |
| |
| FieldType ft = new FieldType(TextField.TYPE_NOT_STORED); |
| ft.OmitNorms = true; |
| |
| int firstDocCount = -1; |
| for (int iter = 0; iter < 10; iter++) |
| { |
| int startFlushCount = w.FlushCount; |
| int docCount = 0; |
| while (w.FlushCount == startFlushCount) |
| { |
| Document doc = new Document(); |
| for (int i = 0; i < 10; i++) |
| { |
| doc.Add(new Field("field" + (upto++), "content", ft)); |
| } |
| w.AddDocument(doc); |
| docCount++; |
| } |
| |
| if (Verbose) |
| { |
| Console.WriteLine("TEST: iter=" + iter + " flushed after docCount=" + docCount); |
| } |
| |
| if (iter == 0) |
| { |
| firstDocCount = docCount; |
| } |
| |
| Assert.IsTrue(((float)docCount) / firstDocCount > 0.9, "flushed after too few docs: first segment flushed at docCount=" + firstDocCount + ", but current segment flushed after docCount=" + docCount + "; iter=" + iter); |
| |
| if (upto > 5000) |
| { |
| // Start re-using field names after a while |
| // ... important because otherwise we can OOME due |
| // to too many FieldInfo instances. |
| upto = 0; |
| } |
| } |
| w.Dispose(); |
| dir.Dispose(); |
| } |
| |
| // LUCENE-4575 |
| [Test] |
| public virtual void TestCommitWithUserDataOnly() |
| { |
| Directory dir = NewDirectory(); |
| IndexWriter writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, null)); |
| writer.Commit(); // first commit to complete IW create transaction. |
| |
| // this should store the commit data, even though no other changes were made |
| writer.SetCommitData(new Dictionary<string, string>() { |
| {"key", "value"} |
| }); |
| writer.Commit(); |
| |
| DirectoryReader r = DirectoryReader.Open(dir); |
| Assert.AreEqual("value", r.IndexCommit.UserData["key"]); |
| r.Dispose(); |
| |
| // now check setCommitData and prepareCommit/commit sequence |
| writer.SetCommitData(new Dictionary<string, string>() { |
| {"key", "value1"} |
| }); |
| writer.PrepareCommit(); |
| writer.SetCommitData(new Dictionary<string, string>() { |
| {"key", "value2"} |
| }); |
| writer.Commit(); // should commit the first commitData only, per protocol |
| |
| r = DirectoryReader.Open(dir); |
| Assert.AreEqual("value1", r.IndexCommit.UserData["key"]); |
| r.Dispose(); |
| |
| // now should commit the second commitData - there was a bug where |
| // IndexWriter.finishCommit overrode the second commitData |
| writer.Commit(); |
| r = DirectoryReader.Open(dir); |
| Assert.AreEqual("value2", r.IndexCommit.UserData["key"], "IndexWriter.finishCommit may have overridden the second commitData"); |
| r.Dispose(); |
| |
| writer.Dispose(); |
| dir.Dispose(); |
| } |
| |
| [Test] |
| public virtual void TestGetCommitData() |
| { |
| Directory dir = NewDirectory(); |
| IndexWriter writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, null)); |
| writer.SetCommitData(new Dictionary<string, string>() { |
| {"key", "value"} |
| }); |
| Assert.AreEqual("value", writer.CommitData["key"]); |
| writer.Dispose(); |
| |
| // validate that it's also visible when opening a new IndexWriter |
| writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, null).SetOpenMode(OpenMode.APPEND)); |
| Assert.AreEqual("value", writer.CommitData["key"]); |
| writer.Dispose(); |
| |
| dir.Dispose(); |
| } |
| |
| [Test] |
| public virtual void TestIterableThrowsException() |
| { |
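| // AddDocuments must be all-or-nothing: when the enumerable throws |
| // mid-batch, none of that batch's documents may remain live. |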
| Directory dir = NewDirectory(); |
| IndexWriter w = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random))); |
| int iters = AtLeast(100); |
| int docCount = 0; |
| int docId = 0; |
| ISet<string> liveIds = new JCG.HashSet<string>(); |
| for (int i = 0; i < iters; i++) |
| { |
| IList<IEnumerable<IIndexableField>> docs = new List<IEnumerable<IIndexableField>>(); |
| FieldType ft = new FieldType(TextField.TYPE_NOT_STORED); |
| FieldType idFt = new FieldType(TextField.TYPE_STORED); |
| |
| int numDocs = AtLeast(4); |
| for (int j = 0; j < numDocs; j++) |
| { |
| Document doc = new Document(); |
| doc.Add(NewField("id", "" + (docId++), idFt)); |
| doc.Add(NewField("foo", TestUtil.RandomSimpleString(Random), ft)); |
| docs.Add(doc); |
| } |
| bool success = false; |
| try |
| { |
| w.AddDocuments(new RandomFailingFieldIterable(docs, Random)); |
| success = true; |
| } |
| catch (Exception e) |
| { |
| Assert.AreEqual("boom", e.Message); |
| } |
| finally |
| { |
| if (success) |
| { |
| docCount += docs.Count; |
| foreach (IEnumerable<IIndexableField> indexDocument in docs) |
| { |
| liveIds.Add(((Document)indexDocument).Get("id")); |
| } |
| } |
| } |
| } |
| DirectoryReader reader = w.GetReader(); |
| Assert.AreEqual(docCount, reader.NumDocs); |
| IList<AtomicReaderContext> leaves = reader.Leaves; |
| foreach (AtomicReaderContext atomicReaderContext in leaves) |
| { |
| AtomicReader ar = (AtomicReader)atomicReaderContext.Reader; |
| IBits liveDocs = ar.LiveDocs; |
| int maxDoc = ar.MaxDoc; |
| for (int i = 0; i < maxDoc; i++) |
| { |
| if (liveDocs == null || liveDocs.Get(i)) |
| { |
| Assert.IsTrue(liveIds.Remove(ar.Document(i).Get("id"))); |
| } |
| } |
| } |
| Assert.IsTrue(liveIds.Count == 0); |
| IOUtils.Dispose(reader, w, dir); |
| } |
| |
| private class RandomFailingFieldIterable : IEnumerable<IEnumerable<IIndexableField>> |
| { |
| internal readonly IList<IEnumerable<IIndexableField>> docList; |
| internal readonly Random random; |
| |
| public RandomFailingFieldIterable(IList<IEnumerable<IIndexableField>> docList, Random random) |
| { |
| this.docList = docList; |
| this.random = random; |
| } |
| |
| public virtual IEnumerator<IEnumerable<IIndexableField>> GetEnumerator() |
| { |
| // Randomly throw partway through enumeration; IndexWriter.AddDocuments |
| // must roll back the entire in-progress batch when this happens. |
| foreach (IEnumerable<IIndexableField> doc in docList) |
| { |
| if (random.Next(5) == 0) |
| { |
| throw new Exception("boom"); |
| } |
| yield return doc; |
| } |
| } |
| |
| System.Collections.IEnumerator System.Collections.IEnumerable.GetEnumerator() |
| { |
| return GetEnumerator(); |
| } |
| } |
| |
| // LUCENE-2727/LUCENE-2812/LUCENE-4738: |
| [Test] |
| public virtual void TestCorruptFirstCommit() |
| { |
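| // Six iterations: three OpenModes (CREATE, APPEND, CREATE_OR_APPEND), |
| // each tried with Dispose() and with Rollback(), over a directory |
| // containing a bogus zero-length segments_0 file. Only CREATE may |
| // succeed; the other modes must throw. |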
| for (int i = 0; i < 6; i++) |
| { |
| BaseDirectoryWrapper dir = NewDirectory(); |
| dir.CreateOutput("segments_0", IOContext.DEFAULT).Dispose(); |
| IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)); |
| int mode = i / 2; |
| if (mode == 0) |
| { |
| iwc.SetOpenMode(OpenMode.CREATE); |
| } |
| else if (mode == 1) |
| { |
| iwc.SetOpenMode(OpenMode.APPEND); |
| } |
| else if (mode == 2) |
| { |
| iwc.SetOpenMode(OpenMode.CREATE_OR_APPEND); |
| } |
| |
| if (Verbose) |
| { |
| Console.WriteLine("\nTEST: i=" + i); |
| } |
| |
| try |
| { |
| if ((i & 1) == 0) |
| { |
| (new IndexWriter(dir, iwc)).Dispose(); |
| } |
| else |
| { |
| (new IndexWriter(dir, iwc)).Rollback(); |
| } |
| if (mode != 0) |
| { |
| Assert.Fail("expected exception"); |
| } |
| } |
| catch (IOException /*ioe*/) |
| { |
| // OpenMode.APPEND (and CREATE_OR_APPEND, given the bogus segments |
| // file) should throw since no valid index exists: |
| if (mode == 0) |
| { |
| // Unexpected |
| throw; // LUCENENET: CA2200: Rethrow to preserve stack details (https://docs.microsoft.com/en-us/visualstudio/code-quality/ca2200-rethrow-to-preserve-stack-details) |
| } |
| } |
| |
| if (Verbose) |
| { |
| Console.WriteLine(" at close: " + Arrays.ToString(dir.ListAll())); |
| } |
| |
| if (mode != 0) |
| { |
| dir.CheckIndexOnDispose = false; |
| } |
| dir.Dispose(); |
| } |
| } |
| |
| [Test] |
| public virtual void TestHasUncommittedChanges() |
| { |
| Directory dir = NewDirectory(); |
| IndexWriter writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random))); |
| Assert.IsTrue(writer.HasUncommittedChanges()); // this will be true because a commit will create an empty index |
| Document doc = new Document(); |
| doc.Add(NewTextField("myfield", "a b c", Field.Store.NO)); |
| writer.AddDocument(doc); |
| Assert.IsTrue(writer.HasUncommittedChanges()); |
| |
| // Must commit, waitForMerges, commit again, to be |
| // certain that hasUncommittedChanges returns false: |
| writer.Commit(); |
| writer.WaitForMerges(); |
| writer.Commit(); |
| Assert.IsFalse(writer.HasUncommittedChanges()); |
| writer.AddDocument(doc); |
| Assert.IsTrue(writer.HasUncommittedChanges()); |
| writer.Commit(); |
| doc = new Document(); |
| doc.Add(NewStringField("id", "xyz", Field.Store.YES)); |
| writer.AddDocument(doc); |
| Assert.IsTrue(writer.HasUncommittedChanges()); |
| |
| // Must commit, waitForMerges, commit again, to be |
| // certain that hasUncommittedChanges returns false: |
| writer.Commit(); |
| writer.WaitForMerges(); |
| writer.Commit(); |
| Assert.IsFalse(writer.HasUncommittedChanges()); |
| writer.DeleteDocuments(new Term("id", "xyz")); |
| Assert.IsTrue(writer.HasUncommittedChanges()); |
| |
| // Must commit, waitForMerges, commit again, to be |
| // certain that hasUncommittedChanges returns false: |
| writer.Commit(); |
| writer.WaitForMerges(); |
| writer.Commit(); |
| Assert.IsFalse(writer.HasUncommittedChanges()); |
| writer.Dispose(); |
| |
| writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random))); |
| Assert.IsFalse(writer.HasUncommittedChanges()); |
| writer.AddDocument(doc); |
| Assert.IsTrue(writer.HasUncommittedChanges()); |
| |
| writer.Dispose(); |
| dir.Dispose(); |
| } |
| |
| [Test] |
| public virtual void TestMergeAllDeleted() |
| { |
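| // An ITestPoint turns KeepFullyDeletedSegments on at merge init and |
| // off at commit time, so ForceMerge(1) over a fully deleted index |
| // must tolerate the setting flipping mid-merge. |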
| Directory dir = NewDirectory(); |
| IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)); |
| SetOnce<IndexWriter> iwRef = new SetOnce<IndexWriter>(); |
| iwc.SetInfoStream(new TestPointInfoStream(iwc.InfoStream, new TestPointAnonymousInnerClassHelper(this, iwRef))); |
| IndexWriter evilWriter = new IndexWriter(dir, iwc); |
| iwRef.Set(evilWriter); |
| for (int i = 0; i < 1000; i++) |
| { |
| AddDoc(evilWriter); |
| if (Random.Next(17) == 0) |
| { |
| evilWriter.Commit(); |
| } |
| } |
| evilWriter.DeleteDocuments(new MatchAllDocsQuery()); |
| evilWriter.ForceMerge(1); |
| evilWriter.Dispose(); |
| dir.Dispose(); |
| } |
| |
| private class TestPointAnonymousInnerClassHelper : ITestPoint |
| { |
| private readonly TestIndexWriter outerInstance; |
| |
| private SetOnce<IndexWriter> iwRef; |
| |
| public TestPointAnonymousInnerClassHelper(TestIndexWriter outerInstance, SetOnce<IndexWriter> iwRef) |
| { |
| this.outerInstance = outerInstance; |
| this.iwRef = iwRef; |
| } |
| |
| public void Apply(string message) |
| { |
| if ("startCommitMerge".Equals(message, StringComparison.Ordinal)) |
| { |
| iwRef.Get().KeepFullyDeletedSegments = false; |
| } |
| else if ("startMergeInit".Equals(message, StringComparison.Ordinal)) |
| { |
| iwRef.Get().KeepFullyDeletedSegments = true; |
| } |
| } |
| } |
| |
| // LUCENE-5239 |
| [Test] |
| public virtual void TestDeleteSameTermAcrossFields() |
| { |
| Directory dir = NewDirectory(); |
| IndexWriterConfig iwc = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)); |
| IndexWriter w = new IndexWriter(dir, iwc); |
| Document doc = new Document(); |
| doc.Add(new TextField("a", "foo", Field.Store.NO)); |
| w.AddDocument(doc); |
| |
| // Should not delete the document; with LUCENE-5239 the |
| // "foo" from the 2nd delete term would incorrectly |
| // match field a's "foo": |
| w.DeleteDocuments(new Term("a", "xxx")); |
| w.DeleteDocuments(new Term("b", "foo")); |
| IndexReader r = w.GetReader(); |
| w.Dispose(); |
| |
| // Make sure document was not (incorrectly) deleted: |
| Assert.AreEqual(1, r.NumDocs); |
| r.Dispose(); |
| dir.Dispose(); |
| } |
| |
| // LUCENE-5574 |
| [Test] |
| public virtual void TestClosingNRTReaderDoesNotCorruptYourIndex() |
| { |
| // Windows disallows deleting & overwriting files still |
| // open for reading: |
| AssumeFalse("this test can't run on Windows", Constants.WINDOWS); |
| |
| MockDirectoryWrapper dir = NewMockDirectory(); |
| |
| // Allow deletion of still open files: |
| dir.NoDeleteOpenFile = false; |
| |
| // Allow writing to same file more than once: |
| dir.PreventDoubleWrite = false; |
| |
| IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)); |
| LogMergePolicy lmp = new LogDocMergePolicy(); |
| lmp.MergeFactor = 2; |
| iwc.SetMergePolicy(lmp); |
| |
| RandomIndexWriter w = new RandomIndexWriter(Random, dir, iwc); |
| Document doc = new Document(); |
| doc.Add(new TextField("a", "foo", Field.Store.NO)); |
| w.AddDocument(doc); |
| w.Commit(); |
| w.AddDocument(doc); |
| |
| // Get a new reader, but this also sets off a merge: |
| IndexReader r = w.GetReader(); |
| w.Dispose(); |
| |
| // Blow away index and make a new writer: |
| foreach (string fileName in dir.ListAll()) |
| { |
| dir.DeleteFile(fileName); |
| } |
| |
| w = new RandomIndexWriter( |
| #if FEATURE_INSTANCE_TESTDATA_INITIALIZATION |
| this, |
| #endif |
| Random, dir); |
| w.AddDocument(doc); |
| w.Dispose(); |
| r.Dispose(); |
| dir.Dispose(); |
| } |
| #endif |
| } |
| } |