using Lucene.Net.Codecs;
using Lucene.Net.Documents;
using Lucene.Net.Index.Extensions;
using Lucene.Net.Store;
using Lucene.Net.Util;
using NUnit.Framework;
using System;
using System.IO;
using Assert = Lucene.Net.TestFramework.Assert;
using Console = Lucene.Net.Util.SystemConsole;
namespace Lucene.Net.Index
{
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
using Directory = Lucene.Net.Store.Directory;
using Document = Documents.Document;
using Field = Field;
using FieldType = FieldType;
using IndexSearcher = Lucene.Net.Search.IndexSearcher;
using LuceneTestCase = Lucene.Net.Util.LuceneTestCase;
using MockAnalyzer = Lucene.Net.Analysis.MockAnalyzer;
using MockDirectoryWrapper = Lucene.Net.Store.MockDirectoryWrapper;
using NumericDocValuesField = NumericDocValuesField;
using RAMDirectory = Lucene.Net.Store.RAMDirectory;
using ScoreDoc = Lucene.Net.Search.ScoreDoc;
using TermQuery = Lucene.Net.Search.TermQuery;
using TestUtil = Lucene.Net.Util.TestUtil;
using TextField = TextField;
/// <summary>
/// Tests for IndexWriter when the disk runs out of space
/// </summary>
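/// <remarks>
/// Disk exhaustion is simulated via MockDirectoryWrapper rather than a real
/// device. A minimal sketch of the pattern used throughout this fixture
/// (the byte limit here is illustrative):
/// <code>
/// var dir = new MockDirectoryWrapper(Random, new RAMDirectory());
/// dir.MaxSizeInBytes = 150; // writes growing the directory past 150 bytes throw IOException
/// // ... index until an IOException is hit, then lift the limit and retry:
/// dir.MaxSizeInBytes = 0; // 0 = unlimited
/// </code>
/// </remarks>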
[TestFixture]
public class TestIndexWriterOnDiskFull : LuceneTestCase
{
/*
* Make sure IndexWriter cleans up on hitting a disk
* full exception in addDocument.
* TODO: how to do this on windows with FSDirectory?
*/
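// Pass 0 closes the writer after the failure; pass 1 rolls it back.
// Either way the index must remain consistent (no unreferenced files,
// a reader can open it), and the loop retries with more disk space
// until the commit succeeds.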
[Test]
public virtual void TestAddDocumentOnDiskFull()
{
for (int pass = 0; pass < 2; pass++)
{
if (Verbose)
{
Console.WriteLine("TEST: pass=" + pass);
}
bool doAbort = pass == 1;
long diskFree = TestUtil.NextInt32(Random, 100, 300);
while (true)
{
if (Verbose)
{
Console.WriteLine("TEST: cycle: diskFree=" + diskFree);
}
MockDirectoryWrapper dir = new MockDirectoryWrapper(Random, new RAMDirectory());
dir.MaxSizeInBytes = diskFree;
IndexWriter writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)));
IMergeScheduler ms = writer.Config.MergeScheduler;
if (ms is IConcurrentMergeScheduler)
{
// this test intentionally produces exceptions
// in the threads that CMS launches; we don't
// want to pollute test output with these.
((IConcurrentMergeScheduler)ms).SetSuppressExceptions();
}
bool hitError = false;
try
{
for (int i = 0; i < 200; i++)
{
AddDoc(writer);
}
if (Verbose)
{
Console.WriteLine("TEST: done adding docs; now commit");
}
writer.Commit();
}
catch (IOException e)
{
if (Verbose)
{
Console.WriteLine("TEST: exception on addDoc");
Console.WriteLine(e.StackTrace);
}
hitError = true;
}
if (hitError)
{
if (doAbort)
{
if (Verbose)
{
Console.WriteLine("TEST: now rollback");
}
writer.Rollback();
}
else
{
try
{
if (Verbose)
{
Console.WriteLine("TEST: now close");
}
writer.Dispose();
}
catch (IOException e)
{
if (Verbose)
{
Console.WriteLine("TEST: exception on close; retry w/ no disk space limit");
Console.WriteLine(e.StackTrace);
}
dir.MaxSizeInBytes = 0;
writer.Dispose();
}
}
//TestUtil.SyncConcurrentMerges(ms);
if (TestUtil.AnyFilesExceptWriteLock(dir))
{
TestIndexWriter.AssertNoUnreferencedFiles(dir, "after disk full during addDocument");
// Make sure reader can open the index:
DirectoryReader.Open(dir).Dispose();
}
dir.Dispose();
// Now try again w/ more space:
diskFree += TestNightly ? TestUtil.NextInt32(Random, 400, 600) : TestUtil.NextInt32(Random, 3000, 5000);
}
else
{
//TestUtil.SyncConcurrentMerges(writer);
dir.MaxSizeInBytes = 0;
writer.Dispose();
dir.Dispose();
break;
}
}
}
}
// TODO: make @Nightly variant that provokes more disk
// fulls
// TODO: have test fail if on any given top
// iter there was not a single IOE hit
/*
Test: make sure when we run out of disk space or hit
random IOExceptions in any of the addIndexes(*) calls
that 1) index is not corrupt (searcher can open/search
it) and 2) transactional semantics are followed:
either all or none of the incoming documents were in
fact added.
*/
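// Three variants are exercised in turn, selected by 'method' below:
// iter 0: AddIndexes(Directory[]) followed by ForceMerge(1)
// iter 1: AddIndexes(IndexReader[])
// iter 2: AddIndexes(Directory[])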
[Test]
public virtual void TestAddIndexOnDiskFull()
{
// MemoryCodec, since it uses FST, is not necessarily
// "additive", i.e. if you add up N small FSTs, then merge
// them, the merged result can easily be larger than the
// sum because the merged FST may use array encoding for
// some arcs (which uses more space):
string idFormat = TestUtil.GetPostingsFormat("id");
string contentFormat = TestUtil.GetPostingsFormat("content");
AssumeFalse("this test cannot run with Memory codec", idFormat.Equals("Memory", StringComparison.Ordinal) || contentFormat.Equals("Memory", StringComparison.Ordinal));
int START_COUNT = 57;
int NUM_DIR = TestNightly ? 50 : 5;
int END_COUNT = START_COUNT + NUM_DIR * (TestNightly ? 25 : 5);
// Build up a bunch of dirs that have indexes which we
// will then merge together by calling addIndexes(*):
Directory[] dirs = new Directory[NUM_DIR];
long inputDiskUsage = 0;
for (int i = 0; i < NUM_DIR; i++)
{
dirs[i] = NewDirectory();
IndexWriter writer = new IndexWriter(dirs[i], NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)));
for (int j = 0; j < 25; j++)
{
AddDocWithIndex(writer, 25 * i + j);
}
writer.Dispose();
string[] files = dirs[i].ListAll();
for (int j = 0; j < files.Length; j++)
{
inputDiskUsage += dirs[i].FileLength(files[j]);
}
}
// Now, build a starting index that has START_COUNT docs. We
// will then try to addIndexes into a copy of this:
MockDirectoryWrapper startDir = NewMockDirectory();
IndexWriter indWriter = new IndexWriter(startDir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)));
for (int j = 0; j < START_COUNT; j++)
{
AddDocWithIndex(indWriter, j);
}
indWriter.Dispose();
// Make sure starting index seems to be working properly:
Term searchTerm = new Term("content", "aaa");
IndexReader reader = DirectoryReader.Open(startDir);
Assert.AreEqual(START_COUNT, reader.DocFreq(searchTerm), "first docFreq");
IndexSearcher searcher = NewSearcher(reader);
ScoreDoc[] hits = searcher.Search(new TermQuery(searchTerm), null, 1000).ScoreDocs;
Assert.AreEqual(START_COUNT, hits.Length, "first number of hits");
reader.Dispose();
// Iterate with larger and larger amounts of free
// disk space. With little free disk space,
// addIndexes will certainly run out of space &
// fail. Verify that when this happens, the index is
// not corrupt and in fact has added no
// documents. Then, we increase disk space by a random
// number of bytes each iteration. At some point there is
// enough free disk space and addIndexes should
// succeed and the index should show all documents were
// added.
// String[] files = startDir.ListAll();
long diskUsage = startDir.GetSizeInBytes();
long startDiskUsage = 0;
string[] files_ = startDir.ListAll();
for (int i = 0; i < files_.Length; i++)
{
startDiskUsage += startDir.FileLength(files_[i]);
}
for (int iter = 0; iter < 3; iter++)
{
if (Verbose)
{
Console.WriteLine("TEST: iter=" + iter);
}
// Start with 50-200 bytes more than we are currently using:
long diskFree = diskUsage + TestUtil.NextInt32(Random, 50, 200);
int method = iter;
bool success = false;
bool done = false;
string methodName;
if (0 == method)
{
methodName = "addIndexes(Directory[]) + forceMerge(1)";
}
else if (1 == method)
{
methodName = "addIndexes(IndexReader[])";
}
else
{
methodName = "addIndexes(Directory[])";
}
while (!done)
{
if (Verbose)
{
Console.WriteLine("TEST: cycle...");
}
// Make a new dir that will enforce disk usage:
MockDirectoryWrapper dir = new MockDirectoryWrapper(Random, new RAMDirectory(startDir, NewIOContext(Random)));
indWriter = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetOpenMode(OpenMode.APPEND).SetMergePolicy(NewLogMergePolicy(false)));
IOException err = null;
IMergeScheduler ms = indWriter.Config.MergeScheduler;
for (int x = 0; x < 2; x++)
{
// this test intentionally produces exceptions
// in the threads that CMS launches; we don't
// want to pollute test output with these.
if (ms is IConcurrentMergeScheduler)
{
if (0 == x)
{
((IConcurrentMergeScheduler)ms).SetSuppressExceptions();
}
else
{
((IConcurrentMergeScheduler)ms).ClearSuppressExceptions();
}
}
// Two loops: first time, limit disk space &
// throw random IOExceptions; second time, no
// disk space limit:
double rate = 0.05;
double diskRatio = ((double)diskFree) / diskUsage;
long thisDiskFree;
string testName = null;
if (0 == x)
{
dir.RandomIOExceptionRateOnOpen = Random.NextDouble() * 0.01;
thisDiskFree = diskFree;
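// Back off the random IOException rate as the free-space
// headroom (diskRatio) grows, so later cycles can eventually succeed: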
if (diskRatio >= 2.0)
{
rate /= 2;
}
if (diskRatio >= 4.0)
{
rate /= 2;
}
if (diskRatio >= 6.0)
{
rate = 0.0;
}
testName = "disk full test " + methodName + " with disk full at " + diskFree + " bytes";
}
else
{
dir.RandomIOExceptionRateOnOpen = 0.0;
thisDiskFree = 0;
rate = 0.0;
testName = "disk full test " + methodName + " with unlimited disk space";
}
if (Verbose)
{
Console.WriteLine("\ncycle: " + testName);
}
dir.TrackDiskUsage = true;
dir.MaxSizeInBytes = thisDiskFree;
dir.RandomIOExceptionRate = rate;
try
{
if (0 == method)
{
if (Verbose)
{
Console.WriteLine("TEST: now addIndexes count=" + dirs.Length);
}
indWriter.AddIndexes(dirs);
if (Verbose)
{
Console.WriteLine("TEST: now forceMerge");
}
indWriter.ForceMerge(1);
}
else if (1 == method)
{
IndexReader[] readers = new IndexReader[dirs.Length];
for (int i = 0; i < dirs.Length; i++)
{
readers[i] = DirectoryReader.Open(dirs[i]);
}
try
{
indWriter.AddIndexes(readers);
}
finally
{
for (int i = 0; i < dirs.Length; i++)
{
readers[i].Dispose();
}
}
}
else
{
indWriter.AddIndexes(dirs);
}
success = true;
if (Verbose)
{
Console.WriteLine(" success!");
}
if (0 == x)
{
done = true;
}
}
catch (IOException e)
{
success = false;
err = e;
if (Verbose)
{
Console.WriteLine(" hit IOException: " + e);
Console.WriteLine(e.StackTrace);
}
if (1 == x)
{
Console.WriteLine(e.StackTrace);
Assert.Fail(methodName + " hit IOException after disk space was freed up");
}
}
// Make sure all threads from
// ConcurrentMergeScheduler are done
TestUtil.SyncConcurrentMerges(indWriter);
if (Verbose)
{
Console.WriteLine(" now test readers");
}
// Finally, verify index is not corrupt, and, if
// we succeeded, we see all docs added, and if we
// failed, we see either all docs or no docs added
// (transactional semantics):
dir.RandomIOExceptionRateOnOpen = 0.0;
try
{
reader = DirectoryReader.Open(dir);
}
catch (IOException e)
{
Console.WriteLine(e.StackTrace);
Assert.Fail(testName + ": exception when creating IndexReader: " + e);
}
int result = reader.DocFreq(searchTerm);
if (success)
{
if (result != START_COUNT)
{
Assert.Fail(testName + ": method did not throw exception but docFreq('aaa') is " + result + " instead of expected " + START_COUNT);
}
}
else
{
// On hitting exception we still may have added
// all docs:
if (result != START_COUNT && result != END_COUNT)
{
Console.WriteLine(err.StackTrace);
Assert.Fail(testName + ": method did throw exception but docFreq('aaa') is " + result + " instead of expected " + START_COUNT + " or " + END_COUNT);
}
}
searcher = NewSearcher(reader);
try
{
hits = searcher.Search(new TermQuery(searchTerm), null, END_COUNT).ScoreDocs;
}
catch (IOException e)
{
Console.WriteLine(e.StackTrace);
Assert.Fail(testName + ": exception when searching: " + e);
}
int result2 = hits.Length;
if (success)
{
if (result2 != result)
{
Assert.Fail(testName + ": method did not throw exception but hits.Length for search on term 'aaa' is " + result2 + " instead of expected " + result);
}
}
else
{
// On hitting exception we still may have added
// all docs:
if (result2 != result)
{
Console.WriteLine(err.StackTrace);
Assert.Fail(testName + ": method did throw exception but hits.Length for search on term 'aaa' is " + result2 + " instead of expected " + result);
}
}
reader.Dispose();
if (Verbose)
{
Console.WriteLine(" count is " + result);
}
if (done || result == END_COUNT)
{
break;
}
}
if (Verbose)
{
Console.WriteLine(" start disk = " + startDiskUsage + "; input disk = " + inputDiskUsage + "; max used = " + dir.MaxUsedSizeInBytes);
}
if (done)
{
// Javadocs state that temp free Directory space
// required is at most 2X total input size of
// indices so let's make sure:
Assert.IsTrue((dir.MaxUsedSizeInBytes - startDiskUsage) < 2 * (startDiskUsage + inputDiskUsage), "max free Directory space required exceeded 2X the total input index sizes during " + methodName + ": max temp usage = " + (dir.MaxUsedSizeInBytes - startDiskUsage) + " bytes vs limit=" + (2 * (startDiskUsage + inputDiskUsage)) + "; starting disk usage = " + startDiskUsage + " bytes; " + "input index disk usage = " + inputDiskUsage + " bytes");
}
// Make sure we don't hit disk full during close below:
dir.MaxSizeInBytes = 0;
dir.RandomIOExceptionRate = 0.0;
dir.RandomIOExceptionRateOnOpen = 0.0;
indWriter.Dispose();
// Wait for all BG threads to finish else
// dir.Dispose() will throw IOException because
// there are still open files
TestUtil.SyncConcurrentMerges(ms);
dir.Dispose();
// Try again with more free space:
diskFree += TestNightly ? TestUtil.NextInt32(Random, 4000, 8000) : TestUtil.NextInt32(Random, 40000, 80000);
}
}
startDir.Dispose();
foreach (Directory dir in dirs)
{
dir.Dispose();
}
}
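/// <summary>
/// A Failure hook that throws a fake disk-full IOException once from
/// SegmentMerger.MergeTerms and once from LiveDocsFormat.WriteLiveDocs,
/// simulating the disk filling up in the middle of a merge
/// (see LUCENE-2593 below).
/// </summary>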
private class FailTwiceDuringMerge : Failure
{
public bool didFail1;
public bool didFail2;
public override void Eval(MockDirectoryWrapper dir)
{
if (!m_doFail)
{
return;
}
// LUCENENET specific: for these to work in release mode, we have added [MethodImpl(MethodImplOptions.NoInlining)]
// to each possible target of the StackTraceHelper. If these change, so must the attribute on the target methods.
if (StackTraceHelper.DoesStackTraceContainMethod(typeof(SegmentMerger).Name, "MergeTerms") && !didFail1)
{
didFail1 = true;
throw new IOException("fake disk full during mergeTerms");
}
// LUCENENET specific: for these to work in release mode, we have added [MethodImpl(MethodImplOptions.NoInlining)]
// to each possible target of the StackTraceHelper. If these change, so must the attribute on the target methods.
if (StackTraceHelper.DoesStackTraceContainMethod(typeof(LiveDocsFormat).Name, "WriteLiveDocs") && !didFail2)
{
didFail2 = true;
throw new IOException("fake disk full while writing LiveDocs");
}
}
}
// LUCENE-2593: a disk full hit during merge must not corrupt the index
[Test]
public virtual void TestCorruptionAfterDiskFullDuringMerge()
{
MockDirectoryWrapper dir = NewMockDirectory();
//IndexWriter w = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setReaderPooling(true));
IndexWriter w = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetMergeScheduler(new SerialMergeScheduler()).SetReaderPooling(true).SetMergePolicy(NewLogMergePolicy(2)));
// we can do this because we add/delete/add (and don't merge to "nothing")
w.KeepFullyDeletedSegments = true;
Document doc = new Document();
doc.Add(NewTextField("f", "doctor who", Field.Store.NO));
w.AddDocument(doc);
w.Commit();
w.DeleteDocuments(new Term("f", "who"));
w.AddDocument(doc);
// disk fills up!
FailTwiceDuringMerge ftdm = new FailTwiceDuringMerge();
ftdm.SetDoFail();
dir.FailOn(ftdm);
try
{
w.Commit();
Assert.Fail("fake disk full IOExceptions not hit");
}
#pragma warning disable 168
catch (IOException ioe)
#pragma warning restore 168
{
// expected
Assert.IsTrue(ftdm.didFail1 || ftdm.didFail2);
}
TestUtil.CheckIndex(dir);
ftdm.ClearDoFail();
w.AddDocument(doc);
w.Dispose();
dir.Dispose();
}
// LUCENE-1130: make sure immediate disk full on creating
// an IndexWriter (hit during DW.ThreadState.Init()) is
// OK:
[Test]
public virtual void TestImmediateDiskFull([ValueSource(typeof(ConcurrentMergeSchedulerFactories), "Values")]Func<IConcurrentMergeScheduler> newScheduler)
{
MockDirectoryWrapper dir = NewMockDirectory();
var config = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random))
.SetMaxBufferedDocs(2)
.SetMergeScheduler(newScheduler());
IndexWriter writer = new IndexWriter(dir, config);
dir.MaxSizeInBytes = Math.Max(1, dir.GetRecomputedActualSizeInBytes());
Document doc = new Document();
FieldType customType = new FieldType(TextField.TYPE_STORED);
doc.Add(NewField("field", "aaa bbb ccc ddd eee fff ggg hhh iii jjj", customType));
try
{
writer.AddDocument(doc);
Assert.Fail("did not hit disk full");
}
catch (IOException)
{
// expected
}
// Without fix for LUCENE-1130: this call will hang:
try
{
writer.AddDocument(doc);
Assert.Fail("did not hit disk full");
}
catch (IOException)
{
// expected
}
try
{
writer.Dispose(false);
Assert.Fail("did not hit disk full");
}
catch (IOException)
{
// expected
}
// Make sure once disk space is avail again, we can
// cleanly close:
dir.MaxSizeInBytes = 0;
writer.Dispose(false);
dir.Dispose();
}
// TODO: these are also in TestIndexWriter... add a simple doc-writing method
// like this to LuceneTestCase?
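/// <summary>
/// Adds a minimal document: a "content" text field plus, when the
/// default codec supports DocValues, a numeric DocValues field.
/// </summary>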
private void AddDoc(IndexWriter writer)
{
Document doc = new Document();
doc.Add(NewTextField("content", "aaa", Field.Store.NO));
if (DefaultCodecSupportsDocValues)
{
doc.Add(new NumericDocValuesField("numericdv", 1));
}
writer.AddDocument(doc);
}
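/// <summary>
/// Like AddDoc, but also tags the document with an "id" field so
/// individual documents can be distinguished.
/// </summary>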
private void AddDocWithIndex(IndexWriter writer, int index)
{
Document doc = new Document();
doc.Add(NewTextField("content", "aaa " + index, Field.Store.NO));
doc.Add(NewTextField("id", "" + index, Field.Store.NO));
if (DefaultCodecSupportsDocValues)
{
doc.Add(new NumericDocValuesField("numericdv", 1));
}
writer.AddDocument(doc);
}
}
}