blob: c43ebc0cdf946858772a267fa97c3d6f37c6ca52 [file] [log] [blame]
using Lucene.Net.Codecs.BlockTerms;
using Lucene.Net.Codecs.Lucene41;
using Lucene.Net.Codecs.Memory;
using Lucene.Net.Codecs.MockIntBlock;
using Lucene.Net.Codecs.MockSep;
using Lucene.Net.Codecs.Pulsing;
using Lucene.Net.Codecs.Sep;
using Lucene.Net.Diagnostics;
using Lucene.Net.Index;
using Lucene.Net.Store;
using Lucene.Net.Util;
using System;
using System.Collections.Generic;
using Console = Lucene.Net.Util.SystemConsole;
namespace Lucene.Net.Codecs.MockRandom
{
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/// <summary>
/// Randomly combines terms index impl w/ postings impls.
/// </summary>
[PostingsFormatName("MockRandom")]
public sealed class MockRandomPostingsFormat : PostingsFormat
{
private readonly Random seedRandom;
private const string SEED_EXT = "sd";
/// <summary>
/// Placeholder <see cref="Random"/> installed when the format is created without a
/// seed source. Every overridable value-producing member throws
/// <see cref="InvalidOperationException"/>, so any attempt to draw from it fails
/// loudly instead of silently falling back to the fixed seed passed to the base class.
/// </summary>
private class RandomAnonymousClassHelper : Random
{
    public RandomAnonymousClassHelper()
        : base(0)
    { }

    // Single place that builds the failure, so every override reports the same message.
    private static InvalidOperationException Misuse()
    {
        return new InvalidOperationException("Please use MockRandomPostingsFormat(Random)");
    }

    public override int Next()
    {
        throw Misuse();
    }

    public override int Next(int maxValue)
    {
        throw Misuse();
    }

    // Range-based draws use this overload rather than Next(int), so it must be guarded too.
    public override int Next(int minValue, int maxValue)
    {
        throw Misuse();
    }

    public override double NextDouble()
    {
        throw Misuse();
    }

    public override void NextBytes(byte[] buffer)
    {
        throw Misuse();
    }

    // Base-class implementations may derive their values from Sample(); block that path as well.
    protected override double Sample()
    {
        throw Misuse();
    }
}
/// <summary>
/// Creates the format without a seed source by delegating to
/// <c>MockRandomPostingsFormat(Random)</c> with a <c>null</c> argument.
/// </summary>
public MockRandomPostingsFormat()
    : this(random: null)
{
    // This ctor should *only* be used at read-time. The null argument makes the
    // other constructor install a placeholder Random whose Next(int) throws
    // InvalidOperationException instead of producing values.
}
/// <summary>
/// Creates the format with an optional seed source.
/// </summary>
/// <param name="random">
/// Source of randomness. When non-null, exactly one value is drawn from it to seed a
/// private <see cref="Random"/> owned by this instance. When <c>null</c>, a
/// placeholder is installed whose <c>Next(int)</c> throws
/// <see cref="InvalidOperationException"/>.
/// </param>
public MockRandomPostingsFormat(Random random)
    : base()
{
    this.seedRandom = random == null
        ? new RandomAnonymousClassHelper()
        : new Random(random.Next());
}
/// <summary>
/// Chooses a random <see cref="Int32StreamFactory"/> depending on the file's extension.
/// <para/>
/// Three delegate factories are created in the constructor; each call to
/// <see cref="OpenInput"/> or <see cref="CreateOutput"/> is forwarded to the delegate
/// selected from the salted hash of the file name's extension.
/// </summary>
private class MockInt32StreamFactory : Int32StreamFactory
{
    private readonly int salt;
    private readonly IList<Int32StreamFactory> delegates = new List<Int32StreamFactory>();

    /// <summary>
    /// Draws the salt and the block sizes of the delegates from <paramref name="random"/>,
    /// in that order.
    /// </summary>
    /// <param name="random">Source of the salt and of the delegates' block sizes.</param>
    public MockInt32StreamFactory(Random random)
    {
        salt = random.nextInt();
        delegates.Add(new MockSingleInt32Factory());
        int blockSize = TestUtil.NextInt32(random, 1, 2000);
        delegates.Add(new MockFixedInt32BlockPostingsFormat.MockInt32Factory(blockSize));
        int baseBlockSize = TestUtil.NextInt32(random, 1, 127);
        delegates.Add(new MockVariableInt32BlockPostingsFormat.MockInt32Factory(baseBlockSize));
        // TODO: others
    }

    /// <summary>
    /// Returns the part of <paramref name="fileName"/> starting at its first '.'.
    /// </summary>
    private static string GetExtension(string fileName)
    {
        int idx = fileName.IndexOf('.');
        if (Debugging.AssertsEnabled) Debugging.Assert(idx != -1);
        return fileName.Substring(idx);
    }

    /// <summary>
    /// Picks the delegate for <paramref name="fileName"/>; shared by the read and write
    /// paths so both always agree on the choice.
    /// </summary>
    private Int32StreamFactory SelectDelegate(string fileName)
    {
        // Must only use extension, because IW.addIndexes can
        // rename segment!
        // NOTE(review): string.GetHashCode() is not guaranteed to be stable across
        // processes on every runtime, so a different process may pick a different
        // delegate for the same extension -- confirm this only runs within one process.
        int mixed = salt ^ GetExtension(fileName).GetHashCode();

        // Widen before taking the absolute value: Math.Abs(int.MinValue) throws
        // OverflowException. Every other input selects the same delegate as before.
        long magnitude = Math.Abs((long)mixed);
        return delegates[(int)(magnitude % delegates.Count)];
    }

    public override Int32IndexInput OpenInput(Directory dir, string fileName, IOContext context)
    {
        Int32StreamFactory f = SelectDelegate(fileName);
        if (LuceneTestCase.Verbose)
        {
            Console.WriteLine("MockRandomCodec: read using int factory " + f + " from fileName=" + fileName);
        }
        return f.OpenInput(dir, fileName, context);
    }

    public override Int32IndexOutput CreateOutput(Directory dir, string fileName, IOContext context)
    {
        Int32StreamFactory f = SelectDelegate(fileName);
        if (LuceneTestCase.Verbose)
        {
            Console.WriteLine("MockRandomCodec: write using int factory " + f + " to fileName=" + fileName);
        }
        return f.CreateOutput(dir, fileName, context);
    }
}
/// <summary>
/// Index-term selector driven by its own seeded <see cref="Random"/>: a term is
/// selected when a draw from <c>[0, gap)</c> equals <c>gap / 2</c>.
/// </summary>
private class IndexTermSelectorAnonymousClass : VariableGapTermsIndexWriter.IndexTermSelector
{
    private readonly int gap;
    private readonly Random rand;

    /// <param name="seed">Seed for this selector's private random.</param>
    /// <param name="gap">Exclusive upper bound of each draw.</param>
    public IndexTermSelectorAnonymousClass(int seed, int gap)
    {
        this.gap = gap;
        this.rand = new Random(seed);
    }

    /// <summary>
    /// Draws one value per call; neither <paramref name="term"/> nor
    /// <paramref name="stats"/> influences the decision.
    /// </summary>
    public override bool IsIndexTerm(BytesRef term, TermStats stats)
    {
        int draw = rand.Next(gap);
        int target = gap / 2;
        return draw == target;
    }

    public override void NewField(FieldInfo fieldInfo)
    {
        // Nothing to reset between fields.
    }
}
/// <summary>
/// Builds the write-side stack for one segment, choosing each layer at random.
/// <para/>
/// A skip interval and a seed are drawn from the format-level random; the seed is
/// written to the segment's seed file and then drives every later choice: Sep or
/// Lucene41 postings, an optional pulsing wrapper, and one of four terms dictionaries.
/// The order in which values are drawn from the seeded random is significant and must
/// not be changed.
/// </summary>
/// <param name="state">Write state supplying the directory, segment info, suffix and IO context.</param>
/// <returns>The chosen terms-dictionary writer, wrapping the chosen postings writer.</returns>
public override FieldsConsumer FieldsConsumer(SegmentWriteState state)
{
    // Segments above one million docs get a higher floor:
    // Test2BPostings can OOME otherwise.
    int lowestSkipInterval = state.SegmentInfo.DocCount > 1000000 ? 3 : 2;

    // we pull this before the seed intentionally: because its not consumed at runtime
    // (the skipInterval is written into postings header)
    int skipInterval = TestUtil.NextInt32(seedRandom, lowestSkipInterval, 10);
    if (LuceneTestCase.Verbose)
    {
        Console.WriteLine("MockRandomCodec: skipInterval=" + skipInterval);
    }

    long seed = seedRandom.nextLong();
    if (LuceneTestCase.Verbose)
    {
        Console.WriteLine("MockRandomCodec: writing to seg=" + state.SegmentInfo.Name + " formatID=" + state.SegmentSuffix + " seed=" + seed);
    }

    // Store the seed next to the segment so the read side can replay the same choices.
    string seedFileName = IndexFileNames.SegmentFileName(state.SegmentInfo.Name, state.SegmentSuffix, SEED_EXT);
    IndexOutput seedOutput = state.Directory.CreateOutput(seedFileName, state.Context);
    try
    {
        seedOutput.WriteInt64(seed);
    }
    finally
    {
        seedOutput.Dispose();
    }

    Random rnd = new Random((int)seed);
    rnd.nextInt(); // consume a random for buffersize

    // Layer 1: the base postings writer.
    PostingsWriterBase postings;
    if (rnd.nextBoolean())
    {
        postings = new SepPostingsWriter(state, new MockInt32StreamFactory(rnd), skipInterval);
    }
    else
    {
        if (LuceneTestCase.Verbose)
        {
            Console.WriteLine("MockRandomCodec: writing Standard postings");
        }
        // TODO: randomize variables like acceptibleOverHead?!
        postings = new Lucene41PostingsWriter(state, skipInterval);
    }

    // Layer 2: optionally wrap with pulsing.
    if (rnd.nextBoolean())
    {
        int totTFCutoff = TestUtil.NextInt32(rnd, 1, 20);
        if (LuceneTestCase.Verbose)
        {
            Console.WriteLine("MockRandomCodec: writing pulsing postings with totTFCutoff=" + totTFCutoff);
        }
        postings = new PulsingPostingsWriter(state, totTFCutoff, postings);
    }

    // Layer 3: the terms dictionary. In every branch the postings writer is disposed
    // if constructing the dictionary fails.
    FieldsConsumer result;
    switch (rnd.nextInt(4))
    {
        case 0:
        {
            bool ok = false;
            try
            {
                result = new FSTTermsWriter(state, postings);
                ok = true;
            }
            finally
            {
                if (!ok)
                {
                    postings.Dispose();
                }
            }
            break;
        }

        case 1:
        {
            bool ok = false;
            try
            {
                result = new FSTOrdTermsWriter(state, postings);
                ok = true;
            }
            finally
            {
                if (!ok)
                {
                    postings.Dispose();
                }
            }
            break;
        }

        case 2:
        {
            // Use BlockTree terms dict
            if (LuceneTestCase.Verbose)
            {
                Console.WriteLine("MockRandomCodec: writing BlockTree terms dict");
            }

            // TODO: would be nice to allow 1 but this is very
            // slow to write
            int minTermsInBlock = TestUtil.NextInt32(rnd, 2, 100);
            int maxTermsInBlock = Math.Max(2, (minTermsInBlock - 1) * 2 + rnd.nextInt(100));

            bool ok = false;
            try
            {
                result = new BlockTreeTermsWriter(state, postings, minTermsInBlock, maxTermsInBlock);
                ok = true;
            }
            finally
            {
                if (!ok)
                {
                    postings.Dispose();
                }
            }
            break;
        }

        default:
        {
            if (LuceneTestCase.Verbose)
            {
                Console.WriteLine("MockRandomCodec: writing Block terms dict");
            }

            // First build the terms index writer (fixed-gap or variable-gap).
            TermsIndexWriterBase termsIndex;
            bool ok = false;
            try
            {
                if (rnd.nextBoolean())
                {
                    state.TermIndexInterval = TestUtil.NextInt32(rnd, 1, 100);
                    if (LuceneTestCase.Verbose)
                    {
                        Console.WriteLine("MockRandomCodec: fixed-gap terms index (tii=" + state.TermIndexInterval + ")");
                    }
                    termsIndex = new FixedGapTermsIndexWriter(state);
                }
                else
                {
                    VariableGapTermsIndexWriter.IndexTermSelector selector;
                    switch (rnd.nextInt(3))
                    {
                        case 0:
                        {
                            int tii = TestUtil.NextInt32(rnd, 1, 100);
                            selector = new VariableGapTermsIndexWriter.EveryNTermSelector(tii);
                            if (LuceneTestCase.Verbose)
                            {
                                Console.WriteLine("MockRandomCodec: variable-gap terms index (tii=" + tii + ")");
                            }
                            break;
                        }

                        case 1:
                        {
                            int docFreqThresh = TestUtil.NextInt32(rnd, 2, 100);
                            int tii = TestUtil.NextInt32(rnd, 1, 100);
                            selector = new VariableGapTermsIndexWriter.EveryNOrDocFreqTermSelector(docFreqThresh, tii);
                            break;
                        }

                        default:
                        {
                            int selectorSeed = rnd.Next();
                            int gap = TestUtil.NextInt32(rnd, 2, 40);
                            if (LuceneTestCase.Verbose)
                            {
                                Console.WriteLine("MockRandomCodec: random-gap terms index (max gap=" + gap + ")");
                            }
                            selector = new IndexTermSelectorAnonymousClass(selectorSeed, gap);
                            break;
                        }
                    }
                    termsIndex = new VariableGapTermsIndexWriter(state, selector);
                }
                ok = true;
            }
            finally
            {
                if (!ok)
                {
                    postings.Dispose();
                }
            }

            // Then the block terms writer; on failure dispose both pieces,
            // the index writer even if disposing the postings writer throws.
            ok = false;
            try
            {
                result = new BlockTermsWriter(termsIndex, state, postings);
                ok = true;
            }
            finally
            {
                if (!ok)
                {
                    try
                    {
                        postings.Dispose();
                    }
                    finally
                    {
                        termsIndex.Dispose();
                    }
                }
            }
            break;
        }
    }

    return result;
}
/// <summary>
/// Builds the read-side stack for one segment by replaying the random choices made
/// at write time.
/// <para/>
/// The seed is read from the segment's seed file and used to seed a fresh random, from
/// which the same sequence of decisions is drawn: Sep or Lucene41 postings, optional
/// pulsing wrapper, and one of four terms dictionaries. The order in which values are
/// drawn is significant and must not be changed.
/// </summary>
/// <param name="state">Read state supplying the directory, field infos, segment info, suffix and IO context.</param>
/// <returns>The terms-dictionary reader matching the writer-side choice.</returns>
public override FieldsProducer FieldsProducer(SegmentReadState state)
{
    string seedFileName = IndexFileNames.SegmentFileName(state.SegmentInfo.Name, state.SegmentSuffix, SEED_EXT);
    long seed;
    IndexInput @in = state.Directory.OpenInput(seedFileName, state.Context);
    try
    {
        seed = @in.ReadInt64();
    }
    finally
    {
        // Release the seed file even when reading it fails.
        @in.Dispose();
    }
    if (LuceneTestCase.Verbose)
    {
        Console.WriteLine("MockRandomCodec: reading from seg=" + state.SegmentInfo.Name + " formatID=" + state.SegmentSuffix + " seed=" + seed);
    }

    Random random = new Random((int)seed);

    // Only logged here; the draw itself must still happen before the choices below.
    int readBufferSize = TestUtil.NextInt32(random, 1, 4096);
    if (LuceneTestCase.Verbose)
    {
        Console.WriteLine("MockRandomCodec: readBufferSize=" + readBufferSize);
    }

    PostingsReaderBase postingsReader;
    if (random.nextBoolean())
    {
        if (LuceneTestCase.Verbose)
        {
            Console.WriteLine("MockRandomCodec: reading Sep postings");
        }
        postingsReader = new SepPostingsReader(state.Directory, state.FieldInfos, state.SegmentInfo,
            state.Context, new MockInt32StreamFactory(random), state.SegmentSuffix);
    }
    else
    {
        if (LuceneTestCase.Verbose)
        {
            Console.WriteLine("MockRandomCodec: reading Standard postings");
        }
        postingsReader = new Lucene41PostingsReader(state.Directory, state.FieldInfos, state.SegmentInfo, state.Context, state.SegmentSuffix);
    }

    if (random.nextBoolean())
    {
        // The cutoff is not passed to the reader; it is drawn (and logged) so that the
        // random sequence advances exactly as it does for the pulsing writer.
        int totTFCutoff = TestUtil.NextInt32(random, 1, 20);
        if (LuceneTestCase.Verbose)
        {
            Console.WriteLine("MockRandomCodec: reading pulsing postings with totTFCutoff=" + totTFCutoff);
        }
        postingsReader = new PulsingPostingsReader(state, postingsReader);
    }

    // In every branch below the postings reader is disposed if constructing the
    // terms dictionary fails.
    FieldsProducer fields;
    int t1 = random.nextInt(4);
    if (t1 == 0)
    {
        bool success = false;
        try
        {
            fields = new FSTTermsReader(state, postingsReader);
            success = true;
        }
        finally
        {
            if (!success)
            {
                postingsReader.Dispose();
            }
        }
    }
    else if (t1 == 1)
    {
        bool success = false;
        try
        {
            fields = new FSTOrdTermsReader(state, postingsReader);
            success = true;
        }
        finally
        {
            if (!success)
            {
                postingsReader.Dispose();
            }
        }
    }
    else if (t1 == 2)
    {
        // Use BlockTree terms dict
        if (LuceneTestCase.Verbose)
        {
            Console.WriteLine("MockRandomCodec: reading BlockTree terms dict");
        }
        bool success = false;
        try
        {
            fields = new BlockTreeTermsReader(state.Directory,
                state.FieldInfos,
                state.SegmentInfo,
                postingsReader,
                state.Context,
                state.SegmentSuffix,
                state.TermsIndexDivisor);
            success = true;
        }
        finally
        {
            if (!success)
            {
                postingsReader.Dispose();
            }
        }
    }
    else
    {
        if (LuceneTestCase.Verbose)
        {
            Console.WriteLine("MockRandomCodec: reading Block terms dict");
        }
        TermsIndexReaderBase indexReader;
        bool success = false;
        try
        {
            bool doFixedGap = random.NextBoolean();

            // randomness diverges from writer, here:
            // if termsIndexDivisor is set to -1, we should not touch it. It means a
            // test explicitly instructed not to load the terms index.
            if (state.TermsIndexDivisor != -1)
            {
                state.TermsIndexDivisor = TestUtil.NextInt32(random, 1, 10);
            }

            if (doFixedGap)
            {
                if (LuceneTestCase.Verbose)
                {
                    Console.WriteLine("MockRandomCodec: fixed-gap terms index (divisor=" + state.TermsIndexDivisor + ")");
                }
                indexReader = new FixedGapTermsIndexReader(state.Directory,
                    state.FieldInfos,
                    state.SegmentInfo.Name,
                    state.TermsIndexDivisor,
                    BytesRef.UTF8SortedAsUnicodeComparer,
                    state.SegmentSuffix, state.Context);
            }
            else
            {
                // The drawn values are discarded: the same variable-gap reader is
                // created whichever way these draws come out.
                int n2 = random.Next(3);
                if (n2 == 1)
                {
                    random.Next();
                }
                else if (n2 == 2)
                {
                    random.NextInt64();
                }
                if (LuceneTestCase.Verbose)
                {
                    Console.WriteLine("MockRandomCodec: variable-gap terms index (divisor=" + state.TermsIndexDivisor + ")");
                }
                indexReader = new VariableGapTermsIndexReader(state.Directory,
                    state.FieldInfos,
                    state.SegmentInfo.Name,
                    state.TermsIndexDivisor,
                    state.SegmentSuffix, state.Context);
            }
            success = true;
        }
        finally
        {
            if (!success)
            {
                postingsReader.Dispose();
            }
        }

        // On failure dispose both pieces, the index reader even if disposing the
        // postings reader throws.
        success = false;
        try
        {
            fields = new BlockTermsReader(indexReader,
                state.Directory,
                state.FieldInfos,
                state.SegmentInfo,
                postingsReader,
                state.Context,
                state.SegmentSuffix);
            success = true;
        }
        finally
        {
            if (!success)
            {
                try
                {
                    postingsReader.Dispose();
                }
                finally
                {
                    indexReader.Dispose();
                }
            }
        }
    }
    return fields;
}
}
}