blob: 939e8e7c1d113a48f0bcbf9b4b05bb4c8ae92322 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.codecs.mockrandom;
import java.io.IOException;
import java.util.Random;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.FieldsConsumer;
import org.apache.lucene.codecs.FieldsProducer;
import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.codecs.PostingsReaderBase;
import org.apache.lucene.codecs.PostingsWriterBase;
import org.apache.lucene.codecs.TermStats;
import org.apache.lucene.codecs.blockterms.BlockTermsReader;
import org.apache.lucene.codecs.blockterms.BlockTermsWriter;
import org.apache.lucene.codecs.blockterms.FixedGapTermsIndexReader;
import org.apache.lucene.codecs.blockterms.FixedGapTermsIndexWriter;
import org.apache.lucene.codecs.blockterms.TermsIndexReaderBase;
import org.apache.lucene.codecs.blockterms.TermsIndexWriterBase;
import org.apache.lucene.codecs.blockterms.VariableGapTermsIndexReader;
import org.apache.lucene.codecs.blockterms.VariableGapTermsIndexWriter;
import org.apache.lucene.codecs.blocktree.BlockTreeTermsReader;
import org.apache.lucene.codecs.blocktree.BlockTreeTermsWriter;
import org.apache.lucene.codecs.blocktreeords.OrdsBlockTreeTermsReader;
import org.apache.lucene.codecs.blocktreeords.OrdsBlockTreeTermsWriter;
import org.apache.lucene.codecs.lucene84.Lucene84PostingsReader;
import org.apache.lucene.codecs.lucene84.Lucene84PostingsWriter;
import org.apache.lucene.codecs.memory.FSTTermsReader;
import org.apache.lucene.codecs.memory.FSTTermsWriter;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.store.ChecksumIndexInput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.TestUtil;
/**
* Randomly combines terms index impl w/ postings impls.
*/
public final class MockRandomPostingsFormat extends PostingsFormat {
private final Random seedRandom;
private static final String SEED_EXT = "sd";
public MockRandomPostingsFormat() {
// This ctor should *only* be used at read-time: get NPE if you use it!
this(null);
}
public MockRandomPostingsFormat(Random random) {
super("MockRandom");
if (random == null) {
this.seedRandom = new Random(0L) {
@Override
protected int next(int arg0) {
throw new IllegalStateException("Please use MockRandomPostingsFormat(Random)");
}
};
} else {
this.seedRandom = new Random(random.nextLong());
}
}
@Override
public FieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException {
int minSkipInterval;
if (state.segmentInfo.maxDoc() > 1000000) {
// Test2BPostings can OOME otherwise:
minSkipInterval = 3;
} else {
minSkipInterval = 2;
}
// we pull this before the seed intentionally: because it's not consumed at runtime
// (the skipInterval is written into postings header).
// NOTE: Currently not passed to postings writer.
// before, it was being passed in wrongly as acceptableOverhead!
int skipInterval = TestUtil.nextInt(seedRandom, minSkipInterval, 10);
if (LuceneTestCase.VERBOSE) {
System.out.println("MockRandomCodec: skipInterval=" + skipInterval);
}
final long seed = seedRandom.nextLong();
if (LuceneTestCase.VERBOSE) {
System.out.println("MockRandomCodec: writing to seg=" + state.segmentInfo.name + " formatID=" + state.segmentSuffix + " seed=" + seed);
}
final String seedFileName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, SEED_EXT);
try(IndexOutput out = state.directory.createOutput(seedFileName, state.context)) {
CodecUtil.writeIndexHeader(out, "MockRandomSeed", 0, state.segmentInfo.getId(), state.segmentSuffix);
out.writeLong(seed);
CodecUtil.writeFooter(out);
}
final Random random = new Random(seed);
random.nextInt(); // consume a random for buffersize
PostingsWriterBase postingsWriter = new Lucene84PostingsWriter(state);
final FieldsConsumer fields;
final int t1 = random.nextInt(4);
if (t1 == 0) {
boolean success = false;
try {
fields = new FSTTermsWriter(state, postingsWriter);
success = true;
} finally {
if (!success) {
postingsWriter.close();
}
}
} else if (t1 == 1) {
// Use BlockTree terms dict
if (LuceneTestCase.VERBOSE) {
System.out.println("MockRandomCodec: writing BlockTree terms dict");
}
// TODO: would be nice to allow 1 but this is very
// slow to write
final int minTermsInBlock = TestUtil.nextInt(random, 2, 100);
final int maxTermsInBlock = Math.max(2, (minTermsInBlock-1)*2 + random.nextInt(100));
boolean success = false;
try {
fields = new BlockTreeTermsWriter(state, postingsWriter, minTermsInBlock, maxTermsInBlock);
success = true;
} finally {
if (!success) {
postingsWriter.close();
}
}
} else if (t1 == 2) {
if (LuceneTestCase.VERBOSE) {
System.out.println("MockRandomCodec: writing Block terms dict");
}
boolean success = false;
final TermsIndexWriterBase indexWriter;
try {
if (random.nextBoolean()) {
int termIndexInterval = TestUtil.nextInt(random, 1, 100);
if (LuceneTestCase.VERBOSE) {
System.out.println("MockRandomCodec: fixed-gap terms index (tii=" + termIndexInterval + ")");
}
indexWriter = new FixedGapTermsIndexWriter(state, termIndexInterval);
} else {
final VariableGapTermsIndexWriter.IndexTermSelector selector;
final int n2 = random.nextInt(3);
if (n2 == 0) {
final int tii = TestUtil.nextInt(random, 1, 100);
selector = new VariableGapTermsIndexWriter.EveryNTermSelector(tii);
if (LuceneTestCase.VERBOSE) {
System.out.println("MockRandomCodec: variable-gap terms index (tii=" + tii + ")");
}
} else if (n2 == 1) {
final int docFreqThresh = TestUtil.nextInt(random, 2, 100);
final int tii = TestUtil.nextInt(random, 1, 100);
selector = new VariableGapTermsIndexWriter.EveryNOrDocFreqTermSelector(docFreqThresh, tii);
} else {
final long seed2 = random.nextLong();
final int gap = TestUtil.nextInt(random, 2, 40);
if (LuceneTestCase.VERBOSE) {
System.out.println("MockRandomCodec: random-gap terms index (max gap=" + gap + ")");
}
selector = new VariableGapTermsIndexWriter.IndexTermSelector() {
final Random rand = new Random(seed2);
@Override
public boolean isIndexTerm(BytesRef term, TermStats stats) {
return rand.nextInt(gap) == gap/2;
}
@Override
public void newField(FieldInfo fieldInfo) {
}
};
}
indexWriter = new VariableGapTermsIndexWriter(state, selector);
}
success = true;
} finally {
if (!success) {
postingsWriter.close();
}
}
success = false;
try {
fields = new BlockTermsWriter(indexWriter, state, postingsWriter);
success = true;
} finally {
if (!success) {
try {
postingsWriter.close();
} finally {
indexWriter.close();
}
}
}
} else if (t1 == 3) {
// Use OrdsBlockTree terms dict
if (LuceneTestCase.VERBOSE) {
System.out.println("MockRandomCodec: writing OrdsBlockTree");
}
// TODO: would be nice to allow 1 but this is very
// slow to write
final int minTermsInBlock = TestUtil.nextInt(random, 2, 100);
final int maxTermsInBlock = Math.max(2, (minTermsInBlock-1)*2 + random.nextInt(100));
boolean success = false;
try {
fields = new OrdsBlockTreeTermsWriter(state, postingsWriter, minTermsInBlock, maxTermsInBlock);
success = true;
} finally {
if (!success) {
postingsWriter.close();
}
}
} else {
// BUG!
throw new AssertionError();
}
return fields;
}
@Override
public FieldsProducer fieldsProducer(SegmentReadState state) throws IOException {
final String seedFileName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, SEED_EXT);
final ChecksumIndexInput in = state.directory.openChecksumInput(seedFileName, state.context);
CodecUtil.checkIndexHeader(in, "MockRandomSeed", 0, 0, state.segmentInfo.getId(), state.segmentSuffix);
final long seed = in.readLong();
CodecUtil.checkFooter(in);
if (LuceneTestCase.VERBOSE) {
System.out.println("MockRandomCodec: reading from seg=" + state.segmentInfo.name + " formatID=" + state.segmentSuffix + " seed=" + seed);
}
in.close();
final Random random = new Random(seed);
int readBufferSize = TestUtil.nextInt(random, 1, 4096);
if (LuceneTestCase.VERBOSE) {
System.out.println("MockRandomCodec: readBufferSize=" + readBufferSize);
}
PostingsReaderBase postingsReader = new Lucene84PostingsReader(state);
final FieldsProducer fields;
final int t1 = random.nextInt(4);
if (t1 == 0) {
boolean success = false;
try {
fields = new FSTTermsReader(state, postingsReader);
success = true;
} finally {
if (!success) {
postingsReader.close();
}
}
} else if (t1 == 1) {
// Use BlockTree terms dict
if (LuceneTestCase.VERBOSE) {
System.out.println("MockRandomCodec: reading BlockTree terms dict");
}
boolean success = false;
try {
fields = new BlockTreeTermsReader(postingsReader, state);
success = true;
} finally {
if (!success) {
postingsReader.close();
}
}
} else if (t1 == 2) {
if (LuceneTestCase.VERBOSE) {
System.out.println("MockRandomCodec: reading Block terms dict");
}
final TermsIndexReaderBase indexReader;
boolean success = false;
try {
final boolean doFixedGap = random.nextBoolean();
// randomness diverges from writer, here:
if (doFixedGap) {
if (LuceneTestCase.VERBOSE) {
System.out.println("MockRandomCodec: fixed-gap terms index");
}
indexReader = new FixedGapTermsIndexReader(state);
} else {
final int n2 = random.nextInt(3);
if (n2 == 1) {
random.nextInt();
} else if (n2 == 2) {
random.nextLong();
}
if (LuceneTestCase.VERBOSE) {
System.out.println("MockRandomCodec: variable-gap terms index");
}
indexReader = new VariableGapTermsIndexReader(state);
}
success = true;
} finally {
if (!success) {
postingsReader.close();
}
}
success = false;
try {
fields = new BlockTermsReader(indexReader, postingsReader, state);
success = true;
} finally {
if (!success) {
try {
postingsReader.close();
} finally {
indexReader.close();
}
}
}
} else if (t1 == 3) {
// Use OrdsBlockTree terms dict
if (LuceneTestCase.VERBOSE) {
System.out.println("MockRandomCodec: reading OrdsBlockTree terms dict");
}
boolean success = false;
try {
fields = new OrdsBlockTreeTermsReader(postingsReader, state);
success = true;
} finally {
if (!success) {
postingsReader.close();
}
}
} else {
// BUG!
throw new AssertionError();
}
return fields;
}
}