/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.index;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Random;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.TestUtil;
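/**
 * Stress test for IndexWriter: several unsynchronized threads add, update
 * and delete random documents through a single writer, and the resulting
 * index is compared field-by-field, term-by-term against an index built
 * serially from the same logical set of documents.
 */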
public class TestStressIndexing2 extends LuceneTestCase {
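// Indexing knobs; testMultiConfig randomizes several of these on each iteration.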
static int maxFields=4;
static int bigFieldSize=10;
static boolean sameFieldOrder=false;
static int mergeFactor=3;
static int maxBufferedDocs=3;
static int seed=0;
public void testRandomIWReader() throws Throwable {
Directory dir = newMaybeVirusCheckingDirectory();
// open an NRT reader via IW.getReader, then commit; the committed index must match the NRT view
DocsAndWriter dw = indexRandomIWReader(5, 3, 100, dir);
DirectoryReader reader = dw.writer.getReader();
dw.writer.commit();
verifyEquals(random(), reader, dir, "id");
reader.close();
dw.writer.close();
dir.close();
}
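// Index the same random documents concurrently into dir1 and serially into
// dir2, then check that the two indexes are logically identical.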
public void testRandom() throws Throwable {
Directory dir1 = newMaybeVirusCheckingDirectory();
Directory dir2 = newMaybeVirusCheckingDirectory();
// mergeFactor=2; maxBufferedDocs=2; Map docs = indexRandom(1, 3, 2, dir1);
boolean doReaderPooling = random().nextBoolean();
Map<String,Document> docs = indexRandom(5, 3, 100, dir1, doReaderPooling);
indexSerial(random(), docs, dir2);
// sanity checks for the verifier itself (an index must equal itself):
// verifyEquals(dir1, dir1, "id");
// verifyEquals(dir2, dir2, "id");
verifyEquals(dir1, dir2, "id");
dir1.close();
dir2.close();
}
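// Run several iterations with randomized thread counts, merge settings and
// buffer sizes to exercise many small configurations together.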
public void testMultiConfig() throws Throwable {
// test lots of smaller different params together
int num = atLeast(3);
for (int i = 0; i < num; i++) { // increase iterations for better testing
if (VERBOSE) {
System.out.println("\n\nTEST: top iter=" + i);
}
sameFieldOrder=random().nextBoolean();
mergeFactor=random().nextInt(3)+2;
maxBufferedDocs=random().nextInt(3)+2;
boolean doReaderPooling = random().nextBoolean();
seed++;
int nThreads=random().nextInt(5)+1;
int iter=random().nextInt(5)+1;
int range=random().nextInt(20)+1;
Directory dir1 = newDirectory();
Directory dir2 = newDirectory();
if (VERBOSE) {
System.out.println(" nThreads=" + nThreads + " iter=" + iter + " range=" + range + " doPooling=" + doReaderPooling + " sameFieldOrder=" + sameFieldOrder + " mergeFactor=" + mergeFactor + " maxBufferedDocs=" + maxBufferedDocs);
}
Map<String,Document> docs = indexRandom(nThreads, iter, range, dir1, doReaderPooling);
if (VERBOSE) {
System.out.println("TEST: index serial");
}
indexSerial(random(), docs, dir2);
if (VERBOSE) {
System.out.println("TEST: verify");
}
verifyEquals(dir1, dir2, "id");
dir1.close();
dir2.close();
}
}
static Term idTerm = new Term("id","");
IndexingThread[] threads;
static Comparator<IndexableField> fieldNameComparator = new Comparator<IndexableField>() {
@Override
public int compare(IndexableField o1, IndexableField o2) {
return o1.name().compareTo(o2.name());
}
};
// This test avoids using any extra synchronization in the multiple
// indexing threads to verify that IndexWriter itself correctly
// synchronizes everything.
public static class DocsAndWriter {
Map<String,Document> docs;
IndexWriter writer;
}
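// Like indexRandom, but leaves the writer open so the caller can verify
// against an NRT reader; returns both the writer and the expected docs.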
public DocsAndWriter indexRandomIWReader(int nThreads, int iterations, int range, Directory dir) throws IOException, InterruptedException {
Map<String,Document> docs = new HashMap<>();
IndexWriter w = RandomIndexWriter.mockIndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random()))
.setOpenMode(OpenMode.CREATE)
.setRAMBufferSizeMB(0.1)
.setMaxBufferedDocs(maxBufferedDocs)
.setMergePolicy(newLogMergePolicy()), random());
w.commit();
LogMergePolicy lmp = (LogMergePolicy) w.getConfig().getMergePolicy();
lmp.setNoCFSRatio(0.0);
lmp.setMergeFactor(mergeFactor);
/***
w.setMaxMergeDocs(Integer.MAX_VALUE);
w.setMaxFieldLength(10000);
w.setRAMBufferSizeMB(1);
w.setMergeFactor(10);
***/
threads = new IndexingThread[nThreads];
for (int i=0; i<threads.length; i++) {
IndexingThread th = new IndexingThread();
th.w = w;
th.base = 1000000*i;
th.range = range;
th.iterations = iterations;
threads[i] = th;
}
for (int i=0; i<threads.length; i++) {
threads[i].start();
}
for (int i=0; i<threads.length; i++) {
threads[i].join();
}
// w.forceMerge(1);
//w.close();
for (int i=0; i<threads.length; i++) {
IndexingThread th = threads[i];
synchronized(th) {
docs.putAll(th.docs);
}
}
TestUtil.checkIndex(dir);
DocsAndWriter dw = new DocsAndWriter();
dw.docs = docs;
dw.writer = w;
return dw;
}
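// Index random docs from nThreads concurrent threads, close the writer,
// and return the surviving documents keyed by their id field.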
public Map<String,Document> indexRandom(int nThreads, int iterations, int range, Directory dir,
boolean doReaderPooling) throws IOException, InterruptedException {
Map<String,Document> docs = new HashMap<>();
IndexWriter w = RandomIndexWriter.mockIndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random()))
.setOpenMode(OpenMode.CREATE)
.setRAMBufferSizeMB(0.1)
.setMaxBufferedDocs(maxBufferedDocs)
.setReaderPooling(doReaderPooling)
.setMergePolicy(newLogMergePolicy()), random());
LogMergePolicy lmp = (LogMergePolicy) w.getConfig().getMergePolicy();
lmp.setNoCFSRatio(0.0);
lmp.setMergeFactor(mergeFactor);
threads = new IndexingThread[nThreads];
for (int i=0; i<threads.length; i++) {
IndexingThread th = new IndexingThread();
th.w = w;
th.base = 1000000*i;
th.range = range;
th.iterations = iterations;
threads[i] = th;
}
for (int i=0; i<threads.length; i++) {
threads[i].start();
}
for (int i=0; i<threads.length; i++) {
threads[i].join();
}
//w.forceMerge(1);
w.close();
for (int i=0; i<threads.length; i++) {
IndexingThread th = threads[i];
synchronized(th) {
docs.putAll(th.docs);
}
}
//System.out.println("TEST: checkindex");
TestUtil.checkIndex(dir);
return docs;
}
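// Build the reference index: add the expected documents one by one, with
// fields in a deterministic order.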
public static void indexSerial(Random random, Map<String,Document> docs, Directory dir) throws IOException {
IndexWriter w = new IndexWriter(dir, LuceneTestCase.newIndexWriterConfig(random, new MockAnalyzer(random)).setMergePolicy(newLogMergePolicy()));
// index all docs in a single thread
Iterator<Document> iter = docs.values().iterator();
while (iter.hasNext()) {
Document d = iter.next();
ArrayList<IndexableField> fields = new ArrayList<>();
fields.addAll(d.getFields());
// put fields in same order each time
Collections.sort(fields, fieldNameComparator);
Document d1 = new Document();
for (int i=0; i<fields.size(); i++) {
d1.add(fields.get(i));
}
w.addDocument(d1);
// System.out.println("indexing "+d1);
}
w.close();
}
public void verifyEquals(Random r, DirectoryReader r1, Directory dir2, String idField) throws Throwable {
DirectoryReader r2 = DirectoryReader.open(dir2);
verifyEquals(r1, r2, idField);
r2.close();
}
public void verifyEquals(Directory dir1, Directory dir2, String idField) throws Throwable {
DirectoryReader r1 = DirectoryReader.open(dir1);
DirectoryReader r2 = DirectoryReader.open(dir2);
verifyEquals(r1, r2, idField);
r1.close();
r2.close();
}
private static void printDocs(DirectoryReader r) throws Throwable {
for(LeafReaderContext ctx : r.leaves()) {
// TODO: improve this
LeafReader sub = ctx.reader();
Bits liveDocs = sub.getLiveDocs();
System.out.println(" " + ((SegmentReader) sub).getSegmentInfo());
for(int docID=0;docID<sub.maxDoc();docID++) {
Document doc = sub.document(docID);
if (liveDocs == null || liveDocs.get(docID)) {
System.out.println(" docID=" + docID + " id:" + doc.get("id"));
} else {
System.out.println(" DEL docID=" + docID + " id:" + doc.get("id"));
}
}
}
}
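// Advance the postings iterator to the next doc that is live according to
// liveDocs (or to NO_MORE_DOCS).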
private static int nextNonDeletedDoc(PostingsEnum it, Bits liveDocs) throws IOException {
int doc = it.nextDoc();
while (doc != DocIdSetIterator.NO_MORE_DOCS && liveDocs != null && liveDocs.get(doc) == false) {
doc = it.nextDoc();
}
return doc;
}
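// Full logical comparison of two readers: first align docids via the id
// field (building r2r1), then walk all indexed fields and compare postings
// doc-by-doc and freq-by-freq under that mapping.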
public void verifyEquals(DirectoryReader r1, DirectoryReader r2, String idField) throws Throwable {
if (VERBOSE) {
System.out.println("\nr1 docs:");
printDocs(r1);
System.out.println("\nr2 docs:");
printDocs(r2);
}
if (r1.numDocs() != r2.numDocs()) {
assert false: "r1.numDocs()=" + r1.numDocs() + " vs r2.numDocs()=" + r2.numDocs();
}
boolean hasDeletes = !(r1.maxDoc()==r2.maxDoc() && r1.numDocs()==r1.maxDoc());
int[] r2r1 = new int[r2.maxDoc()]; // r2 id to r1 id mapping
// create a mapping from r2's docid space to r1's docid space, keyed on idField
if (FieldInfos.getIndexedFields(r1).isEmpty()) {
assertTrue(FieldInfos.getIndexedFields(r2).isEmpty());
return;
}
final Terms terms1 = MultiTerms.getTerms(r1, idField);
if (terms1 == null) {
assertTrue(MultiTerms.getTerms(r2, idField) == null);
return;
}
final TermsEnum termsEnum = terms1.iterator();
final Bits liveDocs1 = MultiBits.getLiveDocs(r1);
final Bits liveDocs2 = MultiBits.getLiveDocs(r2);
Terms terms2 = MultiTerms.getTerms(r2, idField);
if (terms2 == null) {
// make sure r1 is in fact empty (e.g. contains only
// deleted docs):
Bits liveDocs = MultiBits.getLiveDocs(r1);
PostingsEnum docs = null;
while(termsEnum.next() != null) {
docs = TestUtil.docs(random(), termsEnum, docs, PostingsEnum.NONE);
while(nextNonDeletedDoc(docs, liveDocs) != DocIdSetIterator.NO_MORE_DOCS) {
fail("r1 is not empty but r2 is");
}
}
return;
}
TermsEnum termsEnum2 = terms2.iterator();
PostingsEnum termDocs1 = null;
PostingsEnum termDocs2 = null;
while(true) {
BytesRef term = termsEnum.next();
//System.out.println("TEST: match id term=" + term);
if (term == null) {
break;
}
termDocs1 = TestUtil.docs(random(), termsEnum, termDocs1, PostingsEnum.NONE);
if (termsEnum2.seekExact(term)) {
termDocs2 = TestUtil.docs(random(), termsEnum2, termDocs2, PostingsEnum.NONE);
} else {
termDocs2 = null;
}
if (nextNonDeletedDoc(termDocs1, liveDocs1) == DocIdSetIterator.NO_MORE_DOCS) {
// This doc is deleted and wasn't replaced
assertTrue(termDocs2 == null || nextNonDeletedDoc(termDocs2, liveDocs2) == DocIdSetIterator.NO_MORE_DOCS);
continue;
}
int id1 = termDocs1.docID();
assertEquals(DocIdSetIterator.NO_MORE_DOCS, nextNonDeletedDoc(termDocs1, liveDocs1));
assertTrue(nextNonDeletedDoc(termDocs2, liveDocs2) != DocIdSetIterator.NO_MORE_DOCS);
int id2 = termDocs2.docID();
assertEquals(DocIdSetIterator.NO_MORE_DOCS, nextNonDeletedDoc(termDocs2, liveDocs2));
r2r1[id2] = id1;
// verify stored fields are equivalent
try {
verifyEquals(r1.document(id1), r2.document(id2));
} catch (Throwable t) {
System.out.println("FAILED id=" + term + " id1=" + id1 + " id2=" + id2 + " term="+ term);
System.out.println(" d1=" + r1.document(id1));
System.out.println(" d2=" + r2.document(id2));
throw t;
}
try {
// verify term vectors are equivalent
verifyEquals(r1.getTermVectors(id1), r2.getTermVectors(id2));
} catch (Throwable e) {
System.out.println("FAILED id=" + term + " id1=" + id1 + " id2=" + id2);
Fields tv1 = r1.getTermVectors(id1);
System.out.println(" d1=" + tv1);
if (tv1 != null) {
PostingsEnum dpEnum = null;
PostingsEnum dEnum = null;
for (String field : tv1) {
System.out.println(" " + field + ":");
Terms terms3 = tv1.terms(field);
assertNotNull(terms3);
TermsEnum termsEnum3 = terms3.iterator();
BytesRef term2;
while((term2 = termsEnum3.next()) != null) {
System.out.println(" " + term2.utf8ToString() + ": freq=" + termsEnum3.totalTermFreq());
dpEnum = termsEnum3.postings(dpEnum, PostingsEnum.ALL);
if (terms3.hasPositions()) {
assertTrue(dpEnum.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
final int freq = dpEnum.freq();
System.out.println(" doc=" + dpEnum.docID() + " freq=" + freq);
for(int posUpto=0;posUpto<freq;posUpto++) {
System.out.println(" pos=" + dpEnum.nextPosition());
}
} else {
dEnum = TestUtil.docs(random(), termsEnum3, dEnum, PostingsEnum.FREQS);
assertNotNull(dEnum);
assertTrue(dEnum.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
final int freq = dEnum.freq();
System.out.println(" doc=" + dEnum.docID() + " freq=" + freq);
}
}
}
}
Fields tv2 = r2.getTermVectors(id2);
System.out.println(" d2=" + tv2);
if (tv2 != null) {
PostingsEnum dpEnum = null;
PostingsEnum dEnum = null;
for (String field : tv2) {
System.out.println(" " + field + ":");
Terms terms3 = tv2.terms(field);
assertNotNull(terms3);
TermsEnum termsEnum3 = terms3.iterator();
BytesRef term2;
while((term2 = termsEnum3.next()) != null) {
System.out.println(" " + term2.utf8ToString() + ": freq=" + termsEnum3.totalTermFreq());
dpEnum = termsEnum3.postings(dpEnum, PostingsEnum.ALL);
if (dpEnum != null) {
assertTrue(dpEnum.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
final int freq = dpEnum.freq();
System.out.println(" doc=" + dpEnum.docID() + " freq=" + freq);
for(int posUpto=0;posUpto<freq;posUpto++) {
System.out.println(" pos=" + dpEnum.nextPosition());
}
} else {
dEnum = TestUtil.docs(random(), termsEnum3, dEnum, PostingsEnum.FREQS);
assertNotNull(dEnum);
assertTrue(dEnum.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
final int freq = dEnum.freq();
System.out.println(" doc=" + dEnum.docID() + " freq=" + freq);
}
}
}
}
throw e;
}
}
//System.out.println("TEST: done match id");
// Verify postings
//System.out.println("TEST: create te1");
final Iterator<String> fields1Enum = FieldInfos.getIndexedFields(r1).stream().sorted().iterator();
final Iterator<String> fields2Enum = FieldInfos.getIndexedFields(r2).stream().sorted().iterator();
String field1=null, field2=null;
TermsEnum termsEnum1 = null;
termsEnum2 = null;
PostingsEnum docs1=null, docs2=null;
// pack both doc and freq into single element for easy sorting
long[] info1 = new long[r1.numDocs()];
long[] info2 = new long[r2.numDocs()];
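// Example: docID=5 with freq=2 packs to (5L<<32)|2; sorting the packed
// longs ascending therefore orders entries by docID. info1 is filled in
// docID order already; info2 is sorted below after remapping through r2r1.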
for(;;) {
BytesRef term1=null, term2=null;
// iterate until we get some docs
int len1;
for(;;) {
len1=0;
if (termsEnum1 == null) {
if (!fields1Enum.hasNext()) {
break;
}
field1 = fields1Enum.next();
Terms terms = MultiTerms.getTerms(r1, field1);
if (terms == null) {
continue;
}
termsEnum1 = terms.iterator();
}
term1 = termsEnum1.next();
if (term1 == null) {
// no more terms in this field
termsEnum1 = null;
continue;
}
//System.out.println("TEST: term1=" + term1);
docs1 = TestUtil.docs(random(), termsEnum1, docs1, PostingsEnum.FREQS);
while (docs1.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
int d = docs1.docID();
if (liveDocs1 != null && liveDocs1.get(d) == false) {
continue;
}
int f = docs1.freq();
info1[len1] = (((long)d)<<32) | f;
len1++;
}
if (len1>0) break;
}
// iterate until we get some docs
int len2;
for(;;) {
len2=0;
if (termsEnum2 == null) {
if (!fields2Enum.hasNext()) {
break;
}
field2 = fields2Enum.next();
Terms terms = MultiTerms.getTerms(r2, field2);
if (terms == null) {
continue;
}
termsEnum2 = terms.iterator();
}
term2 = termsEnum2.next();
if (term2 == null) {
// no more terms in this field
termsEnum2 = null;
continue;
}
//System.out.println("TEST: term1=" + term1);
docs2 = TestUtil.docs(random(), termsEnum2, docs2, PostingsEnum.FREQS);
while (docs2.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
if (liveDocs2 != null && liveDocs2.get(docs2.docID()) == false) {
continue;
}
int d = r2r1[docs2.docID()];
int f = docs2.freq();
info2[len2] = (((long)d)<<32) | f;
len2++;
}
if (len2>0) break;
}
assertEquals(len1, len2);
if (len1==0) break; // no more terms
assertEquals(field1, field2);
assertEquals(term1, term2);
if (!hasDeletes)
assertEquals(termsEnum1.docFreq(), termsEnum2.docFreq());
assertEquals("len1=" + len1 + " len2=" + len2 + " deletes?=" + hasDeletes, term1, term2);
// sort info2 to get it into ascending docid
Arrays.sort(info2, 0, len2);
// now compare
for (int i=0; i<len1; i++) {
assertEquals("i=" + i + " len=" + len1 + " d1=" + (info1[i]>>>32) + " f1=" + (info1[i]&Integer.MAX_VALUE) + " d2=" + (info2[i]>>>32) + " f2=" + (info2[i]&Integer.MAX_VALUE) +
" field=" + field1 + " term=" + term1.utf8ToString(),
info1[i],
info2[i]);
}
}
}
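// Compare stored fields of two documents, ignoring field order.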
public static void verifyEquals(Document d1, Document d2) {
List<IndexableField> ff1 = new ArrayList<>(d1.getFields());
List<IndexableField> ff2 = new ArrayList<>(d2.getFields());
Collections.sort(ff1, fieldNameComparator);
Collections.sort(ff2, fieldNameComparator);
assertEquals(ff1 + " : " + ff2, ff1.size(), ff2.size());
for (int i=0; i<ff1.size(); i++) {
IndexableField f1 = ff1.get(i);
IndexableField f2 = ff2.get(i);
if (f1.binaryValue() != null) {
assert(f2.binaryValue() != null);
} else {
String s1 = f1.stringValue();
String s2 = f2.stringValue();
assertEquals(ff1 + " : " + ff2, s1,s2);
}
}
}
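// Compare term vectors: same fields, same terms, same freqs, and (when
// present) same positions and offsets.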
public static void verifyEquals(Fields d1, Fields d2) throws IOException {
if (d1 == null) {
assertTrue(d2 == null || d2.size() == 0);
return;
}
assertTrue(d2 != null);
Iterator<String> fieldsEnum2 = d2.iterator();
for (String field1 : d1) {
String field2 = fieldsEnum2.next();
assertEquals(field1, field2);
Terms terms1 = d1.terms(field1);
assertNotNull(terms1);
TermsEnum termsEnum1 = terms1.iterator();
Terms terms2 = d2.terms(field2);
assertNotNull(terms2);
TermsEnum termsEnum2 = terms2.iterator();
PostingsEnum dpEnum1 = null;
PostingsEnum dpEnum2 = null;
PostingsEnum dEnum1 = null;
PostingsEnum dEnum2 = null;
BytesRef term1;
while ((term1 = termsEnum1.next()) != null) {
BytesRef term2 = termsEnum2.next();
assertEquals(term1, term2);
assertEquals(termsEnum1.totalTermFreq(),
termsEnum2.totalTermFreq());
dpEnum1 = termsEnum1.postings(dpEnum1, PostingsEnum.ALL);
dpEnum2 = termsEnum2.postings(dpEnum2, PostingsEnum.ALL);
if (terms1.hasPositions()) {
assertTrue(terms2.hasPositions());
int docID1 = dpEnum1.nextDoc();
dpEnum2.nextDoc();
// docIDs need not match across the two enums, so we don't compare them
//int docID2 = dpEnum2.nextDoc();
//assertEquals(docID1, docID2);
assertTrue(docID1 != DocIdSetIterator.NO_MORE_DOCS);
int freq1 = dpEnum1.freq();
int freq2 = dpEnum2.freq();
assertEquals(freq1, freq2);
for(int posUpto=0;posUpto<freq1;posUpto++) {
int pos1 = dpEnum1.nextPosition();
int pos2 = dpEnum2.nextPosition();
assertEquals(pos1, pos2);
if (terms1.hasOffsets()) {
assertTrue(terms2.hasOffsets());
assertEquals(dpEnum1.startOffset(),
dpEnum2.startOffset());
assertEquals(dpEnum1.endOffset(),
dpEnum2.endOffset());
}
}
assertEquals(DocIdSetIterator.NO_MORE_DOCS, dpEnum1.nextDoc());
assertEquals(DocIdSetIterator.NO_MORE_DOCS, dpEnum2.nextDoc());
} else {
dEnum1 = TestUtil.docs(random(), termsEnum1, dEnum1, PostingsEnum.FREQS);
dEnum2 = TestUtil.docs(random(), termsEnum2, dEnum2, PostingsEnum.FREQS);
assertNotNull(dEnum1);
assertNotNull(dEnum2);
int docID1 = dEnum1.nextDoc();
dEnum2.nextDoc();
// docIDs need not match across the two enums, so we don't compare them
//int docID2 = dEnum2.nextDoc();
//assertEquals(docID1, docID2);
assertTrue(docID1 != DocIdSetIterator.NO_MORE_DOCS);
int freq1 = dEnum1.freq();
int freq2 = dEnum2.freq();
assertEquals(freq1, freq2);
assertEquals(DocIdSetIterator.NO_MORE_DOCS, dEnum1.nextDoc());
assertEquals(DocIdSetIterator.NO_MORE_DOCS, dEnum2.nextDoc());
}
}
assertNull(termsEnum2.next());
}
assertFalse(fieldsEnum2.hasNext());
}
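// Worker thread: repeatedly indexes, deletes by term, or deletes by query
// random documents, recording the expected surviving docs in `docs`.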
private static class IndexingThread extends Thread {
IndexWriter w;
int base;
int range;
int iterations;
Map<String,Document> docs = new HashMap<>();
Random r;
public int nextInt(int lim) {
return r.nextInt(lim);
}
// start is inclusive and end is exclusive
public int nextInt(int start, int end) {
return start + r.nextInt(end-start);
}
char[] buffer = new char[100];
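// Append one random token (up to 20 chars, possibly including valid
// surrogate pairs) to buffer starting at `start`, followed by a space;
// returns the index just past the trailing space.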
private int addUTF8Token(int start) {
final int end = start + nextInt(20);
if (buffer.length < 1+end) {
char[] newBuffer = new char[(int) ((1+end)*1.25)];
System.arraycopy(buffer, 0, newBuffer, 0, buffer.length);
buffer = newBuffer;
}
for(int i=start;i<end;i++) {
int t = nextInt(5);
if (0 == t && i < end-1) {
// Make a surrogate pair
// High surrogate
buffer[i++] = (char) nextInt(0xd800, 0xdc00);
// Low surrogate
buffer[i] = (char) nextInt(0xdc00, 0xe000);
} else if (t <= 1)
buffer[i] = (char) nextInt(0x80);
else if (2 == t)
buffer[i] = (char) nextInt(0x80, 0x800);
else if (3 == t)
buffer[i] = (char) nextInt(0x800, 0xd800);
else if (4 == t)
buffer[i] = (char) nextInt(0xe000, 0xffff);
}
buffer[end] = ' ';
return 1+end;
}
public String getString(int nTokens) {
nTokens = nTokens!=0 ? nTokens : r.nextInt(4)+1;
// Half the time make a random UTF8 string
if (r.nextBoolean())
return getUTF8String(nTokens);
// avoid StringBuffer because it adds extra synchronization.
char[] arr = new char[nTokens*2];
for (int i=0; i<nTokens; i++) {
arr[i*2] = (char)('A' + r.nextInt(10));
arr[i*2+1] = ' ';
}
return new String(arr);
}
public String getUTF8String(int nTokens) {
int upto = 0;
Arrays.fill(buffer, (char) 0);
for(int i=0;i<nTokens;i++)
upto = addUTF8Token(upto);
return new String(buffer, 0, upto);
}
public String getIdString() {
return Integer.toString(base + nextInt(range));
}
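// Create a document with a random set of fields, reusing term-vector
// settings for repeated field names (a field must have consistent term
// vector options within one document), then updateDocument by its id term.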
public void indexDoc() throws IOException {
Document d = new Document();
FieldType customType1 = new FieldType(TextField.TYPE_STORED);
customType1.setTokenized(false);
customType1.setOmitNorms(true);
ArrayList<Field> fields = new ArrayList<>();
String idString = getIdString();
Field idField = newField("id", idString, customType1);
fields.add(idField);
Map<String,FieldType> tvTypes = new HashMap<>();
int nFields = nextInt(maxFields);
for (int i=0; i<nFields; i++) {
String fieldName = "f" + nextInt(100);
FieldType customType;
// Use the same term vector settings if we already
// added this field to the doc:
FieldType oldTVType = tvTypes.get(fieldName);
if (oldTVType != null) {
customType = new FieldType(oldTVType);
} else {
customType = new FieldType();
switch (nextInt(4)) {
case 0:
break;
case 1:
customType.setStoreTermVectors(true);
break;
case 2:
customType.setStoreTermVectors(true);
customType.setStoreTermVectorPositions(true);
break;
case 3:
customType.setStoreTermVectors(true);
customType.setStoreTermVectorOffsets(true);
break;
}
FieldType newType = new FieldType(customType);
newType.freeze();
tvTypes.put(fieldName, newType);
}
switch (nextInt(4)) {
case 0:
customType.setStored(true);
customType.setOmitNorms(true);
customType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
customType.freeze();
fields.add(newField(fieldName, getString(1), customType));
break;
case 1:
customType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
customType.setTokenized(true);
customType.freeze();
fields.add(newField(fieldName, getString(0), customType));
break;
case 2:
customType.setStored(true);
customType.setStoreTermVectors(false);
customType.setStoreTermVectorOffsets(false);
customType.setStoreTermVectorPositions(false);
customType.freeze();
fields.add(newField(fieldName, getString(0), customType));
break;
case 3:
customType.setStored(true);
customType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
customType.setTokenized(true);
customType.freeze();
fields.add(newField(fieldName, getString(bigFieldSize), customType));
break;
}
}
if (sameFieldOrder) {
Collections.sort(fields, fieldNameComparator);
} else {
// random placement of id field also
Collections.swap(fields,nextInt(fields.size()), 0);
}
for (int i=0; i<fields.size(); i++) {
d.add(fields.get(i));
}
if (VERBOSE) {
System.out.println(Thread.currentThread().getName() + ": indexing id:" + idString);
}
w.updateDocument(new Term("id", idString), d);
//System.out.println(Thread.currentThread().getName() + ": indexing "+d);
docs.put(idString, d);
}
public void deleteDoc() throws IOException {
String idString = getIdString();
if (VERBOSE) {
System.out.println(Thread.currentThread().getName() + ": del id:" + idString);
}
w.deleteDocuments(new Term("id", idString));
docs.remove(idString);
}
public void deleteByQuery() throws IOException {
String idString = getIdString();
if (VERBOSE) {
System.out.println(Thread.currentThread().getName() + ": del query id:" + idString);
}
w.deleteDocuments(new TermQuery(new Term("id", idString)));
docs.remove(idString);
}
@Override
public void run() {
try {
r = new Random(base+range+seed);
for (int i=0; i<iterations; i++) {
int what = nextInt(100);
if (what < 5) {
deleteDoc();
} else if (what < 10) {
deleteByQuery();
} else {
indexDoc();
}
}
} catch (Throwable e) {
throw new RuntimeException(e);
}
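// deliberately empty-looking block: synchronizing here pairs with the
// synchronized read of `docs` in indexRandom/indexRandomIWReader to
// publish the map (Thread.join already provides the happens-before;
// this is extra safety)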
synchronized (this) {
docs.size();
}
}
}
}