blob: 8c0f15fca7390e0e43adee3c2a15a4322128ec69 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.index;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.PrintStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map.Entry;
import java.util.Map;
import java.util.Set;
import java.util.TreeSet;
import java.util.concurrent.CountDownLatch;
import java.util.function.LongSupplier;
import java.util.function.Supplier;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.document.BinaryDocValuesField;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FloatDocValuesField;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.document.SortedDocValuesField;
import org.apache.lucene.document.SortedNumericDocValuesField;
import org.apache.lucene.document.SortedSetDocValuesField;
import org.apache.lucene.document.StoredField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.CheckIndex.Status.DocValuesStatus;
import org.apache.lucene.index.TermsEnum.SeekStatus;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BitSet;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;
import org.apache.lucene.util.BytesRefHash;
import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.TestUtil;
import com.carrotsearch.randomizedtesting.generators.RandomPicks;
import org.apache.lucene.util.automaton.CompiledAutomaton;
import org.apache.lucene.util.automaton.RegExp;
import static org.apache.lucene.index.SortedSetDocValues.NO_MORE_ORDS;
import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS;
/**
* Abstract class to do basic tests for a docvalues format.
* NOTE: This test focuses on the docvalues impl, nothing else.
* The [stretch] goal is for this test to be
* so thorough in testing a new DocValuesFormat that if this
* test passes, then all Lucene/Solr tests should also pass. Ie,
* if there is some bug in a given DocValuesFormat that this
* test fails to catch then this test needs to be improved! */
public abstract class BaseDocValuesFormatTestCase extends BaseIndexFileFormatTestCase {
@Override
protected void addRandomFields(Document doc) {
if (usually()) {
doc.add(new NumericDocValuesField("ndv", random().nextInt(1 << 12)));
doc.add(new BinaryDocValuesField("bdv", newBytesRef(TestUtil.randomSimpleString(random()))));
doc.add(
new SortedDocValuesField("sdv", newBytesRef(TestUtil.randomSimpleString(random(), 2))));
}
int numValues = random().nextInt(5);
for (int i = 0; i < numValues; ++i) {
doc.add(
new SortedSetDocValuesField(
"ssdv", newBytesRef(TestUtil.randomSimpleString(random(), 2))));
}
numValues = random().nextInt(5);
for (int i = 0; i < numValues; ++i) {
doc.add(new SortedNumericDocValuesField("sndv", TestUtil.nextLong(random(), Long.MIN_VALUE, Long.MAX_VALUE)));
}
}
public void testOneNumber() throws IOException {
Directory directory = newDirectory();
RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory);
Document doc = new Document();
String longTerm = "longtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongterm";
String text = "This is the text to be indexed. " + longTerm;
doc.add(newTextField("fieldname", text, Field.Store.YES));
doc.add(new NumericDocValuesField("dv", 5));
iwriter.addDocument(doc);
iwriter.close();
// Now search the index:
IndexReader ireader = DirectoryReader.open(directory); // read-only=true
IndexSearcher isearcher = new IndexSearcher(ireader);
assertEquals(1, isearcher.count(new TermQuery(new Term("fieldname", longTerm))));
Query query = new TermQuery(new Term("fieldname", "text"));
TopDocs hits = isearcher.search(query, 1);
assertEquals(1, hits.totalHits.value);
// Iterate through the results:
for (int i = 0; i < hits.scoreDocs.length; i++) {
Document hitDoc = isearcher.doc(hits.scoreDocs[i].doc);
assertEquals(text, hitDoc.get("fieldname"));
assert ireader.leaves().size() == 1;
NumericDocValues dv = ireader.leaves().get(0).reader().getNumericDocValues("dv");
int docID = hits.scoreDocs[i].doc;
assertEquals(docID, dv.advance(docID));
assertEquals(5, dv.longValue());
}
ireader.close();
directory.close();
}
public void testOneFloat() throws IOException {
Directory directory = newDirectory();
RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory);
Document doc = new Document();
String longTerm = "longtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongterm";
String text = "This is the text to be indexed. " + longTerm;
doc.add(newTextField("fieldname", text, Field.Store.YES));
doc.add(new FloatDocValuesField("dv", 5.7f));
iwriter.addDocument(doc);
iwriter.close();
// Now search the index:
IndexReader ireader = DirectoryReader.open(directory); // read-only=true
IndexSearcher isearcher = new IndexSearcher(ireader);
assertEquals(1, isearcher.count(new TermQuery(new Term("fieldname", longTerm))));
Query query = new TermQuery(new Term("fieldname", "text"));
TopDocs hits = isearcher.search(query, 1);
assertEquals(1, hits.totalHits.value);
// Iterate through the results:
for (int i = 0; i < hits.scoreDocs.length; i++) {
int docID = hits.scoreDocs[i].doc;
Document hitDoc = isearcher.doc(docID);
assertEquals(text, hitDoc.get("fieldname"));
assert ireader.leaves().size() == 1;
NumericDocValues dv = ireader.leaves().get(0).reader().getNumericDocValues("dv");
assertEquals(docID, dv.advance(docID));
assertEquals(Float.floatToRawIntBits(5.7f), dv.longValue());
}
ireader.close();
directory.close();
}
public void testTwoNumbers() throws IOException {
Directory directory = newDirectory();
RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory);
Document doc = new Document();
String longTerm = "longtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongterm";
String text = "This is the text to be indexed. " + longTerm;
doc.add(newTextField("fieldname", text, Field.Store.YES));
doc.add(new NumericDocValuesField("dv1", 5));
doc.add(new NumericDocValuesField("dv2", 17));
iwriter.addDocument(doc);
iwriter.close();
// Now search the index:
IndexReader ireader = DirectoryReader.open(directory); // read-only=true
IndexSearcher isearcher = new IndexSearcher(ireader);
assertEquals(1, isearcher.count(new TermQuery(new Term("fieldname", longTerm))));
Query query = new TermQuery(new Term("fieldname", "text"));
TopDocs hits = isearcher.search(query, 1);
assertEquals(1, hits.totalHits.value);
// Iterate through the results:
for (int i = 0; i < hits.scoreDocs.length; i++) {
int docID = hits.scoreDocs[i].doc;
Document hitDoc = isearcher.doc(docID);
assertEquals(text, hitDoc.get("fieldname"));
assert ireader.leaves().size() == 1;
NumericDocValues dv = ireader.leaves().get(0).reader().getNumericDocValues("dv1");
assertEquals(docID, dv.advance(docID));
assertEquals(5, dv.longValue());
dv = ireader.leaves().get(0).reader().getNumericDocValues("dv2");
assertEquals(docID, dv.advance(docID));
assertEquals(17, dv.longValue());
}
ireader.close();
directory.close();
}
public void testTwoBinaryValues() throws IOException {
Directory directory = newDirectory();
RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory);
Document doc = new Document();
String longTerm = "longtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongterm";
String text = "This is the text to be indexed. " + longTerm;
doc.add(newTextField("fieldname", text, Field.Store.YES));
doc.add(new BinaryDocValuesField("dv1", newBytesRef(longTerm)));
doc.add(new BinaryDocValuesField("dv2", newBytesRef(text)));
iwriter.addDocument(doc);
iwriter.close();
// Now search the index:
IndexReader ireader = DirectoryReader.open(directory); // read-only=true
IndexSearcher isearcher = new IndexSearcher(ireader);
assertEquals(1, isearcher.count(new TermQuery(new Term("fieldname", longTerm))));
Query query = new TermQuery(new Term("fieldname", "text"));
TopDocs hits = isearcher.search(query, 1);
assertEquals(1, hits.totalHits.value);
// Iterate through the results:
for (int i = 0; i < hits.scoreDocs.length; i++) {
int hitDocID = hits.scoreDocs[i].doc;
Document hitDoc = isearcher.doc(hitDocID);
assertEquals(text, hitDoc.get("fieldname"));
assert ireader.leaves().size() == 1;
BinaryDocValues dv = ireader.leaves().get(0).reader().getBinaryDocValues("dv1");
assertEquals(hitDocID, dv.advance(hitDocID));
BytesRef scratch = dv.binaryValue();
assertEquals(newBytesRef(longTerm), scratch);
dv = ireader.leaves().get(0).reader().getBinaryDocValues("dv2");
assertEquals(hitDocID, dv.advance(hitDocID));
scratch = dv.binaryValue();
assertEquals(newBytesRef(text), scratch);
}
ireader.close();
directory.close();
}
public void testVariouslyCompressibleBinaryValues() throws IOException {
Directory directory = newDirectory();
RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory);
int numDocs = 1 + random().nextInt(100);
HashMap<Integer,BytesRef> writtenValues = new HashMap<>(numDocs);
// Small vocabulary ranges will be highly compressible
int vocabRange = 1 + random().nextInt(Byte.MAX_VALUE - 1);
for (int i = 0; i < numDocs; i++) {
Document doc = new Document();
// Generate random-sized byte array with random choice of bytes in vocab range
byte[] value = new byte[500 + random().nextInt(1024)];
for (int j = 0; j < value.length; j++) {
value[j] = (byte) random().nextInt(vocabRange);
}
BytesRef bytesRef = newBytesRef(value);
writtenValues.put(i, bytesRef);
doc.add(newTextField("id", Integer.toString(i), Field.Store.YES));
doc.add(new BinaryDocValuesField("dv1", bytesRef));
iwriter.addDocument(doc);
}
iwriter.forceMerge(1);
iwriter.close();
// Now search the index:
IndexReader ireader = DirectoryReader.open(directory); // read-only=true
IndexSearcher isearcher = new IndexSearcher(ireader);
for (int i = 0; i < numDocs; i++) {
String id = Integer.toString(i);
Query query = new TermQuery(new Term("id", id));
TopDocs hits = isearcher.search(query, 1);
assertEquals(1, hits.totalHits.value);
// Iterate through the results:
int hitDocID = hits.scoreDocs[0].doc;
Document hitDoc = isearcher.doc(hitDocID);
assertEquals(id, hitDoc.get("id"));
assert ireader.leaves().size() == 1;
BinaryDocValues dv = ireader.leaves().get(0).reader().getBinaryDocValues("dv1");
assertEquals(hitDocID, dv.advance(hitDocID));
BytesRef scratch = dv.binaryValue();
assertEquals(writtenValues.get(i), scratch);
}
ireader.close();
directory.close();
}
public void testTwoFieldsMixed() throws IOException {
Directory directory = newDirectory();
RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory);
Document doc = new Document();
String longTerm = "longtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongterm";
String text = "This is the text to be indexed. " + longTerm;
doc.add(newTextField("fieldname", text, Field.Store.YES));
doc.add(new NumericDocValuesField("dv1", 5));
doc.add(new BinaryDocValuesField("dv2", newBytesRef("hello world")));
iwriter.addDocument(doc);
iwriter.close();
// Now search the index:
IndexReader ireader = DirectoryReader.open(directory); // read-only=true
IndexSearcher isearcher = new IndexSearcher(ireader);
assertEquals(1, isearcher.count(new TermQuery(new Term("fieldname", longTerm))));
Query query = new TermQuery(new Term("fieldname", "text"));
TopDocs hits = isearcher.search(query, 1);
assertEquals(1, hits.totalHits.value);
// Iterate through the results:
for (int i = 0; i < hits.scoreDocs.length; i++) {
int docID = hits.scoreDocs[i].doc;
Document hitDoc = isearcher.doc(docID);
assertEquals(text, hitDoc.get("fieldname"));
assert ireader.leaves().size() == 1;
NumericDocValues dv = ireader.leaves().get(0).reader().getNumericDocValues("dv1");
assertEquals(docID, dv.advance(docID));
assertEquals(5, dv.longValue());
BinaryDocValues dv2 = ireader.leaves().get(0).reader().getBinaryDocValues("dv2");
assertEquals(docID, dv2.advance(docID));
assertEquals(newBytesRef("hello world"), dv2.binaryValue());
}
ireader.close();
directory.close();
}
public void testThreeFieldsMixed() throws IOException {
Directory directory = newDirectory();
RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory);
Document doc = new Document();
String longTerm = "longtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongterm";
String text = "This is the text to be indexed. " + longTerm;
doc.add(newTextField("fieldname", text, Field.Store.YES));
doc.add(new SortedDocValuesField("dv1", newBytesRef("hello hello")));
doc.add(new NumericDocValuesField("dv2", 5));
doc.add(new BinaryDocValuesField("dv3", newBytesRef("hello world")));
iwriter.addDocument(doc);
iwriter.close();
// Now search the index:
IndexReader ireader = DirectoryReader.open(directory); // read-only=true
IndexSearcher isearcher = new IndexSearcher(ireader);
assertEquals(1, isearcher.count(new TermQuery(new Term("fieldname", longTerm))));
Query query = new TermQuery(new Term("fieldname", "text"));
TopDocs hits = isearcher.search(query, 1);
assertEquals(1, hits.totalHits.value);
// Iterate through the results:
for (int i = 0; i < hits.scoreDocs.length; i++) {
int docID = hits.scoreDocs[i].doc;
Document hitDoc = isearcher.doc(docID);
assertEquals(text, hitDoc.get("fieldname"));
assert ireader.leaves().size() == 1;
SortedDocValues dv = ireader.leaves().get(0).reader().getSortedDocValues("dv1");
assertEquals(docID, dv.advance(docID));
int ord = dv.ordValue();
BytesRef scratch = dv.lookupOrd(ord);
assertEquals(newBytesRef("hello hello"), scratch);
NumericDocValues dv2 = ireader.leaves().get(0).reader().getNumericDocValues("dv2");
assertEquals(docID, dv2.advance(docID));
assertEquals(5, dv2.longValue());
BinaryDocValues dv3 = ireader.leaves().get(0).reader().getBinaryDocValues("dv3");
assertEquals(docID, dv3.advance(docID));
assertEquals(newBytesRef("hello world"), dv3.binaryValue());
}
ireader.close();
directory.close();
}
public void testThreeFieldsMixed2() throws IOException {
Directory directory = newDirectory();
RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory);
Document doc = new Document();
String longTerm = "longtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongterm";
String text = "This is the text to be indexed. " + longTerm;
doc.add(newTextField("fieldname", text, Field.Store.YES));
doc.add(new BinaryDocValuesField("dv1", newBytesRef("hello world")));
doc.add(new SortedDocValuesField("dv2", newBytesRef("hello hello")));
doc.add(new NumericDocValuesField("dv3", 5));
iwriter.addDocument(doc);
iwriter.close();
// Now search the index:
IndexReader ireader = DirectoryReader.open(directory); // read-only=true
IndexSearcher isearcher = new IndexSearcher(ireader);
assertEquals(1, isearcher.count(new TermQuery(new Term("fieldname", longTerm))));
Query query = new TermQuery(new Term("fieldname", "text"));
TopDocs hits = isearcher.search(query, 1);
assertEquals(1, hits.totalHits.value);
BytesRef scratch = newBytesRef();
// Iterate through the results:
for (int i = 0; i < hits.scoreDocs.length; i++) {
int docID = hits.scoreDocs[i].doc;
Document hitDoc = isearcher.doc(docID);
assertEquals(text, hitDoc.get("fieldname"));
assert ireader.leaves().size() == 1;
SortedDocValues dv = ireader.leaves().get(0).reader().getSortedDocValues("dv2");
assertEquals(docID, dv.advance(docID));
int ord = dv.ordValue();
scratch = dv.lookupOrd(ord);
assertEquals(newBytesRef("hello hello"), scratch);
NumericDocValues dv2 = ireader.leaves().get(0).reader().getNumericDocValues("dv3");
assertEquals(docID, dv2.advance(docID));
assertEquals(5, dv2.longValue());
BinaryDocValues dv3 = ireader.leaves().get(0).reader().getBinaryDocValues("dv1");
assertEquals(docID, dv3.advance(docID));
assertEquals(newBytesRef("hello world"), dv3.binaryValue());
}
ireader.close();
directory.close();
}
public void testTwoDocumentsNumeric() throws IOException {
Analyzer analyzer = new MockAnalyzer(random());
Directory directory = newDirectory();
IndexWriterConfig conf = newIndexWriterConfig(analyzer);
conf.setMergePolicy(newLogMergePolicy());
RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory, conf);
Document doc = new Document();
doc.add(new NumericDocValuesField("dv", 1));
iwriter.addDocument(doc);
doc = new Document();
doc.add(new NumericDocValuesField("dv", 2));
iwriter.addDocument(doc);
iwriter.forceMerge(1);
iwriter.close();
// Now search the index:
IndexReader ireader = DirectoryReader.open(directory); // read-only=true
assert ireader.leaves().size() == 1;
NumericDocValues dv = ireader.leaves().get(0).reader().getNumericDocValues("dv");
assertEquals(0, dv.nextDoc());
assertEquals(1, dv.longValue());
assertEquals(1, dv.nextDoc());
assertEquals(2, dv.longValue());
ireader.close();
directory.close();
}
public void testTwoDocumentsMerged() throws IOException {
Analyzer analyzer = new MockAnalyzer(random());
Directory directory = newDirectory();
IndexWriterConfig conf = newIndexWriterConfig(analyzer);
conf.setMergePolicy(newLogMergePolicy());
RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory, conf);
Document doc = new Document();
doc.add(newField("id", "0", StringField.TYPE_STORED));
doc.add(new NumericDocValuesField("dv", -10));
iwriter.addDocument(doc);
iwriter.commit();
doc = new Document();
doc.add(newField("id", "1", StringField.TYPE_STORED));
doc.add(new NumericDocValuesField("dv", 99));
iwriter.addDocument(doc);
iwriter.forceMerge(1);
iwriter.close();
// Now search the index:
IndexReader ireader = DirectoryReader.open(directory); // read-only=true
assert ireader.leaves().size() == 1;
NumericDocValues dv = ireader.leaves().get(0).reader().getNumericDocValues("dv");
for(int i=0;i<2;i++) {
Document doc2 = ireader.leaves().get(0).reader().document(i);
long expected;
if (doc2.get("id").equals("0")) {
expected = -10;
} else {
expected = 99;
}
assertEquals(i, dv.nextDoc());
assertEquals(expected, dv.longValue());
}
ireader.close();
directory.close();
}
public void testBigNumericRange() throws IOException {
Analyzer analyzer = new MockAnalyzer(random());
Directory directory = newDirectory();
IndexWriterConfig conf = newIndexWriterConfig(analyzer);
conf.setMergePolicy(newLogMergePolicy());
RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory, conf);
Document doc = new Document();
doc.add(new NumericDocValuesField("dv", Long.MIN_VALUE));
iwriter.addDocument(doc);
doc = new Document();
doc.add(new NumericDocValuesField("dv", Long.MAX_VALUE));
iwriter.addDocument(doc);
iwriter.forceMerge(1);
iwriter.close();
// Now search the index:
IndexReader ireader = DirectoryReader.open(directory); // read-only=true
assert ireader.leaves().size() == 1;
NumericDocValues dv = ireader.leaves().get(0).reader().getNumericDocValues("dv");
assertEquals(0, dv.nextDoc());
assertEquals(Long.MIN_VALUE, dv.longValue());
assertEquals(1, dv.nextDoc());
assertEquals(Long.MAX_VALUE, dv.longValue());
ireader.close();
directory.close();
}
public void testBigNumericRange2() throws IOException {
Analyzer analyzer = new MockAnalyzer(random());
Directory directory = newDirectory();
IndexWriterConfig conf = newIndexWriterConfig(analyzer);
conf.setMergePolicy(newLogMergePolicy());
RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory, conf);
Document doc = new Document();
doc.add(new NumericDocValuesField("dv", -8841491950446638677L));
iwriter.addDocument(doc);
doc = new Document();
doc.add(new NumericDocValuesField("dv", 9062230939892376225L));
iwriter.addDocument(doc);
iwriter.forceMerge(1);
iwriter.close();
// Now search the index:
IndexReader ireader = DirectoryReader.open(directory); // read-only=true
assert ireader.leaves().size() == 1;
NumericDocValues dv = ireader.leaves().get(0).reader().getNumericDocValues("dv");
assertEquals(0, dv.nextDoc());
assertEquals(-8841491950446638677L, dv.longValue());
assertEquals(1, dv.nextDoc());
assertEquals(9062230939892376225L, dv.longValue());
ireader.close();
directory.close();
}
public void testBytes() throws IOException {
Analyzer analyzer = new MockAnalyzer(random());
Directory directory = newDirectory();
IndexWriterConfig conf = newIndexWriterConfig(analyzer);
RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory, conf);
Document doc = new Document();
String longTerm = "longtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongterm";
String text = "This is the text to be indexed. " + longTerm;
doc.add(newTextField("fieldname", text, Field.Store.YES));
doc.add(new BinaryDocValuesField("dv", newBytesRef("hello world")));
iwriter.addDocument(doc);
iwriter.close();
// Now search the index:
IndexReader ireader = DirectoryReader.open(directory); // read-only=true
IndexSearcher isearcher = new IndexSearcher(ireader);
assertEquals(1, isearcher.count(new TermQuery(new Term("fieldname", longTerm))));
Query query = new TermQuery(new Term("fieldname", "text"));
TopDocs hits = isearcher.search(query, 1);
assertEquals(1, hits.totalHits.value);
// Iterate through the results:
for (int i = 0; i < hits.scoreDocs.length; i++) {
int hitDocID = hits.scoreDocs[i].doc;
Document hitDoc = isearcher.doc(hitDocID);
assertEquals(text, hitDoc.get("fieldname"));
assert ireader.leaves().size() == 1;
BinaryDocValues dv = ireader.leaves().get(0).reader().getBinaryDocValues("dv");
assertEquals(hitDocID, dv.advance(hitDocID));
assertEquals(newBytesRef("hello world"), dv.binaryValue());
}
ireader.close();
directory.close();
}
public void testBytesTwoDocumentsMerged() throws IOException {
Analyzer analyzer = new MockAnalyzer(random());
Directory directory = newDirectory();
IndexWriterConfig conf = newIndexWriterConfig(analyzer);
conf.setMergePolicy(newLogMergePolicy());
RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory, conf);
Document doc = new Document();
doc.add(newField("id", "0", StringField.TYPE_STORED));
doc.add(new BinaryDocValuesField("dv", newBytesRef("hello world 1")));
iwriter.addDocument(doc);
iwriter.commit();
doc = new Document();
doc.add(newField("id", "1", StringField.TYPE_STORED));
doc.add(new BinaryDocValuesField("dv", newBytesRef("hello 2")));
iwriter.addDocument(doc);
iwriter.forceMerge(1);
iwriter.close();
// Now search the index:
IndexReader ireader = DirectoryReader.open(directory); // read-only=true
assert ireader.leaves().size() == 1;
BinaryDocValues dv = ireader.leaves().get(0).reader().getBinaryDocValues("dv");
for(int i=0;i<2;i++) {
Document doc2 = ireader.leaves().get(0).reader().document(i);
String expected;
if (doc2.get("id").equals("0")) {
expected = "hello world 1";
} else {
expected = "hello 2";
}
assertEquals(i, dv.nextDoc());
assertEquals(expected, dv.binaryValue().utf8ToString());
}
ireader.close();
directory.close();
}
public void testBytesMergeAwayAllValues() throws IOException {
Directory directory = newDirectory();
Analyzer analyzer = new MockAnalyzer(random());
IndexWriterConfig iwconfig = newIndexWriterConfig(analyzer);
iwconfig.setMergePolicy(newLogMergePolicy());
RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory, iwconfig);
Document doc = new Document();
doc.add(new StringField("id", "0", Field.Store.NO));
iwriter.addDocument(doc);
doc = new Document();
doc.add(new StringField("id", "1", Field.Store.NO));
doc.add(new BinaryDocValuesField("field", newBytesRef("hi")));
iwriter.addDocument(doc);
iwriter.commit();
iwriter.deleteDocuments(new Term("id", "1"));
iwriter.forceMerge(1);
DirectoryReader ireader = iwriter.getReader();
iwriter.close();
BinaryDocValues dv = getOnlyLeafReader(ireader).getBinaryDocValues("field");
assertEquals(NO_MORE_DOCS, dv.nextDoc());
ireader.close();
directory.close();
}
public void testSortedBytes() throws IOException {
Analyzer analyzer = new MockAnalyzer(random());
Directory directory = newDirectory();
IndexWriterConfig conf = newIndexWriterConfig(analyzer);
RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory, conf);
Document doc = new Document();
String longTerm = "longtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongterm";
String text = "This is the text to be indexed. " + longTerm;
doc.add(newTextField("fieldname", text, Field.Store.YES));
doc.add(new SortedDocValuesField("dv", newBytesRef("hello world")));
iwriter.addDocument(doc);
iwriter.close();
// Now search the index:
IndexReader ireader = DirectoryReader.open(directory); // read-only=true
IndexSearcher isearcher = new IndexSearcher(ireader);
assertEquals(1, isearcher.count(new TermQuery(new Term("fieldname", longTerm))));
Query query = new TermQuery(new Term("fieldname", "text"));
TopDocs hits = isearcher.search(query, 1);
assertEquals(1, hits.totalHits.value);
BytesRef scratch = newBytesRef();
// Iterate through the results:
for (int i = 0; i < hits.scoreDocs.length; i++) {
int docID = hits.scoreDocs[i].doc;
Document hitDoc = isearcher.doc(docID);
assertEquals(text, hitDoc.get("fieldname"));
assert ireader.leaves().size() == 1;
SortedDocValues dv = ireader.leaves().get(0).reader().getSortedDocValues("dv");
assertEquals(docID, dv.advance(docID));
scratch = dv.lookupOrd(dv.ordValue());
assertEquals(newBytesRef("hello world"), scratch);
}
ireader.close();
directory.close();
}
public void testSortedBytesTwoDocuments() throws IOException {
Analyzer analyzer = new MockAnalyzer(random());
Directory directory = newDirectory();
IndexWriterConfig conf = newIndexWriterConfig(analyzer);
conf.setMergePolicy(newLogMergePolicy());
RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory, conf);
Document doc = new Document();
doc.add(new SortedDocValuesField("dv", newBytesRef("hello world 1")));
iwriter.addDocument(doc);
doc = new Document();
doc.add(new SortedDocValuesField("dv", newBytesRef("hello world 2")));
iwriter.addDocument(doc);
iwriter.forceMerge(1);
iwriter.close();
// Now search the index:
IndexReader ireader = DirectoryReader.open(directory); // read-only=true
assert ireader.leaves().size() == 1;
SortedDocValues dv = ireader.leaves().get(0).reader().getSortedDocValues("dv");
BytesRef scratch = newBytesRef();
assertEquals(0, dv.nextDoc());
scratch = dv.lookupOrd(dv.ordValue());
assertEquals("hello world 1", scratch.utf8ToString());
assertEquals(1, dv.nextDoc());
scratch = dv.lookupOrd(dv.ordValue());
assertEquals("hello world 2", scratch.utf8ToString());
ireader.close();
directory.close();
}
public void testSortedBytesThreeDocuments() throws IOException {
Analyzer analyzer = new MockAnalyzer(random());
Directory directory = newDirectory();
IndexWriterConfig conf = newIndexWriterConfig(analyzer);
conf.setMergePolicy(newLogMergePolicy());
RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory, conf);
Document doc = new Document();
doc.add(new SortedDocValuesField("dv", newBytesRef("hello world 1")));
iwriter.addDocument(doc);
doc = new Document();
doc.add(new SortedDocValuesField("dv", newBytesRef("hello world 2")));
iwriter.addDocument(doc);
doc = new Document();
doc.add(new SortedDocValuesField("dv", newBytesRef("hello world 1")));
iwriter.addDocument(doc);
iwriter.forceMerge(1);
iwriter.close();
// Now search the index:
IndexReader ireader = DirectoryReader.open(directory); // read-only=true
assert ireader.leaves().size() == 1;
SortedDocValues dv = ireader.leaves().get(0).reader().getSortedDocValues("dv");
assertEquals(2, dv.getValueCount());
assertEquals(0, dv.nextDoc());
assertEquals(0, dv.ordValue());
BytesRef scratch = dv.lookupOrd(0);
assertEquals("hello world 1", scratch.utf8ToString());
assertEquals(1, dv.nextDoc());
assertEquals(1, dv.ordValue());
scratch = dv.lookupOrd(1);
assertEquals("hello world 2", scratch.utf8ToString());
assertEquals(2, dv.nextDoc());
assertEquals(0, dv.ordValue());
ireader.close();
directory.close();
}
public void testSortedBytesTwoDocumentsMerged() throws IOException {
Analyzer analyzer = new MockAnalyzer(random());
Directory directory = newDirectory();
IndexWriterConfig conf = newIndexWriterConfig(analyzer);
conf.setMergePolicy(newLogMergePolicy());
RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory, conf);
Document doc = new Document();
doc.add(newField("id", "0", StringField.TYPE_STORED));
doc.add(new SortedDocValuesField("dv", newBytesRef("hello world 1")));
iwriter.addDocument(doc);
iwriter.commit();
doc = new Document();
doc.add(newField("id", "1", StringField.TYPE_STORED));
doc.add(new SortedDocValuesField("dv", newBytesRef("hello world 2")));
iwriter.addDocument(doc);
iwriter.forceMerge(1);
iwriter.close();
// Now search the index:
IndexReader ireader = DirectoryReader.open(directory); // read-only=true
assert ireader.leaves().size() == 1;
SortedDocValues dv = ireader.leaves().get(0).reader().getSortedDocValues("dv");
assertEquals(2, dv.getValueCount()); // 2 ords
assertEquals(0, dv.nextDoc());
BytesRef scratch = dv.lookupOrd(dv.ordValue());
assertEquals(newBytesRef("hello world 1"), scratch);
scratch = dv.lookupOrd(1);
assertEquals(newBytesRef("hello world 2"), scratch);
for (int i = 0; i < 2; i++) {
Document doc2 = ireader.leaves().get(0).reader().document(i);
String expected;
if (doc2.get("id").equals("0")) {
expected = "hello world 1";
} else {
expected = "hello world 2";
}
if (dv.docID() < i) {
assertEquals(i, dv.nextDoc());
}
scratch = dv.lookupOrd(dv.ordValue());
assertEquals(expected, scratch.utf8ToString());
}
ireader.close();
directory.close();
}
public void testSortedMergeAwayAllValues() throws IOException {
Directory directory = newDirectory();
Analyzer analyzer = new MockAnalyzer(random());
IndexWriterConfig iwconfig = newIndexWriterConfig(analyzer);
iwconfig.setMergePolicy(newLogMergePolicy());
RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory, iwconfig);
Document doc = new Document();
doc.add(new StringField("id", "0", Field.Store.NO));
iwriter.addDocument(doc);
doc = new Document();
doc.add(new StringField("id", "1", Field.Store.NO));
doc.add(new SortedDocValuesField("field", newBytesRef("hello")));
iwriter.addDocument(doc);
iwriter.commit();
iwriter.deleteDocuments(new Term("id", "1"));
iwriter.forceMerge(1);
DirectoryReader ireader = iwriter.getReader();
iwriter.close();
SortedDocValues dv = getOnlyLeafReader(ireader).getSortedDocValues("field");
assertEquals(NO_MORE_DOCS, dv.nextDoc());
ireader.close();
directory.close();
}
public void testBytesWithNewline() throws IOException {
Analyzer analyzer = new MockAnalyzer(random());
Directory directory = newDirectory();
IndexWriterConfig conf = newIndexWriterConfig(analyzer);
conf.setMergePolicy(newLogMergePolicy());
RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory, conf);
Document doc = new Document();
doc.add(new BinaryDocValuesField("dv", newBytesRef("hello\nworld\r1")));
iwriter.addDocument(doc);
iwriter.close();
// Now search the index:
IndexReader ireader = DirectoryReader.open(directory); // read-only=true
assert ireader.leaves().size() == 1;
BinaryDocValues dv = ireader.leaves().get(0).reader().getBinaryDocValues("dv");
assertEquals(0, dv.nextDoc());
assertEquals(newBytesRef("hello\nworld\r1"), dv.binaryValue());
ireader.close();
directory.close();
}
public void testMissingSortedBytes() throws IOException {
Analyzer analyzer = new MockAnalyzer(random());
Directory directory = newDirectory();
IndexWriterConfig conf = newIndexWriterConfig(analyzer);
conf.setMergePolicy(newLogMergePolicy());
RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory, conf);
Document doc = new Document();
doc.add(new SortedDocValuesField("dv", newBytesRef("hello world 2")));
iwriter.addDocument(doc);
// 2nd doc missing the DV field
iwriter.addDocument(new Document());
iwriter.close();
// Now search the index:
IndexReader ireader = DirectoryReader.open(directory); // read-only=true
assert ireader.leaves().size() == 1;
SortedDocValues dv = ireader.leaves().get(0).reader().getSortedDocValues("dv");
assertEquals(0, dv.nextDoc());
BytesRef scratch = dv.lookupOrd(dv.ordValue());
assertEquals(newBytesRef("hello world 2"), scratch);
assertEquals(NO_MORE_DOCS, dv.nextDoc());
ireader.close();
directory.close();
}
public void testSortedTermsEnum() throws IOException {
Directory directory = newDirectory();
Analyzer analyzer = new MockAnalyzer(random());
IndexWriterConfig iwconfig = newIndexWriterConfig(analyzer);
iwconfig.setMergePolicy(newLogMergePolicy());
RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory, iwconfig);
Document doc = new Document();
doc.add(new SortedDocValuesField("field", newBytesRef("hello")));
iwriter.addDocument(doc);
doc = new Document();
doc.add(new SortedDocValuesField("field", newBytesRef("world")));
iwriter.addDocument(doc);
doc = new Document();
doc.add(new SortedDocValuesField("field", newBytesRef("beer")));
iwriter.addDocument(doc);
iwriter.forceMerge(1);
DirectoryReader ireader = iwriter.getReader();
iwriter.close();
SortedDocValues dv = getOnlyLeafReader(ireader).getSortedDocValues("field");
assertEquals(3, dv.getValueCount());
TermsEnum termsEnum = dv.termsEnum();
// next()
assertEquals("beer", termsEnum.next().utf8ToString());
assertEquals(0, termsEnum.ord());
assertEquals("hello", termsEnum.next().utf8ToString());
assertEquals(1, termsEnum.ord());
assertEquals("world", termsEnum.next().utf8ToString());
assertEquals(2, termsEnum.ord());
// seekCeil()
assertEquals(SeekStatus.NOT_FOUND, termsEnum.seekCeil(newBytesRef("ha!")));
assertEquals("hello", termsEnum.term().utf8ToString());
assertEquals(1, termsEnum.ord());
assertEquals(SeekStatus.FOUND, termsEnum.seekCeil(newBytesRef("beer")));
assertEquals("beer", termsEnum.term().utf8ToString());
assertEquals(0, termsEnum.ord());
assertEquals(SeekStatus.END, termsEnum.seekCeil(newBytesRef("zzz")));
assertEquals(SeekStatus.NOT_FOUND, termsEnum.seekCeil(newBytesRef("aba")));
assertEquals(0, termsEnum.ord());
// seekExact()
assertTrue(termsEnum.seekExact(newBytesRef("beer")));
assertEquals("beer", termsEnum.term().utf8ToString());
assertEquals(0, termsEnum.ord());
assertTrue(termsEnum.seekExact(newBytesRef("hello")));
assertEquals(Codec.getDefault().toString(), "hello", termsEnum.term().utf8ToString());
assertEquals(1, termsEnum.ord());
assertTrue(termsEnum.seekExact(newBytesRef("world")));
assertEquals("world", termsEnum.term().utf8ToString());
assertEquals(2, termsEnum.ord());
assertFalse(termsEnum.seekExact(newBytesRef("bogus")));
// seek(ord)
termsEnum.seekExact(0);
assertEquals("beer", termsEnum.term().utf8ToString());
assertEquals(0, termsEnum.ord());
termsEnum.seekExact(1);
assertEquals("hello", termsEnum.term().utf8ToString());
assertEquals(1, termsEnum.ord());
termsEnum.seekExact(2);
assertEquals("world", termsEnum.term().utf8ToString());
assertEquals(2, termsEnum.ord());
// NORMAL automaton
termsEnum = dv.intersect(new CompiledAutomaton(new RegExp(".*l.*").toAutomaton()));
assertEquals("hello", termsEnum.next().utf8ToString());
assertEquals(1, termsEnum.ord());
assertEquals("world", termsEnum.next().utf8ToString());
assertEquals(2, termsEnum.ord());
assertNull(termsEnum.next());
// SINGLE automaton
termsEnum = dv.intersect(new CompiledAutomaton(new RegExp("hello").toAutomaton()));
assertEquals("hello", termsEnum.next().utf8ToString());
assertEquals(1, termsEnum.ord());
assertNull(termsEnum.next());
ireader.close();
directory.close();
}
public void testEmptySortedBytes() throws IOException {
Analyzer analyzer = new MockAnalyzer(random());
Directory directory = newDirectory();
IndexWriterConfig conf = newIndexWriterConfig(analyzer);
conf.setMergePolicy(newLogMergePolicy());
RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory, conf);
Document doc = new Document();
doc.add(new SortedDocValuesField("dv", newBytesRef("")));
iwriter.addDocument(doc);
doc = new Document();
doc.add(new SortedDocValuesField("dv", newBytesRef("")));
iwriter.addDocument(doc);
iwriter.forceMerge(1);
iwriter.close();
// Now search the index:
IndexReader ireader = DirectoryReader.open(directory); // read-only=true
assert ireader.leaves().size() == 1;
SortedDocValues dv = ireader.leaves().get(0).reader().getSortedDocValues("dv");
assertEquals(0, dv.nextDoc());
assertEquals(0, dv.ordValue());
assertEquals(1, dv.nextDoc());
assertEquals(0, dv.ordValue());
BytesRef scratch = dv.lookupOrd(0);
assertEquals("", scratch.utf8ToString());
ireader.close();
directory.close();
}
public void testEmptyBytes() throws IOException {
Analyzer analyzer = new MockAnalyzer(random());
Directory directory = newDirectory();
IndexWriterConfig conf = newIndexWriterConfig(analyzer);
conf.setMergePolicy(newLogMergePolicy());
RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory, conf);
Document doc = new Document();
doc.add(new BinaryDocValuesField("dv", newBytesRef("")));
iwriter.addDocument(doc);
doc = new Document();
doc.add(new BinaryDocValuesField("dv", newBytesRef("")));
iwriter.addDocument(doc);
iwriter.forceMerge(1);
iwriter.close();
// Now search the index:
IndexReader ireader = DirectoryReader.open(directory); // read-only=true
assert ireader.leaves().size() == 1;
BinaryDocValues dv = ireader.leaves().get(0).reader().getBinaryDocValues("dv");
assertEquals(0, dv.nextDoc());
assertEquals("", dv.binaryValue().utf8ToString());
assertEquals(1, dv.nextDoc());
assertEquals("", dv.binaryValue().utf8ToString());
ireader.close();
directory.close();
}
public void testVeryLargeButLegalBytes() throws IOException {
Analyzer analyzer = new MockAnalyzer(random());
Directory directory = newDirectory();
IndexWriterConfig conf = newIndexWriterConfig(analyzer);
conf.setMergePolicy(newLogMergePolicy());
RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory, conf);
Document doc = new Document();
byte bytes[] = new byte[32766];
random().nextBytes(bytes);
BytesRef b = newBytesRef(bytes);
doc.add(new BinaryDocValuesField("dv", b));
iwriter.addDocument(doc);
iwriter.close();
// Now search the index:
IndexReader ireader = DirectoryReader.open(directory); // read-only=true
assert ireader.leaves().size() == 1;
BinaryDocValues dv = ireader.leaves().get(0).reader().getBinaryDocValues("dv");
assertEquals(0, dv.nextDoc());
assertEquals(newBytesRef(bytes), dv.binaryValue());
ireader.close();
directory.close();
}
public void testVeryLargeButLegalSortedBytes() throws IOException {
Analyzer analyzer = new MockAnalyzer(random());
Directory directory = newDirectory();
IndexWriterConfig conf = newIndexWriterConfig(analyzer);
conf.setMergePolicy(newLogMergePolicy());
RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory, conf);
Document doc = new Document();
byte bytes[] = new byte[32766];
random().nextBytes(bytes);
BytesRef b = newBytesRef(bytes);
doc.add(new SortedDocValuesField("dv", b));
iwriter.addDocument(doc);
iwriter.close();
// Now search the index:
IndexReader ireader = DirectoryReader.open(directory); // read-only=true
assert ireader.leaves().size() == 1;
SortedDocValues dv = DocValues.getSorted(ireader.leaves().get(0).reader(), "dv");
assertEquals(0, dv.nextDoc());
assertEquals(newBytesRef(bytes), dv.lookupOrd(dv.ordValue()));
ireader.close();
directory.close();
}
public void testCodecUsesOwnBytes() throws IOException {
Analyzer analyzer = new MockAnalyzer(random());
Directory directory = newDirectory();
IndexWriterConfig conf = newIndexWriterConfig(analyzer);
conf.setMergePolicy(newLogMergePolicy());
RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory, conf);
Document doc = new Document();
doc.add(new BinaryDocValuesField("dv", newBytesRef("boo!")));
iwriter.addDocument(doc);
iwriter.close();
// Now search the index:
IndexReader ireader = DirectoryReader.open(directory); // read-only=true
assert ireader.leaves().size() == 1;
BinaryDocValues dv = ireader.leaves().get(0).reader().getBinaryDocValues("dv");
assertEquals(0, dv.nextDoc());
assertEquals("boo!", dv.binaryValue().utf8ToString());
ireader.close();
directory.close();
}
public void testCodecUsesOwnSortedBytes() throws IOException {
Analyzer analyzer = new MockAnalyzer(random());
Directory directory = newDirectory();
IndexWriterConfig conf = newIndexWriterConfig(analyzer);
conf.setMergePolicy(newLogMergePolicy());
RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory, conf);
Document doc = new Document();
doc.add(new SortedDocValuesField("dv", newBytesRef("boo!")));
iwriter.addDocument(doc);
iwriter.close();
// Now search the index:
IndexReader ireader = DirectoryReader.open(directory); // read-only=true
assert ireader.leaves().size() == 1;
BinaryDocValues dv = DocValues.getBinary(ireader.leaves().get(0).reader(), "dv");
byte mybytes[] = new byte[20];
assertEquals(0, dv.nextDoc());
assertEquals("boo!", dv.binaryValue().utf8ToString());
assertFalse(dv.binaryValue().bytes == mybytes);
ireader.close();
directory.close();
}
/*
* Simple test case to show how to use the API
*/
public void testDocValuesSimple() throws IOException {
Directory dir = newDirectory();
Analyzer analyzer = new MockAnalyzer(random());
IndexWriterConfig conf = newIndexWriterConfig(analyzer);
conf.setMergePolicy(newLogMergePolicy());
IndexWriter writer = new IndexWriter(dir, conf);
for (int i = 0; i < 5; i++) {
Document doc = new Document();
doc.add(new NumericDocValuesField("docId", i));
doc.add(new TextField("docId", "" + i, Field.Store.NO));
writer.addDocument(doc);
}
writer.commit();
writer.forceMerge(1, true);
writer.close();
DirectoryReader reader = DirectoryReader.open(dir);
assertEquals(1, reader.leaves().size());
IndexSearcher searcher = new IndexSearcher(reader);
BooleanQuery.Builder query = new BooleanQuery.Builder();
query.add(new TermQuery(new Term("docId", "0")), BooleanClause.Occur.SHOULD);
query.add(new TermQuery(new Term("docId", "1")), BooleanClause.Occur.SHOULD);
query.add(new TermQuery(new Term("docId", "2")), BooleanClause.Occur.SHOULD);
query.add(new TermQuery(new Term("docId", "3")), BooleanClause.Occur.SHOULD);
query.add(new TermQuery(new Term("docId", "4")), BooleanClause.Occur.SHOULD);
TopDocs search = searcher.search(query.build(), 10);
assertEquals(5, search.totalHits.value);
ScoreDoc[] scoreDocs = search.scoreDocs;
NumericDocValues docValues = getOnlyLeafReader(reader).getNumericDocValues("docId");
for (int i = 0; i < scoreDocs.length; i++) {
assertEquals(i, scoreDocs[i].doc);
assertEquals(i, docValues.advance(i));
assertEquals(i, docValues.longValue());
}
reader.close();
dir.close();
}
public void testRandomSortedBytes() throws IOException {
Directory dir = newDirectory();
IndexWriterConfig cfg = newIndexWriterConfig(new MockAnalyzer(random()));
RandomIndexWriter w = new RandomIndexWriter(random(), dir, cfg);
int numDocs = atLeast(100);
BytesRefHash hash = new BytesRefHash();
Map<String, String> docToString = new HashMap<>();
int maxLength = TestUtil.nextInt(random(), 1, 50);
for (int i = 0; i < numDocs; i++) {
Document doc = new Document();
doc.add(newTextField("id", "" + i, Field.Store.YES));
String string = TestUtil.randomRealisticUnicodeString(random(), 1, maxLength);
BytesRef br = newBytesRef(string);
doc.add(new SortedDocValuesField("field", br));
hash.add(br);
docToString.put("" + i, string);
w.addDocument(doc);
}
if (rarely()) {
w.commit();
}
int numDocsNoValue = atLeast(10);
for (int i = 0; i < numDocsNoValue; i++) {
Document doc = new Document();
doc.add(newTextField("id", "noValue", Field.Store.YES));
w.addDocument(doc);
}
if (rarely()) {
w.commit();
}
for (int i = 0; i < numDocs; i++) {
Document doc = new Document();
String id = "" + i + numDocs;
doc.add(newTextField("id", id, Field.Store.YES));
String string = TestUtil.randomRealisticUnicodeString(random(), 1, maxLength);
BytesRef br = newBytesRef(string);
hash.add(br);
docToString.put(id, string);
doc.add(new SortedDocValuesField("field", br));
w.addDocument(doc);
}
w.commit();
IndexReader reader = w.getReader();
SortedDocValues docValues = MultiDocValues.getSortedValues(reader, "field");
int[] sort = hash.sort();
BytesRef expected = newBytesRef();
assertEquals(hash.size(), docValues.getValueCount());
for (int i = 0; i < hash.size(); i++) {
hash.get(sort[i], expected);
final BytesRef actual = docValues.lookupOrd(i);
assertEquals(expected.utf8ToString(), actual.utf8ToString());
int ord = docValues.lookupTerm(expected);
assertEquals(i, ord);
}
Set<Entry<String, String>> entrySet = docToString.entrySet();
for (Entry<String, String> entry : entrySet) {
// pk lookup
PostingsEnum termPostingsEnum =
TestUtil.docs(random(), reader, "id", newBytesRef(entry.getKey()), null, 0);
int docId = termPostingsEnum.nextDoc();
expected = newBytesRef(entry.getValue());
docValues = MultiDocValues.getSortedValues(reader, "field");
assertEquals(docId, docValues.advance(docId));
final BytesRef actual = docValues.binaryValue();
assertEquals(expected, actual);
}
reader.close();
w.close();
dir.close();
}
private void doTestNumericsVsStoredFields(double density, LongSupplier longs) throws Exception {
doTestNumericsVsStoredFields(density, longs, 256);
}
private void doTestNumericsVsStoredFields(double density, LongSupplier longs, int minDocs) throws Exception {
Directory dir = newDirectory();
IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random()));
RandomIndexWriter writer = new RandomIndexWriter(random(), dir, conf);
Document doc = new Document();
Field idField = new StringField("id", "", Field.Store.NO);
Field storedField = newStringField("stored", "", Field.Store.YES);
Field dvField = new NumericDocValuesField("dv", 0);
doc.add(idField);
doc.add(storedField);
doc.add(dvField);
// index some docs
int numDocs = atLeast((int) (minDocs*1.172));
// numDocs should be always > 256 so that in case of a codec that optimizes
// for numbers of values <= 256, all storage layouts are tested
assert numDocs > 256;
for (int i = 0; i < numDocs; i++) {
if (random().nextDouble() > density) {
writer.addDocument(new Document());
continue;
}
idField.setStringValue(Integer.toString(i));
long value = longs.getAsLong();
storedField.setStringValue(Long.toString(value));
dvField.setLongValue(value);
writer.addDocument(doc);
if (random().nextInt(31) == 0) {
writer.commit();
}
}
// delete some docs
int numDeletions = random().nextInt(numDocs/10);
for (int i = 0; i < numDeletions; i++) {
int id = random().nextInt(numDocs);
writer.deleteDocuments(new Term("id", Integer.toString(id)));
}
// merge some segments and ensure that at least one of them has more than
// max(256, minDocs) values
writer.forceMerge(numDocs / Math.max(256, minDocs));
writer.close();
// compare
assertDVIterate(dir);
dir.close();
}
// Asserts equality of stored value vs. DocValue by iterating DocValues one at a time
protected void assertDVIterate(Directory dir) throws IOException {
DirectoryReader ir = DirectoryReader.open(dir);
TestUtil.checkReader(ir);
for (LeafReaderContext context : ir.leaves()) {
LeafReader r = context.reader();
NumericDocValues docValues = DocValues.getNumeric(r, "dv");
docValues.nextDoc();
for (int i = 0; i < r.maxDoc(); i++) {
String storedValue = r.document(i).get("stored");
if (storedValue == null) {
assertTrue(docValues.docID() > i);
} else {
assertEquals(i, docValues.docID());
assertEquals(Long.parseLong(storedValue), docValues.longValue());
docValues.nextDoc();
}
}
assertEquals(DocIdSetIterator.NO_MORE_DOCS, docValues.docID());
}
ir.close();
}
private void doTestSortedNumericsVsStoredFields(LongSupplier counts, LongSupplier values) throws Exception {
Directory dir = newDirectory();
IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random()));
RandomIndexWriter writer = new RandomIndexWriter(random(), dir, conf);
// index some docs
int numDocs = atLeast(300);
// numDocs should be always > 256 so that in case of a codec that optimizes
// for numbers of values <= 256, all storage layouts are tested
assert numDocs > 256;
for (int i = 0; i < numDocs; i++) {
Document doc = new Document();
doc.add(new StringField("id", Integer.toString(i), Field.Store.NO));
int valueCount = (int) counts.getAsLong();
long valueArray[] = new long[valueCount];
for (int j = 0; j < valueCount; j++) {
long value = values.getAsLong();
valueArray[j] = value;
doc.add(new SortedNumericDocValuesField("dv", value));
}
Arrays.sort(valueArray);
for (int j = 0; j < valueCount; j++) {
doc.add(new StoredField("stored", Long.toString(valueArray[j])));
}
writer.addDocument(doc);
if (random().nextInt(31) == 0) {
writer.commit();
}
}
// delete some docs
int numDeletions = random().nextInt(numDocs/10);
for (int i = 0; i < numDeletions; i++) {
int id = random().nextInt(numDocs);
writer.deleteDocuments(new Term("id", Integer.toString(id)));
}
// merge some segments and ensure that at least one of them has more than
// 256 values
writer.forceMerge(numDocs / 256);
writer.close();
// compare
DirectoryReader ir = DirectoryReader.open(dir);
TestUtil.checkReader(ir);
for (LeafReaderContext context : ir.leaves()) {
LeafReader r = context.reader();
SortedNumericDocValues docValues = DocValues.getSortedNumeric(r, "dv");
for (int i = 0; i < r.maxDoc(); i++) {
if (i > docValues.docID()) {
docValues.nextDoc();
}
String expected[] = r.document(i).getValues("stored");
if (i < docValues.docID()) {
assertEquals(0, expected.length);
} else {
String actual[] = new String[docValues.docValueCount()];
for (int j = 0; j < actual.length; j++) {
actual[j] = Long.toString(docValues.nextValue());
}
assertArrayEquals(expected, actual);
}
}
}
ir.close();
dir.close();
}
public void testBooleanNumericsVsStoredFields() throws Exception {
int numIterations = atLeast(1);
for (int i = 0; i < numIterations; i++) {
doTestNumericsVsStoredFields(1, () -> random().nextInt(2));
}
}
public void testSparseBooleanNumericsVsStoredFields() throws Exception {
int numIterations = atLeast(1);
for (int i = 0; i < numIterations; i++) {
doTestNumericsVsStoredFields(random().nextDouble(), () -> random().nextInt(2));
}
}
public void testByteNumericsVsStoredFields() throws Exception {
int numIterations = atLeast(1);
for (int i = 0; i < numIterations; i++) {
doTestNumericsVsStoredFields(1, () -> TestUtil.nextInt(random(), Byte.MIN_VALUE, Byte.MAX_VALUE));
}
}
public void testSparseByteNumericsVsStoredFields() throws Exception {
int numIterations = atLeast(1);
for (int i = 0; i < numIterations; i++) {
doTestNumericsVsStoredFields(random().nextDouble(), () -> TestUtil.nextInt(random(), Byte.MIN_VALUE, Byte.MAX_VALUE));
}
}
public void testShortNumericsVsStoredFields() throws Exception {
int numIterations = atLeast(1);
for (int i = 0; i < numIterations; i++) {
doTestNumericsVsStoredFields(1, () -> TestUtil.nextInt(random(), Short.MIN_VALUE, Short.MAX_VALUE));
}
}
public void testSparseShortNumericsVsStoredFields() throws Exception {
int numIterations = atLeast(1);
for (int i = 0; i < numIterations; i++) {
doTestNumericsVsStoredFields(random().nextDouble(), () -> TestUtil.nextInt(random(), Short.MIN_VALUE, Short.MAX_VALUE));
}
}
public void testIntNumericsVsStoredFields() throws Exception {
int numIterations = atLeast(1);
for (int i = 0; i < numIterations; i++) {
doTestNumericsVsStoredFields(1, random()::nextInt);
}
}
public void testSparseIntNumericsVsStoredFields() throws Exception {
int numIterations = atLeast(1);
for (int i = 0; i < numIterations; i++) {
doTestNumericsVsStoredFields(random().nextDouble(), random()::nextInt);
}
}
public void testLongNumericsVsStoredFields() throws Exception {
int numIterations = atLeast(1);
for (int i = 0; i < numIterations; i++) {
doTestNumericsVsStoredFields(1, random()::nextLong);
}
}
public void testSparseLongNumericsVsStoredFields() throws Exception {
int numIterations = atLeast(1);
for (int i = 0; i < numIterations; i++) {
doTestNumericsVsStoredFields(random().nextDouble(), random()::nextLong);
}
}
private void doTestBinaryVsStoredFields(double density, Supplier<byte[]> bytes) throws Exception {
Directory dir = newDirectory();
IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random()));
RandomIndexWriter writer = new RandomIndexWriter(random(), dir, conf);
Document doc = new Document();
Field idField = new StringField("id", "", Field.Store.NO);
Field storedField = new StoredField("stored", new byte[0]);
Field dvField = new BinaryDocValuesField("dv", newBytesRef());
doc.add(idField);
doc.add(storedField);
doc.add(dvField);
// index some docs
int numDocs = atLeast(300);
for (int i = 0; i < numDocs; i++) {
if (random().nextDouble() > density) {
writer.addDocument(new Document());
continue;
}
idField.setStringValue(Integer.toString(i));
byte[] buffer = bytes.get();
storedField.setBytesValue(buffer);
dvField.setBytesValue(buffer);
writer.addDocument(doc);
if (random().nextInt(31) == 0) {
writer.commit();
}
}
// delete some docs
int numDeletions = random().nextInt(numDocs/10);
for (int i = 0; i < numDeletions; i++) {
int id = random().nextInt(numDocs);
writer.deleteDocuments(new Term("id", Integer.toString(id)));
}
// compare
DirectoryReader ir = writer.getReader();
TestUtil.checkReader(ir);
for (LeafReaderContext context : ir.leaves()) {
LeafReader r = context.reader();
BinaryDocValues docValues = DocValues.getBinary(r, "dv");
docValues.nextDoc();
for (int i = 0; i < r.maxDoc(); i++) {
BytesRef binaryValue = r.document(i).getBinaryValue("stored");
if (binaryValue == null) {
assertTrue(docValues.docID() > i);
} else {
assertEquals(i, docValues.docID());
assertEquals(binaryValue, docValues.binaryValue());
docValues.nextDoc();
}
}
assertEquals(DocIdSetIterator.NO_MORE_DOCS, docValues.docID());
}
ir.close();
// compare again
writer.forceMerge(1);
ir = writer.getReader();
TestUtil.checkReader(ir);
for (LeafReaderContext context : ir.leaves()) {
LeafReader r = context.reader();
BinaryDocValues docValues = DocValues.getBinary(r, "dv");
docValues.nextDoc();
for (int i = 0; i < r.maxDoc(); i++) {
BytesRef binaryValue = r.document(i).getBinaryValue("stored");
if (binaryValue == null) {
assertTrue(docValues.docID() > i);
} else {
assertEquals(i, docValues.docID());
assertEquals(binaryValue, docValues.binaryValue());
docValues.nextDoc();
}
}
assertEquals(DocIdSetIterator.NO_MORE_DOCS, docValues.docID());
}
ir.close();
writer.close();
dir.close();
}
public void testBinaryFixedLengthVsStoredFields() throws Exception {
doTestBinaryFixedLengthVsStoredFields(1);
}
public void testSparseBinaryFixedLengthVsStoredFields() throws Exception {
doTestBinaryFixedLengthVsStoredFields(random().nextDouble());
}
private void doTestBinaryFixedLengthVsStoredFields(double density) throws Exception {
int numIterations = atLeast(1);
for (int i = 0; i < numIterations; i++) {
int fixedLength = TestUtil.nextInt(random(), 0, 10);
doTestBinaryVsStoredFields(density, () -> {
byte buffer[] = new byte[fixedLength];
random().nextBytes(buffer);
return buffer;
});
}
}
public void testBinaryVariableLengthVsStoredFields() throws Exception {
doTestBinaryVariableLengthVsStoredFields(1);
}
public void testSparseBinaryVariableLengthVsStoredFields() throws Exception {
doTestBinaryVariableLengthVsStoredFields(random().nextDouble());
}
public void doTestBinaryVariableLengthVsStoredFields(double density) throws Exception {
int numIterations = atLeast(1);
for (int i = 0; i < numIterations; i++) {
doTestBinaryVsStoredFields(density, () -> {
final int length = random().nextInt(10);
byte buffer[] = new byte[length];
random().nextBytes(buffer);
return buffer;
});
}
}
protected void doTestSortedVsStoredFields(int numDocs, double density, Supplier<byte[]> bytes) throws Exception {
Directory dir = newFSDirectory(createTempDir("dvduel"));
IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random()));
RandomIndexWriter writer = new RandomIndexWriter(random(), dir, conf);
Document doc = new Document();
Field idField = new StringField("id", "", Field.Store.NO);
Field storedField = new StoredField("stored", new byte[0]);
Field dvField = new SortedDocValuesField("dv", newBytesRef());
doc.add(idField);
doc.add(storedField);
doc.add(dvField);
// index some docs
for (int i = 0; i < numDocs; i++) {
if (random().nextDouble() > density) {
writer.addDocument(new Document());
continue;
}
idField.setStringValue(Integer.toString(i));
byte[] buffer = bytes.get();
storedField.setBytesValue(buffer);
dvField.setBytesValue(buffer);
writer.addDocument(doc);
if (random().nextInt(31) == 0) {
writer.commit();
}
}
// delete some docs
int numDeletions = random().nextInt(numDocs/10);
for (int i = 0; i < numDeletions; i++) {
int id = random().nextInt(numDocs);
writer.deleteDocuments(new Term("id", Integer.toString(id)));
}
// compare
DirectoryReader ir = writer.getReader();
TestUtil.checkReader(ir);
for (LeafReaderContext context : ir.leaves()) {
LeafReader r = context.reader();
BinaryDocValues docValues = DocValues.getBinary(r, "dv");
docValues.nextDoc();
for (int i = 0; i < r.maxDoc(); i++) {
BytesRef binaryValue = r.document(i).getBinaryValue("stored");
if (binaryValue == null) {
assertTrue(docValues.docID() > i);
} else {
assertEquals(i, docValues.docID());
assertEquals(binaryValue, docValues.binaryValue());
docValues.nextDoc();
}
}
assertEquals(DocIdSetIterator.NO_MORE_DOCS, docValues.docID());
}
ir.close();
writer.forceMerge(1);
// compare again
ir = writer.getReader();
TestUtil.checkReader(ir);
for (LeafReaderContext context : ir.leaves()) {
LeafReader r = context.reader();
BinaryDocValues docValues = DocValues.getBinary(r, "dv");
docValues.nextDoc();
for (int i = 0; i < r.maxDoc(); i++) {
BytesRef binaryValue = r.document(i).getBinaryValue("stored");
if (binaryValue == null) {
assertTrue(docValues.docID() > i);
} else {
assertEquals(i, docValues.docID());
assertEquals(binaryValue, docValues.binaryValue());
docValues.nextDoc();
}
}
assertEquals(DocIdSetIterator.NO_MORE_DOCS, docValues.docID());
}
ir.close();
writer.close();
dir.close();
}
public void testSortedFixedLengthVsStoredFields() throws Exception {
int numIterations = atLeast(1);
for (int i = 0; i < numIterations; i++) {
int fixedLength = TestUtil.nextInt(random(), 1, 10);
doTestSortedVsStoredFields(atLeast(300), 1, fixedLength, fixedLength);
}
}
public void testSparseSortedFixedLengthVsStoredFields() throws Exception {
int numIterations = atLeast(1);
for (int i = 0; i < numIterations; i++) {
int fixedLength = TestUtil.nextInt(random(), 1, 10);
doTestSortedVsStoredFields(atLeast(300), random().nextDouble(), fixedLength, fixedLength);
}
}
public void testSortedVariableLengthVsStoredFields() throws Exception {
int numIterations = atLeast(1);
for (int i = 0; i < numIterations; i++) {
doTestSortedVsStoredFields(atLeast(300), 1, 1, 10);
}
}
public void testSparseSortedVariableLengthVsStoredFields() throws Exception {
int numIterations = atLeast(1);
for (int i = 0; i < numIterations; i++) {
doTestSortedVsStoredFields(atLeast(300), random().nextDouble(), 1, 10);
}
}
protected void doTestSortedVsStoredFields(int numDocs, double density, int minLength, int maxLength) throws Exception {
doTestSortedVsStoredFields(numDocs, density, () -> {
int length = TestUtil.nextInt(random(), minLength, maxLength);
byte[] buffer = new byte[length];
random().nextBytes(buffer);
return buffer;
});
}
public void testSortedSetOneValue() throws IOException {
Directory directory = newDirectory();
RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory);
Document doc = new Document();
doc.add(new SortedSetDocValuesField("field", newBytesRef("hello")));
iwriter.addDocument(doc);
DirectoryReader ireader = iwriter.getReader();
iwriter.close();
SortedSetDocValues dv = getOnlyLeafReader(ireader).getSortedSetDocValues("field");
assertEquals(0, dv.nextDoc());
assertEquals(0, dv.nextOrd());
assertEquals(NO_MORE_ORDS, dv.nextOrd());
BytesRef bytes = dv.lookupOrd(0);
assertEquals(newBytesRef("hello"), bytes);
ireader.close();
directory.close();
}
public void testSortedSetTwoFields() throws IOException {
Directory directory = newDirectory();
RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory);
Document doc = new Document();
doc.add(new SortedSetDocValuesField("field", newBytesRef("hello")));
doc.add(new SortedSetDocValuesField("field2", newBytesRef("world")));
iwriter.addDocument(doc);
DirectoryReader ireader = iwriter.getReader();
iwriter.close();
SortedSetDocValues dv = getOnlyLeafReader(ireader).getSortedSetDocValues("field");
assertEquals(0, dv.nextDoc());
assertEquals(0, dv.nextOrd());
assertEquals(NO_MORE_ORDS, dv.nextOrd());
BytesRef bytes = dv.lookupOrd(0);
assertEquals(newBytesRef("hello"), bytes);
dv = getOnlyLeafReader(ireader).getSortedSetDocValues("field2");
assertEquals(0, dv.nextDoc());
assertEquals(0, dv.nextOrd());
assertEquals(NO_MORE_ORDS, dv.nextOrd());
bytes = dv.lookupOrd(0);
assertEquals(newBytesRef("world"), bytes);
ireader.close();
directory.close();
}
public void testSortedSetTwoDocumentsMerged() throws IOException {
Directory directory = newDirectory();
Analyzer analyzer = new MockAnalyzer(random());
IndexWriterConfig iwconfig = newIndexWriterConfig(analyzer);
iwconfig.setMergePolicy(newLogMergePolicy());
RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory, iwconfig);
Document doc = new Document();
doc.add(new SortedSetDocValuesField("field", newBytesRef("hello")));
iwriter.addDocument(doc);
iwriter.commit();
doc = new Document();
doc.add(new SortedSetDocValuesField("field", newBytesRef("world")));
iwriter.addDocument(doc);
iwriter.forceMerge(1);
DirectoryReader ireader = iwriter.getReader();
iwriter.close();
SortedSetDocValues dv = getOnlyLeafReader(ireader).getSortedSetDocValues("field");
assertEquals(2, dv.getValueCount());
assertEquals(0, dv.nextDoc());
assertEquals(0, dv.nextOrd());
assertEquals(NO_MORE_ORDS, dv.nextOrd());
BytesRef bytes = dv.lookupOrd(0);
assertEquals(newBytesRef("hello"), bytes);
assertEquals(1, dv.nextDoc());
assertEquals(1, dv.nextOrd());
assertEquals(NO_MORE_ORDS, dv.nextOrd());
bytes = dv.lookupOrd(1);
assertEquals(newBytesRef("world"), bytes);
ireader.close();
directory.close();
}
public void testSortedSetTwoValues() throws IOException {
Directory directory = newDirectory();
RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory);
Document doc = new Document();
doc.add(new SortedSetDocValuesField("field", newBytesRef("hello")));
doc.add(new SortedSetDocValuesField("field", newBytesRef("world")));
iwriter.addDocument(doc);
DirectoryReader ireader = iwriter.getReader();
iwriter.close();
SortedSetDocValues dv = getOnlyLeafReader(ireader).getSortedSetDocValues("field");
assertEquals(0, dv.nextDoc());
assertEquals(0, dv.nextOrd());
assertEquals(1, dv.nextOrd());
assertEquals(NO_MORE_ORDS, dv.nextOrd());
BytesRef bytes = dv.lookupOrd(0);
assertEquals(newBytesRef("hello"), bytes);
bytes = dv.lookupOrd(1);
assertEquals(newBytesRef("world"), bytes);
ireader.close();
directory.close();
}
public void testSortedSetTwoValuesUnordered() throws IOException {
Directory directory = newDirectory();
RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory);
Document doc = new Document();
doc.add(new SortedSetDocValuesField("field", newBytesRef("world")));
doc.add(new SortedSetDocValuesField("field", newBytesRef("hello")));
iwriter.addDocument(doc);
DirectoryReader ireader = iwriter.getReader();
iwriter.close();
SortedSetDocValues dv = getOnlyLeafReader(ireader).getSortedSetDocValues("field");
assertEquals(0, dv.nextDoc());
assertEquals(0, dv.nextOrd());
assertEquals(1, dv.nextOrd());
assertEquals(NO_MORE_ORDS, dv.nextOrd());
BytesRef bytes = dv.lookupOrd(0);
assertEquals(newBytesRef("hello"), bytes);
bytes = dv.lookupOrd(1);
assertEquals(newBytesRef("world"), bytes);
ireader.close();
directory.close();
}
public void testSortedSetThreeValuesTwoDocs() throws IOException {
Directory directory = newDirectory();
Analyzer analyzer = new MockAnalyzer(random());
IndexWriterConfig iwconfig = newIndexWriterConfig(analyzer);
iwconfig.setMergePolicy(newLogMergePolicy());
RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory, iwconfig);
Document doc = new Document();
doc.add(new SortedSetDocValuesField("field", newBytesRef("hello")));
doc.add(new SortedSetDocValuesField("field", newBytesRef("world")));
iwriter.addDocument(doc);
iwriter.commit();
doc = new Document();
doc.add(new SortedSetDocValuesField("field", newBytesRef("hello")));
doc.add(new SortedSetDocValuesField("field", newBytesRef("beer")));
iwriter.addDocument(doc);
iwriter.forceMerge(1);
DirectoryReader ireader = iwriter.getReader();
iwriter.close();
SortedSetDocValues dv = getOnlyLeafReader(ireader).getSortedSetDocValues("field");
assertEquals(3, dv.getValueCount());
assertEquals(0, dv.nextDoc());
assertEquals(1, dv.nextOrd());
assertEquals(2, dv.nextOrd());
assertEquals(NO_MORE_ORDS, dv.nextOrd());
assertEquals(1, dv.nextDoc());
assertEquals(0, dv.nextOrd());
assertEquals(1, dv.nextOrd());
assertEquals(NO_MORE_ORDS, dv.nextOrd());
BytesRef bytes = dv.lookupOrd(0);
assertEquals(newBytesRef("beer"), bytes);
bytes = dv.lookupOrd(1);
assertEquals(newBytesRef("hello"), bytes);
bytes = dv.lookupOrd(2);
assertEquals(newBytesRef("world"), bytes);
ireader.close();
directory.close();
}
public void testSortedSetTwoDocumentsLastMissing() throws IOException {
Directory directory = newDirectory();
Analyzer analyzer = new MockAnalyzer(random());
IndexWriterConfig iwconfig = newIndexWriterConfig(analyzer);
iwconfig.setMergePolicy(newLogMergePolicy());
RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory, iwconfig);
Document doc = new Document();
doc.add(new SortedSetDocValuesField("field", newBytesRef("hello")));
iwriter.addDocument(doc);
doc = new Document();
iwriter.addDocument(doc);
iwriter.forceMerge(1);
DirectoryReader ireader = iwriter.getReader();
iwriter.close();
SortedSetDocValues dv = getOnlyLeafReader(ireader).getSortedSetDocValues("field");
assertEquals(1, dv.getValueCount());
assertEquals(0, dv.nextDoc());
assertEquals(0, dv.nextOrd());
assertEquals(NO_MORE_ORDS, dv.nextOrd());
BytesRef bytes = dv.lookupOrd(0);
assertEquals(newBytesRef("hello"), bytes);
ireader.close();
directory.close();
}
public void testSortedSetTwoDocumentsLastMissingMerge() throws IOException {
Directory directory = newDirectory();
Analyzer analyzer = new MockAnalyzer(random());
IndexWriterConfig iwconfig = newIndexWriterConfig(analyzer);
iwconfig.setMergePolicy(newLogMergePolicy());
RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory, iwconfig);
Document doc = new Document();
doc.add(new SortedSetDocValuesField("field", newBytesRef("hello")));
iwriter.addDocument(doc);
iwriter.commit();
doc = new Document();
iwriter.addDocument(doc);
iwriter.forceMerge(1);
DirectoryReader ireader = iwriter.getReader();
iwriter.close();
SortedSetDocValues dv = getOnlyLeafReader(ireader).getSortedSetDocValues("field");
assertEquals(1, dv.getValueCount());
assertEquals(0, dv.nextDoc());
assertEquals(0, dv.nextOrd());
assertEquals(NO_MORE_ORDS, dv.nextOrd());
BytesRef bytes = dv.lookupOrd(0);
assertEquals(newBytesRef("hello"), bytes);
ireader.close();
directory.close();
}
public void testSortedSetTwoDocumentsFirstMissing() throws IOException {
Directory directory = newDirectory();
Analyzer analyzer = new MockAnalyzer(random());
IndexWriterConfig iwconfig = newIndexWriterConfig(analyzer);
iwconfig.setMergePolicy(newLogMergePolicy());
RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory, iwconfig);
Document doc = new Document();
iwriter.addDocument(doc);
doc = new Document();
doc.add(new SortedSetDocValuesField("field", newBytesRef("hello")));
iwriter.addDocument(doc);
iwriter.forceMerge(1);
DirectoryReader ireader = iwriter.getReader();
iwriter.close();
SortedSetDocValues dv = getOnlyLeafReader(ireader).getSortedSetDocValues("field");
assertEquals(1, dv.getValueCount());
assertEquals(1, dv.nextDoc());
assertEquals(0, dv.nextOrd());
assertEquals(NO_MORE_ORDS, dv.nextOrd());
BytesRef bytes = dv.lookupOrd(0);
assertEquals(newBytesRef("hello"), bytes);
ireader.close();
directory.close();
}
public void testSortedSetTwoDocumentsFirstMissingMerge() throws IOException {
Directory directory = newDirectory();
Analyzer analyzer = new MockAnalyzer(random());
IndexWriterConfig iwconfig = newIndexWriterConfig(analyzer);
iwconfig.setMergePolicy(newLogMergePolicy());
RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory, iwconfig);
Document doc = new Document();
iwriter.addDocument(doc);
iwriter.commit();
doc = new Document();
doc.add(new SortedSetDocValuesField("field", newBytesRef("hello")));
iwriter.addDocument(doc);
iwriter.forceMerge(1);
DirectoryReader ireader = iwriter.getReader();
iwriter.close();
SortedSetDocValues dv = getOnlyLeafReader(ireader).getSortedSetDocValues("field");
assertEquals(1, dv.getValueCount());
assertEquals(1, dv.nextDoc());
assertEquals(0, dv.nextOrd());
assertEquals(NO_MORE_ORDS, dv.nextOrd());
BytesRef bytes = dv.lookupOrd(0);
assertEquals(newBytesRef("hello"), bytes);
ireader.close();
directory.close();
}
public void testSortedSetMergeAwayAllValues() throws IOException {
Directory directory = newDirectory();
Analyzer analyzer = new MockAnalyzer(random());
IndexWriterConfig iwconfig = newIndexWriterConfig(analyzer);
iwconfig.setMergePolicy(newLogMergePolicy());
RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory, iwconfig);
Document doc = new Document();
doc.add(new StringField("id", "0", Field.Store.NO));
iwriter.addDocument(doc);
doc = new Document();
doc.add(new StringField("id", "1", Field.Store.NO));
doc.add(new SortedSetDocValuesField("field", newBytesRef("hello")));
iwriter.addDocument(doc);
iwriter.commit();
iwriter.deleteDocuments(new Term("id", "1"));
iwriter.forceMerge(1);
DirectoryReader ireader = iwriter.getReader();
iwriter.close();
SortedSetDocValues dv = getOnlyLeafReader(ireader).getSortedSetDocValues("field");
assertEquals(0, dv.getValueCount());
ireader.close();
directory.close();
}
public void testSortedSetTermsEnum() throws IOException {
Directory directory = newDirectory();
Analyzer analyzer = new MockAnalyzer(random());
IndexWriterConfig iwconfig = newIndexWriterConfig(analyzer);
iwconfig.setMergePolicy(newLogMergePolicy());
RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory, iwconfig);
Document doc = new Document();
doc.add(new SortedSetDocValuesField("field", newBytesRef("hello")));
doc.add(new SortedSetDocValuesField("field", newBytesRef("world")));
doc.add(new SortedSetDocValuesField("field", newBytesRef("beer")));
iwriter.addDocument(doc);
DirectoryReader ireader = iwriter.getReader();
iwriter.close();
SortedSetDocValues dv = getOnlyLeafReader(ireader).getSortedSetDocValues("field");
assertEquals(3, dv.getValueCount());
TermsEnum termsEnum = dv.termsEnum();
// next()
assertEquals("beer", termsEnum.next().utf8ToString());
assertEquals(0, termsEnum.ord());
assertEquals("hello", termsEnum.next().utf8ToString());
assertEquals(1, termsEnum.ord());
assertEquals("world", termsEnum.next().utf8ToString());
assertEquals(2, termsEnum.ord());
// seekCeil()
assertEquals(SeekStatus.NOT_FOUND, termsEnum.seekCeil(newBytesRef("ha!")));
assertEquals("hello", termsEnum.term().utf8ToString());
assertEquals(1, termsEnum.ord());
assertEquals(SeekStatus.FOUND, termsEnum.seekCeil(newBytesRef("beer")));
assertEquals("beer", termsEnum.term().utf8ToString());
assertEquals(0, termsEnum.ord());
assertEquals(SeekStatus.END, termsEnum.seekCeil(newBytesRef("zzz")));
// seekExact()
assertTrue(termsEnum.seekExact(newBytesRef("beer")));
assertEquals("beer", termsEnum.term().utf8ToString());
assertEquals(0, termsEnum.ord());
assertTrue(termsEnum.seekExact(newBytesRef("hello")));
assertEquals("hello", termsEnum.term().utf8ToString());
assertEquals(1, termsEnum.ord());
assertTrue(termsEnum.seekExact(newBytesRef("world")));
assertEquals("world", termsEnum.term().utf8ToString());
assertEquals(2, termsEnum.ord());
assertFalse(termsEnum.seekExact(newBytesRef("bogus")));
// seek(ord)
termsEnum.seekExact(0);
assertEquals("beer", termsEnum.term().utf8ToString());
assertEquals(0, termsEnum.ord());
termsEnum.seekExact(1);
assertEquals("hello", termsEnum.term().utf8ToString());
assertEquals(1, termsEnum.ord());
termsEnum.seekExact(2);
assertEquals("world", termsEnum.term().utf8ToString());
assertEquals(2, termsEnum.ord());
// NORMAL automaton
termsEnum = dv.intersect(new CompiledAutomaton(new RegExp(".*l.*").toAutomaton()));
assertEquals("hello", termsEnum.next().utf8ToString());
assertEquals(1, termsEnum.ord());
assertEquals("world", termsEnum.next().utf8ToString());
assertEquals(2, termsEnum.ord());
assertNull(termsEnum.next());
// SINGLE automaton
termsEnum = dv.intersect(new CompiledAutomaton(new RegExp("hello").toAutomaton()));
assertEquals("hello", termsEnum.next().utf8ToString());
assertEquals(1, termsEnum.ord());
assertNull(termsEnum.next());
ireader.close();
directory.close();
}
protected void doTestSortedSetVsStoredFields(int numDocs, int minLength, int maxLength, int maxValuesPerDoc, int maxUniqueValues) throws Exception {
Directory dir = newFSDirectory(createTempDir("dvduel"));
IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random()));
RandomIndexWriter writer = new RandomIndexWriter(random(), dir, conf);
Set<String> valueSet = new HashSet<String>();
for (int i = 0; i < 10000 && valueSet.size() < maxUniqueValues; ++i) {
final int length = TestUtil.nextInt(random(), minLength, maxLength);
valueSet.add(TestUtil.randomSimpleString(random(), length));
}
String[] uniqueValues = valueSet.toArray(new String[0]);
// index some docs
if (VERBOSE) {
System.out.println("\nTEST: now add numDocs=" + numDocs);
}
for (int i = 0; i < numDocs; i++) {
Document doc = new Document();
Field idField = new StringField("id", Integer.toString(i), Field.Store.NO);
doc.add(idField);
int numValues = TestUtil.nextInt(random(), 0, maxValuesPerDoc);
// create a random set of strings
Set<String> values = new TreeSet<>();
for (int v = 0; v < numValues; v++) {
values.add(RandomPicks.randomFrom(random(), uniqueValues));
}
// add ordered to the stored field
for (String v : values) {
doc.add(new StoredField("stored", v));
}
// add in any order to the dv field
ArrayList<String> unordered = new ArrayList<>(values);
Collections.shuffle(unordered, random());
for (String v : unordered) {
doc.add(new SortedSetDocValuesField("dv", newBytesRef(v)));
}
writer.addDocument(doc);
if (random().nextInt(31) == 0) {
writer.commit();
}
}
// delete some docs
int numDeletions = random().nextInt(numDocs/10);
if (VERBOSE) {
System.out.println("\nTEST: now delete " + numDeletions + " docs");
}
for (int i = 0; i < numDeletions; i++) {
int id = random().nextInt(numDocs);
writer.deleteDocuments(new Term("id", Integer.toString(id)));
}
// compare
if (VERBOSE) {
System.out.println("\nTEST: now get reader");
}
DirectoryReader ir = writer.getReader();
TestUtil.checkReader(ir);
for (LeafReaderContext context : ir.leaves()) {
LeafReader r = context.reader();
SortedSetDocValues docValues = r.getSortedSetDocValues("dv");
for (int i = 0; i < r.maxDoc(); i++) {
String stringValues[] = r.document(i).getValues("stored");
if (docValues != null) {
if (docValues.docID() < i) {
docValues.nextDoc();
}
}
if (docValues != null && stringValues.length > 0) {
assertEquals(i, docValues.docID());
for (int j = 0; j < stringValues.length; j++) {
assert docValues != null;
long ord = docValues.nextOrd();
assert ord != NO_MORE_ORDS;
BytesRef scratch = docValues.lookupOrd(ord);
assertEquals(stringValues[j], scratch.utf8ToString());
}
assertEquals(NO_MORE_ORDS, docValues.nextOrd());
}
}
}
if (VERBOSE) {
System.out.println("\nTEST: now close reader");
}
ir.close();
if (VERBOSE) {
System.out.println("TEST: force merge");
}
writer.forceMerge(1);
// compare again
ir = writer.getReader();
TestUtil.checkReader(ir);
for (LeafReaderContext context : ir.leaves()) {
LeafReader r = context.reader();
SortedSetDocValues docValues = r.getSortedSetDocValues("dv");
for (int i = 0; i < r.maxDoc(); i++) {
String stringValues[] = r.document(i).getValues("stored");
if (docValues.docID() < i) {
docValues.nextDoc();
}
if (docValues != null && stringValues.length > 0) {
assertEquals(i, docValues.docID());
for (int j = 0; j < stringValues.length; j++) {
assert docValues != null;
long ord = docValues.nextOrd();
assert ord != NO_MORE_ORDS;
BytesRef scratch = docValues.lookupOrd(ord);
assertEquals(stringValues[j], scratch.utf8ToString());
}
assertEquals(NO_MORE_ORDS, docValues.nextOrd());
}
}
}
if (VERBOSE) {
System.out.println("TEST: close reader");
}
ir.close();
if (VERBOSE) {
System.out.println("TEST: close writer");
}
writer.close();
if (VERBOSE) {
System.out.println("TEST: close dir");
}
dir.close();
}
public void testSortedSetFixedLengthVsStoredFields() throws Exception {
int numIterations = atLeast(1);
for (int i = 0; i < numIterations; i++) {
int fixedLength = TestUtil.nextInt(random(), 1, 10);
doTestSortedSetVsStoredFields(atLeast(300), fixedLength, fixedLength, 16, 100);
}
}
public void testSortedNumericsSingleValuedVsStoredFields() throws Exception {
int numIterations = atLeast(1);
for (int i = 0; i < numIterations; i++) {
doTestSortedNumericsVsStoredFields(
() -> 1,
random()::nextLong
);
}
}
public void testSortedNumericsSingleValuedMissingVsStoredFields() throws Exception {
int numIterations = atLeast(1);
for (int i = 0; i < numIterations; i++) {
doTestSortedNumericsVsStoredFields(
() -> random().nextBoolean() ? 0 : 1,
random()::nextLong
);
}
}
public void testSortedNumericsMultipleValuesVsStoredFields() throws Exception {
int numIterations = atLeast(1);
for (int i = 0; i < numIterations; i++) {
doTestSortedNumericsVsStoredFields(
() -> TestUtil.nextLong(random(), 0, 50),
random()::nextLong
);
}
}
public void testSortedNumericsFewUniqueSetsVsStoredFields() throws Exception {
final long[] values = new long[TestUtil.nextInt(random(), 2, 6)];
for (int i = 0; i < values.length; ++i) {
values[i] = random().nextLong();
}
int numIterations = atLeast(1);
for (int i = 0; i < numIterations; i++) {
doTestSortedNumericsVsStoredFields(
() -> TestUtil.nextLong(random(), 0, 6),
() -> values[random().nextInt(values.length)]
);
}
}
public void testSortedSetVariableLengthVsStoredFields() throws Exception {
int numIterations = atLeast(1);
for (int i = 0; i < numIterations; i++) {
doTestSortedSetVsStoredFields(atLeast(300), 1, 10, 16, 100);
}
}
public void testSortedSetFixedLengthSingleValuedVsStoredFields() throws Exception {
int numIterations = atLeast(1);
for (int i = 0; i < numIterations; i++) {
int fixedLength = TestUtil.nextInt(random(), 1, 10);
doTestSortedSetVsStoredFields(atLeast(300), fixedLength, fixedLength, 1, 100);
}
}
public void testSortedSetVariableLengthSingleValuedVsStoredFields() throws Exception {
int numIterations = atLeast(1);
for (int i = 0; i < numIterations; i++) {
doTestSortedSetVsStoredFields(atLeast(300), 1, 10, 1, 100);
}
}
public void testSortedSetFixedLengthFewUniqueSetsVsStoredFields() throws Exception {
int numIterations = atLeast(1);
for (int i = 0; i < numIterations; i++) {
doTestSortedSetVsStoredFields(atLeast(300), 10, 10, 6, 6);
}
}
public void testSortedSetVariableLengthFewUniqueSetsVsStoredFields() throws Exception {
int numIterations = atLeast(1);
for (int i = 0; i < numIterations; i++) {
doTestSortedSetVsStoredFields(atLeast(300), 1, 10, 6, 6);
}
}
public void testSortedSetVariableLengthManyValuesPerDocVsStoredFields() throws Exception {
int numIterations = atLeast(1);
for (int i = 0; i < numIterations; i++) {
doTestSortedSetVsStoredFields(atLeast(20), 1, 10, 500, 1000);
}
}
public void testSortedSetFixedLengthManyValuesPerDocVsStoredFields() throws Exception {
int numIterations = atLeast(1);
for (int i = 0; i < numIterations; i++) {
doTestSortedSetVsStoredFields(atLeast(20), 10, 10, 500, 1000);
}
}
public void testGCDCompression() throws Exception {
doTestGCDCompression(1);
}
public void testSparseGCDCompression() throws Exception {
doTestGCDCompression(random().nextDouble());
}
private void doTestGCDCompression(double density) throws Exception {
int numIterations = atLeast(1);
for (int i = 0; i < numIterations; i++) {
final long min = - (((long) random().nextInt(1 << 30)) << 32);
final long mul = random().nextInt() & 0xFFFFFFFFL;
final LongSupplier longs = () -> {
return min + mul * random().nextInt(1 << 20);
};
doTestNumericsVsStoredFields(density, longs);
}
}
public void testZeros() throws Exception {
doTestNumericsVsStoredFields(1, () -> 0);
}
public void testSparseZeros() throws Exception {
doTestNumericsVsStoredFields(random().nextDouble(), () -> 0);
}
public void testZeroOrMin() throws Exception {
// try to make GCD compression fail if the format did not anticipate that
// the GCD of 0 and MIN_VALUE is negative
int numIterations = atLeast(1);
for (int i = 0; i < numIterations; i++) {
final LongSupplier longs = () -> {
return random().nextBoolean() ? 0 : Long.MIN_VALUE;
};
doTestNumericsVsStoredFields(1, longs);
}
}
public void testTwoNumbersOneMissing() throws IOException {
Directory directory = newDirectory();
IndexWriterConfig conf = newIndexWriterConfig(null);
conf.setMergePolicy(newLogMergePolicy());
RandomIndexWriter iw = new RandomIndexWriter(random(), directory, conf);
Document doc = new Document();
doc.add(new StringField("id", "0", Field.Store.YES));
doc.add(new NumericDocValuesField("dv1", 0));
iw.addDocument(doc);
doc = new Document();
doc.add(new StringField("id", "1", Field.Store.YES));
iw.addDocument(doc);
iw.forceMerge(1);
iw.close();
IndexReader ir = DirectoryReader.open(directory);
assertEquals(1, ir.leaves().size());
LeafReader ar = ir.leaves().get(0).reader();
NumericDocValues dv = ar.getNumericDocValues("dv1");
assertEquals(0, dv.nextDoc());
assertEquals(0, dv.longValue());
assertEquals(NO_MORE_DOCS, dv.nextDoc());
ir.close();
directory.close();
}
public void testTwoNumbersOneMissingWithMerging() throws IOException {
Directory directory = newDirectory();
IndexWriterConfig conf = newIndexWriterConfig(null);
conf.setMergePolicy(newLogMergePolicy());
RandomIndexWriter iw = new RandomIndexWriter(random(), directory, conf);
Document doc = new Document();
doc.add(new StringField("id", "0", Field.Store.YES));
doc.add(new NumericDocValuesField("dv1", 0));
iw.addDocument(doc);
iw.commit();
doc = new Document();
doc.add(new StringField("id", "1", Field.Store.YES));
iw.addDocument(doc);
iw.forceMerge(1);
iw.close();
IndexReader ir = DirectoryReader.open(directory);
assertEquals(1, ir.leaves().size());
LeafReader ar = ir.leaves().get(0).reader();
NumericDocValues dv = ar.getNumericDocValues("dv1");
assertEquals(0, dv.nextDoc());
assertEquals(0, dv.longValue());
assertEquals(NO_MORE_DOCS, dv.nextDoc());
ir.close();
directory.close();
}
public void testThreeNumbersOneMissingWithMerging() throws IOException {
Directory directory = newDirectory();
IndexWriterConfig conf = newIndexWriterConfig(null);
conf.setMergePolicy(newLogMergePolicy());
RandomIndexWriter iw = new RandomIndexWriter(random(), directory, conf);
Document doc = new Document();
doc.add(new StringField("id", "0", Field.Store.YES));
doc.add(new NumericDocValuesField("dv1", 0));
iw.addDocument(doc);
doc = new Document();
doc.add(new StringField("id", "1", Field.Store.YES));
iw.addDocument(doc);
iw.commit();
doc = new Document();
doc.add(new StringField("id", "2", Field.Store.YES));
doc.add(new NumericDocValuesField("dv1", 5));
iw.addDocument(doc);
iw.forceMerge(1);
iw.close();
IndexReader ir = DirectoryReader.open(directory);
assertEquals(1, ir.leaves().size());
LeafReader ar = ir.leaves().get(0).reader();
NumericDocValues dv = ar.getNumericDocValues("dv1");
assertEquals(0, dv.nextDoc());
assertEquals(0, dv.longValue());
assertEquals(2, dv.nextDoc());
assertEquals(5, dv.longValue());
ir.close();
directory.close();
}
public void testTwoBytesOneMissing() throws IOException {
Directory directory = newDirectory();
IndexWriterConfig conf = newIndexWriterConfig(null);
conf.setMergePolicy(newLogMergePolicy());
RandomIndexWriter iw = new RandomIndexWriter(random(), directory, conf);
Document doc = new Document();
doc.add(new StringField("id", "0", Field.Store.YES));
doc.add(new BinaryDocValuesField("dv1", newBytesRef()));
iw.addDocument(doc);
doc = new Document();
doc.add(new StringField("id", "1", Field.Store.YES));
iw.addDocument(doc);
iw.forceMerge(1);
iw.close();
IndexReader ir = DirectoryReader.open(directory);
assertEquals(1, ir.leaves().size());
LeafReader ar = ir.leaves().get(0).reader();
BinaryDocValues dv = ar.getBinaryDocValues("dv1");
assertEquals(0, dv.nextDoc());
assertEquals(newBytesRef(), dv.binaryValue());
assertEquals(NO_MORE_DOCS, dv.nextDoc());
ir.close();
directory.close();
}
public void testTwoBytesOneMissingWithMerging() throws IOException {
Directory directory = newDirectory();
IndexWriterConfig conf = newIndexWriterConfig(null);
conf.setMergePolicy(newLogMergePolicy());
RandomIndexWriter iw = new RandomIndexWriter(random(), directory, conf);
Document doc = new Document();
doc.add(new StringField("id", "0", Field.Store.YES));
doc.add(new BinaryDocValuesField("dv1", newBytesRef()));
iw.addDocument(doc);
iw.commit();
doc = new Document();
doc.add(new StringField("id", "1", Field.Store.YES));
iw.addDocument(doc);
iw.forceMerge(1);
iw.close();
IndexReader ir = DirectoryReader.open(directory);
assertEquals(1, ir.leaves().size());
LeafReader ar = ir.leaves().get(0).reader();
BinaryDocValues dv = ar.getBinaryDocValues("dv1");
assertEquals(0, dv.nextDoc());
assertEquals(newBytesRef(), dv.binaryValue());
assertEquals(NO_MORE_DOCS, dv.nextDoc());
ir.close();
directory.close();
}
public void testThreeBytesOneMissingWithMerging() throws IOException {
Directory directory = newDirectory();
IndexWriterConfig conf = newIndexWriterConfig(null);
conf.setMergePolicy(newLogMergePolicy());
RandomIndexWriter iw = new RandomIndexWriter(random(), directory, conf);
Document doc = new Document();
doc.add(new StringField("id", "0", Field.Store.YES));
doc.add(new BinaryDocValuesField("dv1", newBytesRef()));
iw.addDocument(doc);
doc = new Document();
doc.add(new StringField("id", "1", Field.Store.YES));
iw.addDocument(doc);
iw.commit();
doc = new Document();
doc.add(new StringField("id", "2", Field.Store.YES));
doc.add(new BinaryDocValuesField("dv1", newBytesRef("boo")));
iw.addDocument(doc);
iw.forceMerge(1);
iw.close();
IndexReader ir = DirectoryReader.open(directory);
assertEquals(1, ir.leaves().size());
LeafReader ar = ir.leaves().get(0).reader();
BinaryDocValues dv = ar.getBinaryDocValues("dv1");
assertEquals(0, dv.nextDoc());
assertEquals(newBytesRef(), dv.binaryValue());
assertEquals(2, dv.nextDoc());
assertEquals(newBytesRef("boo"), dv.binaryValue());
assertEquals(NO_MORE_DOCS, dv.nextDoc());
ir.close();
directory.close();
}
/** Tests dv against stored fields with threads (binary/numeric/sorted, no missing) */
public void testThreads() throws Exception {
Directory dir = newDirectory();
IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random()));
RandomIndexWriter writer = new RandomIndexWriter(random(), dir, conf);
Document doc = new Document();
Field idField = new StringField("id", "", Field.Store.NO);
Field storedBinField = new StoredField("storedBin", new byte[0]);
Field dvBinField = new BinaryDocValuesField("dvBin", newBytesRef());
Field dvSortedField = new SortedDocValuesField("dvSorted", newBytesRef());
Field storedNumericField = new StoredField("storedNum", "");
Field dvNumericField = new NumericDocValuesField("dvNum", 0);
doc.add(idField);
doc.add(storedBinField);
doc.add(dvBinField);
doc.add(dvSortedField);
doc.add(storedNumericField);
doc.add(dvNumericField);
// index some docs
int numDocs = atLeast(300);
for (int i = 0; i < numDocs; i++) {
idField.setStringValue(Integer.toString(i));
int length = TestUtil.nextInt(random(), 0, 8);
byte buffer[] = new byte[length];
random().nextBytes(buffer);
storedBinField.setBytesValue(buffer);
dvBinField.setBytesValue(buffer);
dvSortedField.setBytesValue(buffer);
long numericValue = random().nextLong();
storedNumericField.setStringValue(Long.toString(numericValue));
dvNumericField.setLongValue(numericValue);
writer.addDocument(doc);
if (random().nextInt(31) == 0) {
writer.commit();
}
}
// delete some docs
int numDeletions = random().nextInt(numDocs/10);
for (int i = 0; i < numDeletions; i++) {
int id = random().nextInt(numDocs);
writer.deleteDocuments(new Term("id", Integer.toString(id)));
}
writer.close();
// compare
final DirectoryReader ir = DirectoryReader.open(dir);
int numThreads = TestUtil.nextInt(random(), 2, 7);
Thread threads[] = new Thread[numThreads];
final CountDownLatch startingGun = new CountDownLatch(1);
for (int i = 0; i < threads.length; i++) {
threads[i] = new Thread() {
@Override
public void run() {
try {
startingGun.await();
for (LeafReaderContext context : ir.leaves()) {
LeafReader r = context.reader();
BinaryDocValues binaries = r.getBinaryDocValues("dvBin");
SortedDocValues sorted = r.getSortedDocValues("dvSorted");
NumericDocValues numerics = r.getNumericDocValues("dvNum");
for (int j = 0; j < r.maxDoc(); j++) {
BytesRef binaryValue = r.document(j).getBinaryValue("storedBin");
assertEquals(j, binaries.nextDoc());
BytesRef scratch = binaries.binaryValue();
assertEquals(binaryValue, scratch);
assertEquals(j, sorted.nextDoc());
scratch = sorted.binaryValue();
assertEquals(binaryValue, scratch);
String expected = r.document(j).get("storedNum");
assertEquals(j, numerics.nextDoc());
assertEquals(Long.parseLong(expected), numerics.longValue());
}
}
TestUtil.checkReader(ir);
} catch (Exception e) {
throw new RuntimeException(e);
}
}
};
threads[i].start();
}
startingGun.countDown();
for (Thread t : threads) {
t.join();
}
ir.close();
dir.close();
}
/** Tests dv against stored fields with threads (all types + missing) */
@Nightly
public void testThreads2() throws Exception {
Directory dir = newDirectory();
IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random()));
RandomIndexWriter writer = new RandomIndexWriter(random(), dir, conf);
Field idField = new StringField("id", "", Field.Store.NO);
Field storedBinField = new StoredField("storedBin", new byte[0]);
Field dvBinField = new BinaryDocValuesField("dvBin", newBytesRef());
Field dvSortedField = new SortedDocValuesField("dvSorted", newBytesRef());
Field storedNumericField = new StoredField("storedNum", "");
Field dvNumericField = new NumericDocValuesField("dvNum", 0);
// index some docs
int numDocs = TestUtil.nextInt(random(), 1025, 2047);
for (int i = 0; i < numDocs; i++) {
idField.setStringValue(Integer.toString(i));
int length = TestUtil.nextInt(random(), 0, 8);
byte buffer[] = new byte[length];
random().nextBytes(buffer);
storedBinField.setBytesValue(buffer);
dvBinField.setBytesValue(buffer);
dvSortedField.setBytesValue(buffer);
long numericValue = random().nextLong();
storedNumericField.setStringValue(Long.toString(numericValue));
dvNumericField.setLongValue(numericValue);
Document doc = new Document();
doc.add(idField);
if (random().nextInt(4) > 0) {
doc.add(storedBinField);
doc.add(dvBinField);
doc.add(dvSortedField);
}
if (random().nextInt(4) > 0) {
doc.add(storedNumericField);
doc.add(dvNumericField);
}
int numSortedSetFields = random().nextInt(3);
Set<String> values = new TreeSet<>();
for (int j = 0; j < numSortedSetFields; j++) {
values.add(TestUtil.randomSimpleString(random()));
}
for (String v : values) {
doc.add(new SortedSetDocValuesField("dvSortedSet", newBytesRef(v)));
doc.add(new StoredField("storedSortedSet", v));
}
int numSortedNumericFields = random().nextInt(3);
Set<Long> numValues = new TreeSet<>();
for (int j = 0; j < numSortedNumericFields; j++) {
numValues.add(TestUtil.nextLong(random(), Long.MIN_VALUE, Long.MAX_VALUE));
}
for (Long l : numValues) {
doc.add(new SortedNumericDocValuesField("dvSortedNumeric", l));
doc.add(new StoredField("storedSortedNumeric", Long.toString(l)));
}
writer.addDocument(doc);
if (random().nextInt(31) == 0) {
writer.commit();
}
}
// delete some docs
int numDeletions = random().nextInt(numDocs/10);
for (int i = 0; i < numDeletions; i++) {
int id = random().nextInt(numDocs);
writer.deleteDocuments(new Term("id", Integer.toString(id)));
}
writer.close();
// compare
final DirectoryReader ir = DirectoryReader.open(dir);
int numThreads = TestUtil.nextInt(random(), 2, 7);
Thread threads[] = new Thread[numThreads];
final CountDownLatch startingGun = new CountDownLatch(1);
for (int i = 0; i < threads.length; i++) {
threads[i] = new Thread() {
@Override
public void run() {
try {
startingGun.await();
for (LeafReaderContext context : ir.leaves()) {
LeafReader r = context.reader();
BinaryDocValues binaries = r.getBinaryDocValues("dvBin");
SortedDocValues sorted = r.getSortedDocValues("dvSorted");
NumericDocValues numerics = r.getNumericDocValues("dvNum");
SortedSetDocValues sortedSet = r.getSortedSetDocValues("dvSortedSet");
SortedNumericDocValues sortedNumeric = r.getSortedNumericDocValues("dvSortedNumeric");
for (int j = 0; j < r.maxDoc(); j++) {
BytesRef binaryValue = r.document(j).getBinaryValue("storedBin");
if (binaryValue != null) {
if (binaries != null) {
assertEquals(j, binaries.nextDoc());
BytesRef scratch = binaries.binaryValue();
assertEquals(binaryValue, scratch);
assertEquals(j, sorted.nextDoc());
scratch = sorted.binaryValue();
assertEquals(binaryValue, scratch);
}
}
String number = r.document(j).get("storedNum");
if (number != null) {
if (numerics != null) {
assertEquals(j, numerics.advance(j));
assertEquals(Long.parseLong(number), numerics.longValue());
}
}
String values[] = r.document(j).getValues("storedSortedSet");
if (values.length > 0) {
assertNotNull(sortedSet);
assertEquals(j, sortedSet.nextDoc());
for (int k = 0; k < values.length; k++) {
long ord = sortedSet.nextOrd();
assertTrue(ord != SortedSetDocValues.NO_MORE_ORDS);
BytesRef value = sortedSet.lookupOrd(ord);
assertEquals(values[k], value.utf8ToString());
}
assertEquals(SortedSetDocValues.NO_MORE_ORDS, sortedSet.nextOrd());
}
String numValues[] = r.document(j).getValues("storedSortedNumeric");
if (numValues.length > 0) {
assertNotNull(sortedNumeric);
assertEquals(j, sortedNumeric.nextDoc());
assertEquals(numValues.length, sortedNumeric.docValueCount());
for (int k = 0; k < numValues.length; k++) {
long v = sortedNumeric.nextValue();
assertEquals(numValues[k], Long.toString(v));
}
}
}
}
TestUtil.checkReader(ir);
} catch (Exception e) {
throw new RuntimeException(e);
}
}
};
threads[i].start();
}
startingGun.countDown();
for (Thread t : threads) {
t.join();
}
ir.close();
dir.close();
}
@Nightly
public void testThreads3() throws Exception {
Directory dir = newFSDirectory(createTempDir());
IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random()));
RandomIndexWriter writer = new RandomIndexWriter(random(), dir, conf);
int numSortedSets = random().nextInt(21);
int numBinaries = random().nextInt(21);
int numSortedNums = random().nextInt(21);
int numDocs = TestUtil.nextInt(random(), 2025, 2047);
for (int i = 0; i < numDocs; i++) {
Document doc = new Document();
for (int j = 0; j < numSortedSets; j++) {
doc.add(
new SortedSetDocValuesField(
"ss" + j, newBytesRef(TestUtil.randomSimpleString(random()))));
doc.add(
new SortedSetDocValuesField(
"ss" + j, newBytesRef(TestUtil.randomSimpleString(random()))));
}
for (int j = 0; j < numBinaries; j++) {
doc.add(
new BinaryDocValuesField("b" + j, newBytesRef(TestUtil.randomSimpleString(random()))));
}
for (int j = 0; j < numSortedNums; j++) {
doc.add(new SortedNumericDocValuesField("sn" + j, TestUtil.nextLong(random(), Long.MIN_VALUE, Long.MAX_VALUE)));
doc.add(new SortedNumericDocValuesField("sn" + j, TestUtil.nextLong(random(), Long.MIN_VALUE, Long.MAX_VALUE)));
}
writer.addDocument(doc);
}
writer.close();
// now check with threads
for (int i = 0; i < 10; i++) {
final DirectoryReader r = DirectoryReader.open(dir);
final CountDownLatch startingGun = new CountDownLatch(1);
Thread threads[] = new Thread[TestUtil.nextInt(random(), 4, 10)];
for (int tid = 0; tid < threads.length; tid++) {
threads[tid] = new Thread() {
@Override
public void run() {
try {
ByteArrayOutputStream bos = new ByteArrayOutputStream(1024);
PrintStream infoStream = new PrintStream(bos, false, IOUtils.UTF_8);
startingGun.await();
for (LeafReaderContext leaf : r.leaves()) {
DocValuesStatus status = CheckIndex.testDocValues((SegmentReader)leaf.reader(), infoStream, true);
if (status.error != null) {
throw status.error;
}
}
} catch (Throwable e) {
throw new RuntimeException(e);
}
}
};
}
for (int tid = 0; tid < threads.length; tid++) {
threads[tid].start();
}
startingGun.countDown();
for (int tid = 0; tid < threads.length; tid++) {
threads[tid].join();
}
r.close();
}
dir.close();
}
// LUCENE-5218
public void testEmptyBinaryValueOnPageSizes() throws Exception {
// Test larger and larger power-of-two sized values,
// followed by empty string value:
for(int i=0;i<20;i++) {
if (i > 14 && codecAcceptsHugeBinaryValues("field") == false) {
break;
}
Directory dir = newDirectory();
RandomIndexWriter w = new RandomIndexWriter(random(), dir);
BytesRef bytes = newBytesRef(new byte[1 << i], 0, 1 << i);
for (int j = 0; j < 4; j++) {
Document doc = new Document();
doc.add(new BinaryDocValuesField("field", bytes));
w.addDocument(doc);
}
Document doc = new Document();
doc.add(new StoredField("id", "5"));
doc.add(new BinaryDocValuesField("field", newBytesRef()));
w.addDocument(doc);
IndexReader r = w.getReader();
w.close();
BinaryDocValues values = MultiDocValues.getBinaryValues(r, "field");
for(int j=0;j<5;j++) {
assertEquals(j, values.nextDoc());
BytesRef result = values.binaryValue();
assertTrue(result.length == 0 || result.length == 1<<i);
}
r.close();
dir.close();
}
}
public void testOneSortedNumber() throws IOException {
Directory directory = newDirectory();
RandomIndexWriter writer = new RandomIndexWriter(random(), directory);
Document doc = new Document();
doc.add(new SortedNumericDocValuesField("dv", 5));
writer.addDocument(doc);
writer.close();
// Now search the index:
IndexReader reader = DirectoryReader.open(directory);
assert reader.leaves().size() == 1;
SortedNumericDocValues dv = reader.leaves().get(0).reader().getSortedNumericDocValues("dv");
assertEquals(0, dv.nextDoc());
assertEquals(1, dv.docValueCount());
assertEquals(5, dv.nextValue());
reader.close();
directory.close();
}
public void testOneSortedNumberOneMissing() throws IOException {
Directory directory = newDirectory();
IndexWriter writer = new IndexWriter(directory, new IndexWriterConfig(null));
Document doc = new Document();
doc.add(new SortedNumericDocValuesField("dv", 5));
writer.addDocument(doc);
writer.addDocument(new Document());
writer.close();
// Now search the index:
IndexReader reader = DirectoryReader.open(directory);
assert reader.leaves().size() == 1;
SortedNumericDocValues dv = reader.leaves().get(0).reader().getSortedNumericDocValues("dv");
assertEquals(0, dv.nextDoc());
assertEquals(1, dv.docValueCount());
assertEquals(5, dv.nextValue());
assertEquals(NO_MORE_DOCS, dv.nextDoc());
reader.close();
directory.close();
}
public void testNumberMergeAwayAllValues() throws IOException {
Directory directory = newDirectory();
Analyzer analyzer = new MockAnalyzer(random());
IndexWriterConfig iwconfig = newIndexWriterConfig(analyzer);
iwconfig.setMergePolicy(newLogMergePolicy());
RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory, iwconfig);
Document doc = new Document();
doc.add(new StringField("id", "0", Field.Store.NO));
iwriter.addDocument(doc);
doc = new Document();
doc.add(new StringField("id", "1", Field.Store.NO));
doc.add(new NumericDocValuesField("field", 5));
iwriter.addDocument(doc);
iwriter.commit();
iwriter.deleteDocuments(new Term("id", "1"));
iwriter.forceMerge(1);
DirectoryReader ireader = iwriter.getReader();
iwriter.close();
NumericDocValues dv = getOnlyLeafReader(ireader).getNumericDocValues("field");
assertEquals(NO_MORE_DOCS, dv.nextDoc());
ireader.close();
directory.close();
}
public void testTwoSortedNumber() throws IOException {
Directory directory = newDirectory();
RandomIndexWriter writer = new RandomIndexWriter(random(), directory);
Document doc = new Document();
doc.add(new SortedNumericDocValuesField("dv", 11));
doc.add(new SortedNumericDocValuesField("dv", -5));
writer.addDocument(doc);
writer.close();
// Now search the index:
IndexReader reader = DirectoryReader.open(directory);
assert reader.leaves().size() == 1;
SortedNumericDocValues dv = reader.leaves().get(0).reader().getSortedNumericDocValues("dv");
assertEquals(0, dv.nextDoc());
assertEquals(2, dv.docValueCount());
assertEquals(-5, dv.nextValue());
assertEquals(11, dv.nextValue());
reader.close();
directory.close();
}
public void testTwoSortedNumberSameValue() throws IOException {
Directory directory = newDirectory();
RandomIndexWriter writer = new RandomIndexWriter(random(), directory);
Document doc = new Document();
doc.add(new SortedNumericDocValuesField("dv", 11));
doc.add(new SortedNumericDocValuesField("dv", 11));
writer.addDocument(doc);
writer.close();
// Now search the index:
IndexReader reader = DirectoryReader.open(directory);
assert reader.leaves().size() == 1;
SortedNumericDocValues dv = reader.leaves().get(0).reader().getSortedNumericDocValues("dv");
assertEquals(0, dv.nextDoc());
assertEquals(2, dv.docValueCount());
assertEquals(11, dv.nextValue());
assertEquals(11, dv.nextValue());
reader.close();
directory.close();
}
public void testTwoSortedNumberOneMissing() throws IOException {
Directory directory = newDirectory();
IndexWriter writer = new IndexWriter(directory, new IndexWriterConfig(null));
Document doc = new Document();
doc.add(new SortedNumericDocValuesField("dv", 11));
doc.add(new SortedNumericDocValuesField("dv", -5));
writer.addDocument(doc);
writer.addDocument(new Document());
writer.close();
// Now search the index:
IndexReader reader = DirectoryReader.open(directory);
assert reader.leaves().size() == 1;
SortedNumericDocValues dv = reader.leaves().get(0).reader().getSortedNumericDocValues("dv");
assertEquals(0, dv.nextDoc());
assertEquals(2, dv.docValueCount());
assertEquals(-5, dv.nextValue());
assertEquals(11, dv.nextValue());
assertEquals(NO_MORE_DOCS, dv.nextDoc());
reader.close();
directory.close();
}
public void testSortedNumberMerge() throws IOException {
Directory directory = newDirectory();
IndexWriterConfig iwc = new IndexWriterConfig(null);
iwc.setMergePolicy(newLogMergePolicy());
IndexWriter writer = new IndexWriter(directory, iwc);
Document doc = new Document();
doc.add(new SortedNumericDocValuesField("dv", 11));
writer.addDocument(doc);
writer.commit();
doc = new Document();
doc.add(new SortedNumericDocValuesField("dv", -5));
writer.addDocument(doc);
writer.forceMerge(1);
writer.close();
// Now search the index:
IndexReader reader = DirectoryReader.open(directory);
assert reader.leaves().size() == 1;
SortedNumericDocValues dv = reader.leaves().get(0).reader().getSortedNumericDocValues("dv");
assertEquals(0, dv.nextDoc());
assertEquals(1, dv.docValueCount());
assertEquals(11, dv.nextValue());
assertEquals(1, dv.nextDoc());
assertEquals(1, dv.docValueCount());
assertEquals(-5, dv.nextValue());
reader.close();
directory.close();
}
public void testSortedNumberMergeAwayAllValues() throws IOException {
Directory directory = newDirectory();
Analyzer analyzer = new MockAnalyzer(random());
IndexWriterConfig iwconfig = newIndexWriterConfig(analyzer);
iwconfig.setMergePolicy(newLogMergePolicy());
RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory, iwconfig);
Document doc = new Document();
doc.add(new StringField("id", "0", Field.Store.NO));
iwriter.addDocument(doc);
doc = new Document();
doc.add(new StringField("id", "1", Field.Store.NO));
doc.add(new SortedNumericDocValuesField("field", 5));
iwriter.addDocument(doc);
iwriter.commit();
iwriter.deleteDocuments(new Term("id", "1"));
iwriter.forceMerge(1);
DirectoryReader ireader = iwriter.getReader();
iwriter.close();
SortedNumericDocValues dv = getOnlyLeafReader(ireader).getSortedNumericDocValues("field");
assertEquals(NO_MORE_DOCS, dv.nextDoc());
ireader.close();
directory.close();
}
public void testSortedEnumAdvanceIndependently() throws IOException {
Directory directory = newDirectory();
Analyzer analyzer = new MockAnalyzer(random());
IndexWriterConfig iwconfig = newIndexWriterConfig(analyzer);
iwconfig.setMergePolicy(newLogMergePolicy());
RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory, iwconfig);
Document doc = new Document();
SortedDocValuesField field = new SortedDocValuesField("field", newBytesRef("2"));
doc.add(field);
iwriter.addDocument(doc);
field.setBytesValue(newBytesRef("1"));
iwriter.addDocument(doc);
field.setBytesValue(newBytesRef("3"));
iwriter.addDocument(doc);
iwriter.commit();
iwriter.forceMerge(1);
DirectoryReader ireader = iwriter.getReader();
iwriter.close();
SortedDocValues dv = getOnlyLeafReader(ireader).getSortedDocValues("field");
doTestSortedSetEnumAdvanceIndependently(DocValues.singleton(dv));
ireader.close();
directory.close();
}
public void testSortedSetEnumAdvanceIndependently() throws IOException {
Directory directory = newDirectory();
Analyzer analyzer = new MockAnalyzer(random());
IndexWriterConfig iwconfig = newIndexWriterConfig(analyzer);
iwconfig.setMergePolicy(newLogMergePolicy());
RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory, iwconfig);
Document doc = new Document();
SortedSetDocValuesField field1 = new SortedSetDocValuesField("field", newBytesRef("2"));
SortedSetDocValuesField field2 = new SortedSetDocValuesField("field", newBytesRef("3"));
doc.add(field1);
doc.add(field2);
iwriter.addDocument(doc);
field1.setBytesValue(newBytesRef("1"));
iwriter.addDocument(doc);
field2.setBytesValue(newBytesRef("2"));
iwriter.addDocument(doc);
iwriter.commit();
iwriter.forceMerge(1);
DirectoryReader ireader = iwriter.getReader();
iwriter.close();
SortedSetDocValues dv = getOnlyLeafReader(ireader).getSortedSetDocValues("field");
doTestSortedSetEnumAdvanceIndependently(dv);
ireader.close();
directory.close();
}
protected void doTestSortedSetEnumAdvanceIndependently(SortedSetDocValues dv) throws IOException {
if (dv.getValueCount() < 2) {
return;
}
List<BytesRef> terms = new ArrayList<>();
TermsEnum te = dv.termsEnum();
terms.add(BytesRef.deepCopyOf(te.next()));
terms.add(BytesRef.deepCopyOf(te.next()));
// Make sure that calls to next() does not modify the term of the other enum
TermsEnum enum1 = dv.termsEnum();
TermsEnum enum2 = dv.termsEnum();
BytesRefBuilder term1 = new BytesRefBuilder();
BytesRefBuilder term2 = new BytesRefBuilder();
term1.copyBytes(enum1.next());
term2.copyBytes(enum2.next());
term1.copyBytes(enum1.next());
assertEquals(term1.get(), enum1.term());
assertEquals(term2.get(), enum2.term());
// Same for seekCeil
enum1 = dv.termsEnum();
enum2 = dv.termsEnum();
term1 = new BytesRefBuilder();
term2 = new BytesRefBuilder();
term2.copyBytes(enum2.next());
BytesRefBuilder seekTerm = new BytesRefBuilder();
seekTerm.append(terms.get(0));
seekTerm.append((byte) 0);
enum1.seekCeil(seekTerm.get());
term1.copyBytes(enum1.term());
assertEquals(term1.get(), enum1.term());
assertEquals(term2.get(), enum2.term());
// Same for seekCeil on an exact value
enum1 = dv.termsEnum();
enum2 = dv.termsEnum();
term1 = new BytesRefBuilder();
term2 = new BytesRefBuilder();
term2.copyBytes(enum2.next());
enum1.seekCeil(terms.get(1));
term1.copyBytes(enum1.term());
assertEquals(term1.get(), enum1.term());
assertEquals(term2.get(), enum2.term());
// Same for seekExact
enum1 = dv.termsEnum();
enum2 = dv.termsEnum();
term1 = new BytesRefBuilder();
term2 = new BytesRefBuilder();
term2.copyBytes(enum2.next());
final boolean found = enum1.seekExact(terms.get(1));
assertTrue(found);
term1.copyBytes(enum1.term());
// Same for seek by ord
enum1 = dv.termsEnum();
enum2 = dv.termsEnum();
term1 = new BytesRefBuilder();
term2 = new BytesRefBuilder();
term2.copyBytes(enum2.next());
enum1.seekExact(1);
term1.copyBytes(enum1.term());
assertEquals(term1.get(), enum1.term());
assertEquals(term2.get(), enum2.term());
}
// same as testSortedMergeAwayAllValues but on more than 1024 docs to have sparse encoding on
public void testSortedMergeAwayAllValuesLargeSegment() throws IOException {
Directory directory = newDirectory();
Analyzer analyzer = new MockAnalyzer(random());
IndexWriterConfig iwconfig = newIndexWriterConfig(analyzer);
iwconfig.setMergePolicy(newLogMergePolicy());
RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory, iwconfig);
Document doc = new Document();
doc.add(new StringField("id", "1", Field.Store.NO));
doc.add(new SortedDocValuesField("field", newBytesRef("hello")));
iwriter.addDocument(doc);
final int numEmptyDocs = atLeast(1024);
for (int i = 0; i < numEmptyDocs; ++i) {
iwriter.addDocument(new Document());
}
iwriter.commit();
iwriter.deleteDocuments(new Term("id", "1"));
iwriter.forceMerge(1);
DirectoryReader ireader = iwriter.getReader();
iwriter.close();
SortedDocValues dv = getOnlyLeafReader(ireader).getSortedDocValues("field");
assertEquals(NO_MORE_DOCS, dv.nextDoc());
ireader.close();
directory.close();
}
// same as testSortedSetMergeAwayAllValues but on more than 1024 docs to have sparse encoding on
public void testSortedSetMergeAwayAllValuesLargeSegment() throws IOException {
Directory directory = newDirectory();
Analyzer analyzer = new MockAnalyzer(random());
IndexWriterConfig iwconfig = newIndexWriterConfig(analyzer);
iwconfig.setMergePolicy(newLogMergePolicy());
RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory, iwconfig);
Document doc = new Document();
doc.add(new StringField("id", "1", Field.Store.NO));
doc.add(new SortedSetDocValuesField("field", newBytesRef("hello")));
iwriter.addDocument(doc);
final int numEmptyDocs = atLeast(1024);
for (int i = 0; i < numEmptyDocs; ++i) {
iwriter.addDocument(new Document());
}
iwriter.commit();
iwriter.deleteDocuments(new Term("id", "1"));
iwriter.forceMerge(1);
DirectoryReader ireader = iwriter.getReader();
iwriter.close();
SortedSetDocValues dv = getOnlyLeafReader(ireader).getSortedSetDocValues("field");
assertEquals(NO_MORE_DOCS, dv.nextDoc());
ireader.close();
directory.close();
}
// same as testNumericMergeAwayAllValues but on more than 1024 docs to have sparse encoding on
public void testNumericMergeAwayAllValuesLargeSegment() throws IOException {
Directory directory = newDirectory();
Analyzer analyzer = new MockAnalyzer(random());
IndexWriterConfig iwconfig = newIndexWriterConfig(analyzer);
iwconfig.setMergePolicy(newLogMergePolicy());
RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory, iwconfig);
Document doc = new Document();
doc.add(new StringField("id", "1", Field.Store.NO));
doc.add(new NumericDocValuesField("field", 42L));
iwriter.addDocument(doc);
final int numEmptyDocs = atLeast(1024);
for (int i = 0; i < numEmptyDocs; ++i) {
iwriter.addDocument(new Document());
}
iwriter.commit();
iwriter.deleteDocuments(new Term("id", "1"));
iwriter.forceMerge(1);
DirectoryReader ireader = iwriter.getReader();
iwriter.close();
NumericDocValues dv = getOnlyLeafReader(ireader).getNumericDocValues("field");
assertEquals(NO_MORE_DOCS, dv.nextDoc());
ireader.close();
directory.close();
}
// same as testSortedNumericMergeAwayAllValues but on more than 1024 docs to have sparse encoding on
public void testSortedNumericMergeAwayAllValuesLargeSegment() throws IOException {
Directory directory = newDirectory();
Analyzer analyzer = new MockAnalyzer(random());
IndexWriterConfig iwconfig = newIndexWriterConfig(analyzer);
iwconfig.setMergePolicy(newLogMergePolicy());
RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory, iwconfig);
Document doc = new Document();
doc.add(new StringField("id", "1", Field.Store.NO));
doc.add(new SortedNumericDocValuesField("field", 42L));
iwriter.addDocument(doc);
final int numEmptyDocs = atLeast(1024);
for (int i = 0; i < numEmptyDocs; ++i) {
iwriter.addDocument(new Document());
}
iwriter.commit();
iwriter.deleteDocuments(new Term("id", "1"));
iwriter.forceMerge(1);
DirectoryReader ireader = iwriter.getReader();
iwriter.close();
SortedNumericDocValues dv = getOnlyLeafReader(ireader).getSortedNumericDocValues("field");
assertEquals(NO_MORE_DOCS, dv.nextDoc());
ireader.close();
directory.close();
}
// same as testBinaryMergeAwayAllValues but on more than 1024 docs to have sparse encoding on
public void testBinaryMergeAwayAllValuesLargeSegment() throws IOException {
Directory directory = newDirectory();
Analyzer analyzer = new MockAnalyzer(random());
IndexWriterConfig iwconfig = newIndexWriterConfig(analyzer);
iwconfig.setMergePolicy(newLogMergePolicy());
RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory, iwconfig);
Document doc = new Document();
doc.add(new StringField("id", "1", Field.Store.NO));
doc.add(new BinaryDocValuesField("field", newBytesRef("hello")));
iwriter.addDocument(doc);
final int numEmptyDocs = atLeast(1024);
for (int i = 0; i < numEmptyDocs; ++i) {
iwriter.addDocument(new Document());
}
iwriter.commit();
iwriter.deleteDocuments(new Term("id", "1"));
iwriter.forceMerge(1);
DirectoryReader ireader = iwriter.getReader();
iwriter.close();
BinaryDocValues dv = getOnlyLeafReader(ireader).getBinaryDocValues("field");
assertEquals(NO_MORE_DOCS, dv.nextDoc());
ireader.close();
directory.close();
}
public void testRandomAdvanceNumeric() throws IOException {
final long longRange;
if (random().nextBoolean()) {
longRange = TestUtil.nextInt(random(), 1, 1024);
} else {
longRange = TestUtil.nextLong(random(), 1, Long.MAX_VALUE);
}
doTestRandomAdvance(new FieldCreator() {
@Override
public Field next() {
return new NumericDocValuesField("field", TestUtil.nextLong(random(), 0, longRange));
}
@Override
public DocIdSetIterator iterator(IndexReader r) throws IOException {
return MultiDocValues.getNumericValues(r, "field");
}
});
}
public void testRandomAdvanceBinary() throws IOException {
doTestRandomAdvance(
new FieldCreator() {
@Override
public Field next() {
byte[] bytes = new byte[random().nextInt(10)];
random().nextBytes(bytes);
return new BinaryDocValuesField("field", newBytesRef(bytes));
}
@Override
public DocIdSetIterator iterator(IndexReader r) throws IOException {
return MultiDocValues.getBinaryValues(r, "field");
}
});
}
private interface FieldCreator {
public Field next();
public DocIdSetIterator iterator(IndexReader r) throws IOException;
}
private void doTestRandomAdvance(FieldCreator fieldCreator) throws IOException {
Analyzer analyzer = new MockAnalyzer(random());
Directory directory = newDirectory();
IndexWriterConfig conf = newIndexWriterConfig(analyzer);
conf.setMergePolicy(newLogMergePolicy());
RandomIndexWriter w = new RandomIndexWriter(random(), directory, conf);
int numChunks = atLeast(10);
int id = 0;
Set<Integer> missingSet = new HashSet<>();
for(int i=0;i<numChunks;i++) {
// change sparseness for each chunk
double sparseChance = random().nextDouble();
int docCount = atLeast(1000);
for(int j=0;j<docCount;j++) {
Document doc = new Document();
doc.add(new StoredField("id", id));
if (random().nextDouble() > sparseChance) {
doc.add(fieldCreator.next());
} else {
missingSet.add(id);
}
id++;
w.addDocument(doc);
}
}
if (random().nextBoolean()) {
w.forceMerge(1);
}
// Now search the index:
IndexReader r = w.getReader();
BitSet missing = new FixedBitSet(r.maxDoc());
for(int docID=0;docID<r.maxDoc();docID++) {
Document doc = r.document(docID);
if (missingSet.contains(doc.getField("id").numericValue())) {
missing.set(docID);
}
}
int numIters = atLeast(10);
for(int iter=0;iter<numIters;iter++) {
DocIdSetIterator values = fieldCreator.iterator(r);
assertEquals(-1, values.docID());
while (true) {
int docID;
if (random().nextBoolean()) {
docID = values.nextDoc();
} else {
int range;
if (random().nextInt(10) == 7) {
// big jump
range = r.maxDoc()-values.docID();
} else {
// small jump
range = 25;
}
int inc = TestUtil.nextInt(random(), 1, range);
docID = values.advance(values.docID() + inc);
}
if (docID == NO_MORE_DOCS) {
break;
}
assertFalse(missing.get(docID));
}
}
IOUtils.close(r, w, directory);
}
protected boolean codecAcceptsHugeBinaryValues(String field) {
return true;
}
}