package org.apache.lucene.index;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import static org.apache.lucene.index.SortedSetDocValues.NO_MORE_ORDS;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.TreeSet;
import java.util.concurrent.CountDownLatch;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.document.BinaryDocValuesField;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FloatDocValuesField;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.document.SortedDocValuesField;
import org.apache.lucene.document.SortedSetDocValuesField;
import org.apache.lucene.document.StoredField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.TermsEnum.SeekStatus;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefHash;
import org.apache.lucene.util.TestUtil;
/**
 * Abstract class to do basic tests for a docvalues format.
 * NOTE: This test focuses on the docvalues impl, nothing else.
 * The [stretch] goal is for this test to be
 * so thorough in testing a new DocValuesFormat that if this
 * test passes, then all Lucene/Solr tests should also pass. I.e.,
 * if there is some bug in a given DocValuesFormat that this
 * test fails to catch, then this test needs to be improved! */
public abstract class BaseDocValuesFormatTestCase extends BaseIndexFileFormatTestCase {
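/** Adds random doc values fields; used by the generic checks in BaseIndexFileFormatTestCase. */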
@Override
protected void addRandomFields(Document doc) {
if (usually()) {
doc.add(new NumericDocValuesField("ndv", random().nextInt(1 << 12)));
doc.add(new BinaryDocValuesField("bdv", new BytesRef(TestUtil.randomSimpleString(random()))));
doc.add(new SortedDocValuesField("sdv", new BytesRef(TestUtil.randomSimpleString(random(), 2))));
}
if (defaultCodecSupportsSortedSet()) {
final int numValues = random().nextInt(5);
for (int i = 0; i < numValues; ++i) {
doc.add(new SortedSetDocValuesField("ssdv", new BytesRef(TestUtil.randomSimpleString(random(), 2))));
}
}
}
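/** Tests writing and reading back a single numeric doc values field alongside an indexed text field. */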
public void testOneNumber() throws IOException {
Directory directory = newDirectory();
RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory);
Document doc = new Document();
String longTerm = "longtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongterm";
String text = "This is the text to be indexed. " + longTerm;
doc.add(newTextField("fieldname", text, Field.Store.YES));
doc.add(new NumericDocValuesField("dv", 5));
iwriter.addDocument(doc);
iwriter.shutdown();
// Now search the index:
IndexReader ireader = DirectoryReader.open(directory); // read-only=true
IndexSearcher isearcher = new IndexSearcher(ireader);
assertEquals(1, isearcher.search(new TermQuery(new Term("fieldname", longTerm)), 1).totalHits);
Query query = new TermQuery(new Term("fieldname", "text"));
TopDocs hits = isearcher.search(query, null, 1);
assertEquals(1, hits.totalHits);
// Iterate through the results:
for (int i = 0; i < hits.scoreDocs.length; i++) {
StoredDocument hitDoc = isearcher.doc(hits.scoreDocs[i].doc);
assertEquals(text, hitDoc.get("fieldname"));
assert ireader.leaves().size() == 1;
NumericDocValues dv = ireader.leaves().get(0).reader().getNumericDocValues("dv");
assertEquals(5, dv.get(hits.scoreDocs[i].doc));
}
ireader.close();
directory.close();
}
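/** Tests a single FloatDocValuesField; the value reads back as its raw int bits through the numeric API. */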
public void testOneFloat() throws IOException {
Directory directory = newDirectory();
RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory);
Document doc = new Document();
String longTerm = "longtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongterm";
String text = "This is the text to be indexed. " + longTerm;
doc.add(newTextField("fieldname", text, Field.Store.YES));
doc.add(new FloatDocValuesField("dv", 5.7f));
iwriter.addDocument(doc);
iwriter.shutdown();
// Now search the index:
IndexReader ireader = DirectoryReader.open(directory); // read-only=true
IndexSearcher isearcher = new IndexSearcher(ireader);
assertEquals(1, isearcher.search(new TermQuery(new Term("fieldname", longTerm)), 1).totalHits);
Query query = new TermQuery(new Term("fieldname", "text"));
TopDocs hits = isearcher.search(query, null, 1);
assertEquals(1, hits.totalHits);
// Iterate through the results:
for (int i = 0; i < hits.scoreDocs.length; i++) {
StoredDocument hitDoc = isearcher.doc(hits.scoreDocs[i].doc);
assertEquals(text, hitDoc.get("fieldname"));
assert ireader.leaves().size() == 1;
NumericDocValues dv = ireader.leaves().get(0).reader().getNumericDocValues("dv");
assertEquals(Float.floatToRawIntBits(5.7f), dv.get(hits.scoreDocs[i].doc));
}
ireader.close();
directory.close();
}
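/** Tests two numeric doc values fields on the same document. */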
public void testTwoNumbers() throws IOException {
Directory directory = newDirectory();
RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory);
Document doc = new Document();
String longTerm = "longtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongterm";
String text = "This is the text to be indexed. " + longTerm;
doc.add(newTextField("fieldname", text, Field.Store.YES));
doc.add(new NumericDocValuesField("dv1", 5));
doc.add(new NumericDocValuesField("dv2", 17));
iwriter.addDocument(doc);
iwriter.shutdown();
// Now search the index:
IndexReader ireader = DirectoryReader.open(directory); // read-only=true
IndexSearcher isearcher = new IndexSearcher(ireader);
assertEquals(1, isearcher.search(new TermQuery(new Term("fieldname", longTerm)), 1).totalHits);
Query query = new TermQuery(new Term("fieldname", "text"));
TopDocs hits = isearcher.search(query, null, 1);
assertEquals(1, hits.totalHits);
// Iterate through the results:
for (int i = 0; i < hits.scoreDocs.length; i++) {
StoredDocument hitDoc = isearcher.doc(hits.scoreDocs[i].doc);
assertEquals(text, hitDoc.get("fieldname"));
assert ireader.leaves().size() == 1;
NumericDocValues dv = ireader.leaves().get(0).reader().getNumericDocValues("dv1");
assertEquals(5, dv.get(hits.scoreDocs[i].doc));
dv = ireader.leaves().get(0).reader().getNumericDocValues("dv2");
assertEquals(17, dv.get(hits.scoreDocs[i].doc));
}
ireader.close();
directory.close();
}
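/** Tests two binary doc values fields on the same document. */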
public void testTwoBinaryValues() throws IOException {
Directory directory = newDirectory();
RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory);
Document doc = new Document();
String longTerm = "longtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongterm";
String text = "This is the text to be indexed. " + longTerm;
doc.add(newTextField("fieldname", text, Field.Store.YES));
doc.add(new BinaryDocValuesField("dv1", new BytesRef(longTerm)));
doc.add(new BinaryDocValuesField("dv2", new BytesRef(text)));
iwriter.addDocument(doc);
iwriter.shutdown();
// Now search the index:
IndexReader ireader = DirectoryReader.open(directory); // read-only=true
IndexSearcher isearcher = new IndexSearcher(ireader);
assertEquals(1, isearcher.search(new TermQuery(new Term("fieldname", longTerm)), 1).totalHits);
Query query = new TermQuery(new Term("fieldname", "text"));
TopDocs hits = isearcher.search(query, null, 1);
assertEquals(1, hits.totalHits);
// Iterate through the results:
for (int i = 0; i < hits.scoreDocs.length; i++) {
StoredDocument hitDoc = isearcher.doc(hits.scoreDocs[i].doc);
assertEquals(text, hitDoc.get("fieldname"));
assert ireader.leaves().size() == 1;
BinaryDocValues dv = ireader.leaves().get(0).reader().getBinaryDocValues("dv1");
BytesRef scratch = new BytesRef();
dv.get(hits.scoreDocs[i].doc, scratch);
assertEquals(new BytesRef(longTerm), scratch);
dv = ireader.leaves().get(0).reader().getBinaryDocValues("dv2");
dv.get(hits.scoreDocs[i].doc, scratch);
assertEquals(new BytesRef(text), scratch);
}
ireader.close();
directory.close();
}
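/** Tests mixing a numeric and a binary doc values field on one document. */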
public void testTwoFieldsMixed() throws IOException {
Directory directory = newDirectory();
RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory);
Document doc = new Document();
String longTerm = "longtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongterm";
String text = "This is the text to be indexed. " + longTerm;
doc.add(newTextField("fieldname", text, Field.Store.YES));
doc.add(new NumericDocValuesField("dv1", 5));
doc.add(new BinaryDocValuesField("dv2", new BytesRef("hello world")));
iwriter.addDocument(doc);
iwriter.shutdown();
// Now search the index:
IndexReader ireader = DirectoryReader.open(directory); // read-only=true
IndexSearcher isearcher = new IndexSearcher(ireader);
assertEquals(1, isearcher.search(new TermQuery(new Term("fieldname", longTerm)), 1).totalHits);
Query query = new TermQuery(new Term("fieldname", "text"));
TopDocs hits = isearcher.search(query, null, 1);
assertEquals(1, hits.totalHits);
BytesRef scratch = new BytesRef();
// Iterate through the results:
for (int i = 0; i < hits.scoreDocs.length; i++) {
StoredDocument hitDoc = isearcher.doc(hits.scoreDocs[i].doc);
assertEquals(text, hitDoc.get("fieldname"));
assert ireader.leaves().size() == 1;
NumericDocValues dv = ireader.leaves().get(0).reader().getNumericDocValues("dv1");
assertEquals(5, dv.get(hits.scoreDocs[i].doc));
BinaryDocValues dv2 = ireader.leaves().get(0).reader().getBinaryDocValues("dv2");
dv2.get(hits.scoreDocs[i].doc, scratch);
assertEquals(new BytesRef("hello world"), scratch);
}
ireader.close();
directory.close();
}
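/** Tests mixing sorted, numeric, and binary doc values fields on one document. */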
public void testThreeFieldsMixed() throws IOException {
Directory directory = newDirectory();
RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory);
Document doc = new Document();
String longTerm = "longtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongterm";
String text = "This is the text to be indexed. " + longTerm;
doc.add(newTextField("fieldname", text, Field.Store.YES));
doc.add(new SortedDocValuesField("dv1", new BytesRef("hello hello")));
doc.add(new NumericDocValuesField("dv2", 5));
doc.add(new BinaryDocValuesField("dv3", new BytesRef("hello world")));
iwriter.addDocument(doc);
iwriter.shutdown();
// Now search the index:
IndexReader ireader = DirectoryReader.open(directory); // read-only=true
IndexSearcher isearcher = new IndexSearcher(ireader);
assertEquals(1, isearcher.search(new TermQuery(new Term("fieldname", longTerm)), 1).totalHits);
Query query = new TermQuery(new Term("fieldname", "text"));
TopDocs hits = isearcher.search(query, null, 1);
assertEquals(1, hits.totalHits);
BytesRef scratch = new BytesRef();
// Iterate through the results:
for (int i = 0; i < hits.scoreDocs.length; i++) {
StoredDocument hitDoc = isearcher.doc(hits.scoreDocs[i].doc);
assertEquals(text, hitDoc.get("fieldname"));
assert ireader.leaves().size() == 1;
SortedDocValues dv = ireader.leaves().get(0).reader().getSortedDocValues("dv1");
int ord = dv.getOrd(hits.scoreDocs[i].doc);
dv.lookupOrd(ord, scratch);
assertEquals(new BytesRef("hello hello"), scratch);
NumericDocValues dv2 = ireader.leaves().get(0).reader().getNumericDocValues("dv2");
assertEquals(5, dv2.get(hits.scoreDocs[i].doc));
BinaryDocValues dv3 = ireader.leaves().get(0).reader().getBinaryDocValues("dv3");
dv3.get(hits.scoreDocs[i].doc, scratch);
assertEquals(new BytesRef("hello world"), scratch);
}
ireader.close();
directory.close();
}
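/** Same as testThreeFieldsMixed, but with the fields added in a different order. */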
public void testThreeFieldsMixed2() throws IOException {
Directory directory = newDirectory();
RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory);
Document doc = new Document();
String longTerm = "longtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongterm";
String text = "This is the text to be indexed. " + longTerm;
doc.add(newTextField("fieldname", text, Field.Store.YES));
doc.add(new BinaryDocValuesField("dv1", new BytesRef("hello world")));
doc.add(new SortedDocValuesField("dv2", new BytesRef("hello hello")));
doc.add(new NumericDocValuesField("dv3", 5));
iwriter.addDocument(doc);
iwriter.shutdown();
// Now search the index:
IndexReader ireader = DirectoryReader.open(directory); // read-only=true
IndexSearcher isearcher = new IndexSearcher(ireader);
assertEquals(1, isearcher.search(new TermQuery(new Term("fieldname", longTerm)), 1).totalHits);
Query query = new TermQuery(new Term("fieldname", "text"));
TopDocs hits = isearcher.search(query, null, 1);
assertEquals(1, hits.totalHits);
BytesRef scratch = new BytesRef();
// Iterate through the results:
for (int i = 0; i < hits.scoreDocs.length; i++) {
StoredDocument hitDoc = isearcher.doc(hits.scoreDocs[i].doc);
assertEquals(text, hitDoc.get("fieldname"));
assert ireader.leaves().size() == 1;
SortedDocValues dv = ireader.leaves().get(0).reader().getSortedDocValues("dv2");
int ord = dv.getOrd(hits.scoreDocs[i].doc);
dv.lookupOrd(ord, scratch);
assertEquals(new BytesRef("hello hello"), scratch);
NumericDocValues dv2 = ireader.leaves().get(0).reader().getNumericDocValues("dv3");
assertEquals(5, dv2.get(hits.scoreDocs[i].doc));
BinaryDocValues dv3 = ireader.leaves().get(0).reader().getBinaryDocValues("dv1");
dv3.get(hits.scoreDocs[i].doc, scratch);
assertEquals(new BytesRef("hello world"), scratch);
}
ireader.close();
directory.close();
}
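/** Tests numeric doc values for two documents merged into a single segment. */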
public void testTwoDocumentsNumeric() throws IOException {
Analyzer analyzer = new MockAnalyzer(random());
Directory directory = newDirectory();
IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
conf.setMergePolicy(newLogMergePolicy());
RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory, conf);
Document doc = new Document();
doc.add(new NumericDocValuesField("dv", 1));
iwriter.addDocument(doc);
doc = new Document();
doc.add(new NumericDocValuesField("dv", 2));
iwriter.addDocument(doc);
iwriter.forceMerge(1);
iwriter.shutdown();
// Now search the index:
IndexReader ireader = DirectoryReader.open(directory); // read-only=true
assert ireader.leaves().size() == 1;
NumericDocValues dv = ireader.leaves().get(0).reader().getNumericDocValues("dv");
assertEquals(1, dv.get(0));
assertEquals(2, dv.get(1));
ireader.close();
directory.close();
}
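/** Tests numeric doc values for two documents written to separate segments and then force-merged. */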
public void testTwoDocumentsMerged() throws IOException {
Analyzer analyzer = new MockAnalyzer(random());
Directory directory = newDirectory();
IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
conf.setMergePolicy(newLogMergePolicy());
RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory, conf);
Document doc = new Document();
doc.add(newField("id", "0", StringField.TYPE_STORED));
doc.add(new NumericDocValuesField("dv", -10));
iwriter.addDocument(doc);
iwriter.commit();
doc = new Document();
doc.add(newField("id", "1", StringField.TYPE_STORED));
doc.add(new NumericDocValuesField("dv", 99));
iwriter.addDocument(doc);
iwriter.forceMerge(1);
iwriter.shutdown();
// Now search the index:
IndexReader ireader = DirectoryReader.open(directory); // read-only=true
assert ireader.leaves().size() == 1;
NumericDocValues dv = ireader.leaves().get(0).reader().getNumericDocValues("dv");
for(int i=0;i<2;i++) {
StoredDocument doc2 = ireader.leaves().get(0).reader().document(i);
long expected;
if (doc2.get("id").equals("0")) {
expected = -10;
} else {
expected = 99;
}
assertEquals(expected, dv.get(i));
}
ireader.close();
directory.close();
}
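/** Tests the extremes of the numeric range: Long.MIN_VALUE and Long.MAX_VALUE. */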
public void testBigNumericRange() throws IOException {
Analyzer analyzer = new MockAnalyzer(random());
Directory directory = newDirectory();
IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
conf.setMergePolicy(newLogMergePolicy());
RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory, conf);
Document doc = new Document();
doc.add(new NumericDocValuesField("dv", Long.MIN_VALUE));
iwriter.addDocument(doc);
doc = new Document();
doc.add(new NumericDocValuesField("dv", Long.MAX_VALUE));
iwriter.addDocument(doc);
iwriter.forceMerge(1);
iwriter.shutdown();
// Now search the index:
IndexReader ireader = DirectoryReader.open(directory); // read-only=true
assert ireader.leaves().size() == 1;
NumericDocValues dv = ireader.leaves().get(0).reader().getNumericDocValues("dv");
assertEquals(Long.MIN_VALUE, dv.get(0));
assertEquals(Long.MAX_VALUE, dv.get(1));
ireader.close();
directory.close();
}
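/** Tests two arbitrary extreme negative and positive long values. */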
public void testBigNumericRange2() throws IOException {
Analyzer analyzer = new MockAnalyzer(random());
Directory directory = newDirectory();
IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
conf.setMergePolicy(newLogMergePolicy());
RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory, conf);
Document doc = new Document();
doc.add(new NumericDocValuesField("dv", -8841491950446638677L));
iwriter.addDocument(doc);
doc = new Document();
doc.add(new NumericDocValuesField("dv", 9062230939892376225L));
iwriter.addDocument(doc);
iwriter.forceMerge(1);
iwriter.shutdown();
// Now search the index:
IndexReader ireader = DirectoryReader.open(directory); // read-only=true
assert ireader.leaves().size() == 1;
NumericDocValues dv = ireader.leaves().get(0).reader().getNumericDocValues("dv");
assertEquals(-8841491950446638677L, dv.get(0));
assertEquals(9062230939892376225L, dv.get(1));
ireader.close();
directory.close();
}
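/** Tests a single binary doc values field retrieved through search hits. */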
public void testBytes() throws IOException {
Analyzer analyzer = new MockAnalyzer(random());
Directory directory = newDirectory();
IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory, conf);
Document doc = new Document();
String longTerm = "longtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongterm";
String text = "This is the text to be indexed. " + longTerm;
doc.add(newTextField("fieldname", text, Field.Store.YES));
doc.add(new BinaryDocValuesField("dv", new BytesRef("hello world")));
iwriter.addDocument(doc);
iwriter.shutdown();
// Now search the index:
IndexReader ireader = DirectoryReader.open(directory); // read-only=true
IndexSearcher isearcher = new IndexSearcher(ireader);
assertEquals(1, isearcher.search(new TermQuery(new Term("fieldname", longTerm)), 1).totalHits);
Query query = new TermQuery(new Term("fieldname", "text"));
TopDocs hits = isearcher.search(query, null, 1);
assertEquals(1, hits.totalHits);
BytesRef scratch = new BytesRef();
// Iterate through the results:
for (int i = 0; i < hits.scoreDocs.length; i++) {
StoredDocument hitDoc = isearcher.doc(hits.scoreDocs[i].doc);
assertEquals(text, hitDoc.get("fieldname"));
assert ireader.leaves().size() == 1;
BinaryDocValues dv = ireader.leaves().get(0).reader().getBinaryDocValues("dv");
dv.get(hits.scoreDocs[i].doc, scratch);
assertEquals(new BytesRef("hello world"), scratch);
}
ireader.close();
directory.close();
}
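/** Tests binary doc values for two documents in separate segments, force-merged into one. */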
public void testBytesTwoDocumentsMerged() throws IOException {
Analyzer analyzer = new MockAnalyzer(random());
Directory directory = newDirectory();
IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
conf.setMergePolicy(newLogMergePolicy());
RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory, conf);
Document doc = new Document();
doc.add(newField("id", "0", StringField.TYPE_STORED));
doc.add(new BinaryDocValuesField("dv", new BytesRef("hello world 1")));
iwriter.addDocument(doc);
iwriter.commit();
doc = new Document();
doc.add(newField("id", "1", StringField.TYPE_STORED));
doc.add(new BinaryDocValuesField("dv", new BytesRef("hello 2")));
iwriter.addDocument(doc);
iwriter.forceMerge(1);
iwriter.shutdown();
// Now search the index:
IndexReader ireader = DirectoryReader.open(directory); // read-only=true
assert ireader.leaves().size() == 1;
BinaryDocValues dv = ireader.leaves().get(0).reader().getBinaryDocValues("dv");
BytesRef scratch = new BytesRef();
for(int i=0;i<2;i++) {
StoredDocument doc2 = ireader.leaves().get(0).reader().document(i);
String expected;
if (doc2.get("id").equals("0")) {
expected = "hello world 1";
} else {
expected = "hello 2";
}
dv.get(i, scratch);
assertEquals(expected, scratch.utf8ToString());
}
ireader.close();
directory.close();
}
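/** Tests a single sorted doc values field: looking up the document's ord must return the indexed value. */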
public void testSortedBytes() throws IOException {
Analyzer analyzer = new MockAnalyzer(random());
Directory directory = newDirectory();
IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory, conf);
Document doc = new Document();
String longTerm = "longtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongterm";
String text = "This is the text to be indexed. " + longTerm;
doc.add(newTextField("fieldname", text, Field.Store.YES));
doc.add(new SortedDocValuesField("dv", new BytesRef("hello world")));
iwriter.addDocument(doc);
iwriter.shutdown();
// Now search the index:
IndexReader ireader = DirectoryReader.open(directory); // read-only=true
IndexSearcher isearcher = new IndexSearcher(ireader);
assertEquals(1, isearcher.search(new TermQuery(new Term("fieldname", longTerm)), 1).totalHits);
Query query = new TermQuery(new Term("fieldname", "text"));
TopDocs hits = isearcher.search(query, null, 1);
assertEquals(1, hits.totalHits);
BytesRef scratch = new BytesRef();
// Iterate through the results:
for (int i = 0; i < hits.scoreDocs.length; i++) {
StoredDocument hitDoc = isearcher.doc(hits.scoreDocs[i].doc);
assertEquals(text, hitDoc.get("fieldname"));
assert ireader.leaves().size() == 1;
SortedDocValues dv = ireader.leaves().get(0).reader().getSortedDocValues("dv");
dv.lookupOrd(dv.getOrd(hits.scoreDocs[i].doc), scratch);
assertEquals(new BytesRef("hello world"), scratch);
}
ireader.close();
directory.close();
}
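/** Tests sorted doc values with two distinct values in one merged segment. */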
public void testSortedBytesTwoDocuments() throws IOException {
Analyzer analyzer = new MockAnalyzer(random());
Directory directory = newDirectory();
IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
conf.setMergePolicy(newLogMergePolicy());
RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory, conf);
Document doc = new Document();
doc.add(new SortedDocValuesField("dv", new BytesRef("hello world 1")));
iwriter.addDocument(doc);
doc = new Document();
doc.add(new SortedDocValuesField("dv", new BytesRef("hello world 2")));
iwriter.addDocument(doc);
iwriter.forceMerge(1);
iwriter.shutdown();
// Now search the index:
IndexReader ireader = DirectoryReader.open(directory); // read-only=true
assert ireader.leaves().size() == 1;
SortedDocValues dv = ireader.leaves().get(0).reader().getSortedDocValues("dv");
BytesRef scratch = new BytesRef();
dv.lookupOrd(dv.getOrd(0), scratch);
assertEquals("hello world 1", scratch.utf8ToString());
dv.lookupOrd(dv.getOrd(1), scratch);
assertEquals("hello world 2", scratch.utf8ToString());
ireader.close();
directory.close();
}
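/** Tests that duplicate sorted values share an ord: three documents, two distinct values. */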
public void testSortedBytesThreeDocuments() throws IOException {
Analyzer analyzer = new MockAnalyzer(random());
Directory directory = newDirectory();
IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
conf.setMergePolicy(newLogMergePolicy());
RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory, conf);
Document doc = new Document();
doc.add(new SortedDocValuesField("dv", new BytesRef("hello world 1")));
iwriter.addDocument(doc);
doc = new Document();
doc.add(new SortedDocValuesField("dv", new BytesRef("hello world 2")));
iwriter.addDocument(doc);
doc = new Document();
doc.add(new SortedDocValuesField("dv", new BytesRef("hello world 1")));
iwriter.addDocument(doc);
iwriter.forceMerge(1);
iwriter.shutdown();
// Now search the index:
IndexReader ireader = DirectoryReader.open(directory); // read-only=true
assert ireader.leaves().size() == 1;
SortedDocValues dv = ireader.leaves().get(0).reader().getSortedDocValues("dv");
assertEquals(2, dv.getValueCount());
BytesRef scratch = new BytesRef();
assertEquals(0, dv.getOrd(0));
dv.lookupOrd(0, scratch);
assertEquals("hello world 1", scratch.utf8ToString());
assertEquals(1, dv.getOrd(1));
dv.lookupOrd(1, scratch);
assertEquals("hello world 2", scratch.utf8ToString());
assertEquals(0, dv.getOrd(2));
ireader.close();
directory.close();
}
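/** Tests sorted doc values merged from two segments; ords must stay consistent with the stored ids. */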
public void testSortedBytesTwoDocumentsMerged() throws IOException {
Analyzer analyzer = new MockAnalyzer(random());
Directory directory = newDirectory();
IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
conf.setMergePolicy(newLogMergePolicy());
RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory, conf);
Document doc = new Document();
doc.add(newField("id", "0", StringField.TYPE_STORED));
doc.add(new SortedDocValuesField("dv", new BytesRef("hello world 1")));
iwriter.addDocument(doc);
iwriter.commit();
doc = new Document();
doc.add(newField("id", "1", StringField.TYPE_STORED));
doc.add(new SortedDocValuesField("dv", new BytesRef("hello world 2")));
iwriter.addDocument(doc);
iwriter.forceMerge(1);
iwriter.shutdown();
// Now search the index:
IndexReader ireader = DirectoryReader.open(directory); // read-only=true
assert ireader.leaves().size() == 1;
SortedDocValues dv = ireader.leaves().get(0).reader().getSortedDocValues("dv");
assertEquals(2, dv.getValueCount()); // 2 ords
BytesRef scratch = new BytesRef();
dv.lookupOrd(0, scratch);
assertEquals(new BytesRef("hello world 1"), scratch);
dv.lookupOrd(1, scratch);
assertEquals(new BytesRef("hello world 2"), scratch);
for(int i=0;i<2;i++) {
StoredDocument doc2 = ireader.leaves().get(0).reader().document(i);
String expected;
if (doc2.get("id").equals("0")) {
expected = "hello world 1";
} else {
expected = "hello world 2";
}
dv.lookupOrd(dv.getOrd(i), scratch);
assertEquals(expected, scratch.utf8ToString());
}
ireader.close();
directory.close();
}
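/** Tests merging away every document that had a value: the field ends up empty, or holds a single empty value if the codec cannot represent missing documents. */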
public void testSortedMergeAwayAllValues() throws IOException {
Directory directory = newDirectory();
Analyzer analyzer = new MockAnalyzer(random());
IndexWriterConfig iwconfig = newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
iwconfig.setMergePolicy(newLogMergePolicy());
RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory, iwconfig);
Document doc = new Document();
doc.add(new StringField("id", "0", Field.Store.NO));
iwriter.addDocument(doc);
doc = new Document();
doc.add(new StringField("id", "1", Field.Store.NO));
doc.add(new SortedDocValuesField("field", new BytesRef("hello")));
iwriter.addDocument(doc);
iwriter.commit();
iwriter.deleteDocuments(new Term("id", "1"));
iwriter.forceMerge(1);
DirectoryReader ireader = iwriter.getReader();
iwriter.shutdown();
SortedDocValues dv = getOnlySegmentReader(ireader).getSortedDocValues("field");
if (defaultCodecSupportsDocsWithField()) {
assertEquals(-1, dv.getOrd(0));
assertEquals(0, dv.getValueCount());
} else {
assertEquals(0, dv.getOrd(0));
assertEquals(1, dv.getValueCount());
BytesRef ref = new BytesRef();
dv.lookupOrd(0, ref);
assertEquals(new BytesRef(), ref);
}
ireader.close();
directory.close();
}
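/** Tests that binary values containing newline and carriage-return bytes round-trip unchanged. */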
public void testBytesWithNewline() throws IOException {
Analyzer analyzer = new MockAnalyzer(random());
Directory directory = newDirectory();
IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
conf.setMergePolicy(newLogMergePolicy());
RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory, conf);
Document doc = new Document();
doc.add(new BinaryDocValuesField("dv", new BytesRef("hello\nworld\r1")));
iwriter.addDocument(doc);
iwriter.shutdown();
// Now search the index:
IndexReader ireader = DirectoryReader.open(directory); // read-only=true
assert ireader.leaves().size() == 1;
BinaryDocValues dv = ireader.leaves().get(0).reader().getBinaryDocValues("dv");
BytesRef scratch = new BytesRef();
dv.get(0, scratch);
assertEquals(new BytesRef("hello\nworld\r1"), scratch);
ireader.close();
directory.close();
}
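/** Tests a document with no value for the sorted field: ord is -1 when the codec supports missing docs, and the value reads back as empty bytes either way. */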
public void testMissingSortedBytes() throws IOException {
Analyzer analyzer = new MockAnalyzer(random());
Directory directory = newDirectory();
IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
conf.setMergePolicy(newLogMergePolicy());
RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory, conf);
Document doc = new Document();
doc.add(new SortedDocValuesField("dv", new BytesRef("hello world 2")));
iwriter.addDocument(doc);
// 2nd doc missing the DV field
iwriter.addDocument(new Document());
iwriter.shutdown();
// Now search the index:
IndexReader ireader = DirectoryReader.open(directory); // read-only=true
assert ireader.leaves().size() == 1;
SortedDocValues dv = ireader.leaves().get(0).reader().getSortedDocValues("dv");
BytesRef scratch = new BytesRef();
dv.lookupOrd(dv.getOrd(0), scratch);
assertEquals(new BytesRef("hello world 2"), scratch);
if (defaultCodecSupportsDocsWithField()) {
assertEquals(-1, dv.getOrd(1));
}
dv.get(1, scratch);
assertEquals(new BytesRef(""), scratch);
ireader.close();
directory.close();
}
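/** Tests the TermsEnum exposed over sorted doc values: next(), seekCeil(), seekExact(BytesRef), and seekExact(ord). */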
public void testSortedTermsEnum() throws IOException {
Directory directory = newDirectory();
Analyzer analyzer = new MockAnalyzer(random());
IndexWriterConfig iwconfig = newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
iwconfig.setMergePolicy(newLogMergePolicy());
RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory, iwconfig);
Document doc = new Document();
doc.add(new SortedDocValuesField("field", new BytesRef("hello")));
iwriter.addDocument(doc);
doc = new Document();
doc.add(new SortedDocValuesField("field", new BytesRef("world")));
iwriter.addDocument(doc);
doc = new Document();
doc.add(new SortedDocValuesField("field", new BytesRef("beer")));
iwriter.addDocument(doc);
iwriter.forceMerge(1);
DirectoryReader ireader = iwriter.getReader();
iwriter.shutdown();
SortedDocValues dv = getOnlySegmentReader(ireader).getSortedDocValues("field");
assertEquals(3, dv.getValueCount());
TermsEnum termsEnum = dv.termsEnum();
// next()
assertEquals("beer", termsEnum.next().utf8ToString());
assertEquals(0, termsEnum.ord());
assertEquals("hello", termsEnum.next().utf8ToString());
assertEquals(1, termsEnum.ord());
assertEquals("world", termsEnum.next().utf8ToString());
assertEquals(2, termsEnum.ord());
// seekCeil()
assertEquals(SeekStatus.NOT_FOUND, termsEnum.seekCeil(new BytesRef("ha!")));
assertEquals("hello", termsEnum.term().utf8ToString());
assertEquals(1, termsEnum.ord());
assertEquals(SeekStatus.FOUND, termsEnum.seekCeil(new BytesRef("beer")));
assertEquals("beer", termsEnum.term().utf8ToString());
assertEquals(0, termsEnum.ord());
assertEquals(SeekStatus.END, termsEnum.seekCeil(new BytesRef("zzz")));
// seekExact()
assertTrue(termsEnum.seekExact(new BytesRef("beer")));
assertEquals("beer", termsEnum.term().utf8ToString());
assertEquals(0, termsEnum.ord());
assertTrue(termsEnum.seekExact(new BytesRef("hello")));
assertEquals(Codec.getDefault().toString(), "hello", termsEnum.term().utf8ToString());
assertEquals(1, termsEnum.ord());
assertTrue(termsEnum.seekExact(new BytesRef("world")));
assertEquals("world", termsEnum.term().utf8ToString());
assertEquals(2, termsEnum.ord());
assertFalse(termsEnum.seekExact(new BytesRef("bogus")));
// seek(ord)
termsEnum.seekExact(0);
assertEquals("beer", termsEnum.term().utf8ToString());
assertEquals(0, termsEnum.ord());
termsEnum.seekExact(1);
assertEquals("hello", termsEnum.term().utf8ToString());
assertEquals(1, termsEnum.ord());
termsEnum.seekExact(2);
assertEquals("world", termsEnum.term().utf8ToString());
assertEquals(2, termsEnum.ord());
ireader.close();
directory.close();
}
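/** Tests that the empty BytesRef is a legal sorted value. */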
public void testEmptySortedBytes() throws IOException {
Analyzer analyzer = new MockAnalyzer(random());
Directory directory = newDirectory();
IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
conf.setMergePolicy(newLogMergePolicy());
RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory, conf);
Document doc = new Document();
doc.add(new SortedDocValuesField("dv", new BytesRef("")));
iwriter.addDocument(doc);
doc = new Document();
doc.add(new SortedDocValuesField("dv", new BytesRef("")));
iwriter.addDocument(doc);
iwriter.forceMerge(1);
iwriter.shutdown();
// Now search the index:
IndexReader ireader = DirectoryReader.open(directory); // read-only=true
assert ireader.leaves().size() == 1;
SortedDocValues dv = ireader.leaves().get(0).reader().getSortedDocValues("dv");
BytesRef scratch = new BytesRef();
assertEquals(0, dv.getOrd(0));
assertEquals(0, dv.getOrd(1));
dv.lookupOrd(dv.getOrd(0), scratch);
assertEquals("", scratch.utf8ToString());
ireader.close();
directory.close();
}
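/** Tests that the empty BytesRef is a legal binary value. */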
public void testEmptyBytes() throws IOException {
Analyzer analyzer = new MockAnalyzer(random());
Directory directory = newDirectory();
IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
conf.setMergePolicy(newLogMergePolicy());
RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory, conf);
Document doc = new Document();
doc.add(new BinaryDocValuesField("dv", new BytesRef("")));
iwriter.addDocument(doc);
doc = new Document();
doc.add(new BinaryDocValuesField("dv", new BytesRef("")));
iwriter.addDocument(doc);
iwriter.forceMerge(1);
iwriter.shutdown();
// Now search the index:
IndexReader ireader = DirectoryReader.open(directory); // read-only=true
assert ireader.leaves().size() == 1;
BinaryDocValues dv = ireader.leaves().get(0).reader().getBinaryDocValues("dv");
BytesRef scratch = new BytesRef();
dv.get(0, scratch);
assertEquals("", scratch.utf8ToString());
dv.get(1, scratch);
assertEquals("", scratch.utf8ToString());
ireader.close();
directory.close();
}
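/** Tests a binary value at the maximum legal length (32766 bytes). */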
public void testVeryLargeButLegalBytes() throws IOException {
Analyzer analyzer = new MockAnalyzer(random());
Directory directory = newDirectory();
IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
conf.setMergePolicy(newLogMergePolicy());
RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory, conf);
Document doc = new Document();
byte bytes[] = new byte[32766];
BytesRef b = new BytesRef(bytes);
random().nextBytes(bytes);
doc.add(new BinaryDocValuesField("dv", b));
iwriter.addDocument(doc);
iwriter.shutdown();
// Now search the index:
IndexReader ireader = DirectoryReader.open(directory); // read-only=true
assert ireader.leaves().size() == 1;
BinaryDocValues dv = ireader.leaves().get(0).reader().getBinaryDocValues("dv");
BytesRef scratch = new BytesRef();
dv.get(0, scratch);
assertEquals(new BytesRef(bytes), scratch);
ireader.close();
directory.close();
}
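/** Tests a sorted value at the maximum legal length (32766 bytes). */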
public void testVeryLargeButLegalSortedBytes() throws IOException {
Analyzer analyzer = new MockAnalyzer(random());
Directory directory = newDirectory();
IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
conf.setMergePolicy(newLogMergePolicy());
RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory, conf);
Document doc = new Document();
byte bytes[] = new byte[32766];
BytesRef b = new BytesRef(bytes);
random().nextBytes(bytes);
doc.add(new SortedDocValuesField("dv", b));
iwriter.addDocument(doc);
iwriter.shutdown();
// Now search the index:
IndexReader ireader = DirectoryReader.open(directory); // read-only=true
assert ireader.leaves().size() == 1;
BinaryDocValues dv = ireader.leaves().get(0).reader().getSortedDocValues("dv");
BytesRef scratch = new BytesRef();
dv.get(0, scratch);
assertEquals(new BytesRef(bytes), scratch);
ireader.close();
directory.close();
}
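/** Tests that get() points the scratch ref at the codec's own byte array rather than filling the array the caller passed in. */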
public void testCodecUsesOwnBytes() throws IOException {
Analyzer analyzer = new MockAnalyzer(random());
Directory directory = newDirectory();
IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
conf.setMergePolicy(newLogMergePolicy());
RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory, conf);
Document doc = new Document();
doc.add(new BinaryDocValuesField("dv", new BytesRef("boo!")));
iwriter.addDocument(doc);
iwriter.shutdown();
// Now search the index:
IndexReader ireader = DirectoryReader.open(directory); // read-only=true
assert ireader.leaves().size() == 1;
BinaryDocValues dv = ireader.leaves().get(0).reader().getBinaryDocValues("dv");
byte mybytes[] = new byte[20];
BytesRef scratch = new BytesRef(mybytes);
dv.get(0, scratch);
assertEquals("boo!", scratch.utf8ToString());
assertFalse(scratch.bytes == mybytes);
ireader.close();
directory.close();
}
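/** Same as testCodecUsesOwnBytes, for sorted doc values. */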
public void testCodecUsesOwnSortedBytes() throws IOException {
Analyzer analyzer = new MockAnalyzer(random());
Directory directory = newDirectory();
IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
conf.setMergePolicy(newLogMergePolicy());
RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory, conf);
Document doc = new Document();
doc.add(new SortedDocValuesField("dv", new BytesRef("boo!")));
iwriter.addDocument(doc);
iwriter.shutdown();
// Now search the index:
IndexReader ireader = DirectoryReader.open(directory); // read-only=true
assert ireader.leaves().size() == 1;
BinaryDocValues dv = ireader.leaves().get(0).reader().getSortedDocValues("dv");
byte mybytes[] = new byte[20];
BytesRef scratch = new BytesRef(mybytes);
dv.get(0, scratch);
assertEquals("boo!", scratch.utf8ToString());
assertFalse(scratch.bytes == mybytes);
ireader.close();
directory.close();
}
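/** Tests that successive get() calls return independent byte arrays: an earlier result must remain valid. */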
public void testCodecUsesOwnBytesEachTime() throws IOException {
Analyzer analyzer = new MockAnalyzer(random());
Directory directory = newDirectory();
IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
conf.setMergePolicy(newLogMergePolicy());
RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory, conf);
Document doc = new Document();
doc.add(new BinaryDocValuesField("dv", new BytesRef("foo!")));
iwriter.addDocument(doc);
doc = new Document();
doc.add(new BinaryDocValuesField("dv", new BytesRef("bar!")));
iwriter.addDocument(doc);
iwriter.shutdown();
// Now search the index:
IndexReader ireader = DirectoryReader.open(directory); // read-only=true
assert ireader.leaves().size() == 1;
BinaryDocValues dv = ireader.leaves().get(0).reader().getBinaryDocValues("dv");
BytesRef scratch = new BytesRef();
dv.get(0, scratch);
assertEquals("foo!", scratch.utf8ToString());
BytesRef scratch2 = new BytesRef();
dv.get(1, scratch2);
assertEquals("bar!", scratch2.utf8ToString());
// check scratch is still valid
assertEquals("foo!", scratch.utf8ToString());
ireader.close();
directory.close();
}
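/** Same as testCodecUsesOwnBytesEachTime, for sorted doc values. */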
public void testCodecUsesOwnSortedBytesEachTime() throws IOException {
Analyzer analyzer = new MockAnalyzer(random());
Directory directory = newDirectory();
IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
conf.setMergePolicy(newLogMergePolicy());
RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory, conf);
Document doc = new Document();
doc.add(new SortedDocValuesField("dv", new BytesRef("foo!")));
iwriter.addDocument(doc);
doc = new Document();
doc.add(new SortedDocValuesField("dv", new BytesRef("bar!")));
iwriter.addDocument(doc);
iwriter.shutdown();
// Now search the index:
IndexReader ireader = DirectoryReader.open(directory); // read-only=true
assert ireader.leaves().size() == 1;
BinaryDocValues dv = ireader.leaves().get(0).reader().getSortedDocValues("dv");
BytesRef scratch = new BytesRef();
dv.get(0, scratch);
assertEquals("foo!", scratch.utf8ToString());
BytesRef scratch2 = new BytesRef();
dv.get(1, scratch2);
assertEquals("bar!", scratch2.utf8ToString());
// check scratch is still valid
assertEquals("foo!", scratch.utf8ToString());
ireader.close();
directory.close();
}
/*
* Simple test case to show how to use the API
*/
public void testDocValuesSimple() throws IOException {
Directory dir = newDirectory();
Analyzer analyzer = new MockAnalyzer(random());
IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
conf.setMergePolicy(newLogMergePolicy());
IndexWriter writer = new IndexWriter(dir, conf);
for (int i = 0; i < 5; i++) {
Document doc = new Document();
doc.add(new NumericDocValuesField("docId", i));
doc.add(new TextField("docId", "" + i, Field.Store.NO));
writer.addDocument(doc);
}
writer.commit();
writer.forceMerge(1, true);
writer.shutdown();
DirectoryReader reader = DirectoryReader.open(dir);
assertEquals(1, reader.leaves().size());
IndexSearcher searcher = new IndexSearcher(reader);
BooleanQuery query = new BooleanQuery();
query.add(new TermQuery(new Term("docId", "0")), BooleanClause.Occur.SHOULD);
query.add(new TermQuery(new Term("docId", "1")), BooleanClause.Occur.SHOULD);
query.add(new TermQuery(new Term("docId", "2")), BooleanClause.Occur.SHOULD);
query.add(new TermQuery(new Term("docId", "3")), BooleanClause.Occur.SHOULD);
query.add(new TermQuery(new Term("docId", "4")), BooleanClause.Occur.SHOULD);
TopDocs search = searcher.search(query, 10);
assertEquals(5, search.totalHits);
ScoreDoc[] scoreDocs = search.scoreDocs;
NumericDocValues docValues = getOnlySegmentReader(reader).getNumericDocValues("docId");
for (int i = 0; i < scoreDocs.length; i++) {
assertEquals(i, scoreDocs[i].doc);
assertEquals(i, docValues.get(scoreDocs[i].doc));
}
reader.close();
dir.close();
}
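/** Randomized test for sorted doc values: indexes random realistic unicode strings (plus some documents without a value), then verifies the value count, ord order, lookupTerm(), and per-document values against a BytesRefHash oracle. */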
public void testRandomSortedBytes() throws IOException {
Directory dir = newDirectory();
IndexWriterConfig cfg = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
if (!defaultCodecSupportsDocsWithField()) {
// if the codec doesn't support missing values, we expect missing to be mapped to an empty byte[]
// by the impersonator, but we have to give it a chance to merge them to this
cfg.setMergePolicy(newLogMergePolicy());
}
RandomIndexWriter w = new RandomIndexWriter(random(), dir, cfg);
int numDocs = atLeast(100);
BytesRefHash hash = new BytesRefHash();
Map<String, String> docToString = new HashMap<>();
int maxLength = TestUtil.nextInt(random(), 1, 50);
for (int i = 0; i < numDocs; i++) {
Document doc = new Document();
doc.add(newTextField("id", "" + i, Field.Store.YES));
String string = TestUtil.randomRealisticUnicodeString(random(), 1, maxLength);
BytesRef br = new BytesRef(string);
doc.add(new SortedDocValuesField("field", br));
hash.add(br);
docToString.put("" + i, string);
w.addDocument(doc);
}
if (rarely()) {
w.commit();
}
int numDocsNoValue = atLeast(10);
for (int i = 0; i < numDocsNoValue; i++) {
Document doc = new Document();
doc.add(newTextField("id", "noValue", Field.Store.YES));
w.addDocument(doc);
}
if (!defaultCodecSupportsDocsWithField()) {
BytesRef bytesRef = new BytesRef();
hash.add(bytesRef); // add empty value for the gaps
}
if (rarely()) {
w.commit();
}
if (!defaultCodecSupportsDocsWithField()) {
// if the codec doesn't support missing values, we expect missing to be mapped to an empty byte[]
// by the impersonator, but we have to give it a chance to merge them to this
w.forceMerge(1);
}
for (int i = 0; i < numDocs; i++) {
Document doc = new Document();
String id = "" + i + numDocs;
doc.add(newTextField("id", id, Field.Store.YES));
String string = TestUtil.randomRealisticUnicodeString(random(), 1, maxLength);
BytesRef br = new BytesRef(string);
hash.add(br);
docToString.put(id, string);
doc.add(new SortedDocValuesField("field", br));
w.addDocument(doc);
}
w.commit();
IndexReader reader = w.getReader();
SortedDocValues docValues = MultiDocValues.getSortedValues(reader, "field");
int[] sort = hash.sort(BytesRef.getUTF8SortedAsUnicodeComparator());
BytesRef expected = new BytesRef();
BytesRef actual = new BytesRef();
assertEquals(hash.size(), docValues.getValueCount());
for (int i = 0; i < hash.size(); i++) {
hash.get(sort[i], expected);
docValues.lookupOrd(i, actual);
assertEquals(expected.utf8ToString(), actual.utf8ToString());
int ord = docValues.lookupTerm(expected);
assertEquals(i, ord);
}
AtomicReader slowR = SlowCompositeReaderWrapper.wrap(reader);
Set<Entry<String, String>> entrySet = docToString.entrySet();
for (Entry<String, String> entry : entrySet) {
// pk lookup
DocsEnum termDocsEnum = slowR.termDocsEnum(new Term("id", entry.getKey()));
int docId = termDocsEnum.nextDoc();
expected = new BytesRef(entry.getValue());
docValues.get(docId, actual);
assertEquals(expected, actual);
}
reader.close();
w.shutdown();
dir.close();
}
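/** Supplies the long values to index in doTestNumericsVsStoredFields. */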
static abstract class LongProducer {
abstract long next();
}
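/** Tests numeric doc values against stored fields, drawing random longs from [minValue, maxValue]. */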
private void doTestNumericsVsStoredFields(final long minValue, final long maxValue) throws Exception {
doTestNumericsVsStoredFields(new LongProducer() {
@Override
long next() {
return TestUtil.nextLong(random(), minValue, maxValue);
}
});
}
private void doTestNumericsVsStoredFields(LongProducer longs) throws Exception {
Directory dir = newDirectory();
IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
RandomIndexWriter writer = new RandomIndexWriter(random(), dir, conf);
Document doc = new Document();
Field idField = new StringField("id", "", Field.Store.NO);
Field storedField = newStringField("stored", "", Field.Store.YES);
Field dvField = new NumericDocValuesField("dv", 0);
doc.add(idField);
doc.add(storedField);
doc.add(dvField);
// index some docs
int numDocs = atLeast(300);
// numDocs should always be > 256 so that, in case of a codec that optimizes
// for numbers of values <= 256, all storage layouts are tested
assert numDocs > 256;
for (int i = 0; i < numDocs; i++) {
idField.setStringValue(Integer.toString(i));
long value = longs.next();
storedField.setStringValue(Long.toString(value));
dvField.setLongValue(value);
writer.addDocument(doc);
if (random().nextInt(31) == 0) {
writer.commit();
}
}
// delete some docs
int numDeletions = random().nextInt(numDocs/10);
for (int i = 0; i < numDeletions; i++) {
int id = random().nextInt(numDocs);
writer.deleteDocuments(new Term("id", Integer.toString(id)));
}
// merge some segments and ensure that at least one of them has more than
// 256 values
writer.forceMerge(numDocs / 256);
writer.shutdown();
// compare
DirectoryReader ir = DirectoryReader.open(dir);
for (AtomicReaderContext context : ir.leaves()) {
AtomicReader r = context.reader();
NumericDocValues docValues = r.getNumericDocValues("dv");
for (int i = 0; i < r.maxDoc(); i++) {
long storedValue = Long.parseLong(r.document(i).get("stored"));
assertEquals(storedValue, docValues.get(i));
}
}
ir.close();
dir.close();
}
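// the following tests run doTestNumericsVsStoredFields over progressively
// wider value ranges: boolean, byte, short, int, and long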
public void testBooleanNumericsVsStoredFields() throws Exception {
int numIterations = atLeast(1);
for (int i = 0; i < numIterations; i++) {
doTestNumericsVsStoredFields(0, 1);
}
}
public void testByteNumericsVsStoredFields() throws Exception {
int numIterations = atLeast(1);
for (int i = 0; i < numIterations; i++) {
doTestNumericsVsStoredFields(Byte.MIN_VALUE, Byte.MAX_VALUE);
}
}
public void testShortNumericsVsStoredFields() throws Exception {
int numIterations = atLeast(1);
for (int i = 0; i < numIterations; i++) {
doTestNumericsVsStoredFields(Short.MIN_VALUE, Short.MAX_VALUE);
}
}
public void testIntNumericsVsStoredFields() throws Exception {
int numIterations = atLeast(1);
for (int i = 0; i < numIterations; i++) {
doTestNumericsVsStoredFields(Integer.MIN_VALUE, Integer.MAX_VALUE);
}
}
public void testLongNumericsVsStoredFields() throws Exception {
int numIterations = atLeast(1);
for (int i = 0; i < numIterations; i++) {
doTestNumericsVsStoredFields(Long.MIN_VALUE, Long.MAX_VALUE);
}
}
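/** Indexes random byte[] values (lengths in [minLength, maxLength]) as both a stored field and a binary doc values field, then checks that the two agree for every document. */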
private void doTestBinaryVsStoredFields(int minLength, int maxLength) throws Exception {
Directory dir = newDirectory();
IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
RandomIndexWriter writer = new RandomIndexWriter(random(), dir, conf);
Document doc = new Document();
Field idField = new StringField("id", "", Field.Store.NO);
Field storedField = new StoredField("stored", new byte[0]);
Field dvField = new BinaryDocValuesField("dv", new BytesRef());
doc.add(idField);
doc.add(storedField);
doc.add(dvField);
// index some docs
int numDocs = atLeast(300);
for (int i = 0; i < numDocs; i++) {
idField.setStringValue(Integer.toString(i));
final int length;
if (minLength == maxLength) {
length = minLength; // fixed length
} else {
length = TestUtil.nextInt(random(), minLength, maxLength);
}
byte buffer[] = new byte[length];
random().nextBytes(buffer);
storedField.setBytesValue(buffer);
dvField.setBytesValue(buffer);
writer.addDocument(doc);
if (random().nextInt(31) == 0) {
writer.commit();
}
}
// delete some docs
int numDeletions = random().nextInt(numDocs/10);
for (int i = 0; i < numDeletions; i++) {
int id = random().nextInt(numDocs);
writer.deleteDocuments(new Term("id", Integer.toString(id)));
}
writer.shutdown();
// compare
DirectoryReader ir = DirectoryReader.open(dir);
for (AtomicReaderContext context : ir.leaves()) {
AtomicReader r = context.reader();
BinaryDocValues docValues = r.getBinaryDocValues("dv");
for (int i = 0; i < r.maxDoc(); i++) {
BytesRef binaryValue = r.document(i).getBinaryValue("stored");
BytesRef scratch = new BytesRef();
docValues.get(i, scratch);
assertEquals(binaryValue, scratch);
}
}
ir.close();
dir.close();
}
public void testBinaryFixedLengthVsStoredFields() throws Exception {
int numIterations = atLeast(1);
for (int i = 0; i < numIterations; i++) {
int fixedLength = TestUtil.nextInt(random(), 0, 10);
doTestBinaryVsStoredFields(fixedLength, fixedLength);
}
}
public void testBinaryVariableLengthVsStoredFields() throws Exception {
int numIterations = atLeast(1);
for (int i = 0; i < numIterations; i++) {
doTestBinaryVsStoredFields(0, 10);
}
}
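/** Same as doTestBinaryVsStoredFields, but using a sorted doc values field. */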
private void doTestSortedVsStoredFields(int minLength, int maxLength) throws Exception {
Directory dir = newDirectory();
IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
RandomIndexWriter writer = new RandomIndexWriter(random(), dir, conf);
Document doc = new Document();
Field idField = new StringField("id", "", Field.Store.NO);
Field storedField = new StoredField("stored", new byte[0]);
Field dvField = new SortedDocValuesField("dv", new BytesRef());
doc.add(idField);
doc.add(storedField);
doc.add(dvField);
// index some docs
int numDocs = atLeast(300);
for (int i = 0; i < numDocs; i++) {
idField.setStringValue(Integer.toString(i));
final int length;
if (minLength == maxLength) {
length = minLength; // fixed length
} else {
length = TestUtil.nextInt(random(), minLength, maxLength);
}
byte buffer[] = new byte[length];
random().nextBytes(buffer);
storedField.setBytesValue(buffer);
dvField.setBytesValue(buffer);
writer.addDocument(doc);
if (random().nextInt(31) == 0) {
writer.commit();
}
}
// delete some docs
int numDeletions = random().nextInt(numDocs/10);
for (int i = 0; i < numDeletions; i++) {
int id = random().nextInt(numDocs);
writer.deleteDocuments(new Term("id", Integer.toString(id)));
}
writer.shutdown();
// compare
DirectoryReader ir = DirectoryReader.open(dir);
for (AtomicReaderContext context : ir.leaves()) {
AtomicReader r = context.reader();
BinaryDocValues docValues = r.getSortedDocValues("dv");
for (int i = 0; i < r.maxDoc(); i++) {
BytesRef binaryValue = r.document(i).getBinaryValue("stored");
BytesRef scratch = new BytesRef();
docValues.get(i, scratch);
assertEquals(binaryValue, scratch);
}
}
ir.close();
dir.close();
}
public void testSortedFixedLengthVsStoredFields() throws Exception {
int numIterations = atLeast(1);
for (int i = 0; i < numIterations; i++) {
int fixedLength = TestUtil.nextInt(random(), 1, 10);
doTestSortedVsStoredFields(fixedLength, fixedLength);
}
}
public void testSortedVariableLengthVsStoredFields() throws Exception {
int numIterations = atLeast(1);
for (int i = 0; i < numIterations; i++) {
doTestSortedVsStoredFields(1, 10);
}
}
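/** Tests a SORTED_SET field with a single value on a single document. */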
public void testSortedSetOneValue() throws IOException {
assumeTrue("Codec does not support SORTED_SET", defaultCodecSupportsSortedSet());
Directory directory = newDirectory();
RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory);
Document doc = new Document();
doc.add(new SortedSetDocValuesField("field", new BytesRef("hello")));
iwriter.addDocument(doc);
DirectoryReader ireader = iwriter.getReader();
iwriter.shutdown();
SortedSetDocValues dv = getOnlySegmentReader(ireader).getSortedSetDocValues("field");
dv.setDocument(0);
assertEquals(0, dv.nextOrd());
assertEquals(NO_MORE_ORDS, dv.nextOrd());
BytesRef bytes = new BytesRef();
dv.lookupOrd(0, bytes);
assertEquals(new BytesRef("hello"), bytes);
ireader.close();
directory.close();
}
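/** Tests two independent SORTED_SET fields on the same document. */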
public void testSortedSetTwoFields() throws IOException {
assumeTrue("Codec does not support SORTED_SET", defaultCodecSupportsSortedSet());
Directory directory = newDirectory();
RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory);
Document doc = new Document();
doc.add(new SortedSetDocValuesField("field", new BytesRef("hello")));
doc.add(new SortedSetDocValuesField("field2", new BytesRef("world")));
iwriter.addDocument(doc);
DirectoryReader ireader = iwriter.getReader();
iwriter.shutdown();
SortedSetDocValues dv = getOnlySegmentReader(ireader).getSortedSetDocValues("field");
dv.setDocument(0);
assertEquals(0, dv.nextOrd());
assertEquals(NO_MORE_ORDS, dv.nextOrd());
BytesRef bytes = new BytesRef();
dv.lookupOrd(0, bytes);
assertEquals(new BytesRef("hello"), bytes);
dv = getOnlySegmentReader(ireader).getSortedSetDocValues("field2");
dv.setDocument(0);
assertEquals(0, dv.nextOrd());
assertEquals(NO_MORE_ORDS, dv.nextOrd());
dv.lookupOrd(0, bytes);
assertEquals(new BytesRef("world"), bytes);
ireader.close();
directory.close();
}
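/** Tests SORTED_SET values from two segments force-merged into one. */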
public void testSortedSetTwoDocumentsMerged() throws IOException {
assumeTrue("Codec does not support SORTED_SET", defaultCodecSupportsSortedSet());
Directory directory = newDirectory();
Analyzer analyzer = new MockAnalyzer(random());
IndexWriterConfig iwconfig = newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
iwconfig.setMergePolicy(newLogMergePolicy());
RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory, iwconfig);
Document doc = new Document();
doc.add(new SortedSetDocValuesField("field", new BytesRef("hello")));
iwriter.addDocument(doc);
iwriter.commit();
doc = new Document();
doc.add(new SortedSetDocValuesField("field", new BytesRef("world")));
iwriter.addDocument(doc);
iwriter.forceMerge(1);
DirectoryReader ireader = iwriter.getReader();
iwriter.shutdown();
SortedSetDocValues dv = getOnlySegmentReader(ireader).getSortedSetDocValues("field");
assertEquals(2, dv.getValueCount());
dv.setDocument(0);
assertEquals(0, dv.nextOrd());
assertEquals(NO_MORE_ORDS, dv.nextOrd());
BytesRef bytes = new BytesRef();
dv.lookupOrd(0, bytes);
assertEquals(new BytesRef("hello"), bytes);
dv.setDocument(1);
assertEquals(1, dv.nextOrd());
assertEquals(NO_MORE_ORDS, dv.nextOrd());
dv.lookupOrd(1, bytes);
assertEquals(new BytesRef("world"), bytes);
ireader.close();
directory.close();
}
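/** Tests a SORTED_SET field with two values on one document. */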
public void testSortedSetTwoValues() throws IOException {
assumeTrue("Codec does not support SORTED_SET", defaultCodecSupportsSortedSet());
Directory directory = newDirectory();
RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory);
Document doc = new Document();
doc.add(new SortedSetDocValuesField("field", new BytesRef("hello")));
doc.add(new SortedSetDocValuesField("field", new BytesRef("world")));
iwriter.addDocument(doc);
DirectoryReader ireader = iwriter.getReader();
iwriter.shutdown();
SortedSetDocValues dv = getOnlySegmentReader(ireader).getSortedSetDocValues("field");
dv.setDocument(0);
assertEquals(0, dv.nextOrd());
assertEquals(1, dv.nextOrd());
assertEquals(NO_MORE_ORDS, dv.nextOrd());
BytesRef bytes = new BytesRef();
dv.lookupOrd(0, bytes);
assertEquals(new BytesRef("hello"), bytes);
dv.lookupOrd(1, bytes);
assertEquals(new BytesRef("world"), bytes);
ireader.close();
directory.close();
}
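/** Tests that values added out of sorted order still come back as increasing ords over the sorted dictionary. */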
public void testSortedSetTwoValuesUnordered() throws IOException {
assumeTrue("Codec does not support SORTED_SET", defaultCodecSupportsSortedSet());
Directory directory = newDirectory();
RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory);
Document doc = new Document();
doc.add(new SortedSetDocValuesField("field", new BytesRef("world")));
doc.add(new SortedSetDocValuesField("field", new BytesRef("hello")));
iwriter.addDocument(doc);
DirectoryReader ireader = iwriter.getReader();
iwriter.shutdown();
SortedSetDocValues dv = getOnlySegmentReader(ireader).getSortedSetDocValues("field");
dv.setDocument(0);
assertEquals(0, dv.nextOrd());
assertEquals(1, dv.nextOrd());
assertEquals(NO_MORE_ORDS, dv.nextOrd());
BytesRef bytes = new BytesRef();
dv.lookupOrd(0, bytes);
assertEquals(new BytesRef("hello"), bytes);
dv.lookupOrd(1, bytes);
assertEquals(new BytesRef("world"), bytes);
ireader.close();
directory.close();
}
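/** Tests merging two docs that share a value: the merged dictionary is beer=0, hello=1, world=2, and per-doc ords are remapped accordingly. */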
public void testSortedSetThreeValuesTwoDocs() throws IOException {
assumeTrue("Codec does not support SORTED_SET", defaultCodecSupportsSortedSet());
Directory directory = newDirectory();
Analyzer analyzer = new MockAnalyzer(random());
IndexWriterConfig iwconfig = newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
iwconfig.setMergePolicy(newLogMergePolicy());
RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory, iwconfig);
Document doc = new Document();
doc.add(new SortedSetDocValuesField("field", new BytesRef("hello")));
doc.add(new SortedSetDocValuesField("field", new BytesRef("world")));
iwriter.addDocument(doc);
iwriter.commit();
doc = new Document();
doc.add(new SortedSetDocValuesField("field", new BytesRef("hello")));
doc.add(new SortedSetDocValuesField("field", new BytesRef("beer")));
iwriter.addDocument(doc);
iwriter.forceMerge(1);
DirectoryReader ireader = iwriter.getReader();
iwriter.shutdown();
SortedSetDocValues dv = getOnlySegmentReader(ireader).getSortedSetDocValues("field");
assertEquals(3, dv.getValueCount());
dv.setDocument(0);
assertEquals(1, dv.nextOrd());
assertEquals(2, dv.nextOrd());
assertEquals(NO_MORE_ORDS, dv.nextOrd());
dv.setDocument(1);
assertEquals(0, dv.nextOrd());
assertEquals(1, dv.nextOrd());
assertEquals(NO_MORE_ORDS, dv.nextOrd());
BytesRef bytes = new BytesRef();
dv.lookupOrd(0, bytes);
assertEquals(new BytesRef("beer"), bytes);
dv.lookupOrd(1, bytes);
assertEquals(new BytesRef("hello"), bytes);
dv.lookupOrd(2, bytes);
assertEquals(new BytesRef("world"), bytes);
ireader.close();
directory.close();
}
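/** Tests a doc without the field following a doc with one value, in a single segment. */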
public void testSortedSetTwoDocumentsLastMissing() throws IOException {
assumeTrue("Codec does not support SORTED_SET", defaultCodecSupportsSortedSet());
Directory directory = newDirectory();
Analyzer analyzer = new MockAnalyzer(random());
IndexWriterConfig iwconfig = newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
iwconfig.setMergePolicy(newLogMergePolicy());
RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory, iwconfig);
Document doc = new Document();
doc.add(new SortedSetDocValuesField("field", new BytesRef("hello")));
iwriter.addDocument(doc);
doc = new Document();
iwriter.addDocument(doc);
iwriter.forceMerge(1);
DirectoryReader ireader = iwriter.getReader();
iwriter.shutdown();
SortedSetDocValues dv = getOnlySegmentReader(ireader).getSortedSetDocValues("field");
assertEquals(1, dv.getValueCount());
dv.setDocument(0);
assertEquals(0, dv.nextOrd());
assertEquals(NO_MORE_ORDS, dv.nextOrd());
BytesRef bytes = new BytesRef();
dv.lookupOrd(0, bytes);
assertEquals(new BytesRef("hello"), bytes);
ireader.close();
directory.close();
}
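/** Like the previous test, but the empty doc lands in a second segment that is then force-merged. */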
public void testSortedSetTwoDocumentsLastMissingMerge() throws IOException {
assumeTrue("Codec does not support SORTED_SET", defaultCodecSupportsSortedSet());
Directory directory = newDirectory();
Analyzer analyzer = new MockAnalyzer(random());
IndexWriterConfig iwconfig = newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
iwconfig.setMergePolicy(newLogMergePolicy());
RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory, iwconfig);
Document doc = new Document();
doc.add(new SortedSetDocValuesField("field", new BytesRef("hello")));
iwriter.addDocument(doc);
iwriter.commit();
doc = new Document();
iwriter.addDocument(doc);
iwriter.forceMerge(1);
DirectoryReader ireader = iwriter.getReader();
iwriter.shutdown();
SortedSetDocValues dv = getOnlySegmentReader(ireader).getSortedSetDocValues("field");
assertEquals(1, dv.getValueCount());
dv.setDocument(0);
assertEquals(0, dv.nextOrd());
assertEquals(NO_MORE_ORDS, dv.nextOrd());
BytesRef bytes = new BytesRef();
dv.lookupOrd(0, bytes);
assertEquals(new BytesRef("hello"), bytes);
ireader.close();
directory.close();
}
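/** Tests a doc without the field preceding a doc with one value, in a single segment. */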
public void testSortedSetTwoDocumentsFirstMissing() throws IOException {
assumeTrue("Codec does not support SORTED_SET", defaultCodecSupportsSortedSet());
Directory directory = newDirectory();
Analyzer analyzer = new MockAnalyzer(random());
IndexWriterConfig iwconfig = newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
iwconfig.setMergePolicy(newLogMergePolicy());
RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory, iwconfig);
Document doc = new Document();
iwriter.addDocument(doc);
doc = new Document();
doc.add(new SortedSetDocValuesField("field", new BytesRef("hello")));
iwriter.addDocument(doc);
iwriter.forceMerge(1);
DirectoryReader ireader = iwriter.getReader();
iwriter.shutdown();
SortedSetDocValues dv = getOnlySegmentReader(ireader).getSortedSetDocValues("field");
assertEquals(1, dv.getValueCount());
dv.setDocument(1);
assertEquals(0, dv.nextOrd());
assertEquals(NO_MORE_ORDS, dv.nextOrd());
BytesRef bytes = new BytesRef();
dv.lookupOrd(0, bytes);
assertEquals(new BytesRef("hello"), bytes);
ireader.close();
directory.close();
}
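/** Like the previous test, but with a commit in between so the merge sees the empty doc in its own segment. */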
public void testSortedSetTwoDocumentsFirstMissingMerge() throws IOException {
assumeTrue("Codec does not support SORTED_SET", defaultCodecSupportsSortedSet());
Directory directory = newDirectory();
Analyzer analyzer = new MockAnalyzer(random());
IndexWriterConfig iwconfig = newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
iwconfig.setMergePolicy(newLogMergePolicy());
RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory, iwconfig);
Document doc = new Document();
iwriter.addDocument(doc);
iwriter.commit();
doc = new Document();
doc.add(new SortedSetDocValuesField("field", new BytesRef("hello")));
iwriter.addDocument(doc);
iwriter.forceMerge(1);
DirectoryReader ireader = iwriter.getReader();
iwriter.shutdown();
SortedSetDocValues dv = getOnlySegmentReader(ireader).getSortedSetDocValues("field");
assertEquals(1, dv.getValueCount());
dv.setDocument(1);
assertEquals(0, dv.nextOrd());
assertEquals(NO_MORE_ORDS, dv.nextOrd());
BytesRef bytes = new BytesRef();
dv.lookupOrd(0, bytes);
assertEquals(new BytesRef("hello"), bytes);
ireader.close();
directory.close();
}
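/** Tests that deleting the only doc with a value and force-merging leaves an empty (valueCount=0) SORTED_SET field. */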
public void testSortedSetMergeAwayAllValues() throws IOException {
assumeTrue("Codec does not support SORTED_SET", defaultCodecSupportsSortedSet());
Directory directory = newDirectory();
Analyzer analyzer = new MockAnalyzer(random());
IndexWriterConfig iwconfig = newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
iwconfig.setMergePolicy(newLogMergePolicy());
RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory, iwconfig);
Document doc = new Document();
doc.add(new StringField("id", "0", Field.Store.NO));
iwriter.addDocument(doc);
doc = new Document();
doc.add(new StringField("id", "1", Field.Store.NO));
doc.add(new SortedSetDocValuesField("field", new BytesRef("hello")));
iwriter.addDocument(doc);
iwriter.commit();
iwriter.deleteDocuments(new Term("id", "1"));
iwriter.forceMerge(1);
DirectoryReader ireader = iwriter.getReader();
iwriter.shutdown();
SortedSetDocValues dv = getOnlySegmentReader(ireader).getSortedSetDocValues("field");
assertEquals(0, dv.getValueCount());
ireader.close();
directory.close();
}
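/** Tests the TermsEnum view over SORTED_SET: next(), seekCeil(), seekExact(BytesRef) and seekExact(ord). */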
public void testSortedSetTermsEnum() throws IOException {
assumeTrue("Codec does not support SORTED_SET", defaultCodecSupportsSortedSet());
Directory directory = newDirectory();
Analyzer analyzer = new MockAnalyzer(random());
IndexWriterConfig iwconfig = newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
iwconfig.setMergePolicy(newLogMergePolicy());
RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory, iwconfig);
Document doc = new Document();
doc.add(new SortedSetDocValuesField("field", new BytesRef("hello")));
doc.add(new SortedSetDocValuesField("field", new BytesRef("world")));
doc.add(new SortedSetDocValuesField("field", new BytesRef("beer")));
iwriter.addDocument(doc);
DirectoryReader ireader = iwriter.getReader();
iwriter.shutdown();
SortedSetDocValues dv = getOnlySegmentReader(ireader).getSortedSetDocValues("field");
assertEquals(3, dv.getValueCount());
TermsEnum termsEnum = dv.termsEnum();
// next()
assertEquals("beer", termsEnum.next().utf8ToString());
assertEquals(0, termsEnum.ord());
assertEquals("hello", termsEnum.next().utf8ToString());
assertEquals(1, termsEnum.ord());
assertEquals("world", termsEnum.next().utf8ToString());
assertEquals(2, termsEnum.ord());
// seekCeil()
assertEquals(SeekStatus.NOT_FOUND, termsEnum.seekCeil(new BytesRef("ha!")));
assertEquals("hello", termsEnum.term().utf8ToString());
assertEquals(1, termsEnum.ord());
assertEquals(SeekStatus.FOUND, termsEnum.seekCeil(new BytesRef("beer")));
assertEquals("beer", termsEnum.term().utf8ToString());
assertEquals(0, termsEnum.ord());
assertEquals(SeekStatus.END, termsEnum.seekCeil(new BytesRef("zzz")));
// seekExact()
assertTrue(termsEnum.seekExact(new BytesRef("beer")));
assertEquals("beer", termsEnum.term().utf8ToString());
assertEquals(0, termsEnum.ord());
assertTrue(termsEnum.seekExact(new BytesRef("hello")));
assertEquals("hello", termsEnum.term().utf8ToString());
assertEquals(1, termsEnum.ord());
assertTrue(termsEnum.seekExact(new BytesRef("world")));
assertEquals("world", termsEnum.term().utf8ToString());
assertEquals(2, termsEnum.ord());
assertFalse(termsEnum.seekExact(new BytesRef("bogus")));
// seek(ord)
termsEnum.seekExact(0);
assertEquals("beer", termsEnum.term().utf8ToString());
assertEquals(0, termsEnum.ord());
termsEnum.seekExact(1);
assertEquals("hello", termsEnum.term().utf8ToString());
assertEquals(1, termsEnum.ord());
termsEnum.seekExact(2);
assertEquals("world", termsEnum.term().utf8ToString());
assertEquals(2, termsEnum.ord());
ireader.close();
directory.close();
}
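/** Indexes random string sets as both stored fields (in sorted order) and SORTED_SET doc values (shuffled), then checks that the two representations agree doc by doc. */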
private void doTestSortedSetVsStoredFields(int minLength, int maxLength, int maxValuesPerDoc) throws Exception {
Directory dir = newDirectory();
IndexWriterConfig conf = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
RandomIndexWriter writer = new RandomIndexWriter(random(), dir, conf);
// index some docs
int numDocs = atLeast(300);
for (int i = 0; i < numDocs; i++) {
Document doc = new Document();
Field idField = new StringField("id", Integer.toString(i), Field.Store.NO);
doc.add(idField);
final int length;
if (minLength == maxLength) {
length = minLength; // fixed length
} else {
length = TestUtil.nextInt(random(), minLength, maxLength);
}
int numValues = TestUtil.nextInt(random(), 0, maxValuesPerDoc);
// create a random set of strings
Set<String> values = new TreeSet<>();
for (int v = 0; v < numValues; v++) {
values.add(TestUtil.randomSimpleString(random(), length));
}
// add ordered to the stored field
for (String v : values) {
doc.add(new StoredField("stored", v));
}
// add in any order to the dv field
ArrayList<String> unordered = new ArrayList<>(values);
Collections.shuffle(unordered, random());
for (String v : unordered) {
doc.add(new SortedSetDocValuesField("dv", new BytesRef(v)));
}
writer.addDocument(doc);
if (random().nextInt(31) == 0) {
writer.commit();
}
}
// delete some docs
int numDeletions = random().nextInt(numDocs/10);
for (int i = 0; i < numDeletions; i++) {
int id = random().nextInt(numDocs);
writer.deleteDocuments(new Term("id", Integer.toString(id)));
}
writer.shutdown();
// compare
DirectoryReader ir = DirectoryReader.open(dir);
for (AtomicReaderContext context : ir.leaves()) {
AtomicReader r = context.reader();
SortedSetDocValues docValues = r.getSortedSetDocValues("dv");
BytesRef scratch = new BytesRef();
for (int i = 0; i < r.maxDoc(); i++) {
String stringValues[] = r.document(i).getValues("stored");
if (docValues != null) {
docValues.setDocument(i);
}
for (int j = 0; j < stringValues.length; j++) {
// use JUnit assertions here: Java asserts are silently skipped unless -ea is set,
// and the final ord check below must not hide a side effect inside an assert
assertNotNull(docValues);
long ord = docValues.nextOrd();
assertTrue(ord != NO_MORE_ORDS);
docValues.lookupOrd(ord, scratch);
assertEquals(stringValues[j], scratch.utf8ToString());
}
if (docValues != null) {
assertEquals(NO_MORE_ORDS, docValues.nextOrd());
}
}
}
ir.close();
dir.close();
}
public void testSortedSetFixedLengthVsStoredFields() throws Exception {
assumeTrue("Codec does not support SORTED_SET", defaultCodecSupportsSortedSet());
int numIterations = atLeast(1);
for (int i = 0; i < numIterations; i++) {
int fixedLength = TestUtil.nextInt(random(), 1, 10);
doTestSortedSetVsStoredFields(fixedLength, fixedLength, 16);
}
}
public void testSortedSetVariableLengthVsStoredFields() throws Exception {
assumeTrue("Codec does not support SORTED_SET", defaultCodecSupportsSortedSet());
int numIterations = atLeast(1);
for (int i = 0; i < numIterations; i++) {
doTestSortedSetVsStoredFields(1, 10, 16);
}
}
public void testSortedSetFixedLengthSingleValuedVsStoredFields() throws Exception {
assumeTrue("Codec does not support SORTED_SET", defaultCodecSupportsSortedSet());
int numIterations = atLeast(1);
for (int i = 0; i < numIterations; i++) {
int fixedLength = TestUtil.nextInt(random(), 1, 10);
doTestSortedSetVsStoredFields(fixedLength, fixedLength, 1);
}
}
public void testSortedSetVariableLengthSingleValuedVsStoredFields() throws Exception {
assumeTrue("Codec does not support SORTED_SET", defaultCodecSupportsSortedSet());
int numIterations = atLeast(1);
for (int i = 0; i < numIterations; i++) {
doTestSortedSetVsStoredFields(1, 10, 1);
}
}
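/** Tests values of the form min + mul * k, the pattern that GCD compression is meant to encode compactly. */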
public void testGCDCompression() throws Exception {
int numIterations = atLeast(1);
for (int i = 0; i < numIterations; i++) {
final long min = - (((long) random().nextInt(1 << 30)) << 32);
final long mul = random().nextInt() & 0xFFFFFFFFL;
final LongProducer longs = new LongProducer() {
@Override
long next() {
return min + mul * random().nextInt(1 << 20);
}
};
doTestNumericsVsStoredFields(longs);
}
}
public void testZeros() throws Exception {
doTestNumericsVsStoredFields(0, 0);
}
public void testZeroOrMin() throws Exception {
// try to make GCD compression fail if the format did not anticipate that
// the GCD of 0 and MIN_VALUE is negative
int numIterations = atLeast(1);
for (int i = 0; i < numIterations; i++) {
final LongProducer longs = new LongProducer() {
@Override
long next() {
return random().nextBoolean() ? 0 : Long.MIN_VALUE;
}
};
doTestNumericsVsStoredFields(longs);
}
}
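/** Tests getDocsWithField for a numeric field present in only the first of two docs; the missing doc reads back as 0. */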
public void testTwoNumbersOneMissing() throws IOException {
assumeTrue("Codec does not support getDocsWithField", defaultCodecSupportsDocsWithField());
Directory directory = newDirectory();
IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, null);
conf.setMergePolicy(newLogMergePolicy());
RandomIndexWriter iw = new RandomIndexWriter(random(), directory, conf);
Document doc = new Document();
doc.add(new StringField("id", "0", Field.Store.YES));
doc.add(new NumericDocValuesField("dv1", 0));
iw.addDocument(doc);
doc = new Document();
doc.add(new StringField("id", "1", Field.Store.YES));
iw.addDocument(doc);
iw.forceMerge(1);
iw.shutdown();
IndexReader ir = DirectoryReader.open(directory);
assertEquals(1, ir.leaves().size());
AtomicReader ar = ir.leaves().get(0).reader();
NumericDocValues dv = ar.getNumericDocValues("dv1");
assertEquals(0, dv.get(0));
assertEquals(0, dv.get(1));
Bits docsWithField = ar.getDocsWithField("dv1");
assertTrue(docsWithField.get(0));
assertFalse(docsWithField.get(1));
ir.close();
directory.close();
}
public void testTwoNumbersOneMissingWithMerging() throws IOException {
assumeTrue("Codec does not support getDocsWithField", defaultCodecSupportsDocsWithField());
Directory directory = newDirectory();
IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, null);
conf.setMergePolicy(newLogMergePolicy());
RandomIndexWriter iw = new RandomIndexWriter(random(), directory, conf);
Document doc = new Document();
doc.add(new StringField("id", "0", Field.Store.YES));
doc.add(new NumericDocValuesField("dv1", 0));
iw.addDocument(doc);
iw.commit();
doc = new Document();
doc.add(new StringField("id", "1", Field.Store.YES));
iw.addDocument(doc);
iw.forceMerge(1);
iw.shutdown();
IndexReader ir = DirectoryReader.open(directory);
assertEquals(1, ir.leaves().size());
AtomicReader ar = ir.leaves().get(0).reader();
NumericDocValues dv = ar.getNumericDocValues("dv1");
assertEquals(0, dv.get(0));
assertEquals(0, dv.get(1));
Bits docsWithField = ar.getDocsWithField("dv1");
assertTrue(docsWithField.get(0));
assertFalse(docsWithField.get(1));
ir.close();
directory.close();
}
public void testThreeNumbersOneMissingWithMerging() throws IOException {
assumeTrue("Codec does not support getDocsWithField", defaultCodecSupportsDocsWithField());
Directory directory = newDirectory();
IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, null);
conf.setMergePolicy(newLogMergePolicy());
RandomIndexWriter iw = new RandomIndexWriter(random(), directory, conf);
Document doc = new Document();
doc.add(new StringField("id", "0", Field.Store.YES));
doc.add(new NumericDocValuesField("dv1", 0));
iw.addDocument(doc);
doc = new Document();
doc.add(new StringField("id", "1", Field.Store.YES));
iw.addDocument(doc);
iw.commit();
doc = new Document();
doc.add(new StringField("id", "2", Field.Store.YES));
doc.add(new NumericDocValuesField("dv1", 5));
iw.addDocument(doc);
iw.forceMerge(1);
iw.shutdown();
IndexReader ir = DirectoryReader.open(directory);
assertEquals(1, ir.leaves().size());
AtomicReader ar = ir.leaves().get(0).reader();
NumericDocValues dv = ar.getNumericDocValues("dv1");
assertEquals(0, dv.get(0));
assertEquals(0, dv.get(1));
assertEquals(5, dv.get(2));
Bits docsWithField = ar.getDocsWithField("dv1");
assertTrue(docsWithField.get(0));
assertFalse(docsWithField.get(1));
assertTrue(docsWithField.get(2));
ir.close();
directory.close();
}
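/** Tests getDocsWithField for a binary field present in only the first of two docs; the missing doc reads back as an empty BytesRef. */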
public void testTwoBytesOneMissing() throws IOException {
assumeTrue("Codec does not support getDocsWithField", defaultCodecSupportsDocsWithField());
Directory directory = newDirectory();
IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, null);
conf.setMergePolicy(newLogMergePolicy());
RandomIndexWriter iw = new RandomIndexWriter(random(), directory, conf);
Document doc = new Document();
doc.add(new StringField("id", "0", Field.Store.YES));
doc.add(new BinaryDocValuesField("dv1", new BytesRef()));
iw.addDocument(doc);
doc = new Document();
doc.add(new StringField("id", "1", Field.Store.YES));
iw.addDocument(doc);
iw.forceMerge(1);
iw.shutdown();
IndexReader ir = DirectoryReader.open(directory);
assertEquals(1, ir.leaves().size());
AtomicReader ar = ir.leaves().get(0).reader();
BinaryDocValues dv = ar.getBinaryDocValues("dv1");
BytesRef ref = new BytesRef();
dv.get(0, ref);
assertEquals(new BytesRef(), ref);
dv.get(1, ref);
assertEquals(new BytesRef(), ref);
Bits docsWithField = ar.getDocsWithField("dv1");
assertTrue(docsWithField.get(0));
assertFalse(docsWithField.get(1));
ir.close();
directory.close();
}
public void testTwoBytesOneMissingWithMerging() throws IOException {
assumeTrue("Codec does not support getDocsWithField", defaultCodecSupportsDocsWithField());
Directory directory = newDirectory();
IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, null);
conf.setMergePolicy(newLogMergePolicy());
RandomIndexWriter iw = new RandomIndexWriter(random(), directory, conf);
Document doc = new Document();
doc.add(new StringField("id", "0", Field.Store.YES));
doc.add(new BinaryDocValuesField("dv1", new BytesRef()));
iw.addDocument(doc);
iw.commit();
doc = new Document();
doc.add(new StringField("id", "1", Field.Store.YES));
iw.addDocument(doc);
iw.forceMerge(1);
iw.shutdown();
IndexReader ir = DirectoryReader.open(directory);
assertEquals(1, ir.leaves().size());
AtomicReader ar = ir.leaves().get(0).reader();
BinaryDocValues dv = ar.getBinaryDocValues("dv1");
BytesRef ref = new BytesRef();
dv.get(0, ref);
assertEquals(new BytesRef(), ref);
dv.get(1, ref);
assertEquals(new BytesRef(), ref);
Bits docsWithField = ar.getDocsWithField("dv1");
assertTrue(docsWithField.get(0));
assertFalse(docsWithField.get(1));
ir.close();
directory.close();
}
public void testThreeBytesOneMissingWithMerging() throws IOException {
assumeTrue("Codec does not support getDocsWithField", defaultCodecSupportsDocsWithField());
Directory directory = newDirectory();
IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, null);
conf.setMergePolicy(newLogMergePolicy());
RandomIndexWriter iw = new RandomIndexWriter(random(), directory, conf);
Document doc = new Document();
doc.add(new StringField("id", "0", Field.Store.YES));
doc.add(new BinaryDocValuesField("dv1", new BytesRef()));
iw.addDocument(doc);
doc = new Document();
doc.add(new StringField("id", "1", Field.Store.YES));
iw.addDocument(doc);
iw.commit();
doc = new Document();
doc.add(new StringField("id", "2", Field.Store.YES));
doc.add(new BinaryDocValuesField("dv1", new BytesRef("boo")));
iw.addDocument(doc);
iw.forceMerge(1);
iw.shutdown();
IndexReader ir = DirectoryReader.open(directory);
assertEquals(1, ir.leaves().size());
AtomicReader ar = ir.leaves().get(0).reader();
BinaryDocValues dv = ar.getBinaryDocValues("dv1");
BytesRef ref = new BytesRef();
dv.get(0, ref);
assertEquals(new BytesRef(), ref);
dv.get(1, ref);
assertEquals(new BytesRef(), ref);
dv.get(2, ref);
assertEquals(new BytesRef("boo"), ref);
Bits docsWithField = ar.getDocsWithField("dv1");
assertTrue(docsWithField.get(0));
assertFalse(docsWithField.get(1));
assertTrue(docsWithField.get(2));
ir.close();
directory.close();
}
/** Tests dv against stored fields with threads (binary/numeric/sorted, no missing) */
public void testThreads() throws Exception {
Directory dir = newDirectory();
IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
RandomIndexWriter writer = new RandomIndexWriter(random(), dir, conf);
Document doc = new Document();
Field idField = new StringField("id", "", Field.Store.NO);
Field storedBinField = new StoredField("storedBin", new byte[0]);
Field dvBinField = new BinaryDocValuesField("dvBin", new BytesRef());
Field dvSortedField = new SortedDocValuesField("dvSorted", new BytesRef());
Field storedNumericField = new StoredField("storedNum", "");
Field dvNumericField = new NumericDocValuesField("dvNum", 0);
doc.add(idField);
doc.add(storedBinField);
doc.add(dvBinField);
doc.add(dvSortedField);
doc.add(storedNumericField);
doc.add(dvNumericField);
// index some docs
int numDocs = atLeast(300);
for (int i = 0; i < numDocs; i++) {
idField.setStringValue(Integer.toString(i));
int length = TestUtil.nextInt(random(), 0, 8);
byte buffer[] = new byte[length];
random().nextBytes(buffer);
storedBinField.setBytesValue(buffer);
dvBinField.setBytesValue(buffer);
dvSortedField.setBytesValue(buffer);
long numericValue = random().nextLong();
storedNumericField.setStringValue(Long.toString(numericValue));
dvNumericField.setLongValue(numericValue);
writer.addDocument(doc);
if (random().nextInt(31) == 0) {
writer.commit();
}
}
// delete some docs
int numDeletions = random().nextInt(numDocs/10);
for (int i = 0; i < numDeletions; i++) {
int id = random().nextInt(numDocs);
writer.deleteDocuments(new Term("id", Integer.toString(id)));
}
writer.shutdown();
// compare
final DirectoryReader ir = DirectoryReader.open(dir);
int numThreads = TestUtil.nextInt(random(), 2, 7);
Thread threads[] = new Thread[numThreads];
final CountDownLatch startingGun = new CountDownLatch(1);
for (int i = 0; i < threads.length; i++) {
threads[i] = new Thread() {
@Override
public void run() {
try {
startingGun.await();
for (AtomicReaderContext context : ir.leaves()) {
AtomicReader r = context.reader();
BinaryDocValues binaries = r.getBinaryDocValues("dvBin");
SortedDocValues sorted = r.getSortedDocValues("dvSorted");
NumericDocValues numerics = r.getNumericDocValues("dvNum");
for (int j = 0; j < r.maxDoc(); j++) {
BytesRef binaryValue = r.document(j).getBinaryValue("storedBin");
BytesRef scratch = new BytesRef();
binaries.get(j, scratch);
assertEquals(binaryValue, scratch);
sorted.get(j, scratch);
assertEquals(binaryValue, scratch);
String expected = r.document(j).get("storedNum");
assertEquals(Long.parseLong(expected), numerics.get(j));
}
}
TestUtil.checkReader(ir);
} catch (Exception e) {
throw new RuntimeException(e);
}
}
};
threads[i].start();
}
startingGun.countDown();
for (Thread t : threads) {
t.join();
}
ir.close();
dir.close();
}
/** Tests dv against stored fields with threads (all types + missing) */
public void testThreads2() throws Exception {
assumeTrue("Codec does not support getDocsWithField", defaultCodecSupportsDocsWithField());
assumeTrue("Codec does not support SORTED_SET", defaultCodecSupportsSortedSet());
Directory dir = newDirectory();
IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
RandomIndexWriter writer = new RandomIndexWriter(random(), dir, conf);
Field idField = new StringField("id", "", Field.Store.NO);
Field storedBinField = new StoredField("storedBin", new byte[0]);
Field dvBinField = new BinaryDocValuesField("dvBin", new BytesRef());
Field dvSortedField = new SortedDocValuesField("dvSorted", new BytesRef());
Field storedNumericField = new StoredField("storedNum", "");
Field dvNumericField = new NumericDocValuesField("dvNum", 0);
// index some docs
int numDocs = atLeast(300);
for (int i = 0; i < numDocs; i++) {
idField.setStringValue(Integer.toString(i));
int length = TestUtil.nextInt(random(), 0, 8);
byte buffer[] = new byte[length];
random().nextBytes(buffer);
storedBinField.setBytesValue(buffer);
dvBinField.setBytesValue(buffer);
dvSortedField.setBytesValue(buffer);
long numericValue = random().nextLong();
storedNumericField.setStringValue(Long.toString(numericValue));
dvNumericField.setLongValue(numericValue);
Document doc = new Document();
doc.add(idField);
if (random().nextInt(4) > 0) {
doc.add(storedBinField);
doc.add(dvBinField);
doc.add(dvSortedField);
}
if (random().nextInt(4) > 0) {
doc.add(storedNumericField);
doc.add(dvNumericField);
}
int numSortedSetFields = random().nextInt(3);
Set<String> values = new TreeSet<>();
for (int j = 0; j < numSortedSetFields; j++) {
values.add(TestUtil.randomSimpleString(random()));
}
for (String v : values) {
doc.add(new SortedSetDocValuesField("dvSortedSet", new BytesRef(v)));
doc.add(new StoredField("storedSortedSet", v));
}
writer.addDocument(doc);
if (random().nextInt(31) == 0) {
writer.commit();
}
}
// delete some docs
int numDeletions = random().nextInt(numDocs/10);
for (int i = 0; i < numDeletions; i++) {
int id = random().nextInt(numDocs);
writer.deleteDocuments(new Term("id", Integer.toString(id)));
}
writer.shutdown();
// compare
final DirectoryReader ir = DirectoryReader.open(dir);
int numThreads = TestUtil.nextInt(random(), 2, 7);
Thread threads[] = new Thread[numThreads];
final CountDownLatch startingGun = new CountDownLatch(1);
for (int i = 0; i < threads.length; i++) {
threads[i] = new Thread() {
@Override
public void run() {
try {
startingGun.await();
for (AtomicReaderContext context : ir.leaves()) {
AtomicReader r = context.reader();
BinaryDocValues binaries = r.getBinaryDocValues("dvBin");
Bits binaryBits = r.getDocsWithField("dvBin");
SortedDocValues sorted = r.getSortedDocValues("dvSorted");
Bits sortedBits = r.getDocsWithField("dvSorted");
NumericDocValues numerics = r.getNumericDocValues("dvNum");
Bits numericBits = r.getDocsWithField("dvNum");
SortedSetDocValues sortedSet = r.getSortedSetDocValues("dvSortedSet");
Bits sortedSetBits = r.getDocsWithField("dvSortedSet");
for (int j = 0; j < r.maxDoc(); j++) {
BytesRef binaryValue = r.document(j).getBinaryValue("storedBin");
if (binaryValue != null) {
if (binaries != null) {
BytesRef scratch = new BytesRef();
binaries.get(j, scratch);
assertEquals(binaryValue, scratch);
sorted.get(j, scratch);
assertEquals(binaryValue, scratch);
assertTrue(binaryBits.get(j));
assertTrue(sortedBits.get(j));
}
} else if (binaries != null) {
assertFalse(binaryBits.get(j));
assertFalse(sortedBits.get(j));
assertEquals(-1, sorted.getOrd(j));
}
String number = r.document(j).get("storedNum");
if (number != null) {
if (numerics != null) {
assertEquals(Long.parseLong(number), numerics.get(j));
}
} else if (numerics != null) {
assertFalse(numericBits.get(j));
assertEquals(0, numerics.get(j));
}
String values[] = r.document(j).getValues("storedSortedSet");
if (values.length > 0) {
assertNotNull(sortedSet);
sortedSet.setDocument(j);
for (int k = 0; k < values.length; k++) {
long ord = sortedSet.nextOrd();
assertTrue(ord != SortedSetDocValues.NO_MORE_ORDS);
BytesRef value = new BytesRef();
sortedSet.lookupOrd(ord, value);
assertEquals(values[k], value.utf8ToString());
}
assertEquals(SortedSetDocValues.NO_MORE_ORDS, sortedSet.nextOrd());
assertTrue(sortedSetBits.get(j));
} else if (sortedSet != null) {
sortedSet.setDocument(j);
assertEquals(SortedSetDocValues.NO_MORE_ORDS, sortedSet.nextOrd());
assertFalse(sortedSetBits.get(j));
}
}
}
TestUtil.checkReader(ir);
} catch (Exception e) {
throw new RuntimeException(e);
}
}
};
threads[i].start();
}
startingGun.countDown();
for (Thread t : threads) {
t.join();
}
ir.close();
dir.close();
}
// LUCENE-5218
public void testEmptyBinaryValueOnPageSizes() throws Exception {
// Test larger and larger power-of-two sized values,
// followed by empty string value:
for (int i = 0; i < 20; i++) {
if (i > 14 && codecAcceptsHugeBinaryValues("field") == false) {
break;
}
Directory dir = newDirectory();
RandomIndexWriter w = new RandomIndexWriter(random(), dir);
BytesRef bytes = new BytesRef();
bytes.bytes = new byte[1 << i];
bytes.length = 1 << i;
for (int j = 0; j < 4; j++) {
Document doc = new Document();
doc.add(new BinaryDocValuesField("field", bytes));
w.addDocument(doc);
}
Document doc = new Document();
doc.add(new StoredField("id", "5"));
doc.add(new BinaryDocValuesField("field", new BytesRef()));
w.addDocument(doc);
IndexReader r = w.getReader();
w.shutdown();
AtomicReader ar = SlowCompositeReaderWrapper.wrap(r);
BinaryDocValues values = ar.getBinaryDocValues("field");
BytesRef result = new BytesRef();
for (int j = 0; j < 5; j++) {
values.get(j, result); // includes doc 4, which holds the empty value
assertTrue(result.length == 0 || result.length == 1 << i);
}
ar.close();
dir.close();
}
}
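/** Hook for subclasses: returns true if the codec under test can store binary values larger than ~32 KB (1&lt;&lt;15 bytes and up in the test above). */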
protected boolean codecAcceptsHugeBinaryValues(String field) {
return true;
}
}