/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.index;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Random;
import java.util.Set;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.function.Consumer;
import java.util.stream.Collectors;
import java.util.stream.IntStream;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
import org.apache.lucene.codecs.FilterCodec;
import org.apache.lucene.codecs.PointsFormat;
import org.apache.lucene.codecs.PointsReader;
import org.apache.lucene.codecs.PointsWriter;
import org.apache.lucene.document.BinaryDocValuesField;
import org.apache.lucene.document.BinaryPoint;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.DoubleDocValuesField;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.FloatDocValuesField;
import org.apache.lucene.document.IntPoint;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.document.SortedDocValuesField;
import org.apache.lucene.document.SortedNumericDocValuesField;
import org.apache.lucene.document.SortedSetDocValuesField;
import org.apache.lucene.document.StoredField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.search.CollectionStatistics;
import org.apache.lucene.search.FieldDoc;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.SortedNumericSortField;
import org.apache.lucene.search.SortedSetSortField;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TermStatistics;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.TopFieldCollector;
import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.NumericUtils;
import org.apache.lucene.util.TestUtil;
import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS;
import static org.hamcrest.core.StringContains.containsString;
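/** Tests index-time sorting configured via {@link IndexWriterConfig#setIndexSort(Sort)}. */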
public class TestIndexSorting extends LuceneTestCase {
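/**
* Wraps the default codec so the points merge can count how many times it runs and, for
* multi-segment merges, assert that {@link MergeState#needsIndexSort} matches the expected value.
*/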
static class AssertingNeedsIndexSortCodec extends FilterCodec {
boolean needsIndexSort;
int numCalls;
AssertingNeedsIndexSortCodec() {
super(TestUtil.getDefaultCodec().getName(), TestUtil.getDefaultCodec());
}
@Override
public PointsFormat pointsFormat() {
final PointsFormat pf = delegate.pointsFormat();
return new PointsFormat() {
@Override
public PointsWriter fieldsWriter(SegmentWriteState state) throws IOException {
final PointsWriter writer = pf.fieldsWriter(state);
return new PointsWriter() {
@Override
public void merge(MergeState mergeState) throws IOException {
// For a single-segment merge we cannot infer whether the segment is already sorted or not.
if (mergeState.docMaps.length > 1) {
assertEquals(needsIndexSort, mergeState.needsIndexSort);
}
++numCalls;
writer.merge(mergeState);
}
@Override
public void writeField(FieldInfo fieldInfo, PointsReader values) throws IOException {
writer.writeField(fieldInfo, values);
}
@Override
public void finish() throws IOException {
writer.finish();
}
@Override
public void close() throws IOException {
writer.close();
}
};
}
@Override
public PointsReader fieldsReader(SegmentReadState state) throws IOException {
return pf.fieldsReader(state);
}
};
}
}
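/**
* Indexes three batches of documents (already sorted on the sort field, in reverse order, and with
* random values), force-merging after each batch, and relies on the asserting codec above to check
* whether each merge reported that an index sort was needed.
*/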
private static void assertNeedsIndexSortMerge(SortField sortField, Consumer<Document> defaultValueConsumer, Consumer<Document> randomValueConsumer) throws Exception {
Directory dir = newDirectory();
IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
AssertingNeedsIndexSortCodec codec = new AssertingNeedsIndexSortCodec();
iwc.setCodec(codec);
Sort indexSort = new Sort(sortField,
new SortField("id", SortField.Type.INT));
iwc.setIndexSort(indexSort);
LogMergePolicy policy = newLogMergePolicy();
// make sure that merge factor is always > 2
if (policy.getMergeFactor() <= 2) {
policy.setMergeFactor(3);
}
iwc.setMergePolicy(policy);
// add already sorted documents
codec.numCalls = 0;
codec.needsIndexSort = false;
IndexWriter w = new IndexWriter(dir, iwc);
boolean withValues = random().nextBoolean();
for (int i = 100; i < 200; i++) {
Document doc = new Document();
doc.add(new StringField("id", Integer.toString(i), Store.YES));
doc.add(new NumericDocValuesField("id", i));
doc.add(new IntPoint("point", random().nextInt()));
if (withValues) {
defaultValueConsumer.accept(doc);
}
w.addDocument(doc);
if (i % 10 == 0) {
w.commit();
}
}
Set<Integer> deletedDocs = new HashSet<>();
int num = random().nextInt(20);
for (int i = 0; i < num; i++) {
int nextDoc = random().nextInt(100);
w.deleteDocuments(new Term("id", Integer.toString(nextDoc)));
deletedDocs.add(nextDoc);
}
w.commit();
w.waitForMerges();
w.forceMerge(1);
assertTrue(codec.numCalls > 0);
// merge sort is needed
codec.numCalls = 0;
codec.needsIndexSort = true;
for (int i = 10; i >= 0; i--) {
Document doc = new Document();
doc.add(new StringField("id", Integer.toString(i), Store.YES));
doc.add(new NumericDocValuesField("id", i));
doc.add(new IntPoint("point", random().nextInt()));
if (withValues) {
defaultValueConsumer.accept(doc);
}
w.addDocument(doc);
w.commit();
}
w.commit();
w.waitForMerges();
w.forceMerge(1);
assertTrue(codec.numCalls > 0);
// segment sort is needed
codec.needsIndexSort = true;
codec.numCalls = 0;
for (int i = 201; i < 300; i++) {
Document doc = new Document();
doc.add(new StringField("id", Integer.toString(i), Store.YES));
doc.add(new NumericDocValuesField("id", i));
doc.add(new IntPoint("point", random().nextInt()));
randomValueConsumer.accept(doc);
w.addDocument(doc);
if (i % 10 == 0) {
w.commit();
}
}
w.commit();
w.waitForMerges();
w.forceMerge(1);
assertTrue(codec.numCalls > 0);
w.close();
dir.close();
}
public void testNumericAlreadySorted() throws Exception {
assertNeedsIndexSortMerge(new SortField("foo", SortField.Type.INT),
(doc) -> doc.add(new NumericDocValuesField("foo", 0)),
(doc) -> doc.add(new NumericDocValuesField("foo", random().nextInt())));
}
public void testStringAlreadySorted() throws Exception {
assertNeedsIndexSortMerge(
new SortField("foo", SortField.Type.STRING),
(doc) -> doc.add(new SortedDocValuesField("foo", newBytesRef("default"))),
(doc) -> doc.add(new SortedDocValuesField("foo", TestUtil.randomBinaryTerm(random()))));
}
public void testMultiValuedNumericAlreadySorted() throws Exception {
assertNeedsIndexSortMerge(new SortedNumericSortField("foo", SortField.Type.INT),
(doc) -> {
doc.add(new SortedNumericDocValuesField("foo", Integer.MIN_VALUE));
int num = random().nextInt(5);
for (int j = 0; j < num; j++) {
doc.add(new SortedNumericDocValuesField("foo", random().nextInt()));
}
},
(doc) -> {
int num = random().nextInt(5);
for (int j = 0; j < num; j++) {
doc.add(new SortedNumericDocValuesField("foo", random().nextInt()));
}
});
}
public void testMultiValuedStringAlreadySorted() throws Exception {
assertNeedsIndexSortMerge(new SortedSetSortField("foo", false),
(doc) -> {
doc.add(new SortedSetDocValuesField("foo", newBytesRef("")));
int num = random().nextInt(5);
for (int j = 0; j < num; j++) {
doc.add(new SortedSetDocValuesField("foo", TestUtil.randomBinaryTerm(random())));
}
},
(doc) -> {
int num = random().nextInt(5);
for (int j = 0; j < num; j++) {
doc.add(new SortedSetDocValuesField("foo", TestUtil.randomBinaryTerm(random())));
}
});
}
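// The tests below index a few documents out of order across multiple segments and verify that a
// forced merge produces a single segment ordered by the configured index sort, with missing
// values placed first or last as requested.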
public void testBasicString() throws Exception {
Directory dir = newDirectory();
IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
Sort indexSort = new Sort(new SortField("foo", SortField.Type.STRING));
iwc.setIndexSort(indexSort);
IndexWriter w = new IndexWriter(dir, iwc);
Document doc = new Document();
doc.add(new SortedDocValuesField("foo", newBytesRef("zzz")));
w.addDocument(doc);
// so we get more than one segment, so that forceMerge actually does merge, since we only get a sorted segment by merging:
w.commit();
doc = new Document();
doc.add(new SortedDocValuesField("foo", newBytesRef("aaa")));
w.addDocument(doc);
w.commit();
doc = new Document();
doc.add(new SortedDocValuesField("foo", newBytesRef("mmm")));
w.addDocument(doc);
w.forceMerge(1);
DirectoryReader r = DirectoryReader.open(w);
LeafReader leaf = getOnlyLeafReader(r);
assertEquals(3, leaf.maxDoc());
SortedDocValues values = leaf.getSortedDocValues("foo");
assertEquals(0, values.nextDoc());
assertEquals("aaa", values.binaryValue().utf8ToString());
assertEquals(1, values.nextDoc());
assertEquals("mmm", values.binaryValue().utf8ToString());
assertEquals(2, values.nextDoc());
assertEquals("zzz", values.binaryValue().utf8ToString());
r.close();
w.close();
dir.close();
}
public void testBasicMultiValuedString() throws Exception {
Directory dir = newDirectory();
IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
Sort indexSort = new Sort(new SortedSetSortField("foo", false));
iwc.setIndexSort(indexSort);
IndexWriter w = new IndexWriter(dir, iwc);
Document doc = new Document();
doc.add(new NumericDocValuesField("id", 3));
doc.add(new SortedSetDocValuesField("foo", newBytesRef("zzz")));
w.addDocument(doc);
// so we get more than one segment, so that forceMerge actually does merge, since we only get a sorted segment by merging:
w.commit();
doc = new Document();
doc.add(new NumericDocValuesField("id", 1));
doc.add(new SortedSetDocValuesField("foo", newBytesRef("aaa")));
doc.add(new SortedSetDocValuesField("foo", newBytesRef("zzz")));
doc.add(new SortedSetDocValuesField("foo", newBytesRef("bcg")));
w.addDocument(doc);
w.commit();
doc = new Document();
doc.add(new NumericDocValuesField("id", 2));
doc.add(new SortedSetDocValuesField("foo", newBytesRef("mmm")));
doc.add(new SortedSetDocValuesField("foo", newBytesRef("pppp")));
w.addDocument(doc);
w.forceMerge(1);
DirectoryReader r = DirectoryReader.open(w);
LeafReader leaf = getOnlyLeafReader(r);
assertEquals(3, leaf.maxDoc());
NumericDocValues values = leaf.getNumericDocValues("id");
assertEquals(0, values.nextDoc());
assertEquals(1L, values.longValue());
assertEquals(1, values.nextDoc());
assertEquals(2L, values.longValue());
assertEquals(2, values.nextDoc());
assertEquals(3L, values.longValue());
r.close();
w.close();
dir.close();
}
public void testMissingStringFirst() throws Exception {
for (boolean reverse : new boolean[] {true, false}) {
Directory dir = newDirectory();
IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
SortField sortField = new SortField("foo", SortField.Type.STRING, reverse);
sortField.setMissingValue(SortField.STRING_FIRST);
Sort indexSort = new Sort(sortField);
iwc.setIndexSort(indexSort);
IndexWriter w = new IndexWriter(dir, iwc);
Document doc = new Document();
doc.add(new SortedDocValuesField("foo", newBytesRef("zzz")));
w.addDocument(doc);
// so we get more than one segment, so that forceMerge actually does merge, since we only get a sorted segment by merging:
w.commit();
// missing
w.addDocument(new Document());
w.commit();
doc = new Document();
doc.add(new SortedDocValuesField("foo", newBytesRef("mmm")));
w.addDocument(doc);
w.forceMerge(1);
DirectoryReader r = DirectoryReader.open(w);
LeafReader leaf = getOnlyLeafReader(r);
assertEquals(3, leaf.maxDoc());
SortedDocValues values = leaf.getSortedDocValues("foo");
if (reverse) {
assertEquals(0, values.nextDoc());
assertEquals("zzz", values.binaryValue().utf8ToString());
assertEquals(1, values.nextDoc());
assertEquals("mmm", values.binaryValue().utf8ToString());
} else {
// docID 0 is missing:
assertEquals(1, values.nextDoc());
assertEquals("mmm", values.binaryValue().utf8ToString());
assertEquals(2, values.nextDoc());
assertEquals("zzz", values.binaryValue().utf8ToString());
}
r.close();
w.close();
dir.close();
}
}
public void testMissingMultiValuedStringFirst() throws Exception {
for (boolean reverse : new boolean[] {true, false}) {
Directory dir = newDirectory();
IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
SortField sortField = new SortedSetSortField("foo", reverse);
sortField.setMissingValue(SortField.STRING_FIRST);
Sort indexSort = new Sort(sortField);
iwc.setIndexSort(indexSort);
IndexWriter w = new IndexWriter(dir, iwc);
Document doc = new Document();
doc.add(new NumericDocValuesField("id", 3));
doc.add(new SortedSetDocValuesField("foo", newBytesRef("zzz")));
doc.add(new SortedSetDocValuesField("foo", newBytesRef("zzza")));
doc.add(new SortedSetDocValuesField("foo", newBytesRef("zzzd")));
w.addDocument(doc);
// so we get more than one segment, so that forceMerge actually does merge, since we only get a sorted segment by merging:
w.commit();
// missing
doc = new Document();
doc.add(new NumericDocValuesField("id", 1));
w.addDocument(doc);
w.commit();
doc = new Document();
doc.add(new NumericDocValuesField("id", 2));
doc.add(new SortedSetDocValuesField("foo", newBytesRef("mmm")));
doc.add(new SortedSetDocValuesField("foo", newBytesRef("nnnn")));
w.addDocument(doc);
w.forceMerge(1);
DirectoryReader r = DirectoryReader.open(w);
LeafReader leaf = getOnlyLeafReader(r);
assertEquals(3, leaf.maxDoc());
NumericDocValues values = leaf.getNumericDocValues("id");
if (reverse) {
assertEquals(0, values.nextDoc());
assertEquals(3L, values.longValue());
assertEquals(1, values.nextDoc());
assertEquals(2L, values.longValue());
assertEquals(2, values.nextDoc());
assertEquals(1L, values.longValue());
} else {
assertEquals(0, values.nextDoc());
assertEquals(1L, values.longValue());
assertEquals(1, values.nextDoc());
assertEquals(2L, values.longValue());
assertEquals(2, values.nextDoc());
assertEquals(3L, values.longValue());
}
r.close();
w.close();
dir.close();
}
}
public void testMissingStringLast() throws Exception {
for (boolean reverse : new boolean[] {true, false}) {
Directory dir = newDirectory();
IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
SortField sortField = new SortField("foo", SortField.Type.STRING, reverse);
sortField.setMissingValue(SortField.STRING_LAST);
Sort indexSort = new Sort(sortField);
iwc.setIndexSort(indexSort);
IndexWriter w = new IndexWriter(dir, iwc);
Document doc = new Document();
doc.add(new SortedDocValuesField("foo", newBytesRef("zzz")));
w.addDocument(doc);
// so we get more than one segment, so that forceMerge actually does merge, since we only get a sorted segment by merging:
w.commit();
// missing
w.addDocument(new Document());
w.commit();
doc = new Document();
doc.add(new SortedDocValuesField("foo", newBytesRef("mmm")));
w.addDocument(doc);
w.forceMerge(1);
DirectoryReader r = DirectoryReader.open(w);
LeafReader leaf = getOnlyLeafReader(r);
assertEquals(3, leaf.maxDoc());
SortedDocValues values = leaf.getSortedDocValues("foo");
if (reverse) {
assertEquals(1, values.nextDoc());
assertEquals("zzz", values.binaryValue().utf8ToString());
assertEquals(2, values.nextDoc());
assertEquals("mmm", values.binaryValue().utf8ToString());
} else {
assertEquals(0, values.nextDoc());
assertEquals("mmm", values.binaryValue().utf8ToString());
assertEquals(1, values.nextDoc());
assertEquals("zzz", values.binaryValue().utf8ToString());
}
assertEquals(NO_MORE_DOCS, values.nextDoc());
r.close();
w.close();
dir.close();
}
}
public void testMissingMultiValuedStringLast() throws Exception {
for (boolean reverse : new boolean[] {true, false}) {
Directory dir = newDirectory();
IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
SortField sortField = new SortedSetSortField("foo", reverse);
sortField.setMissingValue(SortField.STRING_LAST);
Sort indexSort = new Sort(sortField);
iwc.setIndexSort(indexSort);
IndexWriter w = new IndexWriter(dir, iwc);
Document doc = new Document();
doc.add(new NumericDocValuesField("id", 2));
doc.add(new SortedSetDocValuesField("foo", newBytesRef("zzz")));
doc.add(new SortedSetDocValuesField("foo", newBytesRef("zzzd")));
w.addDocument(doc);
// so we get more than one segment, so that forceMerge actually does merge, since we only get a sorted segment by merging:
w.commit();
// missing
doc = new Document();
doc.add(new NumericDocValuesField("id", 3));
w.addDocument(doc);
w.commit();
doc = new Document();
doc.add(new NumericDocValuesField("id", 1));
doc.add(new SortedSetDocValuesField("foo", newBytesRef("mmm")));
doc.add(new SortedSetDocValuesField("foo", newBytesRef("ppp")));
w.addDocument(doc);
w.forceMerge(1);
DirectoryReader r = DirectoryReader.open(w);
LeafReader leaf = getOnlyLeafReader(r);
assertEquals(3, leaf.maxDoc());
NumericDocValues values = leaf.getNumericDocValues("id");
if (reverse) {
assertEquals(0, values.nextDoc());
assertEquals(3L, values.longValue());
assertEquals(1, values.nextDoc());
assertEquals(2L, values.longValue());
assertEquals(2, values.nextDoc());
assertEquals(1L, values.longValue());
} else {
assertEquals(0, values.nextDoc());
assertEquals(1L, values.longValue());
assertEquals(1, values.nextDoc());
assertEquals(2L, values.longValue());
assertEquals(2, values.nextDoc());
assertEquals(3L, values.longValue());
}
r.close();
w.close();
dir.close();
}
}
public void testBasicLong() throws Exception {
Directory dir = newDirectory();
IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
Sort indexSort = new Sort(new SortField("foo", SortField.Type.LONG));
iwc.setIndexSort(indexSort);
IndexWriter w = new IndexWriter(dir, iwc);
Document doc = new Document();
doc.add(new NumericDocValuesField("foo", 18));
w.addDocument(doc);
// so we get more than one segment, so that forceMerge actually does merge, since we only get a sorted segment by merging:
w.commit();
doc = new Document();
doc.add(new NumericDocValuesField("foo", -1));
w.addDocument(doc);
w.commit();
doc = new Document();
doc.add(new NumericDocValuesField("foo", 7));
w.addDocument(doc);
w.forceMerge(1);
DirectoryReader r = DirectoryReader.open(w);
LeafReader leaf = getOnlyLeafReader(r);
assertEquals(3, leaf.maxDoc());
NumericDocValues values = leaf.getNumericDocValues("foo");
assertEquals(0, values.nextDoc());
assertEquals(-1, values.longValue());
assertEquals(1, values.nextDoc());
assertEquals(7, values.longValue());
assertEquals(2, values.nextDoc());
assertEquals(18, values.longValue());
r.close();
w.close();
dir.close();
}
public void testBasicMultiValuedLong() throws Exception {
Directory dir = newDirectory();
IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
Sort indexSort = new Sort(new SortedNumericSortField("foo", SortField.Type.LONG));
iwc.setIndexSort(indexSort);
IndexWriter w = new IndexWriter(dir, iwc);
Document doc = new Document();
doc.add(new NumericDocValuesField("id", 3));
doc.add(new SortedNumericDocValuesField("foo", 18));
doc.add(new SortedNumericDocValuesField("foo", 35));
w.addDocument(doc);
// so we get more than one segment, so that forceMerge actually does merge, since we only get a sorted segment by merging:
w.commit();
doc = new Document();
doc.add(new NumericDocValuesField("id", 1));
doc.add(new SortedNumericDocValuesField("foo", -1));
w.addDocument(doc);
w.commit();
doc = new Document();
doc.add(new NumericDocValuesField("id", 2));
doc.add(new SortedNumericDocValuesField("foo", 7));
doc.add(new SortedNumericDocValuesField("foo", 22));
w.addDocument(doc);
w.forceMerge(1);
DirectoryReader r = DirectoryReader.open(w);
LeafReader leaf = getOnlyLeafReader(r);
assertEquals(3, leaf.maxDoc());
NumericDocValues values = leaf.getNumericDocValues("id");
assertEquals(0, values.nextDoc());
assertEquals(1, values.longValue());
assertEquals(1, values.nextDoc());
assertEquals(2, values.longValue());
assertEquals(2, values.nextDoc());
assertEquals(3, values.longValue());
r.close();
w.close();
dir.close();
}
public void testMissingLongFirst() throws Exception {
for (boolean reverse : new boolean[] {true, false}) {
Directory dir = newDirectory();
IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
SortField sortField = new SortField("foo", SortField.Type.LONG, reverse);
sortField.setMissingValue(Long.valueOf(Long.MIN_VALUE));
Sort indexSort = new Sort(sortField);
iwc.setIndexSort(indexSort);
IndexWriter w = new IndexWriter(dir, iwc);
Document doc = new Document();
doc.add(new NumericDocValuesField("foo", 18));
w.addDocument(doc);
// so we get more than one segment, so that forceMerge actually does merge, since we only get a sorted segment by merging:
w.commit();
// missing
w.addDocument(new Document());
w.commit();
doc = new Document();
doc.add(new NumericDocValuesField("foo", 7));
w.addDocument(doc);
w.forceMerge(1);
DirectoryReader r = DirectoryReader.open(w);
LeafReader leaf = getOnlyLeafReader(r);
assertEquals(3, leaf.maxDoc());
NumericDocValues values = leaf.getNumericDocValues("foo");
if (reverse) {
assertEquals(0, values.nextDoc());
assertEquals(18, values.longValue());
assertEquals(1, values.nextDoc());
assertEquals(7, values.longValue());
} else {
// docID 0 has no value
assertEquals(1, values.nextDoc());
assertEquals(7, values.longValue());
assertEquals(2, values.nextDoc());
assertEquals(18, values.longValue());
}
r.close();
w.close();
dir.close();
}
}
public void testMissingMultiValuedLongFirst() throws Exception {
for (boolean reverse : new boolean[] {true, false}) {
Directory dir = newDirectory();
IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
SortField sortField = new SortedNumericSortField("foo", SortField.Type.LONG, reverse);
sortField.setMissingValue(Long.valueOf(Long.MIN_VALUE));
Sort indexSort = new Sort(sortField);
iwc.setIndexSort(indexSort);
IndexWriter w = new IndexWriter(dir, iwc);
Document doc = new Document();
doc.add(new NumericDocValuesField("id", 3));
doc.add(new SortedNumericDocValuesField("foo", 18));
doc.add(new SortedNumericDocValuesField("foo", 27));
w.addDocument(doc);
// so we get more than one segment, so that forceMerge actually does merge, since we only get a sorted segment by merging:
w.commit();
// missing
doc = new Document();
doc.add(new NumericDocValuesField("id", 1));
w.addDocument(doc);
w.commit();
doc = new Document();
doc.add(new NumericDocValuesField("id", 2));
doc.add(new SortedNumericDocValuesField("foo", 7));
doc.add(new SortedNumericDocValuesField("foo", 24));
w.addDocument(doc);
w.forceMerge(1);
DirectoryReader r = DirectoryReader.open(w);
LeafReader leaf = getOnlyLeafReader(r);
assertEquals(3, leaf.maxDoc());
NumericDocValues values = leaf.getNumericDocValues("id");
if (reverse) {
assertEquals(0, values.nextDoc());
assertEquals(3, values.longValue());
assertEquals(1, values.nextDoc());
assertEquals(2, values.longValue());
assertEquals(2, values.nextDoc());
assertEquals(1, values.longValue());
} else {
assertEquals(0, values.nextDoc());
assertEquals(1, values.longValue());
assertEquals(1, values.nextDoc());
assertEquals(2, values.longValue());
assertEquals(2, values.nextDoc());
assertEquals(3, values.longValue());
}
r.close();
w.close();
dir.close();
}
}
public void testMissingLongLast() throws Exception {
for (boolean reverse : new boolean[] {true, false}) {
Directory dir = newDirectory();
IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
SortField sortField = new SortField("foo", SortField.Type.LONG, reverse);
sortField.setMissingValue(Long.valueOf(Long.MAX_VALUE));
Sort indexSort = new Sort(sortField);
iwc.setIndexSort(indexSort);
IndexWriter w = new IndexWriter(dir, iwc);
Document doc = new Document();
doc.add(new NumericDocValuesField("foo", 18));
w.addDocument(doc);
// so we get more than one segment, so that forceMerge actually does merge, since we only get a sorted segment by merging:
w.commit();
// missing
w.addDocument(new Document());
w.commit();
doc = new Document();
doc.add(new NumericDocValuesField("foo", 7));
w.addDocument(doc);
w.forceMerge(1);
DirectoryReader r = DirectoryReader.open(w);
LeafReader leaf = getOnlyLeafReader(r);
assertEquals(3, leaf.maxDoc());
NumericDocValues values = leaf.getNumericDocValues("foo");
if (reverse) {
// docID 0 is missing
assertEquals(1, values.nextDoc());
assertEquals(18, values.longValue());
assertEquals(2, values.nextDoc());
assertEquals(7, values.longValue());
} else {
assertEquals(0, values.nextDoc());
assertEquals(7, values.longValue());
assertEquals(1, values.nextDoc());
assertEquals(18, values.longValue());
}
assertEquals(NO_MORE_DOCS, values.nextDoc());
r.close();
w.close();
dir.close();
}
}
public void testMissingMultiValuedLongLast() throws Exception {
for (boolean reverse : new boolean[] {true, false}) {
Directory dir = newDirectory();
IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
SortField sortField = new SortedNumericSortField("foo", SortField.Type.LONG, reverse);
sortField.setMissingValue(Long.valueOf(Long.MAX_VALUE));
Sort indexSort = new Sort(sortField);
iwc.setIndexSort(indexSort);
IndexWriter w = new IndexWriter(dir, iwc);
Document doc = new Document();
doc.add(new NumericDocValuesField("id", 2));
doc.add(new SortedNumericDocValuesField("foo", 18));
doc.add(new SortedNumericDocValuesField("foo", 65));
w.addDocument(doc);
// so we get more than one segment, so that forceMerge actually does merge, since we only get a sorted segment by merging:
w.commit();
// missing
doc = new Document();
doc.add(new NumericDocValuesField("id", 3));
w.addDocument(doc);
w.commit();
doc = new Document();
doc.add(new NumericDocValuesField("id", 1));
doc.add(new SortedNumericDocValuesField("foo", 7));
doc.add(new SortedNumericDocValuesField("foo", 34));
doc.add(new SortedNumericDocValuesField("foo", 74));
w.addDocument(doc);
w.forceMerge(1);
DirectoryReader r = DirectoryReader.open(w);
LeafReader leaf = getOnlyLeafReader(r);
assertEquals(3, leaf.maxDoc());
NumericDocValues values = leaf.getNumericDocValues("id");
if (reverse) {
assertEquals(0, values.nextDoc());
assertEquals(3, values.longValue());
assertEquals(1, values.nextDoc());
assertEquals(2, values.longValue());
assertEquals(2, values.nextDoc());
assertEquals(1, values.longValue());
} else {
assertEquals(0, values.nextDoc());
assertEquals(1, values.longValue());
assertEquals(1, values.nextDoc());
assertEquals(2, values.longValue());
assertEquals(2, values.nextDoc());
assertEquals(3, values.longValue());
}
r.close();
w.close();
dir.close();
}
}
public void testBasicInt() throws Exception {
Directory dir = newDirectory();
IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
Sort indexSort = new Sort(new SortField("foo", SortField.Type.INT));
iwc.setIndexSort(indexSort);
IndexWriter w = new IndexWriter(dir, iwc);
Document doc = new Document();
doc.add(new NumericDocValuesField("foo", 18));
w.addDocument(doc);
// so we get more than one segment, so that forceMerge actually does merge, since we only get a sorted segment by merging:
w.commit();
doc = new Document();
doc.add(new NumericDocValuesField("foo", -1));
w.addDocument(doc);
w.commit();
doc = new Document();
doc.add(new NumericDocValuesField("foo", 7));
w.addDocument(doc);
w.forceMerge(1);
DirectoryReader r = DirectoryReader.open(w);
LeafReader leaf = getOnlyLeafReader(r);
assertEquals(3, leaf.maxDoc());
NumericDocValues values = leaf.getNumericDocValues("foo");
assertEquals(0, values.nextDoc());
assertEquals(-1, values.longValue());
assertEquals(1, values.nextDoc());
assertEquals(7, values.longValue());
assertEquals(2, values.nextDoc());
assertEquals(18, values.longValue());
r.close();
w.close();
dir.close();
}
public void testBasicMultiValuedInt() throws Exception {
Directory dir = newDirectory();
IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
Sort indexSort = new Sort(new SortedNumericSortField("foo", SortField.Type.INT));
iwc.setIndexSort(indexSort);
IndexWriter w = new IndexWriter(dir, iwc);
Document doc = new Document();
doc.add(new NumericDocValuesField("id", 3));
doc.add(new SortedNumericDocValuesField("foo", 18));
doc.add(new SortedNumericDocValuesField("foo", 34));
w.addDocument(doc);
// so we get more than one segment, so that forceMerge actually does merge, since we only get a sorted segment by merging:
w.commit();
doc = new Document();
doc.add(new NumericDocValuesField("id", 1));
doc.add(new SortedNumericDocValuesField("foo", -1));
doc.add(new SortedNumericDocValuesField("foo", 34));
w.addDocument(doc);
w.commit();
doc = new Document();
doc.add(new NumericDocValuesField("id", 2));
doc.add(new SortedNumericDocValuesField("foo", 7));
doc.add(new SortedNumericDocValuesField("foo", 22));
doc.add(new SortedNumericDocValuesField("foo", 27));
w.addDocument(doc);
w.forceMerge(1);
DirectoryReader r = DirectoryReader.open(w);
LeafReader leaf = getOnlyLeafReader(r);
assertEquals(3, leaf.maxDoc());
NumericDocValues values = leaf.getNumericDocValues("id");
assertEquals(0, values.nextDoc());
assertEquals(1, values.longValue());
assertEquals(1, values.nextDoc());
assertEquals(2, values.longValue());
assertEquals(2, values.nextDoc());
assertEquals(3, values.longValue());
r.close();
w.close();
dir.close();
}
public void testMissingIntFirst() throws Exception {
for (boolean reverse : new boolean[] {true, false}) {
Directory dir = newDirectory();
IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
SortField sortField = new SortField("foo", SortField.Type.INT, reverse);
sortField.setMissingValue(Integer.valueOf(Integer.MIN_VALUE));
Sort indexSort = new Sort(sortField);
iwc.setIndexSort(indexSort);
IndexWriter w = new IndexWriter(dir, iwc);
Document doc = new Document();
doc.add(new NumericDocValuesField("foo", 18));
w.addDocument(doc);
// so we get more than one segment, so that forceMerge actually does merge, since we only get a sorted segment by merging:
w.commit();
// missing
w.addDocument(new Document());
w.commit();
doc = new Document();
doc.add(new NumericDocValuesField("foo", 7));
w.addDocument(doc);
w.forceMerge(1);
DirectoryReader r = DirectoryReader.open(w);
LeafReader leaf = getOnlyLeafReader(r);
assertEquals(3, leaf.maxDoc());
NumericDocValues values = leaf.getNumericDocValues("foo");
if (reverse) {
assertEquals(0, values.nextDoc());
assertEquals(18, values.longValue());
assertEquals(1, values.nextDoc());
assertEquals(7, values.longValue());
} else {
assertEquals(1, values.nextDoc());
assertEquals(7, values.longValue());
assertEquals(2, values.nextDoc());
assertEquals(18, values.longValue());
}
r.close();
w.close();
dir.close();
}
}
public void testMissingMultiValuedIntFirst() throws Exception {
for (boolean reverse : new boolean[] {true, false}) {
Directory dir = newDirectory();
IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
SortField sortField = new SortedNumericSortField("foo", SortField.Type.INT, reverse);
sortField.setMissingValue(Integer.valueOf(Integer.MIN_VALUE));
Sort indexSort = new Sort(sortField);
iwc.setIndexSort(indexSort);
IndexWriter w = new IndexWriter(dir, iwc);
Document doc = new Document();
doc.add(new NumericDocValuesField("id", 3));
doc.add(new SortedNumericDocValuesField("foo", 18));
doc.add(new SortedNumericDocValuesField("foo", 187667));
w.addDocument(doc);
// so we get more than one segment, so that forceMerge actually does merge, since we only get a sorted segment by merging:
w.commit();
// missing
doc = new Document();
doc.add(new NumericDocValuesField("id", 1));
w.addDocument(doc);
w.commit();
doc = new Document();
doc.add(new NumericDocValuesField("id", 2));
doc.add(new SortedNumericDocValuesField("foo", 7));
doc.add(new SortedNumericDocValuesField("foo", 34));
w.addDocument(doc);
w.forceMerge(1);
DirectoryReader r = DirectoryReader.open(w);
LeafReader leaf = getOnlyLeafReader(r);
assertEquals(3, leaf.maxDoc());
NumericDocValues values = leaf.getNumericDocValues("id");
if (reverse) {
assertEquals(0, values.nextDoc());
assertEquals(3, values.longValue());
assertEquals(1, values.nextDoc());
assertEquals(2, values.longValue());
assertEquals(2, values.nextDoc());
assertEquals(1, values.longValue());
} else {
assertEquals(0, values.nextDoc());
assertEquals(1, values.longValue());
assertEquals(1, values.nextDoc());
assertEquals(2, values.longValue());
assertEquals(2, values.nextDoc());
assertEquals(3, values.longValue());
}
r.close();
w.close();
dir.close();
}
}
public void testMissingIntLast() throws Exception {
for (boolean reverse : new boolean[] {true, false}) {
Directory dir = newDirectory();
IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
SortField sortField = new SortField("foo", SortField.Type.INT, reverse);
sortField.setMissingValue(Integer.valueOf(Integer.MAX_VALUE));
Sort indexSort = new Sort(sortField);
iwc.setIndexSort(indexSort);
IndexWriter w = new IndexWriter(dir, iwc);
Document doc = new Document();
doc.add(new NumericDocValuesField("foo", 18));
w.addDocument(doc);
// so we get more than one segment, so that forceMerge actually does merge, since we only get a sorted segment by merging:
w.commit();
// missing
w.addDocument(new Document());
w.commit();
doc = new Document();
doc.add(new NumericDocValuesField("foo", 7));
w.addDocument(doc);
w.forceMerge(1);
DirectoryReader r = DirectoryReader.open(w);
LeafReader leaf = getOnlyLeafReader(r);
assertEquals(3, leaf.maxDoc());
NumericDocValues values = leaf.getNumericDocValues("foo");
if (reverse) {
// docID 0 is missing
assertEquals(1, values.nextDoc());
assertEquals(18, values.longValue());
assertEquals(2, values.nextDoc());
assertEquals(7, values.longValue());
} else {
assertEquals(0, values.nextDoc());
assertEquals(7, values.longValue());
assertEquals(1, values.nextDoc());
assertEquals(18, values.longValue());
}
assertEquals(NO_MORE_DOCS, values.nextDoc());
r.close();
w.close();
dir.close();
}
}
public void testMissingMultiValuedIntLast() throws Exception {
for (boolean reverse : new boolean[] {true, false}) {
Directory dir = newDirectory();
IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
SortField sortField = new SortedNumericSortField("foo", SortField.Type.INT, reverse);
sortField.setMissingValue(Integer.valueOf(Integer.MAX_VALUE));
Sort indexSort = new Sort(sortField);
iwc.setIndexSort(indexSort);
IndexWriter w = new IndexWriter(dir, iwc);
Document doc = new Document();
doc.add(new NumericDocValuesField("id", 2));
doc.add(new SortedNumericDocValuesField("foo", 18));
doc.add(new SortedNumericDocValuesField("foo", 6372));
w.addDocument(doc);
// so we get more than one segment, so that forceMerge actually does merge, since we only get a sorted segment by merging:
w.commit();
// missing
doc = new Document();
doc.add(new NumericDocValuesField("id", 3));
w.addDocument(doc);
w.commit();
doc = new Document();
doc.add(new NumericDocValuesField("id", 1));
doc.add(new SortedNumericDocValuesField("foo", 7));
doc.add(new SortedNumericDocValuesField("foo", 8));
w.addDocument(doc);
w.forceMerge(1);
DirectoryReader r = DirectoryReader.open(w);
LeafReader leaf = getOnlyLeafReader(r);
assertEquals(3, leaf.maxDoc());
NumericDocValues values = leaf.getNumericDocValues("id");
if (reverse) {
assertEquals(0, values.nextDoc());
assertEquals(3, values.longValue());
assertEquals(1, values.nextDoc());
assertEquals(2, values.longValue());
assertEquals(2, values.nextDoc());
assertEquals(1, values.longValue());
} else {
assertEquals(0, values.nextDoc());
assertEquals(1, values.longValue());
assertEquals(1, values.nextDoc());
assertEquals(2, values.longValue());
assertEquals(2, values.nextDoc());
assertEquals(3, values.longValue());
}
r.close();
w.close();
dir.close();
}
}
public void testBasicDouble() throws Exception {
Directory dir = newDirectory();
IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
Sort indexSort = new Sort(new SortField("foo", SortField.Type.DOUBLE));
iwc.setIndexSort(indexSort);
IndexWriter w = new IndexWriter(dir, iwc);
Document doc = new Document();
doc.add(new DoubleDocValuesField("foo", 18.0));
w.addDocument(doc);
// so we get more than one segment, so that forceMerge actually does merge, since we only get a sorted segment by merging:
w.commit();
doc = new Document();
doc.add(new DoubleDocValuesField("foo", -1.0));
w.addDocument(doc);
w.commit();
doc = new Document();
doc.add(new DoubleDocValuesField("foo", 7.0));
w.addDocument(doc);
w.forceMerge(1);
DirectoryReader r = DirectoryReader.open(w);
LeafReader leaf = getOnlyLeafReader(r);
assertEquals(3, leaf.maxDoc());
NumericDocValues values = leaf.getNumericDocValues("foo");
assertEquals(0, values.nextDoc());
assertEquals(-1.0, Double.longBitsToDouble(values.longValue()), 0.0);
assertEquals(1, values.nextDoc());
assertEquals(7.0, Double.longBitsToDouble(values.longValue()), 0.0);
assertEquals(2, values.nextDoc());
assertEquals(18.0, Double.longBitsToDouble(values.longValue()), 0.0);
r.close();
w.close();
dir.close();
}
public void testBasicMultiValuedDouble() throws Exception {
Directory dir = newDirectory();
IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
Sort indexSort = new Sort(new SortedNumericSortField("foo", SortField.Type.DOUBLE));
iwc.setIndexSort(indexSort);
IndexWriter w = new IndexWriter(dir, iwc);
Document doc = new Document();
doc.add(new NumericDocValuesField("id", 3));
doc.add(new SortedNumericDocValuesField("foo", NumericUtils.doubleToSortableLong(7.54)));
doc.add(new SortedNumericDocValuesField("foo", NumericUtils.doubleToSortableLong(27.0)));
w.addDocument(doc);
// so we get more than one segment, so that forceMerge actually does merge, since we only get a sorted segment by merging:
w.commit();
doc = new Document();
doc.add(new NumericDocValuesField("id", 1));
doc.add(new SortedNumericDocValuesField("foo", NumericUtils.doubleToSortableLong(-1.0)));
doc.add(new SortedNumericDocValuesField("foo", NumericUtils.doubleToSortableLong(0.0)));
w.addDocument(doc);
w.commit();
doc = new Document();
doc.add(new NumericDocValuesField("id", 2));
doc.add(new SortedNumericDocValuesField("foo", NumericUtils.doubleToSortableLong(7.0)));
doc.add(new SortedNumericDocValuesField("foo", NumericUtils.doubleToSortableLong(7.67)));
w.addDocument(doc);
w.forceMerge(1);
DirectoryReader r = DirectoryReader.open(w);
LeafReader leaf = getOnlyLeafReader(r);
assertEquals(3, leaf.maxDoc());
NumericDocValues values = leaf.getNumericDocValues("id");
assertEquals(0, values.nextDoc());
assertEquals(1, values.longValue());
assertEquals(1, values.nextDoc());
assertEquals(2, values.longValue());
assertEquals(2, values.nextDoc());
assertEquals(3, values.longValue());
r.close();
w.close();
dir.close();
}
public void testMissingDoubleFirst() throws Exception {
for (boolean reverse : new boolean[] {true, false}) {
Directory dir = newDirectory();
IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
SortField sortField = new SortField("foo", SortField.Type.DOUBLE, reverse);
sortField.setMissingValue(Double.NEGATIVE_INFINITY);
Sort indexSort = new Sort(sortField);
iwc.setIndexSort(indexSort);
IndexWriter w = new IndexWriter(dir, iwc);
Document doc = new Document();
doc.add(new DoubleDocValuesField("foo", 18.0));
w.addDocument(doc);
// so we get more than one segment, so that forceMerge actually does merge, since we only get a sorted segment by merging:
w.commit();
// missing
w.addDocument(new Document());
w.commit();
doc = new Document();
doc.add(new DoubleDocValuesField("foo", 7.0));
w.addDocument(doc);
w.forceMerge(1);
DirectoryReader r = DirectoryReader.open(w);
LeafReader leaf = getOnlyLeafReader(r);
assertEquals(3, leaf.maxDoc());
NumericDocValues values = leaf.getNumericDocValues("foo");
if (reverse) {
assertEquals(0, values.nextDoc());
assertEquals(18.0, Double.longBitsToDouble(values.longValue()), 0.0);
assertEquals(1, values.nextDoc());
assertEquals(7.0, Double.longBitsToDouble(values.longValue()), 0.0);
} else {
assertEquals(1, values.nextDoc());
assertEquals(7.0, Double.longBitsToDouble(values.longValue()), 0.0);
assertEquals(2, values.nextDoc());
assertEquals(18.0, Double.longBitsToDouble(values.longValue()), 0.0);
}
r.close();
w.close();
dir.close();
}
}
public void testMissingMultiValuedDoubleFirst() throws Exception {
for (boolean reverse : new boolean[] {true, false}) {
Directory dir = newDirectory();
IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
SortField sortField = new SortedNumericSortField("foo", SortField.Type.DOUBLE, reverse);
sortField.setMissingValue(Double.NEGATIVE_INFINITY);
Sort indexSort = new Sort(sortField);
iwc.setIndexSort(indexSort);
IndexWriter w = new IndexWriter(dir, iwc);
Document doc = new Document();
doc.add(new NumericDocValuesField("id", 3));
doc.add(new SortedNumericDocValuesField("foo", NumericUtils.doubleToSortableLong(18.0)));
doc.add(new SortedNumericDocValuesField("foo", NumericUtils.doubleToSortableLong(18.76)));
w.addDocument(doc);
// so we get more than one segment, so that forceMerge actually does merge, since we only get a sorted segment by merging:
w.commit();
// missing
doc = new Document();
doc.add(new NumericDocValuesField("id", 1));
w.addDocument(doc);
w.commit();
doc = new Document();
doc.add(new NumericDocValuesField("id", 2));
doc.add(new SortedNumericDocValuesField("foo", NumericUtils.doubleToSortableLong(7.0)));
doc.add(new SortedNumericDocValuesField("foo", NumericUtils.doubleToSortableLong(70.0)));
w.addDocument(doc);
w.forceMerge(1);
DirectoryReader r = DirectoryReader.open(w);
LeafReader leaf = getOnlyLeafReader(r);
assertEquals(3, leaf.maxDoc());
NumericDocValues values = leaf.getNumericDocValues("id");
if (reverse) {
assertEquals(0, values.nextDoc());
assertEquals(3, values.longValue());
assertEquals(1, values.nextDoc());
assertEquals(2, values.longValue());
assertEquals(2, values.nextDoc());
assertEquals(1, values.longValue());
} else {
assertEquals(0, values.nextDoc());
assertEquals(1, values.longValue());
assertEquals(1, values.nextDoc());
assertEquals(2, values.longValue());
assertEquals(2, values.nextDoc());
assertEquals(3, values.longValue());
}
r.close();
w.close();
dir.close();
}
}
public void testMissingDoubleLast() throws Exception {
for (boolean reverse : new boolean[] {true, false}) {
Directory dir = newDirectory();
IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
SortField sortField = new SortField("foo", SortField.Type.DOUBLE, reverse);
sortField.setMissingValue(Double.POSITIVE_INFINITY);
Sort indexSort = new Sort(sortField);
iwc.setIndexSort(indexSort);
IndexWriter w = new IndexWriter(dir, iwc);
Document doc = new Document();
doc.add(new DoubleDocValuesField("foo", 18.0));
w.addDocument(doc);
// so we get more than one segment, so that forceMerge actually does merge, since we only get a sorted segment by merging:
w.commit();
// missing
w.addDocument(new Document());
w.commit();
doc = new Document();
doc.add(new DoubleDocValuesField("foo", 7.0));
w.addDocument(doc);
w.forceMerge(1);
DirectoryReader r = DirectoryReader.open(w);
LeafReader leaf = getOnlyLeafReader(r);
assertEquals(3, leaf.maxDoc());
NumericDocValues values = leaf.getNumericDocValues("foo");
if (reverse) {
assertEquals(1, values.nextDoc());
assertEquals(18.0, Double.longBitsToDouble(values.longValue()), 0.0);
assertEquals(2, values.nextDoc());
assertEquals(7.0, Double.longBitsToDouble(values.longValue()), 0.0);
} else {
assertEquals(0, values.nextDoc());
assertEquals(7.0, Double.longBitsToDouble(values.longValue()), 0.0);
assertEquals(1, values.nextDoc());
assertEquals(18.0, Double.longBitsToDouble(values.longValue()), 0.0);
}
assertEquals(NO_MORE_DOCS, values.nextDoc());
r.close();
w.close();
dir.close();
}
}
public void testMissingMultiValuedDoubleLast() throws Exception {
for (boolean reverse : new boolean[] {true, false}) {
Directory dir = newDirectory();
IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
SortField sortField = new SortedNumericSortField("foo", SortField.Type.DOUBLE, reverse);
sortField.setMissingValue(Double.POSITIVE_INFINITY);
Sort indexSort = new Sort(sortField);
iwc.setIndexSort(indexSort);
IndexWriter w = new IndexWriter(dir, iwc);
Document doc = new Document();
doc.add(new NumericDocValuesField("id", 2));
doc.add(new SortedNumericDocValuesField("foo", NumericUtils.doubleToSortableLong(18.0)));
doc.add(new SortedNumericDocValuesField("foo", NumericUtils.doubleToSortableLong(8262.0)));
w.addDocument(doc);
// so we get more than one segment, so that forceMerge actually does merge, since we only get a sorted segment by merging:
w.commit();
// missing
doc = new Document();
doc.add(new NumericDocValuesField("id", 3));
w.addDocument(doc);
w.commit();
doc = new Document();
doc.add(new NumericDocValuesField("id", 1));
doc.add(new SortedNumericDocValuesField("foo", NumericUtils.doubleToSortableLong(7.0)));
doc.add(new SortedNumericDocValuesField("foo", NumericUtils.doubleToSortableLong(7.87)));
w.addDocument(doc);
w.forceMerge(1);
DirectoryReader r = DirectoryReader.open(w);
LeafReader leaf = getOnlyLeafReader(r);
assertEquals(3, leaf.maxDoc());
NumericDocValues values = leaf.getNumericDocValues("id");
if (reverse) {
assertEquals(0, values.nextDoc());
assertEquals(3, values.longValue());
assertEquals(1, values.nextDoc());
assertEquals(2, values.longValue());
assertEquals(2, values.nextDoc());
assertEquals(1, values.longValue());
} else {
assertEquals(0, values.nextDoc());
assertEquals(1, values.longValue());
assertEquals(1, values.nextDoc());
assertEquals(2, values.longValue());
assertEquals(2, values.nextDoc());
assertEquals(3, values.longValue());
}
r.close();
w.close();
dir.close();
}
}
public void testBasicFloat() throws Exception {
Directory dir = newDirectory();
IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
Sort indexSort = new Sort(new SortField("foo", SortField.Type.FLOAT));
iwc.setIndexSort(indexSort);
IndexWriter w = new IndexWriter(dir, iwc);
Document doc = new Document();
doc.add(new FloatDocValuesField("foo", 18.0f));
w.addDocument(doc);
// so we get more than one segment, so that forceMerge actually does merge, since we only get a sorted segment by merging:
w.commit();
doc = new Document();
doc.add(new FloatDocValuesField("foo", -1.0f));
w.addDocument(doc);
w.commit();
doc = new Document();
doc.add(new FloatDocValuesField("foo", 7.0f));
w.addDocument(doc);
w.forceMerge(1);
DirectoryReader r = DirectoryReader.open(w);
LeafReader leaf = getOnlyLeafReader(r);
assertEquals(3, leaf.maxDoc());
NumericDocValues values = leaf.getNumericDocValues("foo");
assertEquals(0, values.nextDoc());
assertEquals(-1.0f, Float.intBitsToFloat((int) values.longValue()), 0.0f);
assertEquals(1, values.nextDoc());
assertEquals(7.0f, Float.intBitsToFloat((int) values.longValue()), 0.0f);
assertEquals(2, values.nextDoc());
assertEquals(18.0f, Float.intBitsToFloat((int) values.longValue()), 0.0f);
r.close();
w.close();
dir.close();
}
public void testBasicMultiValuedFloat() throws Exception {
Directory dir = newDirectory();
IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
Sort indexSort = new Sort(new SortedNumericSortField("foo", SortField.Type.FLOAT));
iwc.setIndexSort(indexSort);
IndexWriter w = new IndexWriter(dir, iwc);
Document doc = new Document();
doc.add(new NumericDocValuesField("id", 3));
doc.add(new SortedNumericDocValuesField("foo", NumericUtils.floatToSortableInt(18.0f)));
doc.add(new SortedNumericDocValuesField("foo", NumericUtils.floatToSortableInt(29.0f)));
w.addDocument(doc);
// so we get more than one segment, so that forceMerge actually does merge, since we only get a sorted segment by merging:
w.commit();
doc = new Document();
doc.add(new NumericDocValuesField("id", 1));
doc.add(new SortedNumericDocValuesField("foo", NumericUtils.floatToSortableInt(-1.0f)));
doc.add(new SortedNumericDocValuesField("foo", NumericUtils.floatToSortableInt(34.0f)));
w.addDocument(doc);
w.commit();
doc = new Document();
doc.add(new NumericDocValuesField("id", 2));
doc.add(new SortedNumericDocValuesField("foo", NumericUtils.floatToSortableInt(7.0f)));
w.addDocument(doc);
w.forceMerge(1);
DirectoryReader r = DirectoryReader.open(w);
LeafReader leaf = getOnlyLeafReader(r);
assertEquals(3, leaf.maxDoc());
NumericDocValues values = leaf.getNumericDocValues("id");
assertEquals(0, values.nextDoc());
assertEquals(1, values.longValue());
assertEquals(1, values.nextDoc());
assertEquals(2, values.longValue());
assertEquals(2, values.nextDoc());
assertEquals(3, values.longValue());
r.close();
w.close();
dir.close();
}
public void testMissingFloatFirst() throws Exception {
for (boolean reverse : new boolean[] {true, false}) {
Directory dir = newDirectory();
IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
SortField sortField = new SortField("foo", SortField.Type.FLOAT, reverse);
sortField.setMissingValue(Float.NEGATIVE_INFINITY);
Sort indexSort = new Sort(sortField);
iwc.setIndexSort(indexSort);
IndexWriter w = new IndexWriter(dir, iwc);
Document doc = new Document();
doc.add(new FloatDocValuesField("foo", 18.0f));
w.addDocument(doc);
// so we get more than one segment, so that forceMerge actually does merge, since we only get a sorted segment by merging:
w.commit();
// missing
w.addDocument(new Document());
w.commit();
doc = new Document();
doc.add(new FloatDocValuesField("foo", 7.0f));
w.addDocument(doc);
w.forceMerge(1);
DirectoryReader r = DirectoryReader.open(w);
LeafReader leaf = getOnlyLeafReader(r);
assertEquals(3, leaf.maxDoc());
NumericDocValues values = leaf.getNumericDocValues("foo");
if (reverse) {
assertEquals(0, values.nextDoc());
assertEquals(18.0f, Float.intBitsToFloat((int) values.longValue()), 0.0f);
assertEquals(1, values.nextDoc());
assertEquals(7.0f, Float.intBitsToFloat((int) values.longValue()), 0.0f);
} else {
assertEquals(1, values.nextDoc());
assertEquals(7.0f, Float.intBitsToFloat((int) values.longValue()), 0.0f);
assertEquals(2, values.nextDoc());
assertEquals(18.0f, Float.intBitsToFloat((int) values.longValue()), 0.0f);
}
r.close();
w.close();
dir.close();
}
}
public void testMissingMultiValuedFloatFirst() throws Exception {
for (boolean reverse : new boolean[] {true, false}) {
Directory dir = newDirectory();
IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
SortField sortField = new SortedNumericSortField("foo", SortField.Type.FLOAT, reverse);
sortField.setMissingValue(Float.NEGATIVE_INFINITY);
Sort indexSort = new Sort(sortField);
iwc.setIndexSort(indexSort);
IndexWriter w = new IndexWriter(dir, iwc);
Document doc = new Document();
doc.add(new NumericDocValuesField("id", 3));
doc.add(new SortedNumericDocValuesField("foo", NumericUtils.floatToSortableInt(18.0f)));
doc.add(new SortedNumericDocValuesField("foo", NumericUtils.floatToSortableInt(726.0f)));
w.addDocument(doc);
// so we get more than one segment, so that forceMerge actually does merge, since we only get a sorted segment by merging:
w.commit();
// missing
doc = new Document();
doc.add(new NumericDocValuesField("id", 1));
w.addDocument(doc);
w.commit();
doc = new Document();
doc.add(new NumericDocValuesField("id", 2));
doc.add(new SortedNumericDocValuesField("foo", NumericUtils.floatToSortableInt(7.0f)));
doc.add(new SortedNumericDocValuesField("foo", NumericUtils.floatToSortableInt(18.0f)));
w.addDocument(doc);
w.forceMerge(1);
DirectoryReader r = DirectoryReader.open(w);
LeafReader leaf = getOnlyLeafReader(r);
assertEquals(3, leaf.maxDoc());
NumericDocValues values = leaf.getNumericDocValues("id");
if (reverse) {
assertEquals(0, values.nextDoc());
assertEquals(3, values.longValue());
assertEquals(1, values.nextDoc());
assertEquals(2, values.longValue());
assertEquals(2, values.nextDoc());
assertEquals(1, values.longValue());
} else {
assertEquals(0, values.nextDoc());
assertEquals(1, values.longValue());
assertEquals(1, values.nextDoc());
assertEquals(2, values.longValue());
assertEquals(2, values.nextDoc());
assertEquals(3, values.longValue());
}
r.close();
w.close();
dir.close();
}
}
public void testMissingFloatLast() throws Exception {
for (boolean reverse : new boolean[] {true, false}) {
Directory dir = newDirectory();
IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
SortField sortField = new SortField("foo", SortField.Type.FLOAT, reverse);
sortField.setMissingValue(Float.POSITIVE_INFINITY);
Sort indexSort = new Sort(sortField);
iwc.setIndexSort(indexSort);
IndexWriter w = new IndexWriter(dir, iwc);
Document doc = new Document();
doc.add(new FloatDocValuesField("foo", 18.0f));
w.addDocument(doc);
// so we get more than one segment, so that forceMerge actually does merge, since we only get a sorted segment by merging:
w.commit();
// missing
w.addDocument(new Document());
w.commit();
doc = new Document();
doc.add(new FloatDocValuesField("foo", 7.0f));
w.addDocument(doc);
w.forceMerge(1);
DirectoryReader r = DirectoryReader.open(w);
LeafReader leaf = getOnlyLeafReader(r);
assertEquals(3, leaf.maxDoc());
NumericDocValues values = leaf.getNumericDocValues("foo");
if (reverse) {
assertEquals(1, values.nextDoc());
assertEquals(18.0f, Float.intBitsToFloat((int) values.longValue()), 0.0f);
assertEquals(2, values.nextDoc());
assertEquals(7.0f, Float.intBitsToFloat((int) values.longValue()), 0.0f);
} else {
assertEquals(0, values.nextDoc());
assertEquals(7.0f, Float.intBitsToFloat((int) values.longValue()), 0.0f);
assertEquals(1, values.nextDoc());
assertEquals(18.0f, Float.intBitsToFloat((int) values.longValue()), 0.0f);
}
assertEquals(NO_MORE_DOCS, values.nextDoc());
r.close();
w.close();
dir.close();
}
}
public void testMissingMultiValuedFloatLast() throws Exception {
for (boolean reverse : new boolean[] {true, false}) {
Directory dir = newDirectory();
IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
SortField sortField = new SortedNumericSortField("foo", SortField.Type.FLOAT, reverse);
sortField.setMissingValue(Float.POSITIVE_INFINITY);
Sort indexSort = new Sort(sortField);
iwc.setIndexSort(indexSort);
IndexWriter w = new IndexWriter(dir, iwc);
Document doc = new Document();
doc.add(new NumericDocValuesField("id", 2));
doc.add(new SortedNumericDocValuesField("foo", NumericUtils.floatToSortableInt(726.0f)));
doc.add(new SortedNumericDocValuesField("foo", NumericUtils.floatToSortableInt(18.0f)));
w.addDocument(doc);
// so we get more than one segment, so that forceMerge actually does merge, since we only get a sorted segment by merging:
w.commit();
// missing
doc = new Document();
doc.add(new NumericDocValuesField("id", 3));
w.addDocument(doc);
w.commit();
doc = new Document();
doc.add(new NumericDocValuesField("id", 1));
doc.add(new SortedNumericDocValuesField("foo", NumericUtils.floatToSortableInt(12.67f)));
doc.add(new SortedNumericDocValuesField("foo", NumericUtils.floatToSortableInt(7.0f)));
w.addDocument(doc);
w.forceMerge(1);
DirectoryReader r = DirectoryReader.open(w);
LeafReader leaf = getOnlyLeafReader(r);
assertEquals(3, leaf.maxDoc());
NumericDocValues values = leaf.getNumericDocValues("id");
if (reverse) {
assertEquals(0, values.nextDoc());
assertEquals(3, values.longValue());
assertEquals(1, values.nextDoc());
assertEquals(2, values.longValue());
assertEquals(2, values.nextDoc());
assertEquals(1, values.longValue());
} else {
assertEquals(0, values.nextDoc());
assertEquals(1, values.longValue());
assertEquals(1, values.nextDoc());
assertEquals(2, values.longValue());
assertEquals(2, values.nextDoc());
assertEquals(3, values.longValue());
}
r.close();
w.close();
dir.close();
}
}
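// random single-valued long index sort with deletes, reopens and forceMerges; checks that flushed and merged segments are sorted and that the index stays consistent: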
public void testRandom1() throws IOException {
boolean withDeletes = random().nextBoolean();
Directory dir = newDirectory();
IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
Sort indexSort = new Sort(new SortField("foo", SortField.Type.LONG));
iwc.setIndexSort(indexSort);
IndexWriter w = new IndexWriter(dir, iwc);
final int numDocs = atLeast(200);
final FixedBitSet deleted = new FixedBitSet(numDocs);
for (int i = 0; i < numDocs; ++i) {
Document doc = new Document();
doc.add(new NumericDocValuesField("foo", random().nextInt(20)));
doc.add(new StringField("id", Integer.toString(i), Store.YES));
doc.add(new NumericDocValuesField("id", i));
w.addDocument(doc);
if (random().nextInt(5) == 0) {
w.getReader().close();
} else if (random().nextInt(30) == 0) {
w.forceMerge(2);
} else if (random().nextInt(4) == 0) {
final int id = TestUtil.nextInt(random(), 0, i);
deleted.set(id);
w.deleteDocuments(new Term("id", Integer.toString(id)));
}
}
// Check that segments are sorted
DirectoryReader reader = w.getReader();
for (LeafReaderContext ctx : reader.leaves()) {
final SegmentReader leaf = (SegmentReader) ctx.reader();
SegmentInfo info = leaf.getSegmentInfo().info;
switch (info.getDiagnostics().get(IndexWriter.SOURCE)) {
case IndexWriter.SOURCE_FLUSH:
case IndexWriter.SOURCE_MERGE:
assertEquals(indexSort, info.getIndexSort());
final NumericDocValues values = leaf.getNumericDocValues("foo");
long previous = Long.MIN_VALUE;
for (int i = 0; i < leaf.maxDoc(); ++i) {
assertEquals(i, values.nextDoc());
final long value = values.longValue();
assertTrue(value >= previous);
previous = value;
}
break;
default:
fail();
}
}
// Now check that the index is consistent
IndexSearcher searcher = newSearcher(reader);
for (int i = 0; i < numDocs; ++i) {
TermQuery termQuery = new TermQuery(new Term("id", Integer.toString(i)));
final TopDocs topDocs = searcher.search(termQuery, 1);
if (deleted.get(i)) {
assertEquals(0, topDocs.totalHits.value);
} else {
assertEquals(1, topDocs.totalHits.value);
NumericDocValues values = MultiDocValues.getNumericValues(reader, "id");
assertEquals(topDocs.scoreDocs[0].doc, values.advance(topDocs.scoreDocs[0].doc));
assertEquals(i, values.longValue());
Document document = reader.document(topDocs.scoreDocs[0].doc);
assertEquals(Integer.toString(i), document.get("id"));
}
}
reader.close();
w.close();
dir.close();
}
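// random multi-valued (SortedNumeric) long index sort; only checks that the index stays consistent: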
public void testMultiValuedRandom1() throws IOException {
boolean withDeletes = random().nextBoolean();
Directory dir = newDirectory();
IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
Sort indexSort = new Sort(new SortedNumericSortField("foo", SortField.Type.LONG));
iwc.setIndexSort(indexSort);
IndexWriter w = new IndexWriter(dir, iwc);
final int numDocs = atLeast(200);
final FixedBitSet deleted = new FixedBitSet(numDocs);
for (int i = 0; i < numDocs; ++i) {
Document doc = new Document();
int num = random().nextInt(10);
for (int j = 0; j < num; j++) {
doc.add(new SortedNumericDocValuesField("foo", random().nextInt(2000)));
}
doc.add(new StringField("id", Integer.toString(i), Store.YES));
doc.add(new NumericDocValuesField("id", i));
w.addDocument(doc);
if (random().nextInt(5) == 0) {
w.getReader().close();
} else if (random().nextInt(30) == 0) {
w.forceMerge(2);
} else if (random().nextInt(4) == 0) {
final int id = TestUtil.nextInt(random(), 0, i);
deleted.set(id);
w.deleteDocuments(new Term("id", Integer.toString(id)));
}
}
DirectoryReader reader = w.getReader();
// Now check that the index is consistent
IndexSearcher searcher = newSearcher(reader);
for (int i = 0; i < numDocs; ++i) {
TermQuery termQuery = new TermQuery(new Term("id", Integer.toString(i)));
final TopDocs topDocs = searcher.search(termQuery, 1);
if (deleted.get(i)) {
assertEquals(0, topDocs.totalHits.value);
} else {
assertEquals(1, topDocs.totalHits.value);
NumericDocValues values = MultiDocValues.getNumericValues(reader, "id");
assertEquals(topDocs.scoreDocs[0].doc, values.advance(topDocs.scoreDocs[0].doc));
assertEquals(i, values.longValue());
Document document = reader.document(topDocs.scoreDocs[0].doc);
assertEquals(Integer.toString(i), document.get("id"));
}
}
reader.close();
w.close();
dir.close();
}
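// worker that concurrently replaces random docs via updateDocument, tracking the latest value per id, and occasionally reopens or force-merges: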
static class UpdateRunnable implements Runnable {
private final int numDocs;
private final Random random;
private final AtomicInteger updateCount;
private final IndexWriter w;
private final Map<Integer, Long> values;
private final CountDownLatch latch;
UpdateRunnable(int numDocs, Random random, CountDownLatch latch, AtomicInteger updateCount, IndexWriter w, Map<Integer, Long> values) {
this.numDocs = numDocs;
this.random = random;
this.latch = latch;
this.updateCount = updateCount;
this.w = w;
this.values = values;
}
@Override
public void run() {
try {
latch.await();
while (updateCount.decrementAndGet() >= 0) {
final int id = random.nextInt(numDocs);
final long value = random.nextInt(20);
Document doc = new Document();
doc.add(new StringField("id", Integer.toString(id), Store.NO));
doc.add(new NumericDocValuesField("foo", value));
synchronized (values) {
w.updateDocument(new Term("id", Integer.toString(id)), doc);
values.put(id, value);
}
switch (random.nextInt(10)) {
case 0:
case 1:
// reopen
DirectoryReader.open(w).close();
break;
case 2:
w.forceMerge(3);
break;
}
}
} catch (IOException | InterruptedException e) {
throw new RuntimeException(e);
}
}
}
// There is tricky logic to resolve deletes that happened while merging
public void testConcurrentUpdates() throws Exception {
Directory dir = newDirectory();
IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
Sort indexSort = new Sort(new SortField("foo", SortField.Type.LONG));
iwc.setIndexSort(indexSort);
IndexWriter w = new IndexWriter(dir, iwc);
Map<Integer, Long> values = new HashMap<>();
final int numDocs = atLeast(100);
Thread[] threads = new Thread[2];
final AtomicInteger updateCount = new AtomicInteger(atLeast(1000));
final CountDownLatch latch = new CountDownLatch(1);
for (int i = 0; i < threads.length; ++i) {
Random r = new Random(random().nextLong());
threads[i] = new Thread(new UpdateRunnable(numDocs, r, latch, updateCount, w, values));
}
for (Thread thread : threads) {
thread.start();
}
latch.countDown();
for (Thread thread : threads) {
thread.join();
}
w.forceMerge(1);
DirectoryReader reader = DirectoryReader.open(w);
IndexSearcher searcher = newSearcher(reader);
for (int i = 0; i < numDocs; ++i) {
final TopDocs topDocs = searcher.search(new TermQuery(new Term("id", Integer.toString(i))), 1);
if (values.containsKey(i) == false) {
assertEquals(0, topDocs.totalHits.value);
} else {
assertEquals(1, topDocs.totalHits.value);
NumericDocValues dvs = MultiDocValues.getNumericValues(reader, "foo");
int docID = topDocs.scoreDocs[0].doc;
assertEquals(docID, dvs.advance(docID));
assertEquals(values.get(i).longValue(), dvs.longValue());
}
}
reader.close();
w.close();
dir.close();
}
// docvalues fields involved in the index sort cannot be updated
public void testBadDVUpdate() throws Exception {
Directory dir = newDirectory();
IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
Sort indexSort = new Sort(new SortField("foo", SortField.Type.LONG));
iwc.setIndexSort(indexSort);
IndexWriter w = new IndexWriter(dir, iwc);
Document doc = new Document();
doc.add(new StringField("id", newBytesRef("0"), Store.NO));
doc.add(new NumericDocValuesField("foo", random().nextInt()));
w.addDocument(doc);
w.commit();
IllegalArgumentException exc = expectThrows(IllegalArgumentException.class,
() -> w.updateDocValues(new Term("id", "0"), new NumericDocValuesField("foo", -1)));
assertEquals("cannot update docvalues field involved in the index sort, field=foo, sort=<long: \"foo\">", exc.getMessage());
exc = expectThrows(IllegalArgumentException.class,
() -> w.updateNumericDocValue(new Term("id", "0"), "foo", -1));
assertEquals("cannot update docvalues field involved in the index sort, field=foo, sort=<long: \"foo\">", exc.getMessage());
w.close();
dir.close();
}
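// worker that concurrently applies numeric doc values updates to the "bar" field (not part of the index sort), tracking the latest value per id: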
static class DVUpdateRunnable implements Runnable {
private final int numDocs;
private final Random random;
private final AtomicInteger updateCount;
private final IndexWriter w;
private final Map<Integer, Long> values;
private final CountDownLatch latch;
DVUpdateRunnable(int numDocs, Random random, CountDownLatch latch, AtomicInteger updateCount, IndexWriter w, Map<Integer, Long> values) {
this.numDocs = numDocs;
this.random = random;
this.latch = latch;
this.updateCount = updateCount;
this.w = w;
this.values = values;
}
@Override
public void run() {
try {
latch.await();
while (updateCount.decrementAndGet() >= 0) {
final int id = random.nextInt(numDocs);
final long value = random.nextInt(20);
synchronized (values) {
w.updateDocValues(new Term("id", Integer.toString(id)), new NumericDocValuesField("bar", value));
values.put(id, value);
}
switch (random.nextInt(10)) {
case 0:
case 1:
// reopen
DirectoryReader.open(w).close();
break;
case 2:
w.forceMerge(3);
break;
}
}
} catch (IOException | InterruptedException e) {
throw new RuntimeException(e);
}
}
}
// There is tricky logic to resolve dv updates that happened while merging
public void testConcurrentDVUpdates() throws Exception {
Directory dir = newDirectory();
IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
Sort indexSort = new Sort(new SortField("foo", SortField.Type.LONG));
iwc.setIndexSort(indexSort);
IndexWriter w = new IndexWriter(dir, iwc);
Map<Integer, Long> values = new HashMap<>();
final int numDocs = atLeast(100);
for (int i = 0; i < numDocs; ++i) {
Document doc = new Document();
doc.add(new StringField("id", Integer.toString(i), Store.NO));
doc.add(new NumericDocValuesField("foo", random().nextInt()));
doc.add(new NumericDocValuesField("bar", -1));
w.addDocument(doc);
values.put(i, -1L);
}
Thread[] threads = new Thread[2];
final AtomicInteger updateCount = new AtomicInteger(atLeast(1000));
final CountDownLatch latch = new CountDownLatch(1);
for (int i = 0; i < threads.length; ++i) {
Random r = new Random(random().nextLong());
threads[i] = new Thread(new DVUpdateRunnable(numDocs, r, latch, updateCount, w, values));
}
for (Thread thread : threads) {
thread.start();
}
latch.countDown();
for (Thread thread : threads) {
thread.join();
}
w.forceMerge(1);
DirectoryReader reader = DirectoryReader.open(w);
IndexSearcher searcher = newSearcher(reader);
for (int i = 0; i < numDocs; ++i) {
final TopDocs topDocs = searcher.search(new TermQuery(new Term("id", Integer.toString(i))), 1);
assertEquals(1, topDocs.totalHits.value);
NumericDocValues dvs = MultiDocValues.getNumericValues(reader, "bar");
int hitDoc = topDocs.scoreDocs[0].doc;
assertEquals(hitDoc, dvs.advance(hitDoc));
assertEquals(values.get(i).longValue(), dvs.longValue());
}
reader.close();
w.close();
dir.close();
}
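// addIndexes must reject segments whose index sort is missing or different from the target writer's sort: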
public void testBadAddIndexes() throws Exception {
Directory dir = newDirectory();
Sort indexSort = new Sort(new SortField("foo", SortField.Type.LONG));
IndexWriterConfig iwc1 = newIndexWriterConfig();
iwc1.setIndexSort(indexSort);
IndexWriter w = new IndexWriter(dir, iwc1);
w.addDocument(new Document());
List<Sort> indexSorts = Arrays.asList(null, new Sort(new SortField("bar", SortField.Type.LONG)));
for (Sort sort : indexSorts) {
Directory dir2 = newDirectory();
IndexWriterConfig iwc2 = newIndexWriterConfig();
if (sort != null) {
iwc2.setIndexSort(sort);
}
IndexWriter w2 = new IndexWriter(dir2, iwc2);
w2.addDocument(new Document());
final IndexReader reader = w2.getReader();
w2.close();
IllegalArgumentException expected = expectThrows(IllegalArgumentException.class, () -> w.addIndexes(dir2));
assertThat(expected.getMessage(), containsString("cannot change index sort"));
CodecReader[] codecReaders = new CodecReader[reader.leaves().size()];
for (int i = 0; i < codecReaders.length; ++i) {
codecReaders[i] = (CodecReader) reader.leaves().get(i).reader();
}
expected = expectThrows(IllegalArgumentException.class, () -> w.addIndexes(codecReaders));
assertThat(expected.getMessage(), containsString("cannot change index sort"));
reader.close();
dir2.close();
}
w.close();
dir.close();
}
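// helper: builds an index sorted on (foo, bar), then addIndexes it (via Directory or CodecReaders) into a writer with the same or a congruent sort, and checks both indexes agree: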
public void testAddIndexes(boolean withDeletes, boolean useReaders) throws Exception {
Directory dir = newDirectory();
IndexWriterConfig iwc1 = newIndexWriterConfig();
Sort indexSort = new Sort(new SortField("foo", SortField.Type.LONG), new SortField("bar", SortField.Type.LONG));
iwc1.setIndexSort(indexSort);
RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc1);
final int numDocs = atLeast(100);
for (int i = 0; i < numDocs; ++i) {
Document doc = new Document();
doc.add(new StringField("id", Integer.toString(i), Store.NO));
doc.add(new NumericDocValuesField("foo", random().nextInt(20)));
doc.add(new NumericDocValuesField("bar", random().nextInt(20)));
w.addDocument(doc);
}
if (withDeletes) {
for (int i = random().nextInt(5); i < numDocs; i += TestUtil.nextInt(random(), 1, 5)) {
w.deleteDocuments(new Term("id", Integer.toString(i)));
}
}
if (random().nextBoolean()) {
w.forceMerge(1);
}
final IndexReader reader = w.getReader();
w.close();
Directory dir2 = newDirectory();
IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
if (indexSort != null && random().nextBoolean()) {
// test congruent index sort
iwc.setIndexSort(new Sort(new SortField("foo", SortField.Type.LONG)));
} else {
iwc.setIndexSort(indexSort);
}
IndexWriter w2 = new IndexWriter(dir2, iwc);
if (useReaders) {
CodecReader[] codecReaders = new CodecReader[reader.leaves().size()];
for (int i = 0; i < codecReaders.length; ++i) {
codecReaders[i] = (CodecReader) reader.leaves().get(i).reader();
}
w2.addIndexes(codecReaders);
} else {
w2.addIndexes(dir);
}
final IndexReader reader2 = w2.getReader();
final IndexSearcher searcher = newSearcher(reader);
final IndexSearcher searcher2 = newSearcher(reader2);
for (int i = 0; i < numDocs; ++i) {
Query query = new TermQuery(new Term("id", Integer.toString(i)));
final TopDocs topDocs = searcher.search(query, 1);
final TopDocs topDocs2 = searcher2.search(query, 1);
assertEquals(topDocs.totalHits.value, topDocs2.totalHits.value);
if (topDocs.totalHits.value == 1) {
NumericDocValues dvs1 = MultiDocValues.getNumericValues(reader, "foo");
int hitDoc1 = topDocs.scoreDocs[0].doc;
assertEquals(hitDoc1, dvs1.advance(hitDoc1));
long value1 = dvs1.longValue();
NumericDocValues dvs2 = MultiDocValues.getNumericValues(reader2, "foo");
int hitDoc2 = topDocs2.scoreDocs[0].doc;
assertEquals(hitDoc2, dvs2.advance(hitDoc2));
long value2 = dvs2.longValue();
assertEquals(value1, value2);
}
}
IOUtils.close(reader, reader2, w2, dir, dir2);
}
public void testAddIndexes() throws Exception {
testAddIndexes(false, true);
}
public void testAddIndexesWithDeletions() throws Exception {
testAddIndexes(true, true);
}
public void testAddIndexesWithDirectory() throws Exception {
testAddIndexes(false, false);
}
public void testAddIndexesWithDeletionsAndDirectory() throws Exception {
testAddIndexes(true, false);
}
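// sorting the index by relevance (score) is not allowed: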
public void testBadSort() throws Exception {
IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
IllegalArgumentException expected = expectThrows(IllegalArgumentException.class, () -> {
iwc.setIndexSort(Sort.RELEVANCE);
});
assertEquals("Cannot sort index with sort field <score>", expected.getMessage());
}
// you can't change the index sort on an existing index:
public void testIllegalChangeSort() throws Exception {
final Directory dir = newDirectory();
IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
iwc.setIndexSort(new Sort(new SortField("foo", SortField.Type.LONG)));
IndexWriter w = new IndexWriter(dir, iwc);
w.addDocument(new Document());
DirectoryReader.open(w).close();
w.addDocument(new Document());
w.forceMerge(1);
w.close();
final IndexWriterConfig iwc2 = new IndexWriterConfig(new MockAnalyzer(random()));
iwc2.setIndexSort(new Sort(new SortField("bar", SortField.Type.LONG)));
IllegalArgumentException e = expectThrows(IllegalArgumentException.class, () -> {
new IndexWriter(dir, iwc2);
});
String message = e.getMessage();
assertTrue(message.contains("cannot change previous indexSort=<long: \"foo\">"));
assertTrue(message.contains("to new indexSort=<long: \"bar\">"));
dir.close();
}
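// similarity that encodes the field length as the norm for the "norms" field so norms can be verified after sorting: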
static final class NormsSimilarity extends Similarity {
private final Similarity in;
public NormsSimilarity(Similarity in) {
this.in = in;
}
@Override
public long computeNorm(FieldInvertState state) {
if (state.getName().equals("norms")) {
return state.getLength();
} else {
return in.computeNorm(state);
}
}
@Override
public SimScorer scorer(float boost, CollectionStatistics collectionStats, TermStatistics... termStats) {
return in.scorer(boost, collectionStats, termStats);
}
}
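// token stream that emits the term "#all#" (id/10 + 1) times, with a counting payload and increasing offsets: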
static final class PositionsTokenStream extends TokenStream {
private final CharTermAttribute term;
private final PayloadAttribute payload;
private final OffsetAttribute offset;
private int pos, off;
public PositionsTokenStream() {
term = addAttribute(CharTermAttribute.class);
payload = addAttribute(PayloadAttribute.class);
offset = addAttribute(OffsetAttribute.class);
}
@Override
public boolean incrementToken() throws IOException {
if (pos == 0) {
return false;
}
clearAttributes();
term.append("#all#");
payload.setPayload(newBytesRef(Integer.toString(pos)));
offset.setOffset(off, off);
--pos;
++off;
return true;
}
void setId(int id) {
pos = id / 10 + 1;
off = 0;
}
}
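// indexes the same docs twice: once already in id order without an index sort, once shuffled with an index sort on "numeric"; the resulting readers must be identical: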
public void testRandom2() throws Exception {
int numDocs = atLeast(100);
FieldType POSITIONS_TYPE = new FieldType(TextField.TYPE_NOT_STORED);
POSITIONS_TYPE.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
POSITIONS_TYPE.freeze();
FieldType TERM_VECTORS_TYPE = new FieldType(TextField.TYPE_NOT_STORED);
TERM_VECTORS_TYPE.setStoreTermVectors(true);
TERM_VECTORS_TYPE.freeze();
Analyzer a = new Analyzer() {
@Override
protected TokenStreamComponents createComponents(String fieldName) {
Tokenizer tokenizer = new MockTokenizer();
return new TokenStreamComponents(tokenizer, tokenizer);
}
};
List<Document> docs = new ArrayList<>();
for (int i=0;i<numDocs;i++) {
int id = i * 10;
Document doc = new Document();
doc.add(new StringField("id", Integer.toString(id), Store.YES));
doc.add(new StringField("docs", "#all#", Store.NO));
PositionsTokenStream positions = new PositionsTokenStream();
positions.setId(id);
doc.add(new Field("positions", positions, POSITIONS_TYPE));
doc.add(new NumericDocValuesField("numeric", id));
String value = IntStream.range(0, id).mapToObj(k -> Integer.toString(id)).collect(Collectors.joining(" "));
TextField norms = new TextField("norms", value, Store.NO);
doc.add(norms);
doc.add(new BinaryDocValuesField("binary", newBytesRef(Integer.toString(id))));
doc.add(new SortedDocValuesField("sorted", newBytesRef(Integer.toString(id))));
doc.add(
new SortedSetDocValuesField("multi_valued_string", newBytesRef(Integer.toString(id))));
doc.add(
new SortedSetDocValuesField(
"multi_valued_string", newBytesRef(Integer.toString(id + 1))));
doc.add(new SortedNumericDocValuesField("multi_valued_numeric", id));
doc.add(new SortedNumericDocValuesField("multi_valued_numeric", id + 1));
doc.add(new Field("term_vectors", Integer.toString(id), TERM_VECTORS_TYPE));
byte[] bytes = new byte[4];
NumericUtils.intToSortableBytes(id, bytes, 0);
doc.add(new BinaryPoint("points", bytes));
docs.add(doc);
}
// Must use the same seed for both RandomIndexWriters so they behave identically
long seed = random().nextLong();
// We add documents already in ID order for the first writer:
Directory dir1 = newFSDirectory(createTempDir());
Random random1 = new Random(seed);
IndexWriterConfig iwc1 = newIndexWriterConfig(random1, a);
iwc1.setSimilarity(new NormsSimilarity(iwc1.getSimilarity())); // for testing norms field
// preserve docIDs
iwc1.setMergePolicy(newLogMergePolicy());
if (VERBOSE) {
System.out.println("TEST: now index pre-sorted");
}
RandomIndexWriter w1 = new RandomIndexWriter(random1, dir1, iwc1);
for(Document doc : docs) {
((PositionsTokenStream) ((Field) doc.getField("positions")).tokenStreamValue()).setId(Integer.parseInt(doc.get("id")));
w1.addDocument(doc);
}
// We shuffle documents, but set index sort, for the second writer:
Directory dir2 = newFSDirectory(createTempDir());
Random random2 = new Random(seed);
IndexWriterConfig iwc2 = newIndexWriterConfig(random2, a);
iwc2.setSimilarity(new NormsSimilarity(iwc2.getSimilarity())); // for testing norms field
Sort sort = new Sort(new SortField("numeric", SortField.Type.INT));
iwc2.setIndexSort(sort);
Collections.shuffle(docs, random());
if (VERBOSE) {
System.out.println("TEST: now index with index-time sorting");
}
RandomIndexWriter w2 = new RandomIndexWriter(random2, dir2, iwc2);
int count = 0;
int commitAtCount = TestUtil.nextInt(random(), 1, numDocs-1);
for(Document doc : docs) {
((PositionsTokenStream) ((Field) doc.getField("positions")).tokenStreamValue()).setId(Integer.parseInt(doc.get("id")));
if (count++ == commitAtCount) {
// Ensure forceMerge really does merge
w2.commit();
}
w2.addDocument(doc);
}
if (VERBOSE) {
System.out.println("TEST: now force merge");
}
w2.forceMerge(1);
DirectoryReader r1 = w1.getReader();
DirectoryReader r2 = w2.getReader();
if (VERBOSE) {
System.out.println("TEST: now compare r1=" + r1 + " r2=" + r2);
}
assertEquals(sort, getOnlyLeafReader(r2).getMetaData().getSort());
assertReaderEquals("left: sorted by hand; right: sorted by Lucene", r1, r2);
IOUtils.close(w1, w2, r1, r2, dir1, dir2);
}
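// per-document random values (single- and multi-valued) used by testRandom3: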
private static final class RandomDoc {
public final int id;
public final int intValue;
public final int[] intValues;
public final long longValue;
public final long[] longValues;
public final float floatValue;
public final float[] floatValues;
public final double doubleValue;
public final double[] doubleValues;
public final byte[] bytesValue;
public final byte[][] bytesValues;
public RandomDoc(int id) {
this.id = id;
intValue = random().nextInt();
longValue = random().nextLong();
floatValue = random().nextFloat();
doubleValue = random().nextDouble();
bytesValue = new byte[TestUtil.nextInt(random(), 1, 50)];
random().nextBytes(bytesValue);
int numValues = random().nextInt(10);
intValues = new int[numValues];
longValues = new long[numValues];
floatValues = new float[numValues];
doubleValues = new double[numValues];
bytesValues = new byte[numValues][];
for (int i = 0; i < numValues; i++) {
intValues[i] = random().nextInt();
longValues[i] = random().nextLong();
floatValues[i] = random().nextFloat();
doubleValues[i] = random().nextDouble();
bytesValues[i] = new byte[TestUtil.nextInt(random(), 1, 50)];
random().nextBytes(bytesValues[i]);
}
}
}
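// picks a random sort field (numeric or string, single- or multi-valued), optionally with a missing value: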
private static SortField randomIndexSortField() {
boolean reversed = random().nextBoolean();
SortField sortField;
switch(random().nextInt(10)) {
case 0:
sortField = new SortField("int", SortField.Type.INT, reversed);
if (random().nextBoolean()) {
sortField.setMissingValue(random().nextInt());
}
break;
case 1:
sortField = new SortedNumericSortField("multi_valued_int", SortField.Type.INT, reversed);
if (random().nextBoolean()) {
sortField.setMissingValue(random().nextInt());
}
break;
case 2:
sortField = new SortField("long", SortField.Type.LONG, reversed);
if (random().nextBoolean()) {
sortField.setMissingValue(random().nextLong());
}
break;
case 3:
sortField = new SortedNumericSortField("multi_valued_long", SortField.Type.LONG, reversed);
if (random().nextBoolean()) {
sortField.setMissingValue(random().nextLong());
}
break;
case 4:
sortField = new SortField("float", SortField.Type.FLOAT, reversed);
if (random().nextBoolean()) {
sortField.setMissingValue(random().nextFloat());
}
break;
case 5:
sortField = new SortedNumericSortField("multi_valued_float", SortField.Type.FLOAT, reversed);
if (random().nextBoolean()) {
sortField.setMissingValue(random().nextFloat());
}
break;
case 6:
sortField = new SortField("double", SortField.Type.DOUBLE, reversed);
if (random().nextBoolean()) {
sortField.setMissingValue(random().nextDouble());
}
break;
case 7:
sortField = new SortedNumericSortField("multi_valued_double", SortField.Type.DOUBLE, reversed);
if (random().nextBoolean()) {
sortField.setMissingValue(random().nextDouble());
}
break;
case 8:
sortField = new SortField("bytes", SortField.Type.STRING, reversed);
if (random().nextBoolean()) {
sortField.setMissingValue(SortField.STRING_LAST);
}
break;
case 9:
sortField = new SortedSetSortField("multi_valued_bytes", reversed);
if (random().nextBoolean()) {
sortField.setMissingValue(SortField.STRING_LAST);
}
break;
default:
sortField = null;
fail();
}
return sortField;
}
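// builds a random sort of 2-4 fields, always tie-broken by the "id" field: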
private static Sort randomSort() {
// at least 2
int numFields = TestUtil.nextInt(random(), 2, 4);
SortField[] sortFields = new SortField[numFields];
for(int i=0;i<numFields-1;i++) {
SortField sortField = randomIndexSortField();
sortFields[i] = sortField;
}
// tie-break by id:
sortFields[numFields-1] = new SortField("id", SortField.Type.INT);
return new Sort(sortFields);
}
// pits index time sorting against query time sorting
public void testRandom3() throws Exception {
int numDocs = atLeast(1000);
List<RandomDoc> docs = new ArrayList<>();
Sort sort = randomSort();
if (VERBOSE) {
System.out.println("TEST: numDocs=" + numDocs + " use sort=" + sort);
}
// no index sorting, all search-time sorting:
Directory dir1 = newFSDirectory(createTempDir());
IndexWriterConfig iwc1 = newIndexWriterConfig(new MockAnalyzer(random()));
IndexWriter w1 = new IndexWriter(dir1, iwc1);
// use index sorting:
Directory dir2 = newFSDirectory(createTempDir());
IndexWriterConfig iwc2 = newIndexWriterConfig(new MockAnalyzer(random()));
iwc2.setIndexSort(sort);
IndexWriter w2 = new IndexWriter(dir2, iwc2);
Set<Integer> toDelete = new HashSet<>();
double deleteChance = random().nextDouble();
for(int id=0;id<numDocs;id++) {
RandomDoc docValues = new RandomDoc(id);
docs.add(docValues);
if (VERBOSE) {
System.out.println("TEST: doc id=" + id);
System.out.println(" int=" + docValues.intValue);
System.out.println(" long=" + docValues.longValue);
System.out.println(" float=" + docValues.floatValue);
System.out.println(" double=" + docValues.doubleValue);
System.out.println(" bytes=" + newBytesRef(docValues.bytesValue));
System.out.println(" mvf=" + Arrays.toString(docValues.floatValues));
}
Document doc = new Document();
doc.add(new StringField("id", Integer.toString(id), Field.Store.YES));
doc.add(new NumericDocValuesField("id", id));
doc.add(new NumericDocValuesField("int", docValues.intValue));
doc.add(new NumericDocValuesField("long", docValues.longValue));
doc.add(new DoubleDocValuesField("double", docValues.doubleValue));
doc.add(new FloatDocValuesField("float", docValues.floatValue));
doc.add(new SortedDocValuesField("bytes", newBytesRef(docValues.bytesValue)));
for (int value : docValues.intValues) {
doc.add(new SortedNumericDocValuesField("multi_valued_int", value));
}
for (long value : docValues.longValues) {
doc.add(new SortedNumericDocValuesField("multi_valued_long", value));
}
for (float value : docValues.floatValues) {
doc.add(new SortedNumericDocValuesField("multi_valued_float", NumericUtils.floatToSortableInt(value)));
}
for (double value : docValues.doubleValues) {
doc.add(new SortedNumericDocValuesField("multi_valued_double", NumericUtils.doubleToSortableLong(value)));
}
for (byte[] value : docValues.bytesValues) {
doc.add(new SortedSetDocValuesField("multi_valued_bytes", newBytesRef(value)));
}
w1.addDocument(doc);
w2.addDocument(doc);
if (random().nextDouble() < deleteChance) {
toDelete.add(id);
}
}
for(int id : toDelete) {
w1.deleteDocuments(new Term("id", Integer.toString(id)));
w2.deleteDocuments(new Term("id", Integer.toString(id)));
}
DirectoryReader r1 = DirectoryReader.open(w1);
IndexSearcher s1 = newSearcher(r1);
if (random().nextBoolean()) {
int maxSegmentCount = TestUtil.nextInt(random(), 1, 5);
if (VERBOSE) {
System.out.println("TEST: now forceMerge(" + maxSegmentCount + ")");
}
w2.forceMerge(maxSegmentCount);
}
DirectoryReader r2 = DirectoryReader.open(w2);
IndexSearcher s2 = newSearcher(r2);
/*
System.out.println("TEST: full index:");
SortedDocValues docValues = MultiDocValues.getSortedValues(r2, "bytes");
for(int i=0;i<r2.maxDoc();i++) {
System.out.println(" doc " + i + " id=" + r2.document(i).get("id") + " bytes=" + docValues.get(i));
}
*/
for(int iter=0;iter<100;iter++) {
int numHits = TestUtil.nextInt(random(), 1, numDocs);
if (VERBOSE) {
System.out.println("TEST: iter=" + iter + " numHits=" + numHits);
}
TopFieldCollector c1 = TopFieldCollector.create(sort, numHits, Integer.MAX_VALUE);
s1.search(new MatchAllDocsQuery(), c1);
TopDocs hits1 = c1.topDocs();
TopFieldCollector c2 = TopFieldCollector.create(sort, numHits, 1);
s2.search(new MatchAllDocsQuery(), c2);
TopDocs hits2 = c2.topDocs();
if (VERBOSE) {
System.out.println(" topDocs query-time sort: totalHits=" + hits1.totalHits.value);
for(ScoreDoc scoreDoc : hits1.scoreDocs) {
System.out.println(" " + scoreDoc.doc);
}
System.out.println(" topDocs index-time sort: totalHits=" + hits2.totalHits.value);
for(ScoreDoc scoreDoc : hits2.scoreDocs) {
System.out.println(" " + scoreDoc.doc);
}
}
assertEquals(hits2.scoreDocs.length, hits1.scoreDocs.length);
for(int i=0;i<hits2.scoreDocs.length;i++) {
ScoreDoc hit1 = hits1.scoreDocs[i];
ScoreDoc hit2 = hits2.scoreDocs[i];
assertEquals(r1.document(hit1.doc).get("id"), r2.document(hit2.doc).get("id"));
assertArrayEquals(((FieldDoc) hit1).fields, ((FieldDoc) hit2).fields);
}
}
IOUtils.close(r1, r2, w1, w2, dir1, dir2);
}
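// docs with equal sort values must keep their insertion order after merging: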
public void testTieBreak() throws Exception {
Directory dir = newDirectory();
IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random()));
iwc.setIndexSort(new Sort(new SortField("foo", SortField.Type.STRING)));
iwc.setMergePolicy(newLogMergePolicy());
IndexWriter w = new IndexWriter(dir, iwc);
for(int id=0;id<1000;id++) {
Document doc = new Document();
doc.add(new StoredField("id", id));
String value;
if (id < 500) {
value = "bar2";
} else {
value = "bar1";
}
doc.add(new SortedDocValuesField("foo", newBytesRef(value)));
w.addDocument(doc);
if (id == 500) {
w.commit();
}
}
w.forceMerge(1);
DirectoryReader r = DirectoryReader.open(w);
for(int docID=0;docID<1000;docID++) {
int expectedID;
if (docID < 500) {
expectedID = 500 + docID;
} else {
expectedID = docID - 500;
}
assertEquals(expectedID, r.document(docID).getField("id").numericValue().intValue());
}
IOUtils.close(r, w, dir);
}
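// sorting on a dense field must correctly remap doc values and norms of fields that exist only on some docs: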
public void testIndexSortWithSparseField() throws Exception {
Directory dir = newDirectory();
IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
SortField sortField = new SortField("dense_int", SortField.Type.INT, true);
Sort indexSort = new Sort(sortField);
iwc.setIndexSort(indexSort);
IndexWriter w = new IndexWriter(dir, iwc);
Field textField = newTextField("sparse_text", "", Field.Store.NO);
for (int i = 0; i < 128; i++) {
Document doc = new Document();
doc.add(new NumericDocValuesField("dense_int", i));
if (i < 64) {
doc.add(new NumericDocValuesField("sparse_int", i));
doc.add(new BinaryDocValuesField("sparse_binary", newBytesRef(Integer.toString(i))));
textField.setStringValue("foo");
doc.add(textField);
}
w.addDocument(doc);
}
w.commit();
w.forceMerge(1);
DirectoryReader r = DirectoryReader.open(w);
assertEquals(1, r.leaves().size());
LeafReader leafReader = r.leaves().get(0).reader();
NumericDocValues denseValues = leafReader.getNumericDocValues("dense_int");
NumericDocValues sparseValues = leafReader.getNumericDocValues("sparse_int");
BinaryDocValues sparseBinaryValues = leafReader.getBinaryDocValues("sparse_binary");
NumericDocValues normsValues = leafReader.getNormValues("sparse_text");
for(int docID = 0; docID < 128; docID++) {
assertTrue(denseValues.advanceExact(docID));
assertEquals(127-docID, (int) denseValues.longValue());
if (docID >= 64) {
assertTrue(denseValues.advanceExact(docID));
assertTrue(sparseValues.advanceExact(docID));
assertTrue(sparseBinaryValues.advanceExact(docID));
assertTrue(normsValues.advanceExact(docID));
assertEquals(1, normsValues.longValue());
assertEquals(127 - docID, (int) sparseValues.longValue());
assertEquals(newBytesRef(Integer.toString(127 - docID)), sparseBinaryValues.binaryValue());
} else {
assertFalse(sparseBinaryValues.advanceExact(docID));
assertFalse(sparseValues.advanceExact(docID));
assertFalse(normsValues.advanceExact(docID));
}
}
IOUtils.close(r, w, dir);
}
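// the index sort field itself may be sparse when a missing value is set: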
public void testIndexSortOnSparseField() throws Exception {
Directory dir = newDirectory();
IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
SortField sortField = new SortField("sparse", SortField.Type.INT, false);
sortField.setMissingValue(Integer.MIN_VALUE);
Sort indexSort = new Sort(sortField);
iwc.setIndexSort(indexSort);
IndexWriter w = new IndexWriter(dir, iwc);
for (int i = 0; i < 128; i++) {
Document doc = new Document();
if (i < 64) {
doc.add(new NumericDocValuesField("sparse", i));
}
w.addDocument(doc);
}
w.commit();
w.forceMerge(1);
DirectoryReader r = DirectoryReader.open(w);
assertEquals(1, r.leaves().size());
LeafReader leafReader = r.leaves().get(0).reader();
NumericDocValues sparseValues = leafReader.getNumericDocValues("sparse");
for(int docID = 0; docID < 128; docID++) {
if (docID >= 64) {
assertTrue(sparseValues.advanceExact(docID));
assertEquals(docID-64, (int) sparseValues.longValue());
} else {
assertFalse(sparseValues.advanceExact(docID));
}
}
IOUtils.close(r, w, dir);
}
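// adding a doc whose doc values type does not match the configured index sort must fail: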
public void testWrongSortFieldType() throws Exception {
Directory dir = newDirectory();
List<Field> dvs = new ArrayList<>();
dvs.add(new SortedDocValuesField("field", newBytesRef("")));
dvs.add(new SortedSetDocValuesField("field", newBytesRef("")));
dvs.add(new NumericDocValuesField("field", 42));
dvs.add(new SortedNumericDocValuesField("field", 42));
List<SortField> sortFields = new ArrayList<>();
sortFields.add(new SortField("field", SortField.Type.STRING));
sortFields.add(new SortedSetSortField("field", false));
sortFields.add(new SortField("field", SortField.Type.INT));
sortFields.add(new SortedNumericSortField("field", SortField.Type.INT));
for (int i = 0; i < sortFields.size(); i++) {
for (int j = 0; j < dvs.size(); j++) {
if (i == j) {
continue;
}
Sort indexSort = new Sort(sortFields.get(i));
IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
iwc.setIndexSort(indexSort);
IndexWriter w = new IndexWriter(dir, iwc);
Document doc = new Document();
doc.add(dvs.get(j));
IllegalArgumentException exc = expectThrows(IllegalArgumentException.class, () -> w.addDocument(doc));
assertThat(exc.getMessage(), containsString("expected field [field] to be "));
doc.clear();
doc.add(dvs.get(i));
w.addDocument(doc);
doc.add(dvs.get(j));
exc = expectThrows(IllegalArgumentException.class, () -> w.addDocument(doc));
assertThat(exc.getMessage(), containsString("cannot change DocValues type"));
w.rollback();
IOUtils.close(w);
}
}
IOUtils.close(dir);
}
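// deletes by term or by query followed by re-adds with new sort values; each live doc must carry its latest value: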
public void testDeleteByTermOrQuery() throws Exception {
Directory dir = newDirectory();
IndexWriterConfig config = newIndexWriterConfig();
config.setIndexSort(new Sort(new SortField("numeric", SortField.Type.LONG)));
IndexWriter w = new IndexWriter(dir, config);
Document doc = new Document();
int numDocs = random().nextInt(2000) + 5;
long[] expectedValues = new long[numDocs];
for (int i = 0; i < numDocs; i++) {
expectedValues[i] = random().nextInt(Integer.MAX_VALUE);
doc.clear();
doc.add(new StringField("id", Integer.toString(i), Store.YES));
doc.add(new NumericDocValuesField("numeric", expectedValues[i]));
w.addDocument(doc);
}
int numDeleted = random().nextInt(numDocs) + 1;
for (int i = 0; i < numDeleted; i++) {
int idToDelete = random().nextInt(numDocs);
if (random().nextBoolean()) {
w.deleteDocuments(new TermQuery(new Term("id", Integer.toString(idToDelete))));
} else {
w.deleteDocuments(new Term("id", Integer.toString(idToDelete)));
}
expectedValues[idToDelete] = -random().nextInt(Integer.MAX_VALUE); // force a reordering
doc.clear();
doc.add(new StringField("id", Integer.toString(idToDelete), Store.YES));
doc.add(new NumericDocValuesField("numeric", expectedValues[idToDelete]));
w.addDocument(doc);
}
int docCount = 0;
try (IndexReader reader = DirectoryReader.open(w)) {
for (LeafReaderContext leafCtx : reader.leaves()) {
final Bits liveDocs = leafCtx.reader().getLiveDocs();
final NumericDocValues values = leafCtx.reader().getNumericDocValues("numeric");
if (values == null) {
continue;
}
for (int id = 0; id < leafCtx.reader().maxDoc(); id++) {
if (liveDocs != null && liveDocs.get(id) == false) {
continue;
}
if (values.advanceExact(id) == false) {
continue;
}
int globalId = Integer.parseInt(leafCtx.reader().document(id).getField("id").stringValue());
assertTrue(values.advanceExact(id));
assertEquals(expectedValues[globalId], values.longValue());
docCount ++;
}
}
assertEquals(numDocs, docCount);
}
w.close();
dir.close();
}
}