blob: 7d4d74f352ca5e483ef7671e47b0e6ca464988b4 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.index;
import java.io.IOException;
import java.util.ArrayList;
import org.apache.lucene.document.BinaryDocValuesField;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.document.SortedDocValuesField;
import org.apache.lucene.document.SortedNumericDocValuesField;
import org.apache.lucene.document.SortedSetDocValuesField;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.TestUtil;
import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS;
/** Tests MultiDocValues versus ordinary segment merging */
public class TestMultiDocValues extends LuceneTestCase {
/**
 * Indexes random long values into the "numbers" field, committing at random points, and then
 * verifies that the {@link MultiDocValues} view over the pre-merge reader agrees with the
 * numeric doc values of the single segment produced by {@code forceMerge(1)}.
 */
public void testNumerics() throws Exception {
  Directory directory = newDirectory();
  IndexWriterConfig config = newIndexWriterConfig(random(), null);
  config.setMergePolicy(newLogMergePolicy());
  RandomIndexWriter writer = new RandomIndexWriter(random(), directory, config);
  int numDocs = atLeast(TEST_NIGHTLY ? 500 : 50);

  // A single document/field pair is reused; only the field value changes per iteration.
  Field numberField = new NumericDocValuesField("numbers", 0);
  Document reusedDoc = new Document();
  reusedDoc.add(numberField);

  for (int added = 0; added < numDocs; added++) {
    long value = random().nextLong();
    numberField.setLongValue(value);
    writer.addDocument(reusedDoc);
    // Commit on roughly one iteration in 17 (presumably to end up with several segments).
    boolean commitNow = random().nextInt(17) == 0;
    if (commitNow) {
      writer.commit();
    }
  }

  // Reader taken before the merge is the input to MultiDocValues; the one after is the reference.
  DirectoryReader unmergedReader = writer.getReader();
  writer.forceMerge(1);
  DirectoryReader mergedReader = writer.getReader();
  LeafReader merged = getOnlyLeafReader(mergedReader);
  writer.close();

  NumericDocValues multi = MultiDocValues.getNumericValues(unmergedReader, "numbers");
  NumericDocValues single = merged.getNumericDocValues("numbers");

  // Every document carries a value, so both iterators must visit 0..numDocs-1 in order.
  int expectedDoc = 0;
  while (expectedDoc < numDocs) {
    assertEquals(expectedDoc, multi.nextDoc());
    assertEquals(expectedDoc, single.nextDoc());
    long mergedValue = single.longValue();
    long multiValue = multi.longValue();
    assertEquals(mergedValue, multiValue);
    expectedDoc++;
  }

  // The advance checks need unpositioned iterators, so fresh instances are fetched each time.
  NumericDocValues advanceSingle = merged.getNumericDocValues("numbers");
  NumericDocValues advanceMulti = MultiDocValues.getNumericValues(unmergedReader, "numbers");
  testRandomAdvance(advanceSingle, advanceMulti);

  NumericDocValues exactSingle = merged.getNumericDocValues("numbers");
  NumericDocValues exactMulti = MultiDocValues.getNumericValues(unmergedReader, "numbers");
  testRandomAdvanceExact(exactSingle, exactMulti, merged.maxDoc());

  unmergedReader.close();
  mergedReader.close();
  directory.close();
}
/**
 * Indexes random unicode strings as binary doc values in the "bytes" field, committing at
 * random points, and then verifies that the {@link MultiDocValues} view over the pre-merge
 * reader agrees with the binary doc values of the single force-merged segment.
 */
public void testBinary() throws Exception {
  Directory directory = newDirectory();
  IndexWriterConfig config = newIndexWriterConfig(random(), null);
  config.setMergePolicy(newLogMergePolicy());
  RandomIndexWriter writer = new RandomIndexWriter(random(), directory, config);
  int numDocs = atLeast(TEST_NIGHTLY ? 500 : 50);

  // A single document/field pair is reused; only the field value changes per iteration.
  Field bytesField = new BinaryDocValuesField("bytes", new BytesRef());
  Document reusedDoc = new Document();
  reusedDoc.add(bytesField);

  for (int added = 0; added < numDocs; added++) {
    String randomText = TestUtil.randomUnicodeString(random());
    bytesField.setBytesValue(new BytesRef(randomText));
    writer.addDocument(reusedDoc);
    // Commit on roughly one iteration in 17 (presumably to end up with several segments).
    boolean commitNow = random().nextInt(17) == 0;
    if (commitNow) {
      writer.commit();
    }
  }

  // Reader taken before the merge is the input to MultiDocValues; the one after is the reference.
  DirectoryReader unmergedReader = writer.getReader();
  writer.forceMerge(1);
  DirectoryReader mergedReader = writer.getReader();
  LeafReader merged = getOnlyLeafReader(mergedReader);
  writer.close();

  BinaryDocValues multi = MultiDocValues.getBinaryValues(unmergedReader, "bytes");
  BinaryDocValues single = merged.getBinaryDocValues("bytes");

  // Every document carries a value, so both iterators must visit 0..numDocs-1 in order.
  int expectedDoc = 0;
  while (expectedDoc < numDocs) {
    assertEquals(expectedDoc, multi.nextDoc());
    assertEquals(expectedDoc, single.nextDoc());
    // The reference value is copied before the multi view is queried.
    BytesRef mergedValue = BytesRef.deepCopyOf(single.binaryValue());
    BytesRef multiValue = multi.binaryValue();
    assertEquals(mergedValue, multiValue);
    expectedDoc++;
  }

  // The advance checks need unpositioned iterators, so fresh instances are fetched each time.
  BinaryDocValues advanceSingle = merged.getBinaryDocValues("bytes");
  BinaryDocValues advanceMulti = MultiDocValues.getBinaryValues(unmergedReader, "bytes");
  testRandomAdvance(advanceSingle, advanceMulti);

  BinaryDocValues exactSingle = merged.getBinaryDocValues("bytes");
  BinaryDocValues exactMulti = MultiDocValues.getBinaryValues(unmergedReader, "bytes");
  testRandomAdvanceExact(exactSingle, exactMulti, merged.maxDoc());

  unmergedReader.close();
  mergedReader.close();
  directory.close();
}
/**
 * Indexes random unicode strings as sorted doc values in the "bytes" field, occasionally
 * interleaving empty documents so that some docs have no value, and then verifies that the
 * {@link MultiDocValues} view over the pre-merge reader agrees (doc IDs, values and ordinals)
 * with the sorted doc values of the single force-merged segment.
 */
public void testSorted() throws Exception {
  Directory directory = newDirectory();
  IndexWriterConfig config = newIndexWriterConfig(random(), null);
  config.setMergePolicy(newLogMergePolicy());
  RandomIndexWriter writer = new RandomIndexWriter(random(), directory, config);
  int numDocs = atLeast(TEST_NIGHTLY ? 500 : 50);

  // A single document/field pair is reused; only the field value changes per iteration.
  Field bytesField = new SortedDocValuesField("bytes", new BytesRef());
  Document reusedDoc = new Document();
  reusedDoc.add(bytesField);

  for (int added = 0; added < numDocs; added++) {
    String randomText = TestUtil.randomUnicodeString(random());
    bytesField.setBytesValue(new BytesRef(randomText));
    // About one iteration in 7 first adds a document without the field.
    boolean addEmptyDoc = random().nextInt(7) == 0;
    if (addEmptyDoc) {
      writer.addDocument(new Document());
    }
    writer.addDocument(reusedDoc);
    // Commit on roughly one iteration in 17 (presumably to end up with several segments).
    boolean commitNow = random().nextInt(17) == 0;
    if (commitNow) {
      writer.commit();
    }
  }

  // Reader taken before the merge is the input to MultiDocValues; the one after is the reference.
  DirectoryReader unmergedReader = writer.getReader();
  writer.forceMerge(1);
  DirectoryReader mergedReader = writer.getReader();
  LeafReader merged = getOnlyLeafReader(mergedReader);
  writer.close();

  SortedDocValues multi = MultiDocValues.getSortedValues(unmergedReader, "bytes");
  SortedDocValues single = merged.getSortedDocValues("bytes");
  assertEquals(single.getValueCount(), multi.getValueCount());

  // Docs without a value are skipped by both iterators, so step them in lockstep to exhaustion.
  boolean exhausted = false;
  while (!exhausted) {
    assertEquals(single.nextDoc(), multi.nextDoc());
    exhausted = single.docID() == NO_MORE_DOCS;
    if (!exhausted) {
      // Values must match; the reference value is copied before the multi view is queried.
      BytesRef mergedValue = BytesRef.deepCopyOf(single.binaryValue());
      BytesRef multiValue = multi.binaryValue();
      assertEquals(mergedValue, multiValue);
      // Ordinals must match as well.
      assertEquals(single.ordValue(), multi.ordValue());
    }
  }

  // The advance checks need unpositioned iterators, so fresh instances are fetched each time.
  SortedDocValues advanceSingle = merged.getSortedDocValues("bytes");
  SortedDocValues advanceMulti = MultiDocValues.getSortedValues(unmergedReader, "bytes");
  testRandomAdvance(advanceSingle, advanceMulti);

  SortedDocValues exactSingle = merged.getSortedDocValues("bytes");
  SortedDocValues exactMulti = MultiDocValues.getSortedValues(unmergedReader, "bytes");
  testRandomAdvanceExact(exactSingle, exactMulti, merged.maxDoc());

  unmergedReader.close();
  mergedReader.close();
  directory.close();
}
/**
 * Variant of {@code testSorted} that tries to produce more duplicate values: the indexed
 * strings come from {@code TestUtil.randomSimpleString(random(), 2)}, and every document has
 * a value. Verifies ordinals and values of the {@link MultiDocValues} view against the single
 * force-merged segment.
 */
public void testSortedWithLotsOfDups() throws Exception {
  Directory directory = newDirectory();
  IndexWriterConfig config = newIndexWriterConfig(random(), null);
  config.setMergePolicy(newLogMergePolicy());
  RandomIndexWriter writer = new RandomIndexWriter(random(), directory, config);
  int numDocs = atLeast(TEST_NIGHTLY ? 500 : 50);

  // A single document/field pair is reused; only the field value changes per iteration.
  Field bytesField = new SortedDocValuesField("bytes", new BytesRef());
  Document reusedDoc = new Document();
  reusedDoc.add(bytesField);

  for (int added = 0; added < numDocs; added++) {
    String shortText = TestUtil.randomSimpleString(random(), 2);
    bytesField.setBytesValue(new BytesRef(shortText));
    writer.addDocument(reusedDoc);
    // Commit on roughly one iteration in 17 (presumably to end up with several segments).
    boolean commitNow = random().nextInt(17) == 0;
    if (commitNow) {
      writer.commit();
    }
  }

  // Reader taken before the merge is the input to MultiDocValues; the one after is the reference.
  DirectoryReader unmergedReader = writer.getReader();
  writer.forceMerge(1);
  DirectoryReader mergedReader = writer.getReader();
  LeafReader merged = getOnlyLeafReader(mergedReader);
  writer.close();

  SortedDocValues multi = MultiDocValues.getSortedValues(unmergedReader, "bytes");
  SortedDocValues single = merged.getSortedDocValues("bytes");
  assertEquals(single.getValueCount(), multi.getValueCount());

  // Every document carries a value, so both iterators must visit 0..numDocs-1 in order.
  int expectedDoc = 0;
  while (expectedDoc < numDocs) {
    assertEquals(expectedDoc, multi.nextDoc());
    assertEquals(expectedDoc, single.nextDoc());
    // Ordinals must match.
    assertEquals(single.ordValue(), multi.ordValue());
    // The value behind the ordinal must match; the reference is copied before querying multi.
    BytesRef mergedValue = BytesRef.deepCopyOf(single.binaryValue());
    BytesRef multiValue = multi.binaryValue();
    assertEquals(mergedValue, multiValue);
    expectedDoc++;
  }

  // The advance checks need unpositioned iterators, so fresh instances are fetched each time.
  SortedDocValues advanceSingle = merged.getSortedDocValues("bytes");
  SortedDocValues advanceMulti = MultiDocValues.getSortedValues(unmergedReader, "bytes");
  testRandomAdvance(advanceSingle, advanceMulti);

  SortedDocValues exactSingle = merged.getSortedDocValues("bytes");
  SortedDocValues exactMulti = MultiDocValues.getSortedValues(unmergedReader, "bytes");
  testRandomAdvanceExact(exactSingle, exactMulti, merged.maxDoc());

  unmergedReader.close();
  mergedReader.close();
  directory.close();
}
/**
 * Indexes documents carrying 0 to 4 random unicode strings as sorted-set doc values in the
 * "bytes" field, committing at random points, and then verifies that the
 * {@link MultiDocValues} view over the pre-merge reader agrees (value dictionary, doc IDs and
 * per-document ordinal lists) with the single force-merged segment.
 *
 * <p>If the multi view is {@code null}, the merged segment is required to have no values for
 * the field either, and the iterator-based checks are skipped because there is nothing to
 * iterate.
 */
public void testSortedSet() throws Exception {
  Directory dir = newDirectory();
  IndexWriterConfig iwc = newIndexWriterConfig(random(), null);
  iwc.setMergePolicy(newLogMergePolicy());
  RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
  int numDocs = TEST_NIGHTLY ? atLeast(500) : atLeast(50);
  for (int i = 0; i < numDocs; i++) {
    Document doc = new Document();
    // nextInt(5) yields 0..4, so some documents have no value at all.
    int numValues = random().nextInt(5);
    for (int j = 0; j < numValues; j++) {
      doc.add(new SortedSetDocValuesField("bytes", new BytesRef(TestUtil.randomUnicodeString(random()))));
    }
    iw.addDocument(doc);
    // Commit on roughly one iteration in 17 (presumably to end up with several segments).
    if (random().nextInt(17) == 0) {
      iw.commit();
    }
  }
  // Reader taken before the merge is the input to MultiDocValues; the one after is the reference.
  DirectoryReader ir = iw.getReader();
  iw.forceMerge(1);
  DirectoryReader ir2 = iw.getReader();
  LeafReader merged = getOnlyLeafReader(ir2);
  iw.close();
  SortedSetDocValues multi = MultiDocValues.getSortedSetValues(ir, "bytes");
  SortedSetDocValues single = merged.getSortedSetDocValues("bytes");
  if (multi == null) {
    assertNull(single);
  } else {
    assertEquals(single.getValueCount(), multi.getValueCount());
    // check values
    for (long i = 0; i < single.getValueCount(); i++) {
      final BytesRef expected = BytesRef.deepCopyOf(single.lookupOrd(i));
      final BytesRef actual = multi.lookupOrd(i);
      assertEquals(expected, actual);
    }
    // check ord list
    while (true) {
      int docID = single.nextDoc();
      assertEquals(docID, multi.nextDoc());
      if (docID == NO_MORE_DOCS) {
        break;
      }
      ArrayList<Long> expectedList = new ArrayList<>();
      long ord;
      while ((ord = single.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) {
        expectedList.add(ord);
      }
      // Collect the multi view's ords too and compare whole lists, so a surplus or missing ord
      // is reported as an assertion failure instead of an IndexOutOfBoundsException.
      ArrayList<Long> actualList = new ArrayList<>();
      while ((ord = multi.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) {
        actualList.add(ord);
      }
      assertEquals(expectedList, actualList);
    }
    // Only run the advance checks when values exist: with no values both lookups return null
    // and the helpers would dereference null iterators.
    testRandomAdvance(merged.getSortedSetDocValues("bytes"), MultiDocValues.getSortedSetValues(ir, "bytes"));
    testRandomAdvanceExact(merged.getSortedSetDocValues("bytes"), MultiDocValues.getSortedSetValues(ir, "bytes"), merged.maxDoc());
  }
  ir.close();
  ir2.close();
  dir.close();
}
/**
 * Variant of {@code testSortedSet} that tries to produce more duplicate values: the indexed
 * strings come from {@code TestUtil.randomSimpleString(random(), 2)}. Verifies the value
 * dictionary, doc IDs and per-document ordinal lists of the {@link MultiDocValues} view
 * against the single force-merged segment.
 *
 * <p>If the multi view is {@code null}, the merged segment is required to have no values for
 * the field either, and the iterator-based checks are skipped because there is nothing to
 * iterate.
 */
public void testSortedSetWithDups() throws Exception {
  Directory dir = newDirectory();
  IndexWriterConfig iwc = newIndexWriterConfig(random(), null);
  iwc.setMergePolicy(newLogMergePolicy());
  RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
  int numDocs = TEST_NIGHTLY ? atLeast(500) : atLeast(50);
  for (int i = 0; i < numDocs; i++) {
    Document doc = new Document();
    // nextInt(5) yields 0..4, so some documents have no value at all.
    int numValues = random().nextInt(5);
    for (int j = 0; j < numValues; j++) {
      doc.add(new SortedSetDocValuesField("bytes", new BytesRef(TestUtil.randomSimpleString(random(), 2))));
    }
    iw.addDocument(doc);
    // Commit on roughly one iteration in 17 (presumably to end up with several segments).
    if (random().nextInt(17) == 0) {
      iw.commit();
    }
  }
  // Reader taken before the merge is the input to MultiDocValues; the one after is the reference.
  DirectoryReader ir = iw.getReader();
  iw.forceMerge(1);
  DirectoryReader ir2 = iw.getReader();
  LeafReader merged = getOnlyLeafReader(ir2);
  iw.close();
  SortedSetDocValues multi = MultiDocValues.getSortedSetValues(ir, "bytes");
  SortedSetDocValues single = merged.getSortedSetDocValues("bytes");
  if (multi == null) {
    assertNull(single);
  } else {
    assertEquals(single.getValueCount(), multi.getValueCount());
    // check values
    for (long i = 0; i < single.getValueCount(); i++) {
      final BytesRef expected = BytesRef.deepCopyOf(single.lookupOrd(i));
      final BytesRef actual = multi.lookupOrd(i);
      assertEquals(expected, actual);
    }
    // check ord list
    while (true) {
      int docID = single.nextDoc();
      assertEquals(docID, multi.nextDoc());
      if (docID == NO_MORE_DOCS) {
        break;
      }
      ArrayList<Long> expectedList = new ArrayList<>();
      long ord;
      while ((ord = single.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) {
        expectedList.add(ord);
      }
      // Collect the multi view's ords too and compare whole lists, so a surplus or missing ord
      // is reported as an assertion failure instead of an IndexOutOfBoundsException.
      ArrayList<Long> actualList = new ArrayList<>();
      while ((ord = multi.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) {
        actualList.add(ord);
      }
      assertEquals(expectedList, actualList);
    }
    // Only run the advance checks when values exist: with no values both lookups return null
    // and the helpers would dereference null iterators.
    testRandomAdvance(merged.getSortedSetDocValues("bytes"), MultiDocValues.getSortedSetValues(ir, "bytes"));
    testRandomAdvanceExact(merged.getSortedSetDocValues("bytes"), MultiDocValues.getSortedSetValues(ir, "bytes"), merged.maxDoc());
  }
  ir.close();
  ir2.close();
  dir.close();
}
/**
 * Indexes documents carrying 0 to 4 random longs as sorted-numeric doc values in the "nums"
 * field, committing at random points, and then verifies that the {@link MultiDocValues} view
 * over the pre-merge reader agrees (doc IDs, value counts and values) with the single
 * force-merged segment.
 *
 * <p>If the multi view is {@code null}, the merged segment is required to have no values for
 * the field either, and the iterator-based checks are skipped because there is nothing to
 * iterate.
 */
public void testSortedNumeric() throws Exception {
  Directory dir = newDirectory();
  IndexWriterConfig iwc = newIndexWriterConfig(random(), null);
  iwc.setMergePolicy(newLogMergePolicy());
  RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
  int numDocs = TEST_NIGHTLY ? atLeast(500) : atLeast(50);
  for (int i = 0; i < numDocs; i++) {
    Document doc = new Document();
    // nextInt(5) yields 0..4, so some documents have no value at all.
    int numValues = random().nextInt(5);
    for (int j = 0; j < numValues; j++) {
      doc.add(new SortedNumericDocValuesField("nums", TestUtil.nextLong(random(), Long.MIN_VALUE, Long.MAX_VALUE)));
    }
    iw.addDocument(doc);
    // Commit on roughly one iteration in 17 (presumably to end up with several segments).
    if (random().nextInt(17) == 0) {
      iw.commit();
    }
  }
  // Reader taken before the merge is the input to MultiDocValues; the one after is the reference.
  DirectoryReader ir = iw.getReader();
  iw.forceMerge(1);
  DirectoryReader ir2 = iw.getReader();
  LeafReader merged = getOnlyLeafReader(ir2);
  iw.close();
  SortedNumericDocValues multi = MultiDocValues.getSortedNumericValues(ir, "nums");
  SortedNumericDocValues single = merged.getSortedNumericDocValues("nums");
  if (multi == null) {
    assertNull(single);
  } else {
    // check values
    for (int i = 0; i < numDocs; i++) {
      // Step both iterators once the loop index has passed the current position; docs without
      // values are never landed on, and after exhaustion this condition stays false.
      if (i > single.docID()) {
        assertEquals(single.nextDoc(), multi.nextDoc());
      }
      if (i == single.docID()) {
        assertEquals(single.docValueCount(), multi.docValueCount());
        for (int j = 0; j < single.docValueCount(); j++) {
          assertEquals(single.nextValue(), multi.nextValue());
        }
      }
    }
    // Only run the advance checks when values exist: with no values both lookups return null
    // and the helpers would dereference null iterators.
    testRandomAdvance(merged.getSortedNumericDocValues("nums"), MultiDocValues.getSortedNumericValues(ir, "nums"));
    testRandomAdvanceExact(merged.getSortedNumericDocValues("nums"), MultiDocValues.getSortedNumericValues(ir, "nums"), merged.maxDoc());
  }
  ir.close();
  ir2.close();
  dir.close();
}
/**
 * Drives two unpositioned iterators in lockstep until the first one is exhausted. Each step
 * randomly uses either {@code nextDoc()} or {@code advance(target)}, where the target lies a
 * random distance (drawn via {@code TestUtil.nextInt(random(), 1, 100)}) past the first
 * iterator's current doc, and asserts that both iterators land on the same doc.
 *
 * @param iter1 reference iterator; must not have been advanced yet
 * @param iter2 iterator under test; must not have been advanced yet
 * @throws IOException if either iterator fails while advancing
 */
private void testRandomAdvance(DocIdSetIterator iter1, DocIdSetIterator iter2) throws IOException {
  // Both iterators must start unpositioned.
  assertEquals(-1, iter1.docID());
  assertEquals(-1, iter2.docID());
  while (iter1.docID() != NO_MORE_DOCS) {
    final int expectedDoc;
    final int actualDoc;
    if (random().nextBoolean()) {
      expectedDoc = iter1.nextDoc();
      actualDoc = iter2.nextDoc();
    } else {
      final int target = iter1.docID() + TestUtil.nextInt(random(), 1, 100);
      expectedDoc = iter1.advance(target);
      actualDoc = iter2.advance(target);
    }
    assertEquals(expectedDoc, actualDoc);
  }
}
/**
 * Probes both iterators with {@code advanceExact} at a random, non-decreasing sequence of
 * targets below {@code maxDoc} and asserts that they agree on whether each target has a value.
 * The step between targets is {@code random().nextInt(10)}, so the same target may be probed
 * more than once.
 *
 * @param iter1 reference iterator
 * @param iter2 iterator under test
 * @param maxDoc exclusive upper bound for probed doc IDs; must be positive because it bounds
 *     the initial {@code nextInt} call
 * @throws IOException if either iterator fails while advancing
 */
private void testRandomAdvanceExact(DocValuesIterator iter1, DocValuesIterator iter2, int maxDoc) throws IOException {
  int target = random().nextInt(Math.min(maxDoc, 10));
  while (target < maxDoc) {
    final boolean referenceHasValue = iter1.advanceExact(target);
    final boolean testedHasValue = iter2.advanceExact(target);
    assertEquals(referenceHasValue, testedHasValue);
    target += random().nextInt(10);
  }
}
}