/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.codecs.perfield;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Random;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.DocValuesConsumer;
import org.apache.lucene.codecs.DocValuesFormat;
import org.apache.lucene.codecs.DocValuesProducer;
import org.apache.lucene.codecs.asserting.AssertingCodec;
import org.apache.lucene.document.BinaryDocValuesField;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.BaseDocValuesFormatTestCase;
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.MergeState;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.RandomCodec;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.TestUtil;

/**
 * Basic tests of PerFieldDocValuesFormat
 */
public class TestPerFieldDocValuesFormat extends BaseDocValuesFormatTestCase {
  private Codec codec;
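
  // Each test run builds a fresh RandomCodec so the per-field doc-values wiring is exercised
  // against a randomly chosen set of formats on every run.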
  @Override
  public void setUp() throws Exception {
    codec = new RandomCodec(new Random(random().nextLong()), Collections.emptySet());
    super.setUp();
  }

  @Override
  protected Codec getCodec() {
    return codec;
  }

  @Override
  protected boolean codecAcceptsHugeBinaryValues(String field) {
    return TestUtil.fieldSupportsHugeBinaryDocValues(field);
  }

  // just a simple trivial test
  // TODO: we should come up with a test that somehow checks that segment suffix
  // is respected by all codec apis (not just docvalues and postings)
  public void testTwoFieldsTwoFormats() throws IOException {
    Analyzer analyzer = new MockAnalyzer(random());
    Directory directory = newDirectory();
    // we don't use RandomIndexWriter because it might add more docvalues than we expect!
    IndexWriterConfig iwc = newIndexWriterConfig(analyzer);
    final DocValuesFormat fast = TestUtil.getDefaultDocValuesFormat();
    final DocValuesFormat slow = DocValuesFormat.forName("Direct");
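
    // Send "dv1" to the default doc-values format and every other doc-values field (here "dv2")
    // to the "Direct" format, so the two fields end up in two different per-field formats.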
    iwc.setCodec(new AssertingCodec() {
      @Override
      public DocValuesFormat getDocValuesFormatForField(String field) {
        if ("dv1".equals(field)) {
          return fast;
        } else {
          return slow;
        }
      }
    });
    IndexWriter iwriter = new IndexWriter(directory, iwc);

    Document doc = new Document();
    String longTerm = "longtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongterm";
    String text = "This is the text to be indexed. " + longTerm;
    doc.add(newTextField("fieldname", text, Field.Store.YES));
    doc.add(new NumericDocValuesField("dv1", 5));
    doc.add(new BinaryDocValuesField("dv2", new BytesRef("hello world")));
    iwriter.addDocument(doc);
    iwriter.close();

    // Now search the index:
    IndexReader ireader = DirectoryReader.open(directory);
    IndexSearcher isearcher = newSearcher(ireader);
    assertEquals(1, isearcher.count(new TermQuery(new Term("fieldname", longTerm))));
    Query query = new TermQuery(new Term("fieldname", "text"));
    TopDocs hits = isearcher.search(query, 1);
    assertEquals(1, hits.totalHits.value);

    // Iterate through the results:
    for (int i = 0; i < hits.scoreDocs.length; i++) {
      int hitDocID = hits.scoreDocs[i].doc;
      Document hitDoc = isearcher.doc(hitDocID);
      assertEquals(text, hitDoc.get("fieldname"));
      assert ireader.leaves().size() == 1;
      NumericDocValues dv = ireader.leaves().get(0).reader().getNumericDocValues("dv1");
      assertEquals(hitDocID, dv.advance(hitDocID));
      assertEquals(5, dv.longValue());
      BinaryDocValues dv2 = ireader.leaves().get(0).reader().getBinaryDocValues("dv2");
      assertEquals(hitDocID, dv2.advance(hitDocID));
      final BytesRef term = dv2.binaryValue();
      assertEquals(new BytesRef("hello world"), term);
    }

    ireader.close();
    directory.close();
  }
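
  // Map "dv1" and "dv2" to one recording format and "dv3" to another, then force-merge two
  // segments and check that each format sees exactly one merge call covering only its own fields.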
  public void testMergeCalledOnTwoFormats() throws IOException {
    MergeRecordingDocValueFormatWrapper dvf1 =
        new MergeRecordingDocValueFormatWrapper(TestUtil.getDefaultDocValuesFormat());
    MergeRecordingDocValueFormatWrapper dvf2 =
        new MergeRecordingDocValueFormatWrapper(TestUtil.getDefaultDocValuesFormat());

    IndexWriterConfig iwc = new IndexWriterConfig();
    iwc.setCodec(new AssertingCodec() {
      @Override
      public DocValuesFormat getDocValuesFormatForField(String field) {
        switch (field) {
          case "dv1":
          case "dv2":
            return dvf1;
          case "dv3":
            return dvf2;
          default:
            return super.getDocValuesFormatForField(field);
        }
      }
    });

    Directory directory = newDirectory();
    IndexWriter iwriter = new IndexWriter(directory, iwc);
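
    // First segment: one document carrying all three doc-values fields.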
    Document doc = new Document();
    doc.add(new NumericDocValuesField("dv1", 5));
    doc.add(new NumericDocValuesField("dv2", 42));
    doc.add(new BinaryDocValuesField("dv3", new BytesRef("hello world")));
    iwriter.addDocument(doc);
    iwriter.commit();

    doc = new Document();
    doc.add(new NumericDocValuesField("dv1", 8));
    doc.add(new NumericDocValuesField("dv2", 45));
    doc.add(new BinaryDocValuesField("dv3", new BytesRef("goodbye world")));
    iwriter.addDocument(doc);
    iwriter.commit();
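
    // The two commits above produced two segments; merging them down to one segment should
    // trigger exactly one merge() call on each recording format.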
    iwriter.forceMerge(1, true);
    iwriter.close();

    assertEquals(1, dvf1.nbMergeCalls);
    assertEquals(new HashSet<>(Arrays.asList("dv1", "dv2")), new HashSet<>(dvf1.fieldNames));
    assertEquals(1, dvf2.nbMergeCalls);
    assertEquals(Collections.singletonList("dv3"), dvf2.fieldNames);

    directory.close();
  }
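
  // Index one field with doc values next to plain indexed text fields and verify that the
  // doc-values merge only ever sees the field that actually carries doc values.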
  public void testDocValuesMergeWithIndexedFields() throws IOException {
    MergeRecordingDocValueFormatWrapper docValuesFormat =
        new MergeRecordingDocValueFormatWrapper(TestUtil.getDefaultDocValuesFormat());

    IndexWriterConfig iwc = new IndexWriterConfig();
    iwc.setCodec(new AssertingCodec() {
      @Override
      public DocValuesFormat getDocValuesFormatForField(String field) {
        return docValuesFormat;
      }
    });

    Directory directory = newDirectory();
    IndexWriter iwriter = new IndexWriter(directory, iwc);
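
    // First segment: the only document with a doc-values field, alongside an indexed text field.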
    Document doc = new Document();
    doc.add(new NumericDocValuesField("dv1", 5));
    doc.add(new TextField("normalField", "not a doc value", Field.Store.NO));
    iwriter.addDocument(doc);
    iwriter.commit();

    doc = new Document();
    doc.add(new TextField("anotherField", "again no doc values here", Field.Store.NO));
    doc.add(new TextField("normalField", "my document without doc values", Field.Store.NO));
    iwriter.addDocument(doc);
    iwriter.commit();
    iwriter.forceMerge(1, true);
    iwriter.close();

    // "normalField" and "anotherField" are ignored when merging doc values.
    assertEquals(1, docValuesFormat.nbMergeCalls);
    assertEquals(Collections.singletonList("dv1"), docValuesFormat.fieldNames);

    directory.close();
  }
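
  /**
   * Wraps a delegate DocValuesFormat and records how many times merge() was called on its
   * consumer and which fields those merges covered.
   */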
  private static final class MergeRecordingDocValueFormatWrapper extends DocValuesFormat {
    private final DocValuesFormat delegate;
    final List<String> fieldNames = new ArrayList<>();
    volatile int nbMergeCalls = 0;

    MergeRecordingDocValueFormatWrapper(DocValuesFormat delegate) {
      super(delegate.getName());
      this.delegate = delegate;
    }

    @Override
    public DocValuesConsumer fieldsConsumer(SegmentWriteState state) throws IOException {
      final DocValuesConsumer consumer = delegate.fieldsConsumer(state);
      return new DocValuesConsumer() {
        @Override
        public void addNumericField(FieldInfo field, DocValuesProducer values) throws IOException {
          consumer.addNumericField(field, values);
        }

        @Override
        public void addBinaryField(FieldInfo field, DocValuesProducer values) throws IOException {
          consumer.addBinaryField(field, values);
        }

        @Override
        public void addSortedField(FieldInfo field, DocValuesProducer values) throws IOException {
          consumer.addSortedField(field, values);
        }

        @Override
        public void addSortedNumericField(FieldInfo field, DocValuesProducer values) throws IOException {
          consumer.addSortedNumericField(field, values);
        }

        @Override
        public void addSortedSetField(FieldInfo field, DocValuesProducer values) throws IOException {
          consumer.addSortedSetField(field, values);
        }

        @Override
        public void merge(MergeState mergeState) throws IOException {
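          // Record the fields this merge covers before delegating to the wrapped consumer.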
          nbMergeCalls++;
          for (FieldInfo fi : mergeState.mergeFieldInfos) {
            fieldNames.add(fi.name);
          }
          consumer.merge(mergeState);
        }

        @Override
        public void close() throws IOException {
          consumer.close();
        }
      };
    }

    @Override
    public DocValuesProducer fieldsProducer(SegmentReadState state) throws IOException {
      return delegate.fieldsProducer(state);
    }
  }
}