/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.lucene.index;

import java.io.IOException;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.TextField;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.TestUtil;

import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS;

public class TestOmitNorms extends LuceneTestCase {

  // Tests whether the DocumentWriter correctly enables the
  // omitNorms bit in the FieldInfo
  public void testOmitNorms() throws Exception {
    Directory ram = newDirectory();
    Analyzer analyzer = new MockAnalyzer(random());
    IndexWriter writer = new IndexWriter(ram, newIndexWriterConfig(analyzer));
    Document d = new Document();

    // this field will have norms
    Field f1 = newTextField("f1", "This field has norms", Field.Store.NO);
    d.add(f1);

    // this field will NOT have norms
    FieldType customType = new FieldType(TextField.TYPE_NOT_STORED);
    customType.setOmitNorms(true);
    Field f2 = newField("f2", "This field has NO norms in all docs", customType);
    d.add(f2);

    writer.addDocument(d);
    writer.forceMerge(1);
    // now we add another document which has norms for field f2 and not for f1, and verify that
    // the SegmentMerger keeps things consistent
    d = new Document();
    // Reverse
    d.add(newField("f1", "This field has norms", customType));
    d.add(newTextField("f2", "This field has NO norms in all docs", Field.Store.NO));
    writer.addDocument(d);

    // force merge
    writer.forceMerge(1);
    // flush
    writer.close();
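
    // after forceMerge(1) the index contains a single segment, so there is exactly one leaf reader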
    LeafReader reader = getOnlyLeafReader(DirectoryReader.open(ram));
    FieldInfos fi = reader.getFieldInfos();
    assertTrue("OmitNorms field bit should be set.", fi.fieldInfo("f1").omitsNorms());
    assertTrue("OmitNorms field bit should be set.", fi.fieldInfo("f2").omitsNorms());

    reader.close();
    ram.close();
  }

  // Tests whether merging of docs that have different
  // omitNorms for the same field works
  public void testMixedMerge() throws Exception {
    Directory ram = newDirectory();
    Analyzer analyzer = new MockAnalyzer(random());
    IndexWriter writer = new IndexWriter(
        ram,
        newIndexWriterConfig(analyzer)
            .setMaxBufferedDocs(3)
            .setMergePolicy(newLogMergePolicy(2))
    );
    Document d = new Document();

    // this field will have norms
    Field f1 = newTextField("f1", "This field has norms", Field.Store.NO);
    d.add(f1);

    // this field will NOT have norms
    FieldType customType = new FieldType(TextField.TYPE_NOT_STORED);
    customType.setOmitNorms(true);
    Field f2 = newField("f2", "This field has NO norms in all docs", customType);
    d.add(f2);

    for (int i = 0; i < 30; i++) {
      writer.addDocument(d);
    }

    // now we add more documents which have norms for field f2 and not for f1, and verify that
    // the SegmentMerger keeps things consistent
    d = new Document();
    // Reverse
    d.add(newField("f1", "This field has norms", customType));
    d.add(newTextField("f2", "This field has NO norms in all docs", Field.Store.NO));

    for (int i = 0; i < 30; i++) {
      writer.addDocument(d);
    }

    // force merge
    writer.forceMerge(1);
    // flush
    writer.close();

    LeafReader reader = getOnlyLeafReader(DirectoryReader.open(ram));
    FieldInfos fi = reader.getFieldInfos();
    assertTrue("OmitNorms field bit should be set.", fi.fieldInfo("f1").omitsNorms());
    assertTrue("OmitNorms field bit should be set.", fi.fieldInfo("f2").omitsNorms());

    reader.close();
    ram.close();
  }

  // Make sure that a field with norms (f1) and a field that omits norms (f2) keep
  // their settings when documents are first buffered in RAM and then flushed and merged
  public void testMixedRAM() throws Exception {
    Directory ram = newDirectory();
    Analyzer analyzer = new MockAnalyzer(random());
    IndexWriter writer = new IndexWriter(
        ram,
        newIndexWriterConfig(analyzer)
            .setMaxBufferedDocs(10)
            .setMergePolicy(newLogMergePolicy(2))
    );
    Document d = new Document();

    // this field will have norms
    Field f1 = newTextField("f1", "This field has norms", Field.Store.NO);
    d.add(f1);

    // this field will NOT have norms
    FieldType customType = new FieldType(TextField.TYPE_NOT_STORED);
    customType.setOmitNorms(true);
    Field f2 = newField("f2", "This field has NO norms in all docs", customType);
    d.add(f2);
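
    // the first 5 documents only partly fill the in-memory buffer (maxBufferedDocs is 10),
    // so nothing is flushed yet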
    for (int i = 0; i < 5; i++) {
      writer.addDocument(d);
    }
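    // 20 more documents push past maxBufferedDocs, so buffered segments get flushed along the way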
    for (int i = 0; i < 20; i++) {
      writer.addDocument(d);
    }

    // force merge
    writer.forceMerge(1);
    // flush
    writer.close();

    LeafReader reader = getOnlyLeafReader(DirectoryReader.open(ram));
    FieldInfos fi = reader.getFieldInfos();
    assertFalse("OmitNorms field bit should not be set.", fi.fieldInfo("f1").omitsNorms());
    assertTrue("OmitNorms field bit should be set.", fi.fieldInfo("f2").omitsNorms());

    reader.close();
    ram.close();
  }

  private void assertNoNrm(Directory dir) throws Throwable {
    final String[] files = dir.listAll();
    for (int i = 0; i < files.length; i++) {
      // TODO: this relies upon filenames
      assertFalse(files[i].endsWith(".nrm") || files[i].endsWith(".len"));
    }
  }

  // Verifies no *.nrm exists when all fields omit norms:
  public void testNoNrmFile() throws Throwable {
    Directory ram = newDirectory();
    Analyzer analyzer = new MockAnalyzer(random());
    IndexWriter writer = new IndexWriter(ram, newIndexWriterConfig(analyzer)
        .setMaxBufferedDocs(3)
        .setMergePolicy(newLogMergePolicy()));
    LogMergePolicy lmp = (LogMergePolicy) writer.getConfig().getMergePolicy();
    lmp.setMergeFactor(2);
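    // avoid compound files so per-segment files are individually visible to assertNoNrm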
    lmp.setNoCFSRatio(0.0);

    Document d = new Document();
    FieldType customType = new FieldType(TextField.TYPE_NOT_STORED);
    customType.setOmitNorms(true);
    Field f1 = newField("f1", "This field has no norms", customType);
    d.add(f1);

    for (int i = 0; i < 30; i++) {
      writer.addDocument(d);
    }
    writer.commit();
    assertNoNrm(ram);

    // force merge
    writer.forceMerge(1);
    // flush
    writer.close();
    assertNoNrm(ram);

    ram.close();
  }

  /**
   * Tests various combinations of omitNorms=true/false and the field not existing at all,
   * ensuring that only omitNorms is 'viral'.
   * Internally checks that MultiDocValues.getNormValues() is consistent (returns the same values)
   * as the fully merged equivalent.
   */
  public void testOmitNormsCombos() throws IOException {
    // indexed with norms
    FieldType customType = new FieldType(TextField.TYPE_STORED);
    Field norms = new Field("foo", "a", customType);
    // indexed without norms
    FieldType customType1 = new FieldType(TextField.TYPE_STORED);
    customType1.setOmitNorms(true);
    Field noNorms = new Field("foo", "a", customType1);
    // not indexed, but stored
    FieldType customType2 = new FieldType();
    customType2.setStored(true);
    Field noIndex = new Field("foo", "a", customType2);
    // not indexed but stored, omitNorms is set
    FieldType customType3 = new FieldType();
    customType3.setStored(true);
    customType3.setOmitNorms(true);
    Field noNormsNoIndex = new Field("foo", "a", customType3);
    // not indexed nor stored (doesn't exist at all; we index a different field instead)
    Field emptyNorms = new Field("bar", "a", customType);
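
    // omitNorms is 'viral': if any indexed occurrence of the field omits norms, the merged
    // field omits norms; unindexed or missing occurrences do not affect the norms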
assertNotNull(getNorms("foo", norms, norms));
assertNull(getNorms("foo", norms, noNorms));
assertNotNull(getNorms("foo", norms, noIndex));
assertNotNull(getNorms("foo", norms, noNormsNoIndex));
assertNotNull(getNorms("foo", norms, emptyNorms));
assertNull(getNorms("foo", noNorms, noNorms));
assertNull(getNorms("foo", noNorms, noIndex));
assertNull(getNorms("foo", noNorms, noNormsNoIndex));
assertNull(getNorms("foo", noNorms, emptyNorms));
assertNull(getNorms("foo", noIndex, noIndex));
assertNull(getNorms("foo", noIndex, noNormsNoIndex));
assertNull(getNorms("foo", noIndex, emptyNorms));
assertNull(getNorms("foo", noNormsNoIndex, noNormsNoIndex));
assertNull(getNorms("foo", noNormsNoIndex, emptyNorms));
assertNull(getNorms("foo", emptyNorms, emptyNorms));
}

  /**
   * Indexes at least 1 document with f1, and at least 1 document with f2.
   * Returns the norms for "field".
   */
  NumericDocValues getNorms(String field, Field f1, Field f2) throws IOException {
    Directory dir = newDirectory();
    IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random()))
        .setMergePolicy(newLogMergePolicy());
    RandomIndexWriter riw = new RandomIndexWriter(random(), dir, iwc);

    // add f1
    Document d = new Document();
    d.add(f1);
    riw.addDocument(d);

    // add f2
    d = new Document();
    d.add(f2);
    riw.addDocument(d);

    // add a mix of f1's and f2's
    int numExtraDocs = TestUtil.nextInt(random(), 1, 1000);
    for (int i = 0; i < numExtraDocs; i++) {
      d = new Document();
      d.add(random().nextBoolean() ? f1 : f2);
      riw.addDocument(d);
    }

    IndexReader ir1 = riw.getReader();
    // TODO: generalize
    NumericDocValues norms1 = MultiDocValues.getNormValues(ir1, field);

    // fully merge and validate the multi-reader norms against the single segment.
    riw.forceMerge(1);
    DirectoryReader ir2 = riw.getReader();
    NumericDocValues norms2 = getOnlyLeafReader(ir2).getNormValues(field);

    if (norms1 == null) {
      assertNull(norms2);
    } else {
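      // walk both norms iterators in lockstep: a doc present in only one of them must have
      // norm value 0, and docs present in both must have equal norm values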
      while (true) {
        int norms1DocID = norms1.nextDoc();
        int norms2DocID = norms2.nextDoc();
        while (norms1DocID < norms2DocID) {
          assertEquals(0, norms1.longValue());
          norms1DocID = norms1.nextDoc();
        }
        while (norms2DocID < norms1DocID) {
          assertEquals(0, norms2.longValue());
          norms2DocID = norms2.nextDoc();
        }
        if (norms1.docID() == NO_MORE_DOCS) {
          break;
        }
        assertEquals(norms1.longValue(), norms2.longValue());
      }
    }

    ir1.close();
    ir2.close();
    riw.close();
    dir.close();
    return norms1;
  }
}