| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| package org.apache.lucene.index; |
| |
| |
| import java.io.IOException; |
| |
| import org.apache.lucene.analysis.Analyzer; |
| import org.apache.lucene.analysis.MockAnalyzer; |
| import org.apache.lucene.document.Document; |
| import org.apache.lucene.document.Field; |
| import org.apache.lucene.document.FieldType; |
| import org.apache.lucene.document.TextField; |
| import org.apache.lucene.store.Directory; |
| import org.apache.lucene.util.LuceneTestCase; |
| import org.apache.lucene.util.TestUtil; |
| |
| import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS; |
| |
| public class TestOmitNorms extends LuceneTestCase { |
| // Tests whether the DocumentWriter correctly enable the |
| // omitNorms bit in the FieldInfo |
| public void testOmitNorms() throws Exception { |
| Directory ram = newDirectory(); |
| Analyzer analyzer = new MockAnalyzer(random()); |
| IndexWriter writer = new IndexWriter(ram, newIndexWriterConfig(analyzer)); |
| Document d = new Document(); |
| |
| // this field will have norms |
| Field f1 = newTextField("f1", "This field has norms", Field.Store.NO); |
| d.add(f1); |
| |
| // this field will NOT have norms |
| FieldType customType = new FieldType(TextField.TYPE_NOT_STORED); |
| customType.setOmitNorms(true); |
| Field f2 = newField("f2", "This field has NO norms in all docs", customType); |
| d.add(f2); |
| |
| writer.addDocument(d); |
| writer.forceMerge(1); |
| // now we add another document which has term freq for field f2 and not for f1 and verify if the SegmentMerger |
| // keep things constant |
| d = new Document(); |
| |
| // Reverse |
| d.add(newField("f1", "This field has norms", customType)); |
| |
| d.add(newTextField("f2", "This field has NO norms in all docs", Field.Store.NO)); |
| |
| writer.addDocument(d); |
| |
| // force merge |
| writer.forceMerge(1); |
| // flush |
| writer.close(); |
| |
| LeafReader reader = getOnlyLeafReader(DirectoryReader.open(ram)); |
| FieldInfos fi = reader.getFieldInfos(); |
| assertTrue("OmitNorms field bit should be set.", fi.fieldInfo("f1").omitsNorms()); |
| assertTrue("OmitNorms field bit should be set.", fi.fieldInfo("f2").omitsNorms()); |
| |
| reader.close(); |
| ram.close(); |
| } |
| |
| // Tests whether merging of docs that have different |
| // omitNorms for the same field works |
| public void testMixedMerge() throws Exception { |
| Directory ram = newDirectory(); |
| Analyzer analyzer = new MockAnalyzer(random()); |
| IndexWriter writer = new IndexWriter( |
| ram, |
| newIndexWriterConfig(analyzer) |
| .setMaxBufferedDocs(3) |
| .setMergePolicy(newLogMergePolicy(2)) |
| ); |
| Document d = new Document(); |
| |
| // this field will have norms |
| Field f1 = newTextField("f1", "This field has norms", Field.Store.NO); |
| d.add(f1); |
| |
| // this field will NOT have norms |
| FieldType customType = new FieldType(TextField.TYPE_NOT_STORED); |
| customType.setOmitNorms(true); |
| Field f2 = newField("f2", "This field has NO norms in all docs", customType); |
| d.add(f2); |
| |
| for (int i = 0; i < 30; i++) { |
| writer.addDocument(d); |
| } |
| |
| // now we add another document which has norms for field f2 and not for f1 and verify if the SegmentMerger |
| // keep things constant |
| d = new Document(); |
| |
| // Reverese |
| d.add(newField("f1", "This field has norms", customType)); |
| |
| d.add(newTextField("f2", "This field has NO norms in all docs", Field.Store.NO)); |
| |
| for (int i = 0; i < 30; i++) { |
| writer.addDocument(d); |
| } |
| |
| // force merge |
| writer.forceMerge(1); |
| // flush |
| writer.close(); |
| |
| LeafReader reader = getOnlyLeafReader(DirectoryReader.open(ram)); |
| FieldInfos fi = reader.getFieldInfos(); |
| assertTrue("OmitNorms field bit should be set.", fi.fieldInfo("f1").omitsNorms()); |
| assertTrue("OmitNorms field bit should be set.", fi.fieldInfo("f2").omitsNorms()); |
| |
| reader.close(); |
| ram.close(); |
| } |
| |
| // Make sure first adding docs that do not omitNorms for |
| // field X, then adding docs that do omitNorms for that same |
| // field, |
| public void testMixedRAM() throws Exception { |
| Directory ram = newDirectory(); |
| Analyzer analyzer = new MockAnalyzer(random()); |
| IndexWriter writer = new IndexWriter( |
| ram, |
| newIndexWriterConfig(analyzer) |
| .setMaxBufferedDocs(10) |
| .setMergePolicy(newLogMergePolicy(2)) |
| ); |
| Document d = new Document(); |
| |
| // this field will have norms |
| Field f1 = newTextField("f1", "This field has norms", Field.Store.NO); |
| d.add(f1); |
| |
| // this field will NOT have norms |
| |
| FieldType customType = new FieldType(TextField.TYPE_NOT_STORED); |
| customType.setOmitNorms(true); |
| Field f2 = newField("f2", "This field has NO norms in all docs", customType); |
| d.add(f2); |
| |
| for (int i = 0; i < 5; i++) { |
| writer.addDocument(d); |
| } |
| |
| for (int i = 0; i < 20; i++) { |
| writer.addDocument(d); |
| } |
| |
| // force merge |
| writer.forceMerge(1); |
| |
| // flush |
| writer.close(); |
| |
| LeafReader reader = getOnlyLeafReader(DirectoryReader.open(ram)); |
| FieldInfos fi = reader.getFieldInfos(); |
| assertTrue("OmitNorms field bit should not be set.", !fi.fieldInfo("f1").omitsNorms()); |
| assertTrue("OmitNorms field bit should be set.", fi.fieldInfo("f2").omitsNorms()); |
| |
| reader.close(); |
| ram.close(); |
| } |
| |
| private void assertNoNrm(Directory dir) throws Throwable { |
| final String[] files = dir.listAll(); |
| for (int i = 0; i < files.length; i++) { |
| // TODO: this relies upon filenames |
| assertFalse(files[i].endsWith(".nrm") || files[i].endsWith(".len")); |
| } |
| } |
| |
| // Verifies no *.nrm exists when all fields omit norms: |
| public void testNoNrmFile() throws Throwable { |
| Directory ram = newDirectory(); |
| Analyzer analyzer = new MockAnalyzer(random()); |
| IndexWriter writer = new IndexWriter(ram, newIndexWriterConfig(analyzer) |
| .setMaxBufferedDocs(3) |
| .setMergePolicy(newLogMergePolicy())); |
| LogMergePolicy lmp = (LogMergePolicy) writer.getConfig().getMergePolicy(); |
| lmp.setMergeFactor(2); |
| lmp.setNoCFSRatio(0.0); |
| Document d = new Document(); |
| |
| FieldType customType = new FieldType(TextField.TYPE_NOT_STORED); |
| customType.setOmitNorms(true); |
| Field f1 = newField("f1", "This field has no norms", customType); |
| d.add(f1); |
| |
| for (int i = 0; i < 30; i++) { |
| writer.addDocument(d); |
| } |
| |
| writer.commit(); |
| |
| assertNoNrm(ram); |
| |
| // force merge |
| writer.forceMerge(1); |
| // flush |
| writer.close(); |
| |
| assertNoNrm(ram); |
| ram.close(); |
| } |
| |
| /** |
| * Tests various combinations of omitNorms=true/false, the field not existing at all, |
| * ensuring that only omitNorms is 'viral'. |
| * Internally checks that MultiNorms.norms() is consistent (returns the same bytes) |
| * as the fully merged equivalent. |
| */ |
| public void testOmitNormsCombos() throws IOException { |
| // indexed with norms |
| FieldType customType = new FieldType(TextField.TYPE_STORED); |
| Field norms = new Field("foo", "a", customType); |
| // indexed without norms |
| FieldType customType1 = new FieldType(TextField.TYPE_STORED); |
| customType1.setOmitNorms(true); |
| Field noNorms = new Field("foo", "a", customType1); |
| // not indexed, but stored |
| FieldType customType2 = new FieldType(); |
| customType2.setStored(true); |
| Field noIndex = new Field("foo", "a", customType2); |
| // not indexed but stored, omitNorms is set |
| FieldType customType3 = new FieldType(); |
| customType3.setStored(true); |
| customType3.setOmitNorms(true); |
| Field noNormsNoIndex = new Field("foo", "a", customType3); |
| // not indexed nor stored (doesnt exist at all, we index a different field instead) |
| Field emptyNorms = new Field("bar", "a", customType); |
| |
| assertNotNull(getNorms("foo", norms, norms)); |
| assertNull(getNorms("foo", norms, noNorms)); |
| assertNotNull(getNorms("foo", norms, noIndex)); |
| assertNotNull(getNorms("foo", norms, noNormsNoIndex)); |
| assertNotNull(getNorms("foo", norms, emptyNorms)); |
| assertNull(getNorms("foo", noNorms, noNorms)); |
| assertNull(getNorms("foo", noNorms, noIndex)); |
| assertNull(getNorms("foo", noNorms, noNormsNoIndex)); |
| assertNull(getNorms("foo", noNorms, emptyNorms)); |
| assertNull(getNorms("foo", noIndex, noIndex)); |
| assertNull(getNorms("foo", noIndex, noNormsNoIndex)); |
| assertNull(getNorms("foo", noIndex, emptyNorms)); |
| assertNull(getNorms("foo", noNormsNoIndex, noNormsNoIndex)); |
| assertNull(getNorms("foo", noNormsNoIndex, emptyNorms)); |
| assertNull(getNorms("foo", emptyNorms, emptyNorms)); |
| } |
| |
| /** |
| * Indexes at least 1 document with f1, and at least 1 document with f2. |
| * returns the norms for "field". |
| */ |
| NumericDocValues getNorms(String field, Field f1, Field f2) throws IOException { |
| Directory dir = newDirectory(); |
| IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random())) |
| .setMergePolicy(newLogMergePolicy()); |
| RandomIndexWriter riw = new RandomIndexWriter(random(), dir, iwc); |
| |
| // add f1 |
| Document d = new Document(); |
| d.add(f1); |
| riw.addDocument(d); |
| |
| // add f2 |
| d = new Document(); |
| d.add(f2); |
| riw.addDocument(d); |
| |
| // add a mix of f1's and f2's |
| int numExtraDocs = TestUtil.nextInt(random(), 1, 1000); |
| for (int i = 0; i < numExtraDocs; i++) { |
| d = new Document(); |
| d.add(random().nextBoolean() ? f1 : f2); |
| riw.addDocument(d); |
| } |
| |
| IndexReader ir1 = riw.getReader(); |
| // todo: generalize |
| NumericDocValues norms1 = MultiDocValues.getNormValues(ir1, field); |
| |
| // fully merge and validate MultiNorms against single segment. |
| riw.forceMerge(1); |
| DirectoryReader ir2 = riw.getReader(); |
| NumericDocValues norms2 = getOnlyLeafReader(ir2).getNormValues(field); |
| |
| if (norms1 == null) { |
| assertNull(norms2); |
| } else { |
| while (true) { |
| int norms1DocID = norms1.nextDoc(); |
| int norms2DocID = norms2.nextDoc(); |
| while (norms1DocID < norms2DocID) { |
| assertEquals(0, norms1.longValue()); |
| norms1DocID = norms1.nextDoc(); |
| } |
| while (norms2DocID < norms1DocID) { |
| assertEquals(0, norms2.longValue()); |
| norms2DocID = norms2.nextDoc(); |
| } |
| if (norms1.docID() == NO_MORE_DOCS) { |
| break; |
| } |
| assertEquals(norms1.longValue(), norms2.longValue()); |
| } |
| } |
| ir1.close(); |
| ir2.close(); |
| riw.close(); |
| dir.close(); |
| return norms1; |
| } |
| } |