/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.lucene.index;

import java.io.IOException;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.TextField;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.TestUtil;

import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS;

public class TestOmitNorms extends LuceneTestCase {

  // Tests whether the DocumentWriter correctly enables the
  // omitNorms bit in the FieldInfo
  public void testOmitNorms() throws Exception {
    Directory ram = newDirectory();
    Analyzer analyzer = new MockAnalyzer(random());
    IndexWriter writer = new IndexWriter(ram, newIndexWriterConfig(analyzer));
    Document d = new Document();

    // this field will have norms
    Field f1 = newTextField("f1", "This field has norms", Field.Store.NO);
    d.add(f1);

    // this field will NOT have norms
    FieldType customType = new FieldType(TextField.TYPE_NOT_STORED);
    customType.setOmitNorms(true);
    Field f2 = newField("f2", "This field has NO norms in all docs", customType);
    d.add(f2);

    writer.addDocument(d);
    writer.forceMerge(1);
    // now we add another document which has norms for field f2 and not for f1, and verify that
    // the SegmentMerger keeps things consistent
    d = new Document();
    // Reverse
    d.add(newField("f1", "This field has norms", customType));
    d.add(newTextField("f2", "This field has NO norms in all docs", Field.Store.NO));
    writer.addDocument(d);

    // force merge
    writer.forceMerge(1);
    // flush
    writer.close();
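
    // after forceMerge(1) the index contains a single segment, so there is exactly one leaf reader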
    LeafReader reader = getOnlyLeafReader(DirectoryReader.open(ram));
    FieldInfos fi = reader.getFieldInfos();
    assertTrue("OmitNorms field bit should be set.", fi.fieldInfo("f1").omitsNorms());
    assertTrue("OmitNorms field bit should be set.", fi.fieldInfo("f2").omitsNorms());

    reader.close();
    ram.close();
  }

  // Tests whether merging of docs that have different
  // omitNorms for the same field works
  public void testMixedMerge() throws Exception {
    Directory ram = newDirectory();
    Analyzer analyzer = new MockAnalyzer(random());
    IndexWriter writer = new IndexWriter(
        ram,
        newIndexWriterConfig(analyzer)
            .setMaxBufferedDocs(3)
            .setMergePolicy(newLogMergePolicy(2))
    );
    Document d = new Document();

    // this field will have norms
    Field f1 = newTextField("f1", "This field has norms", Field.Store.NO);
    d.add(f1);

    // this field will NOT have norms
    FieldType customType = new FieldType(TextField.TYPE_NOT_STORED);
    customType.setOmitNorms(true);
    Field f2 = newField("f2", "This field has NO norms in all docs", customType);
    d.add(f2);

    for (int i = 0; i < 30; i++) {
      writer.addDocument(d);
    }

    // now we add more documents which have norms for field f2 and not for f1, and verify that
    // the SegmentMerger keeps things consistent
    d = new Document();
    // Reverse
    d.add(newField("f1", "This field has norms", customType));
    d.add(newTextField("f2", "This field has NO norms in all docs", Field.Store.NO));

    for (int i = 0; i < 30; i++) {
      writer.addDocument(d);
    }

    // force merge
    writer.forceMerge(1);
    // flush
    writer.close();

    LeafReader reader = getOnlyLeafReader(DirectoryReader.open(ram));
    FieldInfos fi = reader.getFieldInfos();
    assertTrue("OmitNorms field bit should be set.", fi.fieldInfo("f1").omitsNorms());
    assertTrue("OmitNorms field bit should be set.", fi.fieldInfo("f2").omitsNorms());

    reader.close();
    ram.close();
  }

  // Make sure that a field with norms (f1) and a field that omits norms (f2) keep
  // their settings when documents are first buffered in RAM and then flushed and merged
  public void testMixedRAM() throws Exception {
    Directory ram = newDirectory();
    Analyzer analyzer = new MockAnalyzer(random());
    IndexWriter writer = new IndexWriter(
        ram,
        newIndexWriterConfig(analyzer)
            .setMaxBufferedDocs(10)
            .setMergePolicy(newLogMergePolicy(2))
    );
    Document d = new Document();

    // this field will have norms
    Field f1 = newTextField("f1", "This field has norms", Field.Store.NO);
    d.add(f1);

    // this field will NOT have norms
    FieldType customType = new FieldType(TextField.TYPE_NOT_STORED);
    customType.setOmitNorms(true);
    Field f2 = newField("f2", "This field has NO norms in all docs", customType);
    d.add(f2);
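
    // the first 5 documents only partly fill the in-memory buffer (maxBufferedDocs is 10),
    // so nothing is flushed yet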
    for (int i = 0; i < 5; i++) {
      writer.addDocument(d);
    }
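    // 20 more documents push past maxBufferedDocs, so buffered segments get flushed along the way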
    for (int i = 0; i < 20; i++) {
      writer.addDocument(d);
    }

    // force merge
    writer.forceMerge(1);
    // flush
    writer.close();

    LeafReader reader = getOnlyLeafReader(DirectoryReader.open(ram));
    FieldInfos fi = reader.getFieldInfos();
    assertFalse("OmitNorms field bit should not be set.", fi.fieldInfo("f1").omitsNorms());
    assertTrue("OmitNorms field bit should be set.", fi.fieldInfo("f2").omitsNorms());

    reader.close();
    ram.close();
  }

  private void assertNoNrm(Directory dir) throws Throwable {
    final String[] files = dir.listAll();
    for (int i = 0; i < files.length; i++) {
      // TODO: this relies upon filenames
      assertFalse(files[i].endsWith(".nrm") || files[i].endsWith(".len"));
    }
  }

  // Verifies no *.nrm exists when all fields omit norms:
  public void testNoNrmFile() throws Throwable {
    Directory ram = newDirectory();
    Analyzer analyzer = new MockAnalyzer(random());
    IndexWriter writer = new IndexWriter(ram, newIndexWriterConfig(analyzer)
        .setMaxBufferedDocs(3)
        .setMergePolicy(newLogMergePolicy()));
    LogMergePolicy lmp = (LogMergePolicy) writer.getConfig().getMergePolicy();
    lmp.setMergeFactor(2);
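    // avoid compound files so per-segment files are individually visible to assertNoNrm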
    lmp.setNoCFSRatio(0.0);

    Document d = new Document();
    FieldType customType = new FieldType(TextField.TYPE_NOT_STORED);
    customType.setOmitNorms(true);
    Field f1 = newField("f1", "This field has no norms", customType);
    d.add(f1);

    for (int i = 0; i < 30; i++) {
      writer.addDocument(d);
    }
    writer.commit();
    assertNoNrm(ram);

    // force merge
    writer.forceMerge(1);
    // flush
    writer.close();
    assertNoNrm(ram);

    ram.close();
  }

  /**
   * Tests various combinations of omitNorms=true/false and the field not existing at all,
   * ensuring that only omitNorms is 'viral'.
   * Internally checks that MultiDocValues.getNormValues() is consistent (returns the same values)
   * as the fully merged equivalent.
   */
  public void testOmitNormsCombos() throws IOException {
    // indexed with norms
    FieldType customType = new FieldType(TextField.TYPE_STORED);
    Field norms = new Field("foo", "a", customType);
    // indexed without norms
    FieldType customType1 = new FieldType(TextField.TYPE_STORED);
    customType1.setOmitNorms(true);
    Field noNorms = new Field("foo", "a", customType1);
    // not indexed, but stored
    FieldType customType2 = new FieldType();
    customType2.setStored(true);
    Field noIndex = new Field("foo", "a", customType2);
    // not indexed but stored, omitNorms is set
    FieldType customType3 = new FieldType();
    customType3.setStored(true);
    customType3.setOmitNorms(true);
    Field noNormsNoIndex = new Field("foo", "a", customType3);
    // not indexed nor stored (doesn't exist at all; we index a different field instead)
    Field emptyNorms = new Field("bar", "a", customType);
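
    // omitNorms is 'viral': if any indexed occurrence of the field omits norms, the merged
    // field omits norms; unindexed or missing occurrences do not affect the norms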
assertNotNull(getNorms("foo", norms, norms));
assertNull(getNorms("foo", norms, noNorms));
assertNotNull(getNorms("foo", norms, noIndex));
assertNotNull(getNorms("foo", norms, noNormsNoIndex));
assertNotNull(getNorms("foo", norms, emptyNorms));
assertNull(getNorms("foo", noNorms, noNorms));
assertNull(getNorms("foo", noNorms, noIndex));
assertNull(getNorms("foo", noNorms, noNormsNoIndex));
assertNull(getNorms("foo", noNorms, emptyNorms));
assertNull(getNorms("foo", noIndex, noIndex));
assertNull(getNorms("foo", noIndex, noNormsNoIndex));
assertNull(getNorms("foo", noIndex, emptyNorms));
assertNull(getNorms("foo", noNormsNoIndex, noNormsNoIndex));
assertNull(getNorms("foo", noNormsNoIndex, emptyNorms));
assertNull(getNorms("foo", emptyNorms, emptyNorms));
}

  /**
   * Indexes at least 1 document with f1, and at least 1 document with f2.
   * Returns the norms for "field".
   */
  NumericDocValues getNorms(String field, Field f1, Field f2) throws IOException {
    Directory dir = newDirectory();
    IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random()))
        .setMergePolicy(newLogMergePolicy());
    RandomIndexWriter riw = new RandomIndexWriter(random(), dir, iwc);

    // add f1
    Document d = new Document();
    d.add(f1);
    riw.addDocument(d);

    // add f2
    d = new Document();
    d.add(f2);
    riw.addDocument(d);

    // add a mix of f1's and f2's
    int numExtraDocs = TestUtil.nextInt(random(), 1, 1000);
    for (int i = 0; i < numExtraDocs; i++) {
      d = new Document();
      d.add(random().nextBoolean() ? f1 : f2);
      riw.addDocument(d);
    }

    IndexReader ir1 = riw.getReader();
    // TODO: generalize
    NumericDocValues norms1 = MultiDocValues.getNormValues(ir1, field);

    // fully merge and validate the multi-reader norms against the single segment.
    riw.forceMerge(1);
    DirectoryReader ir2 = riw.getReader();
    NumericDocValues norms2 = getOnlyLeafReader(ir2).getNormValues(field);

    if (norms1 == null) {
      assertNull(norms2);
    } else {
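      // walk both norms iterators in lockstep: a doc present in only one of them must have
      // norm value 0, and docs present in both must have equal norm values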
      while (true) {
        int norms1DocID = norms1.nextDoc();
        int norms2DocID = norms2.nextDoc();
        while (norms1DocID < norms2DocID) {
          assertEquals(0, norms1.longValue());
          norms1DocID = norms1.nextDoc();
        }
        while (norms2DocID < norms1DocID) {
          assertEquals(0, norms2.longValue());
          norms2DocID = norms2.nextDoc();
        }
        if (norms1.docID() == NO_MORE_DOCS) {
          break;
        }
        assertEquals(norms1.longValue(), norms2.longValue());
      }
    }

    ir1.close();
    ir2.close();
    riw.close();
    dir.close();
    return norms1;
  }
}