// Source: lucene/backwards/src/test/org/apache/lucene/index/TestNorms.java (manifoldcf-integration-solr-3.x)

 package org.apache.lucene.index;

 /**
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
  * this work for additional information regarding copyright ownership.
  * The ASF licenses this file to You under the Apache License, Version 2.0
  * (the "License"); you may not use this file except in compliance with
  * the License.  You may obtain a copy of the License at
  *
  *     http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */

 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Random;

 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.MockAnalyzer;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.document.Field.Index;
 import org.apache.lucene.document.Field.Store;
 import org.apache.lucene.index.IndexWriterConfig.OpenMode;
 import org.apache.lucene.search.DefaultSimilarity;
 import org.apache.lucene.search.Similarity;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.util.LuceneTestCase;

 /**
  * Test that norms info is preserved during index life - including
  * separate norms, addDocument, addIndexes, optimize.
  */
 public class TestNorms extends LuceneTestCase {

   /**
    * Similarity whose norm is exactly the field boost, so the value written
    * for a document can be predicted by the test (length norm is disabled).
    * Declared static because it never touches the enclosing test instance.
    */
   private static class SimilarityOne extends DefaultSimilarity {
     @Override
     public float computeNorm(String fieldName, FieldInvertState state) {
       // Disable length norm
       return state.getBoost();
     }
   }

   /** Number of fields ("f0" .. "f9") added to every document and verified. */
   private static final int NUM_FIELDS = 10;

   private Similarity similarityOne;
   private Analyzer anlzr;
   /** Number of documents for which an expected norm has been recorded. */
   private int numDocNorms;
   /** Expected norm per document for every field except "f1". */
   private ArrayList<Float> norms;
   /** Expected norm per document for field "f1", whose norms get modified. */
   private ArrayList<Float> modifiedNorms;
   private float lastNorm = 0;
   private float normDelta = (float) 0.001;

   @Override
   public void setUp() throws Exception {
     super.setUp();
     similarityOne = new SimilarityOne();
     anlzr = new MockAnalyzer(random);
   }

   /**
    * Test that norms values are preserved as the index is maintained.
    * Including separate norms.
    * Including merging indexes with separate norms.
    * Including optimize.
    */
   public void testNorms() throws IOException {
     Directory dir1 = newDirectory();

     norms = new ArrayList<Float>();
     modifiedNorms = new ArrayList<Float>();

     createIndex(random, dir1);
     doTestNorms(random, dir1);

     // set aside the expectations of index1, then test a second single index: index2
     ArrayList<Float> norms1 = norms;
     ArrayList<Float> modifiedNorms1 = modifiedNorms;
     int numDocNorms1 = numDocNorms;

     norms = new ArrayList<Float>();
     modifiedNorms = new ArrayList<Float>();
     numDocNorms = 0;

     Directory dir2 = newDirectory();

     createIndex(random, dir2);
     doTestNorms(random, dir2);

     // add index1 and index2 to a third index: index3
     Directory dir3 = newDirectory();

     createIndex(random, dir3);
     IndexWriter iw = new IndexWriter(dir3, newIndexWriterConfig(
         TEST_VERSION_CURRENT, anlzr).setOpenMode(OpenMode.APPEND)
                                      .setMaxBufferedDocs(5).setMergePolicy(newLogMergePolicy(3)));
     iw.addIndexes(new Directory[]{dir1,dir2});
     iw.optimize();
     iw.close();

     // expectations for index3 are those of index1 followed by those of index2
     norms1.addAll(norms);
     norms = norms1;
     modifiedNorms1.addAll(modifiedNorms);
     modifiedNorms = modifiedNorms1;
     numDocNorms += numDocNorms1;

     // test with index3
     verifyIndex(dir3);
     doTestNorms(random, dir3);

     // now with optimize
     iw = new IndexWriter(dir3, newIndexWriterConfig( TEST_VERSION_CURRENT,
         anlzr).setOpenMode(OpenMode.APPEND).setMaxBufferedDocs(5).setMergePolicy(newLogMergePolicy(3)));
     iw.optimize();
     iw.close();
     verifyIndex(dir3);

     dir1.close();
     dir2.close();
     dir3.close();
   }

   /**
    * Runs at least one round of: add 12 docs (compound), verify, modify "f1"
    * norms, verify, add 12 docs (non-compound), verify, modify, verify.
    */
   private void doTestNorms(Random random, Directory dir) throws IOException {
     int num = atLeast(1);
     for (int i=0; i<num; i++) {
       addDocs(random, dir,12,true);
       verifyIndex(dir);
       modifyNormsForF1(dir);
       verifyIndex(dir);
       addDocs(random, dir,12,false);
       verifyIndex(dir);
       modifyNormsForF1(dir);
       verifyIndex(dir);
     }
   }

   /** Creates an empty index in {@code dir} (OpenMode.CREATE, merge factor 3, compound files). */
   private void createIndex(Random random, Directory dir) throws IOException {
     IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(
         TEST_VERSION_CURRENT, anlzr).setOpenMode(OpenMode.CREATE)
         .setMaxBufferedDocs(5).setSimilarity(similarityOne).setMergePolicy(newLogMergePolicy()));
     LogMergePolicy lmp = (LogMergePolicy) iw.getConfig().getMergePolicy();
     lmp.setMergeFactor(3);
     lmp.setUseCompoundFile(true);
     iw.close();
   }

   /**
    * For every third document {@code i}, swaps the expected "f1" norm of
    * {@code i} with that of document {@code k = (i*3) % size}, and writes both
    * new values into the index through a modifiable reader.
    */
   private void modifyNormsForF1(Directory dir) throws IOException {
     IndexReader ir = IndexReader.open(dir, false);
     int n = ir.maxDoc();
     for (int i = 0; i < n; i+=3) { // modify for every third doc
       int k = (i*3) % modifiedNorms.size();
       float origNorm = modifiedNorms.get(i).floatValue();
       float newNorm = modifiedNorms.get(k).floatValue();
       //System.out.println("Modifying: for "+i+" from "+origNorm+" to "+newNorm);
       //System.out.println("      and: for "+k+" from "+newNorm+" to "+origNorm);
       modifiedNorms.set(i, Float.valueOf(newNorm));
       modifiedNorms.set(k, Float.valueOf(origNorm));
       ir.setNorm(i, "f1", newNorm);
       ir.setNorm(k, "f1", origNorm);
     }
     // NOTE(review): deliberately not closed in a finally block - closing the
     // reader presumably persists the pending norm changes, and that should
     // not happen for a half-applied update. Confirm against IndexReader docs.
     ir.close();
   }


   /**
    * Asserts that, for every field "f0".."f9", the index holds exactly
    * {@code numDocNorms} norms and that each decoded norm equals the recorded
    * expectation ({@code modifiedNorms} for "f1", {@code norms} otherwise).
    */
   private void verifyIndex(Directory dir) throws IOException {
     IndexReader ir = IndexReader.open(dir, false);
     try {
       for (int i = 0; i < NUM_FIELDS; i++) {
         String field = "f"+i;
         byte[] b = ir.norms(field);
         assertEquals("number of norms mismatches",numDocNorms,b.length);
         ArrayList<Float> storedNorms = (i==1 ? modifiedNorms : norms);
         for (int j = 0; j < b.length; j++) {
           float norm = similarityOne.decodeNormValue(b[j]);
           float norm1 = storedNorms.get(j).floatValue();
           // expected value (recorded by the test) first, actual (read from the index) second
           assertEquals("stored norm value of "+field+" for doc "+j+" is "+norm+" - a mismatch!", norm1, norm, 0.000001);
         }
       }
     } finally {
       // release the reader even when an assertion above fails
       ir.close();
     }
   }

   /**
    * Appends {@code ndocs} new documents to the index in {@code dir}.
    *
    * @param compound whether the merge policy should use compound files
    */
   private void addDocs(Random random, Directory dir, int ndocs, boolean compound) throws IOException {
     IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(
         TEST_VERSION_CURRENT, anlzr).setOpenMode(OpenMode.APPEND)
         .setMaxBufferedDocs(5).setSimilarity(similarityOne).setMergePolicy(newLogMergePolicy()));
     LogMergePolicy lmp = (LogMergePolicy) iw.getConfig().getMergePolicy();
     lmp.setMergeFactor(3);
     lmp.setUseCompoundFile(compound);
     for (int i = 0; i < ndocs; i++) {
       iw.addDocument(newDoc());
     }
     iw.close();
   }

   // create the next document: NUM_FIELDS fields, all boosted by the same fresh norm
   private Document newDoc() {
     Document d = new Document();
     float boost = nextNorm();
     for (int i = 0; i < NUM_FIELDS; i++) {
       Field f = newField("f"+i,"v"+i,Store.NO,Index.NOT_ANALYZED);
       f.setBoost(boost);
       d.add(f);
     }
     return d;
   }

   // return unique norm values that are unchanged by encoding/decoding,
   // and record the value as the expectation for the next document
   private float nextNorm() {
     float norm = lastNorm + normDelta;
     do {
       // round-trip through the byte encoding; accept once it moves past lastNorm
       float norm1 = similarityOne.decodeNormValue(similarityOne.encodeNormValue(norm));
       if (norm1 > lastNorm) {
         //System.out.println(norm1+" > "+lastNorm);
         norm = norm1;
         break;
       }
       norm += normDelta;
     } while (true);
     norms.add(numDocNorms, Float.valueOf(norm));
     modifiedNorms.add(numDocNorms, Float.valueOf(norm));
     //System.out.println("creating norm("+numDocNorms+"): "+norm);
     numDocNorms ++;
     lastNorm = (norm>10 ? 0 : norm); //there's a limit to how many distinct values can be stored in a single byte
     return norm;
   }

   /**
    * Similarity that stores the field length itself as the norm byte:
    * encode/decode are plain casts and computeNorm returns the length.
    */
   class CustomNormEncodingSimilarity extends DefaultSimilarity {
     @Override
     public byte encodeNormValue(float f) {
       return (byte) f;
     }

     @Override
     public float decodeNormValue(byte b) {
       return (float) b;
     }

     @Override
     public float computeNorm(String field, FieldInvertState state) {
       return (float) state.getLength();
     }
   }

   // LUCENE-1260
   public void testCustomEncoder() throws Exception {
     Directory dir = newDirectory();
     IndexWriterConfig config = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random));
     config.setSimilarity(new CustomNormEncodingSimilarity());
     RandomIndexWriter writer = new RandomIndexWriter(random, dir, config);
     Document doc = new Document();
     Field foo = newField("foo", "", Field.Store.NO, Field.Index.ANALYZED);
     Field bar = newField("bar", "", Field.Store.NO, Field.Index.ANALYZED);
     doc.add(foo);
     doc.add(bar);

     for (int i = 0; i < 100; i++) {
       bar.setValue("singleton");
       writer.addDocument(doc);
     }

     IndexReader reader = writer.getReader();
     writer.close();

     try {
       // "foo" is always empty, so its norm byte is expected to be 0
       byte[] fooNorms = reader.norms("foo");
       for (int i = 0; i < reader.maxDoc(); i++) {
         assertEquals(0, fooNorms[i]);
       }

       // "bar" always holds the single token "singleton", so 1 is expected
       byte[] barNorms = reader.norms("bar");
       for (int i = 0; i < reader.maxDoc(); i++) {
         assertEquals(1, barNorms[i]);
       }
     } finally {
       // release reader and directory even when an assertion above fails
       try {
         reader.close();
       } finally {
         dir.close();
       }
     }
   }
 }
	package org.apache.lucene.index;

	/**
	* Licensed to the Apache Software Foundation (ASF) under one or more
	* contributor license agreements. See the NOTICE file distributed with
	* this work for additional information regarding copyright ownership.
	* The ASF licenses this file to You under the Apache License, Version 2.0
	* (the "License"); you may not use this file except in compliance with
	* the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/

	import java.io.IOException;
	import java.util.ArrayList;
	import java.util.Random;

	import org.apache.lucene.analysis.Analyzer;
	import org.apache.lucene.analysis.MockAnalyzer;
	import org.apache.lucene.document.Document;
	import org.apache.lucene.document.Field;
	import org.apache.lucene.document.Field.Index;
	import org.apache.lucene.document.Field.Store;
	import org.apache.lucene.index.IndexWriterConfig.OpenMode;
	import org.apache.lucene.search.DefaultSimilarity;
	import org.apache.lucene.search.Similarity;
	import org.apache.lucene.store.Directory;
	import org.apache.lucene.util.LuceneTestCase;

	/**
	* Test that norms info is preserved during index life - including
	* separate norms, addDocument, addIndexes, optimize.
	*/
	public class TestNorms extends LuceneTestCase {

	  /**
	   * Similarity whose norm is exactly the field boost, so the value written
	   * for a document can be predicted by the test (length norm is disabled).
	   * Declared static because it never touches the enclosing test instance.
	   */
	  private static class SimilarityOne extends DefaultSimilarity {
	    @Override
	    public float computeNorm(String fieldName, FieldInvertState state) {
	      // Disable length norm
	      return state.getBoost();
	    }
	  }

	  /** Number of fields ("f0" .. "f9") added to every document and verified. */
	  private static final int NUM_FIELDS = 10;

	  private Similarity similarityOne;
	  private Analyzer anlzr;
	  /** Number of documents for which an expected norm has been recorded. */
	  private int numDocNorms;
	  /** Expected norm per document for every field except "f1". */
	  private ArrayList<Float> norms;
	  /** Expected norm per document for field "f1", whose norms get modified. */
	  private ArrayList<Float> modifiedNorms;
	  private float lastNorm = 0;
	  private float normDelta = (float) 0.001;

	  @Override
	  public void setUp() throws Exception {
	    super.setUp();
	    similarityOne = new SimilarityOne();
	    anlzr = new MockAnalyzer(random);
	  }

	  /**
	   * Test that norms values are preserved as the index is maintained.
	   * Including separate norms.
	   * Including merging indexes with separate norms.
	   * Including optimize.
	   */
	  public void testNorms() throws IOException {
	    Directory dir1 = newDirectory();

	    norms = new ArrayList<Float>();
	    modifiedNorms = new ArrayList<Float>();

	    createIndex(random, dir1);
	    doTestNorms(random, dir1);

	    // set aside the expectations of index1, then test a second single index: index2
	    ArrayList<Float> norms1 = norms;
	    ArrayList<Float> modifiedNorms1 = modifiedNorms;
	    int numDocNorms1 = numDocNorms;

	    norms = new ArrayList<Float>();
	    modifiedNorms = new ArrayList<Float>();
	    numDocNorms = 0;

	    Directory dir2 = newDirectory();

	    createIndex(random, dir2);
	    doTestNorms(random, dir2);

	    // add index1 and index2 to a third index: index3
	    Directory dir3 = newDirectory();

	    createIndex(random, dir3);
	    IndexWriter iw = new IndexWriter(dir3, newIndexWriterConfig(
	        TEST_VERSION_CURRENT, anlzr).setOpenMode(OpenMode.APPEND)
	        .setMaxBufferedDocs(5).setMergePolicy(newLogMergePolicy(3)));
	    iw.addIndexes(new Directory[]{dir1,dir2});
	    iw.optimize();
	    iw.close();

	    // expectations for index3 are those of index1 followed by those of index2
	    norms1.addAll(norms);
	    norms = norms1;
	    modifiedNorms1.addAll(modifiedNorms);
	    modifiedNorms = modifiedNorms1;
	    numDocNorms += numDocNorms1;

	    // test with index3
	    verifyIndex(dir3);
	    doTestNorms(random, dir3);

	    // now with optimize
	    iw = new IndexWriter(dir3, newIndexWriterConfig( TEST_VERSION_CURRENT,
	        anlzr).setOpenMode(OpenMode.APPEND).setMaxBufferedDocs(5).setMergePolicy(newLogMergePolicy(3)));
	    iw.optimize();
	    iw.close();
	    verifyIndex(dir3);

	    dir1.close();
	    dir2.close();
	    dir3.close();
	  }

	  /**
	   * Runs at least one round of: add 12 docs (compound), verify, modify "f1"
	   * norms, verify, add 12 docs (non-compound), verify, modify, verify.
	   */
	  private void doTestNorms(Random random, Directory dir) throws IOException {
	    int num = atLeast(1);
	    for (int i=0; i<num; i++) {
	      addDocs(random, dir,12,true);
	      verifyIndex(dir);
	      modifyNormsForF1(dir);
	      verifyIndex(dir);
	      addDocs(random, dir,12,false);
	      verifyIndex(dir);
	      modifyNormsForF1(dir);
	      verifyIndex(dir);
	    }
	  }

	  /** Creates an empty index in {@code dir} (OpenMode.CREATE, merge factor 3, compound files). */
	  private void createIndex(Random random, Directory dir) throws IOException {
	    IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(
	        TEST_VERSION_CURRENT, anlzr).setOpenMode(OpenMode.CREATE)
	        .setMaxBufferedDocs(5).setSimilarity(similarityOne).setMergePolicy(newLogMergePolicy()));
	    LogMergePolicy lmp = (LogMergePolicy) iw.getConfig().getMergePolicy();
	    lmp.setMergeFactor(3);
	    lmp.setUseCompoundFile(true);
	    iw.close();
	  }

	  /**
	   * For every third document {@code i}, swaps the expected "f1" norm of
	   * {@code i} with that of document {@code k = (i*3) % size}, and writes both
	   * new values into the index through a modifiable reader.
	   */
	  private void modifyNormsForF1(Directory dir) throws IOException {
	    IndexReader ir = IndexReader.open(dir, false);
	    int n = ir.maxDoc();
	    for (int i = 0; i < n; i+=3) { // modify for every third doc
	      int k = (i*3) % modifiedNorms.size();
	      float origNorm = modifiedNorms.get(i).floatValue();
	      float newNorm = modifiedNorms.get(k).floatValue();
	      //System.out.println("Modifying: for "+i+" from "+origNorm+" to "+newNorm);
	      //System.out.println(" and: for "+k+" from "+newNorm+" to "+origNorm);
	      modifiedNorms.set(i, Float.valueOf(newNorm));
	      modifiedNorms.set(k, Float.valueOf(origNorm));
	      ir.setNorm(i, "f1", newNorm);
	      ir.setNorm(k, "f1", origNorm);
	    }
	    // NOTE(review): deliberately not closed in a finally block - closing the
	    // reader presumably persists the pending norm changes, and that should
	    // not happen for a half-applied update. Confirm against IndexReader docs.
	    ir.close();
	  }


	  /**
	   * Asserts that, for every field "f0".."f9", the index holds exactly
	   * {@code numDocNorms} norms and that each decoded norm equals the recorded
	   * expectation ({@code modifiedNorms} for "f1", {@code norms} otherwise).
	   */
	  private void verifyIndex(Directory dir) throws IOException {
	    IndexReader ir = IndexReader.open(dir, false);
	    try {
	      for (int i = 0; i < NUM_FIELDS; i++) {
	        String field = "f"+i;
	        byte[] b = ir.norms(field);
	        assertEquals("number of norms mismatches",numDocNorms,b.length);
	        ArrayList<Float> storedNorms = (i==1 ? modifiedNorms : norms);
	        for (int j = 0; j < b.length; j++) {
	          float norm = similarityOne.decodeNormValue(b[j]);
	          float norm1 = storedNorms.get(j).floatValue();
	          // expected value (recorded by the test) first, actual (read from the index) second
	          assertEquals("stored norm value of "+field+" for doc "+j+" is "+norm+" - a mismatch!", norm1, norm, 0.000001);
	        }
	      }
	    } finally {
	      // release the reader even when an assertion above fails
	      ir.close();
	    }
	  }

	  /**
	   * Appends {@code ndocs} new documents to the index in {@code dir}.
	   *
	   * @param compound whether the merge policy should use compound files
	   */
	  private void addDocs(Random random, Directory dir, int ndocs, boolean compound) throws IOException {
	    IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(
	        TEST_VERSION_CURRENT, anlzr).setOpenMode(OpenMode.APPEND)
	        .setMaxBufferedDocs(5).setSimilarity(similarityOne).setMergePolicy(newLogMergePolicy()));
	    LogMergePolicy lmp = (LogMergePolicy) iw.getConfig().getMergePolicy();
	    lmp.setMergeFactor(3);
	    lmp.setUseCompoundFile(compound);
	    for (int i = 0; i < ndocs; i++) {
	      iw.addDocument(newDoc());
	    }
	    iw.close();
	  }

	  // create the next document: NUM_FIELDS fields, all boosted by the same fresh norm
	  private Document newDoc() {
	    Document d = new Document();
	    float boost = nextNorm();
	    for (int i = 0; i < NUM_FIELDS; i++) {
	      Field f = newField("f"+i,"v"+i,Store.NO,Index.NOT_ANALYZED);
	      f.setBoost(boost);
	      d.add(f);
	    }
	    return d;
	  }

	  // return unique norm values that are unchanged by encoding/decoding,
	  // and record the value as the expectation for the next document
	  private float nextNorm() {
	    float norm = lastNorm + normDelta;
	    do {
	      // round-trip through the byte encoding; accept once it moves past lastNorm
	      float norm1 = similarityOne.decodeNormValue(similarityOne.encodeNormValue(norm));
	      if (norm1 > lastNorm) {
	        //System.out.println(norm1+" > "+lastNorm);
	        norm = norm1;
	        break;
	      }
	      norm += normDelta;
	    } while (true);
	    norms.add(numDocNorms, Float.valueOf(norm));
	    modifiedNorms.add(numDocNorms, Float.valueOf(norm));
	    //System.out.println("creating norm("+numDocNorms+"): "+norm);
	    numDocNorms ++;
	    lastNorm = (norm>10 ? 0 : norm); //there's a limit to how many distinct values can be stored in a single byte
	    return norm;
	  }

	  /**
	   * Similarity that stores the field length itself as the norm byte:
	   * encode/decode are plain casts and computeNorm returns the length.
	   */
	  class CustomNormEncodingSimilarity extends DefaultSimilarity {
	    @Override
	    public byte encodeNormValue(float f) {
	      return (byte) f;
	    }

	    @Override
	    public float decodeNormValue(byte b) {
	      return (float) b;
	    }

	    @Override
	    public float computeNorm(String field, FieldInvertState state) {
	      return (float) state.getLength();
	    }
	  }

	  // LUCENE-1260
	  public void testCustomEncoder() throws Exception {
	    Directory dir = newDirectory();
	    IndexWriterConfig config = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random));
	    config.setSimilarity(new CustomNormEncodingSimilarity());
	    RandomIndexWriter writer = new RandomIndexWriter(random, dir, config);
	    Document doc = new Document();
	    Field foo = newField("foo", "", Field.Store.NO, Field.Index.ANALYZED);
	    Field bar = newField("bar", "", Field.Store.NO, Field.Index.ANALYZED);
	    doc.add(foo);
	    doc.add(bar);

	    for (int i = 0; i < 100; i++) {
	      bar.setValue("singleton");
	      writer.addDocument(doc);
	    }

	    IndexReader reader = writer.getReader();
	    writer.close();

	    try {
	      // "foo" is always empty, so its norm byte is expected to be 0
	      byte[] fooNorms = reader.norms("foo");
	      for (int i = 0; i < reader.maxDoc(); i++) {
	        assertEquals(0, fooNorms[i]);
	      }

	      // "bar" always holds the single token "singleton", so 1 is expected
	      byte[] barNorms = reader.norms("bar");
	      for (int i = 0; i < reader.maxDoc(); i++) {
	        assertEquals(1, barNorms[i]);
	      }
	    } finally {
	      // release reader and directory even when an assertion above fails
	      try {
	        reader.close();
	      } finally {
	        dir.close();
	      }
	    }
	  }
	}