/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.index;

import java.io.IOException;
import java.util.Random;

import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.document.SortedNumericDocValuesField;
import org.apache.lucene.document.SortedSetDocValuesField;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.LineFileDocs;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.LuceneTestCase.Slow;
import org.apache.lucene.util.TestUtil;
/**
 * Compares one codec against another: the same pseudo-random documents are indexed
 * into a "left" and a "right" index with two different codecs, and the resulting
 * readers are then checked for equivalence.
 */
@Slow
public class TestDuelingCodecs extends LuceneTestCase {
  Directory leftDir;
  IndexReader leftReader;
  Codec leftCodec;

  Directory rightDir;
  IndexReader rightReader;
  Codec rightCodec;

  RandomIndexWriter leftWriter;
  RandomIndexWriter rightWriter;
  long seed;
  String info; // for debugging
  @Override
  public void setUp() throws Exception {
    super.setUp();

    // for now it's SimpleText vs Default (random postings format)
    // as this gives the best overall coverage. when we have more
    // codecs we should probably pick 2 from Codec.availableCodecs()
    leftCodec = Codec.forName("SimpleText");
    rightCodec = new RandomCodec(random());

    leftDir = newFSDirectory(createTempDir("leftDir"));
    rightDir = newFSDirectory(createTempDir("rightDir"));

    seed = random().nextLong();

    // must use the same seed because of random payloads, etc
    int maxTermLength = TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH);
    MockAnalyzer leftAnalyzer = new MockAnalyzer(new Random(seed));
    leftAnalyzer.setMaxTokenLength(maxTermLength);
    MockAnalyzer rightAnalyzer = new MockAnalyzer(new Random(seed));
    rightAnalyzer.setMaxTokenLength(maxTermLength);

    // but the writer configs can be different
    // TODO: this turns this into a really big test of Multi*, is that what we want?
    IndexWriterConfig leftConfig = newIndexWriterConfig(leftAnalyzer);
    leftConfig.setCodec(leftCodec);
    // preserve docids: a log merge policy only merges adjacent segments
    leftConfig.setMergePolicy(newLogMergePolicy());

    IndexWriterConfig rightConfig = newIndexWriterConfig(rightAnalyzer);
    rightConfig.setCodec(rightCodec);
    // preserve docids: a log merge policy only merges adjacent segments
    rightConfig.setMergePolicy(newLogMergePolicy());

    // must use the same seed because of random docvalues fields, etc
    leftWriter = new RandomIndexWriter(new Random(seed), leftDir, leftConfig);
    rightWriter = new RandomIndexWriter(new Random(seed), rightDir, rightConfig);

    info = "left: " + leftCodec.toString() + " / right: " + rightCodec.toString();
  }
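
  // release everything opened in setUp; IOUtils.close attempts to close every
  // argument even if one of the earlier closes throws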
  @Override
  public void tearDown() throws Exception {
    IOUtils.close(leftWriter,
                  rightWriter,
                  leftReader,
                  rightReader,
                  leftDir,
                  rightDir);
    super.tearDown();
  }

  /**
   * Populates a writer with random stuff. This must be fully reproducible with the seed!
   */
  public static void createRandomIndex(int numdocs, RandomIndexWriter writer, long seed) throws IOException {
    Random random = new Random(seed);
    // primary source for our data is LineFileDocs: it's realistic.
    LineFileDocs lineFileDocs = new LineFileDocs(random);

    // TODO: we should add other fields that use things like docs&freqs but omit positions,
    // because LineFileDocs doesn't cover all the possibilities.
    for (int i = 0; i < numdocs; i++) {
      Document document = lineFileDocs.nextDoc();
      // grab the title and add some SortedSet instances for fun
      String title = document.get("titleTokenized");
      String[] split = title.split("\\s+");
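      // LineFileDocs reuses its Document instance across nextDoc() calls, so strip the
      // doc values fields we added on a previous iteration before adding fresh ones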
document.removeFields("sortedset");
for (String trash : split) {
document.add(new SortedSetDocValuesField("sortedset", new BytesRef(trash)));
}
// add a numeric dv field sometimes
document.removeFields("sparsenumeric");
if (random.nextInt(4) == 2) {
document.add(new NumericDocValuesField("sparsenumeric", random.nextInt()));
}
// add sortednumeric sometimes
document.removeFields("sparsesortednum");
if (random.nextInt(5) == 1) {
document.add(new SortedNumericDocValuesField("sparsesortednum", random.nextLong()));
if (random.nextBoolean()) {
document.add(new SortedNumericDocValuesField("sparsesortednum", random.nextLong()));
}
}
writer.addDocument(document);
}
lineFileDocs.close();
}

  /**
   * Checks that the two indexes are equivalent.
   */
  // we use a small amount of docs here, so it works with any codec
  public void testEquals() throws IOException {
    int numdocs = atLeast(20);
    createRandomIndex(numdocs, leftWriter, seed);
    createRandomIndex(numdocs, rightWriter, seed);

    leftReader = leftWriter.getReader();
    rightReader = rightWriter.getReader();
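
    // assertReaderEquals (from LuceneTestCase) walks both readers and checks that
    // fields, terms, postings, stored fields, doc values, etc. match between the two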
    assertReaderEquals(info, leftReader, rightReader);
  }

  public void testCrazyReaderEquals() throws IOException {
    int numdocs = atLeast(20);
    createRandomIndex(numdocs, leftWriter, seed);
    createRandomIndex(numdocs, rightWriter, seed);
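
    // same checks as testEquals, but run through wrapReader, which layers random
    // reader wrappers on top of the plain NRT readers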
    leftReader = wrapReader(leftWriter.getReader());
    rightReader = wrapReader(rightWriter.getReader());

    // check that our readers are valid
    TestUtil.checkReader(leftReader);
    TestUtil.checkReader(rightReader);

    assertReaderEquals(info, leftReader, rightReader);
  }
}