docs/attachments/LUCENE-3188/LUCENE-3188.patch - lucene-jira-archive - Git at Google

 Index: lucene/contrib/CHANGES.txt
 ===================================================================
 --- lucene/contrib/CHANGES.txt	(revision 1134725)
 +++ lucene/contrib/CHANGES.txt	(working copy)
 @@ -31,10 +31,14 @@

  Bug Fixes

 -  * LUCENE-3185: Fix bug in NRTCachingDirectory.deleteFile that would
 -    always throw exception and sometimes fail to actually delete the
 -    file.  (Mike McCandless)
 + * LUCENE-3185: Fix bug in NRTCachingDirectory.deleteFile that would
 +   always throw exception and sometimes fail to actually delete the
 +   file.  (Mike McCandless)

 + * LUCENE-3188: contrib/misc IndexSplitter creates indexes with incorrect
 +   SegmentInfos.counter; added CheckIndex check & fix for this problem.
 +   (Ivan Dimitrov Vasilev via Steve Rowe)
 +
  Build

   * LUCENE-3149: Upgrade contrib/icu's ICU jar file to ICU 4.8.
 Index: lucene/contrib/misc/src/test/org/apache/lucene/index/TestIndexSplitter.java
 ===================================================================
 --- lucene/contrib/misc/src/test/org/apache/lucene/index/TestIndexSplitter.java	(revision 1134725)
 +++ lucene/contrib/misc/src/test/org/apache/lucene/index/TestIndexSplitter.java	(working copy)
 @@ -20,6 +20,7 @@

  import org.apache.lucene.analysis.MockAnalyzer;
  import org.apache.lucene.document.Document;
 +import org.apache.lucene.document.Field;
  import org.apache.lucene.index.IndexWriterConfig.OpenMode;
  import org.apache.lucene.store.Directory;
  import org.apache.lucene.util.LuceneTestCase;
 @@ -92,4 +93,64 @@
      r.close();
      fsDir.close();
    }
 +
 +  public void testDeleteThenOptimize() throws Exception {
 +    // Create directories where the indexes will reside
 +    File indexPath = new File(TEMP_DIR, "testfilesplitter");
 +    _TestUtil.rmDir(indexPath);
 +    indexPath.mkdirs();
 +    File indexSplitPath = new File(TEMP_DIR, "testfilesplitterdest");
 +    _TestUtil.rmDir(indexSplitPath);
 +    indexSplitPath.mkdirs();
 +
 +    // Create the original index
 +    LogMergePolicy mergePolicy = new LogByteSizeMergePolicy();
 +    mergePolicy.setNoCFSRatio(1);
 +    IndexWriterConfig iwConfig
 +        = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random))
 +              .setOpenMode(OpenMode.CREATE)
 +              .setMergePolicy(mergePolicy);
 +    Directory fsDir = newFSDirectory(indexPath);
 +    IndexWriter indexWriter = new IndexWriter(fsDir, iwConfig);
 +    Document doc = new Document();
 +    doc.add(new Field("content", "doc 1", Field.Store.YES, Field.Index.ANALYZED_NO_NORMS));
 +    indexWriter.addDocument(doc);
 +    doc = new Document();
 +    doc.add(new Field("content", "doc 2", Field.Store.YES, Field.Index.ANALYZED_NO_NORMS));
 +    indexWriter.addDocument(doc);
 +    indexWriter.close();
 +    fsDir.close();
 +
 +    // Create the split index
 +    IndexSplitter indexSplitter = new IndexSplitter(indexPath);
 +    String splitSegName = indexSplitter.infos.info(0).name;
 +    indexSplitter.split(indexSplitPath, new String[] {splitSegName});
 +
 +    // Delete the first document in the split index
 +    Directory fsDirDest = newFSDirectory(indexSplitPath);
 +    IndexReader indexReader = IndexReader.open(fsDirDest, false);
 +    indexReader.deleteDocument(0);
 +    assertEquals(1, indexReader.numDocs());
 +    indexReader.close();
 +    fsDirDest.close();
 +
 +    // Optimize the split index
 +    mergePolicy = new LogByteSizeMergePolicy();
 +    mergePolicy.setNoCFSRatio(1);
 +    iwConfig = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random))
 +                   .setOpenMode(OpenMode.APPEND)
 +                   .setMergePolicy(mergePolicy);
 +    fsDirDest = newFSDirectory(indexSplitPath);
 +    indexWriter = new IndexWriter(fsDirDest, iwConfig);
 +    indexWriter.optimize();
 +    indexWriter.close();
 +    fsDirDest.close();
 +
 +    // Read the number of docs in the index
 +    fsDirDest = newFSDirectory(indexSplitPath);
 +    indexReader = IndexReader.open(fsDirDest);
 +	  assertEquals(1, indexReader.numDocs());
 +    indexReader.close();
 +    fsDirDest.close();
 +  }
  }
 Index: lucene/contrib/misc/src/java/org/apache/lucene/index/IndexSplitter.java
 ===================================================================
 --- lucene/contrib/misc/src/java/org/apache/lucene/index/IndexSplitter.java	(revision 1134725)
 +++ lucene/contrib/misc/src/java/org/apache/lucene/index/IndexSplitter.java	(working copy)
 @@ -139,6 +139,7 @@
      destDir.mkdirs();
      FSDirectory destFSDir = FSDirectory.open(destDir);
      SegmentInfos destInfos = new SegmentInfos();
 +    destInfos.counter = infos.counter;
      for (String n : segs) {
        SegmentInfo info = getInfo(n);
        destInfos.add(info);
 Index: lucene/src/java/org/apache/lucene/index/CheckIndex.java
 ===================================================================
 --- lucene/src/java/org/apache/lucene/index/CheckIndex.java	(revision 1134725)
 +++ lucene/src/java/org/apache/lucene/index/CheckIndex.java	(working copy)
 @@ -111,6 +111,12 @@
       * argument). */
      public boolean partial;

 +    /** The greatest segment name. */
 +    public int maxSegmentName;
 +
 +    /** Whether the SegmentInfos.counter is greater than any of the segments' names. */
 +    public boolean validCounter;
 +
      /** Holds the userData of the last commit in the index */
      public Map<String, String> userData;

 @@ -424,6 +430,10 @@

      for(int i=0;i<numSegments;i++) {
        final SegmentInfo info = sis.info(i);
 +      int segmentName = Integer.parseInt(info.name.substring(1), Character.MAX_RADIX);
 +      if (segmentName > result.maxSegmentName) {
 +        result.maxSegmentName = segmentName;
 +      }
        if (onlySegments != null && !onlySegments.contains(info.name))
          continue;
        Status.SegmentInfoStatus segInfoStat = new Status.SegmentInfoStatus();
 @@ -555,10 +565,19 @@

      if (0 == result.numBadSegments) {
        result.clean = true;
 -      msg("No problems were detected with this index.\n");
      } else
        msg("WARNING: " + result.numBadSegments + " broken segments (containing " + result.totLoseDocCount + " documents) detected");

 +    if ( ! (result.validCounter = (result.maxSegmentName < sis.counter))) {
 +      result.clean = false;
 +      result.newSegments.counter = result.maxSegmentName + 1;
 +      msg("ERROR: Next segment name counter " + sis.counter + " is not greater than max segment name " + result.maxSegmentName);
 +    }
 +
 +    if (result.clean) {
 +      msg("No problems were detected with this index.\n");
 +    }
 +
      return result;
    }
	Index: lucene/contrib/CHANGES.txt
	===================================================================
	--- lucene/contrib/CHANGES.txt (revision 1134725)
	+++ lucene/contrib/CHANGES.txt (working copy)
	@@ -31,10 +31,14 @@

	Bug Fixes

	- * LUCENE-3185: Fix bug in NRTCachingDirectory.deleteFile that would
	- always throw exception and sometimes fail to actually delete the
	- file. (Mike McCandless)
	+ * LUCENE-3185: Fix bug in NRTCachingDirectory.deleteFile that would
	+ always throw exception and sometimes fail to actually delete the
	+ file. (Mike McCandless)

	+ * LUCENE-3188: contrib/misc IndexSplitter creates indexes with incorrect
	+ SegmentInfos.counter; added CheckIndex check & fix for this problem.
	+ (Ivan Dimitrov Vasilev via Steve Rowe)
	+
	Build

	* LUCENE-3149: Upgrade contrib/icu's ICU jar file to ICU 4.8.
	Index: lucene/contrib/misc/src/test/org/apache/lucene/index/TestIndexSplitter.java
	===================================================================
	--- lucene/contrib/misc/src/test/org/apache/lucene/index/TestIndexSplitter.java (revision 1134725)
	+++ lucene/contrib/misc/src/test/org/apache/lucene/index/TestIndexSplitter.java (working copy)
	@@ -20,6 +20,7 @@

	import org.apache.lucene.analysis.MockAnalyzer;
	import org.apache.lucene.document.Document;
	+import org.apache.lucene.document.Field;
	import org.apache.lucene.index.IndexWriterConfig.OpenMode;
	import org.apache.lucene.store.Directory;
	import org.apache.lucene.util.LuceneTestCase;
	@@ -92,4 +93,64 @@
	r.close();
	fsDir.close();
	}
	+
	+ public void testDeleteThenOptimize() throws Exception {
	+ // Create directories where the indexes will reside
	+ File indexPath = new File(TEMP_DIR, "testfilesplitter");
	+ _TestUtil.rmDir(indexPath);
	+ indexPath.mkdirs();
	+ File indexSplitPath = new File(TEMP_DIR, "testfilesplitterdest");
	+ _TestUtil.rmDir(indexSplitPath);
	+ indexSplitPath.mkdirs();
	+
	+ // Create the original index
	+ LogMergePolicy mergePolicy = new LogByteSizeMergePolicy();
	+ mergePolicy.setNoCFSRatio(1);
	+ IndexWriterConfig iwConfig
	+ = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random))
	+ .setOpenMode(OpenMode.CREATE)
	+ .setMergePolicy(mergePolicy);
	+ Directory fsDir = newFSDirectory(indexPath);
	+ IndexWriter indexWriter = new IndexWriter(fsDir, iwConfig);
	+ Document doc = new Document();
	+ doc.add(new Field("content", "doc 1", Field.Store.YES, Field.Index.ANALYZED_NO_NORMS));
	+ indexWriter.addDocument(doc);
	+ doc = new Document();
	+ doc.add(new Field("content", "doc 2", Field.Store.YES, Field.Index.ANALYZED_NO_NORMS));
	+ indexWriter.addDocument(doc);
	+ indexWriter.close();
	+ fsDir.close();
	+
	+ // Create the split index
	+ IndexSplitter indexSplitter = new IndexSplitter(indexPath);
	+ String splitSegName = indexSplitter.infos.info(0).name;
	+ indexSplitter.split(indexSplitPath, new String[] {splitSegName});
	+
	+ // Delete the first document in the split index
	+ Directory fsDirDest = newFSDirectory(indexSplitPath);
	+ IndexReader indexReader = IndexReader.open(fsDirDest, false);
	+ indexReader.deleteDocument(0);
	+ assertEquals(1, indexReader.numDocs());
	+ indexReader.close();
	+ fsDirDest.close();
	+
	+ // Optimize the split index
	+ mergePolicy = new LogByteSizeMergePolicy();
	+ mergePolicy.setNoCFSRatio(1);
	+ iwConfig = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random))
	+ .setOpenMode(OpenMode.APPEND)
	+ .setMergePolicy(mergePolicy);
	+ fsDirDest = newFSDirectory(indexSplitPath);
	+ indexWriter = new IndexWriter(fsDirDest, iwConfig);
	+ indexWriter.optimize();
	+ indexWriter.close();
	+ fsDirDest.close();
	+
	+ // Read the number of docs in the index
	+ fsDirDest = newFSDirectory(indexSplitPath);
	+ indexReader = IndexReader.open(fsDirDest);
	+ assertEquals(1, indexReader.numDocs());
	+ indexReader.close();
	+ fsDirDest.close();
	+ }
	}
	Index: lucene/contrib/misc/src/java/org/apache/lucene/index/IndexSplitter.java
	===================================================================
	--- lucene/contrib/misc/src/java/org/apache/lucene/index/IndexSplitter.java (revision 1134725)
	+++ lucene/contrib/misc/src/java/org/apache/lucene/index/IndexSplitter.java (working copy)
	@@ -139,6 +139,7 @@
	destDir.mkdirs();
	FSDirectory destFSDir = FSDirectory.open(destDir);
	SegmentInfos destInfos = new SegmentInfos();
	+ destInfos.counter = infos.counter;
	for (String n : segs) {
	SegmentInfo info = getInfo(n);
	destInfos.add(info);
	Index: lucene/src/java/org/apache/lucene/index/CheckIndex.java
	===================================================================
	--- lucene/src/java/org/apache/lucene/index/CheckIndex.java (revision 1134725)
	+++ lucene/src/java/org/apache/lucene/index/CheckIndex.java (working copy)
	@@ -111,6 +111,12 @@
	* argument). */
	public boolean partial;

	+ /** The greatest segment name. */
	+ public int maxSegmentName;
	+
	+ /** Whether the SegmentInfos.counter is greater than any of the segments' names. */
	+ public boolean validCounter;
	+
	/** Holds the userData of the last commit in the index */
	public Map<String, String> userData;

	@@ -424,6 +430,10 @@

	for(int i=0;i<numSegments;i++) {
	final SegmentInfo info = sis.info(i);
	+ int segmentName = Integer.parseInt(info.name.substring(1), Character.MAX_RADIX);
	+ if (segmentName > result.maxSegmentName) {
	+ result.maxSegmentName = segmentName;
	+ }
	if (onlySegments != null && !onlySegments.contains(info.name))
	continue;
	Status.SegmentInfoStatus segInfoStat = new Status.SegmentInfoStatus();
	@@ -555,10 +565,19 @@

	if (0 == result.numBadSegments) {
	result.clean = true;
	- msg("No problems were detected with this index.\n");
	} else
	msg("WARNING: " + result.numBadSegments + " broken segments (containing " + result.totLoseDocCount + " documents) detected");

	+ if ( ! (result.validCounter = (result.maxSegmentName < sis.counter))) {
	+ result.clean = false;
	+ result.newSegments.counter = result.maxSegmentName + 1;
	+ msg("ERROR: Next segment name counter " + sis.counter + " is not greater than max segment name " + result.maxSegmentName);
	+ }
	+
	+ if (result.clean) {
	+ msg("No problems were detected with this index.\n");
	+ }
	+
	return result;
	}