lucene/src/java/org/apache/lucene/index/CheckIndex.java - lucene-solr - Git at Google

 package org.apache.lucene.index;

 /**
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
  * this work for additional information regarding copyright ownership.
  * The ASF licenses this file to You under the Apache License, Version 2.0
  * (the "License"); you may not use this file except in compliance with
  * the License.  You may obtain a copy of the License at
  *
  *     http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */

 import org.apache.lucene.search.DocIdSetIterator;
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.TermQuery;
 import org.apache.lucene.store.FSDirectory;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.IndexInput;
 import org.apache.lucene.document.AbstractField;  // for javadocs
 import org.apache.lucene.document.Document;
 import org.apache.lucene.index.codecs.CodecProvider;
 import org.apache.lucene.index.codecs.DefaultSegmentInfosWriter;
 import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.BytesRef;

 import java.text.NumberFormat;
 import java.io.PrintStream;
 import java.io.IOException;
 import java.io.File;
 import java.util.Collection;
 import java.util.Comparator;
 import java.util.List;
 import java.util.ArrayList;
 import java.util.Map;

 /**
  * Basic tool and API to check the health of an index and
  * write a new segments file that removes reference to
  * problematic segments.
  *
  * <p>As this tool checks every byte in the index, on a large
  * index it can take quite a long time to run.
  *
  * @lucene.experimental Please make a complete backup of your
  * index before using this to fix your index!
  */
 public class CheckIndex {

   private PrintStream infoStream;
   private Directory dir;

   /**
    * Returned from {@link #checkIndex()} detailing the health and status of the index.
    *
    * @lucene.experimental
    **/

   public static class Status {

     /** True if no problems were found with the index. */
     public boolean clean;

     /** True if we were unable to locate and load the segments_N file. */
     public boolean missingSegments;

     /** True if we were unable to open the segments_N file. */
     public boolean cantOpenSegments;

     /** True if we were unable to read the version number from segments_N file. */
     public boolean missingSegmentVersion;

     /** Name of latest segments_N file in the index. */
     public String segmentsFileName;

     /** Number of segments in the index. */
     public int numSegments;

     /** String description of the version of the index. */
     public String segmentFormat;

     /** Empty unless you passed specific segments list to check as optional 3rd argument.
      *  @see CheckIndex#checkIndex(List) */
     public List<String> segmentsChecked = new ArrayList<String>();

     /** True if the index was created with a newer version of Lucene than the CheckIndex tool. */
     public boolean toolOutOfDate;

     /** List of {@link SegmentInfoStatus} instances, detailing status of each segment. */
     public List<SegmentInfoStatus> segmentInfos = new ArrayList<SegmentInfoStatus>();

     /** Directory index is in. */
     public Directory dir;

     /**
      * SegmentInfos instance containing only segments that
      * had no problems (this is used with the {@link CheckIndex#fixIndex}
      * method to repair the index.
      */
     SegmentInfos newSegments;

     /** How many documents will be lost to bad segments. */
     public int totLoseDocCount;

     /** How many bad segments were found. */
     public int numBadSegments;

     /** True if we checked only specific segments ({@link
      * #checkIndex(List)}) was called with non-null
      * argument). */
     public boolean partial;

     /** Holds the userData of the last commit in the index */
     public Map<String, String> userData;

     /** Holds the status of each segment in the index.
      *  See {@link #segmentInfos}.
      *
      * <p><b>WARNING</b>: this API is new and experimental and is
      * subject to suddenly change in the next release.
      */
     public static class SegmentInfoStatus {
       /** Name of the segment. */
       public String name;

       /** CodecInfo used to read this segment. */
       public SegmentCodecs codec;

       /** Document count (does not take deletions into account). */
       public int docCount;

       /** True if segment is compound file format. */
       public boolean compound;

       /** Number of files referenced by this segment. */
       public int numFiles;

       /** Net size (MB) of the files referenced by this
        *  segment. */
       public double sizeMB;

       /** Doc store offset, if this segment shares the doc
        *  store files (stored fields and term vectors) with
        *  other segments.  This is -1 if it does not share. */
       public int docStoreOffset = -1;

       /** String of the shared doc store segment, or null if
        *  this segment does not share the doc store files. */
       public String docStoreSegment;

       /** True if the shared doc store files are compound file
        *  format. */
       public boolean docStoreCompoundFile;

       /** True if this segment has pending deletions. */
       public boolean hasDeletions;

       /** Name of the current deletions file name. */
       public String deletionsFileName;

       /** Number of deleted documents. */
       public int numDeleted;

       /** True if we were able to open a SegmentReader on this
        *  segment. */
       public boolean openReaderPassed;

       /** Number of fields in this segment. */
       int numFields;

       /** True if at least one of the fields in this segment
        *  does not omitTermFreqAndPositions.
        *  @see AbstractField#setOmitTermFreqAndPositions */
       public boolean hasProx;

       /** Map that includes certain
        *  debugging details that IndexWriter records into
        *  each segment it creates */
       public Map<String,String> diagnostics;

       /** Status for testing of field norms (null if field norms could not be tested). */
       public FieldNormStatus fieldNormStatus;

       /** Status for testing of indexed terms (null if indexed terms could not be tested). */
       public TermIndexStatus termIndexStatus;

       /** Status for testing of stored fields (null if stored fields could not be tested). */
       public StoredFieldStatus storedFieldStatus;

       /** Status for testing of term vectors (null if term vectors could not be tested). */
       public TermVectorStatus termVectorStatus;
     }

     /**
      * Status from testing field norms.
      */
     public static final class FieldNormStatus {
       /** Number of fields successfully tested */
       public long totFields = 0L;

       /** Exception thrown during term index test (null on success) */
       public Throwable error = null;
     }

     /**
      * Status from testing term index.
      */
     public static final class TermIndexStatus {
       /** Total term count */
       public long termCount = 0L;

       /** Total frequency across all terms. */
       public long totFreq = 0L;

       /** Total number of positions. */
       public long totPos = 0L;

       /** Exception thrown during term index test (null on success) */
       public Throwable error = null;
     }

     /**
      * Status from testing stored fields.
      */
     public static final class StoredFieldStatus {

       /** Number of documents tested. */
       public int docCount = 0;

       /** Total number of stored fields tested. */
       public long totFields = 0;

       /** Exception thrown during stored fields test (null on success) */
       public Throwable error = null;
     }

     /**
      * Status from testing stored fields.
      */
     public static final class TermVectorStatus {

       /** Number of documents tested. */
       public int docCount = 0;

       /** Total number of term vectors tested. */
       public long totVectors = 0;

       /** Exception thrown during term vector test (null on success) */
       public Throwable error = null;
     }
   }

   /** Create a new CheckIndex on the directory. */
   public CheckIndex(Directory dir) {
     this.dir = dir;
     infoStream = null;
   }

   /** Set infoStream where messages should go.  If null, no
    *  messages are printed */
   public void setInfoStream(PrintStream out) {
     infoStream = out;
   }

   private void msg(String msg) {
     if (infoStream != null)
       infoStream.println(msg);
   }

   /** Returns a {@link Status} instance detailing
    *  the state of the index.
    *
    *  <p>As this method checks every byte in the index, on a large
    *  index it can take quite a long time to run.
    *
    *  <p><b>WARNING</b>: make sure
    *  you only call this when the index is not opened by any
    *  writer. */
   public Status checkIndex() throws IOException {
     return checkIndex(null);
   }

   public Status checkIndex(List<String> onlySegments) throws IOException {
     return checkIndex(onlySegments, CodecProvider.getDefault());
   }

   /** Returns a {@link Status} instance detailing
    *  the state of the index.
    *
    *  @param onlySegments list of specific segment names to check
    *
    *  <p>As this method checks every byte in the specified
    *  segments, on a large index it can take quite a long
    *  time to run.
    *
    *  <p><b>WARNING</b>: make sure
    *  you only call this when the index is not opened by any
    *  writer. */
   public Status checkIndex(List<String> onlySegments, CodecProvider codecs) throws IOException {
     NumberFormat nf = NumberFormat.getInstance();
     SegmentInfos sis = new SegmentInfos(codecs);
     Status result = new Status();
     result.dir = dir;
     try {
       sis.read(dir, codecs);
     } catch (Throwable t) {
       msg("ERROR: could not read any segments file in directory");
       result.missingSegments = true;
       if (infoStream != null)
         t.printStackTrace(infoStream);
       return result;
     }

     final int numSegments = sis.size();
     final String segmentsFileName = sis.getCurrentSegmentFileName();
     IndexInput input = null;
     try {
       input = dir.openInput(segmentsFileName);
     } catch (Throwable t) {
       msg("ERROR: could not open segments file in directory");
       if (infoStream != null)
         t.printStackTrace(infoStream);
       result.cantOpenSegments = true;
       return result;
     }
     int format = 0;
     try {
       format = input.readInt();
     } catch (Throwable t) {
       msg("ERROR: could not read segment file version in directory");
       if (infoStream != null)
         t.printStackTrace(infoStream);
       result.missingSegmentVersion = true;
       return result;
     } finally {
       if (input != null)
         input.close();
     }

     String sFormat = "";
     boolean skip = false;

     if (format == DefaultSegmentInfosWriter.FORMAT_DIAGNOSTICS) {
       sFormat = "FORMAT_DIAGNOSTICS [Lucene 2.9]";
     } else if (format == DefaultSegmentInfosWriter.FORMAT_HAS_VECTORS) {
       sFormat = "FORMAT_HAS_VECTORS [Lucene 3.1]";
     } else if (format == DefaultSegmentInfosWriter.FORMAT_3_1) {
       sFormat = "FORMAT_3_1 [Lucene 3.1]";
     } else if (format == DefaultSegmentInfosWriter.FORMAT_4_0) {
       sFormat = "FORMAT_4_0 [Lucene 4.0]";
     } else if (format == DefaultSegmentInfosWriter.FORMAT_CURRENT) {
       throw new RuntimeException("BUG: You should update this tool!");
     } else if (format < DefaultSegmentInfosWriter.FORMAT_CURRENT) {
       sFormat = "int=" + format + " [newer version of Lucene than this tool supports]";
       skip = true;
     } else if (format > DefaultSegmentInfosWriter.FORMAT_MINIMUM) {
       sFormat = "int=" + format + " [older version of Lucene than this tool supports]";
       skip = true;
     }

     result.segmentsFileName = segmentsFileName;
     result.numSegments = numSegments;
     result.segmentFormat = sFormat;
     result.userData = sis.getUserData();
     String userDataString;
     if (sis.getUserData().size() > 0) {
       userDataString = " userData=" + sis.getUserData();
     } else {
       userDataString = "";
     }

     msg("Segments file=" + segmentsFileName + " numSegments=" + numSegments + " version=" + sFormat + userDataString);

     if (onlySegments != null) {
       result.partial = true;
       if (infoStream != null)
         infoStream.print("\nChecking only these segments:");
       for (String s : onlySegments) {
         if (infoStream != null)
           infoStream.print(" " + s);
       }
       result.segmentsChecked.addAll(onlySegments);
       msg(":");
     }

     if (skip) {
       msg("\nERROR: this index appears to be created by a newer version of Lucene than this tool was compiled on; please re-compile this tool on the matching version of Lucene; exiting");
       result.toolOutOfDate = true;
       return result;
     }


     result.newSegments = (SegmentInfos) sis.clone();
     result.newSegments.clear();

     for(int i=0;i<numSegments;i++) {
       final SegmentInfo info = sis.info(i);
       if (onlySegments != null && !onlySegments.contains(info.name))
         continue;
       Status.SegmentInfoStatus segInfoStat = new Status.SegmentInfoStatus();
       result.segmentInfos.add(segInfoStat);
       msg("  " + (1+i) + " of " + numSegments + ": name=" + info.name + " docCount=" + info.docCount);
       segInfoStat.name = info.name;
       segInfoStat.docCount = info.docCount;

       int toLoseDocCount = info.docCount;

       SegmentReader reader = null;

       try {
         final SegmentCodecs codec = info.getSegmentCodecs();
         msg("    codec=" + codec);
         segInfoStat.codec = codec;
         msg("    compound=" + info.getUseCompoundFile());
         segInfoStat.compound = info.getUseCompoundFile();
         msg("    hasProx=" + info.getHasProx());
         segInfoStat.hasProx = info.getHasProx();
         msg("    numFiles=" + info.files().size());
         segInfoStat.numFiles = info.files().size();
         segInfoStat.sizeMB = info.sizeInBytes(true)/(1024.*1024.);
         msg("    size (MB)=" + nf.format(segInfoStat.sizeMB));
         Map<String,String> diagnostics = info.getDiagnostics();
         segInfoStat.diagnostics = diagnostics;
         if (diagnostics.size() > 0) {
           msg("    diagnostics = " + diagnostics);
         }

         final int docStoreOffset = info.getDocStoreOffset();
         if (docStoreOffset != -1) {
           msg("    docStoreOffset=" + docStoreOffset);
           segInfoStat.docStoreOffset = docStoreOffset;
           msg("    docStoreSegment=" + info.getDocStoreSegment());
           segInfoStat.docStoreSegment = info.getDocStoreSegment();
           msg("    docStoreIsCompoundFile=" + info.getDocStoreIsCompoundFile());
           segInfoStat.docStoreCompoundFile = info.getDocStoreIsCompoundFile();
         }

         final String delFileName = info.getDelFileName();
         if (delFileName == null){
           msg("    no deletions");
           segInfoStat.hasDeletions = false;
         }
         else{
           msg("    has deletions [delFileName=" + delFileName + "]");
           segInfoStat.hasDeletions = true;
           segInfoStat.deletionsFileName = delFileName;
         }
         if (infoStream != null)
           infoStream.print("    test: open reader.........");
         reader = SegmentReader.get(true, info, IndexReader.DEFAULT_TERMS_INDEX_DIVISOR);

         segInfoStat.openReaderPassed = true;

         final int numDocs = reader.numDocs();
         toLoseDocCount = numDocs;
         if (reader.hasDeletions()) {
           if (reader.deletedDocs.count() != info.getDelCount()) {
             throw new RuntimeException("delete count mismatch: info=" + info.getDelCount() + " vs deletedDocs.count()=" + reader.deletedDocs.count());
           }
           if (reader.deletedDocs.count() > reader.maxDoc()) {
             throw new RuntimeException("too many deleted docs: maxDoc()=" + reader.maxDoc() + " vs deletedDocs.count()=" + reader.deletedDocs.count());
           }
           if (info.docCount - numDocs != info.getDelCount()){
             throw new RuntimeException("delete count mismatch: info=" + info.getDelCount() + " vs reader=" + (info.docCount - numDocs));
           }
           segInfoStat.numDeleted = info.docCount - numDocs;
           msg("OK [" + (segInfoStat.numDeleted) + " deleted docs]");
         } else {
           if (info.getDelCount() != 0) {
             throw new RuntimeException("delete count mismatch: info=" + info.getDelCount() + " vs reader=" + (info.docCount - numDocs));
           }
           msg("OK");
         }
         if (reader.maxDoc() != info.docCount)
           throw new RuntimeException("SegmentReader.maxDoc() " + reader.maxDoc() + " != SegmentInfos.docCount " + info.docCount);

         // Test getFieldNames()
         if (infoStream != null) {
           infoStream.print("    test: fields..............");
         }
         Collection<String> fieldNames = reader.getFieldNames(IndexReader.FieldOption.ALL);
         msg("OK [" + fieldNames.size() + " fields]");
         segInfoStat.numFields = fieldNames.size();

         // Test Field Norms
         segInfoStat.fieldNormStatus = testFieldNorms(fieldNames, reader);

         // Test the Term Index
         segInfoStat.termIndexStatus = testTermIndex(reader);

         // Test Stored Fields
         segInfoStat.storedFieldStatus = testStoredFields(info, reader, nf);

         // Test Term Vectors
         segInfoStat.termVectorStatus = testTermVectors(info, reader, nf);

         // Rethrow the first exception we encountered
         //  This will cause stats for failed segments to be incremented properly
         if (segInfoStat.fieldNormStatus.error != null) {
           throw new RuntimeException("Field Norm test failed");
         } else if (segInfoStat.termIndexStatus.error != null) {
           throw new RuntimeException("Term Index test failed");
         } else if (segInfoStat.storedFieldStatus.error != null) {
           throw new RuntimeException("Stored Field test failed");
         } else if (segInfoStat.termVectorStatus.error != null) {
           throw new RuntimeException("Term Vector test failed");
         }

         msg("");

       } catch (Throwable t) {
         msg("FAILED");
         String comment;
         comment = "fixIndex() would remove reference to this segment";
         msg("    WARNING: " + comment + "; full exception:");
         if (infoStream != null)
           t.printStackTrace(infoStream);
         msg("");
         result.totLoseDocCount += toLoseDocCount;
         result.numBadSegments++;
         continue;
       } finally {
         if (reader != null)
           reader.close();
       }

       // Keeper
       result.newSegments.add((SegmentInfo) info.clone());
     }

     if (0 == result.numBadSegments) {
       result.clean = true;
       msg("No problems were detected with this index.\n");
     } else
       msg("WARNING: " + result.numBadSegments + " broken segments (containing " + result.totLoseDocCount + " documents) detected");

     return result;
   }

   /**
    * Test field norms.
    */
   private Status.FieldNormStatus testFieldNorms(Collection<String> fieldNames, SegmentReader reader) {
     final Status.FieldNormStatus status = new Status.FieldNormStatus();

     try {
       // Test Field Norms
       if (infoStream != null) {
         infoStream.print("    test: field norms.........");
       }
       byte[] b;
       for (final String fieldName : fieldNames) {
         if (reader.hasNorms(fieldName)) {
           b = reader.norms(fieldName);
           ++status.totFields;
         }
       }

       msg("OK [" + status.totFields + " fields]");
     } catch (Throwable e) {
       msg("ERROR [" + String.valueOf(e.getMessage()) + "]");
       status.error = e;
       if (infoStream != null) {
         e.printStackTrace(infoStream);
       }
     }

     return status;
   }

   /**
    * Test the term index.
    */
   private Status.TermIndexStatus testTermIndex(SegmentReader reader) {
     final Status.TermIndexStatus status = new Status.TermIndexStatus();

     final int maxDoc = reader.maxDoc();
     final Bits delDocs = reader.getDeletedDocs();

     final IndexSearcher is = new IndexSearcher(reader);

     try {

       if (infoStream != null) {
         infoStream.print("    test: terms, freq, prox...");
       }

       final Fields fields = reader.fields();
       if (fields == null) {
         msg("OK [no fields/terms]");
         return status;
       }

       DocsEnum docs = null;
       DocsAndPositionsEnum postings = null;

       final FieldsEnum fieldsEnum = fields.iterator();
       while(true) {
         final String field = fieldsEnum.next();
         if (field == null) {
           break;
         }

         final TermsEnum terms = fieldsEnum.terms();
         assert terms != null;
         boolean hasOrd = true;
         final long termCountStart = status.termCount;

         BytesRef lastTerm = null;

         Comparator<BytesRef> termComp = terms.getComparator();

         long sumTotalTermFreq = 0;

         while(true) {

           final BytesRef term = terms.next();
           if (term == null) {
             break;
           }

           // make sure terms arrive in order according to
           // the comp
           if (lastTerm == null) {
             lastTerm = new BytesRef(term);
           } else {
             if (termComp.compare(lastTerm, term) >= 0) {
               throw new RuntimeException("terms out of order: lastTerm=" + lastTerm + " term=" + term);
             }
             lastTerm.copy(term);
           }

           final int docFreq = terms.docFreq();
           status.totFreq += docFreq;

           docs = terms.docs(delDocs, docs);
           postings = terms.docsAndPositions(delDocs, postings);

           if (hasOrd) {
             long ord = -1;
             try {
               ord = terms.ord();
             } catch (UnsupportedOperationException uoe) {
               hasOrd = false;
             }

             if (hasOrd) {
               final long ordExpected = status.termCount - termCountStart;
               if (ord != ordExpected) {
                 throw new RuntimeException("ord mismatch: TermsEnum has ord=" + ord + " vs actual=" + ordExpected);
               }
             }
           }

           status.termCount++;

           final DocsEnum docs2;
           final boolean hasPositions;
           if (postings != null) {
             docs2 = postings;
             hasPositions = true;
           } else {
             docs2 = docs;
             hasPositions = false;
           }

           int lastDoc = -1;
           int docCount = 0;
           long totalTermFreq = 0;
           while(true) {
             final int doc = docs2.nextDoc();
             if (doc == DocIdSetIterator.NO_MORE_DOCS) {
               break;
             }
             final int freq = docs2.freq();
             status.totPos += freq;
             totalTermFreq += freq;
             docCount++;

             if (doc <= lastDoc) {
               throw new RuntimeException("term " + term + ": doc " + doc + " <= lastDoc " + lastDoc);
             }
             if (doc >= maxDoc) {
               throw new RuntimeException("term " + term + ": doc " + doc + " >= maxDoc " + maxDoc);
             }

             lastDoc = doc;
             if (freq <= 0) {
               throw new RuntimeException("term " + term + ": doc " + doc + ": freq " + freq + " is out of bounds");
             }

             int lastPos = -1;
             if (postings != null) {
               for(int j=0;j<freq;j++) {
                 final int pos = postings.nextPosition();
                 if (pos < -1) {
                   throw new RuntimeException("term " + term + ": doc " + doc + ": pos " + pos + " is out of bounds");
                 }
                 if (pos < lastPos) {
                   throw new RuntimeException("term " + term + ": doc " + doc + ": pos " + pos + " < lastPos " + lastPos);
                 }
                 lastPos = pos;
                 if (postings.hasPayload()) {
                   postings.getPayload();
                 }
               }
             }
           }

           final long totalTermFreq2 = terms.totalTermFreq();
           final boolean hasTotalTermFreq = postings != null && totalTermFreq2 != -1;

           // Re-count if there are deleted docs:
           if (reader.hasDeletions()) {
             final DocsEnum docsNoDel = terms.docs(null, docs);
             docCount = 0;
             totalTermFreq = 0;
             while(docsNoDel.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
               docCount++;
               totalTermFreq += docsNoDel.freq();
             }
           }

           if (docCount != docFreq) {
             throw new RuntimeException("term " + term + " docFreq=" + docFreq + " != tot docs w/o deletions " + docCount);
           }
           if (hasTotalTermFreq) {
             sumTotalTermFreq += totalTermFreq;
             if (totalTermFreq != totalTermFreq2) {
               throw new RuntimeException("term " + term + " totalTermFreq=" + totalTermFreq2 + " != recomputed totalTermFreq=" + totalTermFreq);
             }
           }

           // Test skipping
           if (docFreq >= 16) {
             if (hasPositions) {
               for(int idx=0;idx<7;idx++) {
                 final int skipDocID = (int) (((idx+1)*(long) maxDoc)/8);
                 postings = terms.docsAndPositions(delDocs, postings);
                 final int docID = postings.advance(skipDocID);
                 if (docID == DocsEnum.NO_MORE_DOCS) {
                   break;
                 } else {
                   if (docID < skipDocID) {
                     throw new RuntimeException("term " + term + ": advance(docID=" + skipDocID + ") returned docID=" + docID);
                   }
                   final int freq = postings.freq();
                   if (freq <= 0) {
                     throw new RuntimeException("termFreq " + freq + " is out of bounds");
                   }
                   int lastPosition = -1;
                   for(int posUpto=0;posUpto<freq;posUpto++) {
                     final int pos = postings.nextPosition();
                     if (pos < 0) {
                       throw new RuntimeException("position " + pos + " is out of bounds");
                     }
                     if (pos <= lastPosition) {
                       throw new RuntimeException("position " + pos + " is <= lastPosition " + lastPosition);
                     }
                     lastPosition = pos;
                   }

                   final int nextDocID = postings.nextDoc();
                   if (nextDocID == DocsEnum.NO_MORE_DOCS) {
                     break;
                   }
                   if (nextDocID <= docID) {
                     throw new RuntimeException("term " + term + ": advance(docID=" + skipDocID + "), then .next() returned docID=" + nextDocID + " vs prev docID=" + docID);
                   }
                 }
               }
             } else {
               for(int idx=0;idx<7;idx++) {
                 final int skipDocID = (int) (((idx+1)*(long) maxDoc)/8);
                 docs = terms.docs(delDocs, docs);
                 final int docID = docs.advance(skipDocID);
                 if (docID == DocsEnum.NO_MORE_DOCS) {
                   break;
                 } else {
                   if (docID < skipDocID) {
                     throw new RuntimeException("term " + term + ": advance(docID=" + skipDocID + ") returned docID=" + docID);
                   }
                   final int nextDocID = docs.nextDoc();
                   if (nextDocID == DocsEnum.NO_MORE_DOCS) {
                     break;
                   }
                   if (nextDocID <= docID) {
                     throw new RuntimeException("term " + term + ": advance(docID=" + skipDocID + "), then .next() returned docID=" + nextDocID + " vs prev docID=" + docID);
                   }
                 }
               }
             }
           }
         }

         if (sumTotalTermFreq != 0) {
           final long v = fields.terms(field).getSumTotalTermFreq();
           if (v != -1 && sumTotalTermFreq != v) {
             throw new RuntimeException("sumTotalTermFreq for field " + field + "=" + v + " != recomputed sumTotalTermFreq=" + sumTotalTermFreq);
           }
         }

         // Test seek to last term:
         if (lastTerm != null) {
           if (terms.seek(lastTerm) != TermsEnum.SeekStatus.FOUND) {
             throw new RuntimeException("seek to last term " + lastTerm + " failed");
           }

           is.search(new TermQuery(new Term(field, lastTerm)), 1);
         }

         // Test seeking by ord
         if (hasOrd && status.termCount-termCountStart > 0) {
           long termCount;
           try {
             termCount = fields.terms(field).getUniqueTermCount();
           } catch (UnsupportedOperationException uoe) {
             termCount = -1;
           }

           if (termCount != -1 && termCount != status.termCount - termCountStart) {
             throw new RuntimeException("termCount mismatch " + termCount + " vs " + (status.termCount - termCountStart));
           }

           int seekCount = (int) Math.min(10000L, termCount);
           if (seekCount > 0) {
             BytesRef[] seekTerms = new BytesRef[seekCount];

             // Seek by ord
             for(int i=seekCount-1;i>=0;i--) {
               long ord = i*(termCount/seekCount);
               terms.seek(ord);
               seekTerms[i] = new BytesRef(terms.term());
             }

             // Seek by term
             long totDocCount = 0;
             for(int i=seekCount-1;i>=0;i--) {
               if (terms.seek(seekTerms[i]) != TermsEnum.SeekStatus.FOUND) {
                 throw new RuntimeException("seek to existing term " + seekTerms[i] + " failed");
               }

               docs = terms.docs(delDocs, docs);
               if (docs == null) {
                 throw new RuntimeException("null DocsEnum from to existing term " + seekTerms[i]);
               }

               while(docs.nextDoc() != DocsEnum.NO_MORE_DOCS) {
                 totDocCount++;
               }
             }

             // TermQuery
             long totDocCount2 = 0;
             for(int i=0;i<seekCount;i++) {
               totDocCount2 += is.search(new TermQuery(new Term(field, seekTerms[i])), 1).totalHits;
             }

             if (totDocCount != totDocCount2) {
               throw new RuntimeException("search to seek terms produced wrong number of hits: " + totDocCount + " vs " + totDocCount2);
             }
           }
         }
       }

       msg("OK [" + status.termCount + " terms; " + status.totFreq + " terms/docs pairs; " + status.totPos + " tokens]");

     } catch (Throwable e) {
       msg("ERROR: " + e);
       status.error = e;
       if (infoStream != null) {
         e.printStackTrace(infoStream);
       }
     }

     return status;
   }

   /**
    * Test stored fields for a segment.
    */
   private Status.StoredFieldStatus testStoredFields(SegmentInfo info, SegmentReader reader, NumberFormat format) {
     final Status.StoredFieldStatus status = new Status.StoredFieldStatus();

     try {
       if (infoStream != null) {
         infoStream.print("    test: stored fields.......");
       }

       // Scan stored fields for all documents
       final Bits delDocs = reader.getDeletedDocs();
       for (int j = 0; j < info.docCount; ++j) {
         if (delDocs == null || !delDocs.get(j)) {
           status.docCount++;
           Document doc = reader.document(j);
           status.totFields += doc.getFields().size();
         }
       }

       // Validate docCount
       if (status.docCount != reader.numDocs()) {
         throw new RuntimeException("docCount=" + status.docCount + " but saw " + status.docCount + " undeleted docs");
       }

       msg("OK [" + status.totFields + " total field count; avg " +
           format.format((((float) status.totFields)/status.docCount)) + " fields per doc]");
     } catch (Throwable e) {
       msg("ERROR [" + String.valueOf(e.getMessage()) + "]");
       status.error = e;
       if (infoStream != null) {
         e.printStackTrace(infoStream);
       }
     }

     return status;
   }

   /**
    * Test term vectors for a segment.
    */
   private Status.TermVectorStatus testTermVectors(SegmentInfo info, SegmentReader reader, NumberFormat format) {
     final Status.TermVectorStatus status = new Status.TermVectorStatus();

     try {
       if (infoStream != null) {
         infoStream.print("    test: term vectors........");
       }

       final Bits delDocs = reader.getDeletedDocs();
       for (int j = 0; j < info.docCount; ++j) {
         if (delDocs == null || !delDocs.get(j)) {
           status.docCount++;
           TermFreqVector[] tfv = reader.getTermFreqVectors(j);
           if (tfv != null) {
             status.totVectors += tfv.length;
           }
         }
       }

       msg("OK [" + status.totVectors + " total vector count; avg " +
           format.format((((float) status.totVectors) / status.docCount)) + " term/freq vector fields per doc]");
     } catch (Throwable e) {
       msg("ERROR [" + String.valueOf(e.getMessage()) + "]");
       status.error = e;
       if (infoStream != null) {
         e.printStackTrace(infoStream);
       }
     }

     return status;
   }

   /** Repairs the index using previously returned result
    *  from {@link #checkIndex}.  Note that this does not
    *  remove any of the unreferenced files after it's done;
    *  you must separately open an {@link IndexWriter}, which
    *  deletes unreferenced files when it's created.
    *
    * <p><b>WARNING</b>: this writes a
    *  new segments file into the index, effectively removing
    *  all documents in broken segments from the index.
    *  BE CAREFUL.
    *
    * <p><b>WARNING</b>: Make sure you only call this when the
    *  index is not opened  by any writer. */
   public void fixIndex(Status result) throws IOException {
     if (result.partial)
       throw new IllegalArgumentException("can only fix an index that was fully checked (this status checked a subset of segments)");
     result.newSegments.changed();
     result.newSegments.commit(result.dir);
   }

   private static boolean assertsOn;

   private static boolean testAsserts() {
     assertsOn = true;
     return true;
   }

   private static boolean assertsOn() {
     assert testAsserts();
     return assertsOn;
   }

   /** Command-line interface to check and fix an index.

     <p>
     Run it like this:
     <pre>
     java -ea:org.apache.lucene... org.apache.lucene.index.CheckIndex pathToIndex [-fix] [-segment X] [-segment Y]
     </pre>
     <ul>
     <li><code>-fix</code>: actually write a new segments_N file, removing any problematic segments

     <li><code>-segment X</code>: only check the specified
     segment(s).  This can be specified multiple times,
     to check more than one segment, eg <code>-segment _2
     -segment _a</code>.  You can't use this with the -fix
     option.
     </ul>

     <p><b>WARNING</b>: <code>-fix</code> should only be used on an emergency basis as it will cause
                        documents (perhaps many) to be permanently removed from the index.  Always make
                        a backup copy of your index before running this!  Do not run this tool on an index
                        that is actively being written to.  You have been warned!

     <p>                Run without -fix, this tool will open the index, report version information
                        and report any exceptions it hits and what action it would take if -fix were
                        specified.  With -fix, this tool will remove any segments that have issues and
                        write a new segments_N file.  This means all documents contained in the affected
                        segments will be removed.

     <p>
                        This tool exits with exit code 1 if the index cannot be opened or has any
                        corruption, else 0.
    */
   public static void main(String[] args) throws IOException, InterruptedException {

     boolean doFix = false;
     List<String> onlySegments = new ArrayList<String>();
     String indexPath = null;
     int i = 0;
     while(i < args.length) {
       if (args[i].equals("-fix")) {
         doFix = true;
         i++;
       } else if (args[i].equals("-segment")) {
         if (i == args.length-1) {
           System.out.println("ERROR: missing name for -segment option");
           System.exit(1);
         }
         onlySegments.add(args[i+1]);
         i += 2;
       } else {
         if (indexPath != null) {
           System.out.println("ERROR: unexpected extra argument '" + args[i] + "'");
           System.exit(1);
         }
         indexPath = args[i];
         i++;
       }
     }

     if (indexPath == null) {
       System.out.println("\nERROR: index path not specified");
       System.out.println("\nUsage: java org.apache.lucene.index.CheckIndex pathToIndex [-fix] [-segment X] [-segment Y]\n" +
                          "\n" +
                          "  -fix: actually write a new segments_N file, removing any problematic segments\n" +
                          "  -segment X: only check the specified segments.  This can be specified multiple\n" +
                          "              times, to check more than one segment, eg '-segment _2 -segment _a'.\n" +
                          "              You can't use this with the -fix option\n" +
                          "\n" +
                          "**WARNING**: -fix should only be used on an emergency basis as it will cause\n" +
                          "documents (perhaps many) to be permanently removed from the index.  Always make\n" +
                          "a backup copy of your index before running this!  Do not run this tool on an index\n" +
                          "that is actively being written to.  You have been warned!\n" +
                          "\n" +
                          "Run without -fix, this tool will open the index, report version information\n" +
                          "and report any exceptions it hits and what action it would take if -fix were\n" +
                          "specified.  With -fix, this tool will remove any segments that have issues and\n" +
                          "write a new segments_N file.  This means all documents contained in the affected\n" +
                          "segments will be removed.\n" +
                          "\n" +
                          "This tool exits with exit code 1 if the index cannot be opened or has any\n" +
                          "corruption, else 0.\n");
       System.exit(1);
     }

     if (!assertsOn())
       System.out.println("\nNOTE: testing will be more thorough if you run java with '-ea:org.apache.lucene...', so assertions are enabled");

     if (onlySegments.size() == 0)
       onlySegments = null;
     else if (doFix) {
       System.out.println("ERROR: cannot specify both -fix and -segment");
       System.exit(1);
     }

     System.out.println("\nOpening index @ " + indexPath + "\n");
     Directory dir = null;
     try {
       dir = FSDirectory.open(new File(indexPath));
     } catch (Throwable t) {
       System.out.println("ERROR: could not open directory \"" + indexPath + "\"; exiting");
       t.printStackTrace(System.out);
       System.exit(1);
     }

     CheckIndex checker = new CheckIndex(dir);
     checker.setInfoStream(System.out);

     Status result = checker.checkIndex(onlySegments);
     if (result.missingSegments) {
       System.exit(1);
     }

     if (!result.clean) {
       if (!doFix) {
         System.out.println("WARNING: would write new segments file, and " + result.totLoseDocCount + " documents would be lost, if -fix were specified\n");
       } else {
         System.out.println("WARNING: " + result.totLoseDocCount + " documents will be lost\n");
         System.out.println("NOTE: will write new segments file in 5 seconds; this will remove " + result.totLoseDocCount + " docs from the index. THIS IS YOUR LAST CHANCE TO CTRL+C!");
         for(int s=0;s<5;s++) {
           Thread.sleep(1000);
           System.out.println("  " + (5-s) + "...");
         }
         System.out.println("Writing...");
         checker.fixIndex(result);
         System.out.println("OK");
         System.out.println("Wrote new segments file \"" + result.newSegments.getCurrentSegmentFileName() + "\"");
       }
     }
     System.out.println("");

     final int exitCode;
     if (result.clean == true)
       exitCode = 0;
     else
       exitCode = 1;
     System.exit(exitCode);
   }
 }