src/java/org/apache/lucene/index/LogMergePolicy.java - lucene-solr - Git at Google

 package org.apache.lucene.index;

 /**
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
  * this work for additional information regarding copyright ownership.
  * The ASF licenses this file to You under the Apache License, Version 2.0
  * (the "License"); you may not use this file except in compliance with
  * the License.  You may obtain a copy of the License at
  *
  *     http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */

 import java.io.IOException;
 import java.util.Set;

 /** <p>This class implements a {@link MergePolicy} that tries
  *  to merge segments into levels of exponentially
  *  increasing size, where each level has fewer segments than
  *  the value of the merge factor. Whenever extra segments
  *  (beyond the merge factor upper bound) are encountered,
  *  all segments within the level are merged. You can get or
  *  set the merge factor using {@link #getMergeFactor()} and
  *  {@link #setMergeFactor(int)} respectively.</p>
  *
  * <p>This class is abstract and requires a subclass to
  * define the {@link #size} method which specifies how a
  * segment's size is determined.  {@link LogDocMergePolicy}
  * is one subclass that measures size by document count in
  * the segment.  {@link LogByteSizeMergePolicy} is another
  * subclass that measures size as the total byte size of the
  * file(s) for the segment.</p>
  */

 public abstract class LogMergePolicy extends MergePolicy {

   /** Defines the allowed range of log(size) for each
    *  level.  A level is computed by taking the max segment
    *  log size, minus LEVEL_LOG_SPAN, and finding all
    *  segments falling within that range. */
   public static final double LEVEL_LOG_SPAN = 0.75;

   /** Default merge factor, which is how many segments are
    *  merged at a time */
   public static final int DEFAULT_MERGE_FACTOR = 10;

   /** Default maximum segment size.  A segment of this size
    *  or larger will never be merged.  @see setMaxMergeDocs */
   public static final int DEFAULT_MAX_MERGE_DOCS = Integer.MAX_VALUE;

   private int mergeFactor = DEFAULT_MERGE_FACTOR;

   long minMergeSize;
   long maxMergeSize;
   int maxMergeDocs = DEFAULT_MAX_MERGE_DOCS;

   /* TODO 3.0: change this default to true */
   protected boolean calibrateSizeByDeletes = false;

   private boolean useCompoundFile = true;
   private boolean useCompoundDocStore = true;

   public LogMergePolicy() {
     super();
   }

   protected boolean verbose() {
     IndexWriter w = writer.get();
     return w != null && w.verbose();
   }

   private void message(String message) {
     if (verbose())
       writer.get().message("LMP: " + message);
   }

   /** <p>Returns the number of segments that are merged at
    * once and also controls the total number of segments
    * allowed to accumulate in the index.</p> */
   public int getMergeFactor() {
     return mergeFactor;
   }

   /** Determines how often segment indices are merged by
    * addDocument().  With smaller values, less RAM is used
    * while indexing, and searches on unoptimized indices are
    * faster, but indexing speed is slower.  With larger
    * values, more RAM is used during indexing, and while
    * searches on unoptimized indices are slower, indexing is
    * faster.  Thus larger values (> 10) are best for batch
    * index creation, and smaller values (< 10) for indices
    * that are interactively maintained. */
   public void setMergeFactor(int mergeFactor) {
     if (mergeFactor < 2)
       throw new IllegalArgumentException("mergeFactor cannot be less than 2");
     this.mergeFactor = mergeFactor;
   }

   // Javadoc inherited
   @Override
   public boolean useCompoundFile(SegmentInfos infos, SegmentInfo info) {
     return useCompoundFile;
   }

   /** Sets whether compound file format should be used for
    *  newly flushed and newly merged segments. */
   public void setUseCompoundFile(boolean useCompoundFile) {
     this.useCompoundFile = useCompoundFile;
   }

   /** Returns true if newly flushed and newly merge segments
    *  are written in compound file format. @see
    *  #setUseCompoundFile */
   public boolean getUseCompoundFile() {
     return useCompoundFile;
   }

   // Javadoc inherited
   @Override
   public boolean useCompoundDocStore(SegmentInfos infos) {
     return useCompoundDocStore;
   }

   /** Sets whether compound file format should be used for
    *  newly flushed and newly merged doc store
    *  segment files (term vectors and stored fields). */
   public void setUseCompoundDocStore(boolean useCompoundDocStore) {
     this.useCompoundDocStore = useCompoundDocStore;
   }

   /** Returns true if newly flushed and newly merge doc
    *  store segment files (term vectors and stored fields)
    *  are written in compound file format. @see
    *  #setUseCompoundDocStore */
   public boolean getUseCompoundDocStore() {
     return useCompoundDocStore;
   }

   /** Sets whether the segment size should be calibrated by
    *  the number of deletes when choosing segments for merge. */
   public void setCalibrateSizeByDeletes(boolean calibrateSizeByDeletes) {
     this.calibrateSizeByDeletes = calibrateSizeByDeletes;
   }

   /** Returns true if the segment size should be calibrated
    *  by the number of deletes when choosing segments for merge. */
   public boolean getCalibrateSizeByDeletes() {
     return calibrateSizeByDeletes;
   }

   @Override
   public void close() {}

   abstract protected long size(SegmentInfo info) throws IOException;

   protected long sizeDocs(SegmentInfo info) throws IOException {
     if (calibrateSizeByDeletes) {
       int delCount = writer.get().numDeletedDocs(info);
       return (info.docCount - (long)delCount);
     } else {
       return info.docCount;
     }
   }

   protected long sizeBytes(SegmentInfo info) throws IOException {
     long byteSize = info.sizeInBytes();
     if (calibrateSizeByDeletes) {
       int delCount = writer.get().numDeletedDocs(info);
       float delRatio = (info.docCount <= 0 ? 0.0f : ((float)delCount / (float)info.docCount));
       return (info.docCount <= 0 ?  byteSize : (long)(byteSize * (1.0f - delRatio)));
     } else {
       return byteSize;
     }
   }

   private boolean isOptimized(SegmentInfos infos, int maxNumSegments, Set<SegmentInfo> segmentsToOptimize) throws IOException {
     final int numSegments = infos.size();
     int numToOptimize = 0;
     SegmentInfo optimizeInfo = null;
     for(int i=0;i<numSegments && numToOptimize <= maxNumSegments;i++) {
       final SegmentInfo info = infos.info(i);
       if (segmentsToOptimize.contains(info)) {
         numToOptimize++;
         optimizeInfo = info;
       }
     }

     return numToOptimize <= maxNumSegments &&
       (numToOptimize != 1 || isOptimized(optimizeInfo));
   }

   /** Returns true if this single info is optimized (has no
    *  pending norms or deletes, is in the same dir as the
    *  writer, and matches the current compound file setting */
   private boolean isOptimized(SegmentInfo info)
     throws IOException {
     IndexWriter w = writer.get();
     assert w != null;
     boolean hasDeletions = w.numDeletedDocs(info) > 0;
     return !hasDeletions &&
       !info.hasSeparateNorms() &&
       info.dir == w.getDirectory() &&
       info.getUseCompoundFile() == useCompoundFile;
   }

   /** Returns the merges necessary to optimize the index.
    *  This merge policy defines "optimized" to mean only one
    *  segment in the index, where that segment has no
    *  deletions pending nor separate norms, and it is in
    *  compound file format if the current useCompoundFile
    *  setting is true.  This method returns multiple merges
    *  (mergeFactor at a time) so the {@link MergeScheduler}
    *  in use may make use of concurrency. */
   @Override
   public MergeSpecification findMergesForOptimize(SegmentInfos infos,
       int maxNumSegments, Set<SegmentInfo> segmentsToOptimize) throws IOException {
     MergeSpecification spec;

     assert maxNumSegments > 0;

     if (!isOptimized(infos, maxNumSegments, segmentsToOptimize)) {

       // Find the newest (rightmost) segment that needs to
       // be optimized (other segments may have been flushed
       // since optimize started):
       int last = infos.size();
       while(last > 0) {
         final SegmentInfo info = infos.info(--last);
         if (segmentsToOptimize.contains(info)) {
           last++;
           break;
         }
       }

       if (last > 0) {

         spec = new MergeSpecification();

         // First, enroll all "full" merges (size
         // mergeFactor) to potentially be run concurrently:
         while (last - maxNumSegments + 1 >= mergeFactor) {
           spec.add(new OneMerge(infos.range(last-mergeFactor, last), useCompoundFile));
           last -= mergeFactor;
         }

         // Only if there are no full merges pending do we
         // add a final partial (< mergeFactor segments) merge:
         if (0 == spec.merges.size()) {
           if (maxNumSegments == 1) {

             // Since we must optimize down to 1 segment, the
             // choice is simple:
             if (last > 1 || !isOptimized(infos.info(0)))
               spec.add(new OneMerge(infos.range(0, last), useCompoundFile));
           } else if (last > maxNumSegments) {

             // Take care to pick a partial merge that is
             // least cost, but does not make the index too
             // lopsided.  If we always just picked the
             // partial tail then we could produce a highly
             // lopsided index over time:

             // We must merge this many segments to leave
             // maxNumSegments in the index (from when
             // optimize was first kicked off):
             final int finalMergeSize = last - maxNumSegments + 1;

             // Consider all possible starting points:
             long bestSize = 0;
             int bestStart = 0;

             for(int i=0;i<last-finalMergeSize+1;i++) {
               long sumSize = 0;
               for(int j=0;j<finalMergeSize;j++)
                 sumSize += size(infos.info(j+i));
               if (i == 0 || (sumSize < 2*size(infos.info(i-1)) && sumSize < bestSize)) {
                 bestStart = i;
                 bestSize = sumSize;
               }
             }

             spec.add(new OneMerge(infos.range(bestStart, bestStart+finalMergeSize), useCompoundFile));
           }
         }

       } else
         spec = null;
     } else
       spec = null;

     return spec;
   }

   /**
    * Finds merges necessary to expunge all deletes from the
    * index.  We simply merge adjacent segments that have
    * deletes, up to mergeFactor at a time.
    */
   @Override
   public MergeSpecification findMergesToExpungeDeletes(SegmentInfos segmentInfos)
       throws CorruptIndexException, IOException {
     final int numSegments = segmentInfos.size();

     if (verbose())
       message("findMergesToExpungeDeletes: " + numSegments + " segments");

     MergeSpecification spec = new MergeSpecification();
     int firstSegmentWithDeletions = -1;
     IndexWriter w = writer.get();
     assert w != null;
     for(int i=0;i<numSegments;i++) {
       final SegmentInfo info = segmentInfos.info(i);
       int delCount = w.numDeletedDocs(info);
       if (delCount > 0) {
         if (verbose())
           message("  segment " + info.name + " has deletions");
         if (firstSegmentWithDeletions == -1)
           firstSegmentWithDeletions = i;
         else if (i - firstSegmentWithDeletions == mergeFactor) {
           // We've seen mergeFactor segments in a row with
           // deletions, so force a merge now:
           if (verbose())
             message("  add merge " + firstSegmentWithDeletions + " to " + (i-1) + " inclusive");
           spec.add(new OneMerge(segmentInfos.range(firstSegmentWithDeletions, i), useCompoundFile));
           firstSegmentWithDeletions = i;
         }
       } else if (firstSegmentWithDeletions != -1) {
         // End of a sequence of segments with deletions, so,
         // merge those past segments even if it's fewer than
         // mergeFactor segments
         if (verbose())
           message("  add merge " + firstSegmentWithDeletions + " to " + (i-1) + " inclusive");
         spec.add(new OneMerge(segmentInfos.range(firstSegmentWithDeletions, i), useCompoundFile));
         firstSegmentWithDeletions = -1;
       }
     }

     if (firstSegmentWithDeletions != -1) {
       if (verbose())
         message("  add merge " + firstSegmentWithDeletions + " to " + (numSegments-1) + " inclusive");
       spec.add(new OneMerge(segmentInfos.range(firstSegmentWithDeletions, numSegments), useCompoundFile));
     }

     return spec;
   }

   /** Checks if any merges are now necessary and returns a
    *  {@link MergePolicy.MergeSpecification} if so.  A merge
    *  is necessary when there are more than {@link
    *  #setMergeFactor} segments at a given level.  When
    *  multiple levels have too many segments, this method
    *  will return multiple merges, allowing the {@link
    *  MergeScheduler} to use concurrency. */
   @Override
   public MergeSpecification findMerges(SegmentInfos infos) throws IOException {

     final int numSegments = infos.size();
     if (verbose())
       message("findMerges: " + numSegments + " segments");

     // Compute levels, which is just log (base mergeFactor)
     // of the size of each segment
     float[] levels = new float[numSegments];
     final float norm = (float) Math.log(mergeFactor);

     for(int i=0;i<numSegments;i++) {
       final SegmentInfo info = infos.info(i);
       long size = size(info);

       // Floor tiny segments
       if (size < 1)
         size = 1;
       levels[i] = (float) Math.log(size)/norm;
     }

     final float levelFloor;
     if (minMergeSize <= 0)
       levelFloor = (float) 0.0;
     else
       levelFloor = (float) (Math.log(minMergeSize)/norm);

     // Now, we quantize the log values into levels.  The
     // first level is any segment whose log size is within
     // LEVEL_LOG_SPAN of the max size, or, who has such as
     // segment "to the right".  Then, we find the max of all
     // other segments and use that to define the next level
     // segment, etc.

     MergeSpecification spec = null;

     int start = 0;
     while(start < numSegments) {

       // Find max level of all segments not already
       // quantized.
       float maxLevel = levels[start];
       for(int i=1+start;i<numSegments;i++) {
         final float level = levels[i];
         if (level > maxLevel)
           maxLevel = level;
       }

       // Now search backwards for the rightmost segment that
       // falls into this level:
       float levelBottom;
       if (maxLevel < levelFloor)
         // All remaining segments fall into the min level
         levelBottom = -1.0F;
       else {
         levelBottom = (float) (maxLevel - LEVEL_LOG_SPAN);

         // Force a boundary at the level floor
         if (levelBottom < levelFloor && maxLevel >= levelFloor)
           levelBottom = levelFloor;
       }

       int upto = numSegments-1;
       while(upto >= start) {
         if (levels[upto] >= levelBottom) {
           break;
         }
         upto--;
       }
       if (verbose())
         message("  level " + levelBottom + " to " + maxLevel + ": " + (1+upto-start) + " segments");

       // Finally, record all merges that are viable at this level:
       int end = start + mergeFactor;
       while(end <= 1+upto) {
         boolean anyTooLarge = false;
         for(int i=start;i<end;i++) {
           final SegmentInfo info = infos.info(i);
           anyTooLarge |= (size(info) >= maxMergeSize || sizeDocs(info) >= maxMergeDocs);
         }

         if (!anyTooLarge) {
           if (spec == null)
             spec = new MergeSpecification();
           if (verbose())
             message("    " + start + " to " + end + ": add this merge");
           spec.add(new OneMerge(infos.range(start, end), useCompoundFile));
         } else if (verbose())
           message("    " + start + " to " + end + ": contains segment over maxMergeSize or maxMergeDocs; skipping");

         start = end;
         end = start + mergeFactor;
       }

       start = 1+upto;
     }

     return spec;
   }

   /** <p>Determines the largest segment (measured by
    * document count) that may be merged with other segments.
    * Small values (e.g., less than 10,000) are best for
    * interactive indexing, as this limits the length of
    * pauses while indexing to a few seconds.  Larger values
    * are best for batched indexing and speedier
    * searches.</p>
    *
    * <p>The default value is {@link Integer#MAX_VALUE}.</p>
    *
    * <p>The default merge policy ({@link
    * LogByteSizeMergePolicy}) also allows you to set this
    * limit by net size (in MB) of the segment, using {@link
    * LogByteSizeMergePolicy#setMaxMergeMB}.</p>
    */
   public void setMaxMergeDocs(int maxMergeDocs) {
     this.maxMergeDocs = maxMergeDocs;
   }

   /** Returns the largest segment (measured by document
    *  count) that may be merged with other segments.
    *  @see #setMaxMergeDocs */
   public int getMaxMergeDocs() {
     return maxMergeDocs;
   }

 }
	package org.apache.lucene.index;

	/**
	* Licensed to the Apache Software Foundation (ASF) under one or more
	* contributor license agreements. See the NOTICE file distributed with
	* this work for additional information regarding copyright ownership.
	* The ASF licenses this file to You under the Apache License, Version 2.0
	* (the "License"); you may not use this file except in compliance with
	* the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/

	import java.io.IOException;
	import java.util.Set;

	/** <p>This class implements a {@link MergePolicy} that tries
	* to merge segments into levels of exponentially
	* increasing size, where each level has fewer segments than
	* the value of the merge factor. Whenever extra segments
	* (beyond the merge factor upper bound) are encountered,
	* all segments within the level are merged. You can get or
	* set the merge factor using {@link #getMergeFactor()} and
	* {@link #setMergeFactor(int)} respectively.</p>
	*
	* <p>This class is abstract and requires a subclass to
	* define the {@link #size} method which specifies how a
	* segment's size is determined. {@link LogDocMergePolicy}
	* is one subclass that measures size by document count in
	* the segment. {@link LogByteSizeMergePolicy} is another
	* subclass that measures size as the total byte size of the
	* file(s) for the segment.</p>
	*/

	public abstract class LogMergePolicy extends MergePolicy {

	/** Defines the allowed range of log(size) for each
	* level. A level is computed by taking the max segment
	* log size, minus LEVEL_LOG_SPAN, and finding all
	* segments falling within that range. */
	public static final double LEVEL_LOG_SPAN = 0.75;

	/** Default merge factor, which is how many segments are
	* merged at a time */
	public static final int DEFAULT_MERGE_FACTOR = 10;

	/** Default maximum segment size. A segment of this size
	* or larger will never be merged. @see setMaxMergeDocs */
	public static final int DEFAULT_MAX_MERGE_DOCS = Integer.MAX_VALUE;

	private int mergeFactor = DEFAULT_MERGE_FACTOR;

	long minMergeSize;
	long maxMergeSize;
	int maxMergeDocs = DEFAULT_MAX_MERGE_DOCS;

	/* TODO 3.0: change this default to true */
	protected boolean calibrateSizeByDeletes = false;

	private boolean useCompoundFile = true;
	private boolean useCompoundDocStore = true;

	public LogMergePolicy() {
	super();
	}

	protected boolean verbose() {
	IndexWriter w = writer.get();
	return w != null && w.verbose();
	}

	private void message(String message) {
	if (verbose())
	writer.get().message("LMP: " + message);
	}

	/** <p>Returns the number of segments that are merged at
	* once and also controls the total number of segments
	* allowed to accumulate in the index.</p> */
	public int getMergeFactor() {
	return mergeFactor;
	}

	/** Determines how often segment indices are merged by
	* addDocument(). With smaller values, less RAM is used
	* while indexing, and searches on unoptimized indices are
	* faster, but indexing speed is slower. With larger
	* values, more RAM is used during indexing, and while
	* searches on unoptimized indices are slower, indexing is
	* faster. Thus larger values (> 10) are best for batch
	* index creation, and smaller values (< 10) for indices
	* that are interactively maintained. */
	public void setMergeFactor(int mergeFactor) {
	if (mergeFactor < 2)
	throw new IllegalArgumentException("mergeFactor cannot be less than 2");
	this.mergeFactor = mergeFactor;
	}

	// Javadoc inherited
	@Override
	public boolean useCompoundFile(SegmentInfos infos, SegmentInfo info) {
	return useCompoundFile;
	}

	/** Sets whether compound file format should be used for
	* newly flushed and newly merged segments. */
	public void setUseCompoundFile(boolean useCompoundFile) {
	this.useCompoundFile = useCompoundFile;
	}

	/** Returns true if newly flushed and newly merge segments
	* are written in compound file format. @see
	* #setUseCompoundFile */
	public boolean getUseCompoundFile() {
	return useCompoundFile;
	}

	// Javadoc inherited
	@Override
	public boolean useCompoundDocStore(SegmentInfos infos) {
	return useCompoundDocStore;
	}

	/** Sets whether compound file format should be used for
	* newly flushed and newly merged doc store
	* segment files (term vectors and stored fields). */
	public void setUseCompoundDocStore(boolean useCompoundDocStore) {
	this.useCompoundDocStore = useCompoundDocStore;
	}

	/** Returns true if newly flushed and newly merge doc
	* store segment files (term vectors and stored fields)
	* are written in compound file format. @see
	* #setUseCompoundDocStore */
	public boolean getUseCompoundDocStore() {
	return useCompoundDocStore;
	}

	/** Sets whether the segment size should be calibrated by
	* the number of deletes when choosing segments for merge. */
	public void setCalibrateSizeByDeletes(boolean calibrateSizeByDeletes) {
	this.calibrateSizeByDeletes = calibrateSizeByDeletes;
	}

	/** Returns true if the segment size should be calibrated
	* by the number of deletes when choosing segments for merge. */
	public boolean getCalibrateSizeByDeletes() {
	return calibrateSizeByDeletes;
	}

	@Override
	public void close() {}

	abstract protected long size(SegmentInfo info) throws IOException;

	protected long sizeDocs(SegmentInfo info) throws IOException {
	if (calibrateSizeByDeletes) {
	int delCount = writer.get().numDeletedDocs(info);
	return (info.docCount - (long)delCount);
	} else {
	return info.docCount;
	}
	}

	protected long sizeBytes(SegmentInfo info) throws IOException {
	long byteSize = info.sizeInBytes();
	if (calibrateSizeByDeletes) {
	int delCount = writer.get().numDeletedDocs(info);
	float delRatio = (info.docCount <= 0 ? 0.0f : ((float)delCount / (float)info.docCount));
	return (info.docCount <= 0 ? byteSize : (long)(byteSize * (1.0f - delRatio)));
	} else {
	return byteSize;
	}
	}

	private boolean isOptimized(SegmentInfos infos, int maxNumSegments, Set<SegmentInfo> segmentsToOptimize) throws IOException {
	final int numSegments = infos.size();
	int numToOptimize = 0;
	SegmentInfo optimizeInfo = null;
	for(int i=0;i<numSegments && numToOptimize <= maxNumSegments;i++) {
	final SegmentInfo info = infos.info(i);
	if (segmentsToOptimize.contains(info)) {
	numToOptimize++;
	optimizeInfo = info;
	}
	}

	return numToOptimize <= maxNumSegments &&
	(numToOptimize != 1 \|\| isOptimized(optimizeInfo));
	}

	/** Returns true if this single info is optimized (has no
	* pending norms or deletes, is in the same dir as the
	* writer, and matches the current compound file setting */
	private boolean isOptimized(SegmentInfo info)
	throws IOException {
	IndexWriter w = writer.get();
	assert w != null;
	boolean hasDeletions = w.numDeletedDocs(info) > 0;
	return !hasDeletions &&
	!info.hasSeparateNorms() &&
	info.dir == w.getDirectory() &&
	info.getUseCompoundFile() == useCompoundFile;
	}

	/** Returns the merges necessary to optimize the index.
	* This merge policy defines "optimized" to mean only one
	* segment in the index, where that segment has no
	* deletions pending nor separate norms, and it is in
	* compound file format if the current useCompoundFile
	* setting is true. This method returns multiple merges
	* (mergeFactor at a time) so the {@link MergeScheduler}
	* in use may make use of concurrency. */
	@Override
	public MergeSpecification findMergesForOptimize(SegmentInfos infos,
	int maxNumSegments, Set<SegmentInfo> segmentsToOptimize) throws IOException {
	MergeSpecification spec;

	assert maxNumSegments > 0;

	if (!isOptimized(infos, maxNumSegments, segmentsToOptimize)) {

	// Find the newest (rightmost) segment that needs to
	// be optimized (other segments may have been flushed
	// since optimize started):
	int last = infos.size();
	while(last > 0) {
	final SegmentInfo info = infos.info(--last);
	if (segmentsToOptimize.contains(info)) {
	last++;
	break;
	}
	}

	if (last > 0) {

	spec = new MergeSpecification();

	// First, enroll all "full" merges (size
	// mergeFactor) to potentially be run concurrently:
	while (last - maxNumSegments + 1 >= mergeFactor) {
	spec.add(new OneMerge(infos.range(last-mergeFactor, last), useCompoundFile));
	last -= mergeFactor;
	}

	// Only if there are no full merges pending do we
	// add a final partial (< mergeFactor segments) merge:
	if (0 == spec.merges.size()) {
	if (maxNumSegments == 1) {

	// Since we must optimize down to 1 segment, the
	// choice is simple:
	if (last > 1 \|\| !isOptimized(infos.info(0)))
	spec.add(new OneMerge(infos.range(0, last), useCompoundFile));
	} else if (last > maxNumSegments) {

	// Take care to pick a partial merge that is
	// least cost, but does not make the index too
	// lopsided. If we always just picked the
	// partial tail then we could produce a highly
	// lopsided index over time:

	// We must merge this many segments to leave
	// maxNumSegments in the index (from when
	// optimize was first kicked off):
	final int finalMergeSize = last - maxNumSegments + 1;

	// Consider all possible starting points:
	long bestSize = 0;
	int bestStart = 0;

	for(int i=0;i<last-finalMergeSize+1;i++) {
	long sumSize = 0;
	for(int j=0;j<finalMergeSize;j++)
	sumSize += size(infos.info(j+i));
	if (i == 0 \|\| (sumSize < 2*size(infos.info(i-1)) && sumSize < bestSize)) {
	bestStart = i;
	bestSize = sumSize;
	}
	}

	spec.add(new OneMerge(infos.range(bestStart, bestStart+finalMergeSize), useCompoundFile));
	}
	}

	} else
	spec = null;
	} else
	spec = null;

	return spec;
	}

	/**
	* Finds merges necessary to expunge all deletes from the
	* index. We simply merge adjacent segments that have
	* deletes, up to mergeFactor at a time.
	*/
	@Override
	public MergeSpecification findMergesToExpungeDeletes(SegmentInfos segmentInfos)
	throws CorruptIndexException, IOException {
	final int numSegments = segmentInfos.size();

	if (verbose())
	message("findMergesToExpungeDeletes: " + numSegments + " segments");

	MergeSpecification spec = new MergeSpecification();
	int firstSegmentWithDeletions = -1;
	IndexWriter w = writer.get();
	assert w != null;
	for(int i=0;i<numSegments;i++) {
	final SegmentInfo info = segmentInfos.info(i);
	int delCount = w.numDeletedDocs(info);
	if (delCount > 0) {
	if (verbose())
	message(" segment " + info.name + " has deletions");
	if (firstSegmentWithDeletions == -1)
	firstSegmentWithDeletions = i;
	else if (i - firstSegmentWithDeletions == mergeFactor) {
	// We've seen mergeFactor segments in a row with
	// deletions, so force a merge now:
	if (verbose())
	message(" add merge " + firstSegmentWithDeletions + " to " + (i-1) + " inclusive");
	spec.add(new OneMerge(segmentInfos.range(firstSegmentWithDeletions, i), useCompoundFile));
	firstSegmentWithDeletions = i;
	}
	} else if (firstSegmentWithDeletions != -1) {
	// End of a sequence of segments with deletions, so,
	// merge those past segments even if it's fewer than
	// mergeFactor segments
	if (verbose())
	message(" add merge " + firstSegmentWithDeletions + " to " + (i-1) + " inclusive");
	spec.add(new OneMerge(segmentInfos.range(firstSegmentWithDeletions, i), useCompoundFile));
	firstSegmentWithDeletions = -1;
	}
	}

	if (firstSegmentWithDeletions != -1) {
	if (verbose())
	message(" add merge " + firstSegmentWithDeletions + " to " + (numSegments-1) + " inclusive");
	spec.add(new OneMerge(segmentInfos.range(firstSegmentWithDeletions, numSegments), useCompoundFile));
	}

	return spec;
	}

	/** Checks if any merges are now necessary and returns a
	* {@link MergePolicy.MergeSpecification} if so. A merge
	* is necessary when there are more than {@link
	* #setMergeFactor} segments at a given level. When
	* multiple levels have too many segments, this method
	* will return multiple merges, allowing the {@link
	* MergeScheduler} to use concurrency. */
	@Override
	public MergeSpecification findMerges(SegmentInfos infos) throws IOException {

	final int numSegments = infos.size();
	if (verbose())
	message("findMerges: " + numSegments + " segments");

	// Compute levels, which is just log (base mergeFactor)
	// of the size of each segment
	float[] levels = new float[numSegments];
	final float norm = (float) Math.log(mergeFactor);

	for(int i=0;i<numSegments;i++) {
	final SegmentInfo info = infos.info(i);
	long size = size(info);

	// Floor tiny segments
	if (size < 1)
	size = 1;
	levels[i] = (float) Math.log(size)/norm;
	}

	final float levelFloor;
	if (minMergeSize <= 0)
	levelFloor = (float) 0.0;
	else
	levelFloor = (float) (Math.log(minMergeSize)/norm);

	// Now, we quantize the log values into levels. The
	// first level is any segment whose log size is within
	// LEVEL_LOG_SPAN of the max size, or, who has such as
	// segment "to the right". Then, we find the max of all
	// other segments and use that to define the next level
	// segment, etc.

	MergeSpecification spec = null;

	int start = 0;
	while(start < numSegments) {

	// Find max level of all segments not already
	// quantized.
	float maxLevel = levels[start];
	for(int i=1+start;i<numSegments;i++) {
	final float level = levels[i];
	if (level > maxLevel)
	maxLevel = level;
	}

	// Now search backwards for the rightmost segment that
	// falls into this level:
	float levelBottom;
	if (maxLevel < levelFloor)
	// All remaining segments fall into the min level
	levelBottom = -1.0F;
	else {
	levelBottom = (float) (maxLevel - LEVEL_LOG_SPAN);

	// Force a boundary at the level floor
	if (levelBottom < levelFloor && maxLevel >= levelFloor)
	levelBottom = levelFloor;
	}

	int upto = numSegments-1;
	while(upto >= start) {
	if (levels[upto] >= levelBottom) {
	break;
	}
	upto--;
	}
	if (verbose())
	message(" level " + levelBottom + " to " + maxLevel + ": " + (1+upto-start) + " segments");

	// Finally, record all merges that are viable at this level:
	int end = start + mergeFactor;
	while(end <= 1+upto) {
	boolean anyTooLarge = false;
	for(int i=start;i<end;i++) {
	final SegmentInfo info = infos.info(i);
	anyTooLarge \|= (size(info) >= maxMergeSize \|\| sizeDocs(info) >= maxMergeDocs);
	}

	if (!anyTooLarge) {
	if (spec == null)
	spec = new MergeSpecification();
	if (verbose())
	message(" " + start + " to " + end + ": add this merge");
	spec.add(new OneMerge(infos.range(start, end), useCompoundFile));
	} else if (verbose())
	message(" " + start + " to " + end + ": contains segment over maxMergeSize or maxMergeDocs; skipping");

	start = end;
	end = start + mergeFactor;
	}

	start = 1+upto;
	}

	return spec;
	}

	/** <p>Determines the largest segment (measured by
	* document count) that may be merged with other segments.
	* Small values (e.g., less than 10,000) are best for
	* interactive indexing, as this limits the length of
	* pauses while indexing to a few seconds. Larger values
	* are best for batched indexing and speedier
	* searches.</p>
	*
	* <p>The default value is {@link Integer#MAX_VALUE}.</p>
	*
	* <p>The default merge policy ({@link
	* LogByteSizeMergePolicy}) also allows you to set this
	* limit by net size (in MB) of the segment, using {@link
	* LogByteSizeMergePolicy#setMaxMergeMB}.</p>
	*/
	public void setMaxMergeDocs(int maxMergeDocs) {
	this.maxMergeDocs = maxMergeDocs;
	}

	/** Returns the largest segment (measured by document
	* count) that may be merged with other segments.
	* @see #setMaxMergeDocs */
	public int getMaxMergeDocs() {
	return maxMergeDocs;
	}

	}