| Index: CHANGES.txt |
| =================================================================== |
| --- CHANGES.txt (revision 1021827) |
| +++ CHANGES.txt (working copy) |
| @@ -100,6 +100,10 @@ |
| test lock just before the real lock is acquired. (Surinder Pal |
| Singh Bindra via Mike McCandless) |
| |
| +* LUCENE-2701: maxMergeMB and maxMergeDocs constraints set on LogMergePolicy now |
| + affect optimize() as well (as opposed to only regular merges). This means that |
| + you can run optimize() and segments that are too large will not be merged. (Shai Erera) |
| + |
| API Changes |
| |
| * LUCENE-2076: Rename FSDirectory.getFile -> getDirectory. (George |
| Index: src/test/org/apache/lucene/index/TestSizeBoundedOptimize.java |
| =================================================================== |
| --- src/test/org/apache/lucene/index/TestSizeBoundedOptimize.java (revision 0) |
| +++ src/test/org/apache/lucene/index/TestSizeBoundedOptimize.java (revision 0) |
| @@ -0,0 +1,311 @@ |
| +package org.apache.lucene.index; |
| + |
| +/** |
| + * Licensed to the Apache Software Foundation (ASF) under one or more |
| + * contributor license agreements. See the NOTICE file distributed with |
| + * this work for additional information regarding copyright ownership. |
| + * The ASF licenses this file to You under the Apache License, Version 2.0 |
| + * (the "License"); you may not use this file except in compliance with |
| + * the License. You may obtain a copy of the License at |
| + * |
| + * http://www.apache.org/licenses/LICENSE-2.0 |
| + * |
| + * Unless required by applicable law or agreed to in writing, software |
| + * distributed under the License is distributed on an "AS IS" BASIS, |
| + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| + * See the License for the specific language governing permissions and |
| + * limitations under the License. |
| + */ |
| + |
| +import java.io.IOException; |
| + |
| +import org.apache.lucene.document.Document; |
| +import org.apache.lucene.store.Directory; |
| +import org.apache.lucene.store.RAMDirectory; |
| +import org.apache.lucene.util.LuceneTestCase; |
| + |
| +public class TestSizeBoundedOptimize extends LuceneTestCase { |
| + |
| + private void addDocs(IndexWriter writer, int numDocs) throws IOException { |
| + for (int i = 0; i < numDocs; i++) { |
| + Document doc = new Document(); |
| + writer.addDocument(doc); |
| + } |
| + writer.commit(); |
| + } |
| + |
| + public void testByteSizeLimit() throws Exception { |
| + // tests that the max merge size constraint is applied during optimize. |
| + Directory dir = new RAMDirectory(); |
| + |
| + // Prepare an index w/ several small segments and a large one. |
| + IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, null); |
| + // prevent any merges from happening. |
| + conf.setMergePolicy(NoMergePolicy.COMPOUND_FILES); |
| + IndexWriter writer = new IndexWriter(dir, conf); |
| + final int numSegments = 15; |
| + for (int i = 0; i < numSegments; i++) { |
| + int numDocs = i == 7 ? 10 : 1; |
| + addDocs(writer, numDocs); |
| + } |
| + |
| + writer.close(); |
| + |
| + conf = newIndexWriterConfig(TEST_VERSION_CURRENT, null); |
| + LogByteSizeMergePolicy lmp = new LogByteSizeMergePolicy(); |
| + lmp.setMaxMergeMB(200.0 / (1 << 20)); // ~200 bytes tops |
| + conf.setMergePolicy(lmp); |
| + |
| + writer = new IndexWriter(dir, conf); |
| + writer.optimize(); |
| + writer.close(); |
| + |
| + // Should only be 3 segments in the index, because one of them exceeds the size limit |
| + SegmentInfos sis = new SegmentInfos(); |
| + sis.read(dir); |
| + assertEquals(3, sis.size()); |
| + } |
| + |
| + public void testNumDocsLimit() throws Exception { |
| + // tests that the max merge docs constraint is applied during optimize. |
| + Directory dir = new RAMDirectory(); |
| + |
| + // Prepare an index w/ several small segments and a large one. |
| + IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, null); |
| + // prevent any merges from happening. |
| + conf.setMergePolicy(NoMergePolicy.COMPOUND_FILES); |
| + IndexWriter writer = new IndexWriter(dir, conf); |
| + |
| + addDocs(writer, 3); |
| + addDocs(writer, 3); |
| + addDocs(writer, 5); |
| + addDocs(writer, 3); |
| + addDocs(writer, 3); |
| + addDocs(writer, 3); |
| + addDocs(writer, 3); |
| + |
| + writer.close(); |
| + |
| + conf = newIndexWriterConfig(TEST_VERSION_CURRENT, null); |
| + LogMergePolicy lmp = new LogDocMergePolicy(); |
| + lmp.setMaxMergeDocs(3); |
| + conf.setMergePolicy(lmp); |
| + |
| + writer = new IndexWriter(dir, conf); |
| + writer.optimize(); |
| + writer.close(); |
| + |
| + // Should only be 3 segments in the index, because one of them exceeds the max merge docs limit |
| + SegmentInfos sis = new SegmentInfos(); |
| + sis.read(dir); |
| + assertEquals(3, sis.size()); |
| + } |
| + |
| + public void testLastSegmentTooLarge() throws Exception { |
| + Directory dir = new RAMDirectory(); |
| + |
| + IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, null); |
| + conf.setMergePolicy(NoMergePolicy.COMPOUND_FILES); |
| + IndexWriter writer = new IndexWriter(dir, conf); |
| + |
| + addDocs(writer, 3); |
| + addDocs(writer, 3); |
| + addDocs(writer, 3); |
| + addDocs(writer, 5); |
| + |
| + writer.close(); |
| + |
| + conf = newIndexWriterConfig(TEST_VERSION_CURRENT, null); |
| + LogMergePolicy lmp = new LogDocMergePolicy(); |
| + lmp.setMaxMergeDocs(3); |
| + conf.setMergePolicy(lmp); |
| + |
| + writer = new IndexWriter(dir, conf); |
| + writer.optimize(); |
| + writer.close(); |
| + |
| + SegmentInfos sis = new SegmentInfos(); |
| + sis.read(dir); |
| + assertEquals(2, sis.size()); |
| + } |
| + |
| + public void testFirstSegmentTooLarge() throws Exception { |
| + Directory dir = new RAMDirectory(); |
| + |
| + IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, null); |
| + conf.setMergePolicy(NoMergePolicy.COMPOUND_FILES); |
| + IndexWriter writer = new IndexWriter(dir, conf); |
| + |
| + addDocs(writer, 5); |
| + addDocs(writer, 3); |
| + addDocs(writer, 3); |
| + addDocs(writer, 3); |
| + |
| + writer.close(); |
| + |
| + conf = newIndexWriterConfig(TEST_VERSION_CURRENT, null); |
| + LogMergePolicy lmp = new LogDocMergePolicy(); |
| + lmp.setMaxMergeDocs(3); |
| + conf.setMergePolicy(lmp); |
| + |
| + writer = new IndexWriter(dir, conf); |
| + writer.optimize(); |
| + writer.close(); |
| + |
| + SegmentInfos sis = new SegmentInfos(); |
| + sis.read(dir); |
| + assertEquals(2, sis.size()); |
| + } |
| + |
| + public void testAllSegmentsSmall() throws Exception { |
| + Directory dir = new RAMDirectory(); |
| + |
| + IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, null); |
| + conf.setMergePolicy(NoMergePolicy.COMPOUND_FILES); |
| + IndexWriter writer = new IndexWriter(dir, conf); |
| + |
| + addDocs(writer, 3); |
| + addDocs(writer, 3); |
| + addDocs(writer, 3); |
| + addDocs(writer, 3); |
| + |
| + writer.close(); |
| + |
| + conf = newIndexWriterConfig(TEST_VERSION_CURRENT, null); |
| + LogMergePolicy lmp = new LogDocMergePolicy(); |
| + lmp.setMaxMergeDocs(3); |
| + conf.setMergePolicy(lmp); |
| + |
| + writer = new IndexWriter(dir, conf); |
| + writer.optimize(); |
| + writer.close(); |
| + |
| + SegmentInfos sis = new SegmentInfos(); |
| + sis.read(dir); |
| + assertEquals(1, sis.size()); |
| + } |
| + |
| + public void testAllSegmentsLarge() throws Exception { |
| + Directory dir = new RAMDirectory(); |
| + |
| + IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, null); |
| + conf.setMergePolicy(NoMergePolicy.COMPOUND_FILES); |
| + IndexWriter writer = new IndexWriter(dir, conf); |
| + |
| + addDocs(writer, 3); |
| + addDocs(writer, 3); |
| + addDocs(writer, 3); |
| + |
| + writer.close(); |
| + |
| + conf = newIndexWriterConfig(TEST_VERSION_CURRENT, null); |
| + LogMergePolicy lmp = new LogDocMergePolicy(); |
| + lmp.setMaxMergeDocs(2); |
| + conf.setMergePolicy(lmp); |
| + |
| + writer = new IndexWriter(dir, conf); |
| + writer.optimize(); |
| + writer.close(); |
| + |
| + SegmentInfos sis = new SegmentInfos(); |
| + sis.read(dir); |
| + assertEquals(3, sis.size()); |
| + } |
| + |
| + public void testOneLargeOneSmall() throws Exception { |
| + Directory dir = new RAMDirectory(); |
| + |
| + IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, null); |
| + conf.setMergePolicy(NoMergePolicy.COMPOUND_FILES); |
| + IndexWriter writer = new IndexWriter(dir, conf); |
| + |
| + addDocs(writer, 3); |
| + addDocs(writer, 5); |
| + addDocs(writer, 3); |
| + addDocs(writer, 5); |
| + |
| + writer.close(); |
| + |
| + conf = newIndexWriterConfig(TEST_VERSION_CURRENT, null); |
| + LogMergePolicy lmp = new LogDocMergePolicy(); |
| + lmp.setMaxMergeDocs(3); |
| + conf.setMergePolicy(lmp); |
| + |
| + writer = new IndexWriter(dir, conf); |
| + writer.optimize(); |
| + writer.close(); |
| + |
| + SegmentInfos sis = new SegmentInfos(); |
| + sis.read(dir); |
| + assertEquals(4, sis.size()); |
| + } |
| + |
| + public void testMergeFactor() throws Exception { |
| + Directory dir = new RAMDirectory(); |
| + |
| + IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, null); |
| + conf.setMergePolicy(NoMergePolicy.COMPOUND_FILES); |
| + IndexWriter writer = new IndexWriter(dir, conf); |
| + |
| + addDocs(writer, 3); |
| + addDocs(writer, 3); |
| + addDocs(writer, 3); |
| + addDocs(writer, 3); |
| + addDocs(writer, 5); |
| + addDocs(writer, 3); |
| + addDocs(writer, 3); |
| + |
| + writer.close(); |
| + |
| + conf = newIndexWriterConfig(TEST_VERSION_CURRENT, null); |
| + LogMergePolicy lmp = new LogDocMergePolicy(); |
| + lmp.setMaxMergeDocs(3); |
| + lmp.setMergeFactor(2); |
| + conf.setMergePolicy(lmp); |
| + |
| + writer = new IndexWriter(dir, conf); |
| + writer.optimize(); |
| + writer.close(); |
| + |
| + // Should only be 4 segments in the index, because of the merge factor and |
| + // max merge docs settings. |
| + SegmentInfos sis = new SegmentInfos(); |
| + sis.read(dir); |
| + assertEquals(4, sis.size()); |
| + } |
| + |
| + public void testSingleNonOptimizedSegment() throws Exception { |
| + Directory dir = new RAMDirectory(); |
| + |
| + IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, null); |
| + conf.setMergePolicy(NoMergePolicy.COMPOUND_FILES); |
| + IndexWriter writer = new IndexWriter(dir, conf); |
| + |
| + addDocs(writer, 3); |
| + addDocs(writer, 5); |
| + addDocs(writer, 3); |
| + |
| + writer.close(); |
| + |
| + // delete the last document, so that the last segment is no longer optimized (has a deletion). |
| + IndexReader r = IndexReader.open(dir, false); |
| + r.deleteDocument(r.numDocs() - 1); |
| + r.close(); |
| + |
| + conf = newIndexWriterConfig(TEST_VERSION_CURRENT, null); |
| + LogMergePolicy lmp = new LogDocMergePolicy(); |
| + lmp.setMaxMergeDocs(3); |
| + conf.setMergePolicy(lmp); |
| + |
| + writer = new IndexWriter(dir, conf); |
| + writer.optimize(); |
| + writer.close(); |
| + |
| + // Verify that the last segment does not have deletions. |
| + SegmentInfos sis = new SegmentInfos(); |
| + sis.read(dir); |
| + assertEquals(3, sis.size()); |
| + assertFalse(sis.info(2).hasDeletions()); |
| + } |
| + |
| +} |
| |
| Property changes on: src/test/org/apache/lucene/index/TestSizeBoundedOptimize.java |
| ___________________________________________________________________ |
| Added: svn:keywords |
| + Date Author Id Revision HeadURL |
| Added: svn:eol-style |
| + native |
| |
| Index: src/java/org/apache/lucene/index/MergePolicy.java |
| =================================================================== |
| --- src/java/org/apache/lucene/index/MergePolicy.java (revision 1021827) |
| +++ src/java/org/apache/lucene/index/MergePolicy.java (working copy) |
| @@ -77,8 +77,8 @@ |
| SegmentReader[] readers; // used by IndexWriter |
| SegmentReader[] readersClone; // used by IndexWriter |
| List<String> mergeFiles; // used by IndexWriter |
| - final SegmentInfos segments; |
| - final boolean useCompoundFile; |
| + public final SegmentInfos segments; |
| + public final boolean useCompoundFile; |
| boolean aborted; |
| Throwable error; |
| boolean paused; |
| @@ -146,7 +146,7 @@ |
| return paused; |
| } |
| |
| - String segString(Directory dir) { |
| + public String segString(Directory dir) { |
| StringBuilder b = new StringBuilder(); |
| final int numSegments = segments.size(); |
| for(int i=0;i<numSegments;i++) { |
| @@ -162,6 +162,30 @@ |
| } |
| return b.toString(); |
| } |
| + |
| + /** |
| + * Returns the total size in bytes of this merge. Note that this does not |
| + * indicate the size of the merged segment, but the input total size. |
| + */ |
| + public long totalBytesSize() throws IOException { |
| + long total = 0; |
| + for (SegmentInfo info : segments) { |
| + total += info.sizeInBytes(); |
| + } |
| + return total; |
| + } |
| + |
| + /** |
| + * Returns the total number of documents that are included with this merge. |
| + * Note that this does not indicate the number of documents after the merge. |
| + */ |
| + public int totalNumDocs() throws IOException { |
| + int total = 0; |
| + for (SegmentInfo info : segments) { |
| + total += info.docCount; |
| + } |
| + return total; |
| + } |
| } |
| |
| /** |
| @@ -176,7 +200,7 @@ |
| * The subset of segments to be included in the primitive merge. |
| */ |
| |
| - public List<OneMerge> merges = new ArrayList<OneMerge>(); |
| + public final List<OneMerge> merges = new ArrayList<OneMerge>(); |
| |
| public void add(OneMerge merge) { |
| merges.add(merge); |
| Index: src/java/org/apache/lucene/index/LogMergePolicy.java |
| =================================================================== |
| --- src/java/org/apache/lucene/index/LogMergePolicy.java (revision 1021827) |
| +++ src/java/org/apache/lucene/index/LogMergePolicy.java (working copy) |
| @@ -54,16 +54,16 @@ |
| * or larger will never be merged. @see setMaxMergeDocs */ |
| public static final int DEFAULT_MAX_MERGE_DOCS = Integer.MAX_VALUE; |
| |
| - private int mergeFactor = DEFAULT_MERGE_FACTOR; |
| + protected int mergeFactor = DEFAULT_MERGE_FACTOR; |
| |
| - long minMergeSize; |
| - long maxMergeSize; |
| - int maxMergeDocs = DEFAULT_MAX_MERGE_DOCS; |
| + protected long minMergeSize; |
| + protected long maxMergeSize; |
| + protected int maxMergeDocs = DEFAULT_MAX_MERGE_DOCS; |
| |
| protected boolean calibrateSizeByDeletes = true; |
| |
| - private boolean useCompoundFile = true; |
| - private boolean useCompoundDocStore = true; |
| + protected boolean useCompoundFile = true; |
| + protected boolean useCompoundDocStore = true; |
| |
| public LogMergePolicy() { |
| super(); |
| @@ -74,7 +74,7 @@ |
| return w != null && w.verbose(); |
| } |
| |
| - private void message(String message) { |
| + protected void message(String message) { |
| if (verbose()) |
| writer.get().message("LMP: " + message); |
| } |
| @@ -180,7 +180,7 @@ |
| } |
| } |
| |
| - private boolean isOptimized(SegmentInfos infos, int maxNumSegments, Set<SegmentInfo> segmentsToOptimize) throws IOException { |
| + protected boolean isOptimized(SegmentInfos infos, int maxNumSegments, Set<SegmentInfo> segmentsToOptimize) throws IOException { |
| final int numSegments = infos.size(); |
| int numToOptimize = 0; |
| SegmentInfo optimizeInfo = null; |
| @@ -199,7 +199,7 @@ |
| /** Returns true if this single info is optimized (has no |
| * pending norms or deletes, is in the same dir as the |
| * writer, and matches the current compound file setting */ |
| - private boolean isOptimized(SegmentInfo info) |
| + protected boolean isOptimized(SegmentInfo info) |
| throws IOException { |
| IndexWriter w = writer.get(); |
| assert w != null; |
| @@ -210,6 +210,103 @@ |
| info.getUseCompoundFile() == useCompoundFile; |
| } |
| |
| + /** |
| + * Returns the merges necessary to optimize the index, taking the max merge |
| + * size or max merge docs into consideration. This method attempts to respect |
| + * the {@code maxNumSegments} parameter; however, due to size constraints, it |
| + * may be that more than that number of segments will remain in the index. |
| + * Thus, this method does not guarantee that exactly {@code maxNumSegments} |
| + * segments will remain after it returns. |
| + */ |
| + private MergeSpecification findMergesForOptimizeSizeLimit( |
| + SegmentInfos infos, int maxNumSegments, int last) throws IOException { |
| + MergeSpecification spec = new MergeSpecification(); |
| + |
| + int start = last - 1; |
| + while (start >= 0) { |
| + SegmentInfo info = infos.info(start); |
| + if (size(info) > maxMergeSize || sizeDocs(info) > maxMergeDocs) { |
| + // need to skip that segment + add a merge for the 'right' segments, |
| + // unless there is only 1 which is optimized. |
| + if (last - start - 1 > 1 || (start != last - 1 && !isOptimized(infos.info(start + 1)))) { |
| + // there is more than 1 segment to the right of this one, or an unoptimized single segment. |
| + spec.add(new OneMerge(infos.range(start + 1, last), useCompoundFile)); |
| + } |
| + last = start; |
| + } else if (last - start == mergeFactor) { |
| + // mergeFactor eligible segments were found, add them as a merge. |
| + spec.add(new OneMerge(infos.range(start, last), useCompoundFile)); |
| + last = start; |
| + } |
| + --start; |
| + } |
| + |
| + // Add any left-over segments, unless there is just 1 already optimized. |
| + if (last > 0 && (++start + 1 < last || !isOptimized(infos.info(start)))) { |
| + spec.add(new OneMerge(infos.range(start, last), useCompoundFile)); |
| + } |
| + |
| + return spec.merges.size() == 0 ? null : spec; |
| + } |
| + |
| + /** |
| + * Returns the merges necessary to optimize the index. This method constrains |
| + * the returned merges only by the {@code maxNumSegments} parameter, and |
| + * guarantees that exactly that number of segments will remain in the index. |
| + */ |
| + private MergeSpecification findMergesForOptimizeMaxNumSegments(SegmentInfos infos, int maxNumSegments, int last) throws IOException { |
| + MergeSpecification spec = new MergeSpecification(); |
| + |
| + // First, enroll all "full" merges (size |
| + // mergeFactor) to potentially be run concurrently: |
| + while (last - maxNumSegments + 1 >= mergeFactor) { |
| + spec.add(new OneMerge(infos.range(last-mergeFactor, last), useCompoundFile)); |
| + last -= mergeFactor; |
| + } |
| + |
| + // Only if there are no full merges pending do we |
| + // add a final partial (< mergeFactor segments) merge: |
| + if (0 == spec.merges.size()) { |
| + if (maxNumSegments == 1) { |
| + |
| + // Since we must optimize down to 1 segment, the |
| + // choice is simple: |
| + if (last > 1 || !isOptimized(infos.info(0))) { |
| + spec.add(new OneMerge(infos.range(0, last), useCompoundFile)); |
| + } |
| + } else if (last > maxNumSegments) { |
| + |
| + // Take care to pick a partial merge that is |
| + // least cost, but does not make the index too |
| + // lopsided. If we always just picked the |
| + // partial tail then we could produce a highly |
| + // lopsided index over time: |
| + |
| + // We must merge this many segments to leave |
| + // maxNumSegments in the index (from when |
| + // optimize was first kicked off): |
| + final int finalMergeSize = last - maxNumSegments + 1; |
| + |
| + // Consider all possible starting points: |
| + long bestSize = 0; |
| + int bestStart = 0; |
| + |
| + for(int i=0;i<last-finalMergeSize+1;i++) { |
| + long sumSize = 0; |
| + for(int j=0;j<finalMergeSize;j++) |
| + sumSize += size(infos.info(j+i)); |
| + if (i == 0 || (sumSize < 2*size(infos.info(i-1)) && sumSize < bestSize)) { |
| + bestStart = i; |
| + bestSize = sumSize; |
| + } |
| + } |
| + |
| + spec.add(new OneMerge(infos.range(bestStart, bestStart+finalMergeSize), useCompoundFile)); |
| + } |
| + } |
| + return spec.merges.size() == 0 ? null : spec; |
| + } |
| + |
| /** Returns the merges necessary to optimize the index. |
| * This merge policy defines "optimized" to mean only one |
| * segment in the index, where that segment has no |
| @@ -221,81 +318,45 @@ |
| @Override |
| public MergeSpecification findMergesForOptimize(SegmentInfos infos, |
| int maxNumSegments, Set<SegmentInfo> segmentsToOptimize) throws IOException { |
| - MergeSpecification spec; |
| |
| assert maxNumSegments > 0; |
| |
| - if (!isOptimized(infos, maxNumSegments, segmentsToOptimize)) { |
| - |
| - // Find the newest (rightmost) segment that needs to |
| - // be optimized (other segments may have been flushed |
| - // since optimize started): |
| - int last = infos.size(); |
| - while(last > 0) { |
| - final SegmentInfo info = infos.info(--last); |
| - if (segmentsToOptimize.contains(info)) { |
| - last++; |
| - break; |
| - } |
| + // If the segments are already optimized (e.g. there's only 1 segment), or |
| + // there are <= maxNumSegments segments, all optimized, nothing to do. |
| + if (isOptimized(infos, maxNumSegments, segmentsToOptimize)) return null; |
| + |
| + // Find the newest (rightmost) segment that needs to |
| + // be optimized (other segments may have been flushed |
| + // since optimize started): |
| + int last = infos.size(); |
| + while (last > 0) { |
| + final SegmentInfo info = infos.info(--last); |
| + if (segmentsToOptimize.contains(info)) { |
| + last++; |
| + break; |
| } |
| + } |
| |
| - if (last > 0) { |
| + if (last == 0) return null; |
| + |
| + // There is only one segment already, and it is optimized |
| + if (maxNumSegments == 1 && last == 1 && isOptimized(infos.info(0))) return null; |
| |
| - spec = new MergeSpecification(); |
| - |
| - // First, enroll all "full" merges (size |
| - // mergeFactor) to potentially be run concurrently: |
| - while (last - maxNumSegments + 1 >= mergeFactor) { |
| - spec.add(new OneMerge(infos.range(last-mergeFactor, last), useCompoundFile)); |
| - last -= mergeFactor; |
| - } |
| - |
| - // Only if there are no full merges pending do we |
| - // add a final partial (< mergeFactor segments) merge: |
| - if (0 == spec.merges.size()) { |
| - if (maxNumSegments == 1) { |
| - |
| - // Since we must optimize down to 1 segment, the |
| - // choice is simple: |
| - if (last > 1 || !isOptimized(infos.info(0))) |
| - spec.add(new OneMerge(infos.range(0, last), useCompoundFile)); |
| - } else if (last > maxNumSegments) { |
| - |
| - // Take care to pick a partial merge that is |
| - // least cost, but does not make the index too |
| - // lopsided. If we always just picked the |
| - // partial tail then we could produce a highly |
| - // lopsided index over time: |
| - |
| - // We must merge this many segments to leave |
| - // maxNumSegments in the index (from when |
| - // optimize was first kicked off): |
| - final int finalMergeSize = last - maxNumSegments + 1; |
| - |
| - // Consider all possible starting points: |
| - long bestSize = 0; |
| - int bestStart = 0; |
| - |
| - for(int i=0;i<last-finalMergeSize+1;i++) { |
| - long sumSize = 0; |
| - for(int j=0;j<finalMergeSize;j++) |
| - sumSize += size(infos.info(j+i)); |
| - if (i == 0 || (sumSize < 2*size(infos.info(i-1)) && sumSize < bestSize)) { |
| - bestStart = i; |
| - bestSize = sumSize; |
| - } |
| - } |
| - |
| - spec.add(new OneMerge(infos.range(bestStart, bestStart+finalMergeSize), useCompoundFile)); |
| - } |
| - } |
| - |
| - } else |
| - spec = null; |
| - } else |
| - spec = null; |
| - |
| - return spec; |
| + // Check if there are any segments above the threshold |
| + boolean anyTooLarge = false; |
| + for (int i = 0; i < last; i++) { |
| + SegmentInfo info = infos.info(i); |
| + if (size(info) > maxMergeSize || sizeDocs(info) > maxMergeDocs) { |
| + anyTooLarge = true; |
| + break; |
| + } |
| + } |
| + |
| + if (anyTooLarge) { |
| + return findMergesForOptimizeSizeLimit(infos, maxNumSegments, last); |
| + } else { |
| + return findMergesForOptimizeMaxNumSegments(infos, maxNumSegments, last); |
| + } |
| } |
| |
| /** |
| Index: contrib/misc/src/java/org/apache/lucene/index/BalancedSegmentMergePolicy.java |
| =================================================================== |
| --- contrib/misc/src/java/org/apache/lucene/index/BalancedSegmentMergePolicy.java (revision 1021827) |
| +++ contrib/misc/src/java/org/apache/lucene/index/BalancedSegmentMergePolicy.java (working copy) |
| @@ -103,31 +103,6 @@ |
| } |
| } |
| |
| - private boolean isOptimized(SegmentInfos infos, IndexWriter writer, int maxNumSegments, Set<SegmentInfo> segmentsToOptimize) throws IOException { |
| - final int numSegments = infos.size(); |
| - int numToOptimize = 0; |
| - SegmentInfo optimizeInfo = null; |
| - for(int i=0;i<numSegments && numToOptimize <= maxNumSegments;i++) { |
| - final SegmentInfo info = infos.info(i); |
| - if (segmentsToOptimize.contains(info)) { |
| - numToOptimize++; |
| - optimizeInfo = info; |
| - } |
| - } |
| - |
| - return numToOptimize <= maxNumSegments && |
| - (numToOptimize != 1 || isOptimized(writer, optimizeInfo)); |
| - } |
| - |
| - private boolean isOptimized(IndexWriter writer, SegmentInfo info) |
| - throws IOException { |
| - assert writer != null; |
| - return !info.hasDeletions() && |
| - !info.hasSeparateNorms() && |
| - info.dir == writer.getDirectory() && |
| - info.getUseCompoundFile() == getUseCompoundFile(); |
| - } |
| - |
| @Override |
| public MergeSpecification findMergesForOptimize(SegmentInfos infos, int maxNumSegments, Set<SegmentInfo> segmentsToOptimize) throws IOException { |
| |
| @@ -135,7 +110,7 @@ |
| |
| MergeSpecification spec = null; |
| |
| - if (!isOptimized(infos, writer.get(), maxNumSegments, segmentsToOptimize)) { |
| + if (!isOptimized(infos, maxNumSegments, segmentsToOptimize)) { |
| |
| // Find the newest (rightmost) segment that needs to |
| // be optimized (other segments may have been flushed |
| @@ -158,7 +133,7 @@ |
| // Since we must optimize down to 1 segment, the |
| // choice is simple: |
| boolean useCompoundFile = getUseCompoundFile(); |
| - if (last > 1 || !isOptimized(writer.get(), infos.info(0))) { |
| + if (last > 1 || !isOptimized(infos.info(0))) { |
| |
| spec = new MergeSpecification(); |
| spec.add(new OneMerge(infos.range(0, last), useCompoundFile)); |