| Index: CHANGES.txt |
| =================================================================== |
| --- CHANGES.txt (revision 1021827) |
| +++ CHANGES.txt (working copy) |
| @@ -100,6 +100,10 @@ |
| test lock just before the real lock is acquired. (Surinder Pal |
| Singh Bindra via Mike McCandless) |
| |
| +* LUCENE-2701: maxMergeMB and maxMergeDocs constraints set on LogMergePolicy now |
| + affect optimize() as well (as opposed to only regular merges). This means that |
| + you can run optimize() and segments that are too large will not be merged. (Shai Erera) |
| + |
| API Changes |
| |
| * LUCENE-2076: Rename FSDirectory.getFile -> getDirectory. (George |
| Index: src/test/org/apache/lucene/index/TestSizeBoundedOptimize.java |
| =================================================================== |
| --- src/test/org/apache/lucene/index/TestSizeBoundedOptimize.java (revision 0) |
| +++ src/test/org/apache/lucene/index/TestSizeBoundedOptimize.java (revision 0) |
| @@ -0,0 +1,311 @@ |
| +package org.apache.lucene.index; |
| + |
| +/** |
| + * Licensed to the Apache Software Foundation (ASF) under one or more |
| + * contributor license agreements. See the NOTICE file distributed with |
| + * this work for additional information regarding copyright ownership. |
| + * The ASF licenses this file to You under the Apache License, Version 2.0 |
| + * (the "License"); you may not use this file except in compliance with |
| + * the License. You may obtain a copy of the License at |
| + * |
| + * http://www.apache.org/licenses/LICENSE-2.0 |
| + * |
| + * Unless required by applicable law or agreed to in writing, software |
| + * distributed under the License is distributed on an "AS IS" BASIS, |
| + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| + * See the License for the specific language governing permissions and |
| + * limitations under the License. |
| + */ |
| + |
| +import java.io.IOException; |
| + |
| +import org.apache.lucene.document.Document; |
| +import org.apache.lucene.store.Directory; |
| +import org.apache.lucene.store.RAMDirectory; |
| +import org.apache.lucene.util.LuceneTestCase; |
| + |
| +public class TestSizeBoundedOptimize extends LuceneTestCase { |
| + |
| + private void addDocs(IndexWriter writer, int numDocs) throws IOException { |
| + for (int i = 0; i < numDocs; i++) { |
| + Document doc = new Document(); |
| + writer.addDocument(doc); |
| + } |
| + writer.commit(); |
| + } |
| + |
| + public void testByteSizeLimit() throws Exception { |
| + // tests that the max merge size constraint is applied during optimize. |
| + Directory dir = new RAMDirectory(); |
| + |
| + // Prepare an index w/ several small segments and a large one. |
| + IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, null); |
| + // prevent any merges from happening. |
| + conf.setMergePolicy(NoMergePolicy.COMPOUND_FILES); |
| + IndexWriter writer = new IndexWriter(dir, conf); |
| + final int numSegments = 15; |
| + for (int i = 0; i < numSegments; i++) { |
| + int numDocs = i == 7 ? 10 : 1; |
| + addDocs(writer, numDocs); |
| + } |
| + |
| + writer.close(); |
| + |
| + conf = newIndexWriterConfig(TEST_VERSION_CURRENT, null); |
| + LogByteSizeMergePolicy lmp = new LogByteSizeMergePolicy(); |
| + lmp.setMaxMergeMB(200.0 / (1 << 20)); // ~200 bytes tops |
| + conf.setMergePolicy(lmp); |
| + |
| + writer = new IndexWriter(dir, conf); |
| + writer.optimize(); |
| + writer.close(); |
| + |
| + // Should only be 3 segments in the index, because one of them exceeds the size limit |
| + SegmentInfos sis = new SegmentInfos(); |
| + sis.read(dir); |
| + assertEquals(3, sis.size()); |
| + } |
| + |
| + public void testNumDocsLimit() throws Exception { |
| + // tests that the max merge docs constraint is applied during optimize. |
| + Directory dir = new RAMDirectory(); |
| + |
| + // Prepare an index w/ several small segments and a large one. |
| + IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, null); |
| + // prevent any merges from happening. |
| + conf.setMergePolicy(NoMergePolicy.COMPOUND_FILES); |
| + IndexWriter writer = new IndexWriter(dir, conf); |
| + |
| + addDocs(writer, 3); |
| + addDocs(writer, 3); |
| + addDocs(writer, 5); |
| + addDocs(writer, 3); |
| + addDocs(writer, 3); |
| + addDocs(writer, 3); |
| + addDocs(writer, 3); |
| + |
| + writer.close(); |
| + |
| + conf = newIndexWriterConfig(TEST_VERSION_CURRENT, null); |
| + LogMergePolicy lmp = new LogDocMergePolicy(); |
| + lmp.setMaxMergeDocs(3); |
| + conf.setMergePolicy(lmp); |
| + |
| + writer = new IndexWriter(dir, conf); |
| + writer.optimize(); |
| + writer.close(); |
| + |
| + // Should only be 3 segments in the index, because one of them exceeds the max merge docs limit |
| + SegmentInfos sis = new SegmentInfos(); |
| + sis.read(dir); |
| + assertEquals(3, sis.size()); |
| + } |
| + |
| + public void testLastSegmentTooLarge() throws Exception { |
| + Directory dir = new RAMDirectory(); |
| + |
| + IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, null); |
| + conf.setMergePolicy(NoMergePolicy.COMPOUND_FILES); |
| + IndexWriter writer = new IndexWriter(dir, conf); |
| + |
| + addDocs(writer, 3); |
| + addDocs(writer, 3); |
| + addDocs(writer, 3); |
| + addDocs(writer, 5); |
| + |
| + writer.close(); |
| + |
| + conf = newIndexWriterConfig(TEST_VERSION_CURRENT, null); |
| + LogMergePolicy lmp = new LogDocMergePolicy(); |
| + lmp.setMaxMergeDocs(3); |
| + conf.setMergePolicy(lmp); |
| + |
| + writer = new IndexWriter(dir, conf); |
| + writer.optimize(); |
| + writer.close(); |
| + |
| + SegmentInfos sis = new SegmentInfos(); |
| + sis.read(dir); |
| + assertEquals(2, sis.size()); |
| + } |
| + |
| + public void testFirstSegmentTooLarge() throws Exception { |
| + Directory dir = new RAMDirectory(); |
| + |
| + IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, null); |
| + conf.setMergePolicy(NoMergePolicy.COMPOUND_FILES); |
| + IndexWriter writer = new IndexWriter(dir, conf); |
| + |
| + addDocs(writer, 5); |
| + addDocs(writer, 3); |
| + addDocs(writer, 3); |
| + addDocs(writer, 3); |
| + |
| + writer.close(); |
| + |
| + conf = newIndexWriterConfig(TEST_VERSION_CURRENT, null); |
| + LogMergePolicy lmp = new LogDocMergePolicy(); |
| + lmp.setMaxMergeDocs(3); |
| + conf.setMergePolicy(lmp); |
| + |
| + writer = new IndexWriter(dir, conf); |
| + writer.optimize(); |
| + writer.close(); |
| + |
| + SegmentInfos sis = new SegmentInfos(); |
| + sis.read(dir); |
| + assertEquals(2, sis.size()); |
| + } |
| + |
| + public void testAllSegmentsSmall() throws Exception { |
| + Directory dir = new RAMDirectory(); |
| + |
| + IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, null); |
| + conf.setMergePolicy(NoMergePolicy.COMPOUND_FILES); |
| + IndexWriter writer = new IndexWriter(dir, conf); |
| + |
| + addDocs(writer, 3); |
| + addDocs(writer, 3); |
| + addDocs(writer, 3); |
| + addDocs(writer, 3); |
| + |
| + writer.close(); |
| + |
| + conf = newIndexWriterConfig(TEST_VERSION_CURRENT, null); |
| + LogMergePolicy lmp = new LogDocMergePolicy(); |
| + lmp.setMaxMergeDocs(3); |
| + conf.setMergePolicy(lmp); |
| + |
| + writer = new IndexWriter(dir, conf); |
| + writer.optimize(); |
| + writer.close(); |
| + |
| + SegmentInfos sis = new SegmentInfos(); |
| + sis.read(dir); |
| + assertEquals(1, sis.size()); |
| + } |
| + |
| + public void testAllSegmentsLarge() throws Exception { |
| + Directory dir = new RAMDirectory(); |
| + |
| + IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, null); |
| + conf.setMergePolicy(NoMergePolicy.COMPOUND_FILES); |
| + IndexWriter writer = new IndexWriter(dir, conf); |
| + |
| + addDocs(writer, 3); |
| + addDocs(writer, 3); |
| + addDocs(writer, 3); |
| + |
| + writer.close(); |
| + |
| + conf = newIndexWriterConfig(TEST_VERSION_CURRENT, null); |
| + LogMergePolicy lmp = new LogDocMergePolicy(); |
| + lmp.setMaxMergeDocs(2); |
| + conf.setMergePolicy(lmp); |
| + |
| + writer = new IndexWriter(dir, conf); |
| + writer.optimize(); |
| + writer.close(); |
| + |
| + SegmentInfos sis = new SegmentInfos(); |
| + sis.read(dir); |
| + assertEquals(3, sis.size()); |
| + } |
| + |
| + public void testOneLargeOneSmall() throws Exception { |
| + Directory dir = new RAMDirectory(); |
| + |
| + IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, null); |
| + conf.setMergePolicy(NoMergePolicy.COMPOUND_FILES); |
| + IndexWriter writer = new IndexWriter(dir, conf); |
| + |
| + addDocs(writer, 3); |
| + addDocs(writer, 5); |
| + addDocs(writer, 3); |
| + addDocs(writer, 5); |
| + |
| + writer.close(); |
| + |
| + conf = newIndexWriterConfig(TEST_VERSION_CURRENT, null); |
| + LogMergePolicy lmp = new LogDocMergePolicy(); |
| + lmp.setMaxMergeDocs(3); |
| + conf.setMergePolicy(lmp); |
| + |
| + writer = new IndexWriter(dir, conf); |
| + writer.optimize(); |
| + writer.close(); |
| + |
| + SegmentInfos sis = new SegmentInfos(); |
| + sis.read(dir); |
| + assertEquals(4, sis.size()); |
| + } |
| + |
| + public void testMergeFactor() throws Exception { |
| + Directory dir = new RAMDirectory(); |
| + |
| + IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, null); |
| + conf.setMergePolicy(NoMergePolicy.COMPOUND_FILES); |
| + IndexWriter writer = new IndexWriter(dir, conf); |
| + |
| + addDocs(writer, 3); |
| + addDocs(writer, 3); |
| + addDocs(writer, 3); |
| + addDocs(writer, 3); |
| + addDocs(writer, 5); |
| + addDocs(writer, 3); |
| + addDocs(writer, 3); |
| + |
| + writer.close(); |
| + |
| + conf = newIndexWriterConfig(TEST_VERSION_CURRENT, null); |
| + LogMergePolicy lmp = new LogDocMergePolicy(); |
| + lmp.setMaxMergeDocs(3); |
| + lmp.setMergeFactor(2); |
| + conf.setMergePolicy(lmp); |
| + |
| + writer = new IndexWriter(dir, conf); |
| + writer.optimize(); |
| + writer.close(); |
| + |
| + // Should only be 4 segments in the index, because of the merge factor and |
| + // max merge docs settings. |
| + SegmentInfos sis = new SegmentInfos(); |
| + sis.read(dir); |
| + assertEquals(4, sis.size()); |
| + } |
| + |
| + public void testSingleNonOptimizedSegment() throws Exception { |
| + Directory dir = new RAMDirectory(); |
| + |
| + IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, null); |
| + conf.setMergePolicy(NoMergePolicy.COMPOUND_FILES); |
| + IndexWriter writer = new IndexWriter(dir, conf); |
| + |
| + addDocs(writer, 3); |
| + addDocs(writer, 5); |
| + addDocs(writer, 3); |
| + |
| + writer.close(); |
| + |
| + // delete the last document, so that the last segment is no longer optimized (has a deletion). |
| + IndexReader r = IndexReader.open(dir, false); |
| + r.deleteDocument(r.numDocs() - 1); |
| + r.close(); |
| + |
| + conf = newIndexWriterConfig(TEST_VERSION_CURRENT, null); |
| + LogMergePolicy lmp = new LogDocMergePolicy(); |
| + lmp.setMaxMergeDocs(3); |
| + conf.setMergePolicy(lmp); |
| + |
| + writer = new IndexWriter(dir, conf); |
| + writer.optimize(); |
| + writer.close(); |
| + |
| + // Verify that the last segment does not have deletions. |
| + SegmentInfos sis = new SegmentInfos(); |
| + sis.read(dir); |
| + assertEquals(3, sis.size()); |
| + assertFalse(sis.info(2).hasDeletions()); |
| + } |
| + |
| +} |
| |
| Property changes on: src/test/org/apache/lucene/index/TestSizeBoundedOptimize.java |
| ___________________________________________________________________ |
| Added: svn:keywords |
| + Date Author Id Revision HeadURL |
| Added: svn:eol-style |
| + native |
| |
| Index: src/java/org/apache/lucene/index/MergePolicy.java |
| =================================================================== |
| --- src/java/org/apache/lucene/index/MergePolicy.java (revision 1021827) |
| +++ src/java/org/apache/lucene/index/MergePolicy.java (working copy) |
| @@ -77,8 +77,8 @@ |
| SegmentReader[] readers; // used by IndexWriter |
| SegmentReader[] readersClone; // used by IndexWriter |
| List<String> mergeFiles; // used by IndexWriter |
| - final SegmentInfos segments; |
| - final boolean useCompoundFile; |
| + public final SegmentInfos segments; |
| + public final boolean useCompoundFile; |
| boolean aborted; |
| Throwable error; |
| boolean paused; |
| @@ -146,7 +146,7 @@ |
| return paused; |
| } |
| |
| - String segString(Directory dir) { |
| + public String segString(Directory dir) { |
| StringBuilder b = new StringBuilder(); |
| final int numSegments = segments.size(); |
| for(int i=0;i<numSegments;i++) { |
| @@ -162,6 +162,30 @@ |
| } |
| return b.toString(); |
| } |
| + |
| + /** |
| + * Returns the total size in bytes of this merge. Note that this does not |
| + * indicate the size of the merged segment, but the input total size. |
| + */ |
| + public long totalBytesSize() throws IOException { |
| + long total = 0; |
| + for (SegmentInfo info : segments) { |
| + total += info.sizeInBytes(); |
| + } |
| + return total; |
| + } |
| + |
| + /** |
| + * Returns the total number of documents that are included with this merge. |
| + * Note that this does not indicate the number of documents after the merge. |
| + */ |
| + public int totalNumDocs() throws IOException { |
| + int total = 0; |
| + for (SegmentInfo info : segments) { |
| + total += info.docCount; |
| + } |
| + return total; |
| + } |
| } |
| |
| /** |
| @@ -176,7 +200,7 @@ |
| * The subset of segments to be included in the primitive merge. |
| */ |
| |
| - public List<OneMerge> merges = new ArrayList<OneMerge>(); |
| + public final List<OneMerge> merges = new ArrayList<OneMerge>(); |
| |
| public void add(OneMerge merge) { |
| merges.add(merge); |
| Index: src/java/org/apache/lucene/index/LogMergePolicy.java |
| =================================================================== |
| --- src/java/org/apache/lucene/index/LogMergePolicy.java (revision 1021827) |
| +++ src/java/org/apache/lucene/index/LogMergePolicy.java (working copy) |
| @@ -54,16 +54,16 @@ |
| * or larger will never be merged. @see setMaxMergeDocs */ |
| public static final int DEFAULT_MAX_MERGE_DOCS = Integer.MAX_VALUE; |
| |
| - private int mergeFactor = DEFAULT_MERGE_FACTOR; |
| + protected int mergeFactor = DEFAULT_MERGE_FACTOR; |
| |
| - long minMergeSize; |
| - long maxMergeSize; |
| - int maxMergeDocs = DEFAULT_MAX_MERGE_DOCS; |
| + protected long minMergeSize; |
| + protected long maxMergeSize; |
| + protected int maxMergeDocs = DEFAULT_MAX_MERGE_DOCS; |
| |
| protected boolean calibrateSizeByDeletes = true; |
| |
| - private boolean useCompoundFile = true; |
| - private boolean useCompoundDocStore = true; |
| + protected boolean useCompoundFile = true; |
| + protected boolean useCompoundDocStore = true; |
| |
| public LogMergePolicy() { |
| super(); |
| @@ -74,7 +74,7 @@ |
| return w != null && w.verbose(); |
| } |
| |
| - private void message(String message) { |
| + protected void message(String message) { |
| if (verbose()) |
| writer.get().message("LMP: " + message); |
| } |
| @@ -180,7 +180,7 @@ |
| } |
| } |
| |
| - private boolean isOptimized(SegmentInfos infos, int maxNumSegments, Set<SegmentInfo> segmentsToOptimize) throws IOException { |
| + protected boolean isOptimized(SegmentInfos infos, int maxNumSegments, Set<SegmentInfo> segmentsToOptimize) throws IOException { |
| final int numSegments = infos.size(); |
| int numToOptimize = 0; |
| SegmentInfo optimizeInfo = null; |
| @@ -199,7 +199,7 @@ |
| /** Returns true if this single info is optimized (has no |
| * pending norms or deletes, is in the same dir as the |
| * writer, and matches the current compound file setting */ |
| - private boolean isOptimized(SegmentInfo info) |
| + protected boolean isOptimized(SegmentInfo info) |
| throws IOException { |
| IndexWriter w = writer.get(); |
| assert w != null; |
| @@ -210,6 +210,103 @@ |
| info.getUseCompoundFile() == useCompoundFile; |
| } |
| |
| + /** |
| + * Returns the merges necessary to optimize the index, taking the max merge |
| + * size or max merge docs into consideration. This method attempts to respect |
| + * the {@code maxNumSegments} parameter; however, due to size constraints, it |
| + * may be that more than that number of segments will remain in the index. |
| + * Thus, this method does not guarantee that exactly {@code maxNumSegments} |
| + * segments will remain after it returns. |
| + */ |
| + private MergeSpecification findMergesForOptimizeSizeLimit( |
| + SegmentInfos infos, int maxNumSegments, int last) throws IOException { |
| + MergeSpecification spec = new MergeSpecification(); |
| + |
| + int start = last - 1; |
| + while (start >= 0) { |
| + SegmentInfo info = infos.info(start); |
| + if (size(info) > maxMergeSize || sizeDocs(info) > maxMergeDocs) { |
| + // need to skip that segment + add a merge for the 'right' segments, |
| + // unless there is only 1 which is optimized. |
| + if (last - start - 1 > 1 || (start != last - 1 && !isOptimized(infos.info(start + 1)))) { |
| + // there is more than 1 segment to the right of this one, or an unoptimized single segment. |
| + spec.add(new OneMerge(infos.range(start + 1, last), useCompoundFile)); |
| + } |
| + last = start; |
| + } else if (last - start == mergeFactor) { |
| + // mergeFactor eligible segments were found, add them as a merge. |
| + spec.add(new OneMerge(infos.range(start, last), useCompoundFile)); |
| + last = start; |
| + } |
| + --start; |
| + } |
| + |
| + // Add any left-over segments, unless there is just 1 already optimized. |
| + if (last > 0 && (++start + 1 < last || !isOptimized(infos.info(start)))) { |
| + spec.add(new OneMerge(infos.range(start, last), useCompoundFile)); |
| + } |
| + |
| + return spec.merges.size() == 0 ? null : spec; |
| + } |
| + |
| + /** |
| + * Returns the merges necessary to optimize the index. This method constrains |
| + * the returned merges only by the {@code maxNumSegments} parameter, and |
| + * guarantees that exactly that number of segments will remain in the index. |
| + */ |
| + private MergeSpecification findMergesForOptimizeMaxNumSegments(SegmentInfos infos, int maxNumSegments, int last) throws IOException { |
| + MergeSpecification spec = new MergeSpecification(); |
| + |
| + // First, enroll all "full" merges (size |
| + // mergeFactor) to potentially be run concurrently: |
| + while (last - maxNumSegments + 1 >= mergeFactor) { |
| + spec.add(new OneMerge(infos.range(last-mergeFactor, last), useCompoundFile)); |
| + last -= mergeFactor; |
| + } |
| + |
| + // Only if there are no full merges pending do we |
| + // add a final partial (< mergeFactor segments) merge: |
| + if (0 == spec.merges.size()) { |
| + if (maxNumSegments == 1) { |
| + |
| + // Since we must optimize down to 1 segment, the |
| + // choice is simple: |
| + if (last > 1 || !isOptimized(infos.info(0))) { |
| + spec.add(new OneMerge(infos.range(0, last), useCompoundFile)); |
| + } |
| + } else if (last > maxNumSegments) { |
| + |
| + // Take care to pick a partial merge that is |
| + // least cost, but does not make the index too |
| + // lopsided. If we always just picked the |
| + // partial tail then we could produce a highly |
| + // lopsided index over time: |
| + |
| + // We must merge this many segments to leave |
| + // maxNumSegments in the index (from when |
| + // optimize was first kicked off): |
| + final int finalMergeSize = last - maxNumSegments + 1; |
| + |
| + // Consider all possible starting points: |
| + long bestSize = 0; |
| + int bestStart = 0; |
| + |
| + for(int i=0;i<last-finalMergeSize+1;i++) { |
| + long sumSize = 0; |
| + for(int j=0;j<finalMergeSize;j++) |
| + sumSize += size(infos.info(j+i)); |
| + if (i == 0 || (sumSize < 2*size(infos.info(i-1)) && sumSize < bestSize)) { |
| + bestStart = i; |
| + bestSize = sumSize; |
| + } |
| + } |
| + |
| + spec.add(new OneMerge(infos.range(bestStart, bestStart+finalMergeSize), useCompoundFile)); |
| + } |
| + } |
| + return spec.merges.size() == 0 ? null : spec; |
| + } |
| + |
| /** Returns the merges necessary to optimize the index. |
| * This merge policy defines "optimized" to mean only one |
| * segment in the index, where that segment has no |
| @@ -221,81 +318,45 @@ |
| @Override |
| public MergeSpecification findMergesForOptimize(SegmentInfos infos, |
| int maxNumSegments, Set<SegmentInfo> segmentsToOptimize) throws IOException { |
| - MergeSpecification spec; |
| |
| assert maxNumSegments > 0; |
| |
| - if (!isOptimized(infos, maxNumSegments, segmentsToOptimize)) { |
| - |
| - // Find the newest (rightmost) segment that needs to |
| - // be optimized (other segments may have been flushed |
| - // since optimize started): |
| - int last = infos.size(); |
| - while(last > 0) { |
| - final SegmentInfo info = infos.info(--last); |
| - if (segmentsToOptimize.contains(info)) { |
| - last++; |
| - break; |
| - } |
| + // If the segments are already optimized (e.g. there's only 1 segment), or |
| + // there are <= maxNumSegments segments, all optimized, nothing to do. |
| + if (isOptimized(infos, maxNumSegments, segmentsToOptimize)) return null; |
| + |
| + // Find the newest (rightmost) segment that needs to |
| + // be optimized (other segments may have been flushed |
| + // since optimize started): |
| + int last = infos.size(); |
| + while (last > 0) { |
| + final SegmentInfo info = infos.info(--last); |
| + if (segmentsToOptimize.contains(info)) { |
| + last++; |
| + break; |
| } |
| + } |
| |
| - if (last > 0) { |
| + if (last == 0) return null; |
| + |
| + // There is only one segment already, and it is optimized |
| + if (maxNumSegments == 1 && last == 1 && isOptimized(infos.info(0))) return null; |
| |
| - spec = new MergeSpecification(); |
| - |
| - // First, enroll all "full" merges (size |
| - // mergeFactor) to potentially be run concurrently: |
| - while (last - maxNumSegments + 1 >= mergeFactor) { |
| - spec.add(new OneMerge(infos.range(last-mergeFactor, last), useCompoundFile)); |
| - last -= mergeFactor; |
| - } |
| - |
| - // Only if there are no full merges pending do we |
| - // add a final partial (< mergeFactor segments) merge: |
| - if (0 == spec.merges.size()) { |
| - if (maxNumSegments == 1) { |
| - |
| - // Since we must optimize down to 1 segment, the |
| - // choice is simple: |
| - if (last > 1 || !isOptimized(infos.info(0))) |
| - spec.add(new OneMerge(infos.range(0, last), useCompoundFile)); |
| - } else if (last > maxNumSegments) { |
| - |
| - // Take care to pick a partial merge that is |
| - // least cost, but does not make the index too |
| - // lopsided. If we always just picked the |
| - // partial tail then we could produce a highly |
| - // lopsided index over time: |
| - |
| - // We must merge this many segments to leave |
| - // maxNumSegments in the index (from when |
| - // optimize was first kicked off): |
| - final int finalMergeSize = last - maxNumSegments + 1; |
| - |
| - // Consider all possible starting points: |
| - long bestSize = 0; |
| - int bestStart = 0; |
| - |
| - for(int i=0;i<last-finalMergeSize+1;i++) { |
| - long sumSize = 0; |
| - for(int j=0;j<finalMergeSize;j++) |
| - sumSize += size(infos.info(j+i)); |
| - if (i == 0 || (sumSize < 2*size(infos.info(i-1)) && sumSize < bestSize)) { |
| - bestStart = i; |
| - bestSize = sumSize; |
| - } |
| - } |
| - |
| - spec.add(new OneMerge(infos.range(bestStart, bestStart+finalMergeSize), useCompoundFile)); |
| - } |
| - } |
| - |
| - } else |
| - spec = null; |
| - } else |
| - spec = null; |
| - |
| - return spec; |
| + // Check if there are any segments above the threshold |
| + boolean anyTooLarge = false; |
| + for (int i = 0; i < last; i++) { |
| + SegmentInfo info = infos.info(i); |
| + if (size(info) > maxMergeSize || sizeDocs(info) > maxMergeDocs) { |
| + anyTooLarge = true; |
| + break; |
| + } |
| + } |
| + |
| + if (anyTooLarge) { |
| + return findMergesForOptimizeSizeLimit(infos, maxNumSegments, last); |
| + } else { |
| + return findMergesForOptimizeMaxNumSegments(infos, maxNumSegments, last); |
| + } |
| } |
| |
| /** |
| Index: contrib/misc/src/java/org/apache/lucene/index/BalancedSegmentMergePolicy.java |
| =================================================================== |
| --- contrib/misc/src/java/org/apache/lucene/index/BalancedSegmentMergePolicy.java (revision 1021827) |
| +++ contrib/misc/src/java/org/apache/lucene/index/BalancedSegmentMergePolicy.java (working copy) |
| @@ -103,31 +103,6 @@ |
| } |
| } |
| |
| - private boolean isOptimized(SegmentInfos infos, IndexWriter writer, int maxNumSegments, Set<SegmentInfo> segmentsToOptimize) throws IOException { |
| - final int numSegments = infos.size(); |
| - int numToOptimize = 0; |
| - SegmentInfo optimizeInfo = null; |
| - for(int i=0;i<numSegments && numToOptimize <= maxNumSegments;i++) { |
| - final SegmentInfo info = infos.info(i); |
| - if (segmentsToOptimize.contains(info)) { |
| - numToOptimize++; |
| - optimizeInfo = info; |
| - } |
| - } |
| - |
| - return numToOptimize <= maxNumSegments && |
| - (numToOptimize != 1 || isOptimized(writer, optimizeInfo)); |
| - } |
| - |
| - private boolean isOptimized(IndexWriter writer, SegmentInfo info) |
| - throws IOException { |
| - assert writer != null; |
| - return !info.hasDeletions() && |
| - !info.hasSeparateNorms() && |
| - info.dir == writer.getDirectory() && |
| - info.getUseCompoundFile() == getUseCompoundFile(); |
| - } |
| - |
| @Override |
| public MergeSpecification findMergesForOptimize(SegmentInfos infos, int maxNumSegments, Set<SegmentInfo> segmentsToOptimize) throws IOException { |
| |
| @@ -135,7 +110,7 @@ |
| |
| MergeSpecification spec = null; |
| |
| - if (!isOptimized(infos, writer.get(), maxNumSegments, segmentsToOptimize)) { |
| + if (!isOptimized(infos, maxNumSegments, segmentsToOptimize)) { |
| |
| // Find the newest (rightmost) segment that needs to |
| // be optimized (other segments may have been flushed |
| @@ -158,7 +133,7 @@ |
| // Since we must optimize down to 1 segment, the |
| // choice is simple: |
| boolean useCompoundFile = getUseCompoundFile(); |
| - if (last > 1 || !isOptimized(writer.get(), infos.info(0))) { |
| + if (last > 1 || !isOptimized(infos.info(0))) { |
| |
| spec = new MergeSpecification(); |
| spec.add(new OneMerge(infos.range(0, last), useCompoundFile)); |