| Index: lucene/CHANGES.txt |
| =================================================================== |
| --- lucene/CHANGES.txt (révision 1343822) |
| +++ lucene/CHANGES.txt (copie de travail) |
| @@ -932,6 +932,9 @@ |
| performance; add float acceptableOverheadRatio to getWriter and |
| getMutable API to give packed ints freedom to pick faster |
| implementations (Adrien Grand via Mike McCandless) |
| + |
| +* LUCENE-2357: Reduce transient RAM usage when merging segments in |
| + IndexWriter. (Adrien Grand via Mike McCandless) |
| |
| Bug fixes |
| |
| Index: lucene/core/src/java/org/apache/lucene/codecs/MappingMultiDocsAndPositionsEnum.java |
| =================================================================== |
| --- lucene/core/src/java/org/apache/lucene/codecs/MappingMultiDocsAndPositionsEnum.java (révision 1343822) |
| +++ lucene/core/src/java/org/apache/lucene/codecs/MappingMultiDocsAndPositionsEnum.java (copie de travail) |
| @@ -36,7 +36,7 @@ |
| private MultiDocsAndPositionsEnum.EnumWithSlice[] subs; |
| int numSubs; |
| int upto; |
| - int[] currentMap; |
| + MergeState.DocMap currentMap; |
| DocsAndPositionsEnum current; |
| int currentBase; |
| int doc = -1; |
| @@ -94,12 +94,10 @@ |
| |
| int doc = current.nextDoc(); |
| if (doc != NO_MORE_DOCS) { |
| - if (currentMap != null) { |
| - // compact deletions |
| - doc = currentMap[doc]; |
| - if (doc == -1) { |
| - continue; |
| - } |
| + // compact deletions |
| + doc = currentMap.get(doc); |
| + if (doc == -1) { |
| + continue; |
| } |
| return this.doc = currentBase + doc; |
| } else { |
| Index: lucene/core/src/java/org/apache/lucene/codecs/MappingMultiDocsEnum.java |
| =================================================================== |
| --- lucene/core/src/java/org/apache/lucene/codecs/MappingMultiDocsEnum.java (révision 1343822) |
| +++ lucene/core/src/java/org/apache/lucene/codecs/MappingMultiDocsEnum.java (copie de travail) |
| @@ -35,7 +35,7 @@ |
| private MultiDocsEnum.EnumWithSlice[] subs; |
| int numSubs; |
| int upto; |
| - int[] currentMap; |
| + MergeState.DocMap currentMap; |
| DocsEnum current; |
| int currentBase; |
| int doc = -1; |
| @@ -88,18 +88,16 @@ |
| current = subs[upto].docsEnum; |
| currentBase = mergeState.docBase[reader]; |
| currentMap = mergeState.docMaps[reader]; |
| - assert currentMap == null || currentMap.length == subs[upto].slice.length: "readerIndex=" + reader + " subs.len=" + subs.length + " len1=" + currentMap.length + " vs " + subs[upto].slice.length; |
| + assert currentMap.maxDoc() == subs[upto].slice.length: "readerIndex=" + reader + " subs.len=" + subs.length + " len1=" + currentMap.maxDoc() + " vs " + subs[upto].slice.length; |
| } |
| } |
| |
| int doc = current.nextDoc(); |
| if (doc != NO_MORE_DOCS) { |
| - if (currentMap != null) { |
| - // compact deletions |
| - doc = currentMap[doc]; |
| - if (doc == -1) { |
| - continue; |
| - } |
| + // compact deletions |
| + doc = currentMap.get(doc); |
| + if (doc == -1) { |
| + continue; |
| } |
| return this.doc = currentBase + doc; |
| } else { |
| Index: lucene/core/src/java/org/apache/lucene/index/SortedBytesMergeUtils.java |
| =================================================================== |
| --- lucene/core/src/java/org/apache/lucene/index/SortedBytesMergeUtils.java (révision 1343822) |
| +++ lucene/core/src/java/org/apache/lucene/index/SortedBytesMergeUtils.java (copie de travail) |
| @@ -81,7 +81,8 @@ |
| } |
| } |
| |
| - public static List<SortedSourceSlice> buildSlices(int[] docBases, int[][] docMaps, |
| + public static List<SortedSourceSlice> buildSlices( |
| + int[] docBases, MergeState.DocMap[] docMaps, |
| DocValues[] docValues, MergeContext ctx) throws IOException { |
| final List<SortedSourceSlice> slices = new ArrayList<SortedSourceSlice>(); |
| for (int i = 0; i < docValues.length; i++) { |
| @@ -111,15 +112,15 @@ |
| * mapping in docIDToRelativeOrd. After the merge SortedSourceSlice#ordMapping |
| * contains the new global ordinals for the relative index. |
| */ |
| - private static void createOrdMapping(int[] docBases, int[][] docMaps, |
| + private static void createOrdMapping(int[] docBases, MergeState.DocMap[] docMaps, |
| SortedSourceSlice currentSlice) { |
| final int readerIdx = currentSlice.readerIdx; |
| - final int[] currentDocMap = docMaps[readerIdx]; |
| + final MergeState.DocMap currentDocMap = docMaps[readerIdx]; |
| final int docBase = currentSlice.docToOrdStart; |
| assert docBase == docBases[readerIdx]; |
| - if (currentDocMap != null) { // we have deletes |
| - for (int i = 0; i < currentDocMap.length; i++) { |
| - final int doc = currentDocMap[i]; |
| + if (currentDocMap != null && currentDocMap.hasDeletions()) { // we have deletes |
| + for (int i = 0; i < currentDocMap.maxDoc(); i++) { |
| + final int doc = currentDocMap.get(i); |
| if (doc != -1) { // not deleted |
| final int ord = currentSlice.source.ord(i); // collect ords strictly |
| // increasing |
| Index: lucene/core/src/java/org/apache/lucene/index/MultiDocValues.java |
| =================================================================== |
| --- lucene/core/src/java/org/apache/lucene/index/MultiDocValues.java (révision 1343822) |
| +++ lucene/core/src/java/org/apache/lucene/index/MultiDocValues.java (copie de travail) |
| @@ -322,7 +322,7 @@ |
| final MergeContext ctx = SortedBytesMergeUtils.init(type, values, |
| comp, globalNumDocs); |
| List<SortedSourceSlice> slices = SortedBytesMergeUtils.buildSlices( |
| - docBases(), new int[values.length][], values, ctx); |
| + docBases(), new MergeState.DocMap[values.length], values, ctx); |
| RecordingBytesRefConsumer consumer = new RecordingBytesRefConsumer( |
| type); |
| final int maxOrd = SortedBytesMergeUtils.mergeRecords(ctx, consumer, |
| Index: lucene/core/src/java/org/apache/lucene/index/IndexWriter.java |
| =================================================================== |
| --- lucene/core/src/java/org/apache/lucene/index/IndexWriter.java (révision 1343822) |
| +++ lucene/core/src/java/org/apache/lucene/index/IndexWriter.java (copie de travail) |
| @@ -3483,7 +3483,7 @@ |
| merge.readers.add(reader); |
| assert delCount <= info.info.getDocCount(): "delCount=" + delCount + " info.docCount=" + info.info.getDocCount() + " rld.pendingDeleteCount=" + rld.getPendingDeleteCount() + " info.getDelCount()=" + info.getDelCount(); |
| if (delCount < info.info.getDocCount()) { |
| - merger.add(reader, liveDocs); |
| + merger.add(reader, liveDocs, delCount); |
| } |
| segUpto++; |
| } |
| Index: lucene/core/src/java/org/apache/lucene/index/SegmentMerger.java |
| =================================================================== |
| --- lucene/core/src/java/org/apache/lucene/index/SegmentMerger.java (révision 1343822) |
| +++ lucene/core/src/java/org/apache/lucene/index/SegmentMerger.java (copie de travail) |
| @@ -80,7 +80,7 @@ |
| new ReaderUtil.Gather(reader) { |
| @Override |
| protected void add(int base, AtomicReader r) { |
| - mergeState.readers.add(new MergeState.IndexReaderAndLiveDocs(r, r.getLiveDocs())); |
| + mergeState.readers.add(new MergeState.IndexReaderAndLiveDocs(r, r.getLiveDocs(), r.numDeletedDocs())); |
| } |
| }.run(); |
| } catch (IOException ioe) { |
| @@ -89,8 +89,8 @@ |
| } |
| } |
| |
| - final void add(SegmentReader reader, Bits liveDocs) { |
| - mergeState.readers.add(new MergeState.IndexReaderAndLiveDocs(reader, liveDocs)); |
| + final void add(SegmentReader reader, Bits liveDocs, int delCount) { |
| + mergeState.readers.add(new MergeState.IndexReaderAndLiveDocs(reader, liveDocs, delCount)); |
| } |
| |
| /** |
| @@ -287,7 +287,7 @@ |
| final int numReaders = mergeState.readers.size(); |
| |
| // Remap docIDs |
| - mergeState.docMaps = new int[numReaders][]; |
| + mergeState.docMaps = new MergeState.DocMap[numReaders]; |
| mergeState.docBase = new int[numReaders]; |
| mergeState.readerPayloadProcessor = new PayloadProcessorProvider.ReaderPayloadProcessor[numReaders]; |
| mergeState.currentPayloadProcessor = new PayloadProcessorProvider.PayloadProcessor[numReaders]; |
| @@ -300,30 +300,9 @@ |
| final MergeState.IndexReaderAndLiveDocs reader = mergeState.readers.get(i); |
| |
| mergeState.docBase[i] = docBase; |
| - final int maxDoc = reader.reader.maxDoc(); |
| - final int docCount; |
| - final Bits liveDocs = reader.liveDocs; |
| - final int[] docMap; |
| - if (liveDocs != null) { |
| - int delCount = 0; |
| - docMap = new int[maxDoc]; |
| - int newDocID = 0; |
| - for(int j=0;j<maxDoc;j++) { |
| - if (!liveDocs.get(j)) { |
| - docMap[j] = -1; |
| - delCount++; |
| - } else { |
| - docMap[j] = newDocID++; |
| - } |
| - } |
| - docCount = maxDoc - delCount; |
| - } else { |
| - docCount = maxDoc; |
| - docMap = null; |
| - } |
| - |
| + final MergeState.DocMap docMap = MergeState.DocMap.build(reader); |
| mergeState.docMaps[i] = docMap; |
| - docBase += docCount; |
| + docBase += docMap.numDocs(); |
| |
| if (mergeState.payloadProcessorProvider != null) { |
| mergeState.readerPayloadProcessor[i] = mergeState.payloadProcessorProvider.getReaderProcessor(reader.reader); |
| Index: lucene/core/src/java/org/apache/lucene/index/MergeState.java |
| =================================================================== |
| --- lucene/core/src/java/org/apache/lucene/index/MergeState.java (révision 1343822) |
| +++ lucene/core/src/java/org/apache/lucene/index/MergeState.java (copie de travail) |
| @@ -19,11 +19,12 @@ |
| |
| import java.util.List; |
| |
| +import org.apache.lucene.index.PayloadProcessorProvider.PayloadProcessor; |
| import org.apache.lucene.index.PayloadProcessorProvider.ReaderPayloadProcessor; |
| -import org.apache.lucene.index.PayloadProcessorProvider.PayloadProcessor; |
| import org.apache.lucene.store.Directory; |
| import org.apache.lucene.util.Bits; |
| import org.apache.lucene.util.InfoStream; |
| +import org.apache.lucene.util.packed.PackedInts; |
| |
| /** Holds common state used during segment merging |
| * |
| @@ -33,17 +34,171 @@ |
| public static class IndexReaderAndLiveDocs { |
| public final AtomicReader reader; |
| public final Bits liveDocs; |
| + public final int numDeletedDocs; |
| |
| - public IndexReaderAndLiveDocs(AtomicReader reader, Bits liveDocs) { |
| + public IndexReaderAndLiveDocs(AtomicReader reader, Bits liveDocs, int numDeletedDocs) { |
| this.reader = reader; |
| this.liveDocs = liveDocs; |
| + this.numDeletedDocs = numDeletedDocs; |
| } |
| } |
| |
| + public static abstract class DocMap { |
| + private final Bits liveDocs; |
| + |
| + protected DocMap(Bits liveDocs) { |
| + this.liveDocs = liveDocs; |
| + } |
| + |
| + public static DocMap build(IndexReaderAndLiveDocs reader) { |
| + final int maxDoc = reader.reader.maxDoc(); |
| + final int numDeletes = reader.numDeletedDocs; |
| + final int numDocs = maxDoc - numDeletes; |
| + assert reader.liveDocs != null || numDeletes == 0; |
| + if (numDeletes == 0) { |
| + return new NoDelDocMap(maxDoc); |
| + } else if (numDeletes < numDocs) { |
| + return buildDelCountDocmap(maxDoc, numDeletes, reader.liveDocs, PackedInts.FAST); |
| + } else { |
| + return buildDirectDocMap(maxDoc, numDocs, reader.liveDocs, PackedInts.FAST); |
| + } |
| + } |
| + |
| + static DocMap buildDelCountDocmap(int maxDoc, int numDeletes, Bits liveDocs, float acceptableOverheadRatio) { |
| + PackedInts.Mutable numDeletesSoFar = PackedInts.getMutable(maxDoc, |
| + PackedInts.bitsRequired(numDeletes), acceptableOverheadRatio); |
| + int del = 0; |
| + for (int i = 0; i < maxDoc; ++i) { |
| + if (!liveDocs.get(i)) { |
| + ++del; |
| + } |
| + numDeletesSoFar.set(i, del); |
| + } |
| + assert del == numDeletes : "del=" + del + ", numdeletes=" + numDeletes; |
| + return new DelCountDocMap(liveDocs, numDeletesSoFar); |
| + } |
| + |
| + static DocMap buildDirectDocMap(int maxDoc, int numDocs, Bits liveDocs, float acceptableOverheadRatio) { |
| + PackedInts.Mutable docIds = PackedInts.getMutable(maxDoc, |
| + PackedInts.bitsRequired(Math.max(0, numDocs - 1)), acceptableOverheadRatio); |
| + int del = 0; |
| + for (int i = 0; i < maxDoc; ++i) { |
| + if (liveDocs.get(i)) { |
| + docIds.set(i, i - del); |
| + } else { |
| + ++del; |
| + } |
| + } |
| + assert numDocs + del == maxDoc : "maxDoc=" + maxDoc + ", del=" + del + ", numDocs=" + numDocs; |
| + return new DirectDocMap(liveDocs, docIds, del); |
| + } |
| + |
| + public int get(int docId) { |
| + if (liveDocs == null || liveDocs.get(docId)) { |
| + return remap(docId); |
| + } else { |
| + return -1; |
| + } |
| + } |
| + |
| + public abstract int remap(int docId); |
| + |
| + public abstract int maxDoc(); |
| + |
| + public final int numDocs() { |
| + return maxDoc() - numDeletedDocs(); |
| + } |
| + |
| + public abstract int numDeletedDocs(); |
| + |
| + public boolean hasDeletions() { |
| + return numDeletedDocs() > 0; |
| + } |
| + |
| + } |
| + /** Identity DocMap for a segment without deletions: liveDocs is null, so get(int) returns docId unchanged. */ |
| + private static class NoDelDocMap extends DocMap { |
| + |
| + private final int maxDoc; |
| + |
| + private NoDelDocMap(int maxDoc) { |
| + super(null); |
| + this.maxDoc = maxDoc; |
| + } |
| + |
| + @Override |
| + public int remap(int docId) { |
| + return docId; |
| + } |
| + |
| + @Override |
| + public int maxDoc() { |
| + return maxDoc; |
| + } |
| + |
| + @Override |
| + public int numDeletedDocs() { |
| + return 0; |
| + } |
| + } |
| + /** DocMap that looks up the new docID of a live doc in a packed table indexed by the old docID. */ |
| + private static class DirectDocMap extends DocMap { |
| + |
| + private final PackedInts.Mutable docIds; |
| + private final int numDeletedDocs; |
| + |
| + private DirectDocMap(Bits liveDocs, PackedInts.Mutable docIds, int numDeletedDocs) { |
| + super(liveDocs); |
| + this.docIds = docIds; |
| + this.numDeletedDocs = numDeletedDocs; |
| + } |
| + |
| + @Override |
| + public int remap(int docId) { |
| + return (int) docIds.get(docId); |
| + } |
| + // buildDirectDocMap allocates the table with maxDoc entries, so its size doubles as maxDoc |
| + @Override |
| + public int maxDoc() { |
| + return docIds.size(); |
| + } |
| + |
| + @Override |
| + public int numDeletedDocs() { |
| + return numDeletedDocs; |
| + } |
| + } |
| + /** DocMap that derives the new docID by subtracting a per-doc count of deletes stored in a packed table. */ |
| + private static class DelCountDocMap extends DocMap { |
| + |
| + private final PackedInts.Mutable numDeletesSoFar; |
| + |
| + private DelCountDocMap(Bits liveDocs, PackedInts.Mutable numDeletesSoFar) { |
| + super(liveDocs); |
| + this.numDeletesSoFar = numDeletesSoFar; |
| + } |
| + // new docID = old docID minus the number of deletes recorded for it |
| + @Override |
| + public int remap(int docId) { |
| + return docId - (int) numDeletesSoFar.get(docId); |
| + } |
| + |
| + @Override |
| + public int maxDoc() { |
| + return numDeletesSoFar.size(); |
| + } |
| + // the last table entry is the running total of deletes; reads index maxDoc - 1, so assumes maxDoc() > 0 |
| + @Override |
| + public int numDeletedDocs() { |
| + final int maxDoc = maxDoc(); |
| + return (int) numDeletesSoFar.get(maxDoc - 1); |
| + } |
| + } |
| + |
| public SegmentInfo segmentInfo; |
| public FieldInfos fieldInfos; |
| public List<IndexReaderAndLiveDocs> readers; // Readers & liveDocs being merged |
| - public int[][] docMaps; // Maps docIDs around deletions |
| + public DocMap[] docMaps; // Maps docIDs around deletions |
| public int[] docBase; // New docID base per reader |
| public CheckAbort checkAbort; |
| public InfoStream infoStream; |
| @@ -65,8 +220,8 @@ |
| |
| public static class CheckAbort { |
| private double workCount; |
| - private MergePolicy.OneMerge merge; |
| - private Directory dir; |
| + private final MergePolicy.OneMerge merge; |
| + private final Directory dir; |
| public CheckAbort(MergePolicy.OneMerge merge, Directory dir) { |
| this.merge = merge; |
| this.dir = dir; |
| Index: lucene/core/src/test/org/apache/lucene/index/TestSegmentMerger.java |
| =================================================================== |
| --- lucene/core/src/test/org/apache/lucene/index/TestSegmentMerger.java (révision 1343822) |
| +++ lucene/core/src/test/org/apache/lucene/index/TestSegmentMerger.java (copie de travail) |
| @@ -25,9 +25,11 @@ |
| import org.apache.lucene.store.Directory; |
| import org.apache.lucene.util.BytesRef; |
| import org.apache.lucene.util.Constants; |
| +import org.apache.lucene.util.FixedBitSet; |
| import org.apache.lucene.util.InfoStream; |
| import org.apache.lucene.util.LuceneTestCase; |
| import org.apache.lucene.util._TestUtil; |
| +import org.apache.lucene.util.packed.PackedInts; |
| |
| |
| public class TestSegmentMerger extends LuceneTestCase { |
| @@ -139,4 +141,41 @@ |
| TestSegmentReader.checkNorms(mergedReader); |
| mergedReader.close(); |
| } |
| + // Two doc maps are considered equal when they have the same maxDoc and get(i) agrees for every i below it. |
| + private static boolean equals(MergeState.DocMap map1, MergeState.DocMap map2) { |
| + if (map1.maxDoc() != map2.maxDoc()) { |
| + return false; |
| + } |
| + for (int i = 0; i < map1.maxDoc(); ++i) { |
| + if (map1.get(i) != map2.get(i)) { |
| + return false; |
| + } |
| + } |
| + return true; |
| + } |
| + // Checks that buildDelCountDocmap and buildDirectDocMap yield the same mapping for the same liveDocs. |
| + public void testBuildDocMap() { |
| + final int maxDoc = 128; |
| + final FixedBitSet liveDocs = new FixedBitSet(maxDoc); |
| + // case 1: no bit has been set yet, so the builders are told that all maxDoc docs are deleted |
| + MergeState.DocMap docMap1 = MergeState.DocMap.buildDelCountDocmap(maxDoc, maxDoc, liveDocs, PackedInts.COMPACT); |
| + MergeState.DocMap docMap2 = MergeState.DocMap.buildDirectDocMap(maxDoc, 0, liveDocs, PackedInts.COMPACT); |
| + assertTrue(equals(docMap1, docMap2)); |
| + // case 2: a mix of live and deleted docs |
| + liveDocs.set(1); |
| + for (int i = 7; i < 79; ++i) { |
| + liveDocs.set(i); |
| + } |
| + liveDocs.set(80); |
| + liveDocs.set(88); |
| + int numDocs = liveDocs.cardinality(); |
| + docMap1 = MergeState.DocMap.buildDelCountDocmap(maxDoc, maxDoc - numDocs, liveDocs, PackedInts.COMPACT); |
| + docMap2 = MergeState.DocMap.buildDirectDocMap(maxDoc, numDocs, liveDocs, PackedInts.COMPACT); |
| + assertTrue(equals(docMap1, docMap2)); |
| + // case 3: the whole range is set, so the builders are told that nothing is deleted |
| + liveDocs.set(0, maxDoc); |
| + docMap1 = MergeState.DocMap.buildDelCountDocmap(maxDoc, 0, liveDocs, PackedInts.COMPACT); |
| + docMap2 = MergeState.DocMap.buildDirectDocMap(maxDoc, maxDoc, liveDocs, PackedInts.COMPACT); |
| + assertTrue(equals(docMap1, docMap2)); |
| + } |
| } |