blob: 7111579e8f92096a268803233d530c2655769e75 [file] [log] [blame]
Index: lucene/CHANGES.txt
===================================================================
--- lucene/CHANGES.txt (révision 1343822)
+++ lucene/CHANGES.txt (copie de travail)
@@ -932,6 +932,9 @@
performance; add float acceptableOverheadRatio to getWriter and
getMutable API to give packed ints freedom to pick faster
implementations (Adrien Grand via Mike McCandless)
+
+* LUCENE-2357: Reduce transient RAM usage when merging segments in
+ IndexWriter. (Adrien Grand via Mike McCandless)
Bug fixes
Index: lucene/core/src/java/org/apache/lucene/codecs/MappingMultiDocsAndPositionsEnum.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/codecs/MappingMultiDocsAndPositionsEnum.java (révision 1343822)
+++ lucene/core/src/java/org/apache/lucene/codecs/MappingMultiDocsAndPositionsEnum.java (copie de travail)
@@ -36,7 +36,7 @@
private MultiDocsAndPositionsEnum.EnumWithSlice[] subs;
int numSubs;
int upto;
- int[] currentMap;
+ MergeState.DocMap currentMap;
DocsAndPositionsEnum current;
int currentBase;
int doc = -1;
@@ -94,12 +94,10 @@
int doc = current.nextDoc();
if (doc != NO_MORE_DOCS) {
- if (currentMap != null) {
- // compact deletions
- doc = currentMap[doc];
- if (doc == -1) {
- continue;
- }
+ // compact deletions
+ doc = currentMap.get(doc);
+ if (doc == -1) {
+ continue;
}
return this.doc = currentBase + doc;
} else {
Index: lucene/core/src/java/org/apache/lucene/codecs/MappingMultiDocsEnum.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/codecs/MappingMultiDocsEnum.java (révision 1343822)
+++ lucene/core/src/java/org/apache/lucene/codecs/MappingMultiDocsEnum.java (copie de travail)
@@ -35,7 +35,7 @@
private MultiDocsEnum.EnumWithSlice[] subs;
int numSubs;
int upto;
- int[] currentMap;
+ MergeState.DocMap currentMap;
DocsEnum current;
int currentBase;
int doc = -1;
@@ -88,18 +88,16 @@
current = subs[upto].docsEnum;
currentBase = mergeState.docBase[reader];
currentMap = mergeState.docMaps[reader];
- assert currentMap == null || currentMap.length == subs[upto].slice.length: "readerIndex=" + reader + " subs.len=" + subs.length + " len1=" + currentMap.length + " vs " + subs[upto].slice.length;
+ assert currentMap.maxDoc() == subs[upto].slice.length: "readerIndex=" + reader + " subs.len=" + subs.length + " len1=" + currentMap.maxDoc() + " vs " + subs[upto].slice.length;
}
}
int doc = current.nextDoc();
if (doc != NO_MORE_DOCS) {
- if (currentMap != null) {
- // compact deletions
- doc = currentMap[doc];
- if (doc == -1) {
- continue;
- }
+ // compact deletions
+ doc = currentMap.get(doc);
+ if (doc == -1) {
+ continue;
}
return this.doc = currentBase + doc;
} else {
Index: lucene/core/src/java/org/apache/lucene/index/SortedBytesMergeUtils.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/index/SortedBytesMergeUtils.java (révision 1343822)
+++ lucene/core/src/java/org/apache/lucene/index/SortedBytesMergeUtils.java (copie de travail)
@@ -81,7 +81,8 @@
}
}
- public static List<SortedSourceSlice> buildSlices(int[] docBases, int[][] docMaps,
+ public static List<SortedSourceSlice> buildSlices(
+ int[] docBases, MergeState.DocMap[] docMaps,
DocValues[] docValues, MergeContext ctx) throws IOException {
final List<SortedSourceSlice> slices = new ArrayList<SortedSourceSlice>();
for (int i = 0; i < docValues.length; i++) {
@@ -111,15 +112,15 @@
* mapping in docIDToRelativeOrd. After the merge SortedSourceSlice#ordMapping
* contains the new global ordinals for the relative index.
*/
- private static void createOrdMapping(int[] docBases, int[][] docMaps,
+ private static void createOrdMapping(int[] docBases, MergeState.DocMap[] docMaps,
SortedSourceSlice currentSlice) {
final int readerIdx = currentSlice.readerIdx;
- final int[] currentDocMap = docMaps[readerIdx];
+ final MergeState.DocMap currentDocMap = docMaps[readerIdx];
final int docBase = currentSlice.docToOrdStart;
assert docBase == docBases[readerIdx];
- if (currentDocMap != null) { // we have deletes
- for (int i = 0; i < currentDocMap.length; i++) {
- final int doc = currentDocMap[i];
+ if (currentDocMap != null && currentDocMap.hasDeletions()) { // we have deletes
+ for (int i = 0; i < currentDocMap.maxDoc(); i++) {
+ final int doc = currentDocMap.get(i);
if (doc != -1) { // not deleted
final int ord = currentSlice.source.ord(i); // collect ords strictly
// increasing
Index: lucene/core/src/java/org/apache/lucene/index/MultiDocValues.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/index/MultiDocValues.java (révision 1343822)
+++ lucene/core/src/java/org/apache/lucene/index/MultiDocValues.java (copie de travail)
@@ -322,7 +322,7 @@
final MergeContext ctx = SortedBytesMergeUtils.init(type, values,
comp, globalNumDocs);
List<SortedSourceSlice> slices = SortedBytesMergeUtils.buildSlices(
- docBases(), new int[values.length][], values, ctx);
+ docBases(), new MergeState.DocMap[values.length], values, ctx);
RecordingBytesRefConsumer consumer = new RecordingBytesRefConsumer(
type);
final int maxOrd = SortedBytesMergeUtils.mergeRecords(ctx, consumer,
Index: lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/index/IndexWriter.java (révision 1343822)
+++ lucene/core/src/java/org/apache/lucene/index/IndexWriter.java (copie de travail)
@@ -3483,7 +3483,7 @@
merge.readers.add(reader);
assert delCount <= info.info.getDocCount(): "delCount=" + delCount + " info.docCount=" + info.info.getDocCount() + " rld.pendingDeleteCount=" + rld.getPendingDeleteCount() + " info.getDelCount()=" + info.getDelCount();
if (delCount < info.info.getDocCount()) {
- merger.add(reader, liveDocs);
+ merger.add(reader, liveDocs, delCount);
}
segUpto++;
}
Index: lucene/core/src/java/org/apache/lucene/index/SegmentMerger.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/index/SegmentMerger.java (révision 1343822)
+++ lucene/core/src/java/org/apache/lucene/index/SegmentMerger.java (copie de travail)
@@ -80,7 +80,7 @@
new ReaderUtil.Gather(reader) {
@Override
protected void add(int base, AtomicReader r) {
- mergeState.readers.add(new MergeState.IndexReaderAndLiveDocs(r, r.getLiveDocs()));
+ mergeState.readers.add(new MergeState.IndexReaderAndLiveDocs(r, r.getLiveDocs(), r.numDeletedDocs()));
}
}.run();
} catch (IOException ioe) {
@@ -89,8 +89,8 @@
}
}
- final void add(SegmentReader reader, Bits liveDocs) {
- mergeState.readers.add(new MergeState.IndexReaderAndLiveDocs(reader, liveDocs));
+ final void add(SegmentReader reader, Bits liveDocs, int delCount) {
+ mergeState.readers.add(new MergeState.IndexReaderAndLiveDocs(reader, liveDocs, delCount));
}
/**
@@ -287,7 +287,7 @@
final int numReaders = mergeState.readers.size();
// Remap docIDs
- mergeState.docMaps = new int[numReaders][];
+ mergeState.docMaps = new MergeState.DocMap[numReaders];
mergeState.docBase = new int[numReaders];
mergeState.readerPayloadProcessor = new PayloadProcessorProvider.ReaderPayloadProcessor[numReaders];
mergeState.currentPayloadProcessor = new PayloadProcessorProvider.PayloadProcessor[numReaders];
@@ -300,30 +300,9 @@
final MergeState.IndexReaderAndLiveDocs reader = mergeState.readers.get(i);
mergeState.docBase[i] = docBase;
- final int maxDoc = reader.reader.maxDoc();
- final int docCount;
- final Bits liveDocs = reader.liveDocs;
- final int[] docMap;
- if (liveDocs != null) {
- int delCount = 0;
- docMap = new int[maxDoc];
- int newDocID = 0;
- for(int j=0;j<maxDoc;j++) {
- if (!liveDocs.get(j)) {
- docMap[j] = -1;
- delCount++;
- } else {
- docMap[j] = newDocID++;
- }
- }
- docCount = maxDoc - delCount;
- } else {
- docCount = maxDoc;
- docMap = null;
- }
-
+ final MergeState.DocMap docMap = MergeState.DocMap.build(reader);
mergeState.docMaps[i] = docMap;
- docBase += docCount;
+ docBase += docMap.numDocs();
if (mergeState.payloadProcessorProvider != null) {
mergeState.readerPayloadProcessor[i] = mergeState.payloadProcessorProvider.getReaderProcessor(reader.reader);
Index: lucene/core/src/java/org/apache/lucene/index/MergeState.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/index/MergeState.java (révision 1343822)
+++ lucene/core/src/java/org/apache/lucene/index/MergeState.java (copie de travail)
@@ -19,11 +19,12 @@
import java.util.List;
+import org.apache.lucene.index.PayloadProcessorProvider.PayloadProcessor;
import org.apache.lucene.index.PayloadProcessorProvider.ReaderPayloadProcessor;
-import org.apache.lucene.index.PayloadProcessorProvider.PayloadProcessor;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.InfoStream;
+import org.apache.lucene.util.packed.PackedInts;
/** Holds common state used during segment merging
*
@@ -33,17 +34,171 @@
public static class IndexReaderAndLiveDocs {
public final AtomicReader reader;
public final Bits liveDocs;
+ public final int numDeletedDocs;
- public IndexReaderAndLiveDocs(AtomicReader reader, Bits liveDocs) {
+ public IndexReaderAndLiveDocs(AtomicReader reader, Bits liveDocs, int numDeletedDocs) {
this.reader = reader;
this.liveDocs = liveDocs;
+ this.numDeletedDocs = numDeletedDocs;
}
}
+ public static abstract class DocMap {
+ private final Bits liveDocs;
+
+ protected DocMap(Bits liveDocs) {
+ this.liveDocs = liveDocs;
+ }
+
+ public static DocMap build(IndexReaderAndLiveDocs reader) {
+ final int maxDoc = reader.reader.maxDoc();
+ final int numDeletes = reader.numDeletedDocs;
+ final int numDocs = maxDoc - numDeletes;
+ assert reader.liveDocs != null || numDeletes == 0;
+ if (numDeletes == 0) {
+ return new NoDelDocMap(maxDoc);
+ } else if (numDeletes < numDocs) {
+ return buildDelCountDocmap(maxDoc, numDeletes, reader.liveDocs, PackedInts.FAST);
+ } else {
+ return buildDirectDocMap(maxDoc, numDocs, reader.liveDocs, PackedInts.FAST);
+ }
+ }
+
+ static DocMap buildDelCountDocmap(int maxDoc, int numDeletes, Bits liveDocs, float acceptableOverheadRatio) {
+ PackedInts.Mutable numDeletesSoFar = PackedInts.getMutable(maxDoc,
+ PackedInts.bitsRequired(numDeletes), acceptableOverheadRatio);
+ int del = 0;
+ for (int i = 0; i < maxDoc; ++i) {
+ if (!liveDocs.get(i)) {
+ ++del;
+ }
+ numDeletesSoFar.set(i, del);
+ }
+ assert del == numDeletes : "del=" + del + ", numdeletes=" + numDeletes;
+ return new DelCountDocMap(liveDocs, numDeletesSoFar);
+ }
+
+ static DocMap buildDirectDocMap(int maxDoc, int numDocs, Bits liveDocs, float acceptableOverheadRatio) {
+ PackedInts.Mutable docIds = PackedInts.getMutable(maxDoc,
+ PackedInts.bitsRequired(Math.max(0, numDocs - 1)), acceptableOverheadRatio);
+ int del = 0;
+ for (int i = 0; i < maxDoc; ++i) {
+ if (liveDocs.get(i)) {
+ docIds.set(i, i - del);
+ } else {
+ ++del;
+ }
+ }
+ assert numDocs + del == maxDoc : "maxDoc=" + maxDoc + ", del=" + del + ", numDocs=" + numDocs;
+ return new DirectDocMap(liveDocs, docIds, del);
+ }
+
+ public int get(int docId) {
+ if (liveDocs == null || liveDocs.get(docId)) {
+ return remap(docId);
+ } else {
+ return -1;
+ }
+ }
+
+ public abstract int remap(int docId);
+
+ public abstract int maxDoc();
+
+ public final int numDocs() {
+ return maxDoc() - numDeletedDocs();
+ }
+
+ public abstract int numDeletedDocs();
+
+ public boolean hasDeletions() {
+ return numDeletedDocs() > 0;
+ }
+
+ }
+
+ private static class NoDelDocMap extends DocMap {
+
+ private final int maxDoc;
+
+ private NoDelDocMap(int maxDoc) {
+ super(null);
+ this.maxDoc = maxDoc;
+ }
+
+ @Override
+ public int remap(int docId) {
+ return docId;
+ }
+
+ @Override
+ public int maxDoc() {
+ return maxDoc;
+ }
+
+ @Override
+ public int numDeletedDocs() {
+ return 0;
+ }
+ }
+
+ private static class DirectDocMap extends DocMap {
+
+ private final PackedInts.Mutable docIds;
+ private final int numDeletedDocs;
+
+ private DirectDocMap(Bits liveDocs, PackedInts.Mutable docIds, int numDeletedDocs) {
+ super(liveDocs);
+ this.docIds = docIds;
+ this.numDeletedDocs = numDeletedDocs;
+ }
+
+ @Override
+ public int remap(int docId) {
+ return (int) docIds.get(docId);
+ }
+
+ @Override
+ public int maxDoc() {
+ return docIds.size();
+ }
+
+ @Override
+ public int numDeletedDocs() {
+ return numDeletedDocs;
+ }
+ }
+
+ private static class DelCountDocMap extends DocMap {
+
+ private final PackedInts.Mutable numDeletesSoFar;
+
+ private DelCountDocMap(Bits liveDocs, PackedInts.Mutable numDeletesSoFar) {
+ super(liveDocs);
+ this.numDeletesSoFar = numDeletesSoFar;
+ }
+
+ @Override
+ public int remap(int docId) {
+ return docId - (int) numDeletesSoFar.get(docId);
+ }
+
+ @Override
+ public int maxDoc() {
+ return numDeletesSoFar.size();
+ }
+
+ @Override
+ public int numDeletedDocs() {
+ final int maxDoc = maxDoc();
+ return (int) numDeletesSoFar.get(maxDoc - 1);
+ }
+ }
+
public SegmentInfo segmentInfo;
public FieldInfos fieldInfos;
public List<IndexReaderAndLiveDocs> readers; // Readers & liveDocs being merged
- public int[][] docMaps; // Maps docIDs around deletions
+ public DocMap[] docMaps; // Maps docIDs around deletions
public int[] docBase; // New docID base per reader
public CheckAbort checkAbort;
public InfoStream infoStream;
@@ -65,8 +220,8 @@
public static class CheckAbort {
private double workCount;
- private MergePolicy.OneMerge merge;
- private Directory dir;
+ private final MergePolicy.OneMerge merge;
+ private final Directory dir;
public CheckAbort(MergePolicy.OneMerge merge, Directory dir) {
this.merge = merge;
this.dir = dir;
Index: lucene/core/src/test/org/apache/lucene/index/TestSegmentMerger.java
===================================================================
--- lucene/core/src/test/org/apache/lucene/index/TestSegmentMerger.java (révision 1343822)
+++ lucene/core/src/test/org/apache/lucene/index/TestSegmentMerger.java (copie de travail)
@@ -25,9 +25,11 @@
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.Constants;
+import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.InfoStream;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util._TestUtil;
+import org.apache.lucene.util.packed.PackedInts;
public class TestSegmentMerger extends LuceneTestCase {
@@ -139,4 +141,41 @@
TestSegmentReader.checkNorms(mergedReader);
mergedReader.close();
}
+
+ private static boolean equals(MergeState.DocMap map1, MergeState.DocMap map2) {
+ if (map1.maxDoc() != map2.maxDoc()) {
+ return false;
+ }
+ for (int i = 0; i < map1.maxDoc(); ++i) {
+ if (map1.get(i) != map2.get(i)) {
+ return false;
+ }
+ }
+ return true;
+ }
+
+ public void testBuildDocMap() {
+ final int maxDoc = 128;
+ final FixedBitSet liveDocs = new FixedBitSet(maxDoc);
+
+ MergeState.DocMap docMap1 = MergeState.DocMap.buildDelCountDocmap(maxDoc, maxDoc, liveDocs, PackedInts.COMPACT);
+ MergeState.DocMap docMap2 = MergeState.DocMap.buildDirectDocMap(maxDoc, 0, liveDocs, PackedInts.COMPACT);
+ assertTrue(equals(docMap1, docMap2));
+
+ liveDocs.set(1);
+ for (int i = 7; i < 79; ++i) {
+ liveDocs.set(i);
+ }
+ liveDocs.set(80);
+ liveDocs.set(88);
+ int numDocs = liveDocs.cardinality();
+ docMap1 = MergeState.DocMap.buildDelCountDocmap(maxDoc, maxDoc - numDocs, liveDocs, PackedInts.COMPACT);
+ docMap2 = MergeState.DocMap.buildDirectDocMap(maxDoc, numDocs, liveDocs, PackedInts.COMPACT);
+ assertTrue(equals(docMap1, docMap2));
+
+ liveDocs.set(0, maxDoc);
+ docMap1 = MergeState.DocMap.buildDelCountDocmap(maxDoc, 0, liveDocs, PackedInts.COMPACT);
+ docMap2 = MergeState.DocMap.buildDirectDocMap(maxDoc, maxDoc, liveDocs, PackedInts.COMPACT);
+ assertTrue(equals(docMap1, docMap2));
+ }
}