Index: lucene/core/src/java/org/apache/lucene/codecs/lucene3x/TermInfosReaderIndex.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/codecs/lucene3x/TermInfosReaderIndex.java (revision 1603658)
+++ lucene/core/src/java/org/apache/lucene/codecs/lucene3x/TermInfosReaderIndex.java (working copy)
@@ -25,10 +25,10 @@
import org.apache.lucene.index.Term;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.MathUtil;
-import org.apache.lucene.util.RamUsageEstimator;
+import org.apache.lucene.util.PagedBytes;
import org.apache.lucene.util.PagedBytes.PagedBytesDataInput;
import org.apache.lucene.util.PagedBytes.PagedBytesDataOutput;
-import org.apache.lucene.util.PagedBytes;
+import org.apache.lucene.util.RamUsageEstimator;
import org.apache.lucene.util.packed.GrowableWriter;
import org.apache.lucene.util.packed.PackedInts;

@@ -114,8 +114,12 @@
dataInput = dataPagedBytes.getDataInput();
indexToDataOffset = indexToTerms.getMutable();

- ramBytesUsed = fields.length * (RamUsageEstimator.NUM_BYTES_OBJECT_REF + RamUsageEstimator.shallowSizeOfInstance(Term.class))
- + dataPagedBytes.ramBytesUsed() + indexToDataOffset.ramBytesUsed();
+ long ramBytesUsed = RamUsageEstimator.shallowSizeOf(fields);
+ ramBytesUsed += RamUsageEstimator.shallowSizeOf(dataInput);
+ ramBytesUsed += fields.length * RamUsageEstimator.shallowSizeOfInstance(Term.class);
+ ramBytesUsed += dataPagedBytes.ramBytesUsed();
+ ramBytesUsed += indexToDataOffset.ramBytesUsed();
+ this.ramBytesUsed = ramBytesUsed;
}

private static int estimatePageBits(long estSize) {
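A note on the two estimator calls used above: RamUsageEstimator.shallowSizeOf(Object) measures one concrete object, so for the fields array it counts the array header plus one reference slot per element, while shallowSizeOfInstance(Class) estimates an arbitrary instance of the class from its declared fields, without following references. The rewritten sum therefore covers the array itself, the cached PagedBytesDataInput, one shallow Term per slot, and the self-reported footprints of the paged bytes and the packed offsets. A minimal standalone sketch of the distinction (the array size is made up; this is not part of the patch):

import org.apache.lucene.index.Term;
import org.apache.lucene.util.RamUsageEstimator;

public class ShallowSizeDemo {
  public static void main(String[] args) {
    Term[] fields = new Term[8];
    // array header + 8 reference slots, regardless of what they point to
    long arrayBytes = RamUsageEstimator.shallowSizeOf(fields);
    // estimated footprint of one Term object; the field/text it references are not followed
    long perTerm = RamUsageEstimator.shallowSizeOfInstance(Term.class);
    System.out.println(arrayBytes + fields.length * perTerm + " bytes (shallow)");
  }
}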
Index: lucene/core/src/java/org/apache/lucene/util/PagedBytes.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/util/PagedBytes.java (revision 1603658)
+++ lucene/core/src/java/org/apache/lucene/util/PagedBytes.java (working copy)
@@ -236,8 +236,11 @@

@Override
public long ramBytesUsed() {
- long size = BASE_RAM_BYTES_USED + RamUsageEstimator.shallowSizeOf(blocks)
- + bytesUsedPerBlock * numBlocks;
+ long size = BASE_RAM_BYTES_USED + RamUsageEstimator.shallowSizeOf(blocks);
+ if (numBlocks > 0) {
+ size += (numBlocks - 1) * bytesUsedPerBlock;
+ size += RamUsageEstimator.sizeOf(blocks[numBlocks - 1]);
+ }
if (currentBlock != null) {
size += RamUsageEstimator.sizeOf(currentBlock);
}
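The subtlety this hunk fixes: after freeze() the final page is trimmed to its fill level, so charging bytesUsedPerBlock for every page overestimates a frozen PagedBytes, which the strengthened assertion in TestPagedBytes below now exercises. A standalone sketch of the before/after accounting, using plain arrays and made-up page sizes rather than the real class:

import org.apache.lucene.util.RamUsageEstimator;

public class LastBlockDemo {
  public static void main(String[] args) {
    // three pages; freeze() trimmed the last one to 100 bytes
    byte[][] blocks = { new byte[1024], new byte[1024], new byte[100] };
    int numBlocks = blocks.length;
    long bytesUsedPerBlock = RamUsageEstimator.sizeOf(blocks[0]);
    long before = bytesUsedPerBlock * numBlocks;               // pretends every page is full
    long after = (numBlocks - 1) * bytesUsedPerBlock
        + RamUsageEstimator.sizeOf(blocks[numBlocks - 1]);     // sizes the last page exactly
    System.out.println("overcount fixed: " + (before - after) + " bytes");
  }
}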
Index: lucene/core/src/test/org/apache/lucene/codecs/compressing/TestCompressingStoredFieldsFormat.java
===================================================================
--- lucene/core/src/test/org/apache/lucene/codecs/compressing/TestCompressingStoredFieldsFormat.java (revision 1603658)
+++ lucene/core/src/test/org/apache/lucene/codecs/compressing/TestCompressingStoredFieldsFormat.java (working copy)
@@ -29,7 +29,6 @@
import org.apache.lucene.index.BaseStoredFieldsFormatTestCase;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
-import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.store.Directory;
import org.junit.Test;

Index: lucene/core/src/test/org/apache/lucene/codecs/lucene3x/TestLucene3xPostingsFormat.java
===================================================================
--- lucene/core/src/test/org/apache/lucene/codecs/lucene3x/TestLucene3xPostingsFormat.java (revision 1603658)
+++ lucene/core/src/test/org/apache/lucene/codecs/lucene3x/TestLucene3xPostingsFormat.java (working copy)
@@ -1,12 +1,5 @@
package org.apache.lucene.codecs.lucene3x;

-import java.io.IOException;
-
-import org.apache.lucene.codecs.Codec;
-import org.apache.lucene.index.BasePostingsFormatTestCase;
-import org.apache.lucene.util.LuceneTestCase;
-import org.junit.BeforeClass;
-
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
@@ -24,6 +17,11 @@
* limitations under the License.
*/

+import org.apache.lucene.codecs.Codec;
+import org.apache.lucene.index.BasePostingsFormatTestCase;
+import org.apache.lucene.util.LuceneTestCase;
+import org.junit.BeforeClass;
+
/**
* Tests Lucene3x postings format
*/
@@ -40,10 +38,4 @@
protected Codec getCodec() {
return codec;
}
-
- @AwaitsFix(bugUrl="https://issues.apache.org/jira/browse/LUCENE-5773")
- @Override
- public void testRamBytesUsed() throws IOException {
- super.testRamBytesUsed();
- }
}
Index: lucene/core/src/test/org/apache/lucene/util/TestPagedBytes.java
===================================================================
--- lucene/core/src/test/org/apache/lucene/util/TestPagedBytes.java (revision 1603658)
+++ lucene/core/src/test/org/apache/lucene/util/TestPagedBytes.java (working copy)
@@ -27,7 +27,6 @@
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.store.MockDirectoryWrapper;
-import org.apache.lucene.util.PagedBytes.Reader;
import org.junit.Ignore;

public class TestPagedBytes extends LuceneTestCase {
@@ -196,6 +195,7 @@
}
assertEquals(RamUsageTester.sizeOf(b), b.ramBytesUsed());
final PagedBytes.Reader reader = b.freeze(random().nextBoolean());
+ assertEquals(RamUsageTester.sizeOf(b), b.ramBytesUsed());
assertEquals(RamUsageTester.sizeOf(reader), reader.ramBytesUsed());
}

Index: lucene/test-framework/src/java/org/apache/lucene/index/BaseIndexFileFormatTestCase.java
===================================================================
--- lucene/test-framework/src/java/org/apache/lucene/index/BaseIndexFileFormatTestCase.java (revision 1603658)
+++ lucene/test-framework/src/java/org/apache/lucene/index/BaseIndexFileFormatTestCase.java (working copy)
@@ -25,22 +25,23 @@
import java.util.HashMap;
import java.util.HashSet;
import java.util.IdentityHashMap;
+import java.util.List;
import java.util.Map;
import java.util.Set;

import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.codecs.Codec;
+import org.apache.lucene.codecs.mockrandom.MockRandomPostingsFormat;
import org.apache.lucene.codecs.sep.IntIndexInput;
import org.apache.lucene.document.Document;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IndexInput;
-import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CloseableThreadLocal;
+import org.apache.lucene.util.DoubleBarrelLRUCache;
import org.apache.lucene.util.InfoStream;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.RamUsageEstimator;
import org.apache.lucene.util.RamUsageTester;
-import org.apache.lucene.util.packed.PackedInts;

/**
* Common tests to all index formats.
@@ -50,9 +51,6 @@
// metadata or Directory-level objects
private static final Set<Class<?>> EXCLUDED_CLASSES = Collections.newSetFromMap(new IdentityHashMap<Class<?>,Boolean>());

- // Notorious singletons
- private static final Set<Object> EXCLUDED_OBJECTS = Collections.newSetFromMap(new IdentityHashMap<Object,Boolean>());
-
static {
// Directory objects, don't take into account eg. the NIO buffers
EXCLUDED_CLASSES.add(Directory.class);
@@ -61,6 +59,7 @@

// used for thread management, not by the index
EXCLUDED_CLASSES.add(CloseableThreadLocal.class);
+ EXCLUDED_CLASSES.add(ThreadLocal.class);

// don't follow references to the top-level reader
EXCLUDED_CLASSES.add(IndexReader.class);
@@ -73,18 +72,12 @@
EXCLUDED_CLASSES.add(SegmentCommitInfo.class);
EXCLUDED_CLASSES.add(FieldInfo.class);

- // singletons
- EXCLUDED_CLASSES.add(Codec.class);
- EXCLUDED_CLASSES.add(Enum.class);
- EXCLUDED_OBJECTS.add(BytesRef.getUTF8SortedAsUnicodeComparator());
- EXCLUDED_OBJECTS.add(BytesRef.getUTF8SortedAsUTF16Comparator());
- for (PackedInts.Format format : PackedInts.Format.values()) {
- for (int i = 1; i <= 64; ++i) {
- if (format.isSupported(i)) {
- EXCLUDED_OBJECTS.add(PackedInts.getDecoder(format, PackedInts.VERSION_CURRENT, i));
- }
- }
- }
+ // used by lucene3x to maintain a cache. Doesn't depend on the number of docs
+ EXCLUDED_CLASSES.add(DoubleBarrelLRUCache.class);
+
+ // constant overhead is typically due to strings
+ // TODO: can we remove this and still pass the test consistently?
+ EXCLUDED_CLASSES.add(String.class);
}

static class Accumulator extends RamUsageTester.Accumulator {
@@ -96,9 +89,6 @@
}

public long accumulateObject(Object o, long shallowSize, java.util.Map<Field, Object> fieldValues, java.util.Collection<Object> queue) {
- if (EXCLUDED_OBJECTS.contains(o)) {
- return 0L;
- }
for (Class<?> clazz = o.getClass(); clazz != null; clazz = clazz.getSuperclass()) {
if (EXCLUDED_CLASSES.contains(clazz) && o != root) {
return 0;
@@ -123,6 +113,14 @@
return v;
}

+ @Override
+ public long accumulateArray(Object array, long shallowSize,
+ List<Object> values, Collection<Object> queue) {
+ long v = super.accumulateArray(array, shallowSize, values, queue);
+ // System.out.println(array.getClass() + "=" + v);
+ return v;
+ }
+
};

/** Returns the codec to run tests against */
@@ -209,35 +207,48 @@

/** Test the accuracy of the ramBytesUsed estimations. */
public void testRamBytesUsed() throws IOException {
+ if (Codec.getDefault() instanceof RandomCodec) {
+ // this test relies on the fact that two segments will be written with
+ // the same codec so we need to disable MockRandomPF
+ final Set<String> avoidCodecs = new HashSet<>(((RandomCodec) Codec.getDefault()).avoidCodecs);
+ avoidCodecs.add(new MockRandomPostingsFormat().getName());
+ Codec.setDefault(new RandomCodec(random(), avoidCodecs));
+ }
Directory dir = newDirectory();
IndexWriterConfig cfg = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
IndexWriter w = new IndexWriter(dir, cfg);
// we need to index enough documents so that constant overhead doesn't dominate
final int numDocs = atLeast(10000);
+ AtomicReader reader1 = null;
for (int i = 0; i < numDocs; ++i) {
Document d = new Document();
addRandomFields(d);
w.addDocument(d);
+ if (i == 100) {
+ w.forceMerge(1);
+ w.commit();
+ reader1 = getOnlySegmentReader(DirectoryReader.open(dir));
+ }
}
w.forceMerge(1);
w.commit();
w.close();

- IndexReader reader = DirectoryReader.open(dir);
+ AtomicReader reader2 = getOnlySegmentReader(DirectoryReader.open(dir));

- for (AtomicReaderContext context : reader.leaves()) {
- final AtomicReader r = context.reader();
- // beware of lazy-loaded stuff
- new SimpleMergedSegmentWarmer(InfoStream.NO_OUTPUT).warm(r);
- final long actualBytes = RamUsageTester.sizeOf(r, new Accumulator(r));
- final long expectedBytes = ((SegmentReader) r).ramBytesUsed();
- final long absoluteError = actualBytes - expectedBytes;
- final double relativeError = (double) absoluteError / actualBytes;
- final String message = "Actual RAM usage " + actualBytes + ", but got " + expectedBytes + ", " + 100*relativeError + "% error";
- assertTrue(message, Math.abs(relativeError) < 0.20d || Math.abs(absoluteError) < 1000);
+ for (AtomicReader reader : Arrays.asList(reader1, reader2)) {
+ new SimpleMergedSegmentWarmer(InfoStream.NO_OUTPUT).warm(reader);
}

- reader.close();
+ final long actualBytes = RamUsageTester.sizeOf(reader2, new Accumulator(reader2)) - RamUsageTester.sizeOf(reader1, new Accumulator(reader1));
+ final long expectedBytes = ((SegmentReader) reader2).ramBytesUsed() - ((SegmentReader) reader1).ramBytesUsed();
+ final long absoluteError = actualBytes - expectedBytes;
+ final double relativeError = (double) absoluteError / actualBytes;
+ final String message = "Actual RAM usage " + actualBytes + ", but got " + expectedBytes + ", " + 100*relativeError + "% error";
+ assertTrue(message, Math.abs(relativeError) < 0.20d || Math.abs(absoluteError) < 1000);
+
+ reader1.close();
+ reader2.close();
dir.close();
}
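
The restructured test measures the same index twice, once after roughly 100 documents and once after all of them, and asserts on the difference between the two snapshots. Constant per-reader overhead (segment metadata, interned strings) shows up in both snapshots and cancels out of the delta, which is also why String.class can join the excluded classes above. A standalone sketch of the delta check with the same 20% / 1000-byte tolerances (hypothetical helper, not part of the patch):

// inputs: reflective sizes from RamUsageTester, estimates from ramBytesUsed()
static void assertDeltaClose(long actualSmall, long expectedSmall,
                             long actualLarge, long expectedLarge) {
  final long actualBytes = actualLarge - actualSmall;
  final long expectedBytes = expectedLarge - expectedSmall;
  final long absoluteError = actualBytes - expectedBytes;
  final double relativeError = (double) absoluteError / actualBytes;
  // a small delta passes on absolute error, a large one on relative error
  if (Math.abs(relativeError) >= 0.20d && Math.abs(absoluteError) >= 1000) {
    throw new AssertionError(100 * relativeError + "% error");
  }
}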
Index: lucene/test-framework/src/java/org/apache/lucene/index/RandomCodec.java
===================================================================
--- lucene/test-framework/src/java/org/apache/lucene/index/RandomCodec.java (revision 1603658)
+++ lucene/test-framework/src/java/org/apache/lucene/index/RandomCodec.java (working copy)
@@ -76,6 +76,8 @@
/** unique set of docvalues format names this codec knows about */
public Set<String> dvFormatNames = new HashSet<>();

+ public final Set<String> avoidCodecs;
+
/** memorized field->postingsformat mappings */
// note: we have to sync this map even though its just for debugging/toString,
// otherwise DWPT's .toString() calls that iterate over the map can
@@ -118,6 +120,7 @@

public RandomCodec(Random random, Set<String> avoidCodecs) {
this.perFieldSeed = random.nextInt();
+ this.avoidCodecs = avoidCodecs;
// TODO: make it possible to specify min/max iterms per
// block via CL:
int minItemsPerBlock = TestUtil.nextInt(random, 2, 100);