| Index: lucene/core/src/java/org/apache/lucene/codecs/lucene3x/TermInfosReaderIndex.java |
| =================================================================== |
| --- lucene/core/src/java/org/apache/lucene/codecs/lucene3x/TermInfosReaderIndex.java (revision 1603658) |
| +++ lucene/core/src/java/org/apache/lucene/codecs/lucene3x/TermInfosReaderIndex.java (working copy) |
| @@ -25,10 +25,10 @@ |
| import org.apache.lucene.index.Term; |
| import org.apache.lucene.util.BytesRef; |
| import org.apache.lucene.util.MathUtil; |
| -import org.apache.lucene.util.RamUsageEstimator; |
| +import org.apache.lucene.util.PagedBytes; |
| import org.apache.lucene.util.PagedBytes.PagedBytesDataInput; |
| import org.apache.lucene.util.PagedBytes.PagedBytesDataOutput; |
| -import org.apache.lucene.util.PagedBytes; |
| +import org.apache.lucene.util.RamUsageEstimator; |
| import org.apache.lucene.util.packed.GrowableWriter; |
| import org.apache.lucene.util.packed.PackedInts; |
| |
| @@ -114,8 +114,12 @@ |
| dataInput = dataPagedBytes.getDataInput(); |
| indexToDataOffset = indexToTerms.getMutable(); |
| |
| - ramBytesUsed = fields.length * (RamUsageEstimator.NUM_BYTES_OBJECT_REF + RamUsageEstimator.shallowSizeOfInstance(Term.class)) |
| - + dataPagedBytes.ramBytesUsed() + indexToDataOffset.ramBytesUsed(); |
| + long ramBytesUsed = RamUsageEstimator.shallowSizeOf(fields); |
| + ramBytesUsed += RamUsageEstimator.shallowSizeOf(dataInput); |
| + ramBytesUsed += fields.length * RamUsageEstimator.shallowSizeOfInstance(Term.class); |
| + ramBytesUsed += dataPagedBytes.ramBytesUsed(); |
| + ramBytesUsed += indexToDataOffset.ramBytesUsed(); |
| + this.ramBytesUsed = ramBytesUsed; |
| } |
| |
| private static int estimatePageBits(long estSize) { |
| Index: lucene/core/src/java/org/apache/lucene/util/PagedBytes.java |
| =================================================================== |
| --- lucene/core/src/java/org/apache/lucene/util/PagedBytes.java (revision 1603658) |
| +++ lucene/core/src/java/org/apache/lucene/util/PagedBytes.java (working copy) |
| @@ -236,8 +236,11 @@ |
| |
| @Override |
| public long ramBytesUsed() { |
| - long size = BASE_RAM_BYTES_USED + RamUsageEstimator.shallowSizeOf(blocks) |
| - + bytesUsedPerBlock * numBlocks; |
| + long size = BASE_RAM_BYTES_USED + RamUsageEstimator.shallowSizeOf(blocks); |
| + if (numBlocks > 0) { |
| + size += (numBlocks - 1) * bytesUsedPerBlock; |
| + size += RamUsageEstimator.sizeOf(blocks[numBlocks - 1]); |
| + } |
| if (currentBlock != null) { |
| size += RamUsageEstimator.sizeOf(currentBlock); |
| } |
| Index: lucene/core/src/test/org/apache/lucene/codecs/compressing/TestCompressingStoredFieldsFormat.java |
| =================================================================== |
| --- lucene/core/src/test/org/apache/lucene/codecs/compressing/TestCompressingStoredFieldsFormat.java (revision 1603658) |
| +++ lucene/core/src/test/org/apache/lucene/codecs/compressing/TestCompressingStoredFieldsFormat.java (working copy) |
| @@ -29,7 +29,6 @@ |
| import org.apache.lucene.index.BaseStoredFieldsFormatTestCase; |
| import org.apache.lucene.index.IndexWriter; |
| import org.apache.lucene.index.IndexWriterConfig; |
| -import org.apache.lucene.index.RandomIndexWriter; |
| import org.apache.lucene.store.Directory; |
| import org.junit.Test; |
| |
| Index: lucene/core/src/test/org/apache/lucene/codecs/lucene3x/TestLucene3xPostingsFormat.java |
| =================================================================== |
| --- lucene/core/src/test/org/apache/lucene/codecs/lucene3x/TestLucene3xPostingsFormat.java (revision 1603658) |
| +++ lucene/core/src/test/org/apache/lucene/codecs/lucene3x/TestLucene3xPostingsFormat.java (working copy) |
| @@ -1,12 +1,5 @@ |
| package org.apache.lucene.codecs.lucene3x; |
| |
| -import java.io.IOException; |
| - |
| -import org.apache.lucene.codecs.Codec; |
| -import org.apache.lucene.index.BasePostingsFormatTestCase; |
| -import org.apache.lucene.util.LuceneTestCase; |
| -import org.junit.BeforeClass; |
| - |
| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| @@ -24,6 +17,11 @@ |
| * limitations under the License. |
| */ |
| |
| +import org.apache.lucene.codecs.Codec; |
| +import org.apache.lucene.index.BasePostingsFormatTestCase; |
| +import org.apache.lucene.util.LuceneTestCase; |
| +import org.junit.BeforeClass; |
| + |
| /** |
| * Tests Lucene3x postings format |
| */ |
| @@ -40,10 +38,4 @@ |
| protected Codec getCodec() { |
| return codec; |
| } |
| - |
| - @AwaitsFix(bugUrl="https://issues.apache.org/jira/browse/LUCENE-5773") |
| - @Override |
| - public void testRamBytesUsed() throws IOException { |
| - super.testRamBytesUsed(); |
| - } |
| } |
| Index: lucene/core/src/test/org/apache/lucene/util/TestPagedBytes.java |
| =================================================================== |
| --- lucene/core/src/test/org/apache/lucene/util/TestPagedBytes.java (revision 1603658) |
| +++ lucene/core/src/test/org/apache/lucene/util/TestPagedBytes.java (working copy) |
| @@ -27,7 +27,6 @@ |
| import org.apache.lucene.store.IndexInput; |
| import org.apache.lucene.store.IndexOutput; |
| import org.apache.lucene.store.MockDirectoryWrapper; |
| -import org.apache.lucene.util.PagedBytes.Reader; |
| import org.junit.Ignore; |
| |
| public class TestPagedBytes extends LuceneTestCase { |
| @@ -196,6 +195,7 @@ |
| } |
| assertEquals(RamUsageTester.sizeOf(b), b.ramBytesUsed()); |
| final PagedBytes.Reader reader = b.freeze(random().nextBoolean()); |
| + assertEquals(RamUsageTester.sizeOf(b), b.ramBytesUsed()); |
| assertEquals(RamUsageTester.sizeOf(reader), reader.ramBytesUsed()); |
| } |
| |
| Index: lucene/test-framework/src/java/org/apache/lucene/index/BaseIndexFileFormatTestCase.java |
| =================================================================== |
| --- lucene/test-framework/src/java/org/apache/lucene/index/BaseIndexFileFormatTestCase.java (revision 1603658) |
| +++ lucene/test-framework/src/java/org/apache/lucene/index/BaseIndexFileFormatTestCase.java (working copy) |
| @@ -25,22 +25,23 @@ |
| import java.util.HashMap; |
| import java.util.HashSet; |
| import java.util.IdentityHashMap; |
| +import java.util.List; |
| import java.util.Map; |
| import java.util.Set; |
| |
| import org.apache.lucene.analysis.MockAnalyzer; |
| import org.apache.lucene.codecs.Codec; |
| +import org.apache.lucene.codecs.mockrandom.MockRandomPostingsFormat; |
| import org.apache.lucene.codecs.sep.IntIndexInput; |
| import org.apache.lucene.document.Document; |
| import org.apache.lucene.store.Directory; |
| import org.apache.lucene.store.IndexInput; |
| -import org.apache.lucene.util.BytesRef; |
| import org.apache.lucene.util.CloseableThreadLocal; |
| +import org.apache.lucene.util.DoubleBarrelLRUCache; |
| import org.apache.lucene.util.InfoStream; |
| import org.apache.lucene.util.LuceneTestCase; |
| import org.apache.lucene.util.RamUsageEstimator; |
| import org.apache.lucene.util.RamUsageTester; |
| -import org.apache.lucene.util.packed.PackedInts; |
| |
| /** |
| * Common tests to all index formats. |
| @@ -50,9 +51,6 @@ |
| // metadata or Directory-level objects |
| private static final Set<Class<?>> EXCLUDED_CLASSES = Collections.newSetFromMap(new IdentityHashMap<Class<?>,Boolean>()); |
| |
| - // Notorious singletons |
| - private static final Set<Object> EXCLUDED_OBJECTS = Collections.newSetFromMap(new IdentityHashMap<Object,Boolean>()); |
| - |
| static { |
| // Directory objects, don't take into account eg. the NIO buffers |
| EXCLUDED_CLASSES.add(Directory.class); |
| @@ -61,6 +59,7 @@ |
| |
| // used for thread management, not by the index |
| EXCLUDED_CLASSES.add(CloseableThreadLocal.class); |
| + EXCLUDED_CLASSES.add(ThreadLocal.class); |
| |
| // don't follow references to the top-level reader |
| EXCLUDED_CLASSES.add(IndexReader.class); |
| @@ -73,18 +72,12 @@ |
| EXCLUDED_CLASSES.add(SegmentCommitInfo.class); |
| EXCLUDED_CLASSES.add(FieldInfo.class); |
| |
| - // singletons |
| - EXCLUDED_CLASSES.add(Codec.class); |
| - EXCLUDED_CLASSES.add(Enum.class); |
| - EXCLUDED_OBJECTS.add(BytesRef.getUTF8SortedAsUnicodeComparator()); |
| - EXCLUDED_OBJECTS.add(BytesRef.getUTF8SortedAsUTF16Comparator()); |
| - for (PackedInts.Format format : PackedInts.Format.values()) { |
| - for (int i = 1; i <= 64; ++i) { |
| - if (format.isSupported(i)) { |
| - EXCLUDED_OBJECTS.add(PackedInts.getDecoder(format, PackedInts.VERSION_CURRENT, i)); |
| - } |
| - } |
| - } |
| + // used by lucene3x to maintain a cache. Doesn't depend on the number of docs |
| + EXCLUDED_CLASSES.add(DoubleBarrelLRUCache.class); |
| + |
| + // constant overhead is typically due to strings |
| + // TODO: can we remove this and still pass the test consistently? |
| + EXCLUDED_CLASSES.add(String.class); |
| } |
| |
| static class Accumulator extends RamUsageTester.Accumulator { |
| @@ -96,9 +89,6 @@ |
| } |
| |
| public long accumulateObject(Object o, long shallowSize, java.util.Map<Field, Object> fieldValues, java.util.Collection<Object> queue) { |
| - if (EXCLUDED_OBJECTS.contains(o)) { |
| - return 0L; |
| - } |
| for (Class<?> clazz = o.getClass(); clazz != null; clazz = clazz.getSuperclass()) { |
| if (EXCLUDED_CLASSES.contains(clazz) && o != root) { |
| return 0; |
| @@ -123,6 +113,14 @@ |
| return v; |
| } |
| |
| + @Override |
| + public long accumulateArray(Object array, long shallowSize, |
| + List<Object> values, Collection<Object> queue) { |
| + long v = super.accumulateArray(array, shallowSize, values, queue); |
| + // System.out.println(array.getClass() + "=" + v); |
| + return v; |
| + } |
| + |
| }; |
| |
| /** Returns the codec to run tests against */ |
| @@ -209,35 +207,48 @@ |
| |
| /** Test the accuracy of the ramBytesUsed estimations. */ |
| public void testRamBytesUsed() throws IOException { |
| + if (Codec.getDefault() instanceof RandomCodec) { |
| + // this test relies on the fact that two segments will be written with |
| + // the same codec so we need to disable MockRandomPF |
| + final Set<String> avoidCodecs = new HashSet<>(((RandomCodec) Codec.getDefault()).avoidCodecs); |
| + avoidCodecs.add(new MockRandomPostingsFormat().getName()); |
| + Codec.setDefault(new RandomCodec(random(), avoidCodecs)); |
| + } |
| Directory dir = newDirectory(); |
| IndexWriterConfig cfg = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())); |
| IndexWriter w = new IndexWriter(dir, cfg); |
| // we need to index enough documents so that constant overhead doesn't dominate |
| final int numDocs = atLeast(10000); |
| + AtomicReader reader1 = null; |
| for (int i = 0; i < numDocs; ++i) { |
| Document d = new Document(); |
| addRandomFields(d); |
| w.addDocument(d); |
| + if (i == 100) { |
| + w.forceMerge(1); |
| + w.commit(); |
| + reader1 = getOnlySegmentReader(DirectoryReader.open(dir)); |
| + } |
| } |
| w.forceMerge(1); |
| w.commit(); |
| w.close(); |
| |
| - IndexReader reader = DirectoryReader.open(dir); |
| + AtomicReader reader2 = getOnlySegmentReader(DirectoryReader.open(dir)); |
| |
| - for (AtomicReaderContext context : reader.leaves()) { |
| - final AtomicReader r = context.reader(); |
| - // beware of lazy-loaded stuff |
| - new SimpleMergedSegmentWarmer(InfoStream.NO_OUTPUT).warm(r); |
| - final long actualBytes = RamUsageTester.sizeOf(r, new Accumulator(r)); |
| - final long expectedBytes = ((SegmentReader) r).ramBytesUsed(); |
| - final long absoluteError = actualBytes - expectedBytes; |
| - final double relativeError = (double) absoluteError / actualBytes; |
| - final String message = "Actual RAM usage " + actualBytes + ", but got " + expectedBytes + ", " + 100*relativeError + "% error"; |
| - assertTrue(message, Math.abs(relativeError) < 0.20d || Math.abs(absoluteError) < 1000); |
| + for (AtomicReader reader : Arrays.asList(reader1, reader2)) { |
| + new SimpleMergedSegmentWarmer(InfoStream.NO_OUTPUT).warm(reader); |
| } |
| |
| - reader.close(); |
| + final long actualBytes = RamUsageTester.sizeOf(reader2, new Accumulator(reader2)) - RamUsageTester.sizeOf(reader1, new Accumulator(reader1)); |
| + final long expectedBytes = ((SegmentReader) reader2).ramBytesUsed() - ((SegmentReader) reader1).ramBytesUsed(); |
| + final long absoluteError = actualBytes - expectedBytes; |
| + final double relativeError = (double) absoluteError / actualBytes; |
| + final String message = "Actual RAM usage " + actualBytes + ", but got " + expectedBytes + ", " + 100*relativeError + "% error"; |
| + assertTrue(message, Math.abs(relativeError) < 0.20d || Math.abs(absoluteError) < 1000); |
| + |
| + reader1.close(); |
| + reader2.close(); |
| dir.close(); |
| } |
| |
| Index: lucene/test-framework/src/java/org/apache/lucene/index/RandomCodec.java |
| =================================================================== |
| --- lucene/test-framework/src/java/org/apache/lucene/index/RandomCodec.java (revision 1603658) |
| +++ lucene/test-framework/src/java/org/apache/lucene/index/RandomCodec.java (working copy) |
| @@ -76,6 +76,8 @@ |
| /** unique set of docvalues format names this codec knows about */ |
| public Set<String> dvFormatNames = new HashSet<>(); |
| |
| + public final Set<String> avoidCodecs; |
| + |
| /** memorized field->postingsformat mappings */ |
| // note: we have to sync this map even though its just for debugging/toString, |
| // otherwise DWPT's .toString() calls that iterate over the map can |
| @@ -118,6 +120,7 @@ |
| |
| public RandomCodec(Random random, Set<String> avoidCodecs) { |
| this.perFieldSeed = random.nextInt(); |
| + this.avoidCodecs = avoidCodecs; |
| // TODO: make it possible to specify min/max iterms per |
| // block via CL: |
| int minItemsPerBlock = TestUtil.nextInt(random, 2, 100); |