LUCENE-9816: lazy-init LZ4-HC hashtable in BlockTreeTermsWriter
The LZ4-HC hashtable is heavy (a 128 KB int[] plus a 128 KB short[]) and
must be filled with special values on initialization. This is a lot of
overhead for fields that might not use the compression at all.
Don't initialize the table for a field until we see hints that the data
might be compressible and we actually need the table to test that out.
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/blocktree/Lucene90BlockTreeTermsWriter.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/blocktree/Lucene90BlockTreeTermsWriter.java
index 11a02e3..3908992 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/blocktree/Lucene90BlockTreeTermsWriter.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/blocktree/Lucene90BlockTreeTermsWriter.java
@@ -917,6 +917,9 @@
// it out if the
// average suffix length is greater than 6.
if (suffixWriter.length() > 6L * numEntries) {
+ if (compressionHashTable == null) {
+ compressionHashTable = new LZ4.HighCompressionHashTable();
+ }
LZ4.compress(
suffixWriter.bytes(), 0, suffixWriter.length(), spareWriter, compressionHashTable);
if (spareWriter.size() < suffixWriter.length() - (suffixWriter.length() >>> 2)) {
@@ -1139,8 +1142,7 @@
private final ByteBuffersDataOutput metaWriter = ByteBuffersDataOutput.newResettableInstance();
private final ByteBuffersDataOutput spareWriter = ByteBuffersDataOutput.newResettableInstance();
private byte[] spareBytes = BytesRef.EMPTY_BYTES;
- private final LZ4.HighCompressionHashTable compressionHashTable =
- new LZ4.HighCompressionHashTable();
+ private LZ4.HighCompressionHashTable compressionHashTable;
}
private boolean closed;