From 83b8bb91cde00e1e4abd39bba4bdd62a8fa00966 Mon Sep 17 00:00:00 2001
From: Boaz Leskes <b.leskes@gmail.com>
Date: Thu, 25 Jul 2013 15:48:57 +0200
Subject: [PATCH 1/3] Introduced bulk retrieval to the AbstractAppendingLongBuffer
classes for faster reads. Introduced a new variant, AppendingPackedLongBuffer,
which relies solely on PackedInts as a back end. This new class is useful
where people have non-negative numbers with a uniform distribution over a
fixed (limited) range, e.g. facet ordinals. To distinguish it from
AppendingPackedLongBuffer, the delta-based AppendingLongBuffer was renamed to
AppendingDeltaPackedLongBuffer. Fixed an issue with NullReader where it
didn't respect its valueCount in bulk gets.
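Below is a minimal usage sketch of the new bulk API introduced by this patch
(illustrative only, not part of the change): it fills an
AppendingPackedLongBuffer with small non-negative values and drains it with
the new get(long, long[], int, int), which reads at least one and at most
len values per call and returns the count actually read.

import org.apache.lucene.util.packed.AppendingPackedLongBuffer;
import org.apache.lucene.util.packed.PackedInts;

// Illustrative sketch, not part of the patch: exercises add(), freeze()
// and the new bulk get() of AppendingPackedLongBuffer.
public class BulkGetSketch {
  public static void main(String[] args) {
    AppendingPackedLongBuffer buf = new AppendingPackedLongBuffer(PackedInts.COMPACT);
    for (int i = 0; i < 100000; i++) {
      buf.add(i & 7); // uniform over a fixed (limited) range, e.g. facet ordinals
    }
    buf.freeze(); // packs the pending page; subsequent add() calls fail

    long[] spare = new long[1024];
    long sum = 0;
    for (long i = 0; i < buf.size(); ) {
      // get() never reads across a page boundary, so it may return fewer
      // than the requested number of values; always advance by its result.
      int read = buf.get(i, spare, 0, (int) Math.min(spare.length, buf.size() - i));
      for (int k = 0; k < read; k++) {
        sum += spare[k];
      }
      i += read;
    }
    System.out.println("sum=" + sum);
  }
}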
diff --git lucene/core/src/java/org/apache/lucene/index/BinaryDocValuesWriter.java lucene/core/src/java/org/apache/lucene/index/BinaryDocValuesWriter.java
index e9cc2fa..643408f 100644
--- lucene/core/src/java/org/apache/lucene/index/BinaryDocValuesWriter.java
+++ lucene/core/src/java/org/apache/lucene/index/BinaryDocValuesWriter.java
@@ -26,7 +26,8 @@ import org.apache.lucene.util.ByteBlockPool.DirectTrackingAllocator;
import org.apache.lucene.util.ByteBlockPool;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.Counter;
-import org.apache.lucene.util.packed.AppendingLongBuffer;
+import org.apache.lucene.util.packed.AppendingDeltaPackedLongBuffer;
+import org.apache.lucene.util.packed.PackedInts;
import static org.apache.lucene.util.ByteBlockPool.BYTE_BLOCK_SIZE;
@@ -36,14 +37,14 @@ import static org.apache.lucene.util.ByteBlockPool.BYTE_BLOCK_SIZE;
class BinaryDocValuesWriter extends DocValuesWriter {
private final ByteBlockPool pool;
- private final AppendingLongBuffer lengths;
+ private final AppendingDeltaPackedLongBuffer lengths;
private final FieldInfo fieldInfo;
private int addedValues = 0;
public BinaryDocValuesWriter(FieldInfo fieldInfo, Counter iwBytesUsed) {
this.fieldInfo = fieldInfo;
this.pool = new ByteBlockPool(new DirectTrackingAllocator(iwBytesUsed));
- this.lengths = new AppendingLongBuffer();
+ this.lengths = new AppendingDeltaPackedLongBuffer(PackedInts.COMPACT);
}
public void addValue(int docID, BytesRef value) {
@@ -90,7 +91,7 @@ class BinaryDocValuesWriter extends DocValuesWriter {
// iterates over the values we have in ram
private class BytesIterator implements Iterator<BytesRef> {
final BytesRef value = new BytesRef();
- final AppendingLongBuffer.Iterator lengthsIterator = lengths.iterator();
+ final AppendingDeltaPackedLongBuffer.Iterator lengthsIterator = lengths.iterator();
final int size = (int) lengths.size();
final int maxDoc;
int upto;
diff --git lucene/core/src/java/org/apache/lucene/index/MultiDocValues.java lucene/core/src/java/org/apache/lucene/index/MultiDocValues.java
index f4aa8cd..8ca9ccf 100644
--- lucene/core/src/java/org/apache/lucene/index/MultiDocValues.java
+++ lucene/core/src/java/org/apache/lucene/index/MultiDocValues.java
@@ -23,8 +23,9 @@ import java.util.List;
import org.apache.lucene.index.MultiTermsEnum.TermsEnumIndex;
import org.apache.lucene.index.MultiTermsEnum.TermsEnumWithSlice;
import org.apache.lucene.util.BytesRef;
-import org.apache.lucene.util.packed.AppendingLongBuffer;
+import org.apache.lucene.util.packed.AppendingDeltaPackedLongBuffer;
import org.apache.lucene.util.packed.MonotonicAppendingLongBuffer;
+import org.apache.lucene.util.packed.PackedInts;
/**
* A wrapper for CompositeIndexReader providing access to DocValues.
@@ -277,7 +278,7 @@ public class MultiDocValues {
// globalOrd -> (globalOrd - segmentOrd)
final MonotonicAppendingLongBuffer globalOrdDeltas;
// globalOrd -> sub index
- final AppendingLongBuffer subIndexes;
+ final AppendingDeltaPackedLongBuffer subIndexes;
// segmentOrd -> (globalOrd - segmentOrd)
final MonotonicAppendingLongBuffer ordDeltas[];
@@ -293,8 +294,8 @@ public class MultiDocValues {
// create the ordinal mappings by pulling a termsenum over each sub's
// unique terms, and walking a multitermsenum over those
this.owner = owner;
- globalOrdDeltas = new MonotonicAppendingLongBuffer();
- subIndexes = new AppendingLongBuffer();
+ globalOrdDeltas = new MonotonicAppendingLongBuffer(PackedInts.COMPACT);
+ subIndexes = new AppendingDeltaPackedLongBuffer(PackedInts.COMPACT);
ordDeltas = new MonotonicAppendingLongBuffer[subs.length];
for (int i = 0; i < ordDeltas.length; i++) {
ordDeltas[i] = new MonotonicAppendingLongBuffer();
diff --git lucene/core/src/java/org/apache/lucene/index/NumericDocValuesWriter.java lucene/core/src/java/org/apache/lucene/index/NumericDocValuesWriter.java
index a77e8ea..cc07083 100644
--- lucene/core/src/java/org/apache/lucene/index/NumericDocValuesWriter.java
+++ lucene/core/src/java/org/apache/lucene/index/NumericDocValuesWriter.java
@@ -23,7 +23,8 @@ import java.util.NoSuchElementException;
import org.apache.lucene.codecs.DocValuesConsumer;
import org.apache.lucene.util.Counter;
-import org.apache.lucene.util.packed.AppendingLongBuffer;
+import org.apache.lucene.util.packed.AppendingDeltaPackedLongBuffer;
+import org.apache.lucene.util.packed.PackedInts;
/** Buffers up pending long per doc, then flushes when
* segment flushes. */
@@ -31,13 +32,13 @@ class NumericDocValuesWriter extends DocValuesWriter {
private final static long MISSING = 0L;
- private AppendingLongBuffer pending;
+ private AppendingDeltaPackedLongBuffer pending;
private final Counter iwBytesUsed;
private long bytesUsed;
private final FieldInfo fieldInfo;
public NumericDocValuesWriter(FieldInfo fieldInfo, Counter iwBytesUsed) {
- pending = new AppendingLongBuffer();
+ pending = new AppendingDeltaPackedLongBuffer(PackedInts.COMPACT);
bytesUsed = pending.ramBytesUsed();
this.fieldInfo = fieldInfo;
this.iwBytesUsed = iwBytesUsed;
@@ -89,7 +90,7 @@ class NumericDocValuesWriter extends DocValuesWriter {
// iterates over the values we have in ram
private class NumericIterator implements Iterator<Number> {
- final AppendingLongBuffer.Iterator iter = pending.iterator();
+ final AppendingDeltaPackedLongBuffer.Iterator iter = pending.iterator();
final int size = (int)pending.size();
final int maxDoc;
int upto;
diff --git lucene/core/src/java/org/apache/lucene/index/SortedDocValuesWriter.java lucene/core/src/java/org/apache/lucene/index/SortedDocValuesWriter.java
index 2d038e3..5576172 100644
--- lucene/core/src/java/org/apache/lucene/index/SortedDocValuesWriter.java
+++ lucene/core/src/java/org/apache/lucene/index/SortedDocValuesWriter.java
@@ -30,13 +30,14 @@ import org.apache.lucene.util.BytesRefHash.DirectBytesStartArray;
import org.apache.lucene.util.BytesRefHash;
import org.apache.lucene.util.Counter;
import org.apache.lucene.util.RamUsageEstimator;
-import org.apache.lucene.util.packed.AppendingLongBuffer;
+import org.apache.lucene.util.packed.AppendingDeltaPackedLongBuffer;
+import org.apache.lucene.util.packed.PackedInts;
/** Buffers up pending byte[] per doc, deref and sorting via
* int ord, then flushes when segment flushes. */
class SortedDocValuesWriter extends DocValuesWriter {
final BytesRefHash hash;
- private AppendingLongBuffer pending;
+ private AppendingDeltaPackedLongBuffer pending;
private final Counter iwBytesUsed;
private long bytesUsed; // this currently only tracks differences in 'pending'
private final FieldInfo fieldInfo;
@@ -51,7 +52,7 @@ class SortedDocValuesWriter extends DocValuesWriter {
new ByteBlockPool.DirectTrackingAllocator(iwBytesUsed)),
BytesRefHash.DEFAULT_CAPACITY,
new DirectBytesStartArray(BytesRefHash.DEFAULT_CAPACITY, iwBytesUsed));
- pending = new AppendingLongBuffer();
+ pending = new AppendingDeltaPackedLongBuffer(PackedInts.COMPACT);
bytesUsed = pending.ramBytesUsed();
iwBytesUsed.addAndGet(bytesUsed);
}
@@ -176,7 +177,7 @@ class SortedDocValuesWriter extends DocValuesWriter {
// iterates over the ords for each doc we have in ram
private class OrdsIterator implements Iterator<Number> {
- final AppendingLongBuffer.Iterator iter = pending.iterator();
+ final AppendingDeltaPackedLongBuffer.Iterator iter = pending.iterator();
final int ordMap[];
final int maxDoc;
int docUpto;
diff --git lucene/core/src/java/org/apache/lucene/index/SortedSetDocValuesWriter.java lucene/core/src/java/org/apache/lucene/index/SortedSetDocValuesWriter.java
index 43a5ae7..db48b48 100644
--- lucene/core/src/java/org/apache/lucene/index/SortedSetDocValuesWriter.java
+++ lucene/core/src/java/org/apache/lucene/index/SortedSetDocValuesWriter.java
@@ -32,14 +32,15 @@ import org.apache.lucene.util.BytesRefHash.DirectBytesStartArray;
import org.apache.lucene.util.BytesRefHash;
import org.apache.lucene.util.Counter;
import org.apache.lucene.util.RamUsageEstimator;
-import org.apache.lucene.util.packed.AppendingLongBuffer;
+import org.apache.lucene.util.packed.AppendingDeltaPackedLongBuffer;
+import org.apache.lucene.util.packed.PackedInts;
/** Buffers up pending byte[]s per doc, deref and sorting via
* int ord, then flushes when segment flushes. */
class SortedSetDocValuesWriter extends DocValuesWriter {
final BytesRefHash hash;
- private AppendingLongBuffer pending; // stream of all termIDs
- private AppendingLongBuffer pendingCounts; // termIDs per doc
+ private AppendingDeltaPackedLongBuffer pending; // stream of all termIDs
+ private AppendingDeltaPackedLongBuffer pendingCounts; // termIDs per doc
private final Counter iwBytesUsed;
private long bytesUsed; // this only tracks differences in 'pending' and 'pendingCounts'
private final FieldInfo fieldInfo;
@@ -56,8 +57,8 @@ class SortedSetDocValuesWriter extends DocValuesWriter {
new ByteBlockPool.DirectTrackingAllocator(iwBytesUsed)),
BytesRefHash.DEFAULT_CAPACITY,
new DirectBytesStartArray(BytesRefHash.DEFAULT_CAPACITY, iwBytesUsed));
- pending = new AppendingLongBuffer();
- pendingCounts = new AppendingLongBuffer();
+ pending = new AppendingDeltaPackedLongBuffer(PackedInts.COMPACT);
+ pendingCounts = new AppendingDeltaPackedLongBuffer(PackedInts.COMPACT);
bytesUsed = pending.ramBytesUsed() + pendingCounts.ramBytesUsed();
iwBytesUsed.addAndGet(bytesUsed);
}
@@ -224,8 +225,8 @@ class SortedSetDocValuesWriter extends DocValuesWriter {
// iterates over the ords for each doc we have in ram
private class OrdsIterator implements Iterator<Number> {
- final AppendingLongBuffer.Iterator iter = pending.iterator();
- final AppendingLongBuffer.Iterator counts = pendingCounts.iterator();
+ final AppendingDeltaPackedLongBuffer.Iterator iter = pending.iterator();
+ final AppendingDeltaPackedLongBuffer.Iterator counts = pendingCounts.iterator();
final int ordMap[];
final long numOrds;
long ordUpto;
@@ -273,7 +274,7 @@ class SortedSetDocValuesWriter extends DocValuesWriter {
}
private class OrdCountIterator implements Iterator<Number> {
- final AppendingLongBuffer.Iterator iter = pendingCounts.iterator();
+ final AppendingDeltaPackedLongBuffer.Iterator iter = pendingCounts.iterator();
final int maxDoc;
int docUpto;
diff --git lucene/core/src/java/org/apache/lucene/util/WAH8DocIdSet.java lucene/core/src/java/org/apache/lucene/util/WAH8DocIdSet.java
index be6baf1..c28a918 100644
--- lucene/core/src/java/org/apache/lucene/util/WAH8DocIdSet.java
+++ lucene/core/src/java/org/apache/lucene/util/WAH8DocIdSet.java
@@ -27,6 +27,7 @@ import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.store.ByteArrayDataInput;
import org.apache.lucene.store.DataInput;
import org.apache.lucene.util.packed.MonotonicAppendingLongBuffer;
+import org.apache.lucene.util.packed.PackedInts;
/**
* {@link DocIdSet} implementation based on word-aligned hybrid encoding on
@@ -330,9 +331,9 @@ public final class WAH8DocIdSet extends DocIdSet {
} else {
final int pageSize = 128;
final int initialPageCount = (valueCount + pageSize - 1) / pageSize;
- final MonotonicAppendingLongBuffer positions = new MonotonicAppendingLongBuffer(initialPageCount, pageSize);
- final MonotonicAppendingLongBuffer wordNums = new MonotonicAppendingLongBuffer(initialPageCount, pageSize);
-
+ final MonotonicAppendingLongBuffer positions = new MonotonicAppendingLongBuffer(initialPageCount, pageSize, PackedInts.COMPACT);
+ final MonotonicAppendingLongBuffer wordNums = new MonotonicAppendingLongBuffer(initialPageCount, pageSize, PackedInts.COMPACT);
+
positions.add(0L);
wordNums.add(0L);
final Iterator it = new Iterator(data, cardinality, Integer.MAX_VALUE, SINGLE_ZERO_BUFFER, SINGLE_ZERO_BUFFER);
diff --git lucene/core/src/java/org/apache/lucene/util/packed/AbstractAppendingLongBuffer.java lucene/core/src/java/org/apache/lucene/util/packed/AbstractAppendingLongBuffer.java
index 4fab936..c0da058 100644
--- lucene/core/src/java/org/apache/lucene/util/packed/AbstractAppendingLongBuffer.java
+++ lucene/core/src/java/org/apache/lucene/util/packed/AbstractAppendingLongBuffer.java
@@ -17,14 +17,14 @@ package org.apache.lucene.util.packed;
* limitations under the License.
*/
-import static org.apache.lucene.util.packed.PackedInts.checkBlockSize;
+import org.apache.lucene.util.ArrayUtil;
+import org.apache.lucene.util.RamUsageEstimator;
import java.util.Arrays;
-import org.apache.lucene.util.ArrayUtil;
-import org.apache.lucene.util.RamUsageEstimator;
+import static org.apache.lucene.util.packed.PackedInts.checkBlockSize;
-/** Common functionality shared by {@link AppendingLongBuffer} and {@link MonotonicAppendingLongBuffer}. */
+/** Common functionality shared by {@link AppendingDeltaPackedLongBuffer} and {@link MonotonicAppendingLongBuffer}. */
abstract class AbstractAppendingLongBuffer {
static final int MIN_PAGE_SIZE = 64;
@@ -33,21 +33,21 @@ abstract class AbstractAppendingLongBuffer {
static final int MAX_PAGE_SIZE = 1 << 20;
final int pageShift, pageMask;
- long[] minValues;
- PackedInts.Reader[] deltas;
- private long deltasBytes;
+ PackedInts.Reader[] values;
+ private long valuesBytes;
int valuesOff;
long[] pending;
int pendingOff;
+ float acceptableOverheadRatio;
- AbstractAppendingLongBuffer(int initialBlockCount, int pageSize) {
- minValues = new long[initialBlockCount];
- deltas = new PackedInts.Reader[initialBlockCount];
+ AbstractAppendingLongBuffer(int initialBlockCount, int pageSize, float acceptableOverheadRatio) {
+ values = new PackedInts.Reader[initialBlockCount];
pending = new long[pageSize];
pageShift = checkBlockSize(pageSize, MIN_PAGE_SIZE, MAX_PAGE_SIZE);
pageMask = pageSize - 1;
valuesOff = 0;
pendingOff = 0;
+ this.acceptableOverheadRatio = acceptableOverheadRatio;
}
final int pageSize() {
@@ -58,7 +58,7 @@ abstract class AbstractAppendingLongBuffer {
public final long size() {
long size = pendingOff;
if (valuesOff > 0) {
- size += deltas[valuesOff - 1].size();
+ size += values[valuesOff - 1].size();
}
if (valuesOff > 1) {
size += (long) (valuesOff - 1) * pageSize();
@@ -73,12 +73,12 @@ abstract class AbstractAppendingLongBuffer {
}
if (pendingOff == pending.length) {
// check size
- if (deltas.length == valuesOff) {
+ if (values.length == valuesOff) {
final int newLength = ArrayUtil.oversize(valuesOff + 1, 8);
grow(newLength);
}
packPendingValues();
- deltasBytes += deltas[valuesOff].ramBytesUsed();
+ valuesBytes += values[valuesOff].ramBytesUsed();
++valuesOff;
// reset pending buffer
pendingOff = 0;
@@ -87,8 +87,7 @@ abstract class AbstractAppendingLongBuffer {
}
void grow(int newBlockCount) {
- minValues = Arrays.copyOf(minValues, newBlockCount);
- deltas = Arrays.copyOf(deltas, newBlockCount);
+ values = Arrays.copyOf(values, newBlockCount);
}
abstract void packPendingValues();
@@ -101,11 +100,33 @@ abstract class AbstractAppendingLongBuffer {
return get(block, element);
}
+ /**
+ * Bulk get: read at least one and at most <code>len</code> longs starting
+ * from <code>index</code> into <code>arr[off:off+len]</code> and return
+ * the actual number of values that have been read.
+ */
+ public final int get(long index, long[] arr, int off, int len) {
+ assert len > 0 : "len must be > 0 (got " + len + ")";
+ assert index >= 0 && index < size();
+ assert off + len <= arr.length;
+
+ int block = (int) (index >> pageShift);
+ int element = (int) (index & pageMask);
+ return get(block, element, arr, off, len);
+ }
+
+
abstract long get(int block, int element);
- abstract Iterator iterator();
+ abstract int get(int block, int element, long[] arr, int off, int len);
+
+
+ /** Return an iterator over the values of this buffer. */
+ public Iterator iterator() {
+ return new Iterator();
+ }
- abstract class Iterator {
+ /** A long iterator. */
+ public final class Iterator {
long[] currentValues;
int vOff, pOff;
@@ -117,12 +138,22 @@ abstract class AbstractAppendingLongBuffer {
currentValues = pending;
currentCount = pendingOff;
} else {
- currentValues = new long[deltas[0].size()];
+ currentValues = new long[values[0].size()];
fillValues();
}
}
- abstract void fillValues();
+ void fillValues() {
+ if (vOff == valuesOff) {
+ currentValues = pending;
+ currentCount = pendingOff;
+ } else {
+ currentCount = values[vOff].size();
+ for (int k = 0; k < currentCount; ) {
+ k += get(vOff, k, currentValues, k, currentCount - k);
+ }
+ }
+ }
/** Whether or not there are remaining values. */
public final boolean hasNext() {
@@ -149,33 +180,31 @@ abstract class AbstractAppendingLongBuffer {
long baseRamBytesUsed() {
return RamUsageEstimator.NUM_BYTES_OBJECT_HEADER
- + 3 * RamUsageEstimator.NUM_BYTES_OBJECT_REF // the 3 arrays
+ + 2 * RamUsageEstimator.NUM_BYTES_OBJECT_REF // the 2 arrays
+ 2 * RamUsageEstimator.NUM_BYTES_INT // the 2 offsets
+ 2 * RamUsageEstimator.NUM_BYTES_INT // pageShift, pageMask
- + RamUsageEstimator.NUM_BYTES_LONG; // deltasBytes
+ + RamUsageEstimator.NUM_BYTES_FLOAT // acceptable overhead
+ + RamUsageEstimator.NUM_BYTES_LONG; // valuesBytes
}
- /**
- * Return the number of bytes used by this instance.
- */
+ /** Return the number of bytes used by this instance. */
public long ramBytesUsed() {
// TODO: this is called per-doc-per-norms/dv-field, can we optimize this?
long bytesUsed = RamUsageEstimator.alignObjectSize(baseRamBytesUsed())
+ (pending != null ? RamUsageEstimator.sizeOf(pending) : 0L)
- + RamUsageEstimator.sizeOf(minValues)
- + RamUsageEstimator.alignObjectSize(RamUsageEstimator.NUM_BYTES_ARRAY_HEADER + (long) RamUsageEstimator.NUM_BYTES_OBJECT_REF * deltas.length); // values
+ + RamUsageEstimator.alignObjectSize(RamUsageEstimator.NUM_BYTES_ARRAY_HEADER + (long) RamUsageEstimator.NUM_BYTES_OBJECT_REF * values.length); // values
- return bytesUsed + deltasBytes;
+ return bytesUsed + valuesBytes;
}
/** Pack all pending values in this buffer. Subsequent calls to {@link #add(long)} will fail. */
public void freeze() {
if (pendingOff > 0) {
- if (deltas.length == valuesOff) {
+ if (values.length == valuesOff) {
grow(valuesOff + 1); // don't oversize!
}
packPendingValues();
- deltasBytes += deltas[valuesOff].ramBytesUsed();
+ valuesBytes += values[valuesOff].ramBytesUsed();
++valuesOff;
pendingOff = 0;
}
diff --git lucene/core/src/java/org/apache/lucene/util/packed/AppendingDeltaPackedLongBuffer.java lucene/core/src/java/org/apache/lucene/util/packed/AppendingDeltaPackedLongBuffer.java
new file mode 100644
index 0000000..f5ea192
--- /dev/null
+++ lucene/core/src/java/org/apache/lucene/util/packed/AppendingDeltaPackedLongBuffer.java
@@ -0,0 +1,136 @@
+package org.apache.lucene.util.packed;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+import org.apache.lucene.util.RamUsageEstimator;
+
+import java.util.Arrays;
+
+/**
+ * Utility class to buffer a list of signed longs in memory. This class only
+ * supports appending and is optimized for the case where values are close to
+ * each other.
+ *
+ * @lucene.internal
+ */
+public final class AppendingDeltaPackedLongBuffer extends AbstractAppendingLongBuffer {
+
+ long[] minValues;
+
+
+ /** Create an {@link AppendingDeltaPackedLongBuffer}.
+ * @param initialPageCount the initial number of pages
+ * @param pageSize the size of a single page
+ * @param acceptableOverheadRatio an acceptable overhead ratio per value
+ */
+ public AppendingDeltaPackedLongBuffer(int initialPageCount, int pageSize, float acceptableOverheadRatio) {
+ super(initialPageCount, pageSize, acceptableOverheadRatio);
+ minValues = new long[values.length];
+ }
+
+ /**
+ * Create an {@link AppendingDeltaPackedLongBuffer} with initialPageCount=16,
+ * pageSize=1024 and acceptableOverheadRatio={@link PackedInts#DEFAULT}
+ */
+ public AppendingDeltaPackedLongBuffer() {
+ this(16, 1024, PackedInts.DEFAULT);
+ }
+
+ /**
+ * Create an {@link AppendingDeltaPackedLongBuffer} with initialPageCount=16,
+ * pageSize=1024
+ */
+ public AppendingDeltaPackedLongBuffer(float acceptableOverheadRatio) {
+ this(16, 1024, acceptableOverheadRatio);
+ }
+
+ @Override
+ long get(int block, int element) {
+ if (block == valuesOff) {
+ return pending[element];
+ } else if (values[block] == null) {
+ return minValues[block];
+ } else {
+ return minValues[block] + values[block].get(element);
+ }
+ }
+
+ @Override
+ int get(int block, int element, long[] arr, int off, int len) {
+ if (block == valuesOff) {
+ int sysCopyToRead = Math.min(len, pendingOff - element);
+ System.arraycopy(pending, element, arr, off, sysCopyToRead);
+ return sysCopyToRead;
+ } else {
+ /* packed block */
+ int read = values[block].get(element, arr, off, len);
+ long d = minValues[block];
+ for (int r = 0; r < read; r++, off++) {
+ arr[off] += d;
+ }
+ return read;
+ }
+ }
+
+ @Override
+ void packPendingValues() {
+ // compute max delta
+ long minValue = pending[0];
+ long maxValue = pending[0];
+ for (int i = 1; i < pendingOff; ++i) {
+ minValue = Math.min(minValue, pending[i]);
+ maxValue = Math.max(maxValue, pending[i]);
+ }
+ final long delta = maxValue - minValue;
+
+ minValues[valuesOff] = minValue;
+ if (delta == 0) {
+ values[valuesOff] = new PackedInts.NullReader(pendingOff);
+ } else {
+ // build a new packed reader
+ final int bitsRequired = delta < 0 ? 64 : PackedInts.bitsRequired(delta);
+ for (int i = 0; i < pendingOff; ++i) {
+ pending[i] -= minValue;
+ }
+ final PackedInts.Mutable mutable = PackedInts.getMutable(pendingOff, bitsRequired, acceptableOverheadRatio);
+ for (int i = 0; i < pendingOff; ) {
+ i += mutable.set(i, pending, i, pendingOff - i);
+ }
+ values[valuesOff] = mutable;
+ }
+ }
+
+ @Override
+ void grow(int newBlockCount) {
+ super.grow(newBlockCount);
+ this.minValues = Arrays.copyOf(minValues, newBlockCount);
+ }
+
+ @Override
+ long baseRamBytesUsed() {
+ return super.baseRamBytesUsed()
+ + RamUsageEstimator.NUM_BYTES_OBJECT_REF; // additional array
+ }
+
+ @Override
+ public long ramBytesUsed() {
+ return super.ramBytesUsed() + RamUsageEstimator.sizeOf(minValues);
+ }
+
+}
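To see why the delta variant above subtracts the per-page minimum before
packing, consider a page of large but close values; a hedged, self-contained
illustration (the sample values are made up), using the public
PackedInts.bitsRequired that packPendingValues() relies on:

import org.apache.lucene.util.packed.PackedInts;

// Illustrative arithmetic only: large values that are close together need
// far fewer bits per value once the page minimum is subtracted, which is
// what AppendingDeltaPackedLongBuffer.packPendingValues() does per page.
public class DeltaPackingMath {
  public static void main(String[] args) {
    long[] page = {100000, 100003, 100001, 100007};
    long min = page[0], max = page[0];
    for (long v : page) {
      min = Math.min(min, v);
      max = Math.max(max, v);
    }
    System.out.println("raw packing:   " + PackedInts.bitsRequired(max) + " bits/value");
    System.out.println("delta packing: " + PackedInts.bitsRequired(max - min) + " bits/value");
    // prints 17 vs 3 bits per value for this page
  }
}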
diff --git lucene/core/src/java/org/apache/lucene/util/packed/AppendingLongBuffer.java lucene/core/src/java/org/apache/lucene/util/packed/AppendingLongBuffer.java
deleted file mode 100644
index 86784ab..0000000
--- lucene/core/src/java/org/apache/lucene/util/packed/AppendingLongBuffer.java
+++ /dev/null
@@ -1,111 +0,0 @@
-package org.apache.lucene.util.packed;
-
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-
-/**
- * Utility class to buffer a list of signed longs in memory. This class only
- * supports appending and is optimized for the case where values are close to
- * each other.
- * @lucene.internal
- */
-public final class AppendingLongBuffer extends AbstractAppendingLongBuffer {
-
- /** @param initialPageCount the initial number of pages
- * @param pageSize the size of a single page */
- public AppendingLongBuffer(int initialPageCount, int pageSize) {
- super(initialPageCount, pageSize);
- }
-
- /** Create an {@link AppendingLongBuffer} with initialPageCount=16 and
- * pageSize=1024. */
- public AppendingLongBuffer() {
- this(16, 1024);
- }
-
- @Override
- long get(int block, int element) {
- if (block == valuesOff) {
- return pending[element];
- } else if (deltas[block] == null) {
- return minValues[block];
- } else {
- return minValues[block] + deltas[block].get(element);
- }
- }
-
- @Override
- void packPendingValues() {
- // compute max delta
- long minValue = pending[0];
- long maxValue = pending[0];
- for (int i = 1; i < pendingOff; ++i) {
- minValue = Math.min(minValue, pending[i]);
- maxValue = Math.max(maxValue, pending[i]);
- }
- final long delta = maxValue - minValue;
-
- minValues[valuesOff] = minValue;
- if (delta == 0) {
- deltas[valuesOff] = new PackedInts.NullReader(pendingOff);
- } else {
- // build a new packed reader
- final int bitsRequired = delta < 0 ? 64 : PackedInts.bitsRequired(delta);
- for (int i = 0; i < pendingOff; ++i) {
- pending[i] -= minValue;
- }
- final PackedInts.Mutable mutable = PackedInts.getMutable(pendingOff, bitsRequired, PackedInts.COMPACT);
- for (int i = 0; i < pendingOff; ) {
- i += mutable.set(i, pending, i, pendingOff - i);
- }
- deltas[valuesOff] = mutable;
- }
- }
-
- /** Return an iterator over the values of this buffer. */
- @Override
- public Iterator iterator() {
- return new Iterator();
- }
-
- /** A long iterator. */
- public final class Iterator extends AbstractAppendingLongBuffer.Iterator {
-
- Iterator() {
- super();
- }
-
- @Override
- void fillValues() {
- if (vOff == valuesOff) {
- currentValues = pending;
- currentCount = pendingOff;
- } else {
- currentCount = deltas[vOff].size();
- for (int k = 0; k < currentCount; ) {
- k += deltas[vOff].get(k, currentValues, k, currentCount - k);
- }
- for (int k = 0; k < currentCount; ++k) {
- currentValues[k] += minValues[vOff];
- }
- }
- }
-
- }
-
-}
diff --git lucene/core/src/java/org/apache/lucene/util/packed/AppendingPackedLongBuffer.java lucene/core/src/java/org/apache/lucene/util/packed/AppendingPackedLongBuffer.java
new file mode 100644
index 0000000..e2229d1
--- /dev/null
+++ lucene/core/src/java/org/apache/lucene/util/packed/AppendingPackedLongBuffer.java
@@ -0,0 +1,96 @@
+package org.apache.lucene.util.packed;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+/**
+ * Utility class to buffer a list of signed longs in memory. This class only
+ * supports appending and is optimized for non-negative numbers with a uniform distribution over a fixed (limited) range.
+ *
+ * @lucene.internal
+ */
+public final class AppendingPackedLongBuffer extends AbstractAppendingLongBuffer {
+
+ /** Create an {@link AppendingPackedLongBuffer}.
+ * @param initialPageCount the initial number of pages
+ * @param pageSize the size of a single page
+ * @param acceptableOverheadRatio an acceptable overhead ratio per value
+ */
+ public AppendingPackedLongBuffer(int initialPageCount, int pageSize, float acceptableOverheadRatio) {
+ super(initialPageCount, pageSize, acceptableOverheadRatio);
+ }
+
+ /**
+ * Create an {@link AppendingPackedLongBuffer} with initialPageCount=16,
+ * pageSize=1024 and acceptableOverheadRatio={@link PackedInts#DEFAULT}
+ */
+ public AppendingPackedLongBuffer() {
+ this(16, 1024, PackedInts.DEFAULT);
+ }
+
+ /**
+ * Create an {@link AppendingPackedLongBuffer} with initialPageCount=16,
+ * pageSize=1024
+ */
+ public AppendingPackedLongBuffer(float acceptableOverheadRatio) {
+ this(16, 1024, acceptableOverheadRatio);
+ }
+
+ @Override
+ long get(int block, int element) {
+ if (block == valuesOff) {
+ return pending[element];
+ } else {
+ return values[block].get(element);
+ }
+ }
+
+ @Override
+ int get(int block, int element, long[] arr, int off, int len) {
+ if (block == valuesOff) {
+ int sysCopyToRead = Math.min(len, pendingOff - element);
+ System.arraycopy(pending, element, arr, off, sysCopyToRead);
+ return sysCopyToRead;
+ } else {
+ /* packed block */
+ return values[block].get(element, arr, off, len);
+ }
+ }
+
+ @Override
+ void packPendingValues() {
+ // compute the min/max of the pending values
+ long minValue = pending[0];
+ long maxValue = pending[0];
+ for (int i = 1; i < pendingOff; ++i) {
+ minValue = Math.min(minValue, pending[i]);
+ maxValue = Math.max(maxValue, pending[i]);
+ }
+
+
+ // build a new packed reader
+ final int bitsRequired = minValue < 0 ? 64 : PackedInts.bitsRequired(maxValue);
+ final PackedInts.Mutable mutable = PackedInts.getMutable(pendingOff, bitsRequired, acceptableOverheadRatio);
+ for (int i = 0; i < pendingOff; ) {
+ i += mutable.set(i, pending, i, pendingOff - i);
+ }
+ values[valuesOff] = mutable;
+
+ }
+
+}
diff --git lucene/core/src/java/org/apache/lucene/util/packed/MonotonicAppendingLongBuffer.java lucene/core/src/java/org/apache/lucene/util/packed/MonotonicAppendingLongBuffer.java
index 671d2e0..bf04d16 100644
--- lucene/core/src/java/org/apache/lucene/util/packed/MonotonicAppendingLongBuffer.java
+++ lucene/core/src/java/org/apache/lucene/util/packed/MonotonicAppendingLongBuffer.java
@@ -17,14 +17,15 @@ package org.apache.lucene.util.packed;
* limitations under the License.
*/
-import java.util.Arrays;
-
import org.apache.lucene.util.RamUsageEstimator;
+import java.util.Arrays;
+
/**
* Utility class to buffer signed longs in memory, which is optimized for the
* case where the sequence is monotonic, although it can encode any sequence of
* arbitrary longs. It only supports appending.
+ *
* @lucene.internal
*/
public final class MonotonicAppendingLongBuffer extends AbstractAppendingLongBuffer {
@@ -32,36 +33,77 @@ public final class MonotonicAppendingLongBuffer extends AbstractAppendingLongBuf
static long zigZagDecode(long n) {
return ((n >>> 1) ^ -(n & 1));
}
-
+
static long zigZagEncode(long n) {
return (n >> 63) ^ (n << 1);
}
float[] averages;
-
- /** @param initialPageCount the initial number of pages
- * @param pageSize the size of a single page */
- public MonotonicAppendingLongBuffer(int initialPageCount, int pageSize) {
- super(initialPageCount, pageSize);
- averages = new float[initialPageCount];
+ long[] minValues;
+
+ /**
+ * @param initialPageCount the initial number of pages
+ * @param pageSize the size of a single page
+ * @param acceptableOverheadRatio an acceptable overhead ratio per value
+ */
+ public MonotonicAppendingLongBuffer(int initialPageCount, int pageSize, float acceptableOverheadRatio) {
+ super(initialPageCount, pageSize, acceptableOverheadRatio);
+ averages = new float[values.length];
+ minValues = new long[values.length];
}
- /** Create an {@link MonotonicAppendingLongBuffer} with initialPageCount=16
- * and pageSize=1024. */
+ /**
+ * Create a {@link MonotonicAppendingLongBuffer} with initialPageCount=16,
+ * pageSize=1024 and acceptableOverheadRatio={@link PackedInts#DEFAULT}
+ */
public MonotonicAppendingLongBuffer() {
- this(16, 1024);
+ this(16, 1024, PackedInts.DEFAULT);
+ }
+
+ /**
+ * Create a {@link MonotonicAppendingLongBuffer} with initialPageCount=16,
+ * pageSize=1024
+ */
+ public MonotonicAppendingLongBuffer(float acceptableOverheadRatio) {
+ this(16, 1024, acceptableOverheadRatio);
}
+
@Override
long get(int block, int element) {
if (block == valuesOff) {
return pending[element];
} else {
final long base = minValues[block] + (long) (averages[block] * (long) element);
- if (deltas[block] == null) {
+ if (values[block] == null) {
return base;
} else {
- return base + zigZagDecode(deltas[block].get(element));
+ return base + zigZagDecode(values[block].get(element));
+ }
+ }
+ }
+
+ @Override
+ int get(int block, int element, long[] arr, int off, int len) {
+ if (block == valuesOff) {
+ int sysCopyToRead = Math.min(len, pendingOff - element);
+ System.arraycopy(pending, element, arr, off, sysCopyToRead);
+ return sysCopyToRead;
+ } else {
+ if (values[block] == null) {
+ int toFill = Math.min(len, pending.length - element);
+ for (int r = 0; r < toFill; r++, off++, element++) {
+ arr[off] = minValues[block] + (long) (averages[block] * (long) element);
+ }
+ return toFill;
+ } else {
+
+ /* packed block */
+ int read = values[block].get(element, arr, off, len);
+ for (int r = 0; r < read; r++, off++, element++) {
+ arr[off] = minValues[block] + (long) (averages[block] * (long) element) + zigZagDecode(arr[off]);
+ }
+ return read;
}
}
}
@@ -70,6 +112,7 @@ public final class MonotonicAppendingLongBuffer extends AbstractAppendingLongBuf
void grow(int newBlockCount) {
super.grow(newBlockCount);
this.averages = Arrays.copyOf(averages, newBlockCount);
+ this.minValues = Arrays.copyOf(minValues, newBlockCount);
}
@Override
@@ -91,58 +134,27 @@ public final class MonotonicAppendingLongBuffer extends AbstractAppendingLongBuf
}
}
if (maxDelta == 0) {
- deltas[valuesOff] = new PackedInts.NullReader(pendingOff);
+ values[valuesOff] = new PackedInts.NullReader(pendingOff);
} else {
final int bitsRequired = maxDelta < 0 ? 64 : PackedInts.bitsRequired(maxDelta);
- final PackedInts.Mutable mutable = PackedInts.getMutable(pendingOff, bitsRequired, PackedInts.COMPACT);
+ final PackedInts.Mutable mutable = PackedInts.getMutable(pendingOff, bitsRequired, acceptableOverheadRatio);
for (int i = 0; i < pendingOff; ) {
i += mutable.set(i, pending, i, pendingOff - i);
}
- deltas[valuesOff] = mutable;
+ values[valuesOff] = mutable;
}
}
- /** Return an iterator over the values of this buffer. */
- @Override
- public Iterator iterator() {
- return new Iterator();
- }
-
- /** A long iterator. */
- public final class Iterator extends AbstractAppendingLongBuffer.Iterator {
-
- Iterator() {
- super();
- }
-
- @Override
- void fillValues() {
- if (vOff == valuesOff) {
- currentValues = pending;
- currentCount = pendingOff;
- } else {
- currentCount = deltas[vOff].size();
- for (int k = 0; k < currentCount; ) {
- k += deltas[vOff].get(k, currentValues, k, currentCount - k);
- }
- for (int k = 0; k < currentCount; ++k) {
- currentValues[k] = minValues[vOff] + (long) (averages[vOff] * (long) k) + zigZagDecode(currentValues[k]);
- }
- }
- }
-
- }
-
@Override
long baseRamBytesUsed() {
return super.baseRamBytesUsed()
- + RamUsageEstimator.NUM_BYTES_OBJECT_REF; // the additional array
+ + 2 * RamUsageEstimator.NUM_BYTES_OBJECT_REF; // 2 additional arrays
}
@Override
public long ramBytesUsed() {
return super.ramBytesUsed()
- + RamUsageEstimator.sizeOf(averages);
+ + RamUsageEstimator.sizeOf(averages) + RamUsageEstimator.sizeOf(minValues);
}
}
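For reference, the zig-zag transform used above maps signed deviations from
the fitted line to small non-negative values so they pack well. A standalone
copy of the two package-private helpers from the hunk above, with a
round-trip check (illustrative only):

// Standalone copies of zigZagEncode/zigZagDecode as shown in the hunk
// above; they are package-private in Lucene, so duplicated here to run.
public class ZigZagSketch {
  static long zigZagEncode(long n) {
    return (n >> 63) ^ (n << 1); // 0->0, -1->1, 1->2, -2->3, 2->4, ...
  }

  static long zigZagDecode(long n) {
    return (n >>> 1) ^ -(n & 1);
  }

  public static void main(String[] args) {
    for (long n : new long[]{0, -1, 1, -2, 2, Long.MIN_VALUE, Long.MAX_VALUE}) {
      long encoded = zigZagEncode(n);
      if (zigZagDecode(encoded) != n) {
        throw new AssertionError(n + " did not round-trip");
      }
      System.out.println(n + " -> " + encoded);
    }
  }
}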
diff --git lucene/core/src/java/org/apache/lucene/util/packed/PackedInts.java lucene/core/src/java/org/apache/lucene/util/packed/PackedInts.java
index 3634dc2..1695403 100644
--- lucene/core/src/java/org/apache/lucene/util/packed/PackedInts.java
+++ lucene/core/src/java/org/apache/lucene/util/packed/PackedInts.java
@@ -705,6 +705,9 @@ public class PackedInts {
@Override
public int get(int index, long[] arr, int off, int len) {
+ assert len > 0 : "len must be > 0 (got " + len + ")";
+ assert index >= 0 && index < valueCount;
+ len = Math.min(len, valueCount - index);
Arrays.fill(arr, off, off + len, 0);
return len;
}
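The NullReader change above is the fix called out in the commit message:
bulk gets are now clamped to the reader's valueCount. A quick sketch of the
intended post-fix behavior, mirroring the new testPackedIntsNull below (the
file is assumed to live in the packed package so NullReader is reachable):

package org.apache.lucene.util.packed;

import java.util.Arrays;

// Illustrative check of the fix: a NullReader of size 5 holds only two
// values past index 3, so a bulk get of 10 must return 2, not 10.
public class NullReaderClampSketch {
  public static void main(String[] args) {
    PackedInts.Reader reader = new PackedInts.NullReader(5);
    long[] arr = new long[10];
    Arrays.fill(arr, 1L);
    int read = reader.get(3, arr, 0, 10);
    System.out.println("read=" + read); // expected: 2, with arr[0] and arr[1] zeroed
  }
}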
diff --git lucene/core/src/java/org/apache/lucene/util/packed/PagedGrowableWriter.java lucene/core/src/java/org/apache/lucene/util/packed/PagedGrowableWriter.java
index 0563846..531cd6d 100644
--- lucene/core/src/java/org/apache/lucene/util/packed/PagedGrowableWriter.java
+++ lucene/core/src/java/org/apache/lucene/util/packed/PagedGrowableWriter.java
@@ -23,7 +23,7 @@ import org.apache.lucene.util.packed.PackedInts.Mutable;
/**
* A {@link PagedGrowableWriter}. This class slices data into fixed-size blocks
* which have independent numbers of bits per value and grow on-demand.
- * <p>You should use this class instead of {@link AppendingLongBuffer} only when
+ * <p>You should use this class instead of the {@link AbstractAppendingLongBuffer} subclasses only when
* you need random write-access. Otherwise this class will likely be slower and
* less memory-efficient.
* @lucene.internal
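The javadoc above draws the line between the two families: appending buffers
for append-only data, PagedGrowableWriter when random writes are needed. A
hedged sketch of the random-write side (constructor arguments as used by
TestPackedInts later in this patch):

import org.apache.lucene.util.packed.PackedInts;
import org.apache.lucene.util.packed.PagedGrowableWriter;

// Illustrative contrast with the appending buffers: PagedGrowableWriter
// supports random writes, trading away some speed and memory efficiency.
public class RandomWriteSketch {
  public static void main(String[] args) {
    // (size, pageSize, startBitsPerValue, acceptableOverheadRatio)
    PagedGrowableWriter writer = new PagedGrowableWriter(1024, 1024, 1, PackedInts.DEFAULT);
    writer.set(10, 7);    // write any slot, in any order
    writer.set(1000, 3);  // bits per value grow on demand, per page
    System.out.println(writer.get(10) + " " + writer.get(1000));
  }
}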
diff --git lucene/core/src/test/org/apache/lucene/util/packed/BenchmarkAppendLongBufferRead.java lucene/core/src/test/org/apache/lucene/util/packed/BenchmarkAppendLongBufferRead.java
new file mode 100644
index 0000000..5eaaaf8
--- /dev/null
+++ lucene/core/src/test/org/apache/lucene/util/packed/BenchmarkAppendLongBufferRead.java
@@ -0,0 +1,238 @@
+package org.apache.lucene.util.packed;
+/*
+ * Licensed to ElasticSearch under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. ElasticSearch licenses this
+ * file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+
+import java.util.Random;
+
+public class BenchmarkAppendLongBufferRead {
+
+ enum ImplType {
+ PACKED,
+ DELTA_PACKED,
+ }
+
+ enum ReadStrategy {
+ RANDOM,
+ CONTINUOUS,
+ SKIPPING
+ }
+
+ public static void main(String[] args) throws Exception {
+
+ Random random = new Random();
+ ImplType implType = ImplType.PACKED;
+ ReadStrategy readStrategy = ReadStrategy.CONTINUOUS;
+ int readSize = 1024;
+ int dataSizeInBits = 3;
+ float[] acceptableOverheadRatios = new float[]{0f, 7f};//{0f, 0.2f, 0.5f, 7f};
+ boolean updateOrdinals = false;
+
+
+ long[] data = new long[500000];
+ long[] readPlan = new long[data.length * 2];
+ int currentReadPlanLength;
+
+ int maxOrdinal = 1 << dataSizeInBits;
+ long[] ordinals = new long[maxOrdinal];
+
+
+ long[] bulkReadSpare = new long[readSize * 3];
+ int totalNumberOfReads = data.length / 2 / readSize; // read half the array
+ long curStart;
+ currentReadPlanLength = 0;
+ switch (readStrategy) {
+ case RANDOM:
+ for (int i = 0; i < totalNumberOfReads; i++) {
+ curStart = (long) random.nextInt(data.length - readSize * 2);
+ readPlan[currentReadPlanLength++] = curStart; // where to read from
+ readPlan[currentReadPlanLength++] = curStart + readSize; // 1 + random.nextInt(readSize * 2 - 1); // where to read to (exclusive)
+ }
+ break;
+ case SKIPPING:
+ curStart = 0;
+ for (int i = 0; i < totalNumberOfReads; i++) { // read half the array so it will be the same # of reads as RANDOM on avg.
+ readPlan[currentReadPlanLength++] = curStart;
+ curStart += readSize; // readLength;
+ if (curStart >= data.length) {
+ readPlan[currentReadPlanLength++] = (long) data.length;
+ curStart = 0;
+ } else {
+ readPlan[currentReadPlanLength++] = curStart;
+ curStart += readSize * 3; // skip three read-lengths between reads, i.e. touch ~25% of the index. Hard coded for now.
+ if (curStart >= data.length) {
+ curStart = 0;
+ }
+ }
+ }
+ break;
+ case CONTINUOUS:
+ curStart = 0;
+ for (int i = 0; i < totalNumberOfReads; i++) { // read half the array so it will be the same # of reads as RANDOM on avg.
+ readPlan[currentReadPlanLength++] = curStart;
+ curStart += readSize; // readLength;
+ if (curStart >= data.length) {
+ readPlan[currentReadPlanLength++] = (long) data.length;
+ curStart = 0;
+ } else {
+ readPlan[currentReadPlanLength++] = curStart;
+ }
+
+ }
+ break;
+ default:
+ throw new RuntimeException("Forgot something?");
+ }
+
+ for (int i = 0; i < data.length; i++) {
+ data[i] = random.nextInt(1 << dataSizeInBits);
+ }
+
+
+ System.out.println("\n------- Storage: " + implType + ", Read: " + readStrategy + ", Read size: " + readSize);
+
+ for (float acceptableOverheadRatio : acceptableOverheadRatios) {
+ AbstractAppendingLongBuffer buf;
+ switch (implType) {
+ case PACKED:
+ buf = new AppendingPackedLongBuffer(acceptableOverheadRatio);
+ break;
+ case DELTA_PACKED:
+ buf = new AppendingDeltaPackedLongBuffer(acceptableOverheadRatio);
+ break;
+ default:
+ throw new RuntimeException("Forgot something?");
+ }
+
+ for (int i = 0; i < data.length; i++) {
+ buf.add(data[i]);
+ }
+
+ // warm up
+ if (updateOrdinals) {
+ for (int i = 0; i < 100; i++) {
+ singleGetWithOrdinals(buf, readPlan, currentReadPlanLength, ordinals);
+ bulkGetWithOrdinals(buf, readPlan, currentReadPlanLength, bulkReadSpare, ordinals);
+ }
+ } else {
+ for (int i = 0; i < 100; i++) {
+ singleGet(buf, readPlan, currentReadPlanLength);
+ bulkGet(buf, readPlan, currentReadPlanLength, bulkReadSpare);
+ }
+ }
+
+ long totalSingleGet = 0;
+ long totalBulkGet = 0;
+ int outerCount = 100;
+ int innerCount = 100;
+ long singleElementRead = 0;
+ long bulkElementRead = 0;
+ for (int i = 0; i < outerCount; i++) {
+ long start = System.nanoTime();
+ if (updateOrdinals) {
+ for (int j = 0; j < innerCount; j++) {
+ singleElementRead += singleGetWithOrdinals(buf, readPlan, currentReadPlanLength, ordinals);
+ }
+ } else {
+ for (int j = 0; j < innerCount; j++) {
+ singleElementRead += singleGet(buf, readPlan, currentReadPlanLength);
+ }
+ }
+ totalSingleGet += System.nanoTime() - start;
+
+ start = System.nanoTime();
+ if (updateOrdinals) {
+ for (int j = 0; j < innerCount; j++) {
+ bulkElementRead += bulkGetWithOrdinals(buf, readPlan, currentReadPlanLength, bulkReadSpare, ordinals);
+ }
+ } else {
+ for (int j = 0; j < innerCount; j++) {
+ bulkElementRead += bulkGet(buf, readPlan, currentReadPlanLength, bulkReadSpare);
+ }
+ }
+ totalBulkGet += System.nanoTime() - start;
+ }
+
+
+ System.out.printf("SINGLE GET: %4d bits ratio %.2f (i.e., %4d bits) total time: %6.2fs avg: %5.2fms, total read: %d elm (%s, %.2fkb)\n",
+ dataSizeInBits, acceptableOverheadRatio, buf.values[0].getBitsPerValue(),
+ totalSingleGet / 1000.0 / 1000 / 1000, totalSingleGet / 1000.0 / 1000 / outerCount / innerCount, singleElementRead, buf.values[0].getClass(), buf.ramBytesUsed() / 1024.0);
+ System.out.printf("BULK GET: %4d bits ratio %.2f (i.e., %4d bits) total time: %6.2fs avg: %5.2fms, total read: %d elm (%s, %.2fkb)\n",
+ dataSizeInBits, acceptableOverheadRatio, buf.values[0].getBitsPerValue(),
+ totalBulkGet / 1000.0 / 1000 / 1000, totalBulkGet / 1000.0 / 1000 / outerCount / innerCount, bulkElementRead, buf.values[0].getClass(), buf.ramBytesUsed() / 1024.0);
+
+ }
+ }
+
+
+ private static long singleGet(AbstractAppendingLongBuffer buf, long[] readPlan, int currentReadPlanLength) {
+ long readElements = 0;
+ for (int i = 0; i < currentReadPlanLength; i += 2) {
+ for (long j = readPlan[i]; j < readPlan[i + 1]; j++) {
+ buf.get(j);
+ readElements++;
+ }
+ }
+ return readElements;
+ }
+
+ private static long singleGetWithOrdinals(AbstractAppendingLongBuffer buf, long[] readPlan, int currentReadPlanLength, long[] ordinals) {
+ long readElements = 0;
+ for (int i = 0; i < currentReadPlanLength; i += 2) {
+ for (long j = readPlan[i]; j < readPlan[i + 1]; j++) {
+ ordinals[(int) buf.get(j)]++;
+ readElements++;
+ }
+ }
+ return readElements;
+ }
+
+ private static long bulkGet(AbstractAppendingLongBuffer buf, long[] readPlan, int currentReadPlanLength, long[] spare) {
+ long readElements = 0;
+
+ for (int i = 0; i < currentReadPlanLength; i += 2) {
+ long readFrom = readPlan[i];
+ long readTo = readPlan[i + 1];
+ while (readFrom < readTo) {
+ int k = buf.get(readFrom, spare, 0, Math.min((int) (readTo - readFrom), spare.length));
+ readFrom += k;
+ readElements += k;
+ }
+ }
+ return readElements;
+ }
+
+ private static long bulkGetWithOrdinals(AbstractAppendingLongBuffer buf, long[] readPlan, int currentReadPlanLength, long[] spare, long[] ordinals) {
+ long readElements = 0;
+
+ for (int i = 0; i < currentReadPlanLength; i += 2) {
+ long readFrom = readPlan[i];
+ long readTo = readPlan[i + 1];
+ while (readFrom < readTo) {
+ int k = buf.get(readFrom, spare, 0, Math.min((int) (readTo - readFrom), spare.length));
+ readFrom += k;
+ readElements += k;
+ while (k-- > 0) {
+ ordinals[(int) spare[k]]++;
+ }
+ }
+ }
+ return readElements;
+ }
+}
diff --git lucene/core/src/test/org/apache/lucene/util/packed/BenchmarkPackedIntsSerialRead.java lucene/core/src/test/org/apache/lucene/util/packed/BenchmarkPackedIntsSerialRead.java
new file mode 100644
index 0000000..494f2ad
--- /dev/null
+++ lucene/core/src/test/org/apache/lucene/util/packed/BenchmarkPackedIntsSerialRead.java
@@ -0,0 +1,143 @@
+package org.apache.lucene.util.packed;
+/*
+ * Licensed to ElasticSearch under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. ElasticSearch licenses this
+ * file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+
+import java.util.ArrayList;
+import java.util.Random;
+
+public class BenchmarkPackedIntsSerialRead {
+
+ enum ImplType {
+ PACKED,
+ DELTA_PACKED,
+ }
+
+ public static void main(String[] args) throws Exception {
+
+ Random random = new Random();
+
+ long[] data = new long[500000];
+ for (int i = 0; i < 500000; i++) {
+ data[i] = random.nextInt(8);
+ }
+// int dataSizeInBits = 32;
+// for (int i = 0; i < data.length; i++) {
+// data[i] = random.nextLong();
+// if (dataSizeInBits < 64) data[i] &= (1L << dataSizeInBits) - 1;
+// }
+
+ for (ImplType type : ImplType.values()) {
+ for (float acceptableOverheadRatio : new float[]{0f}) { //, 0.2f, 0.5f, 7f}) {
+ AbstractAppendingLongBuffer buf;
+ switch (type) {
+ case PACKED:
+ buf = new AppendingPackedLongBuffer(acceptableOverheadRatio);
+ break;
+ case DELTA_PACKED:
+ buf = new AppendingDeltaPackedLongBuffer(acceptableOverheadRatio);
+ break;
+ default:
+ throw new RuntimeException("Forgot something?");
+ }
+
+ long[] readPlan = null;
+ int readNo = data.length / 2; // read 50% of the array
+ ArrayList<Long> readPlanBuilder = new ArrayList<>(readNo + 1);
+
+ long curStart = 0;
+ for (int i = 0; i < readNo; i++) { // read half the array so it will be the same # of reads as RANDOM on avg.
+ readPlanBuilder.add(curStart);
+ int readLength = 1 + random.nextInt(1 * 2 - 1);
+ curStart += readLength;
+ if (curStart >= data.length) {
+ readPlanBuilder.add((long) data.length);
+ curStart = 0;
+ } else {
+ readPlanBuilder.add(curStart);
+ }
+ }
+ readPlan = new long[readPlanBuilder.size()];
+ for (int i = 0; i < readPlan.length; i++) readPlan[i] = readPlanBuilder.get(i);
+
+ for (int i = 0; i < data.length; i++) {
+ buf.add(data[i]);
+ }
+
+ // warm up
+ for (int i = 0; i < 100; i++) {
+ singleGet(buf, readPlan);
+ multiGet(buf);
+ }
+
+ long totalSingleGet = 0;
+ long totalBulkGet = 0;
+ int outerCount = 100;
+ int innerCount = 100;
+ for (int i = 0; i < outerCount; i++) {
+ long start = System.nanoTime();
+ for (int j = 0; j < innerCount; j++) {
+ singleGet(buf, readPlan);
+ }
+ totalSingleGet += System.nanoTime() - start;
+ start = System.nanoTime();
+ for (int j = 0; j < innerCount; j++) {
+ multiGet(buf);
+ }
+ totalBulkGet += System.nanoTime() - start;
+ }
+
+ System.out.printf("%12s ratio %.2f (%3d bits) single get : total: %6.2fs avg: %3dms (%s)\n",
+ type, acceptableOverheadRatio, buf.values[0].getBitsPerValue(), totalSingleGet / 1000.0 / 1000 / 1000, totalSingleGet / outerCount / innerCount / 1000 / 1000, buf.values[0].getClass());
+ System.out.printf("%12s ratio %.2f (%3d bits) bulk get : total: %6.2fs avg: %3dms (%s)\n",
+ type, acceptableOverheadRatio, buf.values[0].getBitsPerValue(), totalBulkGet / 1000.0 / 1000 / 1000, totalBulkGet / outerCount / innerCount / 1000 / 1000, buf.values[0].getClass());
+
+ }
+ }
+
+ }
+
+ // private static void singleGet(AbstractAppendingLongBuffer buf) {
+// long size = buf.size();
+// for (long i = 0; i < size; i++) {
+// buf.get(i);
+// }
+// }
+ private static long singleGet(AbstractAppendingLongBuffer buf, long[] readplan) {
+ long readElements = 0;
+ for (int i = 0; i < readplan.length; i += 2) {
+ for (long j = readplan[i]; j < readplan[i + 1]; j++) {
+ buf.get(j);
+ readElements++;
+ }
+ }
+ return readElements;
+ }
+
+ /**
+ * mimics buffered iteration
+ */
+ private static void multiGet(AbstractAppendingLongBuffer buf) {
+ long size = buf.size();
+ long[] spare = new long[10];
+ for (long i = 0; i < size; ) {
+ i += buf.get(i, spare, 0, 10);
+ }
+ }
+}
diff --git lucene/core/src/test/org/apache/lucene/util/packed/TestPackedInts.java lucene/core/src/test/org/apache/lucene/util/packed/TestPackedInts.java
index 93eb04f..721ed43 100644
--- lucene/core/src/test/org/apache/lucene/util/packed/TestPackedInts.java
+++ lucene/core/src/test/org/apache/lucene/util/packed/TestPackedInts.java
@@ -540,6 +540,27 @@ public class TestPackedInts extends LuceneTestCase {
}
}
+ public void testPackedIntsNull() {
+ int size = _TestUtil.nextInt(random(), 11, 256); // must be > 10 for the bulk reads below
+ Reader packedInts = new PackedInts.NullReader(size);
+ assertEquals(0, packedInts.get(_TestUtil.nextInt(random(), 0, size - 1)));
+ long[] arr = new long[size + 10];
+ int r;
+ Arrays.fill(arr, 1);
+ r = packedInts.get(0, arr, 0, size - 1);
+ assertEquals(size - 1, r);
+ for (r--; r >= 0; r--) {
+ assertEquals(0, arr[r]);
+ }
+ Arrays.fill(arr, 1);
+ r = packedInts.get(10, arr, 0, size + 10);
+ assertEquals(size - 10, r);
+ for (int i = 0; i < size - 10; i++) {
+ assertEquals(0, arr[i]);
+ }
+
+ }
+
public void testBulkGet() {
final int valueCount = 1111;
final int index = random().nextInt(valueCount);
@@ -669,8 +690,8 @@ public class TestPackedInts extends LuceneTestCase {
PagedGrowableWriter writer = new PagedGrowableWriter(0, pageSize, _TestUtil.nextInt(random(), 1, 64), random().nextFloat());
assertEquals(0, writer.size());
- // compare against AppendingLongBuffer
- AppendingLongBuffer buf = new AppendingLongBuffer();
+ // compare against AppendingDeltaPackedLongBuffer
+ AppendingDeltaPackedLongBuffer buf = new AppendingDeltaPackedLongBuffer();
int size = random().nextInt(1000000);
long max = 5;
for (int i = 0; i < size; ++i) {
@@ -720,8 +741,8 @@ public class TestPackedInts extends LuceneTestCase {
PagedMutable writer = new PagedMutable(0, pageSize, bitsPerValue, random().nextFloat() / 2);
assertEquals(0, writer.size());
- // compare against AppendingLongBuffer
- AppendingLongBuffer buf = new AppendingLongBuffer();
+ // compare against AppendingDeltaPackedLongBuffer
+ AppendingDeltaPackedLongBuffer buf = new AppendingDeltaPackedLongBuffer();
int size = random().nextInt(1000000);
for (int i = 0; i < size; ++i) {
@@ -924,25 +945,46 @@ public class TestPackedInts extends LuceneTestCase {
return true;
}
+ enum DataType {
+ PACKED,
+ DELTA_PACKED,
+ MONOTONIC
+ }
+
+
public void testAppendingLongBuffer() {
+
final long[] arr = new long[RandomInts.randomIntBetween(random(), 1, 1000000)];
- for (int bpv : new int[] {0, 1, 63, 64, RandomInts.randomIntBetween(random(), 2, 62)}) {
- for (boolean monotonic : new boolean[] {true, false}) {
+ float[] ratioOptions = new float[]{PackedInts.DEFAULT, PackedInts.COMPACT, PackedInts.FAST};
+ for (int bpv : new int[]{0, 1, 63, 64, RandomInts.randomIntBetween(random(), 2, 62)}) {
+ for (DataType dataType : DataType.values()) {
final int pageSize = 1 << _TestUtil.nextInt(random(), 6, 20);
final int initialPageCount = _TestUtil.nextInt(random(), 0, 16);
+ float acceptableOverheadRatio = ratioOptions[_TestUtil.nextInt(random(), 0, ratioOptions.length - 1)];
AbstractAppendingLongBuffer buf;
final int inc;
- if (monotonic) {
- buf = new MonotonicAppendingLongBuffer(initialPageCount, pageSize);
- inc = _TestUtil.nextInt(random(), -1000, 1000);
- } else {
- buf = new AppendingLongBuffer(initialPageCount, pageSize);
- inc = 0;
+ switch (dataType) {
+ case PACKED:
+ buf = new AppendingPackedLongBuffer(initialPageCount, pageSize, acceptableOverheadRatio);
+ inc = 0;
+ break;
+ case DELTA_PACKED:
+ buf = new AppendingDeltaPackedLongBuffer(initialPageCount, pageSize, acceptableOverheadRatio);
+ inc = 0;
+ break;
+ case MONOTONIC:
+ buf = new MonotonicAppendingLongBuffer(initialPageCount, pageSize, acceptableOverheadRatio);
+ inc = _TestUtil.nextInt(random(), -1000, 1000);
+ break;
+ default:
+ throw new RuntimeException("added a type and forgot to add it here?");
+
}
+
if (bpv == 0) {
arr[0] = random().nextLong();
for (int i = 1; i < arr.length; ++i) {
- arr[i] = arr[i-1] + inc;
+ arr[i] = arr[i - 1] + inc;
}
} else if (bpv == 64) {
for (int i = 0; i < arr.length; ++i) {
@@ -954,6 +996,7 @@ public class TestPackedInts extends LuceneTestCase {
arr[i] = minValue + inc * i + random().nextLong() & PackedInts.maxValue(bpv); // _TestUtil.nextLong is too slow
}
}
+
for (int i = 0; i < arr.length; ++i) {
buf.add(arr[i]);
}
@@ -966,6 +1009,11 @@ public class TestPackedInts extends LuceneTestCase {
}
}
assertEquals(arr.length, buf.size());
+
+ for (int i = 0; i < arr.length; ++i) {
+ assertEquals(arr[i], buf.get(i));
+ }
+
final AbstractAppendingLongBuffer.Iterator it = buf.iterator();
for (int i = 0; i < arr.length; ++i) {
if (random().nextBoolean()) {
@@ -974,11 +1022,27 @@ public class TestPackedInts extends LuceneTestCase {
assertEquals(arr[i], it.next());
}
assertFalse(it.hasNext());
-
- for (int i = 0; i < arr.length; ++i) {
- assertEquals(arr[i], buf.get(i));
+
+
+ long[] target = new long[arr.length + 1024]; // extra room so we can request more values than remain
+ for (int i = 0; i < arr.length; i += _TestUtil.nextInt(random(), 0, 10000)) {
+ int lenToRead = random().nextInt(buf.pageSize() * 2) + 1;
+ lenToRead = Math.min(lenToRead, target.length - i);
+ int lenToCheck = Math.min(lenToRead, arr.length - i);
+ int off = i;
+ while (off < arr.length && lenToRead > 0) {
+ int read = buf.get(off, target, off, lenToRead);
+ assertTrue(read > 0);
+ assertTrue(read <= lenToRead);
+ lenToRead -= read;
+ off += read;
+ }
+
+ for (int j = 0; j < lenToCheck; j++) {
+ assertEquals(arr[j + i], target[j + i]);
+ }
}
-
+
final long expectedBytesUsed = RamUsageEstimator.sizeOf(buf);
final long computedBytesUsed = buf.ramBytesUsed();
assertEquals(expectedBytesUsed, computedBytesUsed);
--
1.8.2.3
From 0d72e35488c5e1cecd411250ce6b7577bb317ee4 Mon Sep 17 00:00:00 2001
From: Boaz Leskes <b.leskes@gmail.com>
Date: Mon, 29 Jul 2013 12:38:45 +0200
Subject: [PATCH 2/3] deleted BenchmarkPackedIntsSerialRead
diff --git lucene/core/src/test/org/apache/lucene/util/packed/BenchmarkPackedIntsSerialRead.java lucene/core/src/test/org/apache/lucene/util/packed/BenchmarkPackedIntsSerialRead.java
deleted file mode 100644
index 494f2ad..0000000
--- lucene/core/src/test/org/apache/lucene/util/packed/BenchmarkPackedIntsSerialRead.java
+++ /dev/null
@@ -1,143 +0,0 @@
-package org.apache.lucene.util.packed;
-/*
- * Licensed to ElasticSearch under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. ElasticSearch licenses this
- * file to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-
-import java.util.ArrayList;
-import java.util.Random;
-
-public class BenchmarkPackedIntsSerialRead {
-
- enum ImplType {
- PACKED,
- DELTA_PACKED,
- }
-
- public static void main(String[] args) throws Exception {
-
- Random random = new Random();
-
- long[] data = new long[500000];
- for (int i = 0; i < 500000; i++) {
- data[i] = random.nextInt(8);
- }
-// int dataSizeInBits = 32;
-// for (int i = 0; i < data.length; i++) {
-// data[i] = random.nextLong();
-// if (dataSizeInBits < 64) data[i] &= (1L << dataSizeInBits) - 1;
-// }
-
- for (ImplType type : ImplType.values()) {
- for (float acceptableOverheadRatio : new float[]{0f}) { //, 0.2f, 0.5f, 7f}) {
- AbstractAppendingLongBuffer buf;
- switch (type) {
- case PACKED:
- buf = new AppendingPackedLongBuffer(acceptableOverheadRatio);
- break;
- case DELTA_PACKED:
- buf = new AppendingDeltaPackedLongBuffer(acceptableOverheadRatio);
- break;
- default:
- throw new RuntimeException("Forgot something?");
- }
-
- long[] readPlan = null;
- int readNo = data.length / 2; // read 50% of the array
- ArrayList<Long> readPlanBuilder = new ArrayList<>(readNo + 1);
-
- long curStart = 0;
- for (int i = 0; i < readNo; i++) { // read half the array so it will be the same # of reads as RANDOM on avg.
- readPlanBuilder.add(curStart);
- int readLength = 1 + random.nextInt(1 * 2 - 1);
- curStart += readLength;
- if (curStart >= data.length) {
- readPlanBuilder.add((long) data.length);
- curStart = 0;
- } else {
- readPlanBuilder.add(curStart);
- }
- }
- readPlan = new long[readPlanBuilder.size()];
- for (int i = 0; i < readPlan.length; i++) readPlan[i] = readPlanBuilder.get(i);
-
- for (int i = 0; i < data.length; i++) {
- buf.add(data[i]);
- }
-
- // warm up
- for (int i = 0; i < 100; i++) {
- singleGet(buf, readPlan);
- multiGet(buf);
- }
-
- long totalSingleGet = 0;
- long totalBulkGet = 0;
- int outerCount = 100;
- int innerCount = 100;
- for (int i = 0; i < outerCount; i++) {
- long start = System.nanoTime();
- for (int j = 0; j < innerCount; j++) {
- singleGet(buf, readPlan);
- }
- totalSingleGet += System.nanoTime() - start;
- start = System.nanoTime();
- for (int j = 0; j < innerCount; j++) {
- multiGet(buf);
- }
- totalBulkGet += System.nanoTime() - start;
- }
-
- System.out.printf("%12s ratio %.2f (%3d bits) single get : total: %6.2fs avg: %3dms (%s)\n",
- type, acceptableOverheadRatio, buf.values[0].getBitsPerValue(), totalSingleGet / 1000.0 / 1000 / 1000, totalSingleGet / outerCount / innerCount / 1000 / 1000, buf.values[0].getClass());
- System.out.printf("%12s ratio %.2f (%3d bits) bulk get : total: %6.2fs avg: %3dms (%s)\n",
- type, acceptableOverheadRatio, buf.values[0].getBitsPerValue(), totalBulkGet / 1000.0 / 1000 / 1000, totalBulkGet / outerCount / innerCount / 1000 / 1000, buf.values[0].getClass());
-
- }
- }
-
- }
-
- // private static void singleGet(AbstractAppendingLongBuffer buf) {
-// long size = buf.size();
-// for (long i = 0; i < size; i++) {
-// buf.get(i);
-// }
-// }
- private static long singleGet(AbstractAppendingLongBuffer buf, long[] readplan) {
- long readElements = 0;
- for (int i = 0; i < readplan.length; i += 2) {
- for (long j = readplan[i]; j < readplan[i + 1]; j++) {
- buf.get(j);
- readElements++;
- }
- }
- return readElements;
- }
-
- /**
- * mimics buffered iteration
- */
- private static void multiGet(AbstractAppendingLongBuffer buf) {
- long size = buf.size();
- long[] spare = new long[10];
- for (long i = 0; i < size; ) {
- i += buf.get(i, spare, 0, 10);
- }
- }
-}
--
1.8.2.3
From 02425a89baa5e8aa6f8b0c78138550eec865eb89 Mon Sep 17 00:00:00 2001
From: Boaz Leskes <b.leskes@gmail.com>
Date: Mon, 29 Jul 2013 14:10:56 +0200
Subject: [PATCH 3/3] renamed CONTINUOUS to SEQUENTIAL
diff --git lucene/core/src/test/org/apache/lucene/util/packed/BenchmarkAppendLongBufferRead.java lucene/core/src/test/org/apache/lucene/util/packed/BenchmarkAppendLongBufferRead.java
index 5eaaaf8..19c220e 100644
--- lucene/core/src/test/org/apache/lucene/util/packed/BenchmarkAppendLongBufferRead.java
+++ lucene/core/src/test/org/apache/lucene/util/packed/BenchmarkAppendLongBufferRead.java
@@ -30,7 +30,7 @@ public class BenchmarkAppendLongBufferRead {
enum ReadStrategy {
RANDOM,
- CONTINUOUS,
+ SEQUENTIAL,
SKIPPING
}
@@ -38,8 +38,8 @@ public class BenchmarkAppendLongBufferRead {
Random random = new Random();
ImplType implType = ImplType.PACKED;
- ReadStrategy readStrategy = ReadStrategy.CONTINUOUS;
- int readSize = 1024;
+ ReadStrategy readStrategy = ReadStrategy.SEQUENTIAL;
+ int readSize = 16;
int dataSizeInBits = 3;
float[] acceptableOverheadRatios = new float[]{0f, 7f};//{0f, 0.2f, 0.5f, 7f};
boolean updateOrdinals = false;
@@ -82,7 +82,7 @@ public class BenchmarkAppendLongBufferRead {
}
}
break;
- case CONTINUOUS:
+ case SEQUENTIAL:
curStart = 0;
for (int i = 0; i < totalNumberOfReads; i++) { // read half the array so it will be the same # of reads as RANDOM on avg.
readPlan[currentReadPlanLength++] = curStart;
--
1.8.2.3