| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| package org.apache.lucene.util.packed; |
| |
| |
| import java.io.IOException; |
| import java.util.Arrays; |
| |
| import org.apache.lucene.codecs.CodecUtil; |
| import org.apache.lucene.store.DataInput; |
| import org.apache.lucene.store.DataOutput; |
| import org.apache.lucene.store.IndexInput; |
| import org.apache.lucene.util.Accountable; |
| import org.apache.lucene.util.LongsRef; |
| import org.apache.lucene.util.RamUsageEstimator; |
| |
| /** |
| * Simplistic compression for array of unsigned long values. |
| * Each value is {@code >= 0} and {@code <=} a specified maximum value. The |
| * values are stored as packed ints, with each value |
| * consuming a fixed number of bits. |
| * |
| * @lucene.internal |
| */ |
| public class PackedInts { |
| |
| /** |
| * At most 700% memory overhead, always select a direct implementation. |
| */ |
| public static final float FASTEST = 7f; |
| |
| /** |
| * At most 50% memory overhead, always select a reasonably fast implementation. |
| */ |
| public static final float FAST = 0.5f; |
| |
| /** |
| * At most 25% memory overhead. |
| */ |
| public static final float DEFAULT = 0.25f; |
| |
| /** |
| * No memory overhead at all, but the returned implementation may be slow. |
| */ |
| public static final float COMPACT = 0f; |
| |
| /** |
| * Default amount of memory to use for bulk operations. |
| */ |
| public static final int DEFAULT_BUFFER_SIZE = 1024; // 1K |
| |
| public final static String CODEC_NAME = "PackedInts"; |
| public static final int VERSION_MONOTONIC_WITHOUT_ZIGZAG = 2; |
| public final static int VERSION_START = VERSION_MONOTONIC_WITHOUT_ZIGZAG; |
| public final static int VERSION_CURRENT = VERSION_MONOTONIC_WITHOUT_ZIGZAG; |
| |
| /** |
| * Check the validity of a version number. |
| */ |
| public static void checkVersion(int version) { |
| if (version < VERSION_START) { |
| throw new IllegalArgumentException("Version is too old, should be at least " + VERSION_START + " (got " + version + ")"); |
| } else if (version > VERSION_CURRENT) { |
| throw new IllegalArgumentException("Version is too new, should be at most " + VERSION_CURRENT + " (got " + version + ")"); |
| } |
| } |
| |
| /** |
| * A format to write packed ints. |
| * |
| * @lucene.internal |
| */ |
| public enum Format { |
| /** |
| * Compact format, all bits are written contiguously. |
| */ |
| PACKED(0) { |
| |
| @Override |
| public long byteCount(int packedIntsVersion, int valueCount, int bitsPerValue) { |
| return (long) Math.ceil((double) valueCount * bitsPerValue / 8); |
| } |
| |
| }, |
| |
| /** |
| * A format that may insert padding bits to improve encoding and decoding |
| * speed. Since this format doesn't support all possible bits per value, you |
| * should never use it directly, but rather use |
| * {@link PackedInts#fastestFormatAndBits(int, int, float)} to find the |
| * format that best suits your needs. |
| */ |
| PACKED_SINGLE_BLOCK(1) { |
| |
| @Override |
| public int longCount(int packedIntsVersion, int valueCount, int bitsPerValue) { |
| final int valuesPerBlock = 64 / bitsPerValue; |
| return (int) Math.ceil((double) valueCount / valuesPerBlock); |
| } |
| |
| @Override |
| public boolean isSupported(int bitsPerValue) { |
| return Packed64SingleBlock.isSupported(bitsPerValue); |
| } |
| |
| @Override |
| public float overheadPerValue(int bitsPerValue) { |
| assert isSupported(bitsPerValue); |
| final int valuesPerBlock = 64 / bitsPerValue; |
| final int overhead = 64 % bitsPerValue; |
| return (float) overhead / valuesPerBlock; |
| } |
| |
| }; |
| |
| /** |
| * Get a format according to its ID. |
| */ |
| public static Format byId(int id) { |
| for (Format format : Format.values()) { |
| if (format.getId() == id) { |
| return format; |
| } |
| } |
| throw new IllegalArgumentException("Unknown format id: " + id); |
| } |
| |
| private Format(int id) { |
| this.id = id; |
| } |
| |
| public int id; |
| |
| /** |
| * Returns the ID of the format. |
| */ |
| public int getId() { |
| return id; |
| } |
| |
| /** |
| * Computes how many byte blocks are needed to store <code>values</code> |
| * values of size <code>bitsPerValue</code>. |
| */ |
| public long byteCount(int packedIntsVersion, int valueCount, int bitsPerValue) { |
| assert bitsPerValue >= 0 && bitsPerValue <= 64 : bitsPerValue; |
| // assume long-aligned |
| return 8L * longCount(packedIntsVersion, valueCount, bitsPerValue); |
| } |
| |
| /** |
| * Computes how many long blocks are needed to store <code>values</code> |
| * values of size <code>bitsPerValue</code>. |
| */ |
| public int longCount(int packedIntsVersion, int valueCount, int bitsPerValue) { |
| assert bitsPerValue >= 0 && bitsPerValue <= 64 : bitsPerValue; |
| final long byteCount = byteCount(packedIntsVersion, valueCount, bitsPerValue); |
| assert byteCount < 8L * Integer.MAX_VALUE; |
| if ((byteCount % 8) == 0) { |
| return (int) (byteCount / 8); |
| } else { |
| return (int) (byteCount / 8 + 1); |
| } |
| } |
| |
| /** |
| * Tests whether the provided number of bits per value is supported by the |
| * format. |
| */ |
| public boolean isSupported(int bitsPerValue) { |
| return bitsPerValue >= 1 && bitsPerValue <= 64; |
| } |
| |
| /** |
| * Returns the overhead per value, in bits. |
| */ |
| public float overheadPerValue(int bitsPerValue) { |
| assert isSupported(bitsPerValue); |
| return 0f; |
| } |
| |
| /** |
| * Returns the overhead ratio (<code>overhead per value / bits per value</code>). |
| */ |
| public final float overheadRatio(int bitsPerValue) { |
| assert isSupported(bitsPerValue); |
| return overheadPerValue(bitsPerValue) / bitsPerValue; |
| } |
| } |
| |
| /** |
| * Simple class that holds a format and a number of bits per value. |
| */ |
| public static class FormatAndBits { |
| public final Format format; |
| public final int bitsPerValue; |
| public FormatAndBits(Format format, int bitsPerValue) { |
| this.format = format; |
| this.bitsPerValue = bitsPerValue; |
| } |
| |
| @Override |
| public String toString() { |
| return "FormatAndBits(format=" + format + " bitsPerValue=" + bitsPerValue + ")"; |
| } |
| } |
| |
| /** |
| * Try to find the {@link Format} and number of bits per value that would |
| * restore from disk the fastest reader whose overhead is less than |
| * <code>acceptableOverheadRatio</code>. |
| * <p> |
| * The <code>acceptableOverheadRatio</code> parameter makes sense for |
| * random-access {@link Reader}s. In case you only plan to perform |
| * sequential access on this stream later on, you should probably use |
| * {@link PackedInts#COMPACT}. |
| * <p> |
| * If you don't know how many values you are going to write, use |
| * <code>valueCount = -1</code>. |
| */ |
| public static FormatAndBits fastestFormatAndBits(int valueCount, int bitsPerValue, float acceptableOverheadRatio) { |
| if (valueCount == -1) { |
| valueCount = Integer.MAX_VALUE; |
| } |
| |
| acceptableOverheadRatio = Math.max(COMPACT, acceptableOverheadRatio); |
| acceptableOverheadRatio = Math.min(FASTEST, acceptableOverheadRatio); |
| float acceptableOverheadPerValue = acceptableOverheadRatio * bitsPerValue; // in bits |
| |
| int maxBitsPerValue = bitsPerValue + (int) acceptableOverheadPerValue; |
| |
| int actualBitsPerValue = -1; |
| Format format = Format.PACKED; |
| |
| if (bitsPerValue <= 8 && maxBitsPerValue >= 8) { |
| actualBitsPerValue = 8; |
| } else if (bitsPerValue <= 16 && maxBitsPerValue >= 16) { |
| actualBitsPerValue = 16; |
| } else if (bitsPerValue <= 32 && maxBitsPerValue >= 32) { |
| actualBitsPerValue = 32; |
| } else if (bitsPerValue <= 64 && maxBitsPerValue >= 64) { |
| actualBitsPerValue = 64; |
| } else if (valueCount <= Packed8ThreeBlocks.MAX_SIZE && bitsPerValue <= 24 && maxBitsPerValue >= 24) { |
| actualBitsPerValue = 24; |
| } else if (valueCount <= Packed16ThreeBlocks.MAX_SIZE && bitsPerValue <= 48 && maxBitsPerValue >= 48) { |
| actualBitsPerValue = 48; |
| } else { |
| for (int bpv = bitsPerValue; bpv <= maxBitsPerValue; ++bpv) { |
| if (Format.PACKED_SINGLE_BLOCK.isSupported(bpv)) { |
| float overhead = Format.PACKED_SINGLE_BLOCK.overheadPerValue(bpv); |
| float acceptableOverhead = acceptableOverheadPerValue + bitsPerValue - bpv; |
| if (overhead <= acceptableOverhead) { |
| actualBitsPerValue = bpv; |
| format = Format.PACKED_SINGLE_BLOCK; |
| break; |
| } |
| } |
| } |
| if (actualBitsPerValue < 0) { |
| actualBitsPerValue = bitsPerValue; |
| } |
| } |
| |
| return new FormatAndBits(format, actualBitsPerValue); |
| } |
| |
| /** |
| * A decoder for packed integers. |
| */ |
| public static interface Decoder { |
| |
| /** |
| * The minimum number of long blocks to encode in a single iteration, when |
| * using long encoding. |
| */ |
| int longBlockCount(); |
| |
| /** |
| * The number of values that can be stored in {@link #longBlockCount()} long |
| * blocks. |
| */ |
| int longValueCount(); |
| |
| /** |
| * The minimum number of byte blocks to encode in a single iteration, when |
| * using byte encoding. |
| */ |
| int byteBlockCount(); |
| |
| /** |
| * The number of values that can be stored in {@link #byteBlockCount()} byte |
| * blocks. |
| */ |
| int byteValueCount(); |
| |
| /** |
| * Read <code>iterations * blockCount()</code> blocks from <code>blocks</code>, |
| * decode them and write <code>iterations * valueCount()</code> values into |
| * <code>values</code>. |
| * |
| * @param blocks the long blocks that hold packed integer values |
| * @param blocksOffset the offset where to start reading blocks |
| * @param values the values buffer |
| * @param valuesOffset the offset where to start writing values |
| * @param iterations controls how much data to decode |
| */ |
| void decode(long[] blocks, int blocksOffset, long[] values, int valuesOffset, int iterations); |
| |
| /** |
| * Read <code>8 * iterations * blockCount()</code> blocks from <code>blocks</code>, |
| * decode them and write <code>iterations * valueCount()</code> values into |
| * <code>values</code>. |
| * |
| * @param blocks the long blocks that hold packed integer values |
| * @param blocksOffset the offset where to start reading blocks |
| * @param values the values buffer |
| * @param valuesOffset the offset where to start writing values |
| * @param iterations controls how much data to decode |
| */ |
| void decode(byte[] blocks, int blocksOffset, long[] values, int valuesOffset, int iterations); |
| |
| /** |
| * Read <code>iterations * blockCount()</code> blocks from <code>blocks</code>, |
| * decode them and write <code>iterations * valueCount()</code> values into |
| * <code>values</code>. |
| * |
| * @param blocks the long blocks that hold packed integer values |
| * @param blocksOffset the offset where to start reading blocks |
| * @param values the values buffer |
| * @param valuesOffset the offset where to start writing values |
| * @param iterations controls how much data to decode |
| */ |
| void decode(long[] blocks, int blocksOffset, int[] values, int valuesOffset, int iterations); |
| |
| /** |
| * Read <code>8 * iterations * blockCount()</code> blocks from <code>blocks</code>, |
| * decode them and write <code>iterations * valueCount()</code> values into |
| * <code>values</code>. |
| * |
| * @param blocks the long blocks that hold packed integer values |
| * @param blocksOffset the offset where to start reading blocks |
| * @param values the values buffer |
| * @param valuesOffset the offset where to start writing values |
| * @param iterations controls how much data to decode |
| */ |
| void decode(byte[] blocks, int blocksOffset, int[] values, int valuesOffset, int iterations); |
| |
| } |
| |
| /** |
| * An encoder for packed integers. |
| */ |
| public static interface Encoder { |
| |
| /** |
| * The minimum number of long blocks to encode in a single iteration, when |
| * using long encoding. |
| */ |
| int longBlockCount(); |
| |
| /** |
| * The number of values that can be stored in {@link #longBlockCount()} long |
| * blocks. |
| */ |
| int longValueCount(); |
| |
| /** |
| * The minimum number of byte blocks to encode in a single iteration, when |
| * using byte encoding. |
| */ |
| int byteBlockCount(); |
| |
| /** |
| * The number of values that can be stored in {@link #byteBlockCount()} byte |
| * blocks. |
| */ |
| int byteValueCount(); |
| |
| /** |
| * Read <code>iterations * valueCount()</code> values from <code>values</code>, |
| * encode them and write <code>iterations * blockCount()</code> blocks into |
| * <code>blocks</code>. |
| * |
| * @param blocks the long blocks that hold packed integer values |
| * @param blocksOffset the offset where to start writing blocks |
| * @param values the values buffer |
| * @param valuesOffset the offset where to start reading values |
| * @param iterations controls how much data to encode |
| */ |
| void encode(long[] values, int valuesOffset, long[] blocks, int blocksOffset, int iterations); |
| |
| /** |
| * Read <code>iterations * valueCount()</code> values from <code>values</code>, |
| * encode them and write <code>8 * iterations * blockCount()</code> blocks into |
| * <code>blocks</code>. |
| * |
| * @param blocks the long blocks that hold packed integer values |
| * @param blocksOffset the offset where to start writing blocks |
| * @param values the values buffer |
| * @param valuesOffset the offset where to start reading values |
| * @param iterations controls how much data to encode |
| */ |
| void encode(long[] values, int valuesOffset, byte[] blocks, int blocksOffset, int iterations); |
| |
| /** |
| * Read <code>iterations * valueCount()</code> values from <code>values</code>, |
| * encode them and write <code>iterations * blockCount()</code> blocks into |
| * <code>blocks</code>. |
| * |
| * @param blocks the long blocks that hold packed integer values |
| * @param blocksOffset the offset where to start writing blocks |
| * @param values the values buffer |
| * @param valuesOffset the offset where to start reading values |
| * @param iterations controls how much data to encode |
| */ |
| void encode(int[] values, int valuesOffset, long[] blocks, int blocksOffset, int iterations); |
| |
| /** |
| * Read <code>iterations * valueCount()</code> values from <code>values</code>, |
| * encode them and write <code>8 * iterations * blockCount()</code> blocks into |
| * <code>blocks</code>. |
| * |
| * @param blocks the long blocks that hold packed integer values |
| * @param blocksOffset the offset where to start writing blocks |
| * @param values the values buffer |
| * @param valuesOffset the offset where to start reading values |
| * @param iterations controls how much data to encode |
| */ |
| void encode(int[] values, int valuesOffset, byte[] blocks, int blocksOffset, int iterations); |
| |
| } |
| |
| /** |
| * A read-only random access array of positive integers. |
| * @lucene.internal |
| */ |
| public static abstract class Reader implements Accountable { |
| |
| /** Get the long at the given index. Behavior is undefined for out-of-range indices. */ |
| public abstract long get(int index); |
| |
| /** |
| * Bulk get: read at least one and at most <code>len</code> longs starting |
| * from <code>index</code> into <code>arr[off:off+len]</code> and return |
| * the actual number of values that have been read. |
| */ |
| public int get(int index, long[] arr, int off, int len) { |
| assert len > 0 : "len must be > 0 (got " + len + ")"; |
| assert index >= 0 && index < size(); |
| assert off + len <= arr.length; |
| |
| final int gets = Math.min(size() - index, len); |
| for (int i = index, o = off, end = index + gets; i < end; ++i, ++o) { |
| arr[o] = get(i); |
| } |
| return gets; |
| } |
| |
| /** |
| * @return the number of values. |
| */ |
| public abstract int size(); |
| } |
| |
| /** |
| * Run-once iterator interface, to decode previously saved PackedInts. |
| */ |
| public static interface ReaderIterator { |
| /** Returns next value */ |
| long next() throws IOException; |
| /** Returns at least 1 and at most <code>count</code> next values, |
| * the returned ref MUST NOT be modified */ |
| LongsRef next(int count) throws IOException; |
| /** Returns number of bits per value */ |
| int getBitsPerValue(); |
| /** Returns number of values */ |
| int size(); |
| /** Returns the current position */ |
| int ord(); |
| } |
| |
| static abstract class ReaderIteratorImpl implements ReaderIterator { |
| |
| protected final DataInput in; |
| protected final int bitsPerValue; |
| protected final int valueCount; |
| |
| protected ReaderIteratorImpl(int valueCount, int bitsPerValue, DataInput in) { |
| this.in = in; |
| this.bitsPerValue = bitsPerValue; |
| this.valueCount = valueCount; |
| } |
| |
| @Override |
| public long next() throws IOException { |
| LongsRef nextValues = next(1); |
| assert nextValues.length > 0; |
| final long result = nextValues.longs[nextValues.offset]; |
| ++nextValues.offset; |
| --nextValues.length; |
| return result; |
| } |
| |
| @Override |
| public int getBitsPerValue() { |
| return bitsPerValue; |
| } |
| |
| @Override |
| public int size() { |
| return valueCount; |
| } |
| } |
| |
| /** |
| * A packed integer array that can be modified. |
| * @lucene.internal |
| */ |
| public static abstract class Mutable extends Reader { |
| |
| /** |
| * @return the number of bits used to store any given value. |
| * Note: This does not imply that memory usage is |
| * {@code bitsPerValue * #values} as implementations are free to |
| * use non-space-optimal packing of bits. |
| */ |
| public abstract int getBitsPerValue(); |
| |
| /** |
| * Set the value at the given index in the array. |
| * @param index where the value should be positioned. |
| * @param value a value conforming to the constraints set by the array. |
| */ |
| public abstract void set(int index, long value); |
| |
| /** |
| * Bulk set: set at least one and at most <code>len</code> longs starting |
| * at <code>off</code> in <code>arr</code> into this mutable, starting at |
| * <code>index</code>. Returns the actual number of values that have been |
| * set. |
| */ |
| public int set(int index, long[] arr, int off, int len) { |
| assert len > 0 : "len must be > 0 (got " + len + ")"; |
| assert index >= 0 && index < size(); |
| len = Math.min(len, size() - index); |
| assert off + len <= arr.length; |
| |
| for (int i = index, o = off, end = index + len; i < end; ++i, ++o) { |
| set(i, arr[o]); |
| } |
| return len; |
| } |
| |
| /** |
| * Fill the mutable from <code>fromIndex</code> (inclusive) to |
| * <code>toIndex</code> (exclusive) with <code>val</code>. |
| */ |
| public void fill(int fromIndex, int toIndex, long val) { |
| assert val <= maxValue(getBitsPerValue()); |
| assert fromIndex <= toIndex; |
| for (int i = fromIndex; i < toIndex; ++i) { |
| set(i, val); |
| } |
| } |
| |
| /** |
| * Sets all values to 0. |
| */ |
| public void clear() { |
| fill(0, size(), 0); |
| } |
| |
| /** |
| * Save this mutable into <code>out</code>. Instantiating a reader from |
| * the generated data will return a reader with the same number of bits |
| * per value. |
| */ |
| public void save(DataOutput out) throws IOException { |
| Writer writer = getWriterNoHeader(out, getFormat(), size(), getBitsPerValue(), DEFAULT_BUFFER_SIZE); |
| writer.writeHeader(); |
| for (int i = 0; i < size(); ++i) { |
| writer.add(get(i)); |
| } |
| writer.finish(); |
| } |
| |
| /** The underlying format. */ |
| Format getFormat() { |
| return Format.PACKED; |
| } |
| |
| } |
| |
| /** |
| * A simple base for Readers that keeps track of valueCount and bitsPerValue. |
| * @lucene.internal |
| */ |
| static abstract class ReaderImpl extends Reader { |
| protected final int valueCount; |
| |
| protected ReaderImpl(int valueCount) { |
| this.valueCount = valueCount; |
| } |
| |
| @Override |
| public abstract long get(int index); |
| |
| @Override |
| public final int size() { |
| return valueCount; |
| } |
| } |
| |
| static abstract class MutableImpl extends Mutable { |
| |
| protected final int valueCount; |
| protected final int bitsPerValue; |
| |
| protected MutableImpl(int valueCount, int bitsPerValue) { |
| this.valueCount = valueCount; |
| assert bitsPerValue > 0 && bitsPerValue <= 64 : "bitsPerValue=" + bitsPerValue; |
| this.bitsPerValue = bitsPerValue; |
| } |
| |
| @Override |
| public final int getBitsPerValue() { |
| return bitsPerValue; |
| } |
| |
| @Override |
| public final int size() { |
| return valueCount; |
| } |
| |
| @Override |
| public String toString() { |
| return getClass().getSimpleName() + "(valueCount=" + valueCount + ",bitsPerValue=" + bitsPerValue + ")"; |
| } |
| } |
| |
| /** A {@link Reader} which has all its values equal to 0 (bitsPerValue = 0). */ |
| public static final class NullReader extends Reader { |
| |
| private final int valueCount; |
| |
| /** Sole constructor. */ |
| public NullReader(int valueCount) { |
| this.valueCount = valueCount; |
| } |
| |
| @Override |
| public long get(int index) { |
| return 0; |
| } |
| |
| @Override |
| public int get(int index, long[] arr, int off, int len) { |
| assert len > 0 : "len must be > 0 (got " + len + ")"; |
| assert index >= 0 && index < valueCount; |
| len = Math.min(len, valueCount - index); |
| Arrays.fill(arr, off, off + len, 0); |
| return len; |
| } |
| |
| @Override |
| public int size() { |
| return valueCount; |
| } |
| |
| @Override |
| public long ramBytesUsed() { |
| return RamUsageEstimator.alignObjectSize(RamUsageEstimator.NUM_BYTES_OBJECT_HEADER + Integer.BYTES); |
| } |
| } |
| |
| /** A write-once Writer. |
| * @lucene.internal |
| */ |
| public static abstract class Writer { |
| protected final DataOutput out; |
| protected final int valueCount; |
| protected final int bitsPerValue; |
| |
| protected Writer(DataOutput out, int valueCount, int bitsPerValue) { |
| assert bitsPerValue <= 64; |
| assert valueCount >= 0 || valueCount == -1; |
| this.out = out; |
| this.valueCount = valueCount; |
| this.bitsPerValue = bitsPerValue; |
| } |
| |
| void writeHeader() throws IOException { |
| assert valueCount != -1; |
| CodecUtil.writeHeader(out, CODEC_NAME, VERSION_CURRENT); |
| out.writeVInt(bitsPerValue); |
| out.writeVInt(valueCount); |
| out.writeVInt(getFormat().getId()); |
| } |
| |
| /** The format used to serialize values. */ |
| protected abstract PackedInts.Format getFormat(); |
| |
| /** Add a value to the stream. */ |
| public abstract void add(long v) throws IOException; |
| |
| /** The number of bits per value. */ |
| public final int bitsPerValue() { |
| return bitsPerValue; |
| } |
| |
| /** Perform end-of-stream operations. */ |
| public abstract void finish() throws IOException; |
| |
| /** |
| * Returns the current ord in the stream (number of values that have been |
| * written so far minus one). |
| */ |
| public abstract int ord(); |
| } |
| |
| /** |
| * Get a {@link Decoder}. |
| * |
| * @param format the format used to store packed ints |
| * @param version the compatibility version |
| * @param bitsPerValue the number of bits per value |
| * @return a decoder |
| */ |
| public static Decoder getDecoder(Format format, int version, int bitsPerValue) { |
| checkVersion(version); |
| return BulkOperation.of(format, bitsPerValue); |
| } |
| |
| /** |
| * Get an {@link Encoder}. |
| * |
| * @param format the format used to store packed ints |
| * @param version the compatibility version |
| * @param bitsPerValue the number of bits per value |
| * @return an encoder |
| */ |
| public static Encoder getEncoder(Format format, int version, int bitsPerValue) { |
| checkVersion(version); |
| return BulkOperation.of(format, bitsPerValue); |
| } |
| |
| /** |
| * Expert: Restore a {@link Reader} from a stream without reading metadata at |
| * the beginning of the stream. This method is useful to restore data from |
| * streams which have been created using |
| * {@link PackedInts#getWriterNoHeader(DataOutput, Format, int, int, int)}. |
| * |
| * @param in the stream to read data from, positioned at the beginning of the packed values |
| * @param format the format used to serialize |
| * @param version the version used to serialize the data |
| * @param valueCount how many values the stream holds |
| * @param bitsPerValue the number of bits per value |
| * @return a Reader |
| * @throws IOException If there is a low-level I/O error |
| * @see PackedInts#getWriterNoHeader(DataOutput, Format, int, int, int) |
| * @lucene.internal |
| */ |
| public static Reader getReaderNoHeader(DataInput in, Format format, int version, |
| int valueCount, int bitsPerValue) throws IOException { |
| checkVersion(version); |
| switch (format) { |
| case PACKED_SINGLE_BLOCK: |
| return Packed64SingleBlock.create(in, valueCount, bitsPerValue); |
| case PACKED: |
| switch (bitsPerValue) { |
| case 8: |
| return new Direct8(version, in, valueCount); |
| case 16: |
| return new Direct16(version, in, valueCount); |
| case 32: |
| return new Direct32(version, in, valueCount); |
| case 64: |
| return new Direct64(version, in, valueCount); |
| case 24: |
| if (valueCount <= Packed8ThreeBlocks.MAX_SIZE) { |
| return new Packed8ThreeBlocks(version, in, valueCount); |
| } |
| break; |
| case 48: |
| if (valueCount <= Packed16ThreeBlocks.MAX_SIZE) { |
| return new Packed16ThreeBlocks(version, in, valueCount); |
| } |
| break; |
| } |
| return new Packed64(version, in, valueCount, bitsPerValue); |
| default: |
| throw new AssertionError("Unknown Writer format: " + format); |
| } |
| } |
| |
| /** |
| * Restore a {@link Reader} from a stream. |
| * |
| * @param in the stream to read data from |
| * @return a Reader |
| * @throws IOException If there is a low-level I/O error |
| * @lucene.internal |
| */ |
| public static Reader getReader(DataInput in) throws IOException { |
| final int version = CodecUtil.checkHeader(in, CODEC_NAME, VERSION_START, VERSION_CURRENT); |
| final int bitsPerValue = in.readVInt(); |
| assert bitsPerValue > 0 && bitsPerValue <= 64: "bitsPerValue=" + bitsPerValue; |
| final int valueCount = in.readVInt(); |
| final Format format = Format.byId(in.readVInt()); |
| |
| return getReaderNoHeader(in, format, version, valueCount, bitsPerValue); |
| } |
| |
| /** |
| * Expert: Restore a {@link ReaderIterator} from a stream without reading |
| * metadata at the beginning of the stream. This method is useful to restore |
| * data from streams which have been created using |
| * {@link PackedInts#getWriterNoHeader(DataOutput, Format, int, int, int)}. |
| * |
| * @param in the stream to read data from, positioned at the beginning of the packed values |
| * @param format the format used to serialize |
| * @param version the version used to serialize the data |
| * @param valueCount how many values the stream holds |
| * @param bitsPerValue the number of bits per value |
| * @param mem how much memory the iterator is allowed to use to read-ahead (likely to speed up iteration) |
| * @return a ReaderIterator |
| * @see PackedInts#getWriterNoHeader(DataOutput, Format, int, int, int) |
| * @lucene.internal |
| */ |
| public static ReaderIterator getReaderIteratorNoHeader(DataInput in, Format format, int version, |
| int valueCount, int bitsPerValue, int mem) { |
| checkVersion(version); |
| return new PackedReaderIterator(format, version, valueCount, bitsPerValue, in, mem); |
| } |
| |
| /** |
| * Retrieve PackedInts as a {@link ReaderIterator} |
| * @param in positioned at the beginning of a stored packed int structure. |
| * @param mem how much memory the iterator is allowed to use to read-ahead (likely to speed up iteration) |
| * @return an iterator to access the values |
| * @throws IOException if the structure could not be retrieved. |
| * @lucene.internal |
| */ |
| public static ReaderIterator getReaderIterator(DataInput in, int mem) throws IOException { |
| final int version = CodecUtil.checkHeader(in, CODEC_NAME, VERSION_START, VERSION_CURRENT); |
| final int bitsPerValue = in.readVInt(); |
| assert bitsPerValue > 0 && bitsPerValue <= 64: "bitsPerValue=" + bitsPerValue; |
| final int valueCount = in.readVInt(); |
| final Format format = Format.byId(in.readVInt()); |
| return getReaderIteratorNoHeader(in, format, version, valueCount, bitsPerValue, mem); |
| } |
| |
| /** |
| * Expert: Construct a direct {@link Reader} from a stream without reading |
| * metadata at the beginning of the stream. This method is useful to restore |
| * data from streams which have been created using |
| * {@link PackedInts#getWriterNoHeader(DataOutput, Format, int, int, int)}. |
| * <p> |
| * The returned reader will have very little memory overhead, but every call |
| * to {@link Reader#get(int)} is likely to perform a disk seek. |
| * |
| * @param in the stream to read data from |
| * @param format the format used to serialize |
| * @param version the version used to serialize the data |
| * @param valueCount how many values the stream holds |
| * @param bitsPerValue the number of bits per value |
| * @return a direct Reader |
| * @lucene.internal |
| */ |
| public static Reader getDirectReaderNoHeader(final IndexInput in, Format format, |
| int version, int valueCount, int bitsPerValue) { |
| checkVersion(version); |
| switch (format) { |
| case PACKED: |
| return new DirectPackedReader(bitsPerValue, valueCount, in); |
| case PACKED_SINGLE_BLOCK: |
| return new DirectPacked64SingleBlockReader(bitsPerValue, valueCount, in); |
| default: |
| throw new AssertionError("Unknown format: " + format); |
| } |
| } |
| |
| /** |
| * Construct a direct {@link Reader} from an {@link IndexInput}. This method |
| * is useful to restore data from streams which have been created using |
| * {@link PackedInts#getWriter(DataOutput, int, int, float)}. |
| * <p> |
| * The returned reader will have very little memory overhead, but every call |
| * to {@link Reader#get(int)} is likely to perform a disk seek. |
| * |
| * @param in the stream to read data from |
| * @return a direct Reader |
| * @throws IOException If there is a low-level I/O error |
| * @lucene.internal |
| */ |
| public static Reader getDirectReader(IndexInput in) throws IOException { |
| final int version = CodecUtil.checkHeader(in, CODEC_NAME, VERSION_START, VERSION_CURRENT); |
| final int bitsPerValue = in.readVInt(); |
| assert bitsPerValue > 0 && bitsPerValue <= 64: "bitsPerValue=" + bitsPerValue; |
| final int valueCount = in.readVInt(); |
| final Format format = Format.byId(in.readVInt()); |
| return getDirectReaderNoHeader(in, format, version, valueCount, bitsPerValue); |
| } |
| |
| /** |
| * Create a packed integer array with the given amount of values initialized |
| * to 0. the valueCount and the bitsPerValue cannot be changed after creation. |
| * All Mutables known by this factory are kept fully in RAM. |
| * <p> |
| * Positive values of <code>acceptableOverheadRatio</code> will trade space |
| * for speed by selecting a faster but potentially less memory-efficient |
| * implementation. An <code>acceptableOverheadRatio</code> of |
| * {@link PackedInts#COMPACT} will make sure that the most memory-efficient |
| * implementation is selected whereas {@link PackedInts#FASTEST} will make sure |
| * that the fastest implementation is selected. |
| * |
| * @param valueCount the number of elements |
| * @param bitsPerValue the number of bits available for any given value |
| * @param acceptableOverheadRatio an acceptable overhead |
| * ratio per value |
| * @return a mutable packed integer array |
| * @lucene.internal |
| */ |
| public static Mutable getMutable(int valueCount, |
| int bitsPerValue, float acceptableOverheadRatio) { |
| final FormatAndBits formatAndBits = fastestFormatAndBits(valueCount, bitsPerValue, acceptableOverheadRatio); |
| return getMutable(valueCount, formatAndBits.bitsPerValue, formatAndBits.format); |
| } |
| |
| /** Same as {@link #getMutable(int, int, float)} with a pre-computed number |
| * of bits per value and format. |
| * @lucene.internal */ |
| public static Mutable getMutable(int valueCount, |
| int bitsPerValue, PackedInts.Format format) { |
| assert valueCount >= 0; |
| switch (format) { |
| case PACKED_SINGLE_BLOCK: |
| return Packed64SingleBlock.create(valueCount, bitsPerValue); |
| case PACKED: |
| switch (bitsPerValue) { |
| case 8: |
| return new Direct8(valueCount); |
| case 16: |
| return new Direct16(valueCount); |
| case 32: |
| return new Direct32(valueCount); |
| case 64: |
| return new Direct64(valueCount); |
| case 24: |
| if (valueCount <= Packed8ThreeBlocks.MAX_SIZE) { |
| return new Packed8ThreeBlocks(valueCount); |
| } |
| break; |
| case 48: |
| if (valueCount <= Packed16ThreeBlocks.MAX_SIZE) { |
| return new Packed16ThreeBlocks(valueCount); |
| } |
| break; |
| } |
| return new Packed64(valueCount, bitsPerValue); |
| default: |
| throw new AssertionError(); |
| } |
| } |
| |
| /** |
| * Expert: Create a packed integer array writer for the given output, format, |
| * value count, and number of bits per value. |
| * <p> |
| * The resulting stream will be long-aligned. This means that depending on |
| * the format which is used, up to 63 bits will be wasted. An easy way to |
| * make sure that no space is lost is to always use a <code>valueCount</code> |
| * that is a multiple of 64. |
| * <p> |
| * This method does not write any metadata to the stream, meaning that it is |
| * your responsibility to store it somewhere else in order to be able to |
| * recover data from the stream later on: |
| * <ul> |
| * <li><code>format</code> (using {@link Format#getId()}),</li> |
| * <li><code>valueCount</code>,</li> |
| * <li><code>bitsPerValue</code>,</li> |
| * <li>{@link #VERSION_CURRENT}.</li> |
| * </ul> |
| * <p> |
| * It is possible to start writing values without knowing how many of them you |
| * are actually going to write. To do this, just pass <code>-1</code> as |
| * <code>valueCount</code>. On the other hand, for any positive value of |
| * <code>valueCount</code>, the returned writer will make sure that you don't |
| * write more values than expected and pad the end of stream with zeros in |
| * case you have written less than <code>valueCount</code> when calling |
| * {@link Writer#finish()}. |
| * <p> |
| * The <code>mem</code> parameter lets you control how much memory can be used |
| * to buffer changes in memory before flushing to disk. High values of |
| * <code>mem</code> are likely to improve throughput. On the other hand, if |
| * speed is not that important to you, a value of <code>0</code> will use as |
| * little memory as possible and should already offer reasonable throughput. |
| * |
| * @param out the data output |
| * @param format the format to use to serialize the values |
| * @param valueCount the number of values |
| * @param bitsPerValue the number of bits per value |
| * @param mem how much memory (in bytes) can be used to speed up serialization |
| * @return a Writer |
| * @see PackedInts#getReaderIteratorNoHeader(DataInput, Format, int, int, int, int) |
| * @see PackedInts#getReaderNoHeader(DataInput, Format, int, int, int) |
| * @lucene.internal |
| */ |
| public static Writer getWriterNoHeader( |
| DataOutput out, Format format, int valueCount, int bitsPerValue, int mem) { |
| return new PackedWriter(format, out, valueCount, bitsPerValue, mem); |
| } |
| |
| /** |
| * Create a packed integer array writer for the given output, format, value |
| * count, and number of bits per value. |
| * <p> |
| * The resulting stream will be long-aligned. This means that depending on |
| * the format which is used under the hoods, up to 63 bits will be wasted. |
| * An easy way to make sure that no space is lost is to always use a |
| * <code>valueCount</code> that is a multiple of 64. |
| * <p> |
| * This method writes metadata to the stream, so that the resulting stream is |
| * sufficient to restore a {@link Reader} from it. You don't need to track |
| * <code>valueCount</code> or <code>bitsPerValue</code> by yourself. In case |
| * this is a problem, you should probably look at |
| * {@link #getWriterNoHeader(DataOutput, Format, int, int, int)}. |
| * <p> |
| * The <code>acceptableOverheadRatio</code> parameter controls how |
| * readers that will be restored from this stream trade space |
| * for speed by selecting a faster but potentially less memory-efficient |
| * implementation. An <code>acceptableOverheadRatio</code> of |
| * {@link PackedInts#COMPACT} will make sure that the most memory-efficient |
| * implementation is selected whereas {@link PackedInts#FASTEST} will make sure |
| * that the fastest implementation is selected. In case you are only interested |
| * in reading this stream sequentially later on, you should probably use |
| * {@link PackedInts#COMPACT}. |
| * |
| * @param out the data output |
| * @param valueCount the number of values |
| * @param bitsPerValue the number of bits per value |
| * @param acceptableOverheadRatio an acceptable overhead ratio per value |
| * @return a Writer |
| * @throws IOException If there is a low-level I/O error |
| * @lucene.internal |
| */ |
| public static Writer getWriter(DataOutput out, |
| int valueCount, int bitsPerValue, float acceptableOverheadRatio) |
| throws IOException { |
| assert valueCount >= 0; |
| |
| final FormatAndBits formatAndBits = fastestFormatAndBits(valueCount, bitsPerValue, acceptableOverheadRatio); |
| final Writer writer = getWriterNoHeader(out, formatAndBits.format, valueCount, formatAndBits.bitsPerValue, DEFAULT_BUFFER_SIZE); |
| writer.writeHeader(); |
| return writer; |
| } |
| |
| /** Returns how many bits are required to hold values up |
| * to and including maxValue |
| * NOTE: This method returns at least 1. |
| * @param maxValue the maximum value that should be representable. |
| * @return the amount of bits needed to represent values from 0 to maxValue. |
| * @lucene.internal |
| */ |
| public static int bitsRequired(long maxValue) { |
| if (maxValue < 0) { |
| throw new IllegalArgumentException("maxValue must be non-negative (got: " + maxValue + ")"); |
| } |
| return unsignedBitsRequired(maxValue); |
| } |
| |
| /** Returns how many bits are required to store <code>bits</code>, |
| * interpreted as an unsigned value. |
| * NOTE: This method returns at least 1. |
| * @lucene.internal |
| */ |
| public static int unsignedBitsRequired(long bits) { |
| return Math.max(1, 64 - Long.numberOfLeadingZeros(bits)); |
| } |
| |
| /** |
| * Calculates the maximum unsigned long that can be expressed with the given |
| * number of bits. |
| * @param bitsPerValue the number of bits available for any given value. |
| * @return the maximum value for the given bits. |
| * @lucene.internal |
| */ |
| public static long maxValue(int bitsPerValue) { |
| return bitsPerValue == 64 ? Long.MAX_VALUE : ~(~0L << bitsPerValue); |
| } |
| |
| /** |
| * Copy <code>src[srcPos:srcPos+len]</code> into |
| * <code>dest[destPos:destPos+len]</code> using at most <code>mem</code> |
| * bytes. |
| */ |
| public static void copy(Reader src, int srcPos, Mutable dest, int destPos, int len, int mem) { |
| assert srcPos + len <= src.size(); |
| assert destPos + len <= dest.size(); |
| final int capacity = mem >>> 3; |
| if (capacity == 0) { |
| for (int i = 0; i < len; ++i) { |
| dest.set(destPos++, src.get(srcPos++)); |
| } |
| } else if (len > 0) { |
| // use bulk operations |
| final long[] buf = new long[Math.min(capacity, len)]; |
| copy(src, srcPos, dest, destPos, len, buf); |
| } |
| } |
| |
| /** Same as {@link #copy(Reader, int, Mutable, int, int, int)} but using a pre-allocated buffer. */ |
| static void copy(Reader src, int srcPos, Mutable dest, int destPos, int len, long[] buf) { |
| assert buf.length > 0; |
| int remaining = 0; |
| while (len > 0) { |
| final int read = src.get(srcPos, buf, remaining, Math.min(len, buf.length - remaining)); |
| assert read > 0; |
| srcPos += read; |
| len -= read; |
| remaining += read; |
| final int written = dest.set(destPos, buf, 0, remaining); |
| assert written > 0; |
| destPos += written; |
| if (written < remaining) { |
| System.arraycopy(buf, written, buf, 0, remaining - written); |
| } |
| remaining -= written; |
| } |
| while (remaining > 0) { |
| final int written = dest.set(destPos, buf, 0, remaining); |
| destPos += written; |
| remaining -= written; |
| System.arraycopy(buf, written, buf, 0, remaining); |
| } |
| } |
| |
| /** Check that the block size is a power of 2, in the right bounds, and return |
| * its log in base 2. */ |
| static int checkBlockSize(int blockSize, int minBlockSize, int maxBlockSize) { |
| if (blockSize < minBlockSize || blockSize > maxBlockSize) { |
| throw new IllegalArgumentException("blockSize must be >= " + minBlockSize + " and <= " + maxBlockSize + ", got " + blockSize); |
| } |
| if ((blockSize & (blockSize - 1)) != 0) { |
| throw new IllegalArgumentException("blockSize must be a power of two, got " + blockSize); |
| } |
| return Integer.numberOfTrailingZeros(blockSize); |
| } |
| |
| /** Return the number of blocks required to store <code>size</code> values on |
| * <code>blockSize</code>. */ |
| static int numBlocks(long size, int blockSize) { |
| final int numBlocks = (int) (size / blockSize) + (size % blockSize == 0 ? 0 : 1); |
| if ((long) numBlocks * blockSize < size) { |
| throw new IllegalArgumentException("size is too large for this block size"); |
| } |
| return numBlocks; |
| } |
| |
| } |