| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| package org.apache.lucene.backward_codecs.lucene50; |
| |
| import static org.apache.lucene.backward_codecs.lucene50.Lucene50PostingsFormat.BLOCK_SIZE; |
| |
| import java.io.IOException; |
| import java.util.Arrays; |
| import org.apache.lucene.store.DataInput; |
| import org.apache.lucene.store.DataOutput; |
| import org.apache.lucene.store.IndexInput; |
| import org.apache.lucene.store.IndexOutput; |
| import org.apache.lucene.util.packed.PackedInts; |
| import org.apache.lucene.util.packed.PackedInts.Decoder; |
| import org.apache.lucene.util.packed.PackedInts.FormatAndBits; |
| |
| /** |
| * Encode all values in normal area with fixed bit width, which is determined by the max value in |
| * this block. |
| */ |
| final class ForUtil { |
| |
| /** Special number of bits per value used whenever all values to encode are equal. */ |
| private static final int ALL_VALUES_EQUAL = 0; |
| |
| /** |
| * Upper limit of the number of bytes that might be required to stored <code>BLOCK_SIZE</code> |
| * encoded values. |
| */ |
| static final int MAX_ENCODED_SIZE = BLOCK_SIZE * 4; |
| |
| /** |
| * Upper limit of the number of values that might be decoded in a single call to {@link |
| * #readBlock(IndexInput, byte[], int[])}. Although values after <code>BLOCK_SIZE</code> are |
| * garbage, it is necessary to allocate value buffers whose size is {@code >= MAX_DATA_SIZE} to |
| * avoid {@link ArrayIndexOutOfBoundsException}s. |
| */ |
| static final int MAX_DATA_SIZE; |
| |
| static { |
| int maxDataSize = 0; |
| for (int version = PackedInts.VERSION_START; version <= PackedInts.VERSION_CURRENT; version++) { |
| for (PackedInts.Format format : PackedInts.Format.values()) { |
| for (int bpv = 1; bpv <= 32; ++bpv) { |
| if (!format.isSupported(bpv)) { |
| continue; |
| } |
| final PackedInts.Decoder decoder = PackedInts.getDecoder(format, version, bpv); |
| final int iterations = computeIterations(decoder); |
| maxDataSize = Math.max(maxDataSize, iterations * decoder.byteValueCount()); |
| } |
| } |
| } |
| MAX_DATA_SIZE = maxDataSize; |
| } |
| |
| /** |
| * Compute the number of iterations required to decode <code>BLOCK_SIZE</code> values with the |
| * provided {@link Decoder}. |
| */ |
| private static int computeIterations(PackedInts.Decoder decoder) { |
| return (int) Math.ceil((float) BLOCK_SIZE / decoder.byteValueCount()); |
| } |
| |
| /** |
| * Compute the number of bytes required to encode a block of values that require <code> |
| * bitsPerValue</code> bits per value with format <code>format</code>. |
| */ |
| private static int encodedSize( |
| PackedInts.Format format, int packedIntsVersion, int bitsPerValue) { |
| final long byteCount = format.byteCount(packedIntsVersion, BLOCK_SIZE, bitsPerValue); |
| assert byteCount >= 0 && byteCount <= Integer.MAX_VALUE : byteCount; |
| return (int) byteCount; |
| } |
| |
| private final int[] encodedSizes; |
| private final PackedInts.Encoder[] encoders; |
| private final PackedInts.Decoder[] decoders; |
| private final int[] iterations; |
| |
| /** Create a new {@link ForUtil} instance and save state into <code>out</code>. */ |
| ForUtil(float acceptableOverheadRatio, DataOutput out) throws IOException { |
| out.writeVInt(PackedInts.VERSION_CURRENT); |
| encodedSizes = new int[33]; |
| encoders = new PackedInts.Encoder[33]; |
| decoders = new PackedInts.Decoder[33]; |
| iterations = new int[33]; |
| |
| for (int bpv = 1; bpv <= 32; ++bpv) { |
| final FormatAndBits formatAndBits = |
| PackedInts.fastestFormatAndBits(BLOCK_SIZE, bpv, acceptableOverheadRatio); |
| assert formatAndBits.format.isSupported(formatAndBits.bitsPerValue); |
| assert formatAndBits.bitsPerValue <= 32; |
| encodedSizes[bpv] = |
| encodedSize(formatAndBits.format, PackedInts.VERSION_CURRENT, formatAndBits.bitsPerValue); |
| encoders[bpv] = |
| PackedInts.getEncoder( |
| formatAndBits.format, PackedInts.VERSION_CURRENT, formatAndBits.bitsPerValue); |
| decoders[bpv] = |
| PackedInts.getDecoder( |
| formatAndBits.format, PackedInts.VERSION_CURRENT, formatAndBits.bitsPerValue); |
| iterations[bpv] = computeIterations(decoders[bpv]); |
| |
| out.writeVInt(formatAndBits.format.getId() << 5 | (formatAndBits.bitsPerValue - 1)); |
| } |
| } |
| |
| /** Restore a {@link ForUtil} from a {@link DataInput}. */ |
| ForUtil(DataInput in) throws IOException { |
| int packedIntsVersion = in.readVInt(); |
| PackedInts.checkVersion(packedIntsVersion); |
| encodedSizes = new int[33]; |
| encoders = new PackedInts.Encoder[33]; |
| decoders = new PackedInts.Decoder[33]; |
| iterations = new int[33]; |
| |
| for (int bpv = 1; bpv <= 32; ++bpv) { |
| final int code = in.readVInt(); |
| final int formatId = code >>> 5; |
| final int bitsPerValue = (code & 31) + 1; |
| |
| final PackedInts.Format format = PackedInts.Format.byId(formatId); |
| assert format.isSupported(bitsPerValue); |
| encodedSizes[bpv] = encodedSize(format, packedIntsVersion, bitsPerValue); |
| encoders[bpv] = PackedInts.getEncoder(format, packedIntsVersion, bitsPerValue); |
| decoders[bpv] = PackedInts.getDecoder(format, packedIntsVersion, bitsPerValue); |
| iterations[bpv] = computeIterations(decoders[bpv]); |
| } |
| } |
| |
| /** |
| * Write a block of data (<code>For</code> format). |
| * |
| * @param data the data to write |
| * @param encoded a buffer to use to encode data |
| * @param out the destination output |
| * @throws IOException If there is a low-level I/O error |
| */ |
| void writeBlock(int[] data, byte[] encoded, IndexOutput out) throws IOException { |
| if (isAllEqual(data)) { |
| out.writeByte((byte) ALL_VALUES_EQUAL); |
| out.writeVInt(data[0]); |
| return; |
| } |
| |
| final int numBits = bitsRequired(data); |
| assert numBits > 0 && numBits <= 32 : numBits; |
| final PackedInts.Encoder encoder = encoders[numBits]; |
| final int iters = iterations[numBits]; |
| assert iters * encoder.byteValueCount() >= BLOCK_SIZE; |
| final int encodedSize = encodedSizes[numBits]; |
| assert iters * encoder.byteBlockCount() >= encodedSize; |
| |
| out.writeByte((byte) numBits); |
| |
| encoder.encode(data, 0, encoded, 0, iters); |
| out.writeBytes(encoded, encodedSize); |
| } |
| |
| /** |
| * Read the next block of data (<code>For</code> format). |
| * |
| * @param in the input to use to read data |
| * @param encoded a buffer that can be used to store encoded data |
| * @param decoded where to write decoded data |
| * @throws IOException If there is a low-level I/O error |
| */ |
| void readBlock(IndexInput in, byte[] encoded, int[] decoded) throws IOException { |
| final int numBits = in.readByte(); |
| assert numBits <= 32 : numBits; |
| |
| if (numBits == ALL_VALUES_EQUAL) { |
| final int value = in.readVInt(); |
| Arrays.fill(decoded, 0, BLOCK_SIZE, value); |
| return; |
| } |
| |
| final int encodedSize = encodedSizes[numBits]; |
| in.readBytes(encoded, 0, encodedSize); |
| |
| final PackedInts.Decoder decoder = decoders[numBits]; |
| final int iters = iterations[numBits]; |
| assert iters * decoder.byteValueCount() >= BLOCK_SIZE; |
| |
| decoder.decode(encoded, 0, decoded, 0, iters); |
| } |
| |
| /** |
| * Skip the next block of data. |
| * |
| * @param in the input where to read data |
| * @throws IOException If there is a low-level I/O error |
| */ |
| void skipBlock(IndexInput in) throws IOException { |
| final int numBits = in.readByte(); |
| if (numBits == ALL_VALUES_EQUAL) { |
| in.readVInt(); |
| return; |
| } |
| assert numBits > 0 && numBits <= 32 : numBits; |
| final int encodedSize = encodedSizes[numBits]; |
| in.seek(in.getFilePointer() + encodedSize); |
| } |
| |
| private static boolean isAllEqual(final int[] data) { |
| final int v = data[0]; |
| for (int i = 1; i < BLOCK_SIZE; ++i) { |
| if (data[i] != v) { |
| return false; |
| } |
| } |
| return true; |
| } |
| |
| /** Compute the number of bits required to serialize any of the longs in <code>data</code>. */ |
| private static int bitsRequired(final int[] data) { |
| long or = 0; |
| for (int i = 0; i < BLOCK_SIZE; ++i) { |
| assert data[i] >= 0; |
| or |= data[i]; |
| } |
| return PackedInts.bitsRequired(or); |
| } |
| } |