| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| package org.apache.solr.util.hll; |
| |
| /** |
| * A corresponding deserializer for {@link BigEndianAscendingWordSerializer}. |
| */ |
| class BigEndianAscendingWordDeserializer implements IWordDeserializer { |
| // The number of bits per byte. |
| private static final int BITS_PER_BYTE = 8; |
| |
| // long mask for the maximum value stored in a byte |
| private static final long BYTE_MASK = (1L << BITS_PER_BYTE) - 1L; |
| |
| // ************************************************************************ |
| // The length in bits of the words to be read. |
| private final int wordLength; |
| |
| // The byte array to which the words are serialized. |
| private final byte[] bytes; |
| |
| // The number of leading padding bytes in 'bytes' to be ignored. |
| private final int bytePadding; |
| |
| // The number of words that the byte array contains. |
| private final int wordCount; |
| |
| // The current read state. |
| private int currentWordIndex; |
| |
| // ======================================================================== |
| /** |
| * @param wordLength the length in bits of the words to be deserialized. Must |
| * be less than or equal to 64 and greater than or equal to 1. |
| * @param bytePadding the number of leading bytes that pad the serialized words. |
| * Must be greater than or equal to zero. |
| * @param bytes the byte array containing the serialized words. Cannot be |
| * <code>null</code>. |
| */ |
| public BigEndianAscendingWordDeserializer(final int wordLength, final int bytePadding, final byte[] bytes) { |
| if((wordLength < 1) || (wordLength > 64)) { |
| throw new IllegalArgumentException("Word length must be >= 1 and <= 64. (was: " + wordLength + ")"); |
| } |
| |
| if(bytePadding < 0) { |
| throw new IllegalArgumentException("Byte padding must be >= zero. (was: " + bytePadding + ")"); |
| } |
| |
| this.wordLength = wordLength; |
| this.bytes = bytes; |
| this.bytePadding = bytePadding; |
| |
| final int dataBytes = (bytes.length - bytePadding); |
| final long dataBits = (dataBytes * BITS_PER_BYTE); |
| |
| this.wordCount = (int)(dataBits/wordLength); |
| |
| currentWordIndex = 0; |
| } |
| |
| // ======================================================================== |
| /* (non-Javadoc) |
| * @see net.agkn.hll.serialization.IWordDeserializer#readWord() |
| */ |
| @Override |
| public long readWord() { |
| final long word = readWord(currentWordIndex); |
| currentWordIndex++; |
| |
| return word; |
| } |
| |
| // ------------------------------------------------------------------------ |
| /** |
| * Reads the word at the specified sequence position (zero-indexed). |
| * |
| * @param position the zero-indexed position of the word to be read. This |
| * must be greater than or equal to zero. |
| * @return the value of the serialized word at the specified position. |
| */ |
| private long readWord(final int position) { |
| if(position < 0) { |
| throw new ArrayIndexOutOfBoundsException(position); |
| } |
| |
| // First bit of the word |
| final long firstBitIndex = ((long)position) * ((long)wordLength); |
| final int firstByteIndex = (bytePadding + (int)(firstBitIndex / BITS_PER_BYTE)); |
| final int firstByteSkipBits = (int)(firstBitIndex % BITS_PER_BYTE); |
| |
| // Last bit of the word |
| final long lastBitIndex = (firstBitIndex + wordLength - 1); |
| final int lastByteIndex = (bytePadding + (int)(lastBitIndex / BITS_PER_BYTE)); |
| final int lastByteBitsToConsume; |
| |
| final int bitsAfterByteBoundary = (int)((lastBitIndex + 1) % BITS_PER_BYTE); |
| // If the word terminates at the end of the last byte, consume the whole |
| // last byte. |
| if(bitsAfterByteBoundary == 0) { |
| lastByteBitsToConsume = BITS_PER_BYTE; |
| } else { |
| // Otherwise, only consume what is necessary. |
| lastByteBitsToConsume = bitsAfterByteBoundary; |
| } |
| |
| if(lastByteIndex >= bytes.length) { |
| throw new ArrayIndexOutOfBoundsException("Word out of bounds of backing array."); |
| } |
| |
| // Accumulator |
| long value = 0; |
| |
| // -------------------------------------------------------------------- |
| // First byte |
| final int bitsRemainingInFirstByte = (BITS_PER_BYTE - firstByteSkipBits); |
| final int bitsToConsumeInFirstByte = Math.min(bitsRemainingInFirstByte, wordLength); |
| long firstByte = (long)bytes[firstByteIndex]; |
| |
| // Mask off the bits to skip in the first byte. |
| final long firstByteMask = ((1L << bitsRemainingInFirstByte) - 1L); |
| firstByte &= firstByteMask; |
| // Right-align relevant bits of first byte. |
| firstByte >>>= (bitsRemainingInFirstByte - bitsToConsumeInFirstByte); |
| |
| value |= firstByte; |
| |
| // If the first byte contains the whole word, short-circuit. |
| if(firstByteIndex == lastByteIndex) { |
| return value; |
| } |
| |
| // -------------------------------------------------------------------- |
| // Middle bytes |
| final int middleByteCount = (lastByteIndex - firstByteIndex - 1); |
| for(int i=0; i<middleByteCount; i++) { |
| final long middleByte = (bytes[firstByteIndex + i + 1] & BYTE_MASK); |
| // Push middle byte onto accumulator. |
| value <<= BITS_PER_BYTE; |
| value |= middleByte; |
| } |
| |
| // -------------------------------------------------------------------- |
| // Last byte |
| long lastByte = (bytes[lastByteIndex] & BYTE_MASK); |
| lastByte >>= (BITS_PER_BYTE - lastByteBitsToConsume); |
| value <<= lastByteBitsToConsume; |
| value |= lastByte; |
| return value; |
| } |
| |
| /* (non-Javadoc) |
| * @see net.agkn.hll.serialization.IWordDeserializer#totalWordCount() |
| */ |
| @Override |
| public int totalWordCount() { |
| return wordCount; |
| } |
| } |