blob: cd16f720ecf795e844a7d24fcfda7d66b7e7f217 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.arrow.vector;
import static io.netty.util.internal.PlatformDependent.getByte;
import static io.netty.util.internal.PlatformDependent.getInt;
import static io.netty.util.internal.PlatformDependent.getLong;
import org.apache.arrow.memory.BufferAllocator;
import org.apache.arrow.util.DataSizeRoundingUtil;
import org.apache.arrow.vector.ipc.message.ArrowFieldNode;
import io.netty.buffer.ArrowBuf;
/**
 * Helper class for performing generic operations on a bit vector buffer.
 * External use of this class is not recommended.
 *
 * <p>Bits are addressed with an absolute bit index: bit {@code i} lives in byte
 * {@code i >> 3} at position {@code i & 7}, counted from the least significant bit.
 */
public class BitVectorHelper {

  private BitVectorHelper() {}

  /**
   * Get the index of byte corresponding to bit index in validity buffer.
   *
   * @param absoluteBitIndex index of the bit within the whole buffer
   * @return index of the byte holding that bit
   */
  public static int byteIndex(int absoluteBitIndex) {
    return absoluteBitIndex >> 3;
  }

  /**
   * Get the relative index of bit within the byte in validity buffer.
   *
   * @param absoluteBitIndex index of the bit within the whole buffer
   * @return position of the bit inside its byte, in the range 0..7
   */
  public static int bitIndex(int absoluteBitIndex) {
    return absoluteBitIndex & 7;
  }

  /**
   * Set the bit at provided index to 1.
   *
   * @param validityBuffer validity buffer of the vector
   * @param index index to be set
   */
  public static void setValidityBitToOne(ArrowBuf validityBuffer, int index) {
    final byte bitMask = (byte) (1 << bitIndex(index));
    setBitMaskedByte(validityBuffer, byteIndex(index), bitMask);
  }

  /**
   * Set the bit at a given index to provided value (1 or 0).
   *
   * @param validityBuffer validity buffer of the vector
   * @param index index to be set
   * @param value value to set; any non-zero value sets the bit, zero clears it
   */
  public static void setValidityBit(ArrowBuf validityBuffer, int index, int value) {
    final int byteIndex = byteIndex(index);
    final byte bitMask = (byte) (1 << bitIndex(index));
    byte currentByte = validityBuffer.getByte(byteIndex);
    if (value != 0) {
      currentByte |= bitMask;
    } else {
      currentByte &= ~bitMask;
    }
    validityBuffer.setByte(byteIndex, currentByte);
  }

  /**
   * Set the bit at a given index to provided value (1 or 0). Internally
   * takes care of allocating the buffer if the caller didn't do so.
   *
   * <p>When {@code index} is the last value ({@code valueCount - 1}) the writer index of
   * the buffer is moved to the validity buffer size for {@code valueCount}.
   *
   * @param validityBuffer validity buffer of the vector, or null to have one allocated
   * @param allocator allocator for the buffer
   * @param valueCount number of values to allocate/set
   * @param index index to be set
   * @param value value to set
   * @return the buffer that was written to (the newly allocated one if the input was null)
   */
  public static ArrowBuf setValidityBit(ArrowBuf validityBuffer, BufferAllocator allocator,
                                        int valueCount, int index, int value) {
    if (validityBuffer == null) {
      validityBuffer = allocator.buffer(getValidityBufferSize(valueCount));
      // Do not rely on the allocator handing out zeroed memory: the call below is a
      // read-modify-write of a single byte, so every other bit must start out as 0.
      validityBuffer.setZero(0, validityBuffer.capacity());
    }
    setValidityBit(validityBuffer, index, value);
    if (index == (valueCount - 1)) {
      validityBuffer.writerIndex(getValidityBufferSize(valueCount));
    }
    return validityBuffer;
  }

  /**
   * Check if a bit at a given index is set or not.
   *
   * @param buffer buffer to check
   * @param index index of the buffer
   * @return 1 if bit is set, 0 otherwise.
   */
  public static int get(final ArrowBuf buffer, int index) {
    final byte b = buffer.getByte(byteIndex(index));
    return (b >> bitIndex(index)) & 0x01;
  }

  /**
   * Compute the size of validity buffer required to manage a given number
   * of elements in a vector.
   *
   * @param valueCount number of elements in the vector
   * @return buffer size
   */
  public static int getValidityBufferSize(int valueCount) {
    return DataSizeRoundingUtil.divideBy8Ceil(valueCount);
  }

  /**
   * Given a validity buffer, find the number of bits that are not set.
   * This is used to compute the number of null elements in a nullable vector.
   *
   * @param validityBuffer validity buffer of the vector
   * @param valueCount number of values in the vector
   * @return number of bits not set.
   */
  public static int getNullCount(final ArrowBuf validityBuffer, final int valueCount) {
    if (valueCount == 0) {
      return 0;
    }
    final int sizeInBytes = getValidityBufferSize(valueCount);
    // If value count is not a multiple of 8, the last byte is only partially used.
    final int remainder = valueCount % 8;
    final int fullBytesCount = remainder == 0 ? sizeInBytes : sizeInBytes - 1;

    // Count the set bits, consuming the fully used bytes in the widest chunks possible.
    int setBits = 0;
    int index = 0;
    while (index + 8 <= fullBytesCount) {
      setBits += Long.bitCount(validityBuffer.getLong(index));
      index += 8;
    }
    while (index + 4 <= fullBytesCount) {
      setBits += Integer.bitCount(validityBuffer.getInt(index));
      index += 4;
    }
    while (index < fullBytesCount) {
      setBits += Integer.bitCount(validityBuffer.getByte(index) & 0xFF);
      index += 1;
    }

    if (remainder != 0) {
      // Force the unused high bits of the last byte to 1 so they are never reported as nulls.
      final int paddingMask = 0xFF << remainder;
      final int lastByte = validityBuffer.getByte(sizeInBytes - 1) | paddingMask;
      setBits += Integer.bitCount(lastByte & 0xFF);
    }
    return 8 * sizeInBytes - setBits;
  }

  /**
   * Tests if all bits in a validity buffer are equal 0 or 1, according to the specified parameter.
   *
   * @param validityBuffer the validity buffer.
   * @param valueCount the bit count.
   * @param checkOneBits if set to true, the method checks if all bits are equal to 1;
   *                     otherwise, it checks if all bits are equal to 0.
   * @return true if all bits are 0 or 1 according to the parameter, and false otherwise.
   */
  public static boolean checkAllBitsEqualTo(
      final ArrowBuf validityBuffer, final int valueCount, final boolean checkOneBits) {
    if (valueCount == 0) {
      return true;
    }
    final int sizeInBytes = getValidityBufferSize(valueCount);

    // Boundary check once up front; the reads below go straight to the memory address.
    validityBuffer.checkBytes(0, sizeInBytes);

    // If value count is not a multiple of 8, the last byte is only partially used.
    final int remainder = valueCount % 8;
    final int fullBytesCount = remainder == 0 ? sizeInBytes : sizeInBytes - 1;

    // -1 has every bit set, so it widens/narrows to the all-ones pattern of any width.
    final int intToCompare = checkOneBits ? -1 : 0;
    final long longToCompare = intToCompare;
    final byte byteToCompare = (byte) intToCompare;
    final long baseAddress = validityBuffer.memoryAddress();

    int index = 0;
    while (index + 8 <= fullBytesCount) {
      if (getLong(baseAddress + index) != longToCompare) {
        return false;
      }
      index += 8;
    }
    while (index + 4 <= fullBytesCount) {
      if (getInt(baseAddress + index) != intToCompare) {
        return false;
      }
      index += 4;
    }
    while (index < fullBytesCount) {
      if (getByte(baseAddress + index) != byteToCompare) {
        return false;
      }
      index += 1;
    }

    if (remainder != 0) {
      // Only the low `remainder` bits of the last byte take part in the comparison.
      final int mask = (1 << remainder) - 1;
      final int lastBits = getByte(baseAddress + sizeInBytes - 1) & mask;
      final int expected = checkOneBits ? mask : 0;
      if (lastBits != expected) {
        return false;
      }
    }
    return true;
  }

  /**
   * Returns the byte at index from data right-shifted by offset.
   *
   * @param data buffer to read from
   * @param index index of the byte to read
   * @param offset number of bit positions to shift right (unsigned shift, zero-filled)
   * @return the shifted byte
   */
  public static byte getBitsFromCurrentByte(final ArrowBuf data, final int index, final int offset) {
    return (byte) ((data.getByte(index) & 0xFF) >>> offset);
  }

  /**
   * Returns the byte at <code>index</code> from data left-shifted by (8 - <code>offset</code>).
   *
   * @param data buffer to read from
   * @param index index of the byte to read
   * @param offset the byte is shifted left by (8 - offset) bit positions
   * @return the shifted byte, truncated to its low 8 bits
   */
  public static byte getBitsFromNextByte(ArrowBuf data, int index, int offset) {
    return (byte) (data.getByte(index) << (8 - offset));
  }

  /**
   * Returns a new buffer if the source validity buffer is either all null or all
   * not-null, otherwise returns a buffer pointing to the same memory as source.
   *
   * @param fieldNode The fieldNode containing the null count
   * @param sourceValidityBuffer The source validity buffer that will have its
   *        position copied if there is a mix of null and non-null values
   * @param allocator The allocator to use for creating a new buffer if necessary.
   * @return A new buffer that is either allocated or points to the same memory as sourceValidityBuffer.
   */
  public static ArrowBuf loadValidityBuffer(final ArrowFieldNode fieldNode,
                                            final ArrowBuf sourceValidityBuffer,
                                            final BufferAllocator allocator) {
    final int valueCount = fieldNode.getLength();
    final int nullCount = fieldNode.getNullCount();

    if (nullCount != 0 && nullCount != valueCount) {
      /* mixed byte pattern -- create another ArrowBuf associated with the
       * target allocator
       */
      return sourceValidityBuffer.getReferenceManager().retain(sourceValidityBuffer, allocator);
    }

    /* either all NULLs or all non-NULLs: build the bitmap from scratch */
    final ArrowBuf newBuffer = allocator.buffer(getValidityBufferSize(valueCount));
    newBuffer.setZero(0, newBuffer.capacity());
    if (nullCount != 0) {
      /* all NULLs */
      return newBuffer;
    }

    /* all non-NULLs */
    final int fullBytesCount = valueCount / 8;
    for (int i = 0; i < fullBytesCount; ++i) {
      newBuffer.setByte(i, 0xFF);
    }
    final int remainder = valueCount % 8;
    if (remainder > 0) {
      // set only the low `remainder` bits of the trailing, partially used byte
      final byte bitMask = (byte) ((1 << remainder) - 1);
      newBuffer.setByte(fullBytesCount, bitMask);
    }
    return newBuffer;
  }

  /**
   * Set the byte of the given index in the data buffer by applying a bit mask to
   * the current byte at that index.
   *
   * @param data buffer to set
   * @param byteIndex byteIndex within the buffer
   * @param bitMask bit mask to be set (OR-ed into the current byte)
   */
  static void setBitMaskedByte(ArrowBuf data, int byteIndex, byte bitMask) {
    byte currentByte = data.getByte(byteIndex);
    currentByte |= bitMask;
    data.setByte(byteIndex, currentByte);
  }
}