blob: c5bcb2decc43b9f85162cd2488257ee6a0793ddf [file] [log] [blame]
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.arrow.vector;
import io.netty.buffer.ArrowBuf;
import org.apache.arrow.memory.BufferAllocator;
import org.apache.arrow.memory.OutOfMemoryException;
import org.apache.arrow.vector.complex.impl.BitReaderImpl;
import org.apache.arrow.vector.complex.reader.FieldReader;
import org.apache.arrow.vector.holders.BitHolder;
import org.apache.arrow.vector.holders.NullableBitHolder;
import org.apache.arrow.vector.types.MaterializedField;
import org.apache.arrow.vector.util.OversizedAllocationException;
import org.apache.arrow.vector.util.TransferPair;
/**
* Bit implements a vector of bit-width values. Elements in the vector are accessed by position from the logical start
* of the vector. The width of each element is 1 bit. The equivalent Java primitive is an int containing the value '0'
* or '1'.
*/
public final class BitVector extends BaseDataValueVector implements FixedWidthVector {
static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(BitVector.class);
private final FieldReader reader = new BitReaderImpl(BitVector.this);
private final Accessor accessor = new Accessor();
private final Mutator mutator = new Mutator();
int valueCount;
private int allocationSizeInBytes = INITIAL_VALUE_ALLOCATION;
private int allocationMonitor = 0;
public BitVector(MaterializedField field, BufferAllocator allocator) {
super(field, allocator);
}
@Override
public FieldReader getReader() {
return reader;
}
@Override
public int getBufferSize() {
return getSizeFromCount(valueCount);
}
@Override
public int getBufferSizeFor(final int valueCount) {
return getSizeFromCount(valueCount);
}
int getSizeFromCount(int valueCount) {
return (int) Math.ceil(valueCount / 8.0);
}
@Override
public int getValueCapacity() {
return (int)Math.min((long)Integer.MAX_VALUE, data.capacity() * 8L);
}
private int getByteIndex(int index) {
return (int) Math.floor(index / 8.0);
}
@Override
public void setInitialCapacity(final int valueCount) {
allocationSizeInBytes = getSizeFromCount(valueCount);
}
@Override
public void allocateNew() {
if (!allocateNewSafe()) {
throw new OutOfMemoryException();
}
}
@Override
public boolean allocateNewSafe() {
long curAllocationSize = allocationSizeInBytes;
if (allocationMonitor > 10) {
curAllocationSize = Math.max(8, allocationSizeInBytes / 2);
allocationMonitor = 0;
} else if (allocationMonitor < -2) {
curAllocationSize = allocationSizeInBytes * 2L;
allocationMonitor = 0;
}
try {
allocateBytes(curAllocationSize);
} catch (OutOfMemoryException ex) {
return false;
}
return true;
}
@Override
public void reset() {
valueCount = 0;
allocationSizeInBytes = INITIAL_VALUE_ALLOCATION;
allocationMonitor = 0;
zeroVector();
super.reset();
}
/**
* Allocate a new memory space for this vector. Must be called prior to using the ValueVector.
*
* @param valueCount
* The number of values which can be contained within this vector.
*/
@Override
public void allocateNew(int valueCount) {
final int size = getSizeFromCount(valueCount);
allocateBytes(size);
}
private void allocateBytes(final long size) {
if (size > MAX_ALLOCATION_SIZE) {
throw new OversizedAllocationException("Requested amount of memory is more than max allowed allocation size");
}
final int curSize = (int) size;
clear();
data = allocator.buffer(curSize);
zeroVector();
allocationSizeInBytes = curSize;
}
/**
* Allocate new buffer with double capacity, and copy data into the new buffer. Replace vector's buffer with new buffer, and release old one
*/
public void reAlloc() {
final long newAllocationSize = allocationSizeInBytes * 2L;
if (newAllocationSize > MAX_ALLOCATION_SIZE) {
throw new OversizedAllocationException("Requested amount of memory is more than max allowed allocation size");
}
final int curSize = (int)newAllocationSize;
final ArrowBuf newBuf = allocator.buffer(curSize);
newBuf.setZero(0, newBuf.capacity());
newBuf.setBytes(0, data, 0, data.capacity());
data.release();
data = newBuf;
allocationSizeInBytes = curSize;
}
/**
* {@inheritDoc}
*/
@Override
public void zeroVector() {
data.setZero(0, data.capacity());
}
public void copyFrom(int inIndex, int outIndex, BitVector from) {
this.mutator.set(outIndex, from.accessor.get(inIndex));
}
public boolean copyFromSafe(int inIndex, int outIndex, BitVector from) {
if (outIndex >= this.getValueCapacity()) {
decrementAllocationMonitor();
return false;
}
copyFrom(inIndex, outIndex, from);
return true;
}
// @Override
// public void load(SerializedField metadata, DrillBuf buffer) {
// Preconditions.checkArgument(this.field.getPath().equals(metadata.getNamePart().getName()), "The field %s doesn't match the provided metadata %s.", this.field, metadata);
// final int valueCount = metadata.getValueCount();
// final int expectedLength = getSizeFromCount(valueCount);
// final int actualLength = metadata.getBufferLength();
// assert expectedLength == actualLength: "expected and actual buffer sizes do not match";
//
// clear();
// data = buffer.slice(0, actualLength);
// data.retain();
// this.valueCount = valueCount;
// }
@Override
public Mutator getMutator() {
return new Mutator();
}
@Override
public Accessor getAccessor() {
return new Accessor();
}
@Override
public TransferPair getTransferPair(BufferAllocator allocator) {
return new TransferImpl(getField(), allocator);
}
@Override
public TransferPair getTransferPair(String ref, BufferAllocator allocator) {
return new TransferImpl(getField().withPath(ref), allocator);
}
@Override
public TransferPair makeTransferPair(ValueVector to) {
return new TransferImpl((BitVector) to);
}
public void transferTo(BitVector target) {
target.clear();
if (target.data != null) {
target.data.release();
}
target.data = data;
target.data.retain(1);
target.valueCount = valueCount;
clear();
}
public void splitAndTransferTo(int startIndex, int length, BitVector target) {
assert startIndex + length <= valueCount;
int firstByte = getByteIndex(startIndex);
int byteSize = getSizeFromCount(length);
int offset = startIndex % 8;
if (offset == 0) {
target.clear();
// slice
if (target.data != null) {
target.data.release();
}
target.data = (ArrowBuf) data.slice(firstByte, byteSize);
target.data.retain(1);
} else {
// Copy data
// When the first bit starts from the middle of a byte (offset != 0), copy data from src BitVector.
// Each byte in the target is composed by a part in i-th byte, another part in (i+1)-th byte.
// The last byte copied to target is a bit tricky :
// 1) if length requires partly byte (length % 8 !=0), copy the remaining bits only.
// 2) otherwise, copy the last byte in the same way as to the prior bytes.
target.clear();
target.allocateNew(length);
// TODO maybe do this one word at a time, rather than byte?
for(int i = 0; i < byteSize - 1; i++) {
target.data.setByte(i, (((this.data.getByte(firstByte + i) & 0xFF) >>> offset) + (this.data.getByte(firstByte + i + 1) << (8 - offset))));
}
if (length % 8 != 0) {
target.data.setByte(byteSize - 1, ((this.data.getByte(firstByte + byteSize - 1) & 0xFF) >>> offset));
} else {
target.data.setByte(byteSize - 1,
(((this.data.getByte(firstByte + byteSize - 1) & 0xFF) >>> offset) + (this.data.getByte(firstByte + byteSize) << (8 - offset))));
}
}
target.getMutator().setValueCount(length);
}
private class TransferImpl implements TransferPair {
BitVector to;
public TransferImpl(MaterializedField field, BufferAllocator allocator) {
this.to = new BitVector(field, allocator);
}
public TransferImpl(BitVector to) {
this.to = to;
}
@Override
public BitVector getTo() {
return to;
}
@Override
public void transfer() {
transferTo(to);
}
@Override
public void splitAndTransfer(int startIndex, int length) {
splitAndTransferTo(startIndex, length, to);
}
@Override
public void copyValueSafe(int fromIndex, int toIndex) {
to.copyFromSafe(fromIndex, toIndex, BitVector.this);
}
}
private void decrementAllocationMonitor() {
if (allocationMonitor > 0) {
allocationMonitor = 0;
}
--allocationMonitor;
}
private void incrementAllocationMonitor() {
++allocationMonitor;
}
public class Accessor extends BaseAccessor {
/**
* Get the byte holding the desired bit, then mask all other bits. Iff the result is 0, the bit was not set.
*
* @param index
* position of the bit in the vector
* @return 1 if set, otherwise 0
*/
public final int get(int index) {
int byteIndex = index >> 3;
byte b = data.getByte(byteIndex);
int bitIndex = index & 7;
return Long.bitCount(b & (1L << bitIndex));
}
@Override
public boolean isNull(int index) {
return false;
}
@Override
public final Boolean getObject(int index) {
return new Boolean(get(index) != 0);
}
@Override
public final int getValueCount() {
return valueCount;
}
public final void get(int index, BitHolder holder) {
holder.value = get(index);
}
public final void get(int index, NullableBitHolder holder) {
holder.isSet = 1;
holder.value = get(index);
}
}
/**
* MutableBit implements a vector of bit-width values. Elements in the vector are accessed by position from the
* logical start of the vector. Values should be pushed onto the vector sequentially, but may be randomly accessed.
*
* NB: this class is automatically generated from ValueVectorTypes.tdd using FreeMarker.
*/
public class Mutator extends BaseMutator {
private Mutator() {
}
/**
* Set the bit at the given index to the specified value.
*
* @param index
* position of the bit to set
* @param value
* value to set (either 1 or 0)
*/
public final void set(int index, int value) {
int byteIndex = index >> 3;
int bitIndex = index & 7;
byte currentByte = data.getByte(byteIndex);
byte bitMask = (byte) (1L << bitIndex);
if (value != 0) {
currentByte |= bitMask;
} else {
currentByte -= (bitMask & currentByte);
}
data.setByte(byteIndex, currentByte);
}
public final void set(int index, BitHolder holder) {
set(index, holder.value);
}
final void set(int index, NullableBitHolder holder) {
set(index, holder.value);
}
public void setSafe(int index, int value) {
while(index >= getValueCapacity()) {
reAlloc();
}
set(index, value);
}
public void setSafe(int index, BitHolder holder) {
while(index >= getValueCapacity()) {
reAlloc();
}
set(index, holder.value);
}
public void setSafe(int index, NullableBitHolder holder) {
while(index >= getValueCapacity()) {
reAlloc();
}
set(index, holder.value);
}
@Override
public final void setValueCount(int valueCount) {
int currentValueCapacity = getValueCapacity();
BitVector.this.valueCount = valueCount;
int idx = getSizeFromCount(valueCount);
while(valueCount > getValueCapacity()) {
reAlloc();
}
if (valueCount > 0 && currentValueCapacity > valueCount * 2) {
incrementAllocationMonitor();
} else if (allocationMonitor > 0) {
allocationMonitor = 0;
}
VectorTrimmer.trim(data, idx);
}
@Override
public final void generateTestData(int values) {
boolean even = true;
for(int i = 0; i < values; i++, even = !even) {
if (even) {
set(i, 1);
}
}
setValueCount(values);
}
}
@Override
public void clear() {
this.valueCount = 0;
super.clear();
}
}