| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| package org.apache.arrow.vector; |
| |
| import static org.apache.arrow.memory.util.LargeMemoryUtil.capAtMaxInt; |
| |
| import java.nio.ByteBuffer; |
| import java.util.ArrayList; |
| import java.util.Collections; |
| import java.util.List; |
| import org.apache.arrow.memory.ArrowBuf; |
| import org.apache.arrow.memory.BufferAllocator; |
| import org.apache.arrow.memory.OutOfMemoryException; |
| import org.apache.arrow.memory.util.ArrowBufPointer; |
| import org.apache.arrow.memory.util.ByteFunctionHelpers; |
| import org.apache.arrow.memory.util.CommonUtil; |
| import org.apache.arrow.memory.util.hash.ArrowBufHasher; |
| import org.apache.arrow.util.Preconditions; |
| import org.apache.arrow.vector.compare.VectorVisitor; |
| import org.apache.arrow.vector.ipc.message.ArrowFieldNode; |
| import org.apache.arrow.vector.types.pojo.Field; |
| import org.apache.arrow.vector.util.CallBack; |
| import org.apache.arrow.vector.util.OversizedAllocationException; |
| import org.apache.arrow.vector.util.TransferPair; |
| |
| /** BaseVariableWidthVector is a base class providing functionality for strings/bytes types. */ |
| public abstract class BaseVariableWidthVector extends BaseValueVector |
| implements VariableWidthFieldVector { |
| private static final int DEFAULT_RECORD_BYTE_COUNT = 8; |
| private static final int INITIAL_BYTE_COUNT = |
| INITIAL_VALUE_ALLOCATION * DEFAULT_RECORD_BYTE_COUNT; |
| private static final int MAX_BUFFER_SIZE = (int) Math.min(MAX_ALLOCATION_SIZE, Integer.MAX_VALUE); |
| private int lastValueCapacity; |
| private long lastValueAllocationSizeInBytes; |
| |
| /* protected members */ |
| public static final int OFFSET_WIDTH = 4; /* 4 byte unsigned int to track offsets */ |
| protected static final byte[] emptyByteArray = new byte[] {}; |
| protected ArrowBuf validityBuffer; |
| protected ArrowBuf valueBuffer; |
| protected ArrowBuf offsetBuffer; |
| protected int valueCount; |
| protected int lastSet; |
| protected final Field field; |
| |
| /** |
| * Constructs a new instance. |
| * |
| * @param field The field materialized by this vector. |
| * @param allocator The allocator to use for creating/resizing buffers |
| */ |
| public BaseVariableWidthVector(Field field, final BufferAllocator allocator) { |
| super(allocator); |
| this.field = field; |
| lastValueAllocationSizeInBytes = INITIAL_BYTE_COUNT; |
| // -1 because we require one extra slot for the offset array. |
| lastValueCapacity = INITIAL_VALUE_ALLOCATION - 1; |
| valueCount = 0; |
| lastSet = -1; |
| offsetBuffer = allocator.getEmpty(); |
| validityBuffer = allocator.getEmpty(); |
| valueBuffer = allocator.getEmpty(); |
| } |
| |
| @Override |
| public String getName() { |
| return field.getName(); |
| } |
| |
| /* TODO: |
| * see if getNullCount() can be made faster -- O(1) |
| */ |
| |
| /* TODO: |
| * Once the entire hierarchy has been refactored, move common functions |
| * like getNullCount(), splitAndTransferValidityBuffer to top level |
| * base class BaseValueVector. |
| * |
| * Along with this, some class members (validityBuffer) can also be |
| * abstracted out to top level base class. |
| * |
| * Right now BaseValueVector is the top level base class for other |
| * vector types in ValueVector hierarchy (non-nullable) and those |
| * vectors have not yet been refactored/removed so moving things to |
| * the top class as of now is not a good idea. |
| */ |
| |
| /** |
| * Get buffer that manages the validity (NULL or NON-NULL nature) of elements in the vector. |
| * Consider it as a buffer for internal bit vector data structure. |
| * |
| * @return buffer |
| */ |
| @Override |
| public ArrowBuf getValidityBuffer() { |
| return validityBuffer; |
| } |
| |
| /** |
| * Get the buffer that stores the data for elements in the vector. |
| * |
| * @return buffer |
| */ |
| @Override |
| public ArrowBuf getDataBuffer() { |
| return valueBuffer; |
| } |
| |
| /** |
| * buffer that stores the offsets for elements in the vector. This operation is not supported for |
| * fixed-width vectors. |
| * |
| * @return buffer |
| */ |
| @Override |
| public ArrowBuf getOffsetBuffer() { |
| return offsetBuffer; |
| } |
| |
| /** |
| * Get the memory address of buffer that stores the offsets for elements in the vector. |
| * |
| * @return starting address of the buffer |
| */ |
| @Override |
| public long getOffsetBufferAddress() { |
| return offsetBuffer.memoryAddress(); |
| } |
| |
| /** |
| * Get the memory address of buffer that manages the validity (NULL or NON-NULL nature) of |
| * elements in the vector. |
| * |
| * @return starting address of the buffer |
| */ |
| @Override |
| public long getValidityBufferAddress() { |
| return validityBuffer.memoryAddress(); |
| } |
| |
| /** |
| * Get the memory address of buffer that stores the data for elements in the vector. |
| * |
| * @return starting address of the buffer |
| */ |
| @Override |
| public long getDataBufferAddress() { |
| return valueBuffer.memoryAddress(); |
| } |
| |
| /** |
| * Sets the desired value capacity for the vector. This function doesn't allocate any memory for |
| * the vector. |
| * |
| * @param valueCount desired number of elements in the vector |
| */ |
| @Override |
| public void setInitialCapacity(int valueCount) { |
| final long size = (long) valueCount * DEFAULT_RECORD_BYTE_COUNT; |
| checkDataBufferSize(size); |
| computeAndCheckOffsetsBufferSize(valueCount); |
| lastValueAllocationSizeInBytes = (int) size; |
| lastValueCapacity = valueCount; |
| } |
| |
| /** |
| * Sets the desired value capacity for the vector. This function doesn't allocate any memory for |
| * the vector. |
| * |
| * @param valueCount desired number of elements in the vector |
| * @param density average number of bytes per variable width element |
| */ |
| @Override |
| public void setInitialCapacity(int valueCount, double density) { |
| long size = Math.max((long) (valueCount * density), 1L); |
| checkDataBufferSize(size); |
| computeAndCheckOffsetsBufferSize(valueCount); |
| lastValueAllocationSizeInBytes = (int) size; |
| lastValueCapacity = valueCount; |
| } |
| |
| /** |
| * Get the density of this ListVector. |
| * |
| * @return density |
| */ |
| public double getDensity() { |
| if (valueCount == 0) { |
| return 0.0D; |
| } |
| final int startOffset = getStartOffset(0); |
| final int endOffset = getStartOffset(valueCount); |
| final double totalListSize = endOffset - startOffset; |
| return totalListSize / valueCount; |
| } |
| |
| /** |
| * Get the current capacity which does not exceed either validity buffer or offset buffer. Note: |
| * Here the `getValueCapacity` has no relationship with the value buffer. |
| * |
| * @return number of elements that vector can hold. |
| */ |
| @Override |
| public int getValueCapacity() { |
| final int offsetValueCapacity = Math.max(getOffsetBufferValueCapacity() - 1, 0); |
| return Math.min(offsetValueCapacity, getValidityBufferValueCapacity()); |
| } |
| |
| private int getValidityBufferValueCapacity() { |
| return capAtMaxInt(validityBuffer.capacity() * 8); |
| } |
| |
| private int getOffsetBufferValueCapacity() { |
| return capAtMaxInt(offsetBuffer.capacity() / OFFSET_WIDTH); |
| } |
| |
| /** zero out the vector and the data in associated buffers. */ |
| public void zeroVector() { |
| initValidityBuffer(); |
| initOffsetBuffer(); |
| valueBuffer.setZero(0, valueBuffer.capacity()); |
| } |
| |
| /* zero out the validity buffer */ |
| private void initValidityBuffer() { |
| validityBuffer.setZero(0, validityBuffer.capacity()); |
| } |
| |
| /* zero out the offset buffer */ |
| private void initOffsetBuffer() { |
| offsetBuffer.setZero(0, offsetBuffer.capacity()); |
| } |
| |
| /** |
| * Reset the vector to initial state. Same as {@link #zeroVector()}. Note that this method doesn't |
| * release any memory. |
| */ |
| @Override |
| public void reset() { |
| zeroVector(); |
| lastSet = -1; |
| valueCount = 0; |
| } |
| |
| /** Close the vector and release the associated buffers. */ |
| @Override |
| public void close() { |
| clear(); |
| } |
| |
| /** Same as {@link #close()}. */ |
| @Override |
| public void clear() { |
| validityBuffer = releaseBuffer(validityBuffer); |
| valueBuffer = releaseBuffer(valueBuffer); |
| offsetBuffer = releaseBuffer(offsetBuffer); |
| lastSet = -1; |
| valueCount = 0; |
| } |
| |
| /** |
| * Get the inner vectors. |
| * |
| * @return the inner vectors for this field as defined by the TypeLayout |
| * @deprecated This API will be removed as the current implementations no longer support inner |
| * vectors. |
| */ |
| @Deprecated |
| @Override |
| public List<BufferBacked> getFieldInnerVectors() { |
| throw new UnsupportedOperationException("There are no inner vectors. Use getFieldBuffers"); |
| } |
| |
| /** |
| * Initialize the children in schema for this Field. This operation is a NO-OP for scalar types |
| * since they don't have any children. |
| * |
| * @param children the schema |
| * @throws IllegalArgumentException if children is a non-empty list for scalar types. |
| */ |
| @Override |
| public void initializeChildrenFromFields(List<Field> children) { |
| if (!children.isEmpty()) { |
| throw new IllegalArgumentException("primitive type vector cannot have children"); |
| } |
| } |
| |
| /** |
| * Get the inner child vectors. |
| * |
| * @return list of child vectors for complex types, empty list for scalar vector types |
| */ |
| @Override |
| public List<FieldVector> getChildrenFromFields() { |
| return Collections.emptyList(); |
| } |
| |
| /** |
| * Load the buffers of this vector with provided source buffers. The caller manages the source |
| * buffers and populates them before invoking this method. |
| * |
| * @param fieldNode the fieldNode indicating the value count |
| * @param ownBuffers the buffers for this Field (own buffers only, children not included) |
| */ |
| @Override |
| public void loadFieldBuffers(ArrowFieldNode fieldNode, List<ArrowBuf> ownBuffers) { |
| ArrowBuf bitBuffer = ownBuffers.get(0); |
| ArrowBuf offBuffer = ownBuffers.get(1); |
| ArrowBuf dataBuffer = ownBuffers.get(2); |
| |
| validityBuffer.getReferenceManager().release(); |
| validityBuffer = BitVectorHelper.loadValidityBuffer(fieldNode, bitBuffer, allocator); |
| offsetBuffer.getReferenceManager().release(); |
| offsetBuffer = offBuffer.getReferenceManager().retain(offBuffer, allocator); |
| valueBuffer.getReferenceManager().release(); |
| valueBuffer = dataBuffer.getReferenceManager().retain(dataBuffer, allocator); |
| |
| lastSet = fieldNode.getLength() - 1; |
| valueCount = fieldNode.getLength(); |
| } |
| |
| /** |
| * Get the buffers belonging to this vector. |
| * |
| * @return the inner buffers. |
| */ |
| @Override |
| public List<ArrowBuf> getFieldBuffers() { |
| // before flight/IPC, we must bring the vector to a consistent state. |
| // this is because, it is possible that the offset buffers of some trailing values |
| // are not updated. this may cause some data in the data buffer being lost. |
| // for details, please see TestValueVector#testUnloadVariableWidthVector. |
| fillHoles(valueCount); |
| |
| List<ArrowBuf> result = new ArrayList<>(3); |
| setReaderAndWriterIndex(); |
| result.add(validityBuffer); |
| result.add(offsetBuffer); |
| result.add(valueBuffer); |
| |
| return result; |
| } |
| |
| /** |
| * Export the buffers of the fields for C Data Interface. This method traverse the buffers and |
| * export buffer and buffer's memory address into a list of buffers and a pointer to the list of |
| * buffers. |
| */ |
| @Override |
| public void exportCDataBuffers(List<ArrowBuf> buffers, ArrowBuf buffersPtr, long nullValue) { |
| // before flight/IPC, we must bring the vector to a consistent state. |
| // this is because, it is possible that the offset buffers of some trailing values |
| // are not updated. this may cause some data in the data buffer being lost. |
| // for details, please see TestValueVector#testUnloadVariableWidthVector. |
| fillHoles(valueCount); |
| |
| exportBuffer(validityBuffer, buffers, buffersPtr, nullValue, true); |
| |
| if (offsetBuffer.capacity() == 0) { |
| // Empty offset buffer is allowed for historical reason. |
| // To export it through C Data interface, we need to allocate a buffer with one offset. |
| // We set `retain = false` to explicitly not increase the ref count for the exported buffer. |
| // The ref count of the newly created buffer (i.e., 1) already represents the usage |
| // at imported side. |
| exportBuffer(allocateOffsetBuffer(OFFSET_WIDTH), buffers, buffersPtr, nullValue, false); |
| } else { |
| exportBuffer(offsetBuffer, buffers, buffersPtr, nullValue, true); |
| } |
| |
| exportBuffer(valueBuffer, buffers, buffersPtr, nullValue, true); |
| } |
| |
| /** Set the reader and writer indexes for the inner buffers. */ |
| private void setReaderAndWriterIndex() { |
| validityBuffer.readerIndex(0); |
| offsetBuffer.readerIndex(0); |
| valueBuffer.readerIndex(0); |
| if (valueCount == 0) { |
| validityBuffer.writerIndex(0); |
| offsetBuffer.writerIndex(0); |
| valueBuffer.writerIndex(0); |
| } else { |
| final int lastDataOffset = getStartOffset(valueCount); |
| validityBuffer.writerIndex(getValidityBufferSizeFromCount(valueCount)); |
| offsetBuffer.writerIndex((long) (valueCount + 1) * OFFSET_WIDTH); |
| valueBuffer.writerIndex(lastDataOffset); |
| } |
| } |
| |
| /** Same as {@link #allocateNewSafe()}. */ |
| @Override |
| public void allocateNew() { |
| allocateNew(lastValueAllocationSizeInBytes, lastValueCapacity); |
| } |
| |
| /** |
| * Allocate memory for the vector. We internally use a default value count of 4096 to allocate |
| * memory for at least these many elements in the vector. See {@link #allocateNew(long, int)} for |
| * allocating memory for specific number of elements in the vector. |
| * |
| * @return false if memory allocation fails, true otherwise. |
| */ |
| @Override |
| public boolean allocateNewSafe() { |
| try { |
| allocateNew(lastValueAllocationSizeInBytes, lastValueCapacity); |
| return true; |
| } catch (Exception e) { |
| return false; |
| } |
| } |
| |
| /** |
| * Allocate memory for the vector to support storing at least the provided number of elements in |
| * the vector. This method must be called prior to using the ValueVector. |
| * |
| * @param totalBytes desired total memory capacity |
| * @param valueCount the desired number of elements in the vector |
| * @throws org.apache.arrow.memory.OutOfMemoryException if memory allocation fails |
| */ |
| @Override |
| public void allocateNew(long totalBytes, int valueCount) { |
| assert totalBytes >= 0; |
| |
| checkDataBufferSize(totalBytes); |
| computeAndCheckOffsetsBufferSize(valueCount); |
| |
| /* we are doing a new allocation -- release the current buffers */ |
| clear(); |
| |
| try { |
| allocateBytes(totalBytes, valueCount); |
| } catch (Exception e) { |
| clear(); |
| throw e; |
| } |
| } |
| |
| @Override |
| public void allocateNew(int valueCount) { |
| allocateNew(lastValueAllocationSizeInBytes, valueCount); |
| } |
| |
| /* Check if the data buffer size is within bounds. */ |
| private void checkDataBufferSize(long size) { |
| if (size > MAX_BUFFER_SIZE || size < 0) { |
| throw new OversizedAllocationException( |
| "Memory required for vector " |
| + "is (" |
| + size |
| + "), which is overflow or more than max allowed (" |
| + MAX_BUFFER_SIZE |
| + "). " |
| + "You could consider using LargeVarCharVector/LargeVarBinaryVector for large strings/large bytes types"); |
| } |
| } |
| |
| /* |
| * Compute the buffer size required for 'valueCount' offsets and validity, and check if it's |
| * within bounds. |
| */ |
| private long computeAndCheckOffsetsBufferSize(int valueCount) { |
| /* to track the end offset of last data element in vector, we need |
| * an additional slot in offset buffer. |
| */ |
| final long size = computeCombinedBufferSize(valueCount + 1, OFFSET_WIDTH); |
| if (size > MAX_BUFFER_SIZE) { |
| throw new OversizedAllocationException( |
| "Memory required for vector capacity " |
| + valueCount |
| + " is (" |
| + size |
| + "), which is more than max allowed (" |
| + MAX_BUFFER_SIZE |
| + ")"); |
| } |
| return size; |
| } |
| |
| /* allocate the inner buffers */ |
| private void allocateBytes(final long valueBufferSize, final int valueCount) { |
| /* allocate data buffer */ |
| long curSize = valueBufferSize; |
| valueBuffer = allocator.buffer(curSize); |
| valueBuffer.readerIndex(0); |
| |
| /* allocate offset buffer and validity buffer */ |
| DataAndValidityBuffers buffers = allocFixedDataAndValidityBufs(valueCount + 1, OFFSET_WIDTH); |
| offsetBuffer = buffers.getDataBuf(); |
| validityBuffer = buffers.getValidityBuf(); |
| initOffsetBuffer(); |
| initValidityBuffer(); |
| |
| lastValueCapacity = getValueCapacity(); |
| lastValueAllocationSizeInBytes = capAtMaxInt(valueBuffer.capacity()); |
| } |
| |
| /* allocate offset buffer */ |
| private ArrowBuf allocateOffsetBuffer(final long size) { |
| final int curSize = (int) size; |
| ArrowBuf offsetBuffer = allocator.buffer(curSize); |
| offsetBuffer.readerIndex(0); |
| initOffsetBuffer(); |
| return offsetBuffer; |
| } |
| |
| /* allocate validity buffer */ |
| private void allocateValidityBuffer(final long size) { |
| final int curSize = (int) size; |
| validityBuffer = allocator.buffer(curSize); |
| validityBuffer.readerIndex(0); |
| initValidityBuffer(); |
| } |
| |
| /** |
| * Resize the vector to increase the capacity. The internal behavior is to double the current |
| * value capacity. |
| */ |
| @Override |
| public void reAlloc() { |
| reallocDataBuffer(); |
| reallocValidityAndOffsetBuffers(); |
| } |
| |
| /** |
| * Reallocate the data buffer. Data Buffer stores the actual data for VARCHAR or VARBINARY |
| * elements in the vector. The behavior is to double the size of buffer. |
| * |
| * @throws OversizedAllocationException if the desired new size is more than max allowed |
| * @throws OutOfMemoryException if the internal memory allocation fails |
| */ |
| public void reallocDataBuffer() { |
| final long currentBufferCapacity = valueBuffer.capacity(); |
| long newAllocationSize = currentBufferCapacity * 2; |
| if (newAllocationSize == 0) { |
| if (lastValueAllocationSizeInBytes > 0) { |
| newAllocationSize = lastValueAllocationSizeInBytes; |
| } else { |
| newAllocationSize = INITIAL_BYTE_COUNT * 2L; |
| } |
| } |
| |
| reallocDataBuffer(newAllocationSize); |
| } |
| |
| /** |
| * Reallocate the data buffer to given size. Data Buffer stores the actual data for VARCHAR or |
| * VARBINARY elements in the vector. The actual allocate size may be larger than the request one |
| * because it will round up the provided value to the nearest power of two. |
| * |
| * @param desiredAllocSize the desired new allocation size |
| * @throws OversizedAllocationException if the desired new size is more than max allowed |
| * @throws OutOfMemoryException if the internal memory allocation fails |
| */ |
| public void reallocDataBuffer(long desiredAllocSize) { |
| if (desiredAllocSize == 0) { |
| return; |
| } |
| |
| final long newAllocationSize = CommonUtil.nextPowerOfTwo(desiredAllocSize); |
| assert newAllocationSize >= 1; |
| |
| checkDataBufferSize(newAllocationSize); |
| |
| final ArrowBuf newBuf = allocator.buffer(newAllocationSize); |
| newBuf.setBytes(0, valueBuffer, 0, valueBuffer.capacity()); |
| valueBuffer.getReferenceManager().release(); |
| valueBuffer = newBuf; |
| lastValueAllocationSizeInBytes = valueBuffer.capacity(); |
| } |
| |
| /** |
| * Reallocate the validity and offset buffers for this vector. Validity buffer is used to track |
| * the NULL or NON-NULL nature of elements in the vector and offset buffer is used to store the |
| * lengths of variable width elements in the vector. |
| * |
| * <p>Note that data buffer for variable length vectors moves independent of the companion |
| * validity and offset buffers. This is in contrast to what we have for fixed width vectors. |
| * |
| * <p>So even though we may have setup an initial capacity of 1024 elements in the vector, it is |
| * quite possible that we need to reAlloc() the data buffer when we are setting the 5th element in |
| * the vector simply because previous variable length elements have exhausted the buffer capacity. |
| * However, we really don't need to reAlloc() validity and offset buffers until we try to set the |
| * 1025th element This is why we do a separate check for safe methods to determine which buffer |
| * needs reallocation. |
| * |
| * @throws OversizedAllocationException if the desired new size is more than max allowed |
| * @throws OutOfMemoryException if the internal memory allocation fails |
| */ |
| public void reallocValidityAndOffsetBuffers() { |
| int targetOffsetCount = capAtMaxInt((offsetBuffer.capacity() / OFFSET_WIDTH) * 2); |
| if (targetOffsetCount == 0) { |
| if (lastValueCapacity > 0) { |
| targetOffsetCount = (lastValueCapacity + 1); |
| } else { |
| targetOffsetCount = 2 * (INITIAL_VALUE_ALLOCATION + 1); |
| } |
| } |
| computeAndCheckOffsetsBufferSize(targetOffsetCount); |
| |
| DataAndValidityBuffers buffers = allocFixedDataAndValidityBufs(targetOffsetCount, OFFSET_WIDTH); |
| final ArrowBuf newOffsetBuffer = buffers.getDataBuf(); |
| newOffsetBuffer.setBytes(0, offsetBuffer, 0, offsetBuffer.capacity()); |
| newOffsetBuffer.setZero( |
| offsetBuffer.capacity(), newOffsetBuffer.capacity() - offsetBuffer.capacity()); |
| offsetBuffer.getReferenceManager().release(); |
| offsetBuffer = newOffsetBuffer; |
| |
| final ArrowBuf newValidityBuffer = buffers.getValidityBuf(); |
| newValidityBuffer.setBytes(0, validityBuffer, 0, validityBuffer.capacity()); |
| newValidityBuffer.setZero( |
| validityBuffer.capacity(), newValidityBuffer.capacity() - validityBuffer.capacity()); |
| validityBuffer.getReferenceManager().release(); |
| validityBuffer = newValidityBuffer; |
| |
| lastValueCapacity = getValueCapacity(); |
| } |
| |
| /** |
| * Get the size (number of bytes) of underlying data buffer. |
| * |
| * @return number of bytes in the data buffer |
| */ |
| @Override |
| public int getByteCapacity() { |
| return capAtMaxInt(valueBuffer.capacity()); |
| } |
| |
| @Override |
| public int sizeOfValueBuffer() { |
| if (valueCount == 0) { |
| return 0; |
| } |
| return offsetBuffer.getInt((long) valueCount * OFFSET_WIDTH); |
| } |
| |
| /** |
| * Get the size (number of bytes) of underlying buffers used by this vector. |
| * |
| * @return size of underlying buffers. |
| */ |
| @Override |
| public int getBufferSize() { |
| return getBufferSizeFor(this.valueCount); |
| } |
| |
| /** |
| * Get the potential buffer size for a particular number of records. |
| * |
| * @param valueCount desired number of elements in the vector |
| * @return estimated size of underlying buffers if the vector holds a given number of elements |
| */ |
| @Override |
| public int getBufferSizeFor(final int valueCount) { |
| if (valueCount == 0) { |
| return 0; |
| } |
| |
| final int validityBufferSize = getValidityBufferSizeFromCount(valueCount); |
| final int offsetBufferSize = (valueCount + 1) * OFFSET_WIDTH; |
| /* get the end offset for this valueCount */ |
| final int dataBufferSize = offsetBuffer.getInt((long) valueCount * OFFSET_WIDTH); |
| return validityBufferSize + offsetBufferSize + dataBufferSize; |
| } |
| |
| /** |
| * Get information about how this field is materialized. |
| * |
| * @return the field corresponding to this vector |
| */ |
| @Override |
| public Field getField() { |
| return field; |
| } |
| |
| /** |
| * Return the underlying buffers associated with this vector. Note that this doesn't impact the |
| * reference counts for this buffer so it only should be used for in-context access. Also note |
| * that this buffer changes regularly thus external classes shouldn't hold a reference to it |
| * (unless they change it). |
| * |
| * @param clear Whether to clear vector before returning; the buffers will still be refcounted but |
| * the returned array will be the only reference to them |
| * @return The underlying {@link ArrowBuf buffers} that is used by this vector instance. |
| */ |
| @Override |
| public ArrowBuf[] getBuffers(boolean clear) { |
| final ArrowBuf[] buffers; |
| setReaderAndWriterIndex(); |
| if (getBufferSize() == 0) { |
| buffers = new ArrowBuf[0]; |
| } else { |
| buffers = new ArrowBuf[3]; |
| buffers[0] = validityBuffer; |
| buffers[1] = offsetBuffer; |
| buffers[2] = valueBuffer; |
| } |
| if (clear) { |
| for (final ArrowBuf buffer : buffers) { |
| buffer.getReferenceManager().retain(); |
| } |
| clear(); |
| } |
| return buffers; |
| } |
| |
| /** Validate the scalar values held by this vector. */ |
| public void validateScalars() { |
| // No validation by default. |
| } |
| |
| /** |
| * Construct a transfer pair of this vector and another vector of same type. |
| * |
| * @param field The field materialized by this vector. |
| * @param allocator allocator for the target vector |
| * @param callBack not used |
| * @return TransferPair |
| */ |
| @Override |
| public TransferPair getTransferPair(Field field, BufferAllocator allocator, CallBack callBack) { |
| return getTransferPair(field, allocator); |
| } |
| |
| /** |
| * Construct a transfer pair of this vector and another vector of same type. |
| * |
| * @param ref name of the target vector |
| * @param allocator allocator for the target vector |
| * @param callBack not used |
| * @return TransferPair |
| */ |
| @Override |
| public TransferPair getTransferPair(String ref, BufferAllocator allocator, CallBack callBack) { |
| return getTransferPair(ref, allocator); |
| } |
| |
| /** |
| * Construct a transfer pair of this vector and another vector of same type. |
| * |
| * @param allocator allocator for the target vector |
| * @return TransferPair |
| */ |
| @Override |
| public TransferPair getTransferPair(BufferAllocator allocator) { |
| return getTransferPair(getName(), allocator); |
| } |
| |
| /** |
| * Construct a transfer pair of this vector and another vector of same type. |
| * |
| * @param ref name of the target vector |
| * @param allocator allocator for the target vector |
| * @return TransferPair |
| */ |
| @Override |
| public abstract TransferPair getTransferPair(String ref, BufferAllocator allocator); |
| |
| /** |
| * Construct a transfer pair of this vector and another vector of same type. |
| * |
| * @param field The field materialized by this vector. |
| * @param allocator allocator for the target vector |
| * @return TransferPair |
| */ |
| @Override |
| public abstract TransferPair getTransferPair(Field field, BufferAllocator allocator); |
| |
| /** |
| * Transfer this vector'data to another vector. The memory associated with this vector is |
| * transferred to the allocator of target vector for accounting and management purposes. |
| * |
| * @param target destination vector for transfer |
| */ |
| public void transferTo(BaseVariableWidthVector target) { |
| compareTypes(target, "transferTo"); |
| target.clear(); |
| target.validityBuffer = transferBuffer(validityBuffer, target.allocator); |
| target.valueBuffer = transferBuffer(valueBuffer, target.allocator); |
| target.offsetBuffer = transferBuffer(offsetBuffer, target.allocator); |
| target.setLastSet(this.lastSet); |
| if (this.valueCount > 0) { |
| target.setValueCount(this.valueCount); |
| } |
| clear(); |
| } |
| |
| /** |
| * Slice this vector at desired index and length and transfer the corresponding data to the target |
| * vector. |
| * |
| * @param startIndex start position of the split in source vector. |
| * @param length length of the split. |
| * @param target destination vector |
| */ |
| public void splitAndTransferTo(int startIndex, int length, BaseVariableWidthVector target) { |
| Preconditions.checkArgument( |
| startIndex >= 0 && length >= 0 && startIndex + length <= valueCount, |
| "Invalid parameters startIndex: %s, length: %s for valueCount: %s", |
| startIndex, |
| length, |
| valueCount); |
| compareTypes(target, "splitAndTransferTo"); |
| target.clear(); |
| if (length > 0) { |
| splitAndTransferValidityBuffer(startIndex, length, target); |
| splitAndTransferOffsetBuffer(startIndex, length, target); |
| target.setLastSet(length - 1); |
| target.setValueCount(length); |
| } |
| } |
| |
| /** |
| * Transfer the offsets along with data. Unlike the data buffer, we cannot simply slice the offset |
| * buffer for split and transfer. The reason is that offsets in the target vector have to be |
| * adjusted and made relative to the staring offset in source vector from the start index of |
| * split. This is why, we need to explicitly allocate the offset buffer and set the adjusted |
| * offsets in the target vector. |
| */ |
| private void splitAndTransferOffsetBuffer( |
| int startIndex, int length, BaseVariableWidthVector target) { |
| final int start = getStartOffset(startIndex); |
| final int end = getStartOffset(startIndex + length); |
| final int dataLength = end - start; |
| |
| if (start == 0) { |
| final ArrowBuf slicedOffsetBuffer = |
| offsetBuffer.slice( |
| startIndex * ((long) OFFSET_WIDTH), (1 + length) * ((long) OFFSET_WIDTH)); |
| target.offsetBuffer = transferBuffer(slicedOffsetBuffer, target.allocator); |
| } else { |
| target.offsetBuffer = target.allocateOffsetBuffer((long) (length + 1) * OFFSET_WIDTH); |
| for (int i = 0; i < length + 1; i++) { |
| final int relativeSourceOffset = getStartOffset(startIndex + i) - start; |
| target.offsetBuffer.setInt((long) i * OFFSET_WIDTH, relativeSourceOffset); |
| } |
| } |
| final ArrowBuf slicedBuffer = valueBuffer.slice(start, dataLength); |
| target.valueBuffer = transferBuffer(slicedBuffer, target.allocator); |
| } |
| |
| /* |
| * Transfer the validity. |
| */ |
| private void splitAndTransferValidityBuffer( |
| int startIndex, int length, BaseVariableWidthVector target) { |
| if (length <= 0) { |
| return; |
| } |
| |
| final int firstByteSource = BitVectorHelper.byteIndex(startIndex); |
| final int lastByteSource = BitVectorHelper.byteIndex(valueCount - 1); |
| final int byteSizeTarget = getValidityBufferSizeFromCount(length); |
| final int offset = startIndex % 8; |
| |
| if (offset == 0) { |
| // slice |
| if (target.validityBuffer != null) { |
| target.validityBuffer.getReferenceManager().release(); |
| } |
| final ArrowBuf slicedValidityBuffer = validityBuffer.slice(firstByteSource, byteSizeTarget); |
| target.validityBuffer = transferBuffer(slicedValidityBuffer, target.allocator); |
| return; |
| } |
| |
| /* Copy data |
| * When the first bit starts from the middle of a byte (offset != 0), |
| * copy data from src BitVector. |
| * Each byte in the target is composed by a part in i-th byte, |
| * another part in (i+1)-th byte. |
| */ |
| target.allocateValidityBuffer(byteSizeTarget); |
| |
| for (int i = 0; i < byteSizeTarget - 1; i++) { |
| byte b1 = |
| BitVectorHelper.getBitsFromCurrentByte(this.validityBuffer, firstByteSource + i, offset); |
| byte b2 = |
| BitVectorHelper.getBitsFromNextByte(this.validityBuffer, firstByteSource + i + 1, offset); |
| |
| target.validityBuffer.setByte(i, (b1 + b2)); |
| } |
| /* Copying the last piece is done in the following manner: |
| * if the source vector has 1 or more bytes remaining, we copy |
| * the last piece as a byte formed by shifting data |
| * from the current byte and the next byte. |
| * |
| * if the source vector has no more bytes remaining |
| * (we are at the last byte), we copy the last piece as a byte |
| * by shifting data from the current byte. |
| */ |
| if ((firstByteSource + byteSizeTarget - 1) < lastByteSource) { |
| byte b1 = |
| BitVectorHelper.getBitsFromCurrentByte( |
| this.validityBuffer, firstByteSource + byteSizeTarget - 1, offset); |
| byte b2 = |
| BitVectorHelper.getBitsFromNextByte( |
| this.validityBuffer, firstByteSource + byteSizeTarget, offset); |
| |
| target.validityBuffer.setByte(byteSizeTarget - 1, b1 + b2); |
| } else { |
| byte b1 = |
| BitVectorHelper.getBitsFromCurrentByte( |
| this.validityBuffer, firstByteSource + byteSizeTarget - 1, offset); |
| target.validityBuffer.setByte(byteSizeTarget - 1, b1); |
| } |
| } |
| |
| /*----------------------------------------------------------------* |
| | | |
| | common getters and setters | |
| | | |
| *----------------------------------------------------------------*/ |
| |
| /** |
| * Get the number of elements that are null in the vector. |
| * |
| * @return the number of null elements. |
| */ |
| @Override |
| public int getNullCount() { |
| return BitVectorHelper.getNullCount(validityBuffer, valueCount); |
| } |
| |
| /** |
| * Check if the given index is within the current value capacity of the vector. |
| * |
| * @param index position to check |
| * @return true if index is within the current value capacity |
| */ |
| public boolean isSafe(int index) { |
| return index < getValueCapacity(); |
| } |
| |
| /** |
| * Check if element at given index is null. |
| * |
| * @param index position of element |
| * @return true if element at given index is null |
| */ |
| @Override |
| public boolean isNull(int index) { |
| return (isSet(index) == 0); |
| } |
| |
| /** |
| * Same as {@link #isNull(int)}. |
| * |
| * @param index position of element |
| * @return 1 if element at given index is not null, 0 otherwise |
| */ |
| public int isSet(int index) { |
| final int byteIndex = index >> 3; |
| final byte b = validityBuffer.getByte(byteIndex); |
| final int bitIndex = index & 7; |
| return (b >> bitIndex) & 0x01; |
| } |
| |
| /** |
| * Get the value count of vector. This will always be zero unless setValueCount(int) has been |
| * called prior to calling this. |
| * |
| * @return valueCount for the vector |
| */ |
| @Override |
| public int getValueCount() { |
| return valueCount; |
| } |
| |
| /** |
| * Sets the value count for the vector. |
| * |
| * @param valueCount value count |
| */ |
| @Override |
| public void setValueCount(int valueCount) { |
| assert valueCount >= 0; |
| this.valueCount = valueCount; |
| while (valueCount > getValueCapacity()) { |
| reallocValidityAndOffsetBuffers(); |
| } |
| fillHoles(valueCount); |
| lastSet = valueCount - 1; |
| setReaderAndWriterIndex(); |
| } |
| |
| /** |
| * Create holes in the vector upto the given index (exclusive). Holes will be created from the |
| * current last set position in the vector. |
| * |
| * @param index target index |
| */ |
| @Override |
| public void fillEmpties(int index) { |
| handleSafe(index, emptyByteArray.length); |
| fillHoles(index); |
| lastSet = index - 1; |
| } |
| |
| /** |
| * Set the index of last non-null element in the vector. It is important to call this method with |
| * appropriate value before calling {@link #setValueCount(int)}. |
| * |
| * @param value desired index of last non-null element. |
| */ |
| @Override |
| public void setLastSet(int value) { |
| lastSet = value; |
| } |
| |
| /** |
| * Get the index of last non-null element in the vector. |
| * |
| * @return index of the last non-null element |
| */ |
| @Override |
| public int getLastSet() { |
| return lastSet; |
| } |
| |
| /** |
| * Get the starting position (offset) in the data stream for a given element in the vector. |
| * |
| * @param index position of the element in the vector |
| * @return starting offset for the element |
| */ |
| public long getStartEnd(int index) { |
| return offsetBuffer.getLong((long) index * OFFSET_WIDTH); |
| } |
| |
| /** |
| * Mark the particular position in the vector as non-null. |
| * |
| * @param index position of the element. |
| */ |
| @Override |
| public void setIndexDefined(int index) { |
| // We need to check and realloc both validity and offset buffer |
| while (index >= getValueCapacity()) { |
| reallocValidityAndOffsetBuffers(); |
| } |
| BitVectorHelper.setBit(validityBuffer, index); |
| } |
| |
| /** |
| * Sets the value length for an element. |
| * |
| * @param index position of the element to set |
| * @param length length of the element |
| */ |
| @Override |
| public void setValueLengthSafe(int index, int length) { |
| assert index >= 0; |
| handleSafe(index, length); |
| fillHoles(index); |
| final int startOffset = getStartOffset(index); |
| offsetBuffer.setInt((index + 1) * ((long) OFFSET_WIDTH), startOffset + length); |
| lastSet = index; |
| } |
| |
| /** |
| * Get the variable length element at specified index as Text. |
| * |
| * @param index position of element to get |
| * @return greater than 0 length for non-null element, 0 otherwise |
| */ |
| @Override |
| public int getValueLength(int index) { |
| assert index >= 0; |
| if (isSet(index) == 0) { |
| return 0; |
| } |
| final int startOffset = getStartOffset(index); |
| final int dataLength = getEndOffset(index) - startOffset; |
| return dataLength; |
| } |
| |
| /** |
| * Set the variable length element at the specified index to the supplied byte array. This is same |
| * as using {@link #set(int, byte[], int, int)} with start as 0 and length as value.length |
| * |
| * @param index position of the element to set |
| * @param value array of bytes to write |
| */ |
| @Override |
| public void set(int index, byte[] value) { |
| assert index >= 0; |
| fillHoles(index); |
| BitVectorHelper.setBit(validityBuffer, index); |
| setBytes(index, value, 0, value.length); |
| lastSet = index; |
| } |
| |
| /** |
| * Same as {@link #set(int, byte[])} except that it handles the case where index and length of new |
| * element are beyond the existing capacity of the vector. |
| * |
| * @param index position of the element to set |
| * @param value array of bytes to write |
| */ |
| @Override |
| public void setSafe(int index, byte[] value) { |
| assert index >= 0; |
| handleSafe(index, value.length); |
| fillHoles(index); |
| BitVectorHelper.setBit(validityBuffer, index); |
| setBytes(index, value, 0, value.length); |
| lastSet = index; |
| } |
| |
| /** |
| * Set the variable length element at the specified index to the supplied byte array. |
| * |
| * @param index position of the element to set |
| * @param value array of bytes to write |
| * @param start start index in array of bytes |
| * @param length length of data in array of bytes |
| */ |
| public void set(int index, byte[] value, int start, int length) { |
| assert index >= 0; |
| fillHoles(index); |
| BitVectorHelper.setBit(validityBuffer, index); |
| setBytes(index, value, start, length); |
| lastSet = index; |
| } |
| |
| /** |
| * Same as {@link #set(int, byte[], int, int)} except that it handles the case where index and |
| * length of new element are beyond the existing capacity of the vector. |
| * |
| * @param index position of the element to set |
| * @param value array of bytes to write |
| * @param start start index in array of bytes |
| * @param length length of data in array of bytes |
| */ |
| public void setSafe(int index, byte[] value, int start, int length) { |
| assert index >= 0; |
| handleSafe(index, length); |
| fillHoles(index); |
| BitVectorHelper.setBit(validityBuffer, index); |
| setBytes(index, value, start, length); |
| lastSet = index; |
| } |
| |
| /** |
| * Set the variable length element at the specified index to the content in supplied ByteBuffer. |
| * |
| * @param index position of the element to set |
| * @param value ByteBuffer with data |
| * @param start start index in ByteBuffer |
| * @param length length of data in ByteBuffer |
| */ |
| @Override |
| public void set(int index, ByteBuffer value, int start, int length) { |
| assert index >= 0; |
| fillHoles(index); |
| BitVectorHelper.setBit(validityBuffer, index); |
| final int startOffset = getStartOffset(index); |
| offsetBuffer.setInt((index + 1) * ((long) OFFSET_WIDTH), startOffset + length); |
| valueBuffer.setBytes(startOffset, value, start, length); |
| lastSet = index; |
| } |
| |
| /** |
| * Same as {@link #set(int, ByteBuffer, int, int)} except that it handles the case where index and |
| * length of new element are beyond the existing capacity of the vector. |
| * |
| * @param index position of the element to set |
| * @param value ByteBuffer with data |
| * @param start start index in ByteBuffer |
| * @param length length of data in ByteBuffer |
| */ |
| public void setSafe(int index, ByteBuffer value, int start, int length) { |
| assert index >= 0; |
| handleSafe(index, length); |
| fillHoles(index); |
| BitVectorHelper.setBit(validityBuffer, index); |
| final int startOffset = getStartOffset(index); |
| offsetBuffer.setInt((index + 1) * ((long) OFFSET_WIDTH), startOffset + length); |
| valueBuffer.setBytes(startOffset, value, start, length); |
| lastSet = index; |
| } |
| |
| /** |
| * Set the element at the given index to null. |
| * |
| * @param index position of element |
| */ |
| @Override |
| public void setNull(int index) { |
| // We need to check and realloc both validity and offset buffer |
| while (index >= getValueCapacity()) { |
| reallocValidityAndOffsetBuffers(); |
| } |
| BitVectorHelper.unsetBit(validityBuffer, index); |
| } |
| |
| /** |
| * Store the given value at a particular position in the vector. isSet indicates whether the value |
| * is NULL or not. |
| * |
| * @param index position of the new value |
| * @param isSet 0 for NULL value, 1 otherwise |
| * @param start start position of data in buffer |
| * @param end end position of data in buffer |
| * @param buffer data buffer containing the variable width element to be stored in the vector |
| */ |
| public void set(int index, int isSet, int start, int end, ArrowBuf buffer) { |
| assert index >= 0; |
| final int dataLength = end - start; |
| fillHoles(index); |
| BitVectorHelper.setValidityBit(validityBuffer, index, isSet); |
| final int startOffset = offsetBuffer.getInt((long) index * OFFSET_WIDTH); |
| offsetBuffer.setInt((index + 1) * ((long) OFFSET_WIDTH), startOffset + dataLength); |
| valueBuffer.setBytes(startOffset, buffer, start, dataLength); |
| lastSet = index; |
| } |
| |
| /** |
| * Same as {@link #set(int, int, int, int, ArrowBuf)} except that it handles the case when index |
| * is greater than or equal to current value capacity of the vector. |
| * |
| * @param index position of the new value |
| * @param isSet 0 for NULL value, 1 otherwise |
| * @param start start position of data in buffer |
| * @param end end position of data in buffer |
| * @param buffer data buffer containing the variable width element to be stored in the vector |
| */ |
| public void setSafe(int index, int isSet, int start, int end, ArrowBuf buffer) { |
| assert index >= 0; |
| final int dataLength = end - start; |
| handleSafe(index, dataLength); |
| fillHoles(index); |
| BitVectorHelper.setValidityBit(validityBuffer, index, isSet); |
| final int startOffset = offsetBuffer.getInt((long) index * OFFSET_WIDTH); |
| offsetBuffer.setInt((long) (index + 1) * OFFSET_WIDTH, startOffset + dataLength); |
| valueBuffer.setBytes(startOffset, buffer, start, dataLength); |
| lastSet = index; |
| } |
| |
| /** |
| * Store the given value at a particular position in the vector. isSet indicates whether the value |
| * is NULL or not. |
| * |
| * @param index position of the new value |
| * @param start start position of data in buffer |
| * @param length length of data in buffer |
| * @param buffer data buffer containing the variable width element to be stored in the vector |
| */ |
| public void set(int index, int start, int length, ArrowBuf buffer) { |
| assert index >= 0; |
| fillHoles(index); |
| BitVectorHelper.setBit(validityBuffer, index); |
| final int startOffset = offsetBuffer.getInt((long) index * OFFSET_WIDTH); |
| offsetBuffer.setInt((long) (index + 1) * OFFSET_WIDTH, startOffset + length); |
| final ArrowBuf bb = buffer.slice(start, length); |
| valueBuffer.setBytes(startOffset, bb); |
| lastSet = index; |
| } |
| |
| /** |
| * Same as {@link #set(int, int, int, int, ArrowBuf)} except that it handles the case when index |
| * is greater than or equal to current value capacity of the vector. |
| * |
| * @param index position of the new value |
| * @param start start position of data in buffer |
| * @param length length of data in buffer |
| * @param buffer data buffer containing the variable width element to be stored in the vector |
| */ |
| public void setSafe(int index, int start, int length, ArrowBuf buffer) { |
| assert index >= 0; |
| handleSafe(index, length); |
| fillHoles(index); |
| BitVectorHelper.setBit(validityBuffer, index); |
| final int startOffset = getStartOffset(index); |
| offsetBuffer.setInt((long) (index + 1) * OFFSET_WIDTH, startOffset + length); |
| final ArrowBuf bb = buffer.slice(start, length); |
| valueBuffer.setBytes(startOffset, bb); |
| lastSet = index; |
| } |
| |
| /*----------------------------------------------------------------* |
| | | |
| | helper methods for setters | |
| | | |
| *----------------------------------------------------------------*/ |
| |
| protected final void fillHoles(int index) { |
| for (int i = lastSet + 1; i < index; i++) { |
| setBytes(i, emptyByteArray, 0, emptyByteArray.length); |
| } |
| lastSet = index - 1; |
| } |
| |
| protected final void setBytes(int index, byte[] value, int start, int length) { |
| /* end offset of current last element in the vector. this will |
| * be the start offset of new element we are trying to store. |
| */ |
| final int startOffset = getStartOffset(index); |
| /* set new end offset */ |
| offsetBuffer.setInt((long) (index + 1) * OFFSET_WIDTH, startOffset + length); |
| /* store the var length data in value buffer */ |
| valueBuffer.setBytes(startOffset, value, start, length); |
| } |
| |
| public final int getStartOffset(int index) { |
| return offsetBuffer.getInt((long) index * OFFSET_WIDTH); |
| } |
| |
| protected final void handleSafe(int index, int dataLength) { |
| /* |
| * IMPORTANT: |
| * value buffer for variable length vectors moves independent |
| * of the companion validity and offset buffers. This is in |
| * contrast to what we have for fixed width vectors. |
| * |
| * Here there is no concept of getValueCapacity() in the |
| * data stream. getValueCapacity() is applicable only to validity |
| * and offset buffers. |
| * |
| * So even though we may have setup an initial capacity of 1024 |
| * elements in the vector, it is quite possible |
| * that we need to reAlloc() the data buffer when we are setting |
| * the 5th element in the vector simply because previous |
| * variable length elements have exhausted the buffer capacity. |
| * However, we really don't need to reAlloc() validity and |
| * offset buffers until we try to set the 1025th element |
| * This is why we do a separate check for safe methods to |
| * determine which buffer needs reallocation. |
| */ |
| while (index >= getValueCapacity()) { |
| reallocValidityAndOffsetBuffers(); |
| } |
| final long startOffset = lastSet < 0 ? 0 : getStartOffset(lastSet + 1); |
| final long targetCapacity = startOffset + dataLength; |
| if (valueBuffer.capacity() < targetCapacity) { |
| reallocDataBuffer(targetCapacity); |
| } |
| } |
| |
| /** |
| * Method used by Json Writer to read a variable width element from the variable width vector and |
| * write to Json. |
| * |
| * <p>This method should not be used externally. |
| * |
| * @param data buffer storing the variable width vector elements |
| * @param offset buffer storing the offsets of variable width vector elements |
| * @param index position of the element in the vector |
| * @return array of bytes |
| */ |
| public static byte[] get(final ArrowBuf data, final ArrowBuf offset, int index) { |
| final int currentStartOffset = offset.getInt((long) index * OFFSET_WIDTH); |
| final int dataLength = offset.getInt((long) (index + 1) * OFFSET_WIDTH) - currentStartOffset; |
| final byte[] result = new byte[dataLength]; |
| data.getBytes(currentStartOffset, result, 0, dataLength); |
| return result; |
| } |
| |
| /** |
| * Method used by Json Reader to explicitly set the offsets of the variable width vector data. The |
| * method takes care of allocating the memory for offsets if the caller hasn't done so. |
| * |
| * <p>This method should not be used externally. |
| * |
| * @param buffer ArrowBuf to store offsets for variable width elements |
| * @param allocator memory allocator |
| * @param valueCount number of elements |
| * @param index position of the element |
| * @param value offset of the element |
| * @return buffer holding the offsets |
| */ |
| public static ArrowBuf set( |
| ArrowBuf buffer, BufferAllocator allocator, int valueCount, int index, int value) { |
| if (buffer == null) { |
| buffer = allocator.buffer((long) valueCount * OFFSET_WIDTH); |
| } |
| buffer.setInt((long) index * OFFSET_WIDTH, value); |
| if (index == (valueCount - 1)) { |
| buffer.writerIndex((long) valueCount * OFFSET_WIDTH); |
| } |
| |
| return buffer; |
| } |
| |
| /** |
| * Copy a cell value from a particular index in source vector to a particular position in this |
| * vector. |
| * |
| * @param fromIndex position to copy from in source vector |
| * @param thisIndex position to copy to in this vector |
| * @param from source vector |
| */ |
| @Override |
| public void copyFrom(int fromIndex, int thisIndex, ValueVector from) { |
| Preconditions.checkArgument(this.getMinorType() == from.getMinorType()); |
| if (from.isNull(fromIndex)) { |
| fillHoles(thisIndex); |
| BitVectorHelper.unsetBit(this.validityBuffer, thisIndex); |
| final int copyStart = offsetBuffer.getInt((long) thisIndex * OFFSET_WIDTH); |
| offsetBuffer.setInt((long) (thisIndex + 1) * OFFSET_WIDTH, copyStart); |
| } else { |
| final int start = from.getOffsetBuffer().getInt((long) fromIndex * OFFSET_WIDTH); |
| final int end = from.getOffsetBuffer().getInt((long) (fromIndex + 1) * OFFSET_WIDTH); |
| final int length = end - start; |
| fillHoles(thisIndex); |
| BitVectorHelper.setBit(this.validityBuffer, thisIndex); |
| final int copyStart = getStartOffset(thisIndex); |
| from.getDataBuffer().getBytes(start, this.valueBuffer, copyStart, length); |
| offsetBuffer.setInt((long) (thisIndex + 1) * OFFSET_WIDTH, copyStart + length); |
| } |
| lastSet = thisIndex; |
| } |
| |
| /** |
| * Same as {@link #copyFrom(int, int, ValueVector)} except that it handles the case when the |
| * capacity of the vector needs to be expanded before copy. |
| * |
| * @param fromIndex position to copy from in source vector |
| * @param thisIndex position to copy to in this vector |
| * @param from source vector |
| */ |
| @Override |
| public void copyFromSafe(int fromIndex, int thisIndex, ValueVector from) { |
| Preconditions.checkArgument(this.getMinorType() == from.getMinorType()); |
| if (from.isNull(fromIndex)) { |
| handleSafe(thisIndex, 0); |
| fillHoles(thisIndex); |
| BitVectorHelper.unsetBit(this.validityBuffer, thisIndex); |
| final int copyStart = getStartOffset(thisIndex); |
| offsetBuffer.setInt((long) (thisIndex + 1) * OFFSET_WIDTH, copyStart); |
| } else { |
| final int start = from.getOffsetBuffer().getInt((long) fromIndex * OFFSET_WIDTH); |
| final int end = from.getOffsetBuffer().getInt((long) (fromIndex + 1) * OFFSET_WIDTH); |
| final int length = end - start; |
| handleSafe(thisIndex, length); |
| fillHoles(thisIndex); |
| BitVectorHelper.setBit(this.validityBuffer, thisIndex); |
| final int copyStart = getStartOffset(thisIndex); |
| from.getDataBuffer().getBytes(start, this.valueBuffer, copyStart, length); |
| offsetBuffer.setInt((long) (thisIndex + 1) * OFFSET_WIDTH, copyStart + length); |
| } |
| lastSet = thisIndex; |
| } |
| |
| @Override |
| public ArrowBufPointer getDataPointer(int index) { |
| return getDataPointer(index, new ArrowBufPointer()); |
| } |
| |
| @Override |
| public ArrowBufPointer getDataPointer(int index, ArrowBufPointer reuse) { |
| if (isNull(index)) { |
| reuse.set(null, 0, 0); |
| } else { |
| int offset = getStartOffset(index); |
| int length = getEndOffset(index) - offset; |
| reuse.set(valueBuffer, offset, length); |
| } |
| return reuse; |
| } |
| |
| @Override |
| public int hashCode(int index) { |
| return hashCode(index, null); |
| } |
| |
| @Override |
| public int hashCode(int index, ArrowBufHasher hasher) { |
| if (isNull(index)) { |
| return ArrowBufPointer.NULL_HASH_CODE; |
| } |
| final int start = getStartOffset(index); |
| final int end = getEndOffset(index); |
| return ByteFunctionHelpers.hash(hasher, this.getDataBuffer(), start, end); |
| } |
| |
| @Override |
| public <OUT, IN> OUT accept(VectorVisitor<OUT, IN> visitor, IN value) { |
| return visitor.visit(this, value); |
| } |
| |
| /** Gets the ending offset of a record, given its index. */ |
| public final int getEndOffset(int index) { |
| return offsetBuffer.getInt((long) (index + 1) * OFFSET_WIDTH); |
| } |
| } |