// blob: 1cfa81dbba3b3b661200ab5546d5634ecaa01e5d [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.arrow.vector;
import java.io.Closeable;
import org.apache.arrow.memory.BufferAllocator;
import org.apache.arrow.memory.OutOfMemoryException;
import org.apache.arrow.vector.compare.VectorVisitor;
import org.apache.arrow.vector.complex.reader.FieldReader;
import org.apache.arrow.vector.types.Types.MinorType;
import org.apache.arrow.vector.types.pojo.Field;
import org.apache.arrow.vector.util.CallBack;
import org.apache.arrow.vector.util.TransferPair;
import io.netty.buffer.ArrowBuf;
/**
* An abstraction that is used to store a sequence of values in an individual column.
*
* <p>A {@link ValueVector value vector} stores underlying data in-memory in a columnar fashion that is compact and
* efficient. The column whose data is stored is described by {@link #getField()}.
*
* <p>It is important that a vector is allocated before attempting to read or write.
*
* <p>There are a few "rules" around vectors:
*
* <ul>
* <li>values need to be written in order (e.g. index 0, 1, 2, 5)</li>
* <li>null vectors start with all values as null before writing anything</li>
* <li>for variable width types, the offset vector should be all zeros before writing</li>
* <li>you must call setValueCount before a vector can be read</li>
* <li>you should never write to a vector once it has been read.</li>
* </ul>
*
* <p>Please note that the current implementation doesn't enforce those rules, hence we may find a few places that
* deviate from these rules (e.g. offset vectors in Variable Length and Repeated vector).
*
* <p>This interface "should" strive to guarantee this order of operation:
* <blockquote>
* allocate &gt; mutate &gt; setvaluecount &gt; access &gt; clear (or allocate to start the process over).
* </blockquote>
*/
public interface ValueVector extends Closeable, Iterable<ValueVector> {
/**
* Allocate new buffers. ValueVector implements logic to determine how much to allocate.
*
* @throws OutOfMemoryException Thrown if no memory can be allocated.
*/
void allocateNew() throws OutOfMemoryException;
/**
* Allocates new buffers. ValueVector implements logic to determine how much to allocate.
*
* <p>Unlike {@link #allocateNew()}, the outcome is reported through the return value.
*
* @return Returns true if allocation was successful, false otherwise.
*/
boolean allocateNewSafe();
/**
* Allocate new buffer with double capacity, and copy data into the new buffer.
* Replace vector's buffer with new buffer, and release old one.
*/
void reAlloc();
/**
* Get the allocator associated with this vector.
*
* <p>NOTE(review): presumably this is the allocator the vector's buffers are obtained from; confirm against
* the implementations.
*
* @return the {@link BufferAllocator} associated with this vector
*/
BufferAllocator getAllocator();
/**
* Set the initial record capacity.
*
* @param numRecords the initial record capacity.
*/
void setInitialCapacity(int numRecords);
/**
* Returns the maximum number of values that can be stored in this vector instance.
*
* @return the maximum number of values that can be stored in this vector instance.
*/
int getValueCapacity();
/**
* Alternative to clear(). Allows use as an AutoCloseable in try-with-resources.
*
* <p>This override is declared without a {@code throws} clause, so unlike
* {@link java.io.Closeable#close()} it does not require callers to handle a checked
* {@link java.io.IOException}.
*/
@Override
void close();
/**
* Release any owned ArrowBuf and reset the ValueVector to the initial state. If the
* vector has any child vectors, they will also be cleared.
*/
void clear();
/**
* Reset the ValueVector to the initial state without releasing any owned ArrowBuf.
* Buffer capacities will remain unchanged and any previous data will be zeroed out.
* This includes buffers for data, validity, offset, etc. If the vector has any
* child vectors, they will also be reset.
*/
void reset();
/**
* Get information about how this field is materialized.
*
* @return the field corresponding to this vector
*/
Field getField();
/**
* Get the minor type of this vector.
*
* @return the {@link MinorType} of this vector
*/
MinorType getMinorType();
/**
* To transfer quota responsibility.
*
* @param allocator the target allocator
* @return a {@link org.apache.arrow.vector.util.TransferPair transfer pair}, creating a new target vector of
* the same type.
*/
TransferPair getTransferPair(BufferAllocator allocator);
/**
* Variant of {@link #getTransferPair(BufferAllocator)} that additionally takes a {@code ref} string.
*
* @param ref NOTE(review): presumably the name given to the newly created target vector; confirm against
* the implementations
* @param allocator the target allocator
* @return a {@link org.apache.arrow.vector.util.TransferPair transfer pair}
*/
TransferPair getTransferPair(String ref, BufferAllocator allocator);
/**
* Variant of {@link #getTransferPair(String, BufferAllocator)} that additionally takes a {@link CallBack}.
*
* @param ref NOTE(review): presumably the name given to the newly created target vector; confirm against
* the implementations
* @param allocator the target allocator
* @param callBack a caller-supplied callback; when it is invoked is not specified by this interface
* (TODO confirm against the implementations)
* @return a {@link org.apache.arrow.vector.util.TransferPair transfer pair}
*/
TransferPair getTransferPair(String ref, BufferAllocator allocator, CallBack callBack);
/**
* Makes a new transfer pair used to transfer underlying buffers.
*
* @param target the target for the transfer
* @return a new {@link org.apache.arrow.vector.util.TransferPair transfer pair} that is used to transfer underlying
* buffers into the target vector.
*/
TransferPair makeTransferPair(ValueVector target);
/**
* Get a reader for this vector.
*
* @return a {@link org.apache.arrow.vector.complex.reader.FieldReader field reader} that supports reading values
* from this vector.
*/
FieldReader getReader();
/**
* Get the number of bytes used by this vector.
*
* @return the number of bytes that is used by this vector instance.
*/
int getBufferSize();
/**
* Returns the number of bytes that is used by this vector if it holds the given number
* of values. The result will be the same as if setValueCount() were called, followed
* by calling getBufferSize(), but without any of the closing side-effects that setValueCount()
* implies wrt finishing off the population of a vector. Some operations might wish to use
* this to determine how much memory has been used by a vector so far, even though it is
* not finished being populated.
*
* @param valueCount the number of values to assume this vector contains
* @return the buffer size if this vector is holding valueCount values
*/
int getBufferSizeFor(int valueCount);
/**
* Return the underlying buffers associated with this vector. Note that this doesn't impact the reference counts for
* these buffers, so it should only be used for in-context access. Also note that these buffers change regularly, thus
* external classes shouldn't hold a reference to them (unless they change them).
*
* @param clear Whether to clear vector before returning; the buffers will still be refcounted;
* but the returned array will be the only reference to them
* @return The underlying {@link io.netty.buffer.ArrowBuf buffers} that are used by this vector instance.
*/
ArrowBuf[] getBuffers(boolean clear);
/**
* Gets the underlying buffer associated with validity vector.
*
* @return the validity buffer
*/
ArrowBuf getValidityBuffer();
/**
* Gets the underlying buffer associated with data vector.
*
* @return the data buffer
*/
ArrowBuf getDataBuffer();
/**
* Gets the underlying buffer associated with offset vector.
*
* @return the offset buffer
*/
ArrowBuf getOffsetBuffer();
/**
* Gets the number of values.
*
* @return number of values in the vector
*/
int getValueCount();
/**
* Set number of values in the vector.
*
* @param valueCount the number of values the vector should report
*/
void setValueCount(int valueCount);
/**
* Get friendly type object from the vector.
*
* @param index index of object to get
* @return friendly type object
*/
Object getObject(int index);
/**
* Returns number of null elements in the vector.
*
* @return number of null elements
*/
int getNullCount();
/**
* Check whether an element in the vector is null.
*
* @param index index to check for null
* @return true if element is null
*/
boolean isNull(int index);
/**
* Returns hashCode of element in index.
*
* @param index index of the element to hash
* @return the hash code of the element at {@code index}
*/
int hashCode(int index);
/**
* Copy a cell value from a particular index in source vector to a particular
* position in this vector.
*
* @param fromIndex position to copy from in source vector
* @param thisIndex position to copy to in this vector
* @param from source vector
*/
void copyFrom(int fromIndex, int thisIndex, ValueVector from);
/**
* Same as {@link #copyFrom(int, int, ValueVector)} except that
* it handles the case when the capacity of the vector needs to be expanded
* before copy.
*
* @param fromIndex position to copy from in source vector
* @param thisIndex position to copy to in this vector
* @param from source vector
*/
void copyFromSafe(int fromIndex, int thisIndex, ValueVector from);
/**
* Accept a generic {@link VectorVisitor} and return the result.
*
* @param visitor the visitor to accept
* @param value the input data handed to the visitor
* @param <OUT> the output result type.
* @param <IN> the input data together with visitor.
* @return the result produced by the visitor
*/
<OUT, IN> OUT accept(VectorVisitor<OUT, IN> visitor, IN value);
}