blob: a8bc386822bd488270b15f8422109743ef5b7363 [file] [log] [blame]
.. Licensed to the Apache Software Foundation (ASF) under one
.. or more contributor license agreements. See the NOTICE file
.. distributed with this work for additional information
.. regarding copyright ownership. The ASF licenses this file
.. to you under the Apache License, Version 2.0 (the
.. "License"); you may not use this file except in compliance
.. with the License. You may obtain a copy of the License at
.. http://www.apache.org/licenses/LICENSE-2.0
.. Unless required by applicable law or agreed to in writing,
.. software distributed under the License is distributed on an
.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
.. KIND, either express or implied. See the License for the
.. specific language governing permissions and limitations
.. under the License.
=================
Data manipulation
=================
Recipes related to comparing, filtering or transforming data.
.. contents::
Concatenate VectorSchemaRoots
=============================
In some cases, VectorSchemaRoot needs to be modeled as a container. To accomplish
this, you can use ``VectorSchemaRootAppender.append``. The following code
creates two roots, then concatenates them together:
.. testcode::
import org.apache.arrow.memory.BufferAllocator;
import org.apache.arrow.memory.RootAllocator;
import org.apache.arrow.vector.IntVector;
import org.apache.arrow.vector.VectorSchemaRoot;
import org.apache.arrow.vector.types.pojo.ArrowType;
import org.apache.arrow.vector.types.pojo.Field;
import org.apache.arrow.vector.types.pojo.FieldType;
import org.apache.arrow.vector.types.pojo.Schema;
import org.apache.arrow.vector.util.VectorSchemaRootAppender;
import static java.util.Arrays.asList;
// Single-column schema shared by both input roots and by the result root:
// one nullable, signed 32-bit integer field named "column-one".
Field column_one = new Field("column-one", FieldType.nullable(new ArrowType.Int(32, true)), null);
Schema schema = new Schema(asList(column_one));
try (
    BufferAllocator allocator = new RootAllocator();
    VectorSchemaRoot rootOne = VectorSchemaRoot.create(schema, allocator);
    VectorSchemaRoot rootTwo = VectorSchemaRoot.create(schema, allocator);
    VectorSchemaRoot result = VectorSchemaRoot.create(schema, allocator);
) {
    // First input root: two rows holding 100 and 20.
    rootOne.allocateNew();
    IntVector firstColumn = (IntVector) rootOne.getVector(0);
    firstColumn.set(0, 100);
    firstColumn.set(1, 20);
    rootOne.setRowCount(2);

    // Second input root: two rows holding 34 and 75.
    rootTwo.allocateNew();
    IntVector secondColumn = (IntVector) rootTwo.getVector(0);
    secondColumn.set(0, 34);
    secondColumn.set(1, 75);
    rootTwo.setRowCount(2);

    // Append both input roots to the result root, then print it as TSV.
    result.allocateNew();
    VectorSchemaRootAppender.append(result, rootOne, rootTwo);
    System.out.print(result.contentToTSVString());
}
.. testoutput::
column-one
100
20
34
75
Concatenate Value Vectors
=========================
In some cases, we need to concatenate two value vectors into one. To accomplish
this, we can use `VectorAppender`_. This mutates the initial ValueVector.
.. testcode::
import org.apache.arrow.memory.BufferAllocator;
import org.apache.arrow.memory.RootAllocator;
import org.apache.arrow.vector.IntVector;
import org.apache.arrow.vector.ValueVector;
import org.apache.arrow.vector.util.VectorAppender;
try (
    BufferAllocator allocator = new RootAllocator();
    IntVector initialValues = new IntVector("initialValues", allocator);
    IntVector toAppend = new IntVector("toAppend", allocator);
) {
    // Target vector: two non-null values.
    initialValues.allocateNew(2);
    initialValues.set(0, 1);
    initialValues.set(1, 2);
    initialValues.setValueCount(2);
    System.out.println("Initial IntVector: " + initialValues);

    // Vector to append: four slots, only indices 1 and 3 are set, so the
    // unset slots are printed as null.
    toAppend.allocateNew(4);
    toAppend.set(1, 4);
    toAppend.set(3, 6);
    toAppend.setValueCount(4);
    System.out.println("IntVector to Append: " + toAppend);

    // The appender is constructed around the target vector; visiting
    // toAppend with it extends initialValues in place.
    toAppend.accept(new VectorAppender(initialValues), null);
    System.out.println("IntVector Result: " + initialValues);
}
.. testoutput::
Initial IntVector: [1, 2]
IntVector to Append: [null, 4, null, 6]
IntVector Result: [1, 2, null, 4, null, 6]
Compare Vectors for Field Equality
==================================
.. testcode::
import org.apache.arrow.memory.BufferAllocator;
import org.apache.arrow.vector.IntVector;
import org.apache.arrow.vector.compare.TypeEqualsVisitor;
import org.apache.arrow.memory.RootAllocator;
try (
    BufferAllocator allocator = new RootAllocator();
    IntVector right = new IntVector("int", allocator);
    // The vectors being compared are declared as resources as well, so every
    // vector created from the allocator is closed when the block exits.
    IntVector left1 = new IntVector("int", allocator);
    IntVector left2 = new IntVector("int2", allocator);
) {
    right.allocateNew(3);
    right.set(0, 10);
    right.set(1, 20);
    right.set(2, 30);
    right.setValueCount(3);

    // left1 has the same field name as right ("int"); left2 is named "int2".
    // Neither left vector holds any values: only the fields are compared.
    TypeEqualsVisitor visitor = new TypeEqualsVisitor(right);
    System.out.println(visitor.equals(left1));
    System.out.println(visitor.equals(left2));
}
.. testoutput::
true
false
Compare Vectors Equality
========================
.. testcode::
import org.apache.arrow.memory.BufferAllocator;
import org.apache.arrow.vector.IntVector;
import org.apache.arrow.memory.RootAllocator;
import org.apache.arrow.vector.compare.VectorEqualsVisitor;
try (
    BufferAllocator allocator = new RootAllocator();
    // All three vectors share the same field name, so the only difference
    // between them is the stored value.
    IntVector reference = new IntVector("vector1", allocator);
    IntVector sameValue = new IntVector("vector1", allocator);
    IntVector otherValue = new IntVector("vector1", allocator);
) {
    reference.allocateNew(1);
    reference.set(0, 10);
    reference.setValueCount(1);

    sameValue.allocateNew(1);
    sameValue.set(0, 10);
    sameValue.setValueCount(1);

    otherValue.allocateNew(1);
    otherValue.set(0, 20);
    otherValue.setValueCount(1);

    VectorEqualsVisitor equalsVisitor = new VectorEqualsVisitor();
    // 10 vs 10
    System.out.println(equalsVisitor.vectorEquals(reference, sameValue));
    // 10 vs 20
    System.out.println(equalsVisitor.vectorEquals(reference, otherValue));
}
.. testoutput::
true
false
Compare Values on the Array
===========================
Comparing two values at the given indices in the vectors:
.. testcode::
import java.nio.charset.StandardCharsets;
import org.apache.arrow.algorithm.sort.DefaultVectorComparators;
import org.apache.arrow.algorithm.sort.VectorValueComparator;
import org.apache.arrow.memory.BufferAllocator;
import org.apache.arrow.memory.RootAllocator;
import org.apache.arrow.vector.VarCharVector;
try (
    BufferAllocator allocator = new RootAllocator();
    VarCharVector vec = new VarCharVector("valueindexcomparator", allocator);
) {
    // Follow the vector life cycle: allocate, write the values, and only then
    // set the value count. The charset is given explicitly so the encoded
    // bytes do not depend on the platform default.
    vec.allocateNew(3);
    vec.set(0, "ba".getBytes(StandardCharsets.UTF_8));
    vec.set(1, "abc".getBytes(StandardCharsets.UTF_8));
    vec.set(2, "aa".getBytes(StandardCharsets.UTF_8));
    vec.setValueCount(3);

    VectorValueComparator<VarCharVector> valueComparator = DefaultVectorComparators.createDefaultComparator(vec);
    valueComparator.attachVector(vec);

    // compare(i, j) compares the values stored at indices i and j.
    System.out.println(valueComparator.compare(0, 1) > 0); // "ba" vs "abc"
    System.out.println(valueComparator.compare(1, 2) < 0); // "abc" vs "aa"
}
.. testoutput::
true
false
If we need a custom comparator, we can extend ``VectorValueComparator``
and override its ``compareNotNull`` method as needed.
Search Values on the Array
==========================
Linear Search - O(n)
********************
Algorithm: org.apache.arrow.algorithm.search.VectorSearcher#linearSearch - O(n)
.. testcode::
import org.apache.arrow.algorithm.search.VectorSearcher;
import org.apache.arrow.algorithm.sort.DefaultVectorComparators;
import org.apache.arrow.algorithm.sort.VectorValueComparator;
import org.apache.arrow.memory.BufferAllocator;
import org.apache.arrow.vector.IntVector;
import org.apache.arrow.memory.RootAllocator;
try (
    BufferAllocator allocator = new RootAllocator();
    IntVector linearSearchVector = new IntVector("linearSearchVector", allocator);
) {
    // Follow the vector life cycle: allocate, write the values 0..9, and only
    // then set the value count.
    linearSearchVector.allocateNew(10);
    for (int i = 0; i < 10; i++) {
        linearSearchVector.set(i, i);
    }
    linearSearchVector.setValueCount(10);

    VectorValueComparator<IntVector> comparatorInt = DefaultVectorComparators.createDefaultComparator(linearSearchVector);
    // The same vector is passed both as the vector to search and as the key
    // vector; the key is the element at index 3.
    int result = VectorSearcher.linearSearch(linearSearchVector, comparatorInt, linearSearchVector, 3);
    System.out.println(result);
}
.. testoutput::
3
Binary Search - O(log(n))
*************************
Algorithm: org.apache.arrow.algorithm.search.VectorSearcher#binarySearch - O(log(n))
.. testcode::
import org.apache.arrow.algorithm.search.VectorSearcher;
import org.apache.arrow.algorithm.sort.DefaultVectorComparators;
import org.apache.arrow.algorithm.sort.VectorValueComparator;
import org.apache.arrow.memory.BufferAllocator;
import org.apache.arrow.vector.IntVector;
import org.apache.arrow.memory.RootAllocator;
try (
    BufferAllocator allocator = new RootAllocator();
    IntVector binarySearchVector = new IntVector("", allocator);
) {
    // Follow the vector life cycle: allocate, write the values, and only then
    // set the value count. The values 0..9 are written in ascending order.
    binarySearchVector.allocateNew(10);
    for (int i = 0; i < 10; i++) {
        binarySearchVector.set(i, i);
    }
    binarySearchVector.setValueCount(10);

    VectorValueComparator<IntVector> comparatorInt = DefaultVectorComparators.createDefaultComparator(binarySearchVector);
    // The same vector is passed both as the vector to search and as the key
    // vector; the key is the element at index 3.
    int result = VectorSearcher.binarySearch(binarySearchVector, comparatorInt, binarySearchVector, 3);
    System.out.println(result);
}
.. testoutput::
3
Sort Values on the Array
========================
In-place Sorter - O(nlog(n))
****************************
Sorting by manipulating the original vector.
Algorithm: org.apache.arrow.algorithm.sort.FixedWidthInPlaceVectorSorter - O(nlog(n))
.. testcode::
import org.apache.arrow.algorithm.sort.DefaultVectorComparators;
import org.apache.arrow.algorithm.sort.FixedWidthInPlaceVectorSorter;
import org.apache.arrow.algorithm.sort.VectorValueComparator;
import org.apache.arrow.memory.BufferAllocator;
import org.apache.arrow.vector.IntVector;
import org.apache.arrow.memory.RootAllocator;
try (
    BufferAllocator allocator = new RootAllocator();
    IntVector intVectorNotSorted = new IntVector("intvectornotsorted", allocator);
) {
    // Follow the vector life cycle: allocate, write the values (including the
    // null at index 2), and only then set the value count.
    intVectorNotSorted.allocateNew(3);
    intVectorNotSorted.set(0, 10);
    intVectorNotSorted.set(1, 8);
    intVectorNotSorted.setNull(2);
    intVectorNotSorted.setValueCount(3);

    FixedWidthInPlaceVectorSorter<IntVector> sorter = new FixedWidthInPlaceVectorSorter<>();
    VectorValueComparator<IntVector> comparator = DefaultVectorComparators.createDefaultComparator(intVectorNotSorted);
    // Sorting rearranges the elements of intVectorNotSorted itself.
    sorter.sortInPlace(intVectorNotSorted, comparator);
    System.out.println(intVectorNotSorted);
}
.. testoutput::
[null, 8, 10]
Out-place Sorter - O(nlog(n))
*****************************
Sorting by copying vector elements to a new vector in sorted order - O(nlog(n))
Algorithm: org.apache.arrow.algorithm.sort.FixedWidthOutOfPlaceVectorSorter &
org.apache.arrow.algorithm.sort.VariableWidthOutOfPlaceVectorSorter
.. testcode::
import org.apache.arrow.algorithm.sort.DefaultVectorComparators;
import org.apache.arrow.algorithm.sort.FixedWidthOutOfPlaceVectorSorter;
import org.apache.arrow.algorithm.sort.OutOfPlaceVectorSorter;
import org.apache.arrow.algorithm.sort.VectorValueComparator;
import org.apache.arrow.memory.BufferAllocator;
import org.apache.arrow.vector.IntVector;
import org.apache.arrow.memory.RootAllocator;
try (
    BufferAllocator allocator = new RootAllocator();
    IntVector intVectorNotSorted = new IntVector("intvectornotsorted", allocator);
    // Destination vector, created from the source vector's field type.
    IntVector intVectorSorted = (IntVector) intVectorNotSorted.getField()
        .getFieldType().createNewSingleVector("new-out-of-place-sorter",
            allocator, null);
) {
    // Follow the vector life cycle for the source: allocate, write the values
    // (including the null at index 2), and only then set the value count.
    intVectorNotSorted.allocateNew(3);
    intVectorNotSorted.set(0, 10);
    intVectorNotSorted.set(1, 8);
    intVectorNotSorted.setNull(2);
    intVectorNotSorted.setValueCount(3);

    OutOfPlaceVectorSorter<IntVector> sorterOutOfPlaceSorter = new FixedWidthOutOfPlaceVectorSorter<>();
    VectorValueComparator<IntVector> comparatorOutOfPlaceSorter = DefaultVectorComparators.createDefaultComparator(intVectorNotSorted);

    // The destination is allocated and sized to the source's value count
    // before the sorter writes into it.
    intVectorSorted.allocateNew(intVectorNotSorted.getValueCount());
    intVectorSorted.setValueCount(intVectorNotSorted.getValueCount());
    sorterOutOfPlaceSorter.sortOutOfPlace(intVectorNotSorted, intVectorSorted, comparatorOutOfPlaceSorter);
    System.out.println(intVectorSorted);
}
.. testoutput::
[null, 8, 10]
.. _`VectorAppender`: https://github.com/apache/arrow/blob/main/java/vector/src/main/java/org/apache/arrow/vector/util/VectorAppender.java