blob: 47c307659f02e90d701f08161aa93ce9d54b0c4d [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#include <algorithm>
#include <cstdint>
#include <memory>
#include <numeric>
#include <sstream>
#include <string>
#include <vector>
#include "arrow/array.h"
#include "arrow/buffer.h"
#include "arrow/builder.h"
#include "arrow/ipc/test-common.h"
#include "arrow/memory_pool.h"
#include "arrow/pretty_print.h"
#include "arrow/record_batch.h"
#include "arrow/status.h"
#include "arrow/testing/gtest_util.h"
#include "arrow/testing/random.h"
#include "arrow/testing/util.h"
#include "arrow/type.h"
#include "arrow/util/bit-util.h"
namespace arrow {
namespace ipc {
namespace test {
// Assert that `result` equals `expected`; on mismatch, fail the test with the
// column index and pretty-printed renderings of both arrays.
void CompareArraysDetailed(int index, const Array& result, const Array& expected) {
  if (expected.Equals(result)) {
    return;
  }
  std::stringstream expected_repr;
  std::stringstream result_repr;
  ASSERT_OK(PrettyPrint(expected, 0, &expected_repr));
  ASSERT_OK(PrettyPrint(result, 0, &result_repr));
  FAIL() << "Index: " << index << " Expected: " << expected_repr.str()
         << "\nGot: " << result_repr.str();
}
// Compare two record batches column by column, reporting detailed diffs.
void CompareBatchColumnsDetailed(const RecordBatch& result, const RecordBatch& expected) {
  const int num_columns = expected.num_columns();
  for (int col = 0; col < num_columns; ++col) {
    CompareArraysDetailed(col, *result.column(col), *expected.column(col));
  }
}
// Generate a random int32 array with values in [0, 1000]; roughly half the
// slots are null when include_nulls is set. `pool` is unused (kept for
// signature compatibility with the other factories).
Status MakeRandomInt32Array(int64_t length, bool include_nulls, MemoryPool* pool,
                            std::shared_ptr<Array>* out, uint32_t seed) {
  random::RandomArrayGenerator gen(seed);
  *out = gen.Int32(length, 0, 1000, include_nulls ? 0.5 : 0.0);
  return Status::OK();
}
// Build a random ListArray over `child_array`: `num_lists` lists with sizes
// drawn uniformly from [0, 10], offsets clamped to the child length. When
// `include_nulls`, ~10% of lists are null; null lists are forced to size 0 so
// offsets stay monotonic. Returns the result of validating the built array.
Status MakeRandomListArray(const std::shared_ptr<Array>& child_array, int num_lists,
                           bool include_nulls, MemoryPool* pool,
                           std::shared_ptr<Array>* out) {
  // Create the null list values
  std::vector<uint8_t> valid_lists(num_lists);
  const double null_percent = include_nulls ? 0.1 : 0;
  random_null_bytes(num_lists, null_percent, valid_lists.data());
  // Create list offsets
  const int max_list_size = 10;
  std::vector<int32_t> list_sizes(num_lists, 0);
  std::vector<int32_t> offsets(
      num_lists + 1, 0);  // +1 so we can shift for nulls. See partial sum below.
  // Seed from the child length so batches built from different children differ.
  const uint32_t seed = static_cast<uint32_t>(child_array->length());
  if (num_lists > 0) {
    rand_uniform_int(num_lists, seed, 0, max_list_size, list_sizes.data());
    // make sure sizes are consistent with null
    std::transform(list_sizes.begin(), list_sizes.end(), valid_lists.begin(),
                   list_sizes.begin(),
                   [](int32_t size, int32_t valid) { return valid == 0 ? 0 : size; });
    // Prefix-sum the sizes into offsets[1..num_lists]; offsets[0] stays 0.
    std::partial_sum(list_sizes.begin(), list_sizes.end(), ++offsets.begin());
    // Force invariants: no offset may point past the end of the child array.
    const int32_t child_length = static_cast<int32_t>(child_array->length());
    offsets[0] = 0;
    std::replace_if(offsets.begin(), offsets.end(),
                    [child_length](int32_t offset) { return offset > child_length; },
                    child_length);
  }
  // The final offset always covers the whole child array.
  offsets[num_lists] = static_cast<int32_t>(child_array->length());
  /// TODO(wesm): Implement support for nulls in ListArray::FromArrays
  std::shared_ptr<Buffer> null_bitmap, offsets_buffer;
  RETURN_NOT_OK(GetBitmapFromVector(valid_lists, &null_bitmap));
  RETURN_NOT_OK(CopyBufferFromVector(offsets, pool, &offsets_buffer));
  *out = std::make_shared<ListArray>(list(child_array->type()), num_lists, offsets_buffer,
                                     child_array, null_bitmap, kUnknownNullCount);
  return ValidateArray(**out);
}
// Build a random MapArray with `num_maps` entries: wrap a struct<key, value>
// child in a random list array, then rewrite the resulting list's type in
// place to map<key, value> and re-wrap the data as a MapArray.
Status MakeRandomMapArray(const std::shared_ptr<Array>& key_array,
                          const std::shared_ptr<Array>& item_array, int num_maps,
                          bool include_nulls, MemoryPool* pool,
                          std::shared_ptr<Array>* out) {
  // Map entries are struct pairs; map keys are non-nullable by convention.
  auto pair_type = struct_(
      {field("key", key_array->type(), false), field("value", item_array->type())});
  auto pair_array = std::make_shared<StructArray>(pair_type, num_maps,
                                                  ArrayVector{key_array, item_array});
  RETURN_NOT_OK(MakeRandomListArray(pair_array, num_maps, include_nulls, pool, out));
  // Mutate the list's ArrayData type so the same buffers can back a MapArray.
  auto map_data = (*out)->data();
  map_data->type = map(key_array->type(), item_array->type());
  out->reset(new MapArray(map_data));
  return Status::OK();
}
// Generate a random boolean array of `length` values (~50/50 true/false).
// When include_nulls is set, ~10% of slots are marked null via a random
// validity bitmap; otherwise the array has a known null count of 0.
Status MakeRandomBooleanArray(const int length, bool include_nulls,
                              std::shared_ptr<Array>* out) {
  std::vector<uint8_t> values(length);
  random_null_bytes(length, 0.5, values.data());
  std::shared_ptr<Buffer> data;
  RETURN_NOT_OK(BitUtil::BytesToBits(values, default_memory_pool(), &data));
  if (include_nulls) {
    std::vector<uint8_t> valid_bytes(length);
    std::shared_ptr<Buffer> null_bitmap;
    // BUG FIX: fill valid_bytes BEFORE packing it into the bitmap. The
    // previous code packed the zero-initialized vector first, which marked
    // every slot null and made the randomization below dead code.
    random_null_bytes(length, 0.1, valid_bytes.data());
    RETURN_NOT_OK(BitUtil::BytesToBits(valid_bytes, default_memory_pool(), &null_bitmap));
    *out = std::make_shared<BooleanArray>(length, data, null_bitmap, -1);
  } else {
    *out = std::make_shared<BooleanArray>(length, data, NULLPTR, 0);
  }
  return Status::OK();
}
// Build a two-column boolean batch of `length` rows: f0 with nulls, f1 without.
Status MakeBooleanBatchSized(const int length, std::shared_ptr<RecordBatch>* out) {
  auto schema = ::arrow::schema({field("f0", boolean()), field("f1", boolean())});
  std::shared_ptr<Array> with_nulls;
  std::shared_ptr<Array> no_nulls;
  RETURN_NOT_OK(MakeRandomBooleanArray(length, true, &with_nulls));
  RETURN_NOT_OK(MakeRandomBooleanArray(length, false, &no_nulls));
  *out = RecordBatch::Make(schema, length, {with_nulls, no_nulls});
  return Status::OK();
}
// Convenience wrapper: boolean batch with the default length of 1000 rows.
Status MakeBooleanBatch(std::shared_ptr<RecordBatch>* out) {
  constexpr int kDefaultLength = 1000;
  return MakeBooleanBatchSized(kDefaultLength, out);
}
// Build a two-column int32 batch of `length` rows seeded from `seed`:
// f0 without nulls, f1 with nulls (seeded with seed + 1 so they differ).
Status MakeIntBatchSized(int length, std::shared_ptr<RecordBatch>* out, uint32_t seed) {
  MemoryPool* pool = default_memory_pool();
  std::shared_ptr<Array> no_nulls;
  std::shared_ptr<Array> with_nulls;
  RETURN_NOT_OK(MakeRandomInt32Array(length, false, pool, &no_nulls, seed));
  RETURN_NOT_OK(MakeRandomInt32Array(length, true, pool, &with_nulls, seed + 1));
  auto schema = ::arrow::schema({field("f0", int32()), field("f1", int32())});
  *out = RecordBatch::Make(schema, length, {no_nulls, with_nulls});
  return Status::OK();
}
// Convenience wrapper: int32 batch with the default length of 10 rows.
Status MakeIntRecordBatch(std::shared_ptr<RecordBatch>* out) {
  constexpr int kNumRows = 10;
  return MakeIntBatchSized(kNumRows, out);
}
// Build a string array of `length` values cycling through a fixed candidate
// set; when include_nulls, slot 0 of each cycle becomes null instead of "".
Status MakeRandomStringArray(int64_t length, bool include_nulls, MemoryPool* pool,
                             std::shared_ptr<Array>* out) {
  const std::vector<std::string> candidates = {"", "", "abc", "123",
                                               "efg", "456!@#!@#", "12312"};
  StringBuilder builder(pool);
  const int64_t num_candidates = static_cast<int64_t>(candidates.size());
  for (int64_t i = 0; i < length; ++i) {
    const int64_t pos = i % num_candidates;
    if (include_nulls && pos == 0) {
      RETURN_NOT_OK(builder.AppendNull());
      continue;
    }
    RETURN_NOT_OK(builder.Append(candidates[pos]));
  }
  return builder.Finish(out);
}
// Build a binary/string array whose non-null values are the decimal text of
// their own index (hence unique). When include_nulls, every 7th slot is null.
template <class Builder, class RawType>
static Status MakeBinaryArrayWithUniqueValues(int64_t length, bool include_nulls,
                                              MemoryPool* pool,
                                              std::shared_ptr<Array>* out) {
  Builder builder(pool);
  for (int64_t i = 0; i < length; ++i) {
    const bool is_null_slot = include_nulls && (i % 7 == 0);
    if (is_null_slot) {
      RETURN_NOT_OK(builder.AppendNull());
      continue;
    }
    const std::string repr = std::to_string(i);
    RETURN_NOT_OK(builder.Append(reinterpret_cast<const RawType*>(repr.data()),
                                 static_cast<int32_t>(repr.size())));
  }
  return builder.Finish(out);
}
// Build a 500-row batch with one utf8 column (f0) and one binary column (f1),
// both filled with unique index-derived values.
Status MakeStringTypesRecordBatch(std::shared_ptr<RecordBatch>* out, bool with_nulls) {
  constexpr int64_t kLength = 500;
  auto schema = ::arrow::schema({field("f0", utf8()), field("f1", binary())});
  MemoryPool* pool = default_memory_pool();
  std::shared_ptr<Array> utf8_array;
  std::shared_ptr<Array> binary_array;
  // The comma inside the template argument list confuses the RETURN_NOT_OK
  // macro, so bind each call to a named Status first.
  {
    Status st = MakeBinaryArrayWithUniqueValues<StringBuilder, char>(kLength, with_nulls,
                                                                     pool, &utf8_array);
    RETURN_NOT_OK(st);
  }
  {
    Status st = MakeBinaryArrayWithUniqueValues<BinaryBuilder, uint8_t>(
        kLength, with_nulls, pool, &binary_array);
    RETURN_NOT_OK(st);
  }
  *out = RecordBatch::Make(schema, kLength, {utf8_array, binary_array});
  return Status::OK();
}
// Convenience wrapper: string-types batch with nulls enabled.
Status MakeStringTypesRecordBatchWithNulls(std::shared_ptr<RecordBatch>* out) {
  const bool with_nulls = true;
  return MakeStringTypesRecordBatch(out, with_nulls);
}
// Build a 500-row batch with a single all-null column of type null().
Status MakeNullRecordBatch(std::shared_ptr<RecordBatch>* out) {
  constexpr int64_t kLength = 500;
  auto schema = ::arrow::schema({field("f0", null())});
  auto nulls = std::make_shared<NullArray>(kLength);
  *out = RecordBatch::Make(schema, kLength, {nulls});
  return Status::OK();
}
// Build a 200-row batch with a list<int32> column, a list<list<int32>> column,
// and a flat int32 column, all with nulls.
Status MakeListRecordBatch(std::shared_ptr<RecordBatch>* out) {
  constexpr int kLength = 200;
  constexpr bool kIncludeNulls = true;
  MemoryPool* pool = default_memory_pool();
  // 1000 leaf values, sliced into random lists below.
  std::shared_ptr<Array> leaves;
  RETURN_NOT_OK(MakeRandomInt32Array(1000, kIncludeNulls, pool, &leaves));
  std::shared_ptr<Array> lists;
  RETURN_NOT_OK(MakeRandomListArray(leaves, kLength, kIncludeNulls, pool, &lists));
  std::shared_ptr<Array> nested_lists;
  RETURN_NOT_OK(MakeRandomListArray(lists, kLength, kIncludeNulls, pool, &nested_lists));
  std::shared_ptr<Array> ints;
  RETURN_NOT_OK(MakeRandomInt32Array(kLength, kIncludeNulls, pool, &ints));
  auto schema = ::arrow::schema({field("f0", list(int32())),
                                 field("f1", list(list(int32()))),
                                 field("f2", int32())});
  *out = RecordBatch::Make(schema, kLength, {lists, nested_lists, ints});
  return Status::OK();
}
// Build a 200-row batch with fixed-size-list columns:
//   f0: fixed_size_list<int32>[1] over the first `length` leaf values
//   f1: fixed_size_list<list<int32>>[3] over a random list array of length*3
//   f2: plain int32
Status MakeFixedSizeListRecordBatch(std::shared_ptr<RecordBatch>* out) {
  // Make the schema
  auto f0 = field("f0", fixed_size_list(int32(), 1));
  auto f1 = field("f1", fixed_size_list(list(int32()), 3));
  auto f2 = field("f2", int32());
  auto schema = ::arrow::schema({f0, f1, f2});
  // Example data
  MemoryPool* pool = default_memory_pool();
  const int length = 200;
  std::shared_ptr<Array> leaf_values, list_array, list_list_array, flat_array;
  const bool include_nulls = true;
  RETURN_NOT_OK(MakeRandomInt32Array(1000, include_nulls, pool, &leaf_values));
  // length * 3 child lists: the fixed-size list below consumes 3 per slot.
  RETURN_NOT_OK(
      MakeRandomListArray(leaf_values, length * 3, include_nulls, pool, &list_array));
  // Order matters: list_list_array must be built before list_array is
  // reassigned to the f0 column on the next line.
  list_list_array = std::make_shared<FixedSizeListArray>(f1->type(), length, list_array);
  list_array = std::make_shared<FixedSizeListArray>(f0->type(), length,
                                                    leaf_values->Slice(0, length));
  RETURN_NOT_OK(MakeRandomInt32Array(length, include_nulls, pool, &flat_array));
  *out = RecordBatch::Make(schema, length, {list_array, list_list_array, flat_array});
  return Status::OK();
}
// Build a batch with the same column layout as MakeListRecordBatch but with
// zero rows in every column.
Status MakeZeroLengthRecordBatch(std::shared_ptr<RecordBatch>* out) {
  MemoryPool* pool = default_memory_pool();
  const bool nulls_enabled = true;
  std::shared_ptr<Array> leaves;
  RETURN_NOT_OK(MakeRandomInt32Array(0, nulls_enabled, pool, &leaves));
  std::shared_ptr<Array> lists;
  RETURN_NOT_OK(MakeRandomListArray(leaves, 0, nulls_enabled, pool, &lists));
  std::shared_ptr<Array> nested_lists;
  RETURN_NOT_OK(MakeRandomListArray(lists, 0, nulls_enabled, pool, &nested_lists));
  std::shared_ptr<Array> ints;
  RETURN_NOT_OK(MakeRandomInt32Array(0, nulls_enabled, pool, &ints));
  auto schema = ::arrow::schema({field("f0", list(int32())),
                                 field("f1", list(list(int32()))),
                                 field("f2", int32())});
  *out = RecordBatch::Make(schema, 0, {lists, nested_lists, ints});
  return Status::OK();
}
// Build a 50-row batch of list columns whose list-level validity is all-set.
// The leaf values themselves still contain nulls; only the list arrays and
// the flat column are generated without them.
Status MakeNonNullRecordBatch(std::shared_ptr<RecordBatch>* out) {
  constexpr int kLength = 50;
  MemoryPool* pool = default_memory_pool();
  std::shared_ptr<Array> leaves;
  RETURN_NOT_OK(MakeRandomInt32Array(1000, true, pool, &leaves));
  std::shared_ptr<Array> lists;
  RETURN_NOT_OK(MakeRandomListArray(leaves, kLength, false, pool, &lists));
  std::shared_ptr<Array> nested_lists;
  RETURN_NOT_OK(MakeRandomListArray(lists, kLength, false, pool, &nested_lists));
  std::shared_ptr<Array> ints;
  RETURN_NOT_OK(MakeRandomInt32Array(kLength, false, pool, &ints));
  auto schema = ::arrow::schema({field("f0", list(int32())),
                                 field("f1", list(list(int32()))),
                                 field("f2", int32())});
  *out = RecordBatch::Make(schema, kLength, {lists, nested_lists, ints});
  return Status::OK();
}
// Build a 5-row batch with a single column nested 63 list levels deep
// (list<list<...<int32>>>), to stress recursion limits in IPC handling.
Status MakeDeeplyNestedList(std::shared_ptr<RecordBatch>* out) {
  constexpr int kBatchLength = 5;
  constexpr bool kIncludeNulls = true;
  MemoryPool* pool = default_memory_pool();
  std::shared_ptr<Array> array;
  RETURN_NOT_OK(MakeRandomInt32Array(1000, kIncludeNulls, pool, &array));
  std::shared_ptr<DataType> type = int32();
  for (int depth = 0; depth < 63; ++depth) {
    // Wrap both the type and the data in one more list level per iteration.
    type = list(type);
    RETURN_NOT_OK(MakeRandomListArray(array, kBatchLength, kIncludeNulls, pool, &array));
  }
  auto schema = ::arrow::schema({field("f0", type)});
  *out = RecordBatch::Make(schema, kBatchLength, {array});
  return Status::OK();
}
// Build a batch with two struct columns that reuse the three columns of
// MakeListRecordBatch as struct fields: "non_null_struct" (fully valid) and
// "null_struct" (row 0 null, null_count == 1).
Status MakeStruct(std::shared_ptr<RecordBatch>* out) {
  // reuse constructed list columns
  std::shared_ptr<RecordBatch> list_batch;
  RETURN_NOT_OK(MakeListRecordBatch(&list_batch));
  std::vector<std::shared_ptr<Array>> columns = {
      list_batch->column(0), list_batch->column(1), list_batch->column(2)};
  auto list_schema = list_batch->schema();
  // Define schema: a struct mirroring the list batch's three fields.
  // Use the struct_() factory instead of raw `new StructType` (consistent with
  // MakeRandomMapArray above).
  auto type = struct_(
      {list_schema->field(0), list_schema->field(1), list_schema->field(2)});
  auto f0 = field("non_null_struct", type);
  auto f1 = field("null_struct", type);
  auto schema = ::arrow::schema({f0, f1});
  const int64_t num_rows = list_batch->num_rows();
  // construct individual nullable/non-nullable struct arrays; make_shared
  // replaces the raw `new` (single allocation, exception-safe).
  auto no_nulls = std::make_shared<StructArray>(type, num_rows, columns);
  // Mark only the first row of the nullable column as null.
  std::vector<uint8_t> null_bytes(num_rows, 1);
  null_bytes[0] = 0;
  std::shared_ptr<Buffer> null_bitmask;
  RETURN_NOT_OK(BitUtil::BytesToBits(null_bytes, default_memory_pool(), &null_bitmask));
  auto with_nulls =
      std::make_shared<StructArray>(type, num_rows, columns, null_bitmask, 1);
  // construct batch
  *out = RecordBatch::Make(schema, num_rows, {no_nulls, with_nulls});
  return Status::OK();
}
// Build a 7-row batch with three union columns over children
// {u0: int32, u1: uint8} and type codes {5, 10}: a non-nullable sparse union,
// a nullable sparse union, and a nullable dense union (row 2 null in both
// nullable columns).
Status MakeUnion(std::shared_ptr<RecordBatch>* out) {
  // Define schema
  std::vector<std::shared_ptr<Field>> union_types(
      {field("u0", int32()), field("u1", uint8())});
  // Non-contiguous type codes exercise the code -> child-index mapping.
  std::vector<uint8_t> type_codes = {5, 10};
  auto sparse_type =
      std::make_shared<UnionType>(union_types, type_codes, UnionMode::SPARSE);
  auto dense_type =
      std::make_shared<UnionType>(union_types, type_codes, UnionMode::DENSE);
  auto f0 = field("sparse_nonnull", sparse_type, false);
  auto f1 = field("sparse", sparse_type);
  auto f2 = field("dense", dense_type);
  auto schema = ::arrow::schema({f0, f1, f2});
  // Create data
  std::vector<std::shared_ptr<Array>> sparse_children(2);
  std::vector<std::shared_ptr<Array>> dense_children(2);
  const int64_t length = 7;
  std::shared_ptr<Buffer> type_ids_buffer;
  // Per-slot child selector (by type code), shared by all three columns.
  std::vector<uint8_t> type_ids = {5, 10, 5, 5, 10, 10, 5};
  RETURN_NOT_OK(CopyBufferFromVector(type_ids, default_memory_pool(), &type_ids_buffer));
  // Sparse children are full-length: one value per slot, selected by type id.
  std::vector<int32_t> u0_values = {0, 1, 2, 3, 4, 5, 6};
  ArrayFromVector<Int32Type, int32_t>(u0_values, &sparse_children[0]);
  std::vector<uint8_t> u1_values = {10, 11, 12, 13, 14, 15, 16};
  ArrayFromVector<UInt8Type, uint8_t>(u1_values, &sparse_children[1]);
  // dense children hold only the referenced values, addressed via offsets.
  u0_values = {0, 2, 3, 7};
  ArrayFromVector<Int32Type, int32_t>(u0_values, &dense_children[0]);
  u1_values = {11, 14, 15};
  ArrayFromVector<UInt8Type, uint8_t>(u1_values, &dense_children[1]);
  std::shared_ptr<Buffer> offsets_buffer;
  // Offset of each slot within its dense child array.
  std::vector<int32_t> offsets = {0, 0, 1, 2, 1, 2, 3};
  RETURN_NOT_OK(CopyBufferFromVector(offsets, default_memory_pool(), &offsets_buffer));
  // Validity bitmap with slot 2 null, used by the two nullable columns.
  std::vector<uint8_t> null_bytes(length, 1);
  null_bytes[2] = 0;
  std::shared_ptr<Buffer> null_bitmask;
  RETURN_NOT_OK(BitUtil::BytesToBits(null_bytes, default_memory_pool(), &null_bitmask));
  // construct individual nullable/non-nullable struct arrays
  auto sparse_no_nulls =
      std::make_shared<UnionArray>(sparse_type, length, sparse_children, type_ids_buffer);
  auto sparse = std::make_shared<UnionArray>(sparse_type, length, sparse_children,
                                             type_ids_buffer, NULLPTR, null_bitmask, 1);
  auto dense =
      std::make_shared<UnionArray>(dense_type, length, dense_children, type_ids_buffer,
                                   offsets_buffer, null_bitmask, 1);
  // construct batch
  std::vector<std::shared_ptr<Array>> arrays = {sparse_no_nulls, sparse, dense};
  *out = RecordBatch::Make(schema, length, arrays);
  return Status::OK();
}
// Build a 6-row batch exercising dictionary-encoding variants: three plain
// dictionary columns (two sharing dict1, one using dict2), a list of
// dictionary-encoded strings, and a dictionary-encoded list<int8> column.
Status MakeDictionary(std::shared_ptr<RecordBatch>* out) {
  const int64_t length = 6;
  std::vector<bool> is_valid = {true, true, false, true, true, true};
  auto dict_ty = utf8();
  auto dict1 = ArrayFromJSON(dict_ty, "[\"foo\", \"bar\", \"baz\"]");
  auto dict2 = ArrayFromJSON(dict_ty, "[\"fo\", \"bap\", \"bop\", \"qup\"]");
  auto f0_type = arrow::dictionary(arrow::int32(), dict_ty);
  // The trailing `true` flags f1's dictionary as ordered.
  auto f1_type = arrow::dictionary(arrow::int8(), dict_ty, true);
  auto f2_type = arrow::dictionary(arrow::int32(), dict_ty);
  std::shared_ptr<Array> indices0, indices1, indices2;
  // Index -1 only occurs at slot 2, which the validity bitmap masks out.
  std::vector<int32_t> indices0_values = {1, 2, -1, 0, 2, 0};
  std::vector<int8_t> indices1_values = {0, 0, 2, 2, 1, 1};
  std::vector<int32_t> indices2_values = {3, 0, 2, 1, 0, 2};
  ArrayFromVector<Int32Type, int32_t>(is_valid, indices0_values, &indices0);
  ArrayFromVector<Int8Type, int8_t>(is_valid, indices1_values, &indices1);
  ArrayFromVector<Int32Type, int32_t>(is_valid, indices2_values, &indices2);
  auto a0 = std::make_shared<DictionaryArray>(f0_type, indices0, dict1);
  auto a1 = std::make_shared<DictionaryArray>(f1_type, indices1, dict1);
  auto a2 = std::make_shared<DictionaryArray>(f2_type, indices2, dict2);
  // Lists of dictionary-encoded strings
  auto f3_type = list(f1_type);
  auto indices3 = ArrayFromJSON(int8(), "[0, 1, 2, 0, 1, 1, 2, 1, 0]");
  auto offsets3 = ArrayFromJSON(int32(), "[0, 0, 2, 2, 5, 6, 9]");
  std::shared_ptr<Buffer> null_bitmap;
  RETURN_NOT_OK(GetBitmapFromVector(is_valid, &null_bitmap));
  // Reuse offsets3's value buffer directly as the list offsets buffer.
  std::shared_ptr<Array> a3 = std::make_shared<ListArray>(
      f3_type, length, std::static_pointer_cast<PrimitiveArray>(offsets3)->values(),
      std::make_shared<DictionaryArray>(f1_type, indices3, dict1), null_bitmap, 1);
  // Dictionary-encoded lists of integers
  auto dict4_ty = list(int8());
  auto f4_type = dictionary(int8(), dict4_ty);
  auto indices4 = ArrayFromJSON(int8(), "[0, 1, 2, 0, 2, 2]");
  auto dict4 = ArrayFromJSON(dict4_ty, "[[44, 55], [], [66]]");
  auto a4 = std::make_shared<DictionaryArray>(f4_type, indices4, dict4);
  // construct batch
  auto schema = ::arrow::schema(
      {field("dict1", f0_type), field("dict2", f1_type), field("dict3", f2_type),
       field("list<encoded utf8>", f3_type), field("encoded list<int8>", f4_type)});
  *out = RecordBatch::Make(schema, length, {a0, a1, a2, a3, a4});
  return Status::OK();
}
// Build a 6-row batch of flat (non-nested) dictionary-encoded string columns;
// two columns share one dictionary, the third uses a larger one.
Status MakeDictionaryFlat(std::shared_ptr<RecordBatch>* out) {
  constexpr int64_t kLength = 6;
  std::vector<bool> is_valid = {true, true, false, true, true, true};
  auto dict_ty = utf8();
  auto dict1 = ArrayFromJSON(dict_ty, "[\"foo\", \"bar\", \"baz\"]");
  auto dict2 = ArrayFromJSON(dict_ty, "[\"foo\", \"bar\", \"baz\", \"qux\"]");
  auto f0_type = arrow::dictionary(arrow::int32(), dict_ty);
  auto f1_type = arrow::dictionary(arrow::int8(), dict_ty);
  auto f2_type = arrow::dictionary(arrow::int32(), dict_ty);
  std::shared_ptr<Array> indices0, indices1, indices2;
  // Index -1 appears only in the slot masked null by the validity bitmap.
  ArrayFromVector<Int32Type, int32_t>(is_valid, {1, 2, -1, 0, 2, 0}, &indices0);
  ArrayFromVector<Int8Type, int8_t>(is_valid, {0, 0, 2, 2, 1, 1}, &indices1);
  ArrayFromVector<Int32Type, int32_t>(is_valid, {3, 0, 2, 1, 0, 2}, &indices2);
  auto col0 = std::make_shared<DictionaryArray>(f0_type, indices0, dict1);
  auto col1 = std::make_shared<DictionaryArray>(f1_type, indices1, dict1);
  auto col2 = std::make_shared<DictionaryArray>(f2_type, indices2, dict2);
  auto schema = ::arrow::schema(
      {field("dict1", f0_type), field("dict2", f1_type), field("dict3", f2_type)});
  *out = RecordBatch::Make(schema, kLength, {col0, col1, col2});
  return Status::OK();
}
// Build a batch with one date32 and one date64 column (7 rows, row 3 null).
Status MakeDates(std::shared_ptr<RecordBatch>* out) {
  std::vector<bool> is_valid = {true, true, true, false, true, true, true};
  std::shared_ptr<Array> d32_array;
  const std::vector<int32_t> d32_values = {0, 1, 2, 3, 4, 5, 6};
  ArrayFromVector<Date32Type, int32_t>(is_valid, d32_values, &d32_array);
  std::shared_ptr<Array> d64_array;
  const std::vector<int64_t> d64_values = {1489269000000, 1489270000000, 1489271000000,
                                           1489272000000, 1489272000000, 1489273000000,
                                           1489274000000};
  ArrayFromVector<Date64Type, int64_t>(is_valid, d64_values, &d64_array);
  auto schema = ::arrow::schema({field("f0", date32()), field("f1", date64())});
  *out = RecordBatch::Make(schema, d32_array->length(), {d32_array, d64_array});
  return Status::OK();
}
// Build a batch of three timestamp columns sharing the same raw values but
// with different units (MILLI, NANO with a timezone, SECOND).
Status MakeTimestamps(std::shared_ptr<RecordBatch>* out) {
  std::vector<bool> is_valid = {true, true, true, false, true, true, true};
  auto ms_field = field("f0", timestamp(TimeUnit::MILLI));
  auto ns_field = field("f1", timestamp(TimeUnit::NANO, "America/New_York"));
  auto sec_field = field("f2", timestamp(TimeUnit::SECOND));
  const std::vector<int64_t> raw_values = {1489269000000, 1489270000000, 1489271000000,
                                           1489272000000, 1489272000000, 1489273000000};
  std::shared_ptr<Array> ms_array, ns_array, sec_array;
  ArrayFromVector<TimestampType, int64_t>(ms_field->type(), is_valid, raw_values,
                                          &ms_array);
  ArrayFromVector<TimestampType, int64_t>(ns_field->type(), is_valid, raw_values,
                                          &ns_array);
  ArrayFromVector<TimestampType, int64_t>(sec_field->type(), is_valid, raw_values,
                                          &sec_array);
  auto schema = ::arrow::schema({ms_field, ns_field, sec_field});
  *out = RecordBatch::Make(schema, ms_array->length(), {ms_array, ns_array, sec_array});
  return Status::OK();
}
// Build a batch covering the temporal-difference types: three duration
// columns (ms/ns/s), a day-time interval column, and a month interval column.
Status MakeIntervals(std::shared_ptr<RecordBatch>* out) {
  std::vector<bool> is_valid = {true, true, true, false, true, true, true};
  auto f0 = field("f0", duration(TimeUnit::MILLI));
  auto f1 = field("f1", duration(TimeUnit::NANO));
  auto f2 = field("f2", duration(TimeUnit::SECOND));
  auto f3 = field("f3", day_time_interval());
  auto f4 = field("f4", month_interval());
  const std::vector<int64_t> raw_durations = {1489269000000, 1489270000000,
                                              1489271000000, 1489272000000,
                                              1489272000000, 1489273000000};
  std::shared_ptr<Array> a0, a1, a2, a3, a4;
  ArrayFromVector<DurationType, int64_t>(f0->type(), is_valid, raw_durations, &a0);
  ArrayFromVector<DurationType, int64_t>(f1->type(), is_valid, raw_durations, &a1);
  ArrayFromVector<DurationType, int64_t>(f2->type(), is_valid, raw_durations, &a2);
  const std::vector<DayTimeIntervalType::DayMilliseconds> day_ms_values = {
      {0, 0}, {0, 1}, {1, 1}, {2, 1}, {3, 4}, {-1, -1}};
  ArrayFromVector<DayTimeIntervalType, DayTimeIntervalType::DayMilliseconds>(
      f3->type(), is_valid, day_ms_values, &a3);
  const std::vector<int32_t> month_values = {0, -1, 1, 2, -2, 24};
  ArrayFromVector<MonthIntervalType, int32_t>(f4->type(), is_valid, month_values, &a4);
  auto schema = ::arrow::schema({f0, f1, f2, f3, f4});
  *out = RecordBatch::Make(schema, a0->length(), {a0, a1, a2, a3, a4});
  return Status::OK();
}
// Build a batch of four time-of-day columns: two time32 (ms/s) and two
// time64 (both ns), reusing one raw vector per physical width.
Status MakeTimes(std::shared_ptr<RecordBatch>* out) {
  std::vector<bool> is_valid = {true, true, true, false, true, true, true};
  auto f0 = field("f0", time32(TimeUnit::MILLI));
  auto f1 = field("f1", time64(TimeUnit::NANO));
  auto f2 = field("f2", time32(TimeUnit::SECOND));
  auto f3 = field("f3", time64(TimeUnit::NANO));
  const std::vector<int32_t> narrow_values = {1489269000, 1489270000, 1489271000,
                                              1489272000, 1489272000, 1489273000};
  const std::vector<int64_t> wide_values = {1489269000000, 1489270000000, 1489271000000,
                                            1489272000000, 1489272000000, 1489273000000};
  std::shared_ptr<Array> a0, a1, a2, a3;
  ArrayFromVector<Time32Type, int32_t>(f0->type(), is_valid, narrow_values, &a0);
  ArrayFromVector<Time64Type, int64_t>(f1->type(), is_valid, wide_values, &a1);
  ArrayFromVector<Time32Type, int32_t>(f2->type(), is_valid, narrow_values, &a2);
  ArrayFromVector<Time64Type, int64_t>(f3->type(), is_valid, wide_values, &a3);
  auto schema = ::arrow::schema({f0, f1, f2, f3});
  *out = RecordBatch::Make(schema, a0->length(), {a0, a1, a2, a3});
  return Status::OK();
}
// Append each entry of `values` to `builder`, substituting a null wherever
// the corresponding is_valid flag is false. Asserts on builder failure.
template <typename BuilderType, typename T>
static void AppendValues(const std::vector<bool>& is_valid, const std::vector<T>& values,
                         BuilderType* builder) {
  const size_t count = values.size();
  for (size_t i = 0; i < count; ++i) {
    ASSERT_OK(is_valid[i] ? builder->Append(values[i]) : builder->AppendNull());
  }
}
// Build a batch with two fixed-width binary columns: width 4 and the
// degenerate width 0, each with the last row null.
Status MakeFWBinary(std::shared_ptr<RecordBatch>* out) {
  std::vector<bool> is_valid = {true, true, true, false};
  auto wide_type = fixed_size_binary(4);
  auto empty_type = fixed_size_binary(0);
  auto schema = ::arrow::schema({field("f0", wide_type), field("f1", empty_type)});
  FixedSizeBinaryBuilder wide_builder(wide_type);
  FixedSizeBinaryBuilder empty_builder(empty_type);
  const std::vector<std::string> wide_values = {"foo1", "foo2", "foo3", "foo4"};
  AppendValues(is_valid, wide_values, &wide_builder);
  const std::vector<std::string> empty_values = {"", "", "", ""};
  AppendValues(is_valid, empty_values, &empty_builder);
  std::shared_ptr<Array> wide_array, empty_array;
  RETURN_NOT_OK(wide_builder.Finish(&wide_array));
  RETURN_NOT_OK(empty_builder.Finish(&empty_array));
  *out = RecordBatch::Make(schema, wide_array->length(), {wide_array, empty_array});
  return Status::OK();
}
// Build a 10-row batch with two decimal128(38, 4) columns sharing one value
// buffer: f0 carries a random validity bitmap, f1 has no nulls.
Status MakeDecimal(std::shared_ptr<RecordBatch>* out) {
  constexpr int kDecimalPrecision = 38;
  constexpr int kDecimalSize = 16;  // decimal128 values are 16 bytes wide
  constexpr int kLength = 10;
  auto type = decimal(kDecimalPrecision, 4);
  auto schema = ::arrow::schema({field("f0", type), field("f1", type)});
  std::shared_ptr<Buffer> data;
  RETURN_NOT_OK(AllocateBuffer(kDecimalSize * kLength, &data));
  random_decimals(kLength, 1, kDecimalPrecision, data->mutable_data());
  std::vector<uint8_t> is_valid_bytes(kLength);
  random_null_bytes(kLength, 0.1, is_valid_bytes.data());
  std::shared_ptr<Buffer> is_valid;
  RETURN_NOT_OK(BitUtil::BytesToBits(is_valid_bytes, default_memory_pool(), &is_valid));
  auto with_nulls = std::make_shared<Decimal128Array>(type, kLength, data, is_valid,
                                                      kUnknownNullCount);
  auto no_nulls = std::make_shared<Decimal128Array>(type, kLength, data);
  *out = RecordBatch::Make(schema, kLength, {with_nulls, no_nulls});
  return Status::OK();
}
// Build a 10-row batch pairing an all-null column (type null()) with a
// regular int64 column, to exercise handling of the null type's buffers.
Status MakeNull(std::shared_ptr<RecordBatch>* out) {
  constexpr int64_t kLength = 10;
  auto schema = ::arrow::schema({field("f0", null()), field("f1", int64())});
  auto null_col = std::make_shared<NullArray>(kLength);
  const std::vector<int64_t> int_values = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9};
  std::vector<bool> is_valid = {true, true, true, false, false,
                                true, true, true, true, true};
  std::shared_ptr<Array> int_col;
  ArrayFromVector<Int64Type, int64_t>(int64(), is_valid, int_values, &int_col);
  *out = RecordBatch::Make(schema, kLength, {null_col, int_col});
  return Status::OK();
}
} // namespace test
} // namespace ipc
} // namespace arrow