blob: 5fd4f9fb3e0c4f0925c909e5a725ce0935e8fc1f [file] [log] [blame]
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
module ValuesDenseUnionArrayTests
def build_data_type(type, type_codes)
field_description = {}
if type.is_a?(Hash)
field_description = field_description.merge(type)
else
field_description[:type] = type
end
Arrow::DenseUnionDataType.new(fields: [
field_description.merge(name: "0"),
field_description.merge(name: "1"),
],
type_codes: type_codes)
end
def build_array(type, values)
type_codes = [0, 1]
data_type = build_data_type(type, type_codes)
type_ids = []
offsets = []
arrays = data_type.fields.collect do |field|
sub_schema = Arrow::Schema.new([field])
sub_records = []
values.each do |value|
next if value.nil?
next unless value.key?(field.name)
sub_records << [value[field.name]]
end
sub_record_batch = Arrow::RecordBatch.new(sub_schema,
sub_records)
sub_record_batch.columns[0].data
end
values.each do |value|
if value.key?("0")
type_id = type_codes[0]
type_ids << type_id
offsets << (type_ids.count(type_id) - 1)
elsif value.key?("1")
type_id = type_codes[1]
type_ids << type_id
offsets << (type_ids.count(type_id) - 1)
end
end
Arrow::DenseUnionArray.new(data_type,
Arrow::Int8Array.new(type_ids),
Arrow::Int32Array.new(offsets),
arrays)
end
def remove_field_names(values)
values.collect do |value|
if value.nil?
value
else
value.values[0]
end
end
end
def test_null
values = [
{"0" => nil},
]
target = build(:null, values)
assert_equal(remove_field_names(values),
target.values)
end
def test_boolean
values = [
{"0" => true},
{"1" => nil},
]
target = build(:boolean, values)
assert_equal(remove_field_names(values),
target.values)
end
def test_int8
values = [
{"0" => -(2 ** 7)},
{"1" => nil},
]
target = build(:int8, values)
assert_equal(remove_field_names(values),
target.values)
end
def test_uint8
values = [
{"0" => (2 ** 8) - 1},
{"1" => nil},
]
target = build(:uint8, values)
assert_equal(remove_field_names(values),
target.values)
end
def test_int16
values = [
{"0" => -(2 ** 15)},
{"1" => nil},
]
target = build(:int16, values)
assert_equal(remove_field_names(values),
target.values)
end
def test_uint16
values = [
{"0" => (2 ** 16) - 1},
{"1" => nil},
]
target = build(:uint16, values)
assert_equal(remove_field_names(values),
target.values)
end
def test_int32
values = [
{"0" => -(2 ** 31)},
{"1" => nil},
]
target = build(:int32, values)
assert_equal(remove_field_names(values),
target.values)
end
def test_uint32
values = [
{"0" => (2 ** 32) - 1},
{"1" => nil},
]
target = build(:uint32, values)
assert_equal(remove_field_names(values),
target.values)
end
def test_int64
values = [
{"0" => -(2 ** 63)},
{"1" => nil},
]
target = build(:int64, values)
assert_equal(remove_field_names(values),
target.values)
end
def test_uint64
values = [
{"0" => (2 ** 64) - 1},
{"1" => nil},
]
target = build(:uint64, values)
assert_equal(remove_field_names(values),
target.values)
end
def test_float
values = [
{"0" => -1.0},
{"1" => nil},
]
target = build(:float, values)
assert_equal(remove_field_names(values),
target.values)
end
def test_double
values = [
{"0" => -1.0},
{"1" => nil},
]
target = build(:double, values)
assert_equal(remove_field_names(values),
target.values)
end
def test_binary
values = [
{"0" => "\xff".b},
{"1" => nil},
]
target = build(:binary, values)
assert_equal(remove_field_names(values),
target.values)
end
def test_string
values = [
{"0" => "Ruby"},
{"1" => nil},
]
target = build(:string, values)
assert_equal(remove_field_names(values),
target.values)
end
def test_date32
values = [
{"0" => Date.new(1960, 1, 1)},
{"1" => nil},
]
target = build(:date32, values)
assert_equal(remove_field_names(values),
target.values)
end
def test_date64
values = [
{"0" => DateTime.new(1960, 1, 1, 2, 9, 30)},
{"1" => nil},
]
target = build(:date64, values)
assert_equal(remove_field_names(values),
target.values)
end
def test_timestamp_second
values = [
{"0" => Time.parse("1960-01-01T02:09:30Z")},
{"1" => nil},
]
target = build({
type: :timestamp,
unit: :second,
},
values)
assert_equal(remove_field_names(values),
target.values)
end
def test_timestamp_milli
values = [
{"0" => Time.parse("1960-01-01T02:09:30.123Z")},
{"1" => nil},
]
target = build({
type: :timestamp,
unit: :milli,
},
values)
assert_equal(remove_field_names(values),
target.values)
end
def test_timestamp_micro
values = [
{"0" => Time.parse("1960-01-01T02:09:30.123456Z")},
{"1" => nil},
]
target = build({
type: :timestamp,
unit: :micro,
},
values)
assert_equal(remove_field_names(values),
target.values)
end
def test_timestamp_nano
values = [
{"0" => Time.parse("1960-01-01T02:09:30.123456789Z")},
{"1" => nil},
]
target = build({
type: :timestamp,
unit: :nano,
},
values)
assert_equal(remove_field_names(values),
target.values)
end
def test_time32_second
unit = Arrow::TimeUnit::SECOND
values = [
# 00:10:00
{"0" => Arrow::Time.new(unit, 60 * 10)},
{"1" => nil},
]
target = build({
type: :time32,
unit: :second,
},
values)
assert_equal(remove_field_names(values),
target.values)
end
def test_time32_milli
unit = Arrow::TimeUnit::MILLI
values = [
# 00:10:00.123
{"0" => Arrow::Time.new(unit, (60 * 10) * 1000 + 123)},
{"1" => nil},
]
target = build({
type: :time32,
unit: :milli,
},
values)
assert_equal(remove_field_names(values),
target.values)
end
def test_time64_micro
unit = Arrow::TimeUnit::MICRO
values = [
# 00:10:00.123456
{"0" => Arrow::Time.new(unit, (60 * 10) * 1_000_000 + 123_456)},
{"1" => nil},
]
target = build({
type: :time64,
unit: :micro,
},
values)
assert_equal(remove_field_names(values),
target.values)
end
def test_time64_nano
unit = Arrow::TimeUnit::NANO
values = [
# 00:10:00.123456789
{"0" => Arrow::Time.new(unit, (60 * 10) * 1_000_000_000 + 123_456_789)},
{"1" => nil},
]
target = build({
type: :time64,
unit: :nano,
},
values)
assert_equal(remove_field_names(values),
target.values)
end
def test_decimal128
values = [
{"0" => BigDecimal("92.92")},
{"1" => nil},
]
target = build({
type: :decimal128,
precision: 8,
scale: 2,
},
values)
assert_equal(remove_field_names(values),
target.values)
end
def test_decimal256
values = [
{"0" => BigDecimal("92.92")},
{"1" => nil},
]
target = build({
type: :decimal256,
precision: 38,
scale: 2,
},
values)
assert_equal(remove_field_names(values),
target.values)
end
def test_month_interval
values = [
{"0" => 1},
{"1" => nil},
]
target = build(:month_interval, values)
assert_equal(remove_field_names(values),
target.values)
end
def test_day_time_interval
values = [
{"0" => {day: 1, millisecond: 100}},
{"1" => nil},
]
target = build(:day_time_interval, values)
assert_equal(remove_field_names(values),
target.values)
end
def test_month_day_nano_interval
values = [
{"0" => {month: 1, day: 1, nanosecond: 100}},
{"1" => nil},
]
target = build(:month_day_nano_interval, values)
assert_equal(remove_field_names(values),
target.values)
end
def test_list
values = [
{"0" => [true, nil, false]},
{"1" => nil},
]
target = build({
type: :list,
field: {
name: :sub_element,
type: :boolean,
},
},
values)
assert_equal(remove_field_names(values),
target.values)
end
def test_struct
values = [
{"0" => {"sub_field" => true}},
{"1" => nil},
{"0" => {"sub_field" => nil}},
]
target = build({
type: :struct,
fields: [
{
name: :sub_field,
type: :boolean,
},
],
},
values)
assert_equal(remove_field_names(values),
target.values)
end
def test_map
values = [
{"0" => {"key1" => true, "key2" => nil}},
{"1" => nil},
]
target = build({
type: :map,
key: :string,
item: :boolean,
},
values)
assert_equal(remove_field_names(values),
target.values)
end
def test_sparse_union
values = [
{"0" => {"field1" => true}},
{"1" => nil},
{"0" => {"field2" => 29}},
{"0" => {"field2" => nil}},
]
target = build({
type: :sparse_union,
fields: [
{
name: :field1,
type: :boolean,
},
{
name: :field2,
type: :uint8,
},
],
type_codes: [0, 1],
},
values)
assert_equal(remove_field_names(remove_field_names(values)),
target.values)
end
def test_dense_union
values = [
{"0" => {"field1" => true}},
{"1" => nil},
{"0" => {"field2" => 29}},
{"0" => {"field2" => nil}},
]
target = build({
type: :dense_union,
fields: [
{
name: :field1,
type: :boolean,
},
{
name: :field2,
type: :uint8,
},
],
type_codes: [0, 1],
},
values)
assert_equal(remove_field_names(remove_field_names(values)),
target.values)
end
def test_dictionary
values = [
{"0" => "Ruby"},
{"1" => nil},
{"0" => "GLib"},
]
target = build({
type: :dictionary,
index_data_type: :int8,
value_data_type: :string,
ordered: false,
},
values)
assert_equal(remove_field_names(values),
target.values)
end
end
class ValuesArrayDenseUnionArrayTest < Test::Unit::TestCase
include ValuesDenseUnionArrayTests
def build(type, values)
build_array(type, values)
end
end
class ValuesChunkedArrayDenseUnionArrayTest < Test::Unit::TestCase
include ValuesDenseUnionArrayTests
def build(type, values)
Arrow::ChunkedArray.new([build_array(type, values)])
end
end