# blob: 6d16f25ece6a9005a8ae2d293678c818246ce455 [file] [log] [blame]
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# Shared test cases for reading dense union columns through #raw_records.
#
# Every test builds a single-column target whose column is a dense union with
# two children named "0" and "1" that share the same value type, then asserts
# that target.raw_records equals the records the target was built from.
#
# The including class must provide build(type, records), which returns the
# object under test (anything that responds to #raw_records).
module RawRecordsDenseUnionArrayTests
  # Builds the schema description for a single dense union column named
  # "column".
  #
  # @param type [Symbol, Hash] value type of both union children. A Hash is
  #   used as the child field description as-is (e.g. {type: :timestamp,
  #   unit: :second}); anything else is wrapped as {type: type}.
  # @param type_codes [Array<Integer>] type codes of the union children.
  # @return [Hash] description suitable for Arrow::Schema.new.
  def build_schema(type, type_codes)
    # Hash#merge below returns new Hashes, so the caller's +type+ is never
    # mutated.
    field_description = type.is_a?(Hash) ? type : {type: type}
    {
      column: {
        type: :dense_union,
        fields: [
          field_description.merge(name: "0"),
          field_description.merge(name: "1"),
        ],
        type_codes: type_codes,
      },
    }
  end

  # Builds a record batch with one dense union column from +records+.
  #
  # @param type [Symbol, Hash] value type of the union children
  #   (see #build_schema).
  # @param records [Array<Array<Hash>>] rows with one element each; the
  #   element is a Hash whose key ("0" or "1") selects the union child and
  #   whose value is stored in that child.
  # @return [Arrow::RecordBatch]
  #
  # NOTE(review): a nil column is skipped while the child arrays are
  # collected, but it is not handled when the type ids are computed
  # (nil.key? raises NoMethodError). A row that has neither key gets no type
  # id or offset, although records.size is still used as the row count. No
  # test in this module passes such rows.
  #
  # TODO: Use Arrow::RecordBatch.new(build_schema(type, type_codes), records)
  def build_record_batch(type, records)
    type_codes = [0, 1]
    schema = Arrow::Schema.new(build_schema(type, type_codes))
    union_data_type = schema.fields[0].data_type

    # One child array per union field, holding only the values of the rows
    # that select that field, in row order.
    arrays = union_data_type.fields.collect do |field|
      sub_schema = Arrow::Schema.new([field])
      sub_records = []
      records.each do |record|
        column = record[0]
        next if column.nil?
        next unless column.key?(field.name)
        sub_records << [column[field.name]]
      end
      sub_record_batch = Arrow::RecordBatch.new(sub_schema, sub_records)
      sub_record_batch.columns[0].data
    end

    type_ids = []
    offsets = []
    # Running count of rows already assigned to each type id; the count seen
    # before a row is appended is that row's offset. This replaces a
    # type_ids.count(...) rescan per row.
    next_offsets = Hash.new(0)
    records.each do |record|
      column = record[0]
      # "0" takes precedence over "1" when both keys are present.
      child_index = ["0", "1"].index { |name| column.key?(name) }
      next if child_index.nil?
      type_id = type_codes[child_index]
      type_ids << type_id
      offsets << next_offsets[type_id]
      next_offsets[type_id] += 1
    end

    union_array = Arrow::DenseUnionArray.new(union_data_type,
                                             Arrow::Int8Array.new(type_ids),
                                             Arrow::Int32Array.new(offsets),
                                             arrays)
    schema = Arrow::Schema.new(column: union_array.value_data_type)
    Arrow::RecordBatch.new(schema,
                           records.size,
                           [union_array])
  end

  def test_null
    records = [
      [{"0" => nil}],
    ]
    target = build(:null, records)
    assert_equal(records, target.raw_records)
  end

  def test_boolean
    records = [
      [{"0" => true}],
      [{"1" => nil}],
    ]
    target = build(:boolean, records)
    assert_equal(records, target.raw_records)
  end

  def test_int8
    records = [
      [{"0" => -(2 ** 7)}],
      [{"1" => nil}],
    ]
    target = build(:int8, records)
    assert_equal(records, target.raw_records)
  end

  def test_uint8
    records = [
      [{"0" => (2 ** 8) - 1}],
      [{"1" => nil}],
    ]
    target = build(:uint8, records)
    assert_equal(records, target.raw_records)
  end

  def test_int16
    records = [
      [{"0" => -(2 ** 15)}],
      [{"1" => nil}],
    ]
    target = build(:int16, records)
    assert_equal(records, target.raw_records)
  end

  def test_uint16
    records = [
      [{"0" => (2 ** 16) - 1}],
      [{"1" => nil}],
    ]
    target = build(:uint16, records)
    assert_equal(records, target.raw_records)
  end

  def test_int32
    records = [
      [{"0" => -(2 ** 31)}],
      [{"1" => nil}],
    ]
    target = build(:int32, records)
    assert_equal(records, target.raw_records)
  end

  def test_uint32
    records = [
      [{"0" => (2 ** 32) - 1}],
      [{"1" => nil}],
    ]
    target = build(:uint32, records)
    assert_equal(records, target.raw_records)
  end

  def test_int64
    records = [
      [{"0" => -(2 ** 63)}],
      [{"1" => nil}],
    ]
    target = build(:int64, records)
    assert_equal(records, target.raw_records)
  end

  def test_uint64
    records = [
      [{"0" => (2 ** 64) - 1}],
      [{"1" => nil}],
    ]
    target = build(:uint64, records)
    assert_equal(records, target.raw_records)
  end

  def test_float
    records = [
      [{"0" => -1.0}],
      [{"1" => nil}],
    ]
    target = build(:float, records)
    assert_equal(records, target.raw_records)
  end

  def test_double
    records = [
      [{"0" => -1.0}],
      [{"1" => nil}],
    ]
    target = build(:double, records)
    assert_equal(records, target.raw_records)
  end

  def test_binary
    records = [
      [{"0" => "\xff".b}],
      [{"1" => nil}],
    ]
    target = build(:binary, records)
    assert_equal(records, target.raw_records)
  end

  def test_string
    records = [
      [{"0" => "Ruby"}],
      [{"1" => nil}],
    ]
    target = build(:string, records)
    assert_equal(records, target.raw_records)
  end

  def test_date32
    records = [
      [{"0" => Date.new(1960, 1, 1)}],
      [{"1" => nil}],
    ]
    target = build(:date32, records)
    assert_equal(records, target.raw_records)
  end

  def test_date64
    records = [
      [{"0" => DateTime.new(1960, 1, 1, 2, 9, 30)}],
      [{"1" => nil}],
    ]
    target = build(:date64, records)
    assert_equal(records, target.raw_records)
  end

  def test_timestamp_second
    records = [
      [{"0" => Time.parse("1960-01-01T02:09:30Z")}],
      [{"1" => nil}],
    ]
    target = build({
                     type: :timestamp,
                     unit: :second,
                   },
                   records)
    assert_equal(records, target.raw_records)
  end

  def test_timestamp_milli
    records = [
      [{"0" => Time.parse("1960-01-01T02:09:30.123Z")}],
      [{"1" => nil}],
    ]
    target = build({
                     type: :timestamp,
                     unit: :milli,
                   },
                   records)
    assert_equal(records, target.raw_records)
  end

  def test_timestamp_micro
    records = [
      [{"0" => Time.parse("1960-01-01T02:09:30.123456Z")}],
      [{"1" => nil}],
    ]
    target = build({
                     type: :timestamp,
                     unit: :micro,
                   },
                   records)
    assert_equal(records, target.raw_records)
  end

  def test_timestamp_nano
    records = [
      [{"0" => Time.parse("1960-01-01T02:09:30.123456789Z")}],
      [{"1" => nil}],
    ]
    target = build({
                     type: :timestamp,
                     unit: :nano,
                   },
                   records)
    assert_equal(records, target.raw_records)
  end

  def test_time32_second
    unit = Arrow::TimeUnit::SECOND
    records = [
      # 00:10:00
      [{"0" => Arrow::Time.new(unit, 60 * 10)}],
      [{"1" => nil}],
    ]
    target = build({
                     type: :time32,
                     unit: :second,
                   },
                   records)
    assert_equal(records, target.raw_records)
  end

  def test_time32_milli
    unit = Arrow::TimeUnit::MILLI
    records = [
      # 00:10:00.123
      [{"0" => Arrow::Time.new(unit, (60 * 10) * 1000 + 123)}],
      [{"1" => nil}],
    ]
    target = build({
                     type: :time32,
                     unit: :milli,
                   },
                   records)
    assert_equal(records, target.raw_records)
  end

  def test_time64_micro
    unit = Arrow::TimeUnit::MICRO
    records = [
      # 00:10:00.123456
      [{"0" => Arrow::Time.new(unit, (60 * 10) * 1_000_000 + 123_456)}],
      [{"1" => nil}],
    ]
    target = build({
                     type: :time64,
                     unit: :micro,
                   },
                   records)
    assert_equal(records, target.raw_records)
  end

  def test_time64_nano
    unit = Arrow::TimeUnit::NANO
    records = [
      # 00:10:00.123456789
      [{"0" => Arrow::Time.new(unit, (60 * 10) * 1_000_000_000 + 123_456_789)}],
      [{"1" => nil}],
    ]
    target = build({
                     type: :time64,
                     unit: :nano,
                   },
                   records)
    assert_equal(records, target.raw_records)
  end

  def test_decimal128
    records = [
      [{"0" => BigDecimal("92.92")}],
      [{"1" => nil}],
    ]
    target = build({
                     type: :decimal128,
                     precision: 8,
                     scale: 2,
                   },
                   records)
    assert_equal(records, target.raw_records)
  end

  def test_decimal256
    records = [
      [{"0" => BigDecimal("92.92")}],
      [{"1" => nil}],
    ]
    target = build({
                     type: :decimal256,
                     precision: 38,
                     scale: 2,
                   },
                   records)
    assert_equal(records, target.raw_records)
  end

  def test_list
    records = [
      [{"0" => [true, nil, false]}],
      [{"1" => nil}],
    ]
    target = build({
                     type: :list,
                     field: {
                       name: :sub_element,
                       type: :boolean,
                     },
                   },
                   records)
    assert_equal(records, target.raw_records)
  end

  def test_struct
    records = [
      [{"0" => {"sub_field" => true}}],
      [{"1" => nil}],
      [{"0" => {"sub_field" => nil}}],
    ]
    target = build({
                     type: :struct,
                     fields: [
                       {
                         name: :sub_field,
                         type: :boolean,
                       },
                     ],
                   },
                   records)
    assert_equal(records, target.raw_records)
  end

  # Omitted before any data is built; see the omit message.
  def test_sparse_union
    omit("Need to add support for SparseUnionArrayBuilder")
    records = [
      [{"0" => {"field1" => true}}],
      [{"1" => nil}],
      [{"0" => {"field2" => nil}}],
    ]
    target = build({
                     type: :sparse_union,
                     fields: [
                       {
                         name: :field1,
                         type: :boolean,
                       },
                       {
                         name: :field2,
                         type: :uint8,
                       },
                     ],
                     type_codes: [0, 1],
                   },
                   records)
    assert_equal(records, target.raw_records)
  end

  # Omitted before any data is built; see the omit message.
  def test_dense_union
    omit("Need to add support for DenseUnionArrayBuilder")
    records = [
      [{"0" => {"field1" => true}}],
      [{"1" => nil}],
      [{"0" => {"field2" => nil}}],
    ]
    target = build({
                     type: :dense_union,
                     fields: [
                       {
                         name: :field1,
                         type: :boolean,
                       },
                       {
                         name: :field2,
                         type: :uint8,
                       },
                     ],
                     type_codes: [0, 1],
                   },
                   records)
    assert_equal(records, target.raw_records)
  end

  # Omitted before any data is built; see the omit message.
  def test_dictionary
    omit("Need to add support for DictionaryArrayBuilder")
    records = [
      [{"0" => "Ruby"}],
      [{"1" => nil}],
      [{"0" => "GLib"}],
    ]
    dictionary = Arrow::StringArray.new(["GLib", "Ruby"])
    target = build({
                     type: :dictionary,
                     index_data_type: :int8,
                     dictionary: dictionary,
                     ordered: true,
                   },
                   records)
    assert_equal(records, target.raw_records)
  end
end
# Runs the shared dense union test cases against a record batch.
class RawRecordsRecordBatchDenseUnionArrayTest < Test::Unit::TestCase
  include RawRecordsDenseUnionArrayTests

  # Returns the target under test: the record batch produced by
  # build_record_batch for the given value type and records.
  def build(type, records)
    record_batch = build_record_batch(type, records)
    record_batch
  end
end
# Runs the shared dense union test cases against a table.
class RawRecordsTableDenseUnionArrayTest < Test::Unit::TestCase
  include RawRecordsDenseUnionArrayTests

  # Returns the target under test: the record batch produced by
  # build_record_batch, converted with #to_table.
  def build(type, records)
    record_batch = build_record_batch(type, records)
    record_batch.to_table
  end
end