blob: 0123686db478d0b4cde553afc744e6d1d7641aa8 [file] [log] [blame]
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# Exercises Arrow::CSVReader#read, both with default settings and with
# the individual knobs exposed by Arrow::CSVReadOptions.
class TestCSVReader < Test::Unit::TestCase
  include Helper::Buildable
  include Helper::Omittable

  sub_test_case("#read") do
    # Wraps the given CSV text in an Arrow::BufferInputStream that is
    # backed by an Arrow::Buffer, ready to hand to Arrow::CSVReader.new.
    def open_input(csv)
      Arrow::BufferInputStream.new(Arrow::Buffer.new(csv))
    end

    # Without options, the header row names the columns and the values
    # are read as a string column and an int64 column.
    def test_default
      # NOTE(review): presumably omits the test unless GObject
      # Introspection >= 1.42.0 is available -- confirm in Helper::Omittable.
      require_gi(1, 42, 0)
      input = open_input(<<-CSV)
message,count
"Start",2
"Shutdown",9
      CSV
      reader = Arrow::CSVReader.new(input)
      expected_columns = {
        "message" => build_string_array(["Start", "Shutdown"]),
        "count" => build_int64_array([2, 9]),
      }
      expected = build_table(expected_columns)
      assert_equal(expected, reader.read)
    end

    sub_test_case("options") do
      # Per-column types registered one at a time are applied on read.
      def test_add_column_type
        options = Arrow::CSVReadOptions.new
        options.add_column_type("count", Arrow::UInt8DataType.new)
        options.add_column_type("valid", Arrow::BooleanDataType.new)
        input = open_input(<<-CSV)
count,valid
2,1
9,0
        CSV
        reader = Arrow::CSVReader.new(input, options)
        expected_columns = {
          "count" => build_uint8_array([2, 9]),
          "valid" => build_boolean_array([true, false]),
        }
        expected = build_table(expected_columns)
        assert_equal(expected, reader.read)
      end

      # Column types supplied in bulk through a schema are applied on read.
      def test_add_schema
        options = Arrow::CSVReadOptions.new
        schema = Arrow::Schema.new([
          Arrow::Field.new("count", Arrow::UInt8DataType.new),
          Arrow::Field.new("valid", Arrow::BooleanDataType.new),
        ])
        options.add_schema(schema)
        input = open_input(<<-CSV)
count,valid
2,1
9,0
        CSV
        reader = Arrow::CSVReader.new(input, options)
        expected_columns = {
          "count" => build_uint8_array([2, 9]),
          "valid" => build_boolean_array([true, false]),
        }
        expected = build_table(expected_columns)
        assert_equal(expected, reader.read)
      end

      # Registered column types can be read back as a name => type hash.
      def test_column_types
        # NOTE(review): presumably omits the test unless the GI bindings
        # are >= 3.3.1 -- confirm in Helper::Omittable.
        require_gi_bindings(3, 3, 1)
        options = Arrow::CSVReadOptions.new
        options.add_column_type("count", Arrow::UInt8DataType.new)
        options.add_column_type("valid", Arrow::BooleanDataType.new)
        expected_types = {
          "count" => Arrow::UInt8DataType.new,
          "valid" => Arrow::BooleanDataType.new,
        }
        assert_equal(expected_types, options.column_types)
      end

      # Values listed as null spellings round-trip through the accessor
      # and come back as nil in the int64 column.
      def test_null_values
        options = Arrow::CSVReadOptions.new
        null_values = ["2", "5"]
        options.null_values = null_values
        assert_equal(null_values, options.null_values)

        input = open_input(<<-CSV)
message,count
"Start",2
"Shutdown",9
"Restart",5
        CSV
        reader = Arrow::CSVReader.new(input, options)
        expected_columns = {
          "message" => build_string_array(["Start", "Shutdown", "Restart"]),
          "count" => build_int64_array([nil, 9, nil]),
        }
        expected = build_table(expected_columns)
        assert_equal(expected, reader.read)
      end

      # add_null_value appends to the null spellings already set.
      def test_add_null_value
        options = Arrow::CSVReadOptions.new
        null_values = ["2", "5"]
        options.null_values = null_values
        options.add_null_value("9")
        expected = [*null_values, "9"]
        assert_equal(expected, options.null_values)
      end

      # Custom true/false spellings round-trip through their accessors and
      # turn the message column into a boolean column.
      def test_boolean_values
        options = Arrow::CSVReadOptions.new

        true_values = ["Start", "Restart"]
        options.true_values = true_values
        assert_equal(true_values, options.true_values)

        false_values = ["Shutdown"]
        options.false_values = false_values
        assert_equal(false_values, options.false_values)

        input = open_input(<<-CSV)
message,count
"Start",2
"Shutdown",9
"Restart",5
        CSV
        reader = Arrow::CSVReader.new(input, options)
        expected_columns = {
          "message" => build_boolean_array([true, false, true]),
          "count" => build_int64_array([2, 9, 5]),
        }
        expected = build_table(expected_columns)
        assert_equal(expected, reader.read)
      end

      # add_true_value appends to the true spellings already set.
      def test_add_true_value
        options = Arrow::CSVReadOptions.new
        true_values = ["Start", "Restart"]
        options.true_values = true_values
        options.add_true_value("Shutdown")
        expected = [*true_values, "Shutdown"]
        assert_equal(expected, options.true_values)
      end

      # add_false_value appends to the false spellings already set.
      def test_add_false_value
        options = Arrow::CSVReadOptions.new
        false_values = ["Start", "Restart"]
        options.false_values = false_values
        options.add_false_value("Shutdown")
        expected = [*false_values, "Shutdown"]
        assert_equal(expected, options.false_values)
      end

      # With allow_null_strings enabled, null spellings also yield nil in
      # a string column.
      def test_allow_null_strings
        options = Arrow::CSVReadOptions.new
        options.null_values = ["Start", "Restart"]
        options.allow_null_strings = true
        input = open_input(<<-CSV)
message,count
"Start",2
"Shutdown",9
"Restart",5
        CSV
        reader = Arrow::CSVReader.new(input, options)
        expected_columns = {
          "message" => build_string_array([nil, "Shutdown", nil]),
          "count" => build_int64_array([2, 9, 5]),
        }
        expected = build_table(expected_columns)
        assert_equal(expected, reader.read)
      end

      # Skipping one row makes the second line of the input act as the
      # header row.
      def test_n_skip_rows
        options = Arrow::CSVReadOptions.new
        options.n_skip_rows = 1
        input = open_input(<<-CSV)
message1,message2
"Start1","Start2"
"Shutdown1","Shutdown2"
"Reboot1","Reboot2"
        CSV
        reader = Arrow::CSVReader.new(input, options)
        expected_columns = {
          "Start1" => build_string_array(["Shutdown1", "Reboot1"]),
          "Start2" => build_string_array(["Shutdown2", "Reboot2"]),
        }
        expected = build_table(expected_columns)
        assert_equal(expected, reader.read)
      end

      # Explicit column names round-trip through the accessor and every
      # input line, including the first, is read as data.
      def test_column_names
        options = Arrow::CSVReadOptions.new
        column_names = ["message", "count"]
        options.column_names = column_names
        assert_equal(column_names, options.column_names)

        input = open_input(<<-CSV)
"Start",2
"Shutdown",9
"Reboot",5
        CSV
        reader = Arrow::CSVReader.new(input, options)
        expected_columns = {
          "message" => build_string_array(["Start", "Shutdown", "Reboot"]),
          "count" => build_int64_array([2, 9, 5]),
        }
        expected = build_table(expected_columns)
        assert_equal(expected, reader.read)
      end

      # add_column_name appends to the column names already set.
      def test_add_column_name
        options = Arrow::CSVReadOptions.new
        column_names = ["message", "count"]
        options.column_names = column_names
        options.add_column_name("score")
        expected = [*column_names, "score"]
        assert_equal(expected, options.column_names)
      end

      # With generated column names, a header-less input gets the names
      # "f0" and "f1".
      def test_generate_column_names
        options = Arrow::CSVReadOptions.new
        options.generate_column_names = true
        input = open_input(<<-CSV)
"Start",2
"Shutdown",9
"Reboot",5
        CSV
        reader = Arrow::CSVReader.new(input, options)
        expected_columns = {
          "f0" => build_string_array(["Start", "Shutdown", "Reboot"]),
          "f1" => build_int64_array([2, 9, 5]),
        }
        expected = build_table(expected_columns)
        assert_equal(expected, reader.read)
      end
    end
  end
end