# blob: e87ed1dd3c03a0ee6aa306ebf981df9e092b4939 [file] [log] [blame]
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# Exercises Arrow::CSVLoader.load with different kinds of input (CSV text,
# path String, Pathname) and with the options forwarded in the "CSVReader"
# group (:headers, :column_types, :schema, :encoding, :compression).
class CSVLoaderTest < Test::Unit::TestCase
  include Helper::Fixture

  # Helper for the ".load" group: hands +input+ to Arrow::CSVLoader.load and
  # always passes skip_lines: /^#/.
  def load_csv(input)
    Arrow::CSVLoader.load(input, skip_lines: /^#/)
  end

  # The expected texts in this group are compared verbatim against #to_s of
  # the loaded result.
  # NOTE(review): the heredocs use <<-, which keeps the body's whitespace
  # as-is -- confirm the column separators match the real #to_s output.
  sub_test_case(".load") do
    test("String: data: with header") do
      csv_text = fixture_path("with-header-float.csv").read
      expected = <<-TABLE
name score
0 alice 10.100000
1 bob 29.200000
2 chris -1.300000
      TABLE
      assert_equal(expected, load_csv(csv_text).to_s)
    end

    test("String: data: without header") do
      csv_text = fixture_path("without-header-float.csv").read
      expected = <<-TABLE
0 1
0 alice 10.100000
1 bob 29.200000
2 chris -1.300000
      TABLE
      assert_equal(expected, load_csv(csv_text).to_s)
    end

    test("String: path: with header") do
      csv_path = fixture_path("with-header-float.csv").to_s
      expected = <<-TABLE
name score
0 alice 10.100000
1 bob 29.200000
2 chris -1.300000
      TABLE
      assert_equal(expected, load_csv(csv_path).to_s)
    end

    test("String: path: without header") do
      csv_path = fixture_path("without-header-float.csv").to_s
      expected = <<-TABLE
0 1
0 alice 10.100000
1 bob 29.200000
2 chris -1.300000
      TABLE
      assert_equal(expected, load_csv(csv_path).to_s)
    end

    # The value returned by fixture_path is passed through unconverted here.
    test("Pathname: with header") do
      fixture = fixture_path("with-header-float.csv")
      expected = <<-TABLE
name score
0 alice 10.100000
1 bob 29.200000
2 chris -1.300000
      TABLE
      assert_equal(expected, load_csv(fixture).to_s)
    end

    test("Pathname: without header") do
      fixture = fixture_path("without-header-float.csv")
      expected = <<-TABLE
0 1
0 alice 10.100000
1 bob 29.200000
2 chris -1.300000
      TABLE
      assert_equal(expected, load_csv(fixture).to_s)
    end

    test("null: with double quote") do
      csv_path = fixture_path("null-with-double-quote.csv").to_s
      expected = <<-TABLE
name score
0 alice 10
1 bob
2 chris -1
      TABLE
      assert_equal(expected, load_csv(csv_path).to_s)
    end

    test("null: without double quote") do
      csv_path = fixture_path("null-without-double-quote.csv").to_s
      expected = <<-TABLE
name score
0 alice 10
1 bob
2 chris -1
      TABLE
      assert_equal(expected, load_csv(csv_path).to_s)
    end

    # The two "number" cases check the values of the :score column only.
    test("number: float, integer") do
      csv_path = fixture_path("float-integer.csv").to_s
      scores = load_csv(csv_path)[:score].to_a
      assert_equal([2.9, 10, -1.1], scores)
    end

    test("number: integer, float") do
      csv_path = fixture_path("integer-float.csv").to_s
      scores = load_csv(csv_path)[:score].to_a
      assert_equal([10.0, 2.9, -1.1], scores)
    end
  end

  sub_test_case("CSVReader") do
    # Replaces the outer helper inside this group: no skip_lines is added,
    # and every keyword option is forwarded untouched to the loader.
    def load_csv(data, **options)
      Arrow::CSVLoader.load(data, **options)
    end

    # Each case feeds inline CSV text and a different :headers value, then
    # compares the result with a single-column table of strings.
    sub_test_case(":headers") do
      test("true") do
        csv = <<-CSV
value
a
b
c
        CSV
        expected = Arrow::Table.new(value: Arrow::StringArray.new(%w[a b c]))
        assert_equal(expected, load_csv(csv, headers: true))
      end

      test(":first_line") do
        csv = <<-CSV
value
a
b
c
        CSV
        expected = Arrow::Table.new(value: Arrow::StringArray.new(%w[a b c]))
        assert_equal(expected, load_csv(csv, headers: :first_line))
      end

      # 0 is truthy in Ruby, so this case expects the same table as "true".
      test("truthy") do
        csv = <<-CSV
value
a
b
c
        CSV
        expected = Arrow::Table.new(value: Arrow::StringArray.new(%w[a b c]))
        assert_equal(expected, load_csv(csv, headers: 0))
      end

      test("Array of column names") do
        csv = <<-CSV
a
b
c
        CSV
        expected = Arrow::Table.new(column: Arrow::StringArray.new(%w[a b c]))
        assert_equal(expected, load_csv(csv, headers: ["column"]))
      end

      # With headers disabled the expected column is named f0.
      test("false") do
        csv = <<-CSV
a
b
c
        CSV
        expected = Arrow::Table.new(f0: Arrow::StringArray.new(%w[a b c]))
        assert_equal(expected, load_csv(csv, headers: false))
      end

      test("nil") do
        csv = <<-CSV
a
b
c
        CSV
        expected = Arrow::Table.new(f0: Arrow::StringArray.new(%w[a b c]))
        assert_equal(expected, load_csv(csv, headers: nil))
      end

      test("string") do
        csv = <<-CSV
a
b
c
        CSV
        expected = Arrow::Table.new(column: Arrow::StringArray.new(%w[a b c]))
        assert_equal(expected, load_csv(csv, headers: "column"))
      end
    end

    test(":column_types") do
      csv = <<-CSV
count
1
2
4
      CSV
      counts = Arrow::UInt16Array.new([1, 2, 4])
      assert_equal(Arrow::Table.new(count: counts),
                   load_csv(csv, column_types: {count: :uint16}))
    end

    # The schema handed to the loader is taken from the expected table itself.
    test(":schema") do
      expected = Arrow::Table.new(count: Arrow::UInt16Array.new([1, 2, 4]))
      csv = <<-CSV
count
1
2
4
      CSV
      assert_equal(expected, load_csv(csv, schema: expected.schema))
    end

    # The CSV text is transcoded to cp932 before loading; the expected table
    # is built from the untranscoded strings.
    test(":encoding") do
      hiragana = [
        "\u3042", # HIRAGANA LETTER A (U+3042)
        "\u3044", # HIRAGANA LETTER I (U+3044)
        "\u3046", # HIRAGANA LETTER U (U+3046)
      ]
      expected = Arrow::Table.new(message: Arrow::StringArray.new(hiragana))
      encoding = "cp932"
      encoded_csv = ["message", *hiragana].join("\n").encode(encoding)
      assert_equal(expected,
                   load_csv(encoded_csv,
                            schema: expected.schema,
                            encoding: encoding))
    end

    # Same data as the :encoding case, but the cp932 bytes are additionally
    # run through Zlib::Deflate.deflate and loaded with compression: :gzip.
    test(":encoding and :compression") do
      hiragana = [
        "\u3042", # HIRAGANA LETTER A (U+3042)
        "\u3044", # HIRAGANA LETTER I (U+3044)
        "\u3046", # HIRAGANA LETTER U (U+3046)
      ]
      expected = Arrow::Table.new(message: Arrow::StringArray.new(hiragana))
      encoding = "cp932"
      encoded_csv = ["message", *hiragana].join("\n").encode(encoding)
      compressed_csv = Zlib::Deflate.deflate(encoded_csv)
      assert_equal(expected,
                   load_csv(compressed_csv,
                            schema: expected.schema,
                            encoding: encoding,
                            compression: :gzip))
    end
  end
end