| # Licensed to the Apache Software Foundation (ASF) under one |
| # or more contributor license agreements. See the NOTICE file |
| # distributed with this work for additional information |
| # regarding copyright ownership. The ASF licenses this file |
| # to you under the Apache License, Version 2.0 (the |
| # "License"); you may not use this file except in compliance |
| # with the License. You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, |
| # software distributed under the License is distributed on an |
| # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| # KIND, either express or implied. See the License for the |
| # specific language governing permissions and limitations |
| # under the License. |
| |
| class TableTest < Test::Unit::TestCase |
| include Helper::Fixture |
| |
# Builds the fixture shared by the tests: a table with a uint8 "count"
# column and a boolean "visible" column. Both columns are chunked arrays
# holding 8 values each, but split on different chunk boundaries
# (4 chunks vs. 5 chunks).
def setup
  @count_field = Arrow::Field.new("count", :uint8)
  @visible_field = Arrow::Field.new("visible", :boolean)

  count_chunks = [
    [1, 2],
    [4, 8, 16],
    [32, 64],
    [128],
  ].collect {|values| Arrow::UInt8Array.new(values)}
  visible_chunks = [
    [true, false, nil],
    [true],
    [true, false],
    [nil],
    [nil],
  ].collect {|values| Arrow::BooleanArray.new(values)}
  @count_array = Arrow::ChunkedArray.new(count_chunks)
  @visible_array = Arrow::ChunkedArray.new(visible_chunks)

  fields = [@count_field, @visible_field]
  columns = [@count_array, @visible_array]
  @table = Arrow::Table.new(Arrow::Schema.new(fields), columns)
end
| |
# Table#columns is compared against columns built from index 0 and 1,
# in that order.
test("#columns") do
  expected_columns = [0, 1].collect do |index|
    Arrow::Column.new(@table, index)
  end
  assert_equal(expected_columns, @table.columns)
end
| |
# Exercises the argument shapes Table#slice accepts against the 8-row
# fixture from #setup: boolean masks, a single Integer, Ranges,
# (offset, length) pairs and {column => condition} hashes.
sub_test_case("#slice") do
  # Boolean mask: true keeps the row, false drops it and nil yields an
  # all-null row in the result.
  test("Arrow::BooleanArray") do
    target_rows_raw = [nil, true, true, false, true, false, true, true]
    target_rows = Arrow::BooleanArray.new(target_rows_raw)
    assert_equal(<<-TABLE, @table.slice(target_rows).to_s)
count visible
0 (null) (null)
1 2 false
2 4 (null)
3 16 true
4 64 (null)
5 128 (null)
    TABLE
  end

  # Same mask as above, passed as a plain Ruby Array.
  test("Array: boolean") do
    target_rows_raw = [nil, true, true, false, true, false, true, true]
    assert_equal(<<-TABLE, @table.slice(target_rows_raw).to_s)
count visible
0 (null) (null)
1 2 false
2 4 (null)
3 16 true
4 64 (null)
5 128 (null)
    TABLE
  end

  # A single Integer selects one row; the result is compared via #to_h.
  test("Integer: positive") do
    assert_equal({"count" => 128, "visible" => nil},
                 @table.slice(@table.n_rows - 1).to_h)
  end

  # Negative indexes count from the end: -n_rows is the first row.
  test("Integer: negative") do
    assert_equal({"count" => 1, "visible" => true},
                 @table.slice(-@table.n_rows).to_h)
  end

  # One past either end returns nil instead of raising.
  test("Integer: out of index") do
    assert_equal([
                   nil,
                   nil,
                 ],
                 [
                   @table.slice(@table.n_rows),
                   @table.slice(-(@table.n_rows + 1)),
                 ])
  end

  test("Range: positive: include end") do
    assert_equal(<<-TABLE, @table.slice(2..4).to_s)
count visible
0 4 (null)
1 8 true
2 16 true
    TABLE
  end

  test("Range: positive: exclude end") do
    assert_equal(<<-TABLE, @table.slice(2...4).to_s)
count visible
0 4 (null)
1 8 true
    TABLE
  end

  test("Range: negative: include end") do
    assert_equal(<<-TABLE, @table.slice(-4..-2).to_s)
count visible
0 16 true
1 32 false
2 64 (null)
    TABLE
  end

  test("Range: negative: exclude end") do
    assert_equal(<<-TABLE, @table.slice(-4...-2).to_s)
count visible
0 16 true
1 32 false
    TABLE
  end

  # The second argument is a row count, not an end index:
  # slice(0, 2) yields exactly two rows.
  test("[offset, length]: positive") do
    assert_equal(<<-TABLE, @table.slice(0, 2).to_s)
count visible
0 1 true
1 2 false
    TABLE
  end

  # A negative offset counts from the end; the length is still a row count.
  test("[offset, length]: negative") do
    assert_equal(<<-TABLE, @table.slice(-4, 2).to_s)
count visible
0 16 true
1 32 false
    TABLE
  end

  test("{key: Number}") do
    assert_equal(<<-TABLE, @table.slice(count: 16).to_s)
count visible
0 16 true
    TABLE
  end

  test("{key: String}") do
    table = Arrow::Table.new(name: Arrow::StringArray.new(["a", "b", "c"]))
    assert_equal(<<-TABLE, table.slice(name: 'b').to_s)
name
0 b
    TABLE
  end

  # Rows whose "visible" value is null show up as all-null rows.
  test("{key: true}") do
    assert_equal(<<-TABLE, @table.slice(visible: true).to_s)
count visible
0 1 true
1 (null) (null)
2 8 true
3 16 true
4 (null) (null)
5 (null) (null)
    TABLE
  end

  test("{key: false}") do
    assert_equal(<<-TABLE, @table.slice(visible: false).to_s)
count visible
0 2 false
1 (null) (null)
2 32 false
3 (null) (null)
4 (null) (null)
    TABLE
  end

  # The range literal goes through eval so that this file still parses on
  # Rubies without beginless-range syntax; there the test is omitted.
  test("{key: Range}: beginless include end") do
    begin
      range = eval("..8")
    rescue SyntaxError
      omit("beginless range isn't supported")
    end
    assert_equal(<<-TABLE, @table.slice(count: range).to_s)
count visible
0 1 true
1 2 false
2 4 (null)
3 8 true
    TABLE
  end

  test("{key: Range}: beginless exclude end") do
    begin
      range = eval("...8")
    rescue SyntaxError
      omit("beginless range isn't supported")
    end
    assert_equal(<<-TABLE, @table.slice(count: range).to_s)
count visible
0 1 true
1 2 false
2 4 (null)
    TABLE
  end

  # Same eval guard as the beginless cases, for endless-range syntax.
  test("{key: Range}: endless") do
    begin
      range = eval("16..")
    rescue SyntaxError
      omit("endless range isn't supported")
    end
    assert_equal(<<-TABLE, @table.slice(count: range).to_s)
count visible
0 16 true
1 32 false
2 64 (null)
3 128 (null)
    TABLE
  end

  test("{key: Range}: include end") do
    assert_equal(<<-TABLE, @table.slice(count: 1..16).to_s)
count visible
0 1 true
1 2 false
2 4 (null)
3 8 true
4 16 true
    TABLE
  end

  test("{key: Range}: exclude end") do
    assert_equal(<<-TABLE, @table.slice(count: 1...16).to_s)
count visible
0 1 true
1 2 false
2 4 (null)
3 8 true
    TABLE
  end

  # Two conditions at once: a Range on "count" and false on "visible".
  test("{key1: Range, key2: false}") do
    assert_equal(<<-TABLE, @table.slice(count: 0..8, visible: false).to_s)
count visible
0 2 false
1 (null) (null)
2 (null) (null)
3 (null) (null)
    TABLE
  end

  # Invalid calls must raise ArgumentError with the exact message.
  sub_test_case("wrong argument") do
    test("no arguments") do
      message = "wrong number of arguments (given 0, expected 1..2)"
      assert_raise(ArgumentError.new(message)) do
        @table.slice
      end
    end

    test("too many arguments") do
      message = "wrong number of arguments (given 3, expected 1..2)"
      assert_raise(ArgumentError.new(message)) do
        @table.slice(1, 2, 3)
      end
    end

    test("arguments: with block") do
      message = "must not specify both arguments and block"
      assert_raise(ArgumentError.new(message)) do
        @table.slice(1, 2) {}
      end
    end

    # Unlike the single-Integer form, an out-of-range offset in the
    # two-argument form raises.
    test("offset: too small") do
      n_rows = @table.n_rows
      offset = -(n_rows + 1)
      message = "offset is out of range (-#{n_rows + 1},#{n_rows}): #{offset}"
      assert_raise(ArgumentError.new(message)) do
        @table.slice(offset, 1)
      end
    end

    test("offset: too large") do
      n_rows = @table.n_rows
      offset = n_rows
      message = "offset is out of range (-#{n_rows + 1},#{n_rows}): #{offset}"
      assert_raise(ArgumentError.new(message)) do
        @table.slice(offset, 1)
      end
    end
  end
end
| |
# Table#[] lookups by name, by index, by range and by a mixed selector list.
sub_test_case("#[]") do
  # Replaces the class-level fixture with seven one-row boolean columns
  # named a through g.
  def setup
    raw_columns = {
      a: [true],
      b: [true],
      c: [true],
      d: [true],
      e: [true],
      f: [true],
      g: [true],
    }
    @table = Arrow::Table.new(raw_columns)
  end

  test("[String]") do
    expected = Arrow::Column.new(@table, 0)
    assert_equal(expected, @table["a"])
  end

  test("[Symbol]") do
    expected = Arrow::Column.new(@table, 1)
    assert_equal(expected, @table[:b])
  end

  # -1 addresses the last column (index 6 of 7).
  test("[Integer]") do
    expected = Arrow::Column.new(@table, 6)
    assert_equal(expected, @table[-1])
  end

  # A Range returns a table made of the covered columns.
  test("[Range]") do
    expected = Arrow::Table.new(d: [true], e: [true])
    assert_equal(expected, @table[3..4])
  end

  # Selectors inside an Array are resolved in the order given.
  test("[[Symbol, String, Integer, Range]]") do
    expected = Arrow::Table.new(c: [true],
                                a: [true],
                                g: [true],
                                d: [true],
                                e: [true])
    selectors = [:c, "a", -1, 3..4]
    assert_equal(expected, @table[selectors])
  end
end
| |
# Table#merge with a Hash and with another Arrow::Table: adding a new
# column, removing one (nil value) and replacing an existing one.
sub_test_case("#merge") do
  sub_test_case("Hash") do
    test("add") do
      names = Arrow::StringArray.new(%w[a b c d e f g h])
      merged = @table.merge(name: names)
      assert_equal(<<-TABLE, merged.to_s)
count visible name
0 1 true a
1 2 false b
2 4 (null) c
3 8 true d
4 16 true e
5 32 false f
6 64 (null) g
7 128 (null) h
      TABLE
    end

    # Mapping a column name to nil drops that column.
    test("remove") do
      merged = @table.merge(visible: nil)
      assert_equal(<<-TABLE, merged.to_s)
count
0 1
1 2
2 4
3 8
4 16
5 32
6 64
7 128
      TABLE
    end

    # An existing name is overwritten, here with an int32 column of 1s.
    test("replace") do
      ones = Arrow::Int32Array.new(Array.new(@visible_array.length, 1))
      merged = @table.merge(visible: ones)
      assert_equal(<<-TABLE, merged.to_s)
count visible
0 1 1
1 2 1
2 4 1
3 8 1
4 16 1
5 32 1
6 64 1
7 128 1
      TABLE
    end
  end

  sub_test_case("Arrow::Table") do
    test("add") do
      names = Arrow::StringArray.new(%w[a b c d e f g h])
      other = Arrow::Table.new("name" => names)
      merged = @table.merge(other)
      assert_equal(<<-TABLE, merged.to_s)
count visible name
0 1 true a
1 2 false b
2 4 (null) c
3 8 true d
4 16 true e
5 32 false f
6 64 (null) g
7 128 (null) h
      TABLE
    end

    test("replace") do
      ones = Arrow::Int32Array.new(Array.new(@visible_array.length, 1))
      other = Arrow::Table.new("visible" => ones)
      merged = @table.merge(other)
      assert_equal(<<-TABLE, merged.to_s)
count visible
0 1 1
1 2 1
2 4 1
3 8 1
4 16 1
5 32 1
6 64 1
7 128 1
      TABLE
    end
  end
end
| |
# A column can be read through a method named after it: #visible is
# expected to equal the column at index 1.
test("column name getter") do
  expected = Arrow::Column.new(@table, 1)
  assert_equal(expected, @table.visible)
end
| |
# Table#remove_column addressed by name (String/Symbol) and by index,
# plus the failures for unknown names and out-of-range indexes.
sub_test_case("#remove_column") do
  # Rendering of the fixture once "visible" has been removed; every
  # successful removal below is expected to produce exactly this.
  def count_only_table
    <<-TABLE
count
0 1
1 2
2 4
3 8
4 16
5 32
6 64
7 128
    TABLE
  end

  test("String") do
    remaining = @table.remove_column("visible")
    assert_equal(count_only_table, remaining.to_s)
  end

  test("Symbol") do
    remaining = @table.remove_column(:visible)
    assert_equal(count_only_table, remaining.to_s)
  end

  test("unknown column name") do
    assert_raise(KeyError) do
      @table.remove_column(:nonexistent)
    end
  end

  test("Integer") do
    remaining = @table.remove_column(1)
    assert_equal(count_only_table, remaining.to_s)
  end

  # -1 is the last column, i.e. "visible" in the two-column fixture.
  test("negative integer") do
    remaining = @table.remove_column(-1)
    assert_equal(count_only_table, remaining.to_s)
  end

  # With two columns, -3 and 2 are the first indexes outside the table.
  test("too small index") do
    assert_raise(IndexError) do
      @table.remove_column(-3)
    end
  end

  test("too large index") do
    assert_raise(IndexError) do
      @table.remove_column(2)
    end
  end
end
| |
# Table#select_columns with names, ranges, indexes, a mix of them, a
# block, and selectors combined with a block.
sub_test_case("#select_columns") do
  # Replaces the class-level fixture with five one-row uint8 columns
  # named a through e.
  def setup
    raw_table = {}
    [:a, :b, :c, :d, :e].each do |name|
      raw_table[name] = Arrow::UInt8Array.new([1])
    end
    @table = Arrow::Table.new(raw_table)
  end

  # The result follows the order of the selectors, not the table order.
  test("names") do
    selected = @table.select_columns(:c, :a)
    assert_equal(<<-TABLE, selected.to_s)
c a
0 1 1
    TABLE
  end

  test("range") do
    selected = @table.select_columns(2...4)
    assert_equal(<<-TABLE, selected.to_s)
c d
0 1 1
    TABLE
  end

  test("indexes") do
    selected = @table.select_columns(0, -1, 2)
    assert_equal(<<-TABLE, selected.to_s)
a e c
0 1 1 1
    TABLE
  end

  test("mixed") do
    selected = @table.select_columns(:a, -1, 2..3)
    assert_equal(<<-TABLE, selected.to_s)
a e c d
0 1 1 1 1
    TABLE
  end

  # Without arguments the call is chained with #with_index, and the
  # block decides per column.
  test("block") do
    selected = @table.select_columns.with_index do |column, index|
      column.name == "a" || index.odd?
    end
    assert_equal(<<-TABLE, selected.to_s)
a b d
0 1 1 1
    TABLE
  end

  # The block is applied on top of the selectors: "e" (index -1) is
  # selected by index but rejected by the block.
  test("names, indexes and block") do
    selected = @table.select_columns(:a, -1) do |column|
      column.name == "a"
    end
    assert_equal(<<-TABLE, selected.to_s)
a
0 1
    TABLE
  end
end
| |
| sub_test_case("#save and .load") do |
# Save/load round-trip tests shared by several output kinds. The including
# test case must define #create_output(extension); its return value is
# passed to Table#save and then to Table.load.
module SaveLoadFormatTests
  def test_default
    destination = create_output(".arrow")
    @table.save(destination)
    loaded = Arrow::Table.load(destination)
    assert_equal(@table, loaded)
  end

  def test_arrow_file
    destination = create_output(".arrow")
    @table.save(destination, format: :arrow_file)
    loaded = Arrow::Table.load(destination, format: :arrow_file)
    assert_equal(@table, loaded)
  end

  def test_batch
    destination = create_output(".arrow")
    @table.save(destination, format: :batch)
    loaded = Arrow::Table.load(destination, format: :batch)
    assert_equal(@table, loaded)
  end

  def test_arrows
    destination = create_output(".arrows")
    @table.save(destination, format: :arrows)
    loaded = Arrow::Table.load(destination, format: :arrows)
    assert_equal(@table, loaded)
  end

  def test_arrow_streaming
    destination = create_output(".arrows")
    @table.save(destination, format: :arrow_streaming)
    loaded = Arrow::Table.load(destination, format: :arrow_streaming)
    assert_equal(@table, loaded)
  end

  def test_stream
    destination = create_output(".arrows")
    @table.save(destination, format: :stream)
    loaded = Arrow::Table.load(destination, format: :stream)
    assert_equal(@table, loaded)
  end

  # The text formats are loaded with the original schema passed explicitly.
  def test_csv
    destination = create_output(".csv")
    @table.save(destination, format: :csv)
    loaded = Arrow::Table.load(destination,
                               format: :csv,
                               schema: @table.schema)
    assert_equal(@table, loaded)
  end

  def test_csv_gz
    destination = create_output(".csv.gz")
    @table.save(destination,
                format: :csv,
                compression: :gzip)
    loaded = Arrow::Table.load(destination,
                               format: :csv,
                               compression: :gzip,
                               schema: @table.schema)
    assert_equal(@table, loaded)
  end

  def test_tsv
    destination = create_output(".tsv")
    @table.save(destination, format: :tsv)
    loaded = Arrow::Table.load(destination,
                               format: :tsv,
                               schema: @table.schema)
    assert_equal(@table, loaded)
  end
end
| |
# Round trips through a filesystem path, including format detection from
# the file extension on save and on load.
sub_test_case("path") do
  sub_test_case(":format") do
    include SaveLoadFormatTests

    # Returns the path of a fresh temporary file with the given extension.
    # The Tempfile is kept in @file -- presumably so it is not finalized
    # (and its file removed) while the test still uses the path.
    def create_output(extension)
      @file = Tempfile.new(["red-arrow", extension])
      @file.path
    end

    # Save without :format, then load with the format stated explicitly.
    sub_test_case("save: auto detect") do
      test("arrow") do
        path = create_output(".arrow")
        @table.save(path)
        loaded = Arrow::Table.load(path,
                                   format: :arrow,
                                   schema: @table.schema)
        assert_equal(@table, loaded)
      end

      test("arrows") do
        path = create_output(".arrows")
        @table.save(path)
        loaded = Arrow::Table.load(path,
                                   format: :arrows,
                                   schema: @table.schema)
        assert_equal(@table, loaded)
      end

      test("csv") do
        path = create_output(".csv")
        @table.save(path)
        loaded = Arrow::Table.load(path,
                                   format: :csv,
                                   schema: @table.schema)
        assert_equal(@table, loaded)
      end

      test("csv.gz") do
        path = create_output(".csv.gz")
        @table.save(path)
        loaded = Arrow::Table.load(path,
                                   format: :csv,
                                   compression: :gzip,
                                   schema: @table.schema)
        assert_equal(@table, loaded)
      end

      test("tsv") do
        path = create_output(".tsv")
        @table.save(path)
        loaded = Arrow::Table.load(path,
                                   format: :tsv,
                                   schema: @table.schema)
        assert_equal(@table, loaded)
      end
    end

    # Save with an explicit format (or prepare the file by hand), then
    # load without :format.
    sub_test_case("load: auto detect") do
      test("arrow: file") do
        path = create_output(".arrow")
        @table.save(path, format: :arrow_file)
        loaded = Arrow::Table.load(path)
        assert_equal(@table, loaded)
      end

      # Streaming data stored under the ".arrow" extension.
      test("arrow: streaming") do
        path = create_output(".arrow")
        @table.save(path, format: :arrows)
        loaded = Arrow::Table.load(path)
        assert_equal(@table, loaded)
      end

      test("arrows") do
        path = create_output(".arrows")
        @table.save(path, format: :arrows)
        loaded = Arrow::Table.load(path)
        assert_equal(@table, loaded)
      end

      test("csv") do
        csv_path = fixture_path("with-header.csv")
        loaded = Arrow::Table.load(csv_path, skip_lines: /^\#/)
        assert_equal(<<-TABLE, loaded.to_s)
name score
0 alice 10
1 bob 29
2 chris -1
        TABLE
      end

      test("csv.gz") do
        csv = <<-CSV
name,score
alice,10
bob,29
chris,-1
        CSV
        gz_file = Tempfile.new(["red-arrow", ".csv.gz"])
        # Close our handle first; GzipWriter reopens the file by path.
        gz_file.close
        Zlib::GzipWriter.open(gz_file.path) do |gz|
          gz.write(csv)
        end
        loaded = Arrow::Table.load(gz_file.path)
        assert_equal(<<-TABLE, loaded.to_s)
name score
0 alice 10
1 bob 29
2 chris -1
        TABLE
      end

      test("tsv") do
        tsv = <<-TSV
name\tscore
alice\t10
bob\t29
chris\t-1
        TSV
        tsv_file = Tempfile.new(["red-arrow", ".tsv"])
        tsv_file.puts(tsv)
        tsv_file.close
        loaded = Arrow::Table.load(tsv_file.path)
        assert_equal(<<-TABLE, loaded.to_s)
name score
0 alice 10
1 bob 29
2 chris -1
        TABLE
      end
    end
  end
end
| |
# Runs the shared round-trip tests against an in-memory buffer.
sub_test_case("Buffer") do
  sub_test_case(":format") do
    include SaveLoadFormatTests

    # A new resizable buffer per call; the extension is not used here.
    def create_output(extension)
      initial_size = 1024
      Arrow::ResizableBuffer.new(initial_size)
    end
  end
end
| |
# Loading a table from an http:// URI served by a local WEBrick instance.
sub_test_case("URI") do
  # Serves +data+ with +content_type+ at +path+ on a port picked by the
  # OS (Port 0), yields that port, and always shuts the server down and
  # joins its thread afterwards. The yielded block is limited to 1 second.
  def start_web_server(path, data, content_type)
    server = WEBrick::HTTPServer.new(Port: 0)
    server.mount_proc(path) do |_request, response|
      response.body = data
      response.content_type = content_type
    end
    server_thread = Thread.new { server.start }
    begin
      Timeout.timeout(1) { yield(server[:Port]) }
    ensure
      server.shutdown
      server_thread.join
    end
  end

  # Each data set is [file extension, Content-Type header].
  data("Arrow File",
       ["arrow", "application/vnd.apache.arrow.file"])
  data("Arrow Stream",
       ["arrows", "application/vnd.apache.arrow.stream"])
  data("CSV",
       ["csv", "text/csv"])
  def test_http(data)
    extension, content_type = data
    buffer = Arrow::ResizableBuffer.new(1024)
    # The extension doubles as the save format name.
    @table.save(buffer, format: extension.to_sym)
    path = "/data.#{extension}"
    body = buffer.data.to_s
    start_web_server(path, body, content_type) do |port|
      uri = URI("http://127.0.0.1:#{port}#{path}")
      loaded_table = Arrow::Table.load(uri)
      # Compared through #to_s rather than with ==.
      assert_equal(@table.to_s, loaded_table.to_s)
    end
  end
end
| end |
| |
# After #pack every column is expected to consist of a single chunk while
# the rendered contents stay the same.
test("#pack") do
  packed = @table.pack
  n_chunks_per_column = packed.columns.collect do |column|
    column.data.n_chunks
  end
  expected_table = <<-TABLE
count visible
0 1 true
1 2 false
2 4 (null)
3 8 true
4 16 true
5 32 false
6 64 (null)
7 128 (null)
  TABLE
  assert_equal([[1, 1], expected_table],
               [n_chunks_per_column, packed.to_s])
end
| |
# Table#to_s output for each supported :format value.
sub_test_case("#to_s") do
  sub_test_case(":format") do
    # A small two-row table keeps the expected renderings short.
    def setup
      columns = {
        "count" => Arrow::UInt8Array.new([1, 2]),
        "visible" => Arrow::BooleanArray.new([true, false]),
      }
      @table = Arrow::Table.new(columns)
    end

    test(":column") do
      assert_equal(<<-TABLE, @table.to_s(format: :column))
count: uint8
visible: bool
----
count:
[
[
1,
2
]
]
visible:
[
[
true,
false
]
]
      TABLE
    end

    test(":list") do
      assert_equal(<<-TABLE, @table.to_s(format: :list))
==================== 0 ====================
count: 1
visible: true
==================== 1 ====================
count: 2
visible: false
      TABLE
    end

    test(":table") do
      assert_equal(<<-TABLE, @table.to_s(format: :table))
count visible
0 1 true
1 2 false
      TABLE
    end

    # An unknown format raises ArgumentError listing the accepted values.
    test("invalid") do
      message = ":format must be :column, :list, :table or nil: <:invalid>"
      assert_raise(ArgumentError.new(message)) do
        @table.to_s(format: :invalid)
      end
    end
  end
end

# Equality is unrelated to #to_s, so these tests form their own group
# instead of being nested (and reported) under "#to_s". They still run
# against the fixture from the class-level #setup, as before.
sub_test_case("#==") do
  test("Arrow::Table") do
    assert do
      @table == @table
    end
  end

  # Comparing with a non-table object is false rather than an error.
  test("not Arrow::Table") do
    assert do
      !(@table == 29)
    end
  end
end
| |
# Table#filter with the same boolean mask given as a Ruby Array, an
# Arrow::BooleanArray and an Arrow::ChunkedArray.
sub_test_case("#filter") do
  # Keeps the class-level fixture (super) and adds filter options that
  # emit a null row wherever the mask itself is null.
  def setup
    super
    @options = Arrow::FilterOptions.new
    @options.null_selection_behavior = :emit_null
  end

  # Expected rendering for the mask
  # [nil, true, true, false, true, false, true, true] used by every test.
  def filtered_table
    <<-TABLE
count visible
0 (null) (null)
1 2 false
2 4 (null)
3 16 true
4 64 (null)
5 128 (null)
    TABLE
  end

  test("Array: boolean") do
    mask = [nil, true, true, false, true, false, true, true]
    filtered = @table.filter(mask, @options)
    assert_equal(filtered_table, filtered.to_s)
  end

  test("Arrow::BooleanArray") do
    raw_mask = [nil, true, true, false, true, false, true, true]
    mask = Arrow::BooleanArray.new(raw_mask)
    filtered = @table.filter(mask, @options)
    assert_equal(filtered_table, filtered.to_s)
  end

  # The mask is split into chunks of 3, 3 and 2 values.
  test("Arrow::ChunkedArray") do
    mask_chunks = [
      [nil, true, true],
      [false, true, false],
      [true, true],
    ].collect {|values| Arrow::BooleanArray.new(values)}
    mask = Arrow::ChunkedArray.new(mask_chunks)
    filtered = @table.filter(mask, @options)
    assert_equal(filtered_table, filtered.to_s)
  end
end
| |
# Table#take with row indices given as a Ruby Array, an Arrow::Array and
# an Arrow::ChunkedArray. The indices [1, 0, 2] also reorder the rows.
sub_test_case("#take") do
  # Plain Ruby Array of Integer indices (this test was previously
  # mislabeled "Arrow: boolean").
  test("Array: integer") do
    indices = [1, 0, 2]
    assert_equal(<<-TABLE, @table.take(indices).to_s)
count visible
0 2 false
1 1 true
2 4 (null)
    TABLE
  end

  test("Arrow::Array") do
    indices = Arrow::Int16Array.new([1, 0, 2])
    assert_equal(<<-TABLE, @table.take(indices).to_s)
count visible
0 2 false
1 1 true
2 4 (null)
    TABLE
  end

  # Same indices, split across two chunks.
  test("Arrow::ChunkedArray") do
    chunks = [
      Arrow::Int16Array.new([1, 0]),
      Arrow::Int16Array.new([2])
    ]
    indices = Arrow::ChunkedArray.new(chunks)
    assert_equal(<<-TABLE, @table.take(indices).to_s)
count visible
0 2 false
1 1 true
2 4 (null)
    TABLE
  end
end
| |
# Table#concatenate with tables whose schemas differ.
sub_test_case("#concatenate") do
  # With unify_schemas: true the column missing from the second table
  # ("a") is rendered as null for its rows.
  test("options: :unify_schemas") do
    wide_table = Arrow::Table.new(a: [true],
                                  b: [false])
    narrow_table = Arrow::Table.new(b: [false])
    concatenated = wide_table.concatenate([narrow_table],
                                          unify_schemas: true)
    expected = <<-TABLE
a b
0 true false
1 (null) false
    TABLE
    assert_equal(expected, concatenated.to_s)
  end
end
| |
# Table#join with the different ways of naming key columns, plus the
# type: and left_outputs:/right_outputs: options.
sub_test_case("#join") do
  # The expected tables are built from [name, values] pairs because the
  # join result carries the key column(s) of both sides.
  test("keys: String") do
    left = Arrow::Table.new(key: [1, 2, 3],
                            number: [10, 20, 30])
    right = Arrow::Table.new(key: [3, 1],
                             string: ["three", "one"])
    expected = Arrow::Table.new([
                                  ["key", [1, 3]],
                                  ["number", [10, 30]],
                                  ["key", [1, 3]],
                                  ["string", ["one", "three"]],
                                ])
    assert_equal(expected, left.join(right, "key"))
  end

  test("keys: Symbol") do
    left = Arrow::Table.new(key: [1, 2, 3],
                            number: [10, 20, 30])
    right = Arrow::Table.new(key: [3, 1],
                             string: ["three", "one"])
    expected = Arrow::Table.new([
                                  ["key", [1, 3]],
                                  ["number", [10, 30]],
                                  ["key", [1, 3]],
                                  ["string", ["one", "three"]],
                                ])
    assert_equal(expected, left.join(right, :key))
  end

  # Composite key: only rows matching on both key1 and key2 are expected.
  test("keys: [String, Symbol]") do
    left = Arrow::Table.new(key1: [1, 1, 2, 2],
                            key2: [10, 100, 20, 200],
                            number: [1010, 1100, 2020, 2200])
    right = Arrow::Table.new(key1: [1, 2, 2],
                             key2: [100, 20, 50],
                             string: ["1-100", "2-20", "2-50"])
    expected = Arrow::Table.new([
                                  ["key1", [1, 2]],
                                  ["key2", [100, 20]],
                                  ["number", [1100, 2020]],
                                  ["key1", [1, 2]],
                                  ["key2", [100, 20]],
                                  ["string", ["1-100", "2-20"]],
                                ])
    assert_equal(expected, left.join(right, ["key1", :key2]))
  end

  # Key columns that are named differently on each side.
  test("keys: {left: String, right: Symbol}") do
    left = Arrow::Table.new(left_key: [1, 2, 3],
                            number: [10, 20, 30])
    right = Arrow::Table.new(right_key: [3, 1],
                             string: ["three", "one"])
    expected = Arrow::Table.new([
                                  ["left_key", [1, 3]],
                                  ["number", [10, 30]],
                                  ["right_key", [1, 3]],
                                  ["string", ["one", "three"]],
                                ])
    keys = {left: "left_key", right: :right_key}
    assert_equal(expected, left.join(right, keys))
  end

  test("keys: {left: [String, Symbol], right: [Symbol, String]}") do
    left = Arrow::Table.new(left_key1: [1, 1, 2, 2],
                            left_key2: [10, 100, 20, 200],
                            number: [1010, 1100, 2020, 2200])
    right = Arrow::Table.new(right_key1: [1, 2, 2],
                             right_key2: [100, 20, 50],
                             string: ["1-100", "2-20", "2-50"])
    expected = Arrow::Table.new([
                                  ["left_key1", [1, 2]],
                                  ["left_key2", [100, 20]],
                                  ["number", [1100, 2020]],
                                  ["right_key1", [1, 2]],
                                  ["right_key2", [100, 20]],
                                  ["string", ["1-100", "2-20"]],
                                ])
    keys = {
      left: ["left_key1", :left_key2],
      right: [:right_key1, "right_key2"],
    }
    assert_equal(expected, left.join(right, keys))
  end

  # :left_outer keeps the unmatched left row (key 2) with nulls on the
  # right-hand columns.
  test("type:") do
    left = Arrow::Table.new(key: [1, 2, 3],
                            number: [10, 20, 30])
    right = Arrow::Table.new(key: [3, 1],
                             string: ["three", "one"])
    expected = Arrow::Table.new([
                                  ["key", [1, 3, 2]],
                                  ["number", [10, 30, 20]],
                                  ["key", [1, 3, nil]],
                                  ["string", ["one", "three", nil]],
                                ])
    assert_equal(expected, left.join(right, "key", type: :left_outer))
  end

  # Restricting the outputs leaves a single "key" column in the result.
  test("left_outputs: & right_outputs:") do
    left = Arrow::Table.new(key: [1, 2, 3],
                            number: [10, 20, 30])
    right = Arrow::Table.new(key: [3, 1],
                             string: ["three", "one"])
    expected = Arrow::Table.new(key: [1, 3],
                                number: [10, 30],
                                string: ["one", "three"])
    joined = left.join(right,
                       "key",
                       left_outputs: ["key", "number"],
                       right_outputs: ["string"])
    assert_equal(expected, joined)
  end
end
| end |