| # Licensed to the Apache Software Foundation (ASF) under one |
| # or more contributor license agreements. See the NOTICE file |
| # distributed with this work for additional information |
| # regarding copyright ownership. The ASF licenses this file |
| # to you under the Apache License, Version 2.0 (the |
| # "License"); you may not use this file except in compliance |
| # with the License. You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, software |
| # distributed under the License is distributed on an "AS IS" BASIS, |
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| # See the License for the specific language governing permissions and |
| # limitations under the License. |
| |
| using Test, Arrow, Tables, Dates, PooledArrays, TimeZones, UUIDs |
| |
| include(joinpath(dirname(pathof(Arrow)), "../test/testtables.jl")) |
| include(joinpath(dirname(pathof(Arrow)), "../test/integrationtest.jl")) |
| include(joinpath(dirname(pathof(Arrow)), "../test/dates.jl")) |
| |
| struct CustomStruct |
| x::Int |
| y::Float64 |
| z::String |
| end |
| |
| @testset "Arrow" begin |
| |
| @testset "table roundtrips" begin |
| |
| for case in testtables |
| testtable(case...) |
| end |
| |
| end # @testset "table roundtrips" |
| |
| @testset "arrow json integration tests" begin |
| |
| for file in readdir(joinpath(dirname(pathof(Arrow)), "../test/arrowjson")) |
| jsonfile = joinpath(joinpath(dirname(pathof(Arrow)), "../test/arrowjson"), file) |
| println("integration test for $jsonfile") |
| df = ArrowJSON.parsefile(jsonfile); |
| io = IOBuffer() |
| Arrow.write(io, df) |
| seekstart(io) |
| tbl = Arrow.Table(io; convert=false); |
| @test isequal(df, tbl) |
| end |
| |
| end # @testset "arrow json integration tests" |
| |
| @testset "misc" begin |
| |
| # multiple record batches |
| t = Tables.partitioner(((col1=Union{Int64, Missing}[1,2,3,4,5,6,7,8,9,missing],), (col1=Union{Int64, Missing}[1,2,3,4,5,6,7,8,9,missing],))) |
| io = IOBuffer() |
| Arrow.write(io, t) |
| seekstart(io) |
| tt = Arrow.Table(io) |
| @test length(tt) == 1 |
| @test isequal(tt.col1, vcat([1,2,3,4,5,6,7,8,9,missing], [1,2,3,4,5,6,7,8,9,missing])) |
| @test eltype(tt.col1) === Union{Int64, Missing} |
| |
| # Arrow.Stream |
| seekstart(io) |
| str = Arrow.Stream(io) |
| state = iterate(str) |
| @test state !== nothing |
| tt, st = state |
| @test length(tt) == 1 |
| @test isequal(tt.col1, [1,2,3,4,5,6,7,8,9,missing]) |
| |
| state = iterate(str, st) |
| @test state !== nothing |
| tt, st = state |
| @test length(tt) == 1 |
| @test isequal(tt.col1, [1,2,3,4,5,6,7,8,9,missing]) |
| |
| @test iterate(str, st) === nothing |
| |
| # dictionary batch isDelta |
| t = ( |
| col1=Int64[1,2,3,4], |
| col2=Union{String, Missing}["hey", "there", "sailor", missing], |
| col3=NamedTuple{(:a, :b), Tuple{Int64, Union{Missing, NamedTuple{(:c,), Tuple{String}}}}}[(a=Int64(1), b=missing), (a=Int64(1), b=missing), (a=Int64(3), b=(c="sailor",)), (a=Int64(4), b=(c="jo-bob",))] |
| ) |
| t2 = ( |
| col1=Int64[1,2,5,6], |
| col2=Union{String, Missing}["hey", "there", "sailor2", missing], |
| col3=NamedTuple{(:a, :b), Tuple{Int64, Union{Missing, NamedTuple{(:c,), Tuple{String}}}}}[(a=Int64(1), b=missing), (a=Int64(1), b=missing), (a=Int64(5), b=(c="sailor2",)), (a=Int64(4), b=(c="jo-bob",))] |
| ) |
| tt = Tables.partitioner((t, t2)) |
| io = IOBuffer() |
| Arrow.write(io, tt; dictencode=true, dictencodenested=true) |
| seekstart(io) |
| tt = Arrow.Table(io) |
| @test tt.col1 == [1,2,3,4,1,2,5,6] |
| @test isequal(tt.col2, ["hey", "there", "sailor", missing, "hey", "there", "sailor2", missing]) |
| @test isequal(tt.col3, vcat(NamedTuple{(:a, :b), Tuple{Int64, Union{Missing, NamedTuple{(:c,), Tuple{String}}}}}[(a=Int64(1), b=missing), (a=Int64(1), b=missing), (a=Int64(3), b=(c="sailor",)), (a=Int64(4), b=(c="jo-bob",))], NamedTuple{(:a, :b), Tuple{Int64, Union{Missing, NamedTuple{(:c,), Tuple{String}}}}}[(a=Int64(1), b=missing), (a=Int64(1), b=missing), (a=Int64(5), b=(c="sailor2",)), (a=Int64(4), b=(c="jo-bob",))])) |
| |
| t = (col1=Int64[1,2,3,4,5,6,7,8,9,10],) |
| meta = Dict("key1" => "value1", "key2" => "value2") |
| Arrow.setmetadata!(t, meta) |
| meta2 = Dict("colkey1" => "colvalue1", "colkey2" => "colvalue2") |
| Arrow.setmetadata!(t.col1, meta2) |
| io = IOBuffer() |
| Arrow.write(io, t) |
| seekstart(io) |
| tt = Arrow.Table(io) |
| @test length(tt) == length(t) |
| @test tt.col1 == t.col1 |
| @test eltype(tt.col1) === Int64 |
| @test Arrow.getmetadata(tt) == meta |
| @test Arrow.getmetadata(tt.col1) == meta2 |
| |
| # custom compressors |
| lz4 = Arrow.CodecLz4.LZ4FrameCompressor(; compressionlevel=8) |
| Arrow.CodecLz4.TranscodingStreams.initialize(lz4) |
| t = (col1=Int64[1,2,3,4,5,6,7,8,9,10],) |
| io = IOBuffer() |
| Arrow.write(io, t; compress=lz4) |
| seekstart(io) |
| tt = Arrow.Table(io) |
| @test length(tt) == length(t) |
| @test all(isequal.(values(t), values(tt))) |
| |
| zstd = Arrow.CodecZstd.ZstdCompressor(; level=8) |
| Arrow.CodecZstd.TranscodingStreams.initialize(zstd) |
| t = (col1=Int64[1,2,3,4,5,6,7,8,9,10],) |
| io = IOBuffer() |
| Arrow.write(io, t; compress=zstd) |
| seekstart(io) |
| tt = Arrow.Table(io) |
| @test length(tt) == length(t) |
| @test all(isequal.(values(t), values(tt))) |
| |
| # custom alignment |
| t = (col1=Int64[1,2,3,4,5,6,7,8,9,10],) |
| io = IOBuffer() |
| Arrow.write(io, t; alignment=64) |
| seekstart(io) |
| tt = Arrow.Table(io) |
| @test length(tt) == length(t) |
| @test all(isequal.(values(t), values(tt))) |
| |
| # 53 |
| s = "a" ^ 100 |
| t = (a=[SubString(s, 1:10), SubString(s, 11:20)],) |
| io = IOBuffer() |
| Arrow.write(io, t) |
| seekstart(io) |
| tt = Arrow.Table(io) |
| @test tt.a == ["aaaaaaaaaa", "aaaaaaaaaa"] |
| |
| # 49 |
| @test_throws ArgumentError Arrow.Table("file_that_doesnt_exist") |
| |
| # 52 |
| t = (a=Arrow.DictEncode(string.(1:129)),) |
| io = IOBuffer() |
| Arrow.write(io, t) |
| seekstart(io) |
| tt = Arrow.Table(io) |
| |
| # 60: unequal column lengths |
| io = IOBuffer() |
| @test_throws ArgumentError Arrow.write(io, (a = Int[], b = ["asd"], c=collect(1:100))) |
| |
| # nullability of custom extension types |
| t = (a=['a', missing],) |
| io = IOBuffer() |
| Arrow.write(io, t) |
| seekstart(io) |
| tt = Arrow.Table(io) |
| @test isequal(tt.a, ['a', missing]) |
| |
| # automatic custom struct serialization/deserialization |
| t = (col1=[CustomStruct(1, 2.3, "hey"), CustomStruct(4, 5.6, "there")],) |
| io = IOBuffer() |
| Arrow.write(io, t) |
| seekstart(io) |
| tt = Arrow.Table(io) |
| @test length(tt) == length(t) |
| @test all(isequal.(values(t), values(tt))) |
| |
| # 76 |
| t = (col1=NamedTuple{(:a,),Tuple{Union{Int,String}}}[(a=1,), (a="x",)],) |
| io = IOBuffer() |
| Arrow.write(io, t) |
| seekstart(io) |
| tt = Arrow.Table(io) |
| @test length(tt) == length(t) |
| @test all(isequal.(values(t), values(tt))) |
| |
| # 89 - test deprecation path for old UUID autoconversion |
| u = 0x6036fcbd20664bd8a65cdfa25434513f |
| @test Arrow.ArrowTypes.arrowconvert(UUID, (value=u,)) === UUID(u) |
| |
| # 98 |
| t = (a = [Nanosecond(0), Nanosecond(1)], b = [uuid4(), uuid4()], c = [missing, Nanosecond(1)]) |
| io = IOBuffer() |
| Arrow.write(io, t) |
| seekstart(io) |
| tt = Arrow.Table(io) |
| @test copy(tt.a) isa Vector{Nanosecond} |
| @test copy(tt.b) isa Vector{UUID} |
| @test copy(tt.c) isa Vector{Union{Missing,Nanosecond}} |
| |
| # copy on DictEncoding w/ missing values |
| x = PooledArray(["hey", missing]) |
| x2 = Arrow.toarrowvector(x) |
| @test isequal(copy(x2), x) |
| |
| end # @testset "misc" |
| |
| end |