# blob: a46a953464fc7b25ecf1aa7b3e2d701b370b5359 [file] [log] [blame]
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
using Test, Arrow, ArrowTypes, Tables, Dates, PooledArrays, TimeZones, UUIDs, Sockets,
CategoricalArrays, DataAPI, FilePathsBase, DataFrames
using Random: randstring
# Compat shim: Julia versions before 1.9 do not provide `pkgversion`.
if !@isdefined(pkgversion)
    using TOML
    # Return the version recorded in the `Project.toml` found in `pkgdir(m)`.
    function pkgversion(m::Module)
        project_path = joinpath(pkgdir(m), "Project.toml")
        project = TOML.parsefile(project_path)
        return VersionNumber(project["version"])
    end
end
# The ArrowTypes test file is resolved relative to the ArrowTypes sources.
include(joinpath(dirname(pathof(ArrowTypes)), "../test/tests.jl"))
# The remaining files are resolved relative to the Arrow sources and are
# included in exactly this order.
for helper in (
    "../test/testtables.jl",
    "../test/testappend.jl",
    "../test/integrationtest.jl",
    "../test/dates.jl",
)
    include(joinpath(dirname(pathof(Arrow)), helper))
end
# Plain struct with three differently typed fields. It is used as the column
# element type in the "# automatic custom struct serialization/deserialization"
# testset further down in this file.
struct CustomStruct
x::Int
y::Float64
z::String
end
# Struct with a type parameter `sym` that is not stored in any field. The
# "arrowmetadata" testset further down in this file passes that parameter
# through `ArrowTypes.arrowmetadata` and rebuilds the type from it.
struct CustomStruct2{sym}
x::Int
end
@testset "Arrow" begin
@testset "table roundtrips" begin
    # Each entry of `testtables` is splatted into `testtable` as its arguments.
    foreach(testtables) do args
        testtable(args...)
    end
end # @testset "table roundtrips"
@testset "table append" begin
    # skip windows since file locking prevents mktemp cleanup
    if !Sys.iswindows()
        # Same cases as the roundtrip tests, this time through `testappend`.
        foreach(args -> testappend(args...), testtables)
        testappend_partitions()
        foreach(testappend_compression, (:lz4, :zstd))
    end
end # @testset "table append"
@testset "arrow json integration tests" begin
    # Directory with the JSON fixtures, resolved relative to the Arrow sources.
    jsondir = joinpath(dirname(pathof(Arrow)), "../test/arrowjson")
    for name in readdir(jsondir)
        jsonfile = joinpath(jsondir, name)
        println("integration test for $jsonfile")
        # Parse the JSON description, write it as Arrow and read it back
        # without conversion; both representations must compare equal.
        expected = ArrowJSON.parsefile(jsonfile)
        buffer = Arrow.tobuffer(expected)
        actual = Arrow.Table(buffer; convert=false)
        @test isequal(expected, actual)
    end
end # @testset "arrow json integration tests"
@testset "abstract path" begin
    # Make a custom path type that simulates how AWSS3.jl's S3Path works:
    # it wraps another path and only forwards `read` to it.
    struct CustomPath <: AbstractPath
        path::PosixPath
    end
    Base.read(wrapped::CustomPath) = read(wrapped.path)

    original = Arrow.Table(Arrow.tobuffer((col=[0],)))
    mktempdir() do tmpdir
        arrowpath = Path(joinpath(tmpdir, "test.arrow"))
        Arrow.write(arrowpath, original)
        @test isfile(arrowpath)
        # skip windows since file locking prevents mktemp cleanup
        if !Sys.iswindows()
            # Read back once through the plain path, once through the wrapper.
            for wrap in (identity, CustomPath)
                reloaded = Arrow.Table(wrap(arrowpath))
                @test values(original) == values(reloaded)
            end
        end
    end
end # @testset "abstract path"
@testset "misc" begin
@testset "# multiple record batches" begin
    batch1 = Union{Int64, Missing}[1, 2, 3, 4, 5, 6, 7, 8, 9, missing]
    batch2 = Union{Int64, Missing}[missing, 11]
    parts = Tables.partitioner(((col1=batch1,), (col1=batch2,)))
    io = Arrow.tobuffer(parts)

    # Arrow.Table: a single column holding both partitions back to back
    whole = Arrow.Table(io)
    @test length(whole) == 1
    @test isequal(whole.col1, vcat(batch1, batch2))
    @test eltype(whole.col1) === Union{Int64, Missing}

    # Arrow.Stream: iterate manually and expect one table per partition
    seekstart(io)
    stream = Arrow.Stream(io)
    @test eltype(stream) == Arrow.Table
    next = iterate(stream)
    @test next !== nothing
    part, cursor = next
    @test length(part) == 1
    @test isequal(part.col1, batch1)
    next = iterate(stream, cursor)
    @test next !== nothing
    part, cursor = next
    @test length(part) == 1
    @test isequal(part.col1, batch2)
    # The stream is exhausted after the second partition
    @test iterate(stream, cursor) === nothing
    # Collecting the same stream object yields both partitions again
    @test isequal(collect(stream)[1].col1, batch1)
    @test isequal(collect(stream)[2].col1, batch2)
end
@testset "# dictionary batch isDelta" begin
    # Element type of the nested `col3` column
    RowT = NamedTuple{(:a, :b), Tuple{Int64, Union{Missing, NamedTuple{(:c,), Tuple{String}}}}}
    rows1 = RowT[(a=Int64(1), b=missing), (a=Int64(1), b=missing), (a=Int64(3), b=(c="sailor",)), (a=Int64(4), b=(c="jo-bob",))]
    rows2 = RowT[(a=Int64(1), b=missing), (a=Int64(1), b=missing), (a=Int64(5), b=(c="sailor2",)), (a=Int64(4), b=(c="jo-bob",))]
    part1 = (
        col1=Int64[1, 2, 3, 4],
        col2=Union{String, Missing}["hey", "there", "sailor", missing],
        col3=rows1,
    )
    part2 = (
        col1=Int64[1, 2, 5, 6],
        col2=Union{String, Missing}["hey", "there", "sailor2", missing],
        col3=rows2,
    )
    # Write both partitions with dictionary encoding enabled, including nested columns
    buffer = Arrow.tobuffer(Tables.partitioner((part1, part2)); dictencode=true, dictencodenested=true)
    loaded = Arrow.Table(buffer)
    @test loaded.col1 == [1, 2, 3, 4, 1, 2, 5, 6]
    @test isequal(loaded.col2, ["hey", "there", "sailor", missing, "hey", "there", "sailor2", missing])
    @test isequal(loaded.col3, vcat(rows1, rows2))
end
@testset "metadata" begin
    # Table-level and column-level metadata given as String => String dicts
    data = (col1=Int64[1, 2, 3, 4, 5, 6, 7, 8, 9, 10],)
    tablemeta = Dict("key1" => "value1", "key2" => "value2")
    colmeta = Dict("colkey1" => "colvalue1", "colkey2" => "colvalue2")
    buffer = Arrow.tobuffer(data; colmetadata=Dict(:col1 => colmeta), metadata=tablemeta)
    loaded = Arrow.Table(buffer)
    @test length(loaded) == length(data)
    @test loaded.col1 == data.col1
    @test eltype(loaded.col1) === Int64
    @test Arrow.getmetadata(loaded) == Arrow.toidict(tablemeta)
    @test Arrow.getmetadata(loaded.col1) == Arrow.toidict(colmeta)

    # Metadata given as tuples of pairs that mix Symbols and Strings
    data3 = (col1=collect(1:10), col2=collect('a':'j'), col3=collect(1:10))
    mixedmeta = ("key1" => :value1, :key2 => "value2")
    col2meta = ("colkey1" => :colvalue1, :colkey2 => "colvalue2")
    col3meta = ("colkey3" => :colvalue3,)
    buffer3 = Arrow.tobuffer(data3; colmetadata=Dict(:col2 => col2meta, :col3 => col3meta), metadata=mixedmeta)
    loaded3 = Arrow.Table(buffer3)
    @test Arrow.getmetadata(loaded3) == Arrow.toidict(String(k) => String(v) for (k, v) in mixedmeta)
    # No metadata was attached to col1
    @test Arrow.getmetadata(loaded3.col1) === nothing
    for (column, key, value) in (
        (:col2, "colkey1", "colvalue1"),
        (:col2, "colkey2", "colvalue2"),
        (:col3, "colkey3", "colvalue3"),
    )
        @test Arrow.getmetadata(getproperty(loaded3, column))[key] == value
    end
end
@testset "# custom compressors" begin
    data = (col1=Int64[1, 2, 3, 4, 5, 6, 7, 8, 9, 10],)
    # Write `data` with the given compressor object and compare what is read back.
    function check_roundtrip(codec)
        loaded = Arrow.Table(Arrow.tobuffer(data; compress=codec))
        @test length(loaded) == length(data)
        @test all(isequal.(values(data), values(loaded)))
    end

    lz4 = Arrow.CodecLz4.LZ4FrameCompressor(; compressionlevel=8)
    Arrow.CodecLz4.TranscodingStreams.initialize(lz4)
    check_roundtrip(lz4)

    zstd = Arrow.CodecZstd.ZstdCompressor(; level=8)
    Arrow.CodecZstd.TranscodingStreams.initialize(zstd)
    check_roundtrip(zstd)
end
@testset "# custom alignment" begin
    # Write with a non-default `alignment` keyword and read the data back.
    data = (col1=Int64[1, 2, 3, 4, 5, 6, 7, 8, 9, 10],)
    buffer = Arrow.tobuffer(data; alignment=64)
    loaded = Arrow.Table(buffer)
    @test length(loaded) == length(data)
    @test all(isequal.(values(data), values(loaded)))
end
@testset "# 53" begin
    # Column of SubStrings that are views into one longer parent string
    parent = repeat("a", 100)
    views = [SubString(parent, 1, 10), SubString(parent, 11, 20)]
    loaded = Arrow.Table(Arrow.tobuffer((a=views,)))
    @test loaded.a == ["aaaaaaaaaa", "aaaaaaaaaa"]
end
@testset "# 49" begin
    # A missing file must raise SystemError for a String name and for a path object.
    for missingfile in ("file_that_doesnt_exist", p"file_that_doesnt_exist")
        @test_throws SystemError Arrow.Table(missingfile)
    end
end
@testset "# 52" begin
    # No `@test` here: the testset only checks that writing and reading a
    # dict-encoded column with 129 distinct values does not throw.
    labels = map(string, 1:129)
    encoded = (a=Arrow.DictEncode(labels),)
    tt = Arrow.Table(Arrow.tobuffer(encoded))
end
@testset "# 60: unequal column lengths" begin
    # Columns of length 0, 1 and 100 must be rejected when writing.
    ragged = (a = Int[], b = ["asd"], c=collect(1:100))
    sink = IOBuffer()
    @test_throws ArgumentError Arrow.write(sink, ragged)
end
@testset "# nullability of custom extension types" begin
    # A Char column that also contains `missing`
    chars = ['a', missing]
    loaded = Arrow.Table(Arrow.tobuffer((a=chars,)))
    @test isequal(loaded.a, ['a', missing])
end
@testset "# automatic custom struct serialization/deserialization" begin
    # Register an extension name for `CustomStruct` and map that name back to the type.
    Arrow.ArrowTypes.arrowname(::Type{CustomStruct}) = Symbol("JuliaLang.CustomStruct")
    Arrow.ArrowTypes.JuliaType(::Val{Symbol("JuliaLang.CustomStruct")}, S) = CustomStruct
    data = (col1=[CustomStruct(1, 2.3, "hey"), CustomStruct(4, 5.6, "there")],)
    loaded = Arrow.Table(Arrow.tobuffer(data))
    @test length(loaded) == length(data)
    @test all(isequal.(values(data), values(loaded)))
end
@testset "# 76" begin
    # NamedTuple column whose single field is typed as a Union of Int and String
    RowT = NamedTuple{(:a,),Tuple{Union{Int,String}}}
    data = (col1=RowT[(a=1,), (a="x",)],)
    loaded = Arrow.Table(Arrow.tobuffer(data))
    @test length(loaded) == length(data)
    @test all(isequal.(values(data), values(loaded)))
end
@testset "# 89 etc. - UUID FixedSizeListKind overloads" begin
    # The kind reported for UUID must have element type UInt8 and size 16.
    for (accessor, expected) in (
        (Arrow.ArrowTypes.gettype, UInt8),
        (Arrow.ArrowTypes.getsize, 16),
    )
        @test accessor(Arrow.ArrowTypes.ArrowKind(UUID)) == expected
    end
end
@testset "# 98" begin
    data = (a = [Nanosecond(0), Nanosecond(1)], b = [uuid4(), uuid4()], c = [missing, Nanosecond(1)])
    loaded = Arrow.Table(Arrow.tobuffer(data))
    # `copy` of each loaded column must produce a plain Vector of the Julia type.
    for (column, expected) in (
        (:a, Vector{Nanosecond}),
        (:b, Vector{UUID}),
        (:c, Vector{Union{Missing,Nanosecond}}),
    )
        @test copy(getproperty(loaded, column)) isa expected
    end
end
@testset "# copy on DictEncoding w/ missing values" begin
    # Copying the converted vector must reproduce the pooled input, `missing` included.
    pooled = PooledArray(["hey", missing])
    encoded = Arrow.toarrowvector(pooled)
    @test isequal(copy(encoded), pooled)
end
@testset "# some dict encoding coverage" begin
    # signed indices for DictEncodedKind #112 #113 #114
    for ctor in (PooledArray, CategoricalArray)
        encoded = Arrow.toarrowvector(ctor(repeat(["a", "b"], inner = 5)))
        @test isa(first(encoded.indices), Signed)
    end
    # Categorical input with and without `missing`: also check length and eltype
    for (vals, expected_eltype) in (
        (["a", "bb", missing], Union{String, Missing}),
        (["a", "bb", "ccc"], String),
    )
        encoded = Arrow.toarrowvector(CategoricalArray(vals))
        @test isa(first(encoded.indices), Signed)
        @test length(encoded) == 3
        @test eltype(encoded) == expected_eltype
    end
end
@testset "# 120" begin
    # DataAPI accessors on a converted pooled vector that contains `missing`
    encoded = Arrow.toarrowvector(PooledArray(["hey", missing]))
    @test eltype(DataAPI.refpool(encoded)) == Union{Missing, String}
    @test eltype(DataAPI.levels(encoded)) == String
    @test DataAPI.refarray(encoded) == [1, 2]
end
@testset "# 121" begin
    # 130 distinct labels, each repeated 5 times; with `compress=true` the
    # pooled refs are UInt8, while the converted indices must be Int16.
    labels = repeat(string.('S', 1:130), inner=5)
    pooled = PooledArray(labels, compress=true)
    @test eltype(pooled.refs) == UInt8
    encoded = Arrow.toarrowvector(pooled)
    @test eltype(encoded.indices) == Int16
end
@testset "# 123" begin
    # Column of 2-tuples of random Float64 values
    tuples = collect(zip(rand(10), rand(10)))
    data = (x = tuples,)
    loaded = Arrow.Table(Arrow.tobuffer(data))
    @test loaded.x == data.x
end
@testset "# 144" begin
    # Two dict-encoded partitions with 3 and 129 rows; the loaded column has 132.
    short = (a=Arrow.DictEncode([1, 2, 3]),)
    long = (a=Arrow.DictEncode(fill(1, 129)),)
    parts = Tables.partitioner((short, long))
    loaded = Arrow.Table(Arrow.tobuffer(parts))
    @test length(loaded.a) == 132
end
@testset "# 126" begin
    # Three partitions of pooled columns holding 3, 4 and 5 values
    pooled_part(vals) = (a=Arrow.toarrowvector(PooledArray(vals)),)
    parts = Tables.partitioner(map(pooled_part, ([1, 2, 3], [1, 2, 3, 4], [1, 2, 3, 4, 5])))
    loaded = Arrow.Table(Arrow.tobuffer(parts))
    @test length(loaded.a) == 12
    @test loaded.a == [1, 2, 3, 1, 2, 3, 4, 1, 2, 3, 4, 5]

    # A compressed, signed pooled column followed by one with 129 values:
    # writing is expected to log an error for partition 2 and throw.
    small = (a=Arrow.toarrowvector(PooledArray([1, 2, 3], signed=true, compress=true)),)
    large = (a=Arrow.toarrowvector(PooledArray(collect(1:129))),)
    failing = Tables.partitioner((small, large))
    sink = IOBuffer()
    @test_logs (:error, "error writing arrow data on partition = 2") begin
        @test_throws ErrorException Arrow.write(sink, failing)
    end
end
@testset "# 75" begin
    # A column of Sets must come back with a Set element type.
    data = (sets = [Set([1, 2, 3]), Set([1, 2, 3])],)
    loaded = Arrow.Table(Arrow.tobuffer(data))
    @test eltype(loaded.sets) <: Set
end
@testset "# 85" begin
    # A column of heterogeneous Tuples must come back with a Tuple element type.
    data = (tups = [(1, 3.14, "hey"), (1, 3.14, "hey")],)
    loaded = Arrow.Table(Arrow.tobuffer(data))
    @test eltype(loaded.tups) <: Tuple
end
@testset "Nothing" begin
    # A column consisting only of `nothing` values
    data = (nothings=[nothing, nothing, nothing],)
    loaded = Arrow.Table(Arrow.tobuffer(data))
    @test loaded.nothings == [nothing, nothing, nothing]
end
@testset "arrowmetadata" begin
    # arrowmetadata
    data = (col1=[CustomStruct2{:hey}(1), CustomStruct2{:hey}(2)],)
    ArrowTypes.arrowname(::Type{<:CustomStruct2}) = Symbol("CustomStruct2")
    # With only `arrowname` defined, reading is expected to warn and to yield
    # NamedTuple elements.
    # NOTE: `loaded` is assigned inside the `@test_logs` block and again below;
    # keep one name for both so they refer to the same testset-local variable.
    @test_logs (:warn, r"unsupported ARROW:extension:name type: \"CustomStruct2\"") begin
        loaded = Arrow.Table(Arrow.tobuffer(data))
    end
    @test eltype(loaded.col1) <: NamedTuple
    # Expose the type parameter as metadata and rebuild the type from it when reading.
    ArrowTypes.arrowmetadata(::Type{CustomStruct2{P}}) where {P} = P
    ArrowTypes.JuliaType(::Val{:CustomStruct2}, T, meta) = CustomStruct2{Symbol(meta)}
    loaded = Arrow.Table(Arrow.tobuffer(data))
    @test eltype(loaded.col1) == CustomStruct2{:hey}
end
@testset "# 166" begin
    # Nanosecond-precision timestamp without a time zone, at value zero
    NanoTimestamp = Arrow.Timestamp{Arrow.Meta.TimeUnit.NANOSECOND, nothing}
    data = (col1=[zero(NanoTimestamp)],)
    loaded = Arrow.Table(Arrow.tobuffer(data))
    # Accessing the element is expected to log the precision warning.
    @test_logs (:warn, r"automatically converting Arrow.Timestamp with precision = NANOSECOND") begin
        @test loaded.col1[1] == Dates.DateTime(1970)
    end
end
@testset "# 95; Arrow.ToTimestamp" begin
    # Expected element type: millisecond timestamps tagged with the Paris zone
    ParisMillis = Arrow.Timestamp{Arrow.Flatbuf.TimeUnit.MILLISECOND, Symbol("Europe/Paris")}
    zoned = [ZonedDateTime(Dates.DateTime(2020), tz"Europe/Paris")]
    converted = Arrow.ToTimestamp(zoned)
    @test eltype(converted) == ParisMillis
    @test converted[1] == ParisMillis(1577833200000)
end
@testset "# 158" begin
# arrow ipc stream generated from pyarrow with no record batches
# The bytes below are a verbatim fixture and must not be edited or reformatted.
# The assertions after it expect a column `a` with zero rows and element type
# Union{Int64, Missing}.
bytes = UInt8[0xff, 0xff, 0xff, 0xff, 0x78, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x0c, 0x00,
0x06, 0x00, 0x05, 0x00, 0x08, 0x00, 0x0a, 0x00, 0x00, 0x00, 0x00, 0x01, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x08, 0x00,
0x08, 0x00, 0x00, 0x00, 0x04, 0x00, 0x08, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x14, 0x00,
0x00, 0x00, 0x10, 0x00, 0x14, 0x00, 0x08, 0x00, 0x06, 0x00, 0x07, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x10, 0x00, 0x10, 0x00,
0x00, 0x00, 0x00, 0x00, 0x01, 0x02, 0x10, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x61, 0x00, 0x00, 0x00, 0x08, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x07, 0x00, 0x08, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x40, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00]
# Arrow.Table is constructed directly from the byte vector.
tbl = Arrow.Table(bytes)
@test length(tbl.a) == 0
@test eltype(tbl.a) == Union{Int64, Missing}
end
@testset "# 181" begin
    # Wrap an empty Dict in nine further Dict layers: Dict(9 => Dict(8 => ... )).
    nested = foldl((inner, i) -> Dict(i => inner), 1:9; init=Dict{Int,Int}())
    data = (x = [nested],)
    msg = "reached nested serialization level (20) deeper than provided max depth argument (19); to increase allowed nesting level, pass `maxdepth=X`"
    # One level short of what the data needs: must fail with exactly `msg`.
    @test_throws ErrorException(msg) Arrow.tobuffer(data; maxdepth=19)
    # With `maxdepth=20` the same data round-trips.
    @test Arrow.Table(Arrow.tobuffer(data; maxdepth=20)).x == data.x
end
@testset "# 167" begin
    # Column of string vectors with differing lengths
    lists = [["boop", "she"], ["boop", "she"], ["boo"]]
    loaded = Arrow.Table(Arrow.tobuffer((col1=lists,)))
    @test eltype(loaded.col1) == Vector{String}
end
@testset "# 200 VersionNumber" begin
    # A VersionNumber column must keep its element type.
    versions = [v"1"]
    loaded = Arrow.Table(Arrow.tobuffer((col1=versions,)))
    @test eltype(loaded.col1) == VersionNumber
end
@testset "`show`" begin
    table = (; a = 1:5, b = fill(1.0, 5))
    arrow_table = Arrow.Table(Arrow.tobuffer(table))
    # 2 and 3-arg show with no metadata
    short2 = sprint(show, arrow_table)
    short3 = sprint(show, MIME"text/plain"(), arrow_table)
    for rendered in (short2, short3)
        @test length(rendered) < 100
        for needle in ("5 rows", "2 columns", "Int", "Float64")
            @test occursin(needle, rendered)
        end
        @test !occursin("metadata entries", rendered)
    end
    # 2-arg show with metadata
    big_dict = Dict((randstring(rand(5:10)) => randstring(rand(1:3)) for _ = 1:100))
    arrow_table = Arrow.Table(Arrow.tobuffer(table; metadata=big_dict))
    long2 = sprint(show, arrow_table)
    # The length is compared against the 3-arg rendering without metadata.
    @test length(long2) > length(short3)
    @test length(long2) < 200
    @test occursin("metadata entries", long2)
    # 3-arg show with metadata
    long3 = sprint(show, MIME"text/plain"(), arrow_table; context = IOContext(IOBuffer(), :displaysize => (24, 100), :limit=>true))
    @test length(long3) < 1000
    # some but not too many `=>`'s for printing the metadata
    arrow_count = length(collect(eachmatch(r"=>", long3)))
    @test 5 < arrow_count < 20
end
@testset "# 194" begin
    # A table without any columns must round-trip to an empty Arrow.Table.
    @test isempty(Arrow.Table(Arrow.tobuffer(Dict{Symbol, Vector}())))
end
@testset "# 229" begin
    struct Foo229{x}
        y::String
        z::Int
    end
    # Serialize `Foo229{x}` as the tuple `(String(x), y, z)` and rebuild the
    # parameterized type from the first tuple element when reading.
    Arrow.ArrowTypes.arrowname(::Type{<:Foo229}) = Symbol("JuliaLang.Foo229")
    Arrow.ArrowTypes.ArrowType(::Type{Foo229{P}}) where {P} = Tuple{String,String,Int}
    Arrow.ArrowTypes.toarrow(foo::Foo229{P}) where {P} = (String(P), foo.y, foo.z)
    Arrow.ArrowTypes.JuliaType(::Val{Symbol("JuliaLang.Foo229")}, ::Any) = Foo229
    Arrow.ArrowTypes.fromarrow(::Type{<:Foo229}, param, y, z) = Foo229{Symbol(param)}(y, z)
    # Two columns; every element carries a different type parameter.
    cols = (k1=[Foo229{:a}("a", 1), Foo229{:b}("b", 2)], k2=[Foo229{:c}("c", 3), Foo229{:d}("d", 4)])
    loaded = Arrow.Table(Arrow.tobuffer(cols))
    for key in (:k1, :k2)
        @test getproperty(loaded, key) == getproperty(cols, key)
    end
end
@testset "# PR 234" begin
# bugfix parsing primitive arrays
# `buf` is a verbatim byte fixture; do not edit or reformat it. The assertion
# at the end expects the values 1, 2, 3 to be read from it as an Int32 array.
buf = [
0x14,0x00,0x00,0x00,0x00,0x00,0x0e,0x00,0x14,0x00,0x00,0x00,0x10,0x00,0x0c,0x00,0x08,
0x00,0x04,0x00,0x0e,0x00,0x00,0x00,0x2c,0x00,0x00,0x00,0x38,0x00,0x00,0x00,0x38,0x00,
0x00,0x00,0x38,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x03,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x02,0x00,0x00,0x00,0x03,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
]
# Minimal table type used only by this test; it holds the raw bytes and a position.
struct TestData <: Arrow.FlatBuffers.Table
bytes::Vector{UInt8}
pos::Base.Int
end
# Property access for `TestData`: only `:DataInt32` is handled. It looks up an
# offset with `Arrow.FlatBuffers.offset(x, 12)` and, when that offset is
# non-zero, returns an `Int32` array view at it. Any other field name only
# triggers a warning.
function Base.getproperty(x::TestData, field::Symbol)
if field === :DataInt32
o = Arrow.FlatBuffers.offset(x, 12)
o != 0 && return Arrow.FlatBuffers.Array{Int32}(x, o)
else
@warn "field $field not supported"
end
end
d = Arrow.FlatBuffers.getrootas(TestData, buf, 0);
# The array holds Int32 values; comparing against a UInt32 vector checks values only.
@test d.DataInt32 == UInt32[1,2,3]
end
@testset "# test multiple inputs treated as one table" begin
    base = (
        col1=[1, 2, 3, 4, 5],
        col2=[1.2, 2.3, 3.4, 4.5, 5.6],
    )
    # schemas must match between multiple inputs; this table has a different one
    mismatched = (
        col1=[1.2, 2.3, 3.4, 4.5, 5.6],
    )

    # Arrow.Table: two inputs with the same schema are read as one table
    combined = Arrow.Table([Arrow.tobuffer(base), Arrow.tobuffer(base)])
    @test combined.col1 == [1, 2, 3, 4, 5, 1, 2, 3, 4, 5]
    @test combined.col2 == [1.2, 2.3, 3.4, 4.5, 5.6, 1.2, 2.3, 3.4, 4.5, 5.6]
    @test_throws ArgumentError Arrow.Table([Arrow.tobuffer(base), Arrow.tobuffer(mismatched)])

    # Arrow.Stream: the same two inputs are yielded as two tables
    batches = collect(Arrow.Stream([Arrow.tobuffer(base), Arrow.tobuffer(base)]))
    @test batches[1].col1 == batches[2].col1
    @test batches[1].col2 == batches[2].col2
    @test_throws ArgumentError collect(Arrow.Stream([Arrow.tobuffer(base), Arrow.tobuffer(mismatched)]))
end
@testset "# 253" begin
    # https://github.com/apache/arrow-julia/issues/253
    # An empty vector of String pairs must map to an empty ImmutableDict.
    nopairs = Pair{String, String}[]
    @test Arrow.toidict(nopairs) == Base.ImmutableDict{String, String}()
end
@testset "# 232" begin
    # https://github.com/apache/arrow-julia/issues/232
    keymsg = "`keytype(d)` must be concrete to serialize map-like `d`, but `keytype(d) == Real`"
    valmsg = "`valtype(d)` must be concrete to serialize map-like `d`, but `valtype(d) == Real`"
    # Bool and Float64 keys mixed
    mixed_keys = (; x=[Dict(true => 1.32, 1.2 => 0.53495216)])
    @test_throws ArgumentError(keymsg) Arrow.tobuffer(mixed_keys)
    # Bool and Float64 values mixed
    mixed_vals = (; x=[Dict(32.0 => true, 1.2 => 0.53495216)])
    @test_throws ArgumentError(valmsg) Arrow.tobuffer(mixed_vals)
    # Keys and values both mixed: the key error is the one expected
    mixed_both = (; x=[Dict(true => 1.32, 1.2 => true)])
    @test_throws ArgumentError(keymsg) Arrow.tobuffer(mixed_both)
end
@testset "# 214" begin
    # https://github.com/apache/arrow-julia/issues/214
    # Write, load, write the loaded table again and load once more.
    roundtrip_twice(tbl) = Arrow.Table(Arrow.tobuffer(Arrow.Table(Arrow.tobuffer(tbl))))
    for original in (
        (; x = [(Nanosecond(42),)]),
        (; x = [(; a=Nanosecond(i), b=Nanosecond(i+1)) for i = 1:5]),
    )
        final = roundtrip_twice(original)
        @test final.x == original.x
    end
end
@testset "Writer" begin
    numbers = 1:26
    letters = 'A':'Z'
    chunksize = 10
    sink = IOBuffer()
    writer = open(Arrow.Writer, sink)
    # Feed the writer one chunk of at most `chunksize` rows at a time.
    chunks = zip(
        Iterators.partition(numbers, chunksize),
        Iterators.partition(letters, chunksize),
    )
    for (number_chunk, letter_chunk) in chunks
        Arrow.write(writer, (a = number_chunk, b = letter_chunk))
    end
    close(writer)
    # Reading the buffer back must give the full, unpartitioned columns.
    seekstart(sink)
    loaded = Arrow.Table(sink)
    @test loaded.a == collect(numbers)
    @test loaded.b == collect(letters)
end
@testset "# Empty input" begin
    # Zero bytes must construct both readers and yield no rows / no partitions.
    for (Reader, accessor) in ((Arrow.Table, Tables.rows), (Arrow.Stream, Tables.partitions))
        @test Reader(UInt8[]) isa Reader
        @test isempty(accessor(Reader(UInt8[])))
    end
end
@testset "# 324" begin
    # https://github.com/apache/arrow-julia/issues/324
    # In-place filtering of a converted vector must raise ArgumentError.
    @test_throws ArgumentError filter!(>(1), Arrow.toarrowvector([1, 2, 3]))
end
@testset "# 327" begin
    # https://github.com/apache/arrow-julia/issues/327
    # Convert a ZonedDateTime to its arrow form and back.
    zoned = ZonedDateTime(DateTime(2020, 11, 1, 6), tz"America/New_York"; from_utc=true)
    restored = ArrowTypes.fromarrow(ZonedDateTime, ArrowTypes.toarrow(zoned))
    @test zoned == restored
    # Check that we still correctly read in old TimeZones
    expected_col = fill(ZonedDateTime(DateTime(1, 2, 3, 4, 5, 6), tz"UTC+3"), 5)
    old_table = Arrow.Table(joinpath(@__DIR__, "old_zdt.arrow"))
    @test expected_col == old_table.col
end
@testset "# 243" begin
    # https://github.com/apache/arrow-julia/issues/243
    # need the ArrowTypes bugfix to pass this test
    has_bugfix = pkgversion(ArrowTypes) >= v"2.0.1"
    if has_bugfix
        data = (; col = [(; v=v"1"), (; v=v"2"), missing])
        @test isequal(Arrow.Table(Arrow.tobuffer(data)).col, data.col)
    end
end
@testset "# 367" begin
    # https://github.com/apache/arrow-julia/issues/367
    # Only run against ArrowTypes 2.0.2 or newer.
    new_enough = pkgversion(ArrowTypes) >= v"2.0.2"
    if new_enough
        data = (; x=Union{ZonedDateTime,Missing}[missing])
        loaded = Arrow.Table(Arrow.tobuffer(data))
        @test Tables.schema(loaded) == Tables.schema(data)
        @test isequal(loaded.x, data.x)
    end
end
@testset "# 414" begin
    # https://github.com/apache/arrow-julia/issues/414
    # Round-trip a table with 65536 columns named "1" through "65536".
    # The checks live in their own testset, like every other issue test in
    # this file, so failures are reported under "# 414".
    # The columns are handed to `DataFrame` as one vector of pairs instead of
    # being splatted into a 65536-argument call.
    df = DataFrame(["$i" => rand(1000) for i in 1:65536])
    df_load = Arrow.Table(Arrow.tobuffer(df))
    @test Tables.schema(df) == Tables.schema(df_load)
    # Compare the tables column by column.
    for (col1, col2) in zip(Tables.columns(df), Tables.columns(df_load))
        @test col1 == col2
    end
end
@testset "# 411" begin
    # Vector{UInt8} are written as List{UInt8} in Arrow
    # Base.CodeUnits are written as Binary
    data = (
        a=[[0x00, 0x01], UInt8[], [0x03]],
        am=[[0x00, 0x01], [0x03], missing],
        b=[b"01", b"", b"3"],
        bm=[b"01", b"3", missing],
        c=["a", "b", "c"],
        cm=["a", "c", missing]
    )
    buf = Arrow.tobuffer(data)
    loaded = Arrow.Table(buf)
    # Each plain column is compared with `==`, its missing-carrying twin with `isequal`.
    for (plain, nullable) in ((:a, :am), (:b, :bm), (:c, :cm))
        @test getproperty(data, plain) == getproperty(loaded, plain)
        @test isequal(getproperty(data, nullable), getproperty(loaded, nullable))
    end
    @test Arrow.schema(loaded)[].fields[1].type isa Arrow.Flatbuf.List
    @test Arrow.schema(loaded)[].fields[3].type isa Arrow.Flatbuf.Binary

    # Append the loaded table to the same buffer, then split the bytes at the
    # position recorded before the append and load both halves separately.
    split_at = position(buf)
    Arrow.append(buf, loaded)
    seekstart(buf)
    first_bytes = read(buf, split_at)
    rest_bytes = read(buf)
    first_table = Arrow.Table(first_bytes)
    rest_table = Arrow.Table(rest_bytes)
    for column in (:a, :am, :b, :bm, :c, :cm)
        @test isequal(getproperty(first_table, column), getproperty(rest_table, column))
    end
end
@testset "# 435" begin
    # Fixture file stored in Arrow's test directory.
    fixture = joinpath(dirname(pathof(Arrow)), "../test/java_compress_len_neg_one.arrow")
    loaded = Arrow.Table(fixture)
    @test length(loaded) == 15
    @test length(loaded.isA) == 102
end
end # @testset "misc"
end