Allow `Arrow.append` to target a nonexistent or empty file/IO (#358)
`Arrow.append` can now be called on an empty or nonexistent file: when the
target contains no data, it falls back to a plain `Arrow.write`.
diff --git a/src/append.jl b/src/append.jl
index 5f521dd..4bd45b3 100644
--- a/src/append.jl
+++ b/src/append.jl
@@ -60,7 +60,7 @@
append(io_or_file; kw...) = x -> append(io_or_file, x; kw...)
function append(file::String, tbl; kwargs...)
- open(file, "r+") do io
+ open(file, isfile(file) ? "r+" : "w+") do io
append(io, tbl; file=true, kwargs...)
end
@@ -84,20 +84,42 @@
throw(ArgumentError("ntasks keyword argument must be > 0; pass `ntasks=1` to disable multithreaded writing"))
end
- isstream, arrow_schema, compress = stream_properties(io; convert=convert)
- if !isstream
- throw(ArgumentError("append is supported only to files in arrow stream format"))
- end
+ startpos = position(io)
+ seekend(io)
+ len = position(io) - startpos
+ seek(io, startpos) # leave the stream position unchanged
- if compress === :lz4
- compress = LZ4_FRAME_COMPRESSOR
- elseif compress === :zstd
- compress = ZSTD_COMPRESSOR
- elseif compress isa Symbol
- throw(ArgumentError("unsupported compress keyword argument value: $compress. Valid values include `:lz4` or `:zstd`"))
- end
+ if len == 0 # empty file, not initialized, we can just write to it
+ kwargs = Dict{Symbol, Any}(
+ :largelists => largelists,
+ :denseunions => denseunions,
+ :dictencode => dictencode,
+ :dictencodenested => dictencodenested,
+ :alignment => alignment,
+ :maxdepth => maxdepth,
+ :metadata => metadata,
+ :colmetadata => colmetadata,
+ )
+ if isa(ntasks, Integer)
+ kwargs[:ntasks] = ntasks
+ end
+ write(io, tbl; kwargs...)
+ else
+ isstream, arrow_schema, compress = stream_properties(io; convert=convert)
+ if !isstream
+ throw(ArgumentError("append is supported only to files in arrow stream format"))
+ end
- append(io, tbl, arrow_schema, compress, largelists, denseunions, dictencode, dictencodenested, alignment, maxdepth, ntasks, metadata, colmetadata)
+ if compress === :lz4
+ compress = LZ4_FRAME_COMPRESSOR
+ elseif compress === :zstd
+ compress = ZSTD_COMPRESSOR
+ elseif compress isa Symbol
+ throw(ArgumentError("unsupported compress keyword argument value: $compress. Valid values include `:lz4` or `:zstd`"))
+ end
+
+ append(io, tbl, arrow_schema, compress, largelists, denseunions, dictencode, dictencodenested, alignment, maxdepth, ntasks, metadata, colmetadata)
+ end
return io
end
diff --git a/test/testappend.jl b/test/testappend.jl
index 0b43784..d4834dd 100644
--- a/test/testappend.jl
+++ b/test/testappend.jl
@@ -86,6 +86,14 @@
end
@test_throws ArgumentError Arrow.append(file2, arrow_table1)
+ # can append to an empty file
+ rm(file2)
+ for _ in 1:5
+ Arrow.append(file2, arrow_table1)
+ end
+ appended_table1 = Arrow.Table(file2)
+ @test length(Tables.columns(appended_table1)[1]) == 50
+
# schema must match
testdata2 = (col2=Int64[1,2,3,4,5,6,7,8,9,10],)
open(file2, "w") do io