# blob: 5f8e67e25d64bde4b87e13ea30d5e0c3145066c7 [file] [log] [blame]
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
    CompressedBuffer(data, uncompressedlength)

A single buffer after compression.

# Fields
- `data::Vector{UInt8}`: the compressed bytes (empty when there is nothing to write).
- `uncompressedlength::Int64`: the byte length of the buffer before compression.
"""
struct CompressedBuffer
    data::Vector{UInt8}
    uncompressedlength::Int64
end
"""
    Arrow.Compressed{Z, A}

Represents the compressed version of an [`ArrowVector`](@ref).
Holds a reference to the original column. May have `Compressed`
children for nested array types.

The type parameter `Z` is the value reported by `compressiontype`; `A` is the
type of the wrapped original column.

# Fields
- `data::A`: the original (uncompressed) column.
- `buffers::Vector{CompressedBuffer}`: the compressed buffers of this column.
- `len::Int64`: the length reported by `length` and stored in the field node.
- `nullcount::Int64`: the null count stored in the field node.
- `children::Vector{Compressed}`: compressed children for nested array types.
"""
struct Compressed{Z, A}
    data::A
    buffers::Vector{CompressedBuffer}
    len::Int64
    nullcount::Int64
    children::Vector{Compressed}
end
# The length of a compressed column is the `len` recorded at construction time.
function Base.length(col::Compressed)
    return col.len
end
# Element type of a compressed column: the element type of the wrapped column
# type `A`. The method is defined on the *type* (as Base recommends for new
# types) so that `eltype(typeof(c))` and `eltype(c)` agree; the instance form
# keeps working through Base's `eltype(x) = eltype(typeof(x))` fallback.
Base.eltype(::Type{Compressed{Z, A}}) where {Z, A} = eltype(A)
# Metadata of a compressed column is delegated to the original column it wraps.
function getmetadata(x::Compressed)
    return getmetadata(x.data)
end
# Return the `Z` type parameter of a `Compressed` column.
function compressiontype(::Compressed{Z}) where {Z}
    return Z
end
"""
    compress(Z, comp, x::Array) -> CompressedBuffer

Compress the raw memory of `x` with the codec `comp`.

The array's memory is viewed as bytes (no copy), passed through `transcode`,
and wrapped in a `CompressedBuffer` together with the original byte length.
`Z` is not used by this method's body.
"""
function compress(Z::Meta.CompressionType, comp, x::Array)
    # `x` must stay rooted while its memory is aliased through a raw pointer.
    GC.@preserve x begin
        nbytes = sizeof(x)
        byteptr = convert(Ptr{UInt8}, pointer(x))
        rawbytes = unsafe_wrap(Array, byteptr, nbytes)
        compressed = transcode(comp, rawbytes)
        return CompressedBuffer(compressed, length(rawbytes))
    end
end
# Fallback for non-`Array` inputs: convert to an `Array` first, then dispatch
# to the `Array` method.
function compress(Z::Meta.CompressionType, comp, x)
    asarray = convert(Array, x)
    return compress(Z, comp, asarray)
end
"""
    compress(Z, comp, v::ValidityBitmap) -> CompressedBuffer

Compress the bytes backing the validity bitmap `v`.

When `v.nc == 0` an empty `CompressedBuffer` (no data, uncompressed length 0)
is returned. Otherwise the `cld(v.ℓ, 8)` bytes of `v.bytes` starting at `v.pos`
are compressed.
"""
function compress(Z::Meta.CompressionType, comp, v::ValidityBitmap)
    if v.nc == 0
        return CompressedBuffer(UInt8[], 0)
    end
    lastbyte = v.pos + cld(v.ℓ, 8) - 1
    return compress(Z, comp, view(v.bytes, v.pos:lastbyte))
end
"""
    makenodesbuffers!(col::Compressed, fieldnodes, fieldbuffers, bufferoffset, alignment)

Record the field node and buffer layout of the compressed column `col`.

Pushes one `FieldNode` for `col` onto `fieldnodes`, one `Buffer` per compressed
buffer onto `fieldbuffers`, then recurses into `col.children`. Returns the
buffer offset following everything that was recorded.
"""
function makenodesbuffers!(col::Compressed, fieldnodes, fieldbuffers, bufferoffset, alignment)
    push!(fieldnodes, FieldNode(col.len, col.nullcount))
    @debug 1 "made field node: nodeidx = $(length(fieldnodes)), col = $(typeof(col)), len = $(fieldnodes[end].length), nc = $(fieldnodes[end].null_count)"
    for cbuf in col.buffers
        nbytes = length(cbuf.data)
        # Non-empty buffers carry 8 extra bytes: the Int64 uncompressed-length
        # prefix that `writearray` emits ahead of the compressed data.
        blen = nbytes == 0 ? 0 : 8 + nbytes
        push!(fieldbuffers, Buffer(bufferoffset, blen))
        @debug 1 "made field buffer: bufferidx = $(length(fieldbuffers)), offset = $(fieldbuffers[end].offset), len = $(fieldbuffers[end].length), padded = $(padding(fieldbuffers[end].length, alignment))"
        bufferoffset += padding(blen, alignment)
    end
    for nested in col.children
        bufferoffset = makenodesbuffers!(nested, fieldnodes, fieldbuffers, bufferoffset, alignment)
    end
    return bufferoffset
end
"""
    writearray(io, b::CompressedBuffer) -> Int

Write the compressed buffer `b` to `io` and return the number of bytes written.

A non-empty buffer is written as its `uncompressedlength` followed by the
compressed `data`. An empty buffer writes nothing and returns `0`.
"""
function writearray(io, b::CompressedBuffer)
    # Nothing to emit for an empty buffer.
    isempty(b.data) && return 0
    n = Base.write(io, b.uncompressedlength)
    @debug 1 "writing compressed buffer: uncompressedlength = $(b.uncompressedlength), n = $(length(b.data))"
    @debug 2 b.data
    return n + Base.write(io, b.data)
end
"""
    writebuffer(io, col::Compressed, alignment)

Write every compressed buffer of `col` to `io`, each followed by
`paddinglength(n, alignment)` zero bytes (where `n` is the number of bytes
just written), then do the same recursively for `col.children`.
Returns `nothing`.
"""
function writebuffer(io, col::Compressed, alignment)
    @debug 1 "writebuffer: col = $(typeof(col))"
    @debug 2 col
    for cbuf in col.buffers
        written = writearray(io, cbuf)
        writezeros(io, paddinglength(written, alignment))
    end
    foreach(col.children) do nested
        writebuffer(io, nested, alignment)
    end
    return nothing
end