blob: 5e6710b91025c1a64722caf6a5c970956f7b59b6 [file] [log] [blame]
"""
    juliaeltype(f::Meta.Field, ...)

Given a flatbuffers metadata type definition (a `Field` instance from Schema.fbs),
translate it to the appropriate Julia storage eltype.

Methods taking a `convert` flag additionally map the storage eltype through
`finaljuliatype` when `convert` is `true`.
"""
function juliaeltype end
# Fallback: a type without a dedicated user-facing counterpart is returned as-is.
function finaljuliatype(T)
    return T
end
# `Missing` maps to itself.
function finaljuliatype(::Type{Missing})
    return Missing
end
# For a `Union{T, Missing}` only the non-missing part is translated.
function finaljuliatype(::Type{Union{T, Missing}}) where {T}
    inner = finaljuliatype(T)
    return Union{Missing, inner}
end
"""
    arrowtype(b, x)

Given a `FlatBuffers.Builder` and a Julia column or column eltype,
write the `field.type` flatbuffer definition.

The methods in this file return a 3-tuple: the `Meta` type, the value returned by the
matching `Meta.*End(b)` call, and either a vector of child field offsets or `nothing`.
"""
function arrowtype end
# A generic column is described through its element type (after `maybemissing`).
function arrowtype(b, col::AbstractVector{T}) where {T}
    return arrowtype(b, maybemissing(T))
end
# Dictionary-encoded columns are described by the data of their encoding.
function arrowtype(b, col::DictEncoded)
    return arrowtype(b, col.encoding.data)
end
# Compressed columns are described by the data they wrap.
function arrowtype(b, col::Compressed)
    return arrowtype(b, col.data)
end
# Field without custom metadata: return the storage eltype, passed through
# `finaljuliatype` when `convert` is true.
function juliaeltype(f::Meta.Field, ::Nothing, convert::Bool)
    storage = juliaeltype(f, convert)
    convert || return storage
    return finaljuliatype(storage)
end
# Field with custom metadata: when converting, an extension type resolved from the
# metadata (via `ArrowTypes.extensiontype`) takes precedence over the final Julia type.
function juliaeltype(f::Meta.Field, meta::Dict{String, String}, convert::Bool)
    storage = juliaeltype(f, convert)
    if !convert
        return storage
    end
    fallback = finaljuliatype(storage)
    extension = ArrowTypes.extensiontype(meta)
    return something(extension, fallback)
end
# Resolve the eltype from the field's type metadata, widening it with `Missing`
# when the field is nullable.
function juliaeltype(f::Meta.Field, convert::Bool)
    eltyp = juliaeltype(f, f.type, convert)
    if f.nullable
        return Union{eltyp, Missing}
    end
    return eltyp
end
# Arrow `Null` <-> Julia `Missing`.
function juliaeltype(f::Meta.Field, ::Meta.Null, convert)
    return Missing
end
function arrowtype(b, ::Type{Missing})
    Meta.nullStart(b)
    offset = Meta.nullEnd(b)
    return (Meta.Null, offset, nothing)
end
# Map Arrow integer metadata (bit width + signedness) to the matching Julia integer
# type. Bit widths other than 8, 16, 32, 64 and 128 raise `InvalidMetadataError`.
function juliaeltype(f::Meta.Field, int::Meta.Int, convert)
    signed = int.is_signed
    width = int.bitWidth
    if width == 8
        return signed ? Int8 : UInt8
    elseif width == 16
        return signed ? Int16 : UInt16
    elseif width == 32
        return signed ? Int32 : UInt32
    elseif width == 64
        return signed ? Int64 : UInt64
    elseif width == 128
        return signed ? Int128 : UInt128
    end
    throw(InvalidMetadataError("$int is not valid arrow type metadata"))
end
# Write the Arrow `Int` definition for a Julia integer type: bit width is taken
# from `sizeof(T)` and signedness from `T <: Signed`.
function arrowtype(b, ::Type{T}) where {T <: Integer}
    Meta.intStart(b)
    nbits = Int32(sizeof(T) * 8)
    Meta.intAddBitWidth(b, nbits)
    signed = T <: Signed
    Meta.intAddIsSigned(b, signed)
    offset = Meta.intEnd(b)
    return (Meta.Int, offset, nothing)
end
# primitive types
# Map Arrow floating point metadata to the Julia float type of matching precision.
# A precision that is none of HALF/SINGLE/DOUBLE raises `InvalidMetadataError`
# (same message format as the integer method) instead of silently returning `nothing`.
function juliaeltype(f::Meta.Field, fp::Meta.FloatingPoint, convert)
    if fp.precision == Meta.Precision.HALF
        return Float16
    elseif fp.precision == Meta.Precision.SINGLE
        return Float32
    elseif fp.precision == Meta.Precision.DOUBLE
        return Float64
    else
        throw(InvalidMetadataError("$fp is not valid arrow type metadata"))
    end
end
# Write the Arrow `FloatingPoint` definition. `Float16` and `Float32` map to HALF
# and SINGLE; every other `AbstractFloat` is written as DOUBLE.
function arrowtype(b, ::Type{T}) where {T <: AbstractFloat}
    Meta.floatingPointStart(b)
    prec = if T === Float16
        Meta.Precision.HALF
    elseif T === Float32
        Meta.Precision.SINGLE
    else
        Meta.Precision.DOUBLE
    end
    Meta.floatingPointAddPrecision(b, prec)
    offset = Meta.floatingPointEnd(b)
    return (Meta.FloatingPoint, offset, nothing)
end
# Both UTF-8 string layouts are read as `String`.
function juliaeltype(f::Meta.Field, b::Union{Meta.Utf8, Meta.LargeUtf8}, convert)
    return String
end
# Number of data bytes occupied by `x`: `sizeof(x)` for a single value, and the
# (recursive) sum over all elements for a vector.
datasizeof(x) = sizeof(x)
# `init=0` makes an empty vector report 0 bytes instead of raising the
# "reducing over an empty collection" error that `sum(f, x)` produces.
datasizeof(x::AbstractVector) = mapreduce(datasizeof, +, x; init=0)
# Variable-length binary data is read as a byte vector.
function juliaeltype(f::Meta.Field, b::Union{Meta.Binary, Meta.LargeBinary}, convert)
    return Vector{UInt8}
end
# Fixed-size binary data is read as a tuple of `byteWidth` bytes.
function juliaeltype(f::Meta.Field, x::Meta.FixedSizeBinary, convert)
    nbytes = Int(x.byteWidth)
    return NTuple{nbytes, UInt8}
end
# Write a homogeneous tuple element by element and return the total of the
# per-element `write` results.
# NOTE(review): this adds a method to `Base.write` for a Base-owned type (type piracy).
function Base.write(io::IO, x::NTuple{N, T}) where {N, T}
    return sum(x) do elem
        Base.write(io, elem)
    end
end
# Arrow `Bool` <-> Julia `Bool`.
function juliaeltype(f::Meta.Field, x::Meta.Bool, convert)
    return Bool
end
function arrowtype(b, ::Type{Bool})
    Meta.boolStart(b)
    offset = Meta.boolEnd(b)
    return (Meta.Bool, offset, nothing)
end
# Decimal value stored as a raw `Int128`; the precision `P` and scale `S` are carried
# only as type parameters.
struct Decimal{P, S}
    value::Int128
end
# The zero decimal wraps a zero `Int128`.
function Base.zero(::Type{Decimal{P, S}}) where {P, S}
    return Decimal{P, S}(Int128(0))
end
# Equality is defined between decimals of identical precision/scale by comparing
# the stored values.
function ==(a::Decimal{P, S}, b::Decimal{P, S}) where {P, S}
    return a.value == b.value
end
function Base.isequal(a::Decimal{P, S}, b::Decimal{P, S}) where {P, S}
    return isequal(a.value, b.value)
end
# Arrow decimal metadata carries precision and scale, which become the type parameters.
juliaeltype(f::Meta.Field, x::Meta.Decimal, convert) = Decimal{x.precision, x.scale}
# Decimals are declared as primitive arrow types.
function ArrowTypes.ArrowType(::Type{<:Decimal})
    return PrimitiveType()
end
# Write the Arrow `Decimal` definition, taking precision and scale from the type
# parameters (both written as `Int32`).
function arrowtype(b, ::Type{Decimal{P, S}}) where {P, S}
    Meta.decimalStart(b)
    precision32 = Int32(P)
    Meta.decimalAddPrecision(b, precision32)
    scale32 = Int32(S)
    Meta.decimalAddScale(b, scale32)
    offset = Meta.decimalEnd(b)
    return (Meta.Decimal, offset, nothing)
end
# A decimal is serialised by writing its raw `Int128` value.
function Base.write(io::IO, x::Decimal)
    return Base.write(io, x.value)
end
# Common supertype of the date/time wrappers in this file; `write` relies on the
# subtype keeping its raw value in a field named `x`.
abstract type ArrowTimeType end
function Base.write(io::IO, x::ArrowTimeType)
    return Base.write(io, x.x)
end
# All time wrappers are declared as primitive arrow types.
function ArrowTypes.ArrowType(::Type{<:ArrowTimeType})
    return PrimitiveType()
end
# Arrow date: `U` is the date unit, `T` the integer type of the stored value.
struct Date{U, T} <: ArrowTimeType
    x::T
end
function Base.zero(::Type{Date{U, T}}) where {U, T}
    return Date{U, T}(T(0))
end
# The storage type is the second type parameter.
function storagetype(::Type{Date{U, T}}) where {U, T}
    return T
end
# Storage integer type per date unit: `Int32` for DAY, `Int64` otherwise.
function bitwidth(x::Meta.DateUnit)
    if x == Meta.DateUnit.DAY
        return Int32
    end
    return Int64
end
# Unit-only constructors that fill in the storage type and convert the argument.
function Date{Meta.DateUnit.DAY}(days)
    return Date{Meta.DateUnit.DAY, Int32}(Int32(days))
end
function Date{Meta.DateUnit.MILLISECOND}(ms)
    return Date{Meta.DateUnit.MILLISECOND, Int64}(Int64(ms))
end
# Aliases for the two concrete date representations.
const DATE = Date{Meta.DateUnit.DAY, Int32}
const DATETIME = Date{Meta.DateUnit.MILLISECOND, Int64}
# Arrow date metadata -> `Date{unit, storage}`.
function juliaeltype(f::Meta.Field, x::Meta.Date, convert)
    unit = x.unit
    return Date{unit, bitwidth(unit)}
end
# Day-resolution dates convert to `Dates.Date`; the stored value is offset by
# `UNIX_EPOCH_DATE` before being handed to `Dates.UTD`.
function finaljuliatype(::Type{Date{Meta.DateUnit.DAY, Int32}})
    return Dates.Date
end
function Base.convert(::Type{Dates.Date}, x::Date{Meta.DateUnit.DAY, Int32})
    days = Int64(x.x + UNIX_EPOCH_DATE)
    return Dates.Date(Dates.UTD(days))
end
# Millisecond-resolution dates convert to `Dates.DateTime`; the stored value is
# offset by `UNIX_EPOCH_DATETIME` before being handed to `Dates.UTM`.
function finaljuliatype(::Type{Date{Meta.DateUnit.MILLISECOND, Int64}})
    return Dates.DateTime
end
function Base.convert(::Type{Dates.DateTime}, x::Date{Meta.DateUnit.MILLISECOND, Int64})
    millis = Int64(x.x + UNIX_EPOCH_DATETIME)
    return Dates.DateTime(Dates.UTM(millis))
end
# Write the Arrow `Date` definition; only the unit is recorded.
function arrowtype(b, ::Type{Date{U, T}}) where {U, T}
    Meta.dateStart(b)
    Meta.dateAddUnit(b, U)
    offset = Meta.dateEnd(b)
    return (Meta.Date, offset, nothing)
end
# `Dates.value` of 1970-01-01 as a `Dates.Date`; subtracted when converting to the
# arrow day count.
const UNIX_EPOCH_DATE = Dates.value(Dates.Date(1970))
function Base.convert(::Type{Date{Meta.DateUnit.DAY, Int32}}, x::Dates.Date)
    days = Int32(Dates.value(x) - UNIX_EPOCH_DATE)
    return Date{Meta.DateUnit.DAY, Int32}(days)
end
# `Dates.value` of 1970-01-01T00:00:00 as a `Dates.DateTime`; subtracted when
# converting to the arrow millisecond count.
const UNIX_EPOCH_DATETIME = Dates.value(Dates.DateTime(1970))
function Base.convert(::Type{Date{Meta.DateUnit.MILLISECOND, Int64}}, x::Dates.DateTime)
    millis = Int64(Dates.value(x) - UNIX_EPOCH_DATETIME)
    return Date{Meta.DateUnit.MILLISECOND, Int64}(millis)
end
# Arrow time of day: `U` is the time unit, `T` the integer type of the stored value.
struct Time{U, T} <: ArrowTimeType
    x::T
end
function Base.zero(::Type{Time{U, T}}) where {U, T}
    return Time{U, T}(T(0))
end
# Alias for the nanosecond-resolution time representation.
const TIME = Time{Meta.TimeUnit.NANOSECOND, Int64}
# Storage integer type per time unit: `Int32` for SECOND and MILLISECOND,
# `Int64` otherwise.
function bitwidth(x::Meta.TimeUnit)
    narrow = x == Meta.TimeUnit.SECOND || x == Meta.TimeUnit.MILLISECOND
    return narrow ? Int32 : Int64
end
# Unit-only constructor: the storage integer type is derived from the unit through
# `bitwidth(U)` and `x` is converted to it.
# `U` is a time-unit *value* carried as a type parameter (as in
# `Time{Meta.TimeUnit.NANOSECOND, Int64}`), so a subtype bound such as
# `U <: Meta.TimeUnit` can never be satisfied by it and the method would never be
# selected; the parameter is therefore left unconstrained.
Time{U}(x) where {U} = Time{U, bitwidth(U)}(bitwidth(U)(x))
# The storage type is the second type parameter.
function storagetype(::Type{Time{U, T}}) where {U, T}
    return T
end
# Arrow time metadata -> `Time{unit, storage}`.
function juliaeltype(f::Meta.Field, x::Meta.Time, convert)
    unit = x.unit
    return Time{unit, bitwidth(unit)}
end
# Every `Time` parameterisation converts to `Dates.Time`.
function finaljuliatype(::Type{<:Time})
    return Dates.Time
end
# `Dates` period type for an arrow time unit; anything other than SECOND,
# MILLISECOND or MICROSECOND maps to `Dates.Nanosecond`.
function periodtype(U::Meta.TimeUnit)
    if U === Meta.TimeUnit.SECOND
        return Dates.Second
    elseif U === Meta.TimeUnit.MILLISECOND
        return Dates.Millisecond
    elseif U === Meta.TimeUnit.MICROSECOND
        return Dates.Microsecond
    end
    return Dates.Nanosecond
end
# Interpret the stored value in its unit, express it in nanoseconds via
# `Dates.tons`, and build a `Dates.Time` from that.
function Base.convert(::Type{Dates.Time}, x::Time{U, T}) where {U, T}
    period = periodtype(U)(x.x)
    nanos = Dates.tons(period)
    return Dates.Time(Dates.Nanosecond(nanos))
end
# Write the Arrow `Time` definition: the unit plus the bit width of the storage type.
function arrowtype(b, ::Type{Time{U, T}}) where {U, T}
    Meta.timeStart(b)
    Meta.timeAddUnit(b, U)
    nbits = Int32(sizeof(T) * 8)
    Meta.timeAddBitWidth(b, nbits)
    offset = Meta.timeEnd(b)
    return (Meta.Time, offset, nothing)
end
# A `Dates.Time` is stored through its `Dates.value` in the nanosecond representation.
function Base.convert(::Type{Time{Meta.TimeUnit.NANOSECOND, Int64}}, x::Dates.Time)
    raw = Dates.value(x)
    return Time{Meta.TimeUnit.NANOSECOND, Int64}(raw)
end
# Arrow timestamp: `U` is the time unit, `TZ` the timezone (a `Symbol`, or `nothing`
# when the metadata has no timezone); the value is stored as `Int64`.
struct Timestamp{U, TZ} <: ArrowTimeType
    x::Int64
end
function Base.zero(::Type{Timestamp{U, TZ}}) where {U, TZ}
    return Timestamp{U, TZ}(Int64(0))
end
# Arrow timestamp metadata -> `Timestamp{unit, tz}`; a present timezone is turned
# into a `Symbol`, an absent one stays `nothing`.
function juliaeltype(f::Meta.Field, x::Meta.Timestamp, convert)
    unit = x.unit
    zone = x.timezone
    tz = zone === nothing ? nothing : Symbol(zone)
    return Timestamp{unit, tz}
end
# Timestamps without a timezone convert to `Dates.DateTime`.
function finaljuliatype(::Type{Timestamp{U, nothing}}) where {U}
    return Dates.DateTime
end
# The stored value is interpreted in its unit, expressed in milliseconds via
# `Dates.toms`, and offset by `UNIX_EPOCH_DATETIME` before being handed to `Dates.UTM`.
function Base.convert(::Type{Dates.DateTime}, x::Timestamp{U, nothing}) where {U}
    period = periodtype(U)(x.x)
    millis = Int64(Dates.toms(period) + UNIX_EPOCH_DATETIME)
    return Dates.DateTime(Dates.UTM(millis))
end
# Write the Arrow `Timestamp` definition. The timezone string (or a zero offset when
# there is no timezone) is produced before the table itself is started.
function arrowtype(b, ::Type{Timestamp{U, TZ}}) where {U, TZ}
    tzoffset = if TZ === nothing
        FlatBuffers.UOffsetT(0)
    else
        FlatBuffers.createstring!(b, String(TZ))
    end
    Meta.timestampStart(b)
    Meta.timestampAddUnit(b, U)
    Meta.timestampAddTimezone(b, tzoffset)
    offset = Meta.timestampEnd(b)
    return (Meta.Timestamp, offset, nothing)
end
# Arrow interval: `U` is the interval unit, `T` the integer type of the stored value.
struct Interval{U, T} <: ArrowTimeType
    x::T
end
function Base.zero(::Type{Interval{U, T}}) where {U, T}
    return Interval{U, T}(T(0))
end
# Storage integer type per interval unit: `Int32` for YEAR_MONTH, `Int64` otherwise.
function bitwidth(x::Meta.IntervalUnit)
    if x == Meta.IntervalUnit.YEAR_MONTH
        return Int32
    end
    return Int64
end
# Unit-only constructors that fill in the storage type and convert the argument.
function Interval{Meta.IntervalUnit.YEAR_MONTH}(x)
    return Interval{Meta.IntervalUnit.YEAR_MONTH, Int32}(Int32(x))
end
function Interval{Meta.IntervalUnit.DAY_TIME}(x)
    return Interval{Meta.IntervalUnit.DAY_TIME, Int64}(Int64(x))
end
# Arrow interval metadata -> `Interval{unit, storage}`.
juliaeltype(f::Meta.Field, x::Meta.Interval, convert) = Interval{x.unit, bitwidth(x.unit)}
# Write the Arrow `Interval` definition; only the unit is recorded.
function arrowtype(b, ::Type{Interval{U, T}}) where {U, T}
    Meta.intervalStart(b)
    Meta.intervalAddUnit(b, U)
    offset = Meta.intervalEnd(b)
    return (Meta.Interval, offset, nothing)
end
# Arrow duration: `U` is the time unit; the value is stored as `Int64`.
struct Duration{U} <: ArrowTimeType
    x::Int64
end
function Base.zero(::Type{Duration{U}}) where {U}
    return Duration{U}(Int64(0))
end
# Arrow duration metadata -> `Duration{unit}`.
juliaeltype(f::Meta.Field, x::Meta.Duration, convert) = Duration{x.unit}
# A duration converts to the `Dates` period type matching its unit.
function finaljuliatype(::Type{Duration{U}}) where {U}
    return periodtype(U)
end
# Build the unit's own period from the stored value, then convert it to `P`.
function Base.convert(::Type{P}, x::Duration{U}) where {P <: Dates.Period, U}
    native = periodtype(U)(x.x)
    return P(native)
end
# Write the Arrow `Duration` definition; only the unit is recorded.
function arrowtype(b, ::Type{Duration{U}}) where {U}
    Meta.durationStart(b)
    Meta.durationAddUnit(b, U)
    offset = Meta.durationEnd(b)
    return (Meta.Duration, offset, nothing)
end
# Arrow time unit for a `Dates` period type. Millisecond, Microsecond and Nanosecond
# have their own unit; every other argument falls back to SECOND.
function arrowperiodtype(P)
    return Meta.TimeUnit.SECOND
end
function arrowperiodtype(::Type{Dates.Millisecond})
    return Meta.TimeUnit.MILLISECOND
end
function arrowperiodtype(::Type{Dates.Microsecond})
    return Meta.TimeUnit.MICROSECOND
end
function arrowperiodtype(::Type{Dates.Nanosecond})
    return Meta.TimeUnit.NANOSECOND
end
# Convert the period to the duration's own unit and store its `Dates.value`.
function Base.convert(::Type{Duration{U}}, x::Dates.Period) where {U}
    inunit = periodtype(U)(x)
    return Duration{U}(Dates.value(inunit))
end
# nested types; call juliaeltype recursively on nested children
# A list is read as a `Vector` of the eltype of its first (only inspected) child field.
function juliaeltype(f::Meta.Field, list::Union{Meta.List, Meta.LargeList}, convert)
    child = f.children[1]
    elemtype = juliaeltype(child, buildmetadata(child), convert)
    return Vector{elemtype}
end
# arrowtype will call fieldoffset recursively for children
# Byte-backed lists are written as Utf8/Binary (or their Large variants), all other
# lists as List/LargeList with a single child field. The non-large variant is chosen
# when the offset type `O` is `Int32`.
function arrowtype(b, x::List{T, O, A}) where {T, O, A}
    if eltype(A) != UInt8
        # Generic list: the child field is written before the list table is started.
        children = [fieldoffset(b, "", x.data)]
        if O == Int32
            Meta.listStart(b)
            return Meta.List, Meta.listEnd(b), children
        end
        Meta.largeListStart(b)
        return Meta.LargeList, Meta.largeListEnd(b), children
    end
    if T == String || T == Union{String, Missing}
        if O == Int32
            Meta.utf8Start(b)
            return Meta.Utf8, Meta.utf8End(b), nothing
        end
        # NOTE(review): the `largUtf8*` spelling is kept exactly as found; presumably
        # it matches the names defined in `Meta` — confirm.
        Meta.largUtf8Start(b)
        return Meta.LargeUtf8, Meta.largUtf8End(b), nothing
    end
    # Remaining byte-backed case: binary data.
    if O == Int32
        Meta.binaryStart(b)
        return Meta.Binary, Meta.binaryEnd(b), nothing
    end
    Meta.largeBinaryStart(b)
    return Meta.LargeBinary, Meta.largeBinaryEnd(b), nothing
end
# A fixed-size list is read as an `NTuple` of `listSize` elements of the first
# child's eltype.
function juliaeltype(f::Meta.Field, list::Meta.FixedSizeList, convert)
    child = f.children[1]
    elemtype = juliaeltype(child, buildmetadata(child), convert)
    len = Int(list.listSize)
    return NTuple{len, elemtype}
end
# Byte-backed fixed-size lists are written as FixedSizeBinary, all others as
# FixedSizeList with a single child field. The size comes from `getN` applied to
# the non-missing element type.
function arrowtype(b, x::FixedSizeList{T, A}) where {T, A}
    N = getN(Base.nonmissingtype(T))
    if eltype(A) != UInt8
        # The child field is written before the list table is started.
        children = [fieldoffset(b, "", x.data)]
        Meta.fixedSizeListStart(b)
        Meta.fixedSizeListAddListSize(b, Int32(N))
        return Meta.FixedSizeList, Meta.fixedSizeListEnd(b), children
    end
    Meta.fixedSizeBinaryStart(b)
    Meta.fixedSizeBinaryAddByteWidth(b, Int32(N))
    return Meta.FixedSizeBinary, Meta.fixedSizeBinaryEnd(b), nothing
end
# A map is read as a `Dict`; key and value types come from the first and second
# child of the map's first child field.
function juliaeltype(f::Meta.Field, map::Meta.Map, convert)
    keyfield = f.children[1].children[1]
    K = juliaeltype(keyfield, buildmetadata(keyfield), convert)
    valuefield = f.children[1].children[2]
    V = juliaeltype(valuefield, buildmetadata(valuefield), convert)
    return Dict{K, V}
end
# Write the Arrow `Map` definition with a single child field named "entries",
# which is written before the map table is started.
function arrowtype(b, x::Map)
    entries = fieldoffset(b, "entries", x.data)
    children = [entries]
    Meta.mapStart(b)
    offset = Meta.mapEnd(b)
    return (Meta.Map, offset, children)
end
# A single key/value entry.
struct KeyValue{K, V}
    key::K
    value::V
end
# Accessors for the key and value type parameters.
function keyvalueK(::Type{KeyValue{K, V}}) where {K, V}
    return K
end
function keyvalueV(::Type{KeyValue{K, V}}) where {K, V}
    return V
end
# A `KeyValue` iterates as a one-element collection that yields itself.
function Base.length(kv::KeyValue)
    return 1
end
function Base.iterate(kv::KeyValue, st=1)
    if st === nothing
        return nothing
    end
    return (kv, nothing)
end
# Default instance for a key/value entry type. The type parameters are passed
# explicitly so the result is a `KeyValue{K, V}`; the bare `KeyValue(...)` constructor
# infers its parameters from the argument values, which yields a different type
# whenever `default(K)` or `default(V)` is not exactly of type `K`/`V`
# (e.g. for `Union` parameters).
# NOTE(review): assumes `default(K)`/`default(V)` are convertible to `K`/`V` — confirm.
ArrowTypes.default(::Type{KeyValue{K, V}}) where {K, V} = KeyValue{K, V}(default(K), default(V))
# Field names reported for a key/value entry.
function getnames(::Type{KeyValue{K, V}}) where {K, V}
    return (:key, :value)
end
# A key/value entry is written as an Arrow `Struct` with the child fields "key" and
# "value" (in that order), both written before the struct table is started.
function arrowtype(b, ::Type{KeyValue{K, V}}) where {K, V}
    keyoffset = fieldoffset(b, "key", K)
    valueoffset = fieldoffset(b, "value", V)
    children = [keyoffset, valueoffset]
    Meta.structStart(b)
    offset = Meta.structEnd(b)
    return (Meta.Struct, offset, children)
end
# A struct is read as a `NamedTuple` whose names and types come from the child fields.
function juliaeltype(f::Meta.Field, list::Meta.Struct, convert)
    fieldsyms = Tuple(Symbol(child.name) for child in f.children)
    fieldtypes = Tuple(juliaeltype(child, buildmetadata(child), convert) for child in f.children)
    return NamedTuple{fieldsyms, Tuple{fieldtypes...}}
end
# Write the Arrow `Struct` definition: one child field per name returned by
# `getnames` for the non-missing element type, paired with `x.data` by position.
# The child fields are written before the struct table is started.
function arrowtype(b, x::Struct{T, S}) where {T, S}
    fieldsyms = getnames(Base.nonmissingtype(T))
    nchildren = length(fieldsyms)
    children = [fieldoffset(b, fieldsyms[i], x.data[i]) for i in 1:nchildren]
    Meta.structStart(b)
    offset = Meta.structEnd(b)
    return (Meta.Struct, offset, children)
end
# Unions
# A union is read as `UnionT{mode, typeIds, Tuple{child eltypes...}}`; the type ids
# are turned into a tuple when present and stay `nothing` otherwise.
function juliaeltype(f::Meta.Field, u::Meta.Union, convert)
    ids = u.typeIds
    typeids = ids === nothing ? ids : Tuple(ids)
    childtypes = Tuple{(juliaeltype(child, buildmetadata(child), convert) for child in f.children)...}
    return UnionT{u.mode, typeids, childtypes}
end
# Forward to the three-argument method, passing the union's first type parameter.
function arrowtype(b, x::Union{DenseUnion{TT, S}, SparseUnion{TT, S}}) where {TT, S}
    return arrowtype(b, TT, x)
end
# Write the Arrow `Union` definition. Order of operations: the optional type-id
# vector first, then one child field per member of `U`, and only then the union
# table itself (mode, plus the type ids when present).
function arrowtype(b, ::Type{UnionT{T, typeIds, U}}, x::Union{DenseUnion{TT, S}, SparseUnion{TT, S}}) where {T, typeIds, U, TT, S}
    hasids = typeIds !== nothing
    if hasids
        nids = length(typeIds)
        Meta.unionStartTypeIdsVector(b, nids)
        # Ids are prepended in reverse order.
        foreach(id -> FlatBuffers.prepend!(b, id), Iterators.reverse(typeIds))
        idsoffset = FlatBuffers.endvector!(b, nids)
    end
    nchildren = fieldcount(U)
    children = [fieldoffset(b, "", x.data[i]) for i in 1:nchildren]
    Meta.unionStart(b)
    Meta.unionAddMode(b, T)
    hasids && Meta.unionAddTypeIds(b, idsoffset)
    offset = Meta.unionEnd(b)
    return (Meta.Union, offset, children)
end