blob: a6a9065276bf396a081f58828bbe5f55bbd42975 [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package schema
import (
"encoding/json"
"fmt"
"math"
"github.com/apache/arrow/go/v6/parquet"
"github.com/apache/arrow/go/v6/parquet/internal/debug"
format "github.com/apache/arrow/go/v6/parquet/internal/gen-go/parquet"
)
// DecimalMetadata is a struct for managing scale and precision information between
// converted and logical types.
//
// It is the second value returned by LogicalType.ToConvertedType and the second
// argument accepted by LogicalType.IsCompatible.
type DecimalMetadata struct {
	// IsSet is true when Scale and Precision carry meaningful values. Within this
	// file only DecimalLogicalType.ToConvertedType sets it; every other logical
	// type returns the zero value.
	IsSet bool
	// Scale is the decimal scale copied from the decimal logical type.
	Scale int32
	// Precision is the decimal precision copied from the decimal logical type.
	Precision int32
}
// getLogicalType converts a thrift LogicalType union into the corresponding
// LogicalType implementation of this package.
//
// A nil input yields NoLogicalType{}. The thrift UNKNOWN member maps to
// NullLogicalType{}.
//
// Panics if the TIME or TIMESTAMP member carries a unit that is not one of
// MILLIS, MICROS or NANOS, or if l is non-nil but none of the handled union
// members is set.
func getLogicalType(l *format.LogicalType) LogicalType {
	// The nil check has to come first: switch cases are evaluated in order, so
	// a nil case placed after the IsSet* cases is only reached once those
	// accessors have already been invoked on the nil pointer.
	if l == nil {
		return NoLogicalType{}
	}

	switch {
	case l.IsSetSTRING():
		return StringLogicalType{}
	case l.IsSetMAP():
		return MapLogicalType{}
	case l.IsSetLIST():
		return ListLogicalType{}
	case l.IsSetENUM():
		return EnumLogicalType{}
	case l.IsSetDECIMAL():
		return &DecimalLogicalType{typ: l.DECIMAL}
	case l.IsSetDATE():
		return DateLogicalType{}
	case l.IsSetTIME():
		if timeUnitFromThrift(l.TIME.Unit) == TimeUnitUnknown {
			panic("parquet: TimeUnit must be one of MILLIS, MICROS, or NANOS for Time logical type")
		}
		return &TimeLogicalType{typ: l.TIME}
	case l.IsSetTIMESTAMP():
		if timeUnitFromThrift(l.TIMESTAMP.Unit) == TimeUnitUnknown {
			panic("parquet: TimeUnit must be one of MILLIS, MICROS, or NANOS for Timestamp logical type")
		}
		return &TimestampLogicalType{typ: l.TIMESTAMP}
	case l.IsSetINTEGER():
		return &IntLogicalType{typ: l.INTEGER}
	case l.IsSetUNKNOWN():
		return NullLogicalType{}
	case l.IsSetJSON():
		return JSONLogicalType{}
	case l.IsSetBSON():
		return BSONLogicalType{}
	case l.IsSetUUID():
		return UUIDLogicalType{}
	default:
		panic("invalid logical type")
	}
}
// TimeUnitType is an enum for denoting whether a time based logical type
// is using milliseconds, microseconds or nanoseconds.
type TimeUnitType int

// Constants for the TimeUnitType
const (
	// TimeUnitMillis denotes millisecond resolution.
	TimeUnitMillis TimeUnitType = iota
	// TimeUnitMicros denotes microsecond resolution.
	TimeUnitMicros
	// TimeUnitNanos denotes nanosecond resolution.
	TimeUnitNanos
	// TimeUnitUnknown is what the conversion helpers in this file return when
	// the unit is missing or unrecognized; createTimeUnit panics on it.
	TimeUnitUnknown
)
// LogicalType is the descriptor that defines the usage of a physical primitive
// type in the schema, such as an Interval, Date, etc.
type LogicalType interface {
	// Returns true if a nested type like List or Map
	IsNested() bool
	// Returns true if this type can be serialized to its thrift form,
	// ie: not for types such as Unknown or NoType
	IsSerialized() bool
	// Returns true if this is a usable logical type. Among the types in this
	// file only UnknownLogicalType reports false; NoLogicalType is still valid.
	IsValid() bool
	// Returns true if it is NoType
	IsNone() bool
	// returns a string representation of the Logical Type
	String() string
	// toThrift builds the thrift representation of this logical type. Some
	// implementations panic because they have no thrift form; check
	// IsSerialized before calling.
	toThrift() *format.LogicalType
	// Return the equivalent ConvertedType for legacy Parquet systems, along
	// with the decimal scale/precision when the type is a decimal
	ToConvertedType() (ConvertedType, DecimalMetadata)
	// Returns true if the specified ConvertedType is compatible with this
	// logical type
	IsCompatible(ConvertedType, DecimalMetadata) bool
	// Returns true if this logical type can be used with the provided physical type.
	// tlen is the type length, which matters for fixed length byte arrays.
	IsApplicable(t parquet.Type, tlen int32) bool
	// Returns true if the logical types are the same
	Equals(LogicalType) bool
	// Returns the default stat sort order for this logical type
	SortOrder() SortOrder
}
// TemporalLogicalType is a smaller interface for Time based logical types
// like Time / Timestamp
type TemporalLogicalType interface {
	LogicalType
	// IsAdjustedToUTC reports the isAdjustedToUTC flag stored on the type.
	IsAdjustedToUTC() bool
	// TimeUnit reports the resolution (millis, micros or nanos) of the type.
	TimeUnit() TimeUnitType
}
// SortOrder mirrors the parquet.thrift sort order type
type SortOrder int8

// Constants for the Stat sort order definitions
const (
	// SortSIGNED is used for types compared as signed values.
	SortSIGNED SortOrder = iota
	// SortUNSIGNED is used for types compared as unsigned values.
	SortUNSIGNED
	// SortUNKNOWN is used when no sort order is defined for the type.
	SortUNKNOWN
)
// DefaultSortOrder returns the default stat sort order for the given physical type.
//
// Boolean, the 32/64 bit integers, float and double sort as signed; byte arrays
// (variable and fixed length) sort as unsigned. INT96 and any unrecognized type
// have no defined order and report SortUNKNOWN.
func DefaultSortOrder(primitive format.Type) SortOrder {
	switch primitive {
	case format.Type_BYTE_ARRAY,
		format.Type_FIXED_LEN_BYTE_ARRAY:
		return SortUNSIGNED
	case format.Type_BOOLEAN,
		format.Type_INT32,
		format.Type_INT64,
		format.Type_FLOAT,
		format.Type_DOUBLE:
		return SortSIGNED
	}
	// format.Type_INT96 and everything else end up here.
	return SortUNKNOWN
}
// GetLogicalSortOrder returns the stat sort order to use for a column.
//
// A nil or invalid logical type yields SortUNKNOWN. NoLogicalType falls back
// to the default sort order of the physical type. Any other logical type
// reports its own sort order.
func GetLogicalSortOrder(logical LogicalType, primitive format.Type) SortOrder {
	if logical == nil || !logical.IsValid() {
		return SortUNKNOWN
	}
	if logical.Equals(NoLogicalType{}) {
		return DefaultSortOrder(primitive)
	}
	return logical.SortOrder()
}
// baseLogicalType is embedded by the concrete logical types to supply the
// most common answers for the simple boolean properties. Types that differ
// declare their own method, which takes precedence over the embedded one.
type baseLogicalType struct{}

// IsSerialized reports true by default.
func (baseLogicalType) IsSerialized() bool { return true }

// IsValid reports true by default.
func (baseLogicalType) IsValid() bool { return true }

// IsNested reports false by default.
func (baseLogicalType) IsNested() bool { return false }

// IsNone reports false by default.
func (baseLogicalType) IsNone() bool { return false }
// StringLogicalType is a UTF8 string, only usable with ByteArray columns
// (see IsApplicable).
type StringLogicalType struct{ baseLogicalType }

// SortOrder reports unsigned ordering for strings.
func (StringLogicalType) SortOrder() SortOrder { return SortUNSIGNED }

// MarshalJSON encodes the type as {"Type": "String"}.
func (s StringLogicalType) MarshalJSON() ([]byte, error) {
	payload := map[string]string{"Type": s.String()}
	return json.Marshal(payload)
}

// String returns the display name of the type.
func (StringLogicalType) String() string { return "String" }

// ToConvertedType maps the type to the legacy UTF8 converted type.
func (StringLogicalType) ToConvertedType() (ConvertedType, DecimalMetadata) {
	var noDecimal DecimalMetadata
	return ConvertedTypes.UTF8, noDecimal
}

// IsCompatible reports whether t is UTF8 with no decimal metadata set.
func (StringLogicalType) IsCompatible(t ConvertedType, dec DecimalMetadata) bool {
	if dec.IsSet {
		return false
	}
	return t == ConvertedTypes.UTF8
}

// IsApplicable reports whether the physical type is ByteArray; the length
// argument is ignored.
func (StringLogicalType) IsApplicable(t parquet.Type, _ int32) bool {
	return parquet.Types.ByteArray == t
}

// toThrift builds the thrift union with the STRING member set.
func (StringLogicalType) toThrift() *format.LogicalType {
	out := &format.LogicalType{}
	out.STRING = format.NewStringType()
	return out
}

// Equals reports whether rhs is also a StringLogicalType value.
func (StringLogicalType) Equals(rhs LogicalType) bool {
	switch rhs.(type) {
	case StringLogicalType:
		return true
	default:
		return false
	}
}
// MapLogicalType represents a mapped type. It is a nested type and is never
// applicable to a primitive physical type.
type MapLogicalType struct{ baseLogicalType }

// SortOrder reports that maps have no defined sort order.
func (MapLogicalType) SortOrder() SortOrder { return SortUNKNOWN }

// MarshalJSON encodes the type as {"Type": "Map"}.
func (m MapLogicalType) MarshalJSON() ([]byte, error) {
	payload := map[string]string{"Type": m.String()}
	return json.Marshal(payload)
}

// String returns the display name of the type.
func (MapLogicalType) String() string { return "Map" }

// IsNested overrides the embedded default: maps are nested.
func (MapLogicalType) IsNested() bool { return true }

// ToConvertedType maps the type to the legacy Map converted type.
func (MapLogicalType) ToConvertedType() (ConvertedType, DecimalMetadata) {
	var noDecimal DecimalMetadata
	return ConvertedTypes.Map, noDecimal
}

// IsCompatible accepts both the Map and MapKeyValue converted types, provided
// no decimal metadata is set.
func (MapLogicalType) IsCompatible(t ConvertedType, dec DecimalMetadata) bool {
	if t != ConvertedTypes.Map && t != ConvertedTypes.MapKeyValue {
		return false
	}
	return !dec.IsSet
}

// IsApplicable always reports false, whatever the physical type.
func (MapLogicalType) IsApplicable(parquet.Type, int32) bool { return false }

// toThrift builds the thrift union with the MAP member set.
func (MapLogicalType) toThrift() *format.LogicalType {
	out := &format.LogicalType{}
	out.MAP = format.NewMapType()
	return out
}

// Equals reports whether rhs is also a MapLogicalType value.
func (MapLogicalType) Equals(rhs LogicalType) bool {
	switch rhs.(type) {
	case MapLogicalType:
		return true
	default:
		return false
	}
}
// NewListLogicalType returns a ListLogicalType value as a LogicalType.
func NewListLogicalType() LogicalType {
	var list ListLogicalType
	return list
}

// ListLogicalType is used for columns which are themselves nested lists.
// It is a nested type and is never applicable to a primitive physical type.
type ListLogicalType struct{ baseLogicalType }

// SortOrder reports that lists have no defined sort order.
func (ListLogicalType) SortOrder() SortOrder { return SortUNKNOWN }

// MarshalJSON encodes the type as {"Type": "List"}.
func (l ListLogicalType) MarshalJSON() ([]byte, error) {
	payload := map[string]string{"Type": l.String()}
	return json.Marshal(payload)
}

// String returns the display name of the type.
func (ListLogicalType) String() string { return "List" }

// IsNested overrides the embedded default: lists are nested.
func (ListLogicalType) IsNested() bool { return true }

// ToConvertedType maps the type to the legacy List converted type.
func (ListLogicalType) ToConvertedType() (ConvertedType, DecimalMetadata) {
	var noDecimal DecimalMetadata
	return ConvertedTypes.List, noDecimal
}

// IsCompatible reports whether t is List with no decimal metadata set.
func (ListLogicalType) IsCompatible(t ConvertedType, dec DecimalMetadata) bool {
	if dec.IsSet {
		return false
	}
	return t == ConvertedTypes.List
}

// IsApplicable always reports false, whatever the physical type.
func (ListLogicalType) IsApplicable(parquet.Type, int32) bool { return false }

// toThrift builds the thrift union with the LIST member set.
func (ListLogicalType) toThrift() *format.LogicalType {
	out := &format.LogicalType{}
	out.LIST = format.NewListType()
	return out
}

// Equals reports whether rhs is also a ListLogicalType value.
func (ListLogicalType) Equals(rhs LogicalType) bool {
	switch rhs.(type) {
	case ListLogicalType:
		return true
	default:
		return false
	}
}
// EnumLogicalType is for representing an enum, which should be a byte array type
type EnumLogicalType struct{ baseLogicalType }

// SortOrder reports unsigned ordering for enums.
func (EnumLogicalType) SortOrder() SortOrder { return SortUNSIGNED }

// MarshalJSON encodes the type as {"Type": "Enum"}.
func (e EnumLogicalType) MarshalJSON() ([]byte, error) {
	payload := map[string]string{"Type": e.String()}
	return json.Marshal(payload)
}

// String returns the display name of the type.
func (EnumLogicalType) String() string { return "Enum" }

// ToConvertedType maps the type to the legacy Enum converted type.
func (EnumLogicalType) ToConvertedType() (ConvertedType, DecimalMetadata) {
	var noDecimal DecimalMetadata
	return ConvertedTypes.Enum, noDecimal
}

// IsCompatible reports whether t is Enum with no decimal metadata set.
func (EnumLogicalType) IsCompatible(t ConvertedType, dec DecimalMetadata) bool {
	if dec.IsSet {
		return false
	}
	return t == ConvertedTypes.Enum
}

// IsApplicable reports whether the physical type is ByteArray; the length
// argument is ignored.
func (EnumLogicalType) IsApplicable(t parquet.Type, _ int32) bool {
	return parquet.Types.ByteArray == t
}

// toThrift builds the thrift union with the ENUM member set.
func (EnumLogicalType) toThrift() *format.LogicalType {
	out := &format.LogicalType{}
	out.ENUM = format.NewEnumType()
	return out
}

// Equals reports whether rhs is also an EnumLogicalType value.
func (EnumLogicalType) Equals(rhs LogicalType) bool {
	switch rhs.(type) {
	case EnumLogicalType:
		return true
	default:
		return false
	}
}
// NewDecimalLogicalType returns a Decimal logical type with the given
// precision and scale.
//
// Panics if precision < 1 or if scale is outside the inclusive range
// [0, precision].
func NewDecimalLogicalType(precision int32, scale int32) LogicalType {
	switch {
	case precision < 1:
		panic("parquet: precision must be greater than or equal to 1 for decimal logical type")
	case scale < 0 || scale > precision:
		panic("parquet: scale must be a non-negative integer that does not exceed precision for decimal logical type")
	}

	thriftType := &format.DecimalType{Precision: precision, Scale: scale}
	return &DecimalLogicalType{typ: thriftType}
}
// DecimalLogicalType is used to represent a decimal value of a given
// precision and scale
type DecimalLogicalType struct {
	baseLogicalType
	// typ is the underlying thrift decimal description holding precision and scale.
	typ *format.DecimalType
}

// Precision returns the decimal precision.
func (t DecimalLogicalType) Precision() int32 {
	return t.typ.Precision
}

// Scale returns the decimal scale.
func (t DecimalLogicalType) Scale() int32 {
	return t.typ.Scale
}

// SortOrder reports signed ordering for decimals.
func (DecimalLogicalType) SortOrder() SortOrder {
	return SortSIGNED
}

// MarshalJSON encodes the type name together with its precision and scale.
func (t DecimalLogicalType) MarshalJSON() ([]byte, error) {
	return json.Marshal(map[string]interface{}{"Type": "Decimal", "precision": t.typ.Precision, "scale": t.typ.Scale})
}

// String returns a human readable description including precision and scale.
func (t DecimalLogicalType) String() string {
	return fmt.Sprintf("Decimal(precision=%d, scale=%d)", t.typ.Precision, t.typ.Scale)
}

// ToConvertedType maps the type to the legacy Decimal converted type and
// returns the scale and precision as set decimal metadata.
func (t DecimalLogicalType) ToConvertedType() (ConvertedType, DecimalMetadata) {
	return ConvertedTypes.Decimal, DecimalMetadata{IsSet: true, Scale: t.typ.GetScale(), Precision: t.typ.GetPrecision()}
}

// IsCompatible reports whether c is Decimal and dec is set with the same
// scale and precision as this type.
func (t DecimalLogicalType) IsCompatible(c ConvertedType, dec DecimalMetadata) bool {
	return c == ConvertedTypes.Decimal &&
		dec.IsSet && dec.Scale == t.typ.Scale && dec.Precision == t.typ.Precision
}

// IsApplicable reports whether a decimal of this precision can be stored in
// the given physical type:
//
//   - Int32: precision in [1, 9]
//   - Int64: precision in [1, 18] (a debug message is logged when an Int32
//     would have sufficed)
//   - FixedLenByteArray: precision must not exceed the number of decimal
//     digits that fit in 8*tlen-1 bits
//   - ByteArray: always applicable
//
// Any other physical type is rejected.
func (t DecimalLogicalType) IsApplicable(typ parquet.Type, tlen int32) bool {
	switch typ {
	case parquet.Types.Int32:
		return 1 <= t.typ.Precision && t.typ.Precision <= 9
	case parquet.Types.Int64:
		if t.typ.Precision < 10 {
			debug.Log("int64 used for decimal logical, precision is small enough to use int32")
		}
		return 1 <= t.typ.Precision && t.typ.Precision <= 18
	case parquet.Types.FixedLenByteArray:
		// floor(log10(2^bits)) == floor(bits * log10(2)). Multiplying instead
		// of calling math.Pow keeps the value finite for long arrays: 2^bits
		// overflows float64 to +Inf once tlen reaches 129 bytes. The
		// comparison is done in float64 so that no out-of-range float to
		// int32 conversion (whose result is implementation-defined) occurs.
		bits := 8.0*float64(tlen) - 1.0
		maxDigits := math.Floor(bits * math.Log10(2))
		return float64(t.typ.Precision) <= maxDigits
	case parquet.Types.ByteArray:
		return true
	}
	return false
}

// toThrift builds the thrift union with the DECIMAL member set to the
// underlying thrift type.
func (t DecimalLogicalType) toThrift() *format.LogicalType {
	return &format.LogicalType{DECIMAL: t.typ}
}

// Equals reports whether rhs is a *DecimalLogicalType with the same precision
// and scale.
func (t DecimalLogicalType) Equals(rhs LogicalType) bool {
	other, ok := rhs.(*DecimalLogicalType)
	if !ok {
		return false
	}
	return t.typ.Precision == other.typ.Precision && t.typ.Scale == other.typ.Scale
}
// DateLogicalType is an int32 representing the number of days since the Unix Epoch
// 1 January 1970
type DateLogicalType struct{ baseLogicalType }

// SortOrder reports signed ordering for dates.
func (DateLogicalType) SortOrder() SortOrder { return SortSIGNED }

// MarshalJSON encodes the type as {"Type": "Date"}.
func (d DateLogicalType) MarshalJSON() ([]byte, error) {
	payload := map[string]string{"Type": d.String()}
	return json.Marshal(payload)
}

// String returns the display name of the type.
func (DateLogicalType) String() string { return "Date" }

// ToConvertedType maps the type to the legacy Date converted type.
func (DateLogicalType) ToConvertedType() (ConvertedType, DecimalMetadata) {
	var noDecimal DecimalMetadata
	return ConvertedTypes.Date, noDecimal
}

// IsCompatible reports whether t is Date with no decimal metadata set.
func (DateLogicalType) IsCompatible(t ConvertedType, dec DecimalMetadata) bool {
	if dec.IsSet {
		return false
	}
	return t == ConvertedTypes.Date
}

// IsApplicable reports whether the physical type is Int32; the length
// argument is ignored.
func (DateLogicalType) IsApplicable(t parquet.Type, _ int32) bool {
	return parquet.Types.Int32 == t
}

// toThrift builds the thrift union with the DATE member set.
func (DateLogicalType) toThrift() *format.LogicalType {
	out := &format.LogicalType{}
	out.DATE = format.NewDateType()
	return out
}

// Equals reports whether rhs is also a DateLogicalType value.
func (DateLogicalType) Equals(rhs LogicalType) bool {
	switch rhs.(type) {
	case DateLogicalType:
		return true
	default:
		return false
	}
}
// timeUnitFromThrift translates a thrift TimeUnit union into a TimeUnitType.
// A nil unit, or one with none of MILLIS/MICROS/NANOS set, yields
// TimeUnitUnknown.
func timeUnitFromThrift(unit *format.TimeUnit) TimeUnitType {
	if unit == nil {
		return TimeUnitUnknown
	}
	if unit.IsSetMILLIS() {
		return TimeUnitMillis
	}
	if unit.IsSetMICROS() {
		return TimeUnitMicros
	}
	if unit.IsSetNANOS() {
		return TimeUnitNanos
	}
	return TimeUnitUnknown
}
// timeUnitToString returns the long-form name of a thrift TimeUnit
// ("milliseconds", "microseconds" or "nanoseconds"), or "unknown" when the
// unit is nil or has no recognized member set.
func timeUnitToString(unit *format.TimeUnit) string {
	if unit != nil {
		if unit.IsSetMILLIS() {
			return "milliseconds"
		}
		if unit.IsSetMICROS() {
			return "microseconds"
		}
		if unit.IsSetNANOS() {
			return "nanoseconds"
		}
	}
	return "unknown"
}
// timeUnitFromString parses the short-form unit names "millis", "micros" and
// "nanos". The match is exact; any other input yields TimeUnitUnknown.
func timeUnitFromString(v string) TimeUnitType {
	if v == "millis" {
		return TimeUnitMillis
	}
	if v == "micros" {
		return TimeUnitMicros
	}
	if v == "nanos" {
		return TimeUnitNanos
	}
	return TimeUnitUnknown
}
// createTimeUnit builds a thrift TimeUnit union with the member matching unit
// set.
//
// Panics if unit is not TimeUnitMillis, TimeUnitMicros or TimeUnitNanos.
func createTimeUnit(unit TimeUnitType) *format.TimeUnit {
	result := format.NewTimeUnit()
	switch {
	case unit == TimeUnitMillis:
		result.MILLIS = format.NewMilliSeconds()
	case unit == TimeUnitMicros:
		result.MICROS = format.NewMicroSeconds()
	case unit == TimeUnitNanos:
		result.NANOS = format.NewNanoSeconds()
	default:
		panic("parquet: time unit must be one of MILLIS, MICROS, or NANOS for Time logical type")
	}
	return result
}
// NewTimeLogicalType returns a time type of the given unit.
//
// Panics (via createTimeUnit) if unit is not millis, micros or nanos.
func NewTimeLogicalType(isAdjustedToUTC bool, unit TimeUnitType) LogicalType {
	thriftUnit := createTimeUnit(unit)
	thriftType := &format.TimeType{
		IsAdjustedToUTC: isAdjustedToUTC,
		Unit:            thriftUnit,
	}
	return &TimeLogicalType{typ: thriftType}
}
// TimeLogicalType is a time type without a date and must be an
// int32 for milliseconds, or an int64 for micro or nano seconds.
type TimeLogicalType struct {
	baseLogicalType
	// typ is the underlying thrift description (UTC flag and unit).
	typ *format.TimeType
}

// IsAdjustedToUTC returns the isAdjustedToUTC flag of the type.
func (t TimeLogicalType) IsAdjustedToUTC() bool { return t.typ.IsAdjustedToUTC }

// TimeUnit returns the resolution of the type.
func (t TimeLogicalType) TimeUnit() TimeUnitType {
	return timeUnitFromThrift(t.typ.Unit)
}

// SortOrder reports signed ordering for times.
func (TimeLogicalType) SortOrder() SortOrder { return SortSIGNED }

// MarshalJSON encodes the type name, UTC flag and long-form unit name.
func (t TimeLogicalType) MarshalJSON() ([]byte, error) {
	payload := map[string]interface{}{
		"Type":            "Time",
		"isAdjustedToUTC": t.typ.IsAdjustedToUTC,
		"timeUnit":        timeUnitToString(t.typ.GetUnit()),
	}
	return json.Marshal(payload)
}

// String returns a human readable description including UTC flag and unit.
func (t TimeLogicalType) String() string {
	adjusted := t.typ.GetIsAdjustedToUTC()
	unitName := timeUnitToString(t.typ.GetUnit())
	return fmt.Sprintf("Time(isAdjustedToUTC=%t, timeUnit=%s)", adjusted, unitName)
}

// ToConvertedType returns TimeMillis or TimeMicros for UTC-adjusted times of
// the matching unit. Every other combination (not UTC-adjusted, or
// nanoseconds) has no legacy equivalent and yields None.
func (t TimeLogicalType) ToConvertedType() (ConvertedType, DecimalMetadata) {
	var noDecimal DecimalMetadata
	unit := timeUnitFromThrift(t.typ.Unit)
	if !t.typ.IsAdjustedToUTC {
		return ConvertedTypes.None, noDecimal
	}
	switch unit {
	case TimeUnitMillis:
		return ConvertedTypes.TimeMillis, noDecimal
	case TimeUnitMicros:
		return ConvertedTypes.TimeMicros, noDecimal
	}
	return ConvertedTypes.None, noDecimal
}

// IsCompatible mirrors ToConvertedType: UTC-adjusted millis/micros require
// the matching legacy converted type, everything else requires None or NA.
// Decimal metadata must never be set.
func (t TimeLogicalType) IsCompatible(c ConvertedType, dec DecimalMetadata) bool {
	if dec.IsSet {
		return false
	}
	unit := timeUnitFromThrift(t.typ.Unit)
	if t.typ.IsAdjustedToUTC && unit == TimeUnitMillis {
		return c == ConvertedTypes.TimeMillis
	}
	if t.typ.IsAdjustedToUTC && unit == TimeUnitMicros {
		return c == ConvertedTypes.TimeMicros
	}
	return c == ConvertedTypes.None || c == ConvertedTypes.NA
}

// IsApplicable reports whether the physical type matches the unit: Int32 for
// milliseconds, Int64 for microseconds or nanoseconds. The length argument is
// ignored.
func (t TimeLogicalType) IsApplicable(typ parquet.Type, _ int32) bool {
	if typ == parquet.Types.Int32 && t.typ.GetUnit().IsSetMILLIS() {
		return true
	}
	if typ != parquet.Types.Int64 {
		return false
	}
	return t.typ.GetUnit().IsSetMICROS() || t.typ.GetUnit().IsSetNANOS()
}

// toThrift builds the thrift union with the TIME member set to the
// underlying thrift type.
func (t TimeLogicalType) toThrift() *format.LogicalType {
	out := &format.LogicalType{}
	out.TIME = t.typ
	return out
}

// Equals reports whether rhs is a *TimeLogicalType with the same UTC flag and
// unit.
func (t TimeLogicalType) Equals(rhs LogicalType) bool {
	if other, ok := rhs.(*TimeLogicalType); ok {
		return t.typ.IsAdjustedToUTC == other.typ.IsAdjustedToUTC &&
			timeUnitFromThrift(t.typ.Unit) == timeUnitFromThrift(other.typ.Unit)
	}
	return false
}
// NewTimestampLogicalType returns a logical timestamp type with "forceConverted"
// set to false
//
// Panics (via createTimeUnit) if unit is not millis, micros or nanos.
func NewTimestampLogicalType(isAdjustedToUTC bool, unit TimeUnitType) LogicalType {
	thriftType := &format.TimestampType{
		IsAdjustedToUTC: isAdjustedToUTC,
		Unit:            createTimeUnit(unit),
	}
	// forceConverted and fromConverted keep their zero value, false.
	return &TimestampLogicalType{typ: thriftType}
}
// NewTimestampLogicalTypeForce returns a timestamp logical type with
// "forceConverted" set to true
//
// Panics (via createTimeUnit) if unit is not millis, micros or nanos.
func NewTimestampLogicalTypeForce(isAdjustedToUTC bool, unit TimeUnitType) LogicalType {
	thriftType := &format.TimestampType{
		IsAdjustedToUTC: isAdjustedToUTC,
		Unit:            createTimeUnit(unit),
	}
	// fromConverted keeps its zero value, false.
	return &TimestampLogicalType{typ: thriftType, forceConverted: true}
}
// TimestampLogicalType represents an int64 number that can be decoded
// into a year, month, day, hour, minute, second, and subsecond
type TimestampLogicalType struct {
	baseLogicalType
	// typ is the underlying thrift description (UTC flag and unit).
	typ *format.TimestampType
	// forceConverted denotes whether or not the resulting serialized
	// type when writing to parquet will be written as the legacy
	// ConvertedType TIMESTAMP_MICROS/TIMESTAMP_MILLIS (true)
	// or if it will write the proper current Logical Types (false, default)
	forceConverted bool
	// fromConverted denotes if the timestamp type was created by
	// translating a legacy converted type of TIMESTAMP_MILLIS or
	// TIMESTAMP_MICROS rather than by using the current logical
	// types. Default is false.
	fromConverted bool
}

// IsFromConvertedType returns the fromConverted flag.
func (t TimestampLogicalType) IsFromConvertedType() bool { return t.fromConverted }

// IsAdjustedToUTC returns the isAdjustedToUTC flag of the type.
func (t TimestampLogicalType) IsAdjustedToUTC() bool { return t.typ.IsAdjustedToUTC }

// TimeUnit returns the resolution of the type.
func (t TimestampLogicalType) TimeUnit() TimeUnitType {
	return timeUnitFromThrift(t.typ.Unit)
}

// SortOrder reports signed ordering for timestamps.
func (TimestampLogicalType) SortOrder() SortOrder { return SortSIGNED }

// MarshalJSON encodes the type name, UTC flag, long-form unit name and both
// conversion flags.
func (t TimestampLogicalType) MarshalJSON() ([]byte, error) {
	payload := map[string]interface{}{
		"Type":                     "Timestamp",
		"isAdjustedToUTC":          t.typ.IsAdjustedToUTC,
		"timeUnit":                 timeUnitToString(t.typ.GetUnit()),
		"is_from_converted_type":   t.fromConverted,
		"force_set_converted_type": t.forceConverted,
	}
	return json.Marshal(payload)
}

// IsSerialized overrides the embedded default: a timestamp that originated
// from a legacy converted type is not serialized as a logical type.
func (t TimestampLogicalType) IsSerialized() bool {
	if t.fromConverted {
		return false
	}
	return true
}

// String returns a human readable description including UTC flag, unit and
// both conversion flags.
func (t TimestampLogicalType) String() string {
	adjusted := t.typ.GetIsAdjustedToUTC()
	unitName := timeUnitToString(t.typ.GetUnit())
	return fmt.Sprintf("Timestamp(isAdjustedToUTC=%t, timeUnit=%s, is_from_converted_type=%t, force_set_converted_type=%t)",
		adjusted, unitName, t.fromConverted, t.forceConverted)
}

// ToConvertedType returns TimestampMillis or TimestampMicros when the type is
// UTC-adjusted or forceConverted is set and the unit matches. Every other
// combination yields None.
func (t TimestampLogicalType) ToConvertedType() (ConvertedType, DecimalMetadata) {
	var noDecimal DecimalMetadata
	unit := timeUnitFromThrift(t.typ.Unit)
	if !t.typ.IsAdjustedToUTC && !t.forceConverted {
		return ConvertedTypes.None, noDecimal
	}
	switch unit {
	case TimeUnitMillis:
		return ConvertedTypes.TimestampMillis, noDecimal
	case TimeUnitMicros:
		return ConvertedTypes.TimestampMicros, noDecimal
	}
	return ConvertedTypes.None, noDecimal
}

// IsCompatible mirrors ToConvertedType: millis/micros that are UTC-adjusted
// or force-converted require the matching legacy converted type, everything
// else requires None or NA. Decimal metadata must never be set.
func (t TimestampLogicalType) IsCompatible(c ConvertedType, dec DecimalMetadata) bool {
	if dec.IsSet {
		return false
	}
	unit := timeUnitFromThrift(t.typ.Unit)
	if unit == TimeUnitMillis && (t.typ.GetIsAdjustedToUTC() || t.forceConverted) {
		return c == ConvertedTypes.TimestampMillis
	}
	if unit == TimeUnitMicros && (t.typ.GetIsAdjustedToUTC() || t.forceConverted) {
		return c == ConvertedTypes.TimestampMicros
	}
	return c == ConvertedTypes.None || c == ConvertedTypes.NA
}

// IsApplicable reports whether the physical type is Int64; the length
// argument is ignored.
func (TimestampLogicalType) IsApplicable(t parquet.Type, _ int32) bool {
	return parquet.Types.Int64 == t
}

// toThrift builds the thrift union with the TIMESTAMP member set to the
// underlying thrift type.
func (t TimestampLogicalType) toThrift() *format.LogicalType {
	out := &format.LogicalType{}
	out.TIMESTAMP = t.typ
	return out
}

// Equals reports whether rhs is a *TimestampLogicalType with the same UTC
// flag and unit. The forceConverted and fromConverted flags are not compared.
func (t TimestampLogicalType) Equals(rhs LogicalType) bool {
	if other, ok := rhs.(*TimestampLogicalType); ok {
		return t.typ.IsAdjustedToUTC == other.typ.IsAdjustedToUTC &&
			timeUnitFromThrift(t.typ.Unit) == timeUnitFromThrift(other.typ.Unit)
	}
	return false
}
// NewIntLogicalType creates an integer logical type of the desired bitwidth
// and whether it is signed or not.
//
// Bit width must be exactly 8, 16, 32 or 64 for an integer logical type;
// any other value panics.
func NewIntLogicalType(bitWidth int8, signed bool) LogicalType {
	if bitWidth != 8 && bitWidth != 16 && bitWidth != 32 && bitWidth != 64 {
		panic("parquet: bit width must be exactly 8, 16, 32, or 64 for Int logical type")
	}

	thriftType := &format.IntType{BitWidth: bitWidth, IsSigned: signed}
	return &IntLogicalType{typ: thriftType}
}
// IntLogicalType represents an integer type of a specific bit width and
// is either signed or unsigned.
type IntLogicalType struct {
	baseLogicalType
	// typ is the underlying thrift description (bit width and signedness).
	typ *format.IntType
}

// BitWidth returns the bit width of the integer.
func (t IntLogicalType) BitWidth() int8 { return t.typ.BitWidth }

// IsSigned reports whether the integer is signed.
func (t IntLogicalType) IsSigned() bool { return t.typ.IsSigned }

// SortOrder reports signed or unsigned ordering according to the signedness
// of the integer.
func (t IntLogicalType) SortOrder() SortOrder {
	if !t.typ.IsSigned {
		return SortUNSIGNED
	}
	return SortSIGNED
}

// MarshalJSON encodes the type name, bit width and signedness.
func (t IntLogicalType) MarshalJSON() ([]byte, error) {
	payload := map[string]interface{}{
		"Type":     "Int",
		"bitWidth": t.typ.BitWidth,
		"isSigned": t.typ.IsSigned,
	}
	return json.Marshal(payload)
}

// String returns a human readable description including bit width and
// signedness.
func (t IntLogicalType) String() string {
	width, signed := t.typ.GetBitWidth(), t.typ.GetIsSigned()
	return fmt.Sprintf("Int(bitWidth=%d, isSigned=%t)", width, signed)
}

// ToConvertedType returns the legacy Int8..Int64 / Uint8..Uint64 converted
// type matching the bit width and signedness, or None when the bit width is
// not one of 8, 16, 32 or 64.
func (t IntLogicalType) ToConvertedType() (ConvertedType, DecimalMetadata) {
	var noDecimal DecimalMetadata
	signed := t.typ.IsSigned
	switch t.typ.BitWidth {
	case 8:
		if signed {
			return ConvertedTypes.Int8, noDecimal
		}
		return ConvertedTypes.Uint8, noDecimal
	case 16:
		if signed {
			return ConvertedTypes.Int16, noDecimal
		}
		return ConvertedTypes.Uint16, noDecimal
	case 32:
		if signed {
			return ConvertedTypes.Int32, noDecimal
		}
		return ConvertedTypes.Uint32, noDecimal
	case 64:
		if signed {
			return ConvertedTypes.Int64, noDecimal
		}
		return ConvertedTypes.Uint64, noDecimal
	}
	return ConvertedTypes.None, noDecimal
}

// IsCompatible reports whether c equals the converted type produced by
// ToConvertedType and no decimal metadata is set.
func (t IntLogicalType) IsCompatible(c ConvertedType, dec DecimalMetadata) bool {
	if dec.IsSet {
		return false
	}
	expected, _ := t.ToConvertedType()
	return expected == c
}

// IsApplicable reports whether the integer fits the physical type: widths up
// to 32 bits go with Int32, a width of exactly 64 goes with Int64. The length
// argument is ignored.
func (t IntLogicalType) IsApplicable(typ parquet.Type, _ int32) bool {
	if typ == parquet.Types.Int32 && t.typ.GetBitWidth() <= 32 {
		return true
	}
	return typ == parquet.Types.Int64 && t.typ.GetBitWidth() == 64
}

// toThrift builds the thrift union with the INTEGER member set to the
// underlying thrift type.
func (t IntLogicalType) toThrift() *format.LogicalType {
	out := &format.LogicalType{}
	out.INTEGER = t.typ
	return out
}

// Equals reports whether rhs is an *IntLogicalType with the same signedness
// and bit width.
func (t IntLogicalType) Equals(rhs LogicalType) bool {
	if other, ok := rhs.(*IntLogicalType); ok {
		return t.typ.GetIsSigned() == other.typ.GetIsSigned() &&
			t.typ.GetBitWidth() == other.typ.GetBitWidth()
	}
	return false
}
// UnknownLogicalType is a type that is essentially a placeholder for when
// we don't know the type. It reports itself as neither valid nor serialized.
type UnknownLogicalType struct{ baseLogicalType }

// SortOrder reports that no sort order is defined.
func (UnknownLogicalType) SortOrder() SortOrder { return SortUNKNOWN }

// MarshalJSON encodes the type as {"Type": "Unknown"}.
func (u UnknownLogicalType) MarshalJSON() ([]byte, error) {
	payload := map[string]string{"Type": u.String()}
	return json.Marshal(payload)
}

// IsValid overrides the embedded default: the placeholder is not valid.
func (UnknownLogicalType) IsValid() bool { return false }

// IsSerialized overrides the embedded default: the placeholder is not
// serialized.
func (UnknownLogicalType) IsSerialized() bool { return false }

// String returns the display name of the type.
func (UnknownLogicalType) String() string { return "Unknown" }

// ToConvertedType maps the type to the NA converted type.
func (UnknownLogicalType) ToConvertedType() (ConvertedType, DecimalMetadata) {
	var noDecimal DecimalMetadata
	return ConvertedTypes.NA, noDecimal
}

// IsCompatible reports whether c is NA with no decimal metadata set.
func (UnknownLogicalType) IsCompatible(c ConvertedType, dec DecimalMetadata) bool {
	if dec.IsSet {
		return false
	}
	return c == ConvertedTypes.NA
}

// IsApplicable always reports true, whatever the physical type.
func (UnknownLogicalType) IsApplicable(parquet.Type, int32) bool { return true }

// toThrift builds the thrift union with the UNKNOWN member set.
func (UnknownLogicalType) toThrift() *format.LogicalType {
	out := &format.LogicalType{}
	out.UNKNOWN = format.NewNullType()
	return out
}

// Equals reports whether rhs is also an UnknownLogicalType value.
func (UnknownLogicalType) Equals(rhs LogicalType) bool {
	switch rhs.(type) {
	case UnknownLogicalType:
		return true
	default:
		return false
	}
}
// JSONLogicalType represents a byte array column which is to be interpreted
// as a JSON string.
type JSONLogicalType struct{ baseLogicalType }

// SortOrder reports unsigned ordering for JSON values.
func (JSONLogicalType) SortOrder() SortOrder { return SortUNSIGNED }

// MarshalJSON encodes the type as {"Type": "JSON"}.
func (j JSONLogicalType) MarshalJSON() ([]byte, error) {
	payload := map[string]string{"Type": j.String()}
	return json.Marshal(payload)
}

// String returns the display name of the type.
func (JSONLogicalType) String() string { return "JSON" }

// ToConvertedType maps the type to the legacy JSON converted type.
func (JSONLogicalType) ToConvertedType() (ConvertedType, DecimalMetadata) {
	var noDecimal DecimalMetadata
	return ConvertedTypes.JSON, noDecimal
}

// IsCompatible reports whether c is JSON with no decimal metadata set.
func (JSONLogicalType) IsCompatible(c ConvertedType, dec DecimalMetadata) bool {
	if dec.IsSet {
		return false
	}
	return c == ConvertedTypes.JSON
}

// IsApplicable reports whether the physical type is ByteArray; the length
// argument is ignored.
func (JSONLogicalType) IsApplicable(t parquet.Type, _ int32) bool {
	return parquet.Types.ByteArray == t
}

// toThrift builds the thrift union with the JSON member set.
func (JSONLogicalType) toThrift() *format.LogicalType {
	out := &format.LogicalType{}
	out.JSON = format.NewJsonType()
	return out
}

// Equals reports whether rhs is also a JSONLogicalType value.
func (JSONLogicalType) Equals(rhs LogicalType) bool {
	switch rhs.(type) {
	case JSONLogicalType:
		return true
	default:
		return false
	}
}
// BSONLogicalType represents a binary JSON string in the byte array
type BSONLogicalType struct{ baseLogicalType }

// SortOrder reports unsigned ordering for BSON values.
func (BSONLogicalType) SortOrder() SortOrder { return SortUNSIGNED }

// MarshalJSON encodes the type as {"Type": "BSON"}.
func (b BSONLogicalType) MarshalJSON() ([]byte, error) {
	payload := map[string]string{"Type": b.String()}
	return json.Marshal(payload)
}

// String returns the display name of the type.
func (BSONLogicalType) String() string { return "BSON" }

// ToConvertedType maps the type to the legacy BSON converted type.
func (BSONLogicalType) ToConvertedType() (ConvertedType, DecimalMetadata) {
	var noDecimal DecimalMetadata
	return ConvertedTypes.BSON, noDecimal
}

// IsCompatible reports whether c is BSON with no decimal metadata set.
func (BSONLogicalType) IsCompatible(c ConvertedType, dec DecimalMetadata) bool {
	if dec.IsSet {
		return false
	}
	return c == ConvertedTypes.BSON
}

// IsApplicable reports whether the physical type is ByteArray; the length
// argument is ignored.
func (BSONLogicalType) IsApplicable(t parquet.Type, _ int32) bool {
	return parquet.Types.ByteArray == t
}

// toThrift builds the thrift union with the BSON member set.
func (BSONLogicalType) toThrift() *format.LogicalType {
	out := &format.LogicalType{}
	out.BSON = format.NewBsonType()
	return out
}

// Equals reports whether rhs is also a BSONLogicalType value.
func (BSONLogicalType) Equals(rhs LogicalType) bool {
	switch rhs.(type) {
	case BSONLogicalType:
		return true
	default:
		return false
	}
}
// UUIDLogicalType can only be used with a FixedLength byte array column
// that is exactly 16 bytes long
type UUIDLogicalType struct{ baseLogicalType }

// SortOrder reports unsigned ordering for UUIDs.
func (UUIDLogicalType) SortOrder() SortOrder { return SortUNSIGNED }

// MarshalJSON encodes the type as {"Type": "UUID"}.
func (u UUIDLogicalType) MarshalJSON() ([]byte, error) {
	payload := map[string]string{"Type": u.String()}
	return json.Marshal(payload)
}

// String returns the display name of the type.
func (UUIDLogicalType) String() string { return "UUID" }

// ToConvertedType returns None: this type is not mapped to a legacy
// converted type.
func (UUIDLogicalType) ToConvertedType() (ConvertedType, DecimalMetadata) {
	var noDecimal DecimalMetadata
	return ConvertedTypes.None, noDecimal
}

// IsCompatible reports whether c is None or NA with no decimal metadata set.
func (UUIDLogicalType) IsCompatible(c ConvertedType, dec DecimalMetadata) bool {
	if dec.IsSet {
		return false
	}
	return c == ConvertedTypes.None || c == ConvertedTypes.NA
}

// IsApplicable reports whether the physical type is a FixedLenByteArray of
// length 16.
func (UUIDLogicalType) IsApplicable(t parquet.Type, tlen int32) bool {
	if t != parquet.Types.FixedLenByteArray {
		return false
	}
	return tlen == 16
}

// toThrift builds the thrift union with the UUID member set.
func (UUIDLogicalType) toThrift() *format.LogicalType {
	out := &format.LogicalType{}
	out.UUID = format.NewUUIDType()
	return out
}

// Equals reports whether rhs is also a UUIDLogicalType value.
func (UUIDLogicalType) Equals(rhs LogicalType) bool {
	switch rhs.(type) {
	case UUIDLogicalType:
		return true
	default:
		return false
	}
}
// IntervalLogicalType is not yet in the thrift spec, but represents
// an interval time and needs to be a fixed length byte array of 12 bytes
//
// Because there is no thrift representation, the type reports itself as not
// serialized and toThrift panics if called anyway.
type IntervalLogicalType struct{ baseLogicalType }

// SortOrder reports that intervals have no defined sort order.
func (IntervalLogicalType) SortOrder() SortOrder {
	return SortUNKNOWN
}

// MarshalJSON encodes the type as {"Type": "Interval"}.
func (IntervalLogicalType) MarshalJSON() ([]byte, error) {
	return json.Marshal(map[string]string{"Type": IntervalLogicalType{}.String()})
}

// IsSerialized overrides the embedded default of true. toThrift has nothing
// to produce for this type and panics, so callers that consult IsSerialized
// before serializing must be told to skip it.
func (IntervalLogicalType) IsSerialized() bool { return false }

// String returns the display name of the type.
func (IntervalLogicalType) String() string {
	return "Interval"
}

// ToConvertedType maps the type to the legacy Interval converted type.
func (IntervalLogicalType) ToConvertedType() (ConvertedType, DecimalMetadata) {
	return ConvertedTypes.Interval, DecimalMetadata{}
}

// IsCompatible reports whether c is Interval with no decimal metadata set.
func (IntervalLogicalType) IsCompatible(c ConvertedType, dec DecimalMetadata) bool {
	return c == ConvertedTypes.Interval && !dec.IsSet
}

// IsApplicable reports whether the physical type is a FixedLenByteArray of
// length 12.
func (IntervalLogicalType) IsApplicable(t parquet.Type, tlen int32) bool {
	return t == parquet.Types.FixedLenByteArray && tlen == 12
}

// toThrift always panics: there is no thrift logical type for intervals.
func (IntervalLogicalType) toThrift() *format.LogicalType {
	panic("no parquet IntervalLogicalType yet implemented")
}

// Equals reports whether rhs is also an IntervalLogicalType value.
func (IntervalLogicalType) Equals(rhs LogicalType) bool {
	_, ok := rhs.(IntervalLogicalType)
	return ok
}
// NullLogicalType is the logical type whose thrift form is the UNKNOWN
// member of the thrift union. It is applicable to every physical type.
type NullLogicalType struct{ baseLogicalType }

// SortOrder reports that no sort order is defined.
func (NullLogicalType) SortOrder() SortOrder { return SortUNKNOWN }

// MarshalJSON encodes the type as {"Type": "Null"}.
func (n NullLogicalType) MarshalJSON() ([]byte, error) {
	payload := map[string]string{"Type": n.String()}
	return json.Marshal(payload)
}

// String returns the display name of the type.
func (NullLogicalType) String() string { return "Null" }

// ToConvertedType returns None: this type is not mapped to a legacy
// converted type.
func (NullLogicalType) ToConvertedType() (ConvertedType, DecimalMetadata) {
	var noDecimal DecimalMetadata
	return ConvertedTypes.None, noDecimal
}

// IsCompatible reports whether c is None or NA with no decimal metadata set.
func (NullLogicalType) IsCompatible(c ConvertedType, dec DecimalMetadata) bool {
	if dec.IsSet {
		return false
	}
	return c == ConvertedTypes.None || c == ConvertedTypes.NA
}

// IsApplicable always reports true, whatever the physical type.
func (NullLogicalType) IsApplicable(parquet.Type, int32) bool { return true }

// toThrift builds the thrift union with the UNKNOWN member set.
func (NullLogicalType) toThrift() *format.LogicalType {
	out := &format.LogicalType{}
	out.UNKNOWN = format.NewNullType()
	return out
}

// Equals reports whether rhs is also a NullLogicalType value.
func (NullLogicalType) Equals(rhs LogicalType) bool {
	switch rhs.(type) {
	case NullLogicalType:
		return true
	default:
		return false
	}
}
// NoLogicalType denotes the absence of a logical type annotation. It is
// valid but not serialized, and is the only type in this file whose IsNone
// reports true.
type NoLogicalType struct{ baseLogicalType }

// SortOrder reports that no sort order is defined by the logical type itself.
func (NoLogicalType) SortOrder() SortOrder { return SortUNKNOWN }

// MarshalJSON encodes the type as {"Type": "None"}.
func (n NoLogicalType) MarshalJSON() ([]byte, error) {
	payload := map[string]string{"Type": n.String()}
	return json.Marshal(payload)
}

// IsSerialized overrides the embedded default: there is nothing to serialize.
func (NoLogicalType) IsSerialized() bool { return false }

// String returns the display name of the type.
func (NoLogicalType) String() string { return "None" }

// ToConvertedType maps the type to the None converted type.
func (NoLogicalType) ToConvertedType() (ConvertedType, DecimalMetadata) {
	var noDecimal DecimalMetadata
	return ConvertedTypes.None, noDecimal
}

// IsCompatible reports whether c is None with no decimal metadata set.
func (NoLogicalType) IsCompatible(c ConvertedType, dec DecimalMetadata) bool {
	if dec.IsSet {
		return false
	}
	return c == ConvertedTypes.None
}

// IsApplicable always reports true, whatever the physical type.
func (NoLogicalType) IsApplicable(parquet.Type, int32) bool { return true }

// toThrift always panics: the absence of a logical type has no thrift form.
func (NoLogicalType) toThrift() *format.LogicalType {
	panic("cannot convert NoLogicalType to thrift")
}

// Equals reports whether rhs is also a NoLogicalType value.
func (NoLogicalType) Equals(rhs LogicalType) bool {
	switch rhs.(type) {
	case NoLogicalType:
		return true
	default:
		return false
	}
}

// IsNone overrides the embedded default and reports true.
func (NoLogicalType) IsNone() bool { return true }