blob: 9f87491aad9709a67895fd95a8a7373b6d8f5111 [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package scalar
import (
"encoding/binary"
"fmt"
"hash/maphash"
"math"
"math/big"
"reflect"
"strconv"
"unsafe"
"github.com/apache/arrow/go/v6/arrow"
"github.com/apache/arrow/go/v6/arrow/array"
"github.com/apache/arrow/go/v6/arrow/bitutil"
"github.com/apache/arrow/go/v6/arrow/decimal128"
"github.com/apache/arrow/go/v6/arrow/endian"
"github.com/apache/arrow/go/v6/arrow/float16"
"github.com/apache/arrow/go/v6/arrow/internal/debug"
"github.com/apache/arrow/go/v6/arrow/memory"
"golang.org/x/xerrors"
)
// Scalar represents a single value of a specific DataType as opposed to
// an array.
//
// Scalars are useful for passing single value inputs to compute functions
// (not yet implemented) or for representing individual array elements,
// (with a non-trivial cost though).
type Scalar interface {
fmt.Stringer
// IsValid returns true if the value is non-null, otherwise false.
IsValid() bool
// The datatype of the value in this scalar
DataType() arrow.DataType
// Performs cheap validation checks, returns nil if successful
Validate() error
// Perform more expensive validation checks, returns nil if successful
ValidateFull() error
// Cast the value to the desired DataType (returns an error if unable to do so)
// should take semantics into account and modify the value accordingly.
CastTo(arrow.DataType) (Scalar, error)
// internal only functions for delegation
value() interface{}
equals(Scalar) bool
//TODO(zeroshade): approxEquals
}
type Releasable interface {
Release()
Retain()
}
func validateOptional(s *scalar, value interface{}, valueDesc string) error {
if s.Valid && value == nil {
return xerrors.Errorf("%s scalar is marked valid but doesn't have a %s", s.Type, valueDesc)
}
if !s.Valid && value != nil && !reflect.ValueOf(value).IsNil() {
return xerrors.Errorf("%s scalar is marked null but has a %s", s.Type, valueDesc)
}
return nil
}
type scalar struct {
Type arrow.DataType
Valid bool
}
func (s *scalar) String() string {
if !s.Valid {
return "null"
}
return "..."
}
func (s *scalar) IsValid() bool { return s.Valid }
func (s *scalar) Validate() error {
if s.Type == nil {
return xerrors.New("scalar lacks a type")
}
return nil
}
func (s *scalar) ValidateFull() error {
return s.Validate()
}
func (s scalar) DataType() arrow.DataType { return s.Type }
type Null struct {
scalar
}
// by the time we get here we already know that the rhs is the right type
func (n *Null) equals(s Scalar) bool {
debug.Assert(s.DataType().ID() == arrow.NULL, "scalar null equals should only receive null")
return true
}
func (n *Null) value() interface{} { return nil }
func (n *Null) CastTo(dt arrow.DataType) (Scalar, error) {
return MakeNullScalar(dt), nil
}
func (n *Null) Validate() (err error) {
err = n.scalar.Validate()
if err != nil {
return
}
if n.Valid {
err = xerrors.New("null scalar should have Valid = false")
}
return
}
func (n *Null) ValidateFull() error { return n.Validate() }
var (
ScalarNull *Null = &Null{scalar{Type: arrow.Null, Valid: false}}
)
type PrimitiveScalar interface {
Scalar
Data() []byte
}
type Boolean struct {
scalar
Value bool
}
// by the time we get here we already know that the rhs is the right type
func (n *Boolean) equals(rhs Scalar) bool {
return n.Value == rhs.(*Boolean).Value
}
func (s *Boolean) value() interface{} { return s.Value }
func (s *Boolean) Data() []byte {
return (*[1]byte)(unsafe.Pointer(&s.Value))[:]
}
func (s *Boolean) String() string {
if !s.Valid {
return "null"
}
val, err := s.CastTo(arrow.BinaryTypes.String)
if err != nil {
return "..."
}
return string(val.(*String).Value.Bytes())
}
func (s *Boolean) CastTo(dt arrow.DataType) (Scalar, error) {
if !s.Valid {
return MakeNullScalar(dt), nil
}
if dt.ID() == arrow.STRING {
return NewStringScalar(strconv.FormatBool(s.Value)), nil
}
val := 0
if s.Value {
val = 1
}
switch dt.ID() {
case arrow.UINT8:
return NewUint8Scalar(uint8(val)), nil
case arrow.INT8:
return NewInt8Scalar(int8(val)), nil
case arrow.UINT16:
return NewUint16Scalar(uint16(val)), nil
case arrow.INT16:
return NewInt16Scalar(int16(val)), nil
case arrow.UINT32:
return NewUint32Scalar(uint32(val)), nil
case arrow.INT32:
return NewInt32Scalar(int32(val)), nil
case arrow.UINT64:
return NewUint64Scalar(uint64(val)), nil
case arrow.INT64:
return NewInt64Scalar(int64(val)), nil
case arrow.FLOAT16:
return NewFloat16Scalar(float16.New(float32(val))), nil
case arrow.FLOAT32:
return NewFloat32Scalar(float32(val)), nil
case arrow.FLOAT64:
return NewFloat64Scalar(float64(val)), nil
default:
return nil, xerrors.Errorf("invalid scalar cast from type bool to type %s", dt)
}
}
func NewBooleanScalar(val bool) *Boolean {
return &Boolean{scalar{arrow.FixedWidthTypes.Boolean, true}, val}
}
type Float16 struct {
scalar
Value float16.Num
}
func (s *Float16) value() interface{} { return s.Value }
func (f *Float16) Data() []byte {
return (*[arrow.Float16SizeBytes]byte)(unsafe.Pointer(&f.Value))[:]
}
func (f *Float16) equals(rhs Scalar) bool {
return f.Value == rhs.(*Float16).Value
}
func (f *Float16) CastTo(to arrow.DataType) (Scalar, error) {
if !f.Valid {
return MakeNullScalar(to), nil
}
if r, ok := numericMap[to.ID()]; ok {
return convertToNumeric(reflect.ValueOf(f.Value.Float32()), r.valueType, r.scalarFunc), nil
}
if to.ID() == arrow.BOOL {
return NewBooleanScalar(f.Value.Uint16() != 0), nil
} else if to.ID() == arrow.STRING {
return NewStringScalar(f.Value.String()), nil
}
return nil, xerrors.Errorf("cannot cast non-null float16 scalar to type %s", to)
}
func (s *Float16) String() string {
if !s.Valid {
return "null"
}
val, err := s.CastTo(arrow.BinaryTypes.String)
if err != nil {
return "..."
}
return string(val.(*String).Value.Bytes())
}
func NewFloat16ScalarFromFloat32(val float32) *Float16 {
return NewFloat16Scalar(float16.New(val))
}
func NewFloat16Scalar(val float16.Num) *Float16 {
return &Float16{scalar{arrow.FixedWidthTypes.Float16, true}, val}
}
type Decimal128 struct {
scalar
Value decimal128.Num
}
func (s *Decimal128) value() interface{} { return s.Value }
func (s *Decimal128) String() string {
if !s.Valid {
return "null"
}
val, err := s.CastTo(arrow.BinaryTypes.String)
if err != nil {
return "..."
}
return string(val.(*String).Value.Bytes())
}
func (s *Decimal128) equals(rhs Scalar) bool {
return s.Value == rhs.(*Decimal128).Value
}
func (s *Decimal128) CastTo(to arrow.DataType) (Scalar, error) {
if !s.Valid {
return MakeNullScalar(to), nil
}
switch to.ID() {
case arrow.DECIMAL128:
return NewDecimal128Scalar(s.Value, to), nil
case arrow.STRING:
dt := s.Type.(*arrow.Decimal128Type)
scale := big.NewFloat(math.Pow10(int(dt.Scale)))
val := (&big.Float{}).SetInt(s.Value.BigInt())
return NewStringScalar(val.Quo(val, scale).Text('g', int(dt.Precision))), nil
}
return nil, xerrors.Errorf("cannot cast non-nil decimal128 scalar to type %s", to)
}
func NewDecimal128Scalar(val decimal128.Num, typ arrow.DataType) *Decimal128 {
return &Decimal128{scalar{typ, true}, val}
}
type Extension struct {
scalar
Value Scalar
}
func (s *Extension) value() interface{} { return s.Value }
func (s *Extension) equals(rhs Scalar) bool {
return Equals(s.Value, rhs.(*Extension).Value)
}
func (e *Extension) Validate() (err error) {
if err = e.scalar.Validate(); err != nil {
return err
}
if !e.Valid {
if e.Value != nil {
err = xerrors.Errorf("null %s scalar has storage value", e.Type)
}
return
}
switch {
case e.Value == nil:
err = xerrors.Errorf("non-null %s scalar doesn't have a storage value", e.Type)
case !e.Value.IsValid():
err = xerrors.Errorf("non-null %s scalar has a null storage value", e.Type)
default:
if err = e.Value.Validate(); err != nil {
err = xerrors.Errorf("%s scalar fails validation for storage value: %w", e.Type, err)
}
}
return
}
func (e *Extension) ValidateFull() error {
if err := e.Validate(); err != nil {
return err
}
if e.Valid {
return e.Value.ValidateFull()
}
return nil
}
func (s *Extension) CastTo(to arrow.DataType) (Scalar, error) {
if !s.Valid {
return MakeNullScalar(to), nil
}
if arrow.TypeEqual(s.Type, to) {
return s, nil
}
return nil, xerrors.Errorf("cannot cast non-null extension scalar of type %s to type %s", s.Type, to)
}
func (s *Extension) String() string {
if !s.Valid {
return "null"
}
val, err := s.CastTo(arrow.BinaryTypes.String)
if err != nil {
return "..."
}
return string(val.(*String).Value.Bytes())
}
func NewExtensionScalar(storage Scalar, typ arrow.DataType) *Extension {
return &Extension{scalar{typ, true}, storage}
}
func convertToNumeric(v reflect.Value, to reflect.Type, fn reflect.Value) Scalar {
return fn.Call([]reflect.Value{v.Convert(to)})[0].Interface().(Scalar)
}
// MakeNullScalar creates a scalar value of the desired type representing a null value
func MakeNullScalar(dt arrow.DataType) Scalar {
return makeNullFn[byte(dt.ID()&0x3f)](dt)
}
func unsupportedScalarType(dt arrow.DataType) Scalar {
panic("unsupported scalar data type: " + dt.ID().String())
}
func invalidScalarType(dt arrow.DataType) Scalar {
panic("invalid scalar type: " + dt.ID().String())
}
type scalarMakeNullFn func(arrow.DataType) Scalar
var makeNullFn [64]scalarMakeNullFn
func init() {
makeNullFn = [...]scalarMakeNullFn{
arrow.NULL: func(dt arrow.DataType) Scalar { return ScalarNull },
arrow.BOOL: func(dt arrow.DataType) Scalar { return &Boolean{scalar: scalar{dt, false}} },
arrow.UINT8: func(dt arrow.DataType) Scalar { return &Uint8{scalar: scalar{dt, false}} },
arrow.INT8: func(dt arrow.DataType) Scalar { return &Int8{scalar: scalar{dt, false}} },
arrow.UINT16: func(dt arrow.DataType) Scalar { return &Uint16{scalar: scalar{dt, false}} },
arrow.INT16: func(dt arrow.DataType) Scalar { return &Int16{scalar: scalar{dt, false}} },
arrow.UINT32: func(dt arrow.DataType) Scalar { return &Uint32{scalar: scalar{dt, false}} },
arrow.INT32: func(dt arrow.DataType) Scalar { return &Int32{scalar: scalar{dt, false}} },
arrow.UINT64: func(dt arrow.DataType) Scalar { return &Uint64{scalar: scalar{dt, false}} },
arrow.INT64: func(dt arrow.DataType) Scalar { return &Int64{scalar: scalar{dt, false}} },
arrow.FLOAT16: func(dt arrow.DataType) Scalar { return &Float16{scalar: scalar{dt, false}} },
arrow.FLOAT32: func(dt arrow.DataType) Scalar { return &Float32{scalar: scalar{dt, false}} },
arrow.FLOAT64: func(dt arrow.DataType) Scalar { return &Float64{scalar: scalar{dt, false}} },
arrow.STRING: func(dt arrow.DataType) Scalar { return &String{&Binary{scalar: scalar{dt, false}}} },
arrow.BINARY: func(dt arrow.DataType) Scalar { return &Binary{scalar: scalar{dt, false}} },
arrow.FIXED_SIZE_BINARY: func(dt arrow.DataType) Scalar { return &FixedSizeBinary{&Binary{scalar: scalar{dt, false}}} },
arrow.DATE32: func(dt arrow.DataType) Scalar { return &Date32{scalar: scalar{dt, false}} },
arrow.DATE64: func(dt arrow.DataType) Scalar { return &Date64{scalar: scalar{dt, false}} },
arrow.TIMESTAMP: func(dt arrow.DataType) Scalar { return &Timestamp{scalar: scalar{dt, false}} },
arrow.TIME32: func(dt arrow.DataType) Scalar { return &Time32{scalar: scalar{dt, false}} },
arrow.TIME64: func(dt arrow.DataType) Scalar { return &Time64{scalar: scalar{dt, false}} },
arrow.INTERVAL: func(dt arrow.DataType) Scalar {
if arrow.TypeEqual(dt, arrow.FixedWidthTypes.MonthInterval) {
return &MonthInterval{scalar: scalar{dt, false}}
}
if arrow.TypeEqual(dt, arrow.FixedWidthTypes.MonthDayNanoInterval) {
return &MonthDayNanoInterval{scalar: scalar{dt, false}}
}
return &DayTimeInterval{scalar: scalar{dt, false}}
},
arrow.INTERVAL_MONTHS: func(dt arrow.DataType) Scalar { return &MonthInterval{scalar: scalar{dt, false}} },
arrow.INTERVAL_DAY_TIME: func(dt arrow.DataType) Scalar { return &DayTimeInterval{scalar: scalar{dt, false}} },
arrow.INTERVAL_MONTH_DAY_NANO: func(dt arrow.DataType) Scalar { return &MonthDayNanoInterval{scalar: scalar{dt, false}} },
arrow.DECIMAL128: func(dt arrow.DataType) Scalar { return &Decimal128{scalar: scalar{dt, false}} },
arrow.LIST: func(dt arrow.DataType) Scalar { return &List{scalar: scalar{dt, false}} },
arrow.STRUCT: func(dt arrow.DataType) Scalar { return &Struct{scalar: scalar{dt, false}} },
arrow.SPARSE_UNION: unsupportedScalarType,
arrow.DENSE_UNION: unsupportedScalarType,
arrow.DICTIONARY: unsupportedScalarType,
arrow.LARGE_STRING: unsupportedScalarType,
arrow.LARGE_BINARY: unsupportedScalarType,
arrow.LARGE_LIST: unsupportedScalarType,
arrow.DECIMAL256: unsupportedScalarType,
arrow.MAP: func(dt arrow.DataType) Scalar { return &Map{&List{scalar: scalar{dt, false}}} },
arrow.EXTENSION: func(dt arrow.DataType) Scalar { return &Extension{scalar: scalar{dt, false}} },
arrow.FIXED_SIZE_LIST: func(dt arrow.DataType) Scalar { return &FixedSizeList{&List{scalar: scalar{dt, false}}} },
arrow.DURATION: func(dt arrow.DataType) Scalar { return &Duration{scalar: scalar{dt, false}} },
// invalid data types to fill out array size 2^6 - 1
63: invalidScalarType,
}
f := numericMap[arrow.FLOAT16]
f.scalarFunc = reflect.ValueOf(NewFloat16ScalarFromFloat32)
f.valueType = reflect.TypeOf(float32(0))
numericMap[arrow.FLOAT16] = f
}
// GetScalar creates a scalar object from the value at a given index in the
// passed in array, returns an error if unable to do so.
func GetScalar(arr array.Interface, idx int) (Scalar, error) {
switch arr := arr.(type) {
case *array.Binary:
buf := memory.NewBufferBytes(arr.Value(idx))
defer buf.Release()
return NewBinaryScalar(buf, arr.DataType()), nil
case *array.Boolean:
return NewBooleanScalar(arr.Value(idx)), nil
case *array.Date32:
return NewDate32Scalar(arr.Value(idx)), nil
case *array.Date64:
return NewDate64Scalar(arr.Value(idx)), nil
case *array.DayTimeInterval:
return NewDayTimeIntervalScalar(arr.Value(idx)), nil
case *array.Decimal128:
return NewDecimal128Scalar(arr.Value(idx), arr.DataType()), nil
case *array.Duration:
return NewDurationScalar(arr.Value(idx), arr.DataType()), nil
case array.ExtensionArray:
storage, err := GetScalar(arr.Storage(), idx)
if err != nil {
return nil, err
}
return NewExtensionScalar(storage, arr.DataType()), nil
case *array.FixedSizeBinary:
buf := memory.NewBufferBytes(arr.Value(idx))
defer buf.Release()
return NewFixedSizeBinaryScalar(buf, arr.DataType()), nil
case *array.FixedSizeList:
size := int(arr.DataType().(*arrow.FixedSizeListType).Len())
slice := array.NewSlice(arr.ListValues(), int64(idx*size), int64((idx+1)*size))
defer slice.Release()
return NewFixedSizeListScalarWithType(slice, arr.DataType()), nil
case *array.Float16:
return NewFloat16Scalar(arr.Value(idx)), nil
case *array.Float32:
return NewFloat32Scalar(arr.Value(idx)), nil
case *array.Float64:
return NewFloat64Scalar(arr.Value(idx)), nil
case *array.Int8:
return NewInt8Scalar(arr.Value(idx)), nil
case *array.Int16:
return NewInt16Scalar(arr.Value(idx)), nil
case *array.Int32:
return NewInt32Scalar(arr.Value(idx)), nil
case *array.Int64:
return NewInt64Scalar(arr.Value(idx)), nil
case *array.Uint8:
return NewUint8Scalar(arr.Value(idx)), nil
case *array.Uint16:
return NewUint16Scalar(arr.Value(idx)), nil
case *array.Uint32:
return NewUint32Scalar(arr.Value(idx)), nil
case *array.Uint64:
return NewUint64Scalar(arr.Value(idx)), nil
case *array.List:
offsets := arr.Offsets()
slice := array.NewSlice(arr.ListValues(), int64(offsets[idx]), int64(offsets[idx+1]))
defer slice.Release()
return NewListScalar(slice), nil
case *array.Map:
offsets := arr.Offsets()
slice := array.NewSlice(arr.ListValues(), int64(offsets[idx]), int64(offsets[idx+1]))
defer slice.Release()
return NewMapScalar(slice), nil
case *array.MonthInterval:
return NewMonthIntervalScalar(arr.Value(idx)), nil
case *array.MonthDayNanoInterval:
return NewMonthDayNanoIntervalScalar(arr.Value(idx)), nil
case *array.Null:
return ScalarNull, nil
case *array.String:
return NewStringScalar(arr.Value(idx)), nil
case *array.Struct:
children := make(Vector, arr.NumField())
for i := range children {
child, err := GetScalar(arr.Field(i), idx)
if err != nil {
return nil, err
}
children[i] = child
}
return NewStructScalar(children, arr.DataType()), nil
case *array.Time32:
return NewTime32Scalar(arr.Value(idx), arr.DataType()), nil
case *array.Time64:
return NewTime64Scalar(arr.Value(idx), arr.DataType()), nil
case *array.Timestamp:
return NewTimestampScalar(arr.Value(idx), arr.DataType()), nil
}
return nil, xerrors.Errorf("cannot create scalar from array of type %s", arr.DataType())
}
// MakeArrayOfNull creates an array of size length which is all null of the given data type.
func MakeArrayOfNull(dt arrow.DataType, length int, mem memory.Allocator) array.Interface {
nullBuf := memory.NewResizableBuffer(mem)
nullBuf.Resize(int(bitutil.BytesForBits(int64(length))))
defer nullBuf.Release()
memory.Set(nullBuf.Bytes(), 0xFF)
data := array.NewData(dt, length, []*memory.Buffer{nullBuf, nil}, nil, length, 0)
defer data.Release()
return array.MakeFromData(data)
}
// MakeArrayFromScalar returns an array filled with the scalar value repeated length times.
// Not yet implemented for nested types such as Struct, List, extension and so on.
func MakeArrayFromScalar(sc Scalar, length int, mem memory.Allocator) (array.Interface, error) {
if !sc.IsValid() {
return MakeArrayOfNull(sc.DataType(), length, mem), nil
}
createOffsets := func(valLength int32) *memory.Buffer {
buffer := memory.NewResizableBuffer(mem)
buffer.Resize(arrow.Int32Traits.BytesRequired(length + 1))
out := arrow.Int32Traits.CastFromBytes(buffer.Bytes())
for i, offset := 0, int32(0); i < length+1; i, offset = i+1, offset+valLength {
out[i] = offset
}
return buffer
}
createBuffer := func(data []byte) *memory.Buffer {
buffer := memory.NewResizableBuffer(mem)
buffer.Resize(len(data) * length)
out := buffer.Bytes()
copy(out, data)
for j := len(data); j < len(out); j *= 2 {
copy(out[j:], out[:j])
}
return buffer
}
finishFixedWidth := func(data []byte) *array.Data {
buffer := createBuffer(data)
defer buffer.Release()
return array.NewData(sc.DataType(), length, []*memory.Buffer{nil, buffer}, nil, 0, 0)
}
switch s := sc.(type) {
case *Boolean:
data := memory.NewResizableBuffer(mem)
defer data.Release()
data.Resize(int(bitutil.BytesForBits(int64(length))))
c := byte(0x00)
if s.Value {
c = 0xFF
}
memory.Set(data.Bytes(), c)
defer data.Release()
return array.NewBoolean(length, data, nil, 0), nil
case BinaryScalar:
if s.DataType().ID() == arrow.FIXED_SIZE_BINARY {
data := finishFixedWidth(s.Data())
defer data.Release()
return array.MakeFromData(data), nil
}
valuesBuf := createBuffer(s.Data())
offsetsBuf := createOffsets(int32(len(s.Data())))
data := array.NewData(sc.DataType(), length, []*memory.Buffer{nil, offsetsBuf, valuesBuf}, nil, 0, 0)
defer func() {
valuesBuf.Release()
offsetsBuf.Release()
data.Release()
}()
return array.MakeFromData(data), nil
case PrimitiveScalar:
data := finishFixedWidth(s.Data())
defer data.Release()
return array.MakeFromData(data), nil
case *Decimal128:
data := finishFixedWidth(arrow.Decimal128Traits.CastToBytes([]decimal128.Num{s.Value}))
defer data.Release()
return array.MakeFromData(data), nil
case *List:
values := make([]array.Interface, length)
for i := range values {
values[i] = s.Value
}
valueArray, err := array.Concatenate(values, mem)
if err != nil {
return nil, err
}
defer valueArray.Release()
offsetsBuf := createOffsets(int32(s.Value.Len()))
defer offsetsBuf.Release()
data := array.NewData(s.DataType(), length, []*memory.Buffer{nil, offsetsBuf}, []*array.Data{valueArray.Data()}, 0, 0)
defer data.Release()
return array.MakeFromData(data), nil
case *FixedSizeList:
values := make([]array.Interface, length)
for i := range values {
values[i] = s.Value
}
valueArray, err := array.Concatenate(values, mem)
if err != nil {
return nil, err
}
defer valueArray.Release()
data := array.NewData(s.DataType(), length, []*memory.Buffer{nil}, []*array.Data{valueArray.Data()}, 0, 0)
defer data.Release()
return array.MakeFromData(data), nil
case *Struct:
fields := make([]*array.Data, 0)
for _, v := range s.Value {
arr, err := MakeArrayFromScalar(v, length, mem)
if err != nil {
return nil, err
}
defer arr.Release()
fields = append(fields, arr.Data())
}
data := array.NewData(s.DataType(), length, []*memory.Buffer{nil}, fields, 0, 0)
defer data.Release()
return array.NewStructData(data), nil
case *Map:
structArr := s.GetList().(*array.Struct)
keys := make([]array.Interface, length)
values := make([]array.Interface, length)
for i := 0; i < length; i++ {
keys[i] = structArr.Field(0)
values[i] = structArr.Field(1)
}
keyArr, err := array.Concatenate(keys, mem)
if err != nil {
return nil, err
}
defer keyArr.Release()
valueArr, err := array.Concatenate(values, mem)
if err != nil {
return nil, err
}
defer valueArr.Release()
offsetsBuf := createOffsets(int32(structArr.Len()))
outStructArr := array.NewData(structArr.DataType(), keyArr.Len(), []*memory.Buffer{nil}, []*array.Data{keyArr.Data(), valueArr.Data()}, 0, 0)
data := array.NewData(s.DataType(), length, []*memory.Buffer{nil, offsetsBuf}, []*array.Data{outStructArr}, 0, 0)
defer func() {
offsetsBuf.Release()
outStructArr.Release()
data.Release()
}()
return array.MakeFromData(data), nil
default:
return nil, xerrors.Errorf("array from scalar not yet implemented for type %s", sc.DataType())
}
}
func Hash(seed maphash.Seed, s Scalar) uint64 {
var h maphash.Hash
h.SetSeed(seed)
binary.Write(&h, endian.Native, arrow.HashType(seed, s.DataType()))
out := h.Sum64()
if !s.IsValid() {
return out
}
hash := func() {
out ^= h.Sum64()
h.Reset()
}
valueHash := func(v interface{}) uint64 {
switch v := v.(type) {
case int32:
h.Write((*[4]byte)(unsafe.Pointer(&v))[:])
case int64:
h.Write((*[8]byte)(unsafe.Pointer(&v))[:])
case arrow.Date32:
binary.Write(&h, endian.Native, uint32(v))
case arrow.Time32:
binary.Write(&h, endian.Native, uint32(v))
case arrow.MonthInterval:
binary.Write(&h, endian.Native, uint32(v))
case arrow.Duration:
binary.Write(&h, endian.Native, uint64(v))
case arrow.Date64:
binary.Write(&h, endian.Native, uint64(v))
case arrow.Time64:
binary.Write(&h, endian.Native, uint64(v))
case arrow.Timestamp:
binary.Write(&h, endian.Native, uint64(v))
case float16.Num:
binary.Write(&h, endian.Native, v.Uint16())
case decimal128.Num:
binary.Write(&h, endian.Native, v.LowBits())
hash()
binary.Write(&h, endian.Native, uint64(v.HighBits()))
}
hash()
return out
}
h.Reset()
switch s := s.(type) {
case *Null:
case *Extension:
out ^= Hash(seed, s.Value)
case *DayTimeInterval:
return valueHash(s.Value.Days) & valueHash(s.Value.Milliseconds)
case *MonthDayNanoInterval:
return valueHash(s.Value.Months) & valueHash(s.Value.Days) & valueHash(s.Value.Nanoseconds)
case PrimitiveScalar:
h.Write(s.Data())
hash()
case TemporalScalar:
return valueHash(s.value())
case ListScalar:
array.Hash(&h, s.GetList().Data())
hash()
case *Struct:
for _, c := range s.Value {
if c.IsValid() {
out ^= Hash(seed, c)
}
}
}
return out
}