blob: 6cfec04d69052196cb1eb08e8958388d54b96ee8 [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package array
import (
"reflect"
"github.com/apache/arrow/go/v6/arrow"
"github.com/apache/arrow/go/v6/arrow/memory"
"golang.org/x/xerrors"
)
// ExtensionArray is the contract every user-defined extension type array
// has to satisfy. A concrete implementation is required to embed
// ExtensionArrayBase, which supplies the default behavior and handling for
// the array; custom functionality can then be layered on top of the
// embedded base.
type ExtensionArray interface {
	Interface

	// ExtensionType is equivalent to calling DataType(), except that the
	// result has already been cast to arrow.ExtensionType.
	ExtensionType() arrow.ExtensionType

	// Storage gives access to the storage array backing this array.
	Storage() Interface

	// mustEmbedExtensionArrayBase is deliberately unexported: consumers
	// can only pick this method up by embedding ExtensionArrayBase in
	// their struct, which makes the embedding mandatory for fulfilling
	// this interface.
	mustEmbedExtensionArrayBase()
}
// arrayEqualExtension reports whether two extension arrays are equal, which
// requires both equal data types and equal underlying storage arrays. The
// storage arrays are only compared when the data types already match.
func arrayEqualExtension(l, r ExtensionArray) bool {
	sameType := arrow.TypeEqual(l.DataType(), r.DataType())
	return sameType && ArrayEqual(l.Storage(), r.Storage())
}
// arrayApproxEqualExtension reports whether two extension arrays are
// approximately equal: their data types must be equal and their underlying
// storage arrays must be approximately equal under opt.
func arrayApproxEqualExtension(l, r ExtensionArray, opt equalOption) bool {
	if arrow.TypeEqual(l.DataType(), r.DataType()) {
		// Types agree, so the verdict is decided by the storage arrays.
		return arrayApproxEqual(l.Storage(), r.Storage(), opt)
	}
	return false
}
// NewExtensionArrayWithStorage builds a new ExtensionArray of the given
// ExtensionType on top of the provided storage array.
//
// The storage array is retained, not released, by this function: callers
// remain responsible for calling Release on the storage they passed in. The
// new extension array shares references to the storage's underlying Data
// buffers rather than copying them.
//
// It panics if the storage array's data type does not equal the extension
// type's declared storage type, or if the extension type's ArrayType does not
// embed ExtensionArrayBase.
func NewExtensionArrayWithStorage(dt arrow.ExtensionType, storage Interface) Interface {
	if want, got := dt.StorageType(), storage.DataType(); !arrow.TypeEqual(want, got) {
		panic(xerrors.Errorf("arrow/array: storage type %s for extension type %s, does not match expected type %s", got, dt.ExtensionName(), want))
	}

	// The extension array holds its own reference to the storage array.
	storage.Retain()
	ext := ExtensionArrayBase{storage: storage}
	ext.refCount = 1

	// Build a Data that carries the ExtensionType as its datatype while
	// pointing at the very same buffers as the storage array.
	src := storage.Data()
	shared := NewData(dt, src.length, src.buffers, src.childData, src.nulls, src.offset)
	defer shared.Release()
	ext.array.setData(shared)

	// Instantiate the concrete array type declared by the ExtensionType;
	// reflect.New hands back a pointer to the freshly created value.
	ptr := reflect.New(ext.ExtensionType().ArrayType())

	// Fill in the embedded ExtensionArrayBase. No separate existence check
	// is made: an ArrayType that does not embed ExtensionArrayBase makes
	// this panic, which is the desired outcome.
	ptr.Elem().FieldByName("ExtensionArrayBase").Set(reflect.ValueOf(ext))
	return ptr.Interface().(ExtensionArray)
}
// NewExtensionData creates an ExtensionArray from data, whose datatype must
// be an arrow.ExtensionType and whose contents are laid out for the storage
// array.
//
// It panics if the datatype is not an extension type, or if the extension
// type's ArrayType does not embed ExtensionArrayBase.
func NewExtensionData(data *Data) ExtensionArray {
	var ext ExtensionArrayBase
	ext.refCount = 1
	ext.setData(data)

	// The ExtensionType's ArrayType tells us which concrete type to create;
	// reflect.New yields a pointer to a new zero value of that type.
	arrType := ext.ExtensionType().ArrayType()
	ptr := reflect.New(arrType)

	// Populate the embedded ExtensionArrayBase with the value prepared
	// above. The field is expected to exist because the interface forces
	// embedding; if it does not, this panics, which is intended.
	embedded := ptr.Elem().FieldByName("ExtensionArrayBase")
	embedded.Set(reflect.ValueOf(ext))

	return ptr.Interface().(ExtensionArray)
}
// ExtensionArrayBase is the common base for user-defined Extension Array
// types. Every user-defined extension array must embed it, for example:
//
//	type UserDefinedArray struct {
//		array.ExtensionArrayBase
//	}
type ExtensionArrayBase struct {
	// array is the embedded base array carrying the reference count and
	// the extension-typed Data.
	array

	// storage is the underlying storage array handed out by Storage().
	storage Interface
}
// Retain increases the reference count by 1, retaining both the extension
// array itself and its underlying storage array.
// Retain may be called simultaneously from multiple goroutines.
func (e *ExtensionArrayBase) Retain() {
	// The storage array is retained in lockstep with the base array so
	// that Release can release both symmetrically.
	e.array.Retain()
	e.storage.Retain()
}
// Release decreases the reference count by 1, releasing both the extension
// array itself and its underlying storage array.
// Release may be called simultaneously from multiple goroutines.
// When the reference count goes to zero, the memory is freed.
func (e *ExtensionArrayBase) Release() {
	// Mirrors Retain: one release on the base array, one on the storage.
	e.array.Release()
	e.storage.Release()
}
// Storage exposes the storage array that backs this extension array.
func (e *ExtensionArrayBase) Storage() Interface {
	return e.storage
}
// ExtensionType yields the same value as DataType, already asserted to the
// arrow.ExtensionType interface for convenience. The assertion is unchecked,
// so it panics if the data type does not implement arrow.ExtensionType.
func (e *ExtensionArrayBase) ExtensionType() arrow.ExtensionType {
	extType := e.DataType().(arrow.ExtensionType)
	return extType
}
// setData installs data as the array's Data and derives the storage array
// from it. It panics unless data's datatype has the EXTENSION type ID and
// implements arrow.ExtensionType.
func (e *ExtensionArrayBase) setData(data *Data) {
	if id := data.DataType().ID(); id != arrow.EXTENSION {
		panic("arrow/array: must use extension type to construct an extension array")
	}

	extType, isExt := data.dtype.(arrow.ExtensionType)
	if !isExt {
		panic("arrow/array: DataType for ExtensionArray must implement arrow.ExtensionType")
	}

	e.array.setData(data)

	// The storage array must reference the same data buffers (no copying)
	// while reporting the storage type as its datatype, so a dedicated Data
	// describing those buffers is created for it.
	storageType := extType.StorageType()
	view := NewData(storageType, data.length, data.buffers, data.childData, data.nulls, data.offset)
	defer view.Release()
	e.storage = MakeFromData(view)
}
// mustEmbedExtensionArrayBase does nothing; its sole purpose is to force any
// extension array type to embed ExtensionArrayBase in order to satisfy the
// ExtensionArray interface.
func (ExtensionArrayBase) mustEmbedExtensionArrayBase() {}
// ExtensionBuilder is a convenience builder that lets NewBuilder and related
// helpers keep working properly with extension types. Depending on
// preference, it may be cleaner or easier to call
// NewExtensionArrayWithStorage directly with a storage array.
//
// That said, this type makes it easy to build an extension array: supply the
// extension type, then fetch and fill the storage builder.
type ExtensionBuilder struct {
	// Builder is the embedded builder for the extension's storage type.
	Builder

	// dt is the extension type applied to the arrays this builder creates.
	dt arrow.ExtensionType
}
// NewExtensionBuilder creates a builder for the given extension type, backed
// by the provided memory allocator. Internally it constructs a builder for
// the extension type's storage type and remembers the extension type. The
// storage builder can be fetched with StorageBuilder and type asserted to the
// concrete builder type that is needed.
//
// Once values have been added through the storage builder, NewArray or
// NewExtensionArray constructs the appropriate extension array type with its
// storage set correctly, resetting the builder for reuse.
//
// Example
//
// A simple example, assuming an extension type for a UUID defined as a
// FixedSizeBinary(16) has been registered under the type name "uuid":
//
//	uuidType := arrow.GetExtensionType("uuid")
//	bldr := array.NewExtensionBuilder(memory.DefaultAllocator, uuidType)
//	defer bldr.Release()
//	uuidBldr := bldr.StorageBuilder().(*array.FixedSizeBinaryBuilder)
//	/* build up the fixed size binary array as usual via Append/AppendValues */
//	uuidArr := bldr.NewExtensionArray()
//	defer uuidArr.Release()
//
// Since the storage builder is embedded in the ExtensionBuilder, every
// function of the Builder interface can also be called directly on an
// ExtensionBuilder and is answered by the storage builder, e.g. for
// generically grabbing the Length, Cap, Nulls, reserving, etc.
func NewExtensionBuilder(mem memory.Allocator, dt arrow.ExtensionType) *ExtensionBuilder {
	storageBldr := NewBuilder(mem, dt.StorageType())
	return &ExtensionBuilder{
		Builder: storageBldr,
		dt:      dt,
	}
}
// StorageBuilder hands back the embedded builder for the underlying storage
// type.
func (b *ExtensionBuilder) StorageBuilder() Builder {
	return b.Builder
}
// NewArray builds a new array out of the memory buffers used by the builder
// and resets the builder so that it can be used to build another array. It
// delegates to NewExtensionArray and returns the result as an Interface.
func (b *ExtensionBuilder) NewArray() Interface {
	var arr Interface = b.NewExtensionArray()
	return arr
}
// NewExtensionArray creates an Extension array from the memory buffers used
// by the builder and resets the ExtensionBuilder so it can be used to build
// a new ExtensionArray of the same type.
//
// The returned array shares the buffers produced by the storage builder; the
// intermediate storage array and Data created here are released before
// returning, so the caller owns only the returned ExtensionArray and must
// call Release on it when done.
func (b *ExtensionBuilder) NewExtensionArray() ExtensionArray {
	storage := b.Builder.NewArray()
	defer storage.Release()

	// Re-describe the storage buffers with the extension type as datatype.
	// The storage Data's own offset is propagated instead of assuming 0, so
	// the extension array views exactly the same window of the buffers as
	// the storage array (consistent with NewExtensionArrayWithStorage).
	src := storage.Data()
	data := NewData(b.dt, storage.Len(), src.buffers, src.childData, src.nulls, src.offset)
	defer data.Release()

	return NewExtensionData(data)
}