blob: 0060172b74c27589c84f3202411a52c77e380a9c [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package compute
import (
//go:generate go run golang.org/x/tools/cmd/stringer -type=ValueShape -linecomment
//go:generate go run golang.org/x/tools/cmd/stringer -type=DatumKind -linecomment
// ValueShape is a brief description of the shape of a value (array, scalar
// or otherwise). The trailing line comments on the constants are the names
// used by the stringer-generated String method (-linecomment).
type ValueShape int8

const (
	// ShapeAny matches either an array or a scalar.
	ShapeAny ValueShape = iota // any
	// ShapeArray is the shape reported by ArrayDatum and ChunkedDatum.
	ShapeArray // array
	// ShapeScalar is the shape reported by ScalarDatum.
	ShapeScalar // scalar
)
// ValueDescr is a descriptor type giving both the shape and the datatype of a value
// but without the data.
type ValueDescr struct {
Shape ValueShape
Type arrow.DataType
func (v *ValueDescr) String() string {
return fmt.Sprintf("%s [%s]", v.Shape, v.Type)
// DatumKind is an enum used for denoting which kind of type a datum is
// encapsulating. The trailing line comments on the constants are the names
// used by the stringer-generated String method (-linecomment).
type DatumKind int

const (
	// KindNone is the kind reported by EmptyDatum.
	KindNone DatumKind = iota // none
	// KindScalar is the kind reported by ScalarDatum.
	KindScalar // scalar
	// KindArray is the kind reported by ArrayDatum.
	KindArray // array
	// KindChunked is the kind reported by ChunkedDatum.
	KindChunked // chunked_array
	// KindRecord is the kind reported by RecordDatum.
	KindRecord // record_batch
	// KindTable is the kind reported by TableDatum.
	KindTable // table
	// KindCollection is the kind reported by CollectionDatum.
	KindCollection // collection
)
// UnknownLength is the sentinel length for a datum that has no meaningful
// length; EmptyDatum.Len returns it.
const UnknownLength int64 = -1
// Datum is a variant interface for wrapping the various Arrow data structures
// for now the various Datum types just hold a Value which is the type they
// are wrapping, but it might make sense in the future for those types
// to actually be aliases or embed their types instead. Not sure yet.
type Datum interface {
Kind() DatumKind
Len() int64
Equals(Datum) bool
// ArrayLikeDatum is an interface for treating a Datum similarly to an Array,
// so that it is easy to differentiate between Record/Table/Collection and Scalar,
// Array/ChunkedArray for ease of use. Chunks will return an empty slice for Scalar,
// a slice with 1 element for Array, and the slice of chunks for a chunked array.
type ArrayLikeDatum interface {
Shape() ValueShape
Descr() ValueDescr
NullN() int64
Type() arrow.DataType
Chunks() []arrow.Array
// TableLikeDatum is an interface type for specifying either a RecordBatch or a
// Table as both contain a schema as opposed to just a single data type.
type TableLikeDatum interface {
Schema() *arrow.Schema
// EmptyDatum is the null case, a Datum with nothing in it.
type EmptyDatum struct{}
func (EmptyDatum) String() string { return "nullptr" }
func (EmptyDatum) Kind() DatumKind { return KindNone }
func (EmptyDatum) Len() int64 { return UnknownLength }
func (EmptyDatum) Release() {}
func (EmptyDatum) Equals(other Datum) bool {
_, ok := other.(EmptyDatum)
return ok
// ScalarDatum contains a scalar value
type ScalarDatum struct {
Value scalar.Scalar
func (ScalarDatum) Kind() DatumKind { return KindScalar }
func (ScalarDatum) Shape() ValueShape { return ShapeScalar }
func (ScalarDatum) Len() int64 { return 1 }
func (ScalarDatum) Chunks() []arrow.Array { return nil }
func (d *ScalarDatum) Type() arrow.DataType { return d.Value.DataType() }
func (d *ScalarDatum) String() string { return d.Value.String() }
func (d *ScalarDatum) Descr() ValueDescr { return ValueDescr{ShapeScalar, d.Value.DataType()} }
func (d *ScalarDatum) ToScalar() (scalar.Scalar, error) {
return d.Value, nil
func (d *ScalarDatum) NullN() int64 {
if d.Value.IsValid() {
return 0
return 1
type releasable interface {
func (d *ScalarDatum) Release() {
if v, ok := d.Value.(releasable); ok {
func (d *ScalarDatum) Equals(other Datum) bool {
if rhs, ok := other.(*ScalarDatum); ok {
return scalar.Equals(d.Value, rhs.Value)
return false
// ArrayDatum references an array.Data object which can be used to create
// array instances from if needed.
type ArrayDatum struct {
Value arrow.ArrayData
func (ArrayDatum) Kind() DatumKind { return KindArray }
func (ArrayDatum) Shape() ValueShape { return ShapeArray }
func (d *ArrayDatum) Type() arrow.DataType { return d.Value.DataType() }
func (d *ArrayDatum) Len() int64 { return int64(d.Value.Len()) }
func (d *ArrayDatum) NullN() int64 { return int64(d.Value.NullN()) }
func (d *ArrayDatum) Descr() ValueDescr { return ValueDescr{ShapeArray, d.Value.DataType()} }
func (d *ArrayDatum) String() string { return fmt.Sprintf("Array:{%s}", d.Value.DataType()) }
func (d *ArrayDatum) MakeArray() arrow.Array { return array.MakeFromData(d.Value) }
func (d *ArrayDatum) Chunks() []arrow.Array { return []arrow.Array{d.MakeArray()} }
func (d *ArrayDatum) ToScalar() (scalar.Scalar, error) {
return scalar.NewListScalarData(d.Value), nil
func (d *ArrayDatum) Release() {
d.Value = nil
func (d *ArrayDatum) Equals(other Datum) bool {
rhs, ok := other.(*ArrayDatum)
if !ok {
return false
left := d.MakeArray()
defer left.Release()
right := rhs.MakeArray()
defer right.Release()
return array.Equal(left, right)
// ChunkedDatum contains a chunked array for use with expressions and compute.
type ChunkedDatum struct {
Value *arrow.Chunked
func (ChunkedDatum) Kind() DatumKind { return KindChunked }
func (ChunkedDatum) Shape() ValueShape { return ShapeArray }
func (d *ChunkedDatum) Type() arrow.DataType { return d.Value.DataType() }
func (d *ChunkedDatum) Len() int64 { return int64(d.Value.Len()) }
func (d *ChunkedDatum) NullN() int64 { return int64(d.Value.NullN()) }
func (d *ChunkedDatum) Descr() ValueDescr { return ValueDescr{ShapeArray, d.Value.DataType()} }
func (d *ChunkedDatum) String() string { return fmt.Sprintf("Array:{%s}", d.Value.DataType()) }
func (d *ChunkedDatum) Chunks() []arrow.Array { return d.Value.Chunks() }
func (d *ChunkedDatum) Release() {
d.Value = nil
func (d *ChunkedDatum) Equals(other Datum) bool {
if rhs, ok := other.(*ChunkedDatum); ok {
return array.ChunkedEqual(d.Value, rhs.Value)
return false
// RecordDatum contains an array.Record for passing a full record to an expression
// or to compute.
type RecordDatum struct {
Value arrow.Record
func (RecordDatum) Kind() DatumKind { return KindRecord }
func (RecordDatum) String() string { return "RecordBatch" }
func (r *RecordDatum) Len() int64 { return r.Value.NumRows() }
func (r *RecordDatum) Schema() *arrow.Schema { return r.Value.Schema() }
func (r *RecordDatum) Release() {
r.Value = nil
func (r *RecordDatum) Equals(other Datum) bool {
if rhs, ok := other.(*RecordDatum); ok {
return array.RecordEqual(r.Value, rhs.Value)
return false
// TableDatum contains a table so that multiple record batches can be worked with
// together as a single table for being passed to compute and expression handling.
type TableDatum struct {
Value arrow.Table
func (TableDatum) Kind() DatumKind { return KindTable }
func (TableDatum) String() string { return "Table" }
func (d *TableDatum) Len() int64 { return d.Value.NumRows() }
func (d *TableDatum) Schema() *arrow.Schema { return d.Value.Schema() }
func (d *TableDatum) Release() {
d.Value = nil
func (d *TableDatum) Equals(other Datum) bool {
if rhs, ok := other.(*TableDatum); ok {
return array.TableEqual(d.Value, rhs.Value)
return false
// CollectionDatum is a slice of Datums.
type CollectionDatum []Datum

// Kind always returns KindCollection.
func (CollectionDatum) Kind() DatumKind { return KindCollection }

// Len returns the number of datums in the collection.
func (c CollectionDatum) Len() int64 { return int64(len(c)) }
func (c CollectionDatum) String() string {
var b strings.Builder
for i, d := range c {
if i > 0 {
b.WriteString(", ")
return b.String()
func (c CollectionDatum) Release() {
for _, v := range c {
func (c CollectionDatum) Equals(other Datum) bool {
rhs, ok := other.(CollectionDatum)
if !ok {
return false
if len(c) != len(rhs) {
return false
for i := range c {
if !c[i].Equals(rhs[i]) {
return false
return true
// NewDatum will construct the appropriate Datum type based on what is passed in
// as the argument.
// An arrow.Array gets an ArrayDatum
// An array.Chunked gets a ChunkedDatum
// An array.Record gets a RecordDatum
// an array.Table gets a TableDatum
// a []Datum gets a CollectionDatum
// a scalar.Scalar gets a ScalarDatum
// Anything else is passed to scalar.MakeScalar and recieves a scalar
// datum of that appropriate type.
func NewDatum(value interface{}) Datum {
switch v := value.(type) {
case Datum:
return v
case arrow.Array:
return &ArrayDatum{v.Data().(*array.Data)}
case *arrow.Chunked:
return &ChunkedDatum{v}
case arrow.Record:
return &RecordDatum{v}
case arrow.Table:
return &TableDatum{v}
case []Datum:
return CollectionDatum(v)
case scalar.Scalar:
return &ScalarDatum{v}
return &ScalarDatum{scalar.MakeScalar(value)}
var (
_ ArrayLikeDatum = (*ScalarDatum)(nil)
_ ArrayLikeDatum = (*ArrayDatum)(nil)
_ ArrayLikeDatum = (*ChunkedDatum)(nil)
_ TableLikeDatum = (*RecordDatum)(nil)
_ TableLikeDatum = (*TableDatum)(nil)
_ Datum = (CollectionDatum)(nil)