blob: c4a6351cce269b9827d8e485bad20beda5530da0 [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package arrow
import (
"sync/atomic"
"github.com/apache/arrow/go/v10/arrow/internal/debug"
)
// Table represents a logical sequence of chunked arrays of equal length. It is
// similar to a Record, except that its columns are chunked arrays rather than
// single contiguous arrays. This allows a Table to be built up progressively,
// chunk by chunk, whereas each column of a Record is always one contiguous
// array.
type Table interface {
// Schema returns the table's schema.
Schema() *Schema
// NumRows returns the number of rows, presumably the common length of
// every column.
NumRows() int64
// NumCols returns the number of columns.
NumCols() int64
// Column returns the column at index i. Behavior for an out-of-range i is
// left to the implementation.
Column(i int) *Column
// Retain and Release presumably follow the same reference-counting
// convention as Column and Chunked: Retain adds a reference, Release drops
// one. The exact ownership rules are defined by each implementation.
Retain()
Release()
}
// Column is an immutable column data structure consisting of
// a field (type metadata) and a chunked data array.
//
// To get strongly typed data from a Column, you need to iterate the
// chunks and type assert each individual Array. For example:
//
//	switch column.DataType().ID() {
//	case arrow.INT32:
//		for _, c := range column.Data().Chunks() {
//			arr := c.(*array.Int32)
//			// do something with arr
//		}
//	case arrow.INT64:
//		for _, c := range column.Data().Chunks() {
//			arr := c.(*array.Int64)
//			// do something with arr
//		}
//	case ...
//	}
type Column struct {
// field carries the column's name and data type.
field Field
// data holds the column's values as a reference-counted chunked array.
data *Chunked
}
// NewColumnFromArr builds a Column directly from a field and a single,
// non-chunked array.
//
// It saves callers from having to wrap the array in a one-element Chunked and
// then release that wrapper again to keep the reference counts balanced. The
// array is retained once on behalf of the new column, whose chunked data
// starts with a reference count of 1.
//
// NewColumnFromArr panics if the field's data type is inconsistent with the
// data type of the array.
func NewColumnFromArr(field Field, arr Array) Column {
	if !TypeEqual(field.Type, arr.DataType()) {
		panic("arrow/array: inconsistent data type")
	}

	// The column owns one reference to the array from here on.
	arr.Retain()
	single := &Chunked{
		refCount: 1,
		chunks:   []Array{arr},
		length:   arr.Len(),
		nulls:    arr.NullN(),
		dtype:    field.Type,
	}
	return Column{field: field, data: single}
}
// NewColumn wraps a field and a chunked data array into a new Column.
//
// The chunked array is retained on behalf of the column; callers keep
// ownership of their own reference.
//
// NewColumn panics if the field's data type is inconsistent with the data type
// of the chunked data array. The reference taken above is dropped again before
// panicking.
func NewColumn(field Field, chunks *Chunked) *Column {
	chunks.Retain()
	if !TypeEqual(chunks.DataType(), field.Type) {
		chunks.Release()
		panic("arrow/array: inconsistent data type")
	}
	return &Column{field: field, data: chunks}
}
// Retain increases the reference count of the column's chunked data by 1.
// It delegates to the Retain method of that chunked data.
// Retain may be called simultaneously from multiple goroutines.
func (col *Column) Retain() {
col.data.Retain()
}
// Release decreases the reference count of the column's chunked data by 1.
// It delegates to the Release method of that chunked data, which releases
// its chunks when the reference count goes to zero.
// Release may be called simultaneously from multiple goroutines.
func (col *Column) Release() {
col.data.Release()
}
// Len returns the length reported by the column's chunked data.
func (col *Column) Len() int { return col.data.Len() }
// NullN returns the null count reported by the column's chunked data.
func (col *Column) NullN() int { return col.data.NullN() }
// Data returns the column's underlying chunked array. The pointer is returned
// as-is; no additional reference is taken on behalf of the caller.
func (col *Column) Data() *Chunked { return col.data }
// Field returns the field (name and type metadata) describing the column.
func (col *Column) Field() Field { return col.field }
// Name returns the name of the column's field.
func (col *Column) Name() string { return col.field.Name }
// DataType returns the data type recorded in the column's field.
func (col *Column) DataType() DataType { return col.field.Type }
// Chunked manages a collection of arrays (chunks) as one logical large array.
//
// A Chunked is reference counted: Retain adds a reference and Release drops
// one, releasing the chunks once the count reaches zero.
type Chunked struct {
// refCount must be first in the struct for 64 bit alignment and sync/atomic
// (https://github.com/golang/go/issues/37262).
refCount int64
chunks []Array // the arrays making up the logical array, in order
length int // sum of the lengths of all chunks
nulls int // sum of the null counts of all chunks
dtype DataType // data type of the logical array
}
// NewChunked returns a new chunked array from the slice of arrays.
//
// The input slice is copied and every chunk is retained on behalf of the
// returned Chunked, which starts with a reference count of 1. Its length and
// null count are the sums of the corresponding values of the chunks.
//
// NewChunked panics if any chunk's data type differs from dtype. All chunks
// are validated before any of them is retained, so a panic leaves the
// reference counts of the input arrays untouched.
func NewChunked(dtype DataType, chunks []Array) *Chunked {
	// Validate up front: retaining while validating would leak one reference
	// on every chunk preceding the first mismatch if the panic is recovered.
	for _, chunk := range chunks {
		if !TypeEqual(chunk.DataType(), dtype) {
			panic("arrow/array: mismatch data type")
		}
	}

	arr := &Chunked{
		refCount: 1,
		chunks:   make([]Array, len(chunks)),
		dtype:    dtype,
	}
	for i, chunk := range chunks {
		chunk.Retain()
		arr.chunks[i] = chunk
		arr.length += chunk.Len()
		arr.nulls += chunk.NullN()
	}
	return arr
}
// Retain increases the reference count by 1.
// The increment is performed atomically, so Retain may be called
// simultaneously from multiple goroutines.
func (a *Chunked) Retain() {
atomic.AddInt64(&a.refCount, 1)
}
// Release drops one reference to the chunked array.
// Once the last reference is gone, every chunk is released and the chunked
// array is reset to an empty state (no chunks, zero length, zero nulls).
// The reference count is decremented atomically, so Release may be called
// simultaneously from multiple goroutines.
func (a *Chunked) Release() {
	debug.Assert(atomic.LoadInt64(&a.refCount) > 0, "too many releases")

	if remaining := atomic.AddInt64(&a.refCount, -1); remaining != 0 {
		// Other owners are still holding on to the data.
		return
	}

	for i := range a.chunks {
		a.chunks[i].Release()
	}
	a.chunks = nil
	a.length, a.nulls = 0, 0
}
// Len returns the total length of the chunked array, i.e. the sum of the
// lengths of its chunks.
func (a *Chunked) Len() int { return a.length }
// NullN returns the total null count of the chunked array, i.e. the sum of
// the null counts of its chunks.
func (a *Chunked) NullN() int { return a.nulls }
// DataType returns the data type of the chunked array.
func (a *Chunked) DataType() DataType { return a.dtype }
// Chunks returns the internal slice of chunks without copying it; it is nil
// once the last reference has been released.
func (a *Chunked) Chunks() []Array { return a.chunks }
// Chunk returns the i-th chunk. The index is not checked here, so an
// out-of-range i causes a runtime panic.
func (a *Chunked) Chunk(i int) Array { return a.chunks[i] }