| // Licensed to the Apache Software Foundation (ASF) under one |
| // or more contributor license agreements. See the NOTICE file |
| // distributed with this work for additional information |
| // regarding copyright ownership. The ASF licenses this file |
| // to you under the Apache License, Version 2.0 (the |
| // "License"); you may not use this file except in compliance |
| // with the License. You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, software |
| // distributed under the License is distributed on an "AS IS" BASIS, |
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| // See the License for the specific language governing permissions and |
| // limitations under the License. |
| |
| package arrow |
| |
| import ( |
| "sync/atomic" |
| |
| "github.com/apache/arrow/go/v10/arrow/internal/debug" |
| ) |
| |
| // Table represents a logical sequence of chunked arrays of equal length. It is |
| // similar to a Record except that the columns are ChunkedArrays instead, |
| // allowing for a Table to be built up by chunks progressively whereas the columns |
| // in a single Record are always each a single contiguous array. |
| type Table interface { |
| Schema() *Schema |
| NumRows() int64 |
| NumCols() int64 |
| Column(i int) *Column |
| |
| Retain() |
| Release() |
| } |
| |
| // Column is an immutable column data structure consisting of |
| // a field (type metadata) and a chunked data array. |
| // |
| // To get strongly typed data from a Column, you need to iterate the |
| // chunks and type assert each individual Array. For example: |
| // |
| // switch column.DataType().ID { |
| // case arrow.INT32: |
| // for _, c := range column.Data().Chunks() { |
| // arr := c.(*array.Int32) |
| // // do something with arr |
| // } |
| // case arrow.INT64: |
| // for _, c := range column.Data().Chunks() { |
| // arr := c.(*array.Int64) |
| // // do something with arr |
| // } |
| // case ... |
| // } |
| // |
| type Column struct { |
| field Field |
| data *Chunked |
| } |
| |
| // NewColumnFromArr is a convenience function to create a column from |
| // a field and a non-chunked array. |
| // |
| // This provides a simple mechanism for bypassing the middle step of |
| // constructing a Chunked array of one and then releasing it because |
| // of the ref counting. |
| func NewColumnFromArr(field Field, arr Array) Column { |
| if !TypeEqual(field.Type, arr.DataType()) { |
| panic("arrow/array: inconsistent data type") |
| } |
| |
| arr.Retain() |
| return Column{ |
| field: field, |
| data: &Chunked{ |
| refCount: 1, |
| chunks: []Array{arr}, |
| length: arr.Len(), |
| nulls: arr.NullN(), |
| dtype: field.Type, |
| }, |
| } |
| } |
| |
| // NewColumn returns a column from a field and a chunked data array. |
| // |
| // NewColumn panics if the field's data type is inconsistent with the data type |
| // of the chunked data array. |
| func NewColumn(field Field, chunks *Chunked) *Column { |
| col := Column{ |
| field: field, |
| data: chunks, |
| } |
| col.data.Retain() |
| |
| if !TypeEqual(col.data.DataType(), col.field.Type) { |
| col.data.Release() |
| panic("arrow/array: inconsistent data type") |
| } |
| |
| return &col |
| } |
| |
| // Retain increases the reference count by 1. |
| // Retain may be called simultaneously from multiple goroutines. |
| func (col *Column) Retain() { |
| col.data.Retain() |
| } |
| |
| // Release decreases the reference count by 1. |
| // When the reference count goes to zero, the memory is freed. |
| // Release may be called simultaneously from multiple goroutines. |
| func (col *Column) Release() { |
| col.data.Release() |
| } |
| |
| func (col *Column) Len() int { return col.data.Len() } |
| func (col *Column) NullN() int { return col.data.NullN() } |
| func (col *Column) Data() *Chunked { return col.data } |
| func (col *Column) Field() Field { return col.field } |
| func (col *Column) Name() string { return col.field.Name } |
| func (col *Column) DataType() DataType { return col.field.Type } |
| |
| // Chunked manages a collection of primitives arrays as one logical large array. |
| type Chunked struct { |
| refCount int64 // refCount must be first in the struct for 64 bit alignment and sync/atomic (https://github.com/golang/go/issues/37262) |
| |
| chunks []Array |
| |
| length int |
| nulls int |
| dtype DataType |
| } |
| |
| // NewChunked returns a new chunked array from the slice of arrays. |
| // |
| // NewChunked panics if the chunks do not have the same data type. |
| func NewChunked(dtype DataType, chunks []Array) *Chunked { |
| arr := &Chunked{ |
| chunks: make([]Array, len(chunks)), |
| refCount: 1, |
| dtype: dtype, |
| } |
| for i, chunk := range chunks { |
| if !TypeEqual(chunk.DataType(), dtype) { |
| panic("arrow/array: mismatch data type") |
| } |
| chunk.Retain() |
| arr.chunks[i] = chunk |
| arr.length += chunk.Len() |
| arr.nulls += chunk.NullN() |
| } |
| return arr |
| } |
| |
| // Retain increases the reference count by 1. |
| // Retain may be called simultaneously from multiple goroutines. |
| func (a *Chunked) Retain() { |
| atomic.AddInt64(&a.refCount, 1) |
| } |
| |
| // Release decreases the reference count by 1. |
| // When the reference count goes to zero, the memory is freed. |
| // Release may be called simultaneously from multiple goroutines. |
| func (a *Chunked) Release() { |
| debug.Assert(atomic.LoadInt64(&a.refCount) > 0, "too many releases") |
| |
| if atomic.AddInt64(&a.refCount, -1) == 0 { |
| for _, arr := range a.chunks { |
| arr.Release() |
| } |
| a.chunks = nil |
| a.length = 0 |
| a.nulls = 0 |
| } |
| } |
| |
| func (a *Chunked) Len() int { return a.length } |
| func (a *Chunked) NullN() int { return a.nulls } |
| func (a *Chunked) DataType() DataType { return a.dtype } |
| func (a *Chunked) Chunks() []Array { return a.chunks } |
| func (a *Chunked) Chunk(i int) Array { return a.chunks[i] } |