// blob: 580c57a371acabb0e9e750b07f5123a19494ee44 [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package array_test
import (
"fmt"
"math"
"reflect"
"strings"
"testing"
"github.com/apache/arrow/go/v10/arrow"
"github.com/apache/arrow/go/v10/arrow/array"
"github.com/apache/arrow/go/v10/arrow/bitutil"
"github.com/apache/arrow/go/v10/arrow/decimal128"
"github.com/apache/arrow/go/v10/arrow/internal/testing/types"
"github.com/apache/arrow/go/v10/arrow/memory"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"github.com/stretchr/testify/suite"
)
// PrimitiveDictionaryTestSuite is a testify suite whose test methods exercise
// dictionary builders for a single primitive value type per suite run.
type PrimitiveDictionaryTestSuite struct {
suite.Suite
// mem is the checked allocator created in SetupTest and verified in TearDownTest.
mem *memory.CheckedAllocator
// typ is the Arrow value type used for the dictionaries under test.
typ arrow.DataType
// reftyp is the Go type that appended values are converted to via reflection.
reftyp reflect.Type
}
// SetupTest installs a fresh checked allocator, wrapping the default
// allocator, on the suite.
func (p *PrimitiveDictionaryTestSuite) SetupTest() {
	checked := memory.NewCheckedAllocator(memory.DefaultAllocator)
	p.mem = checked
}
// TearDownTest asserts that the suite's checked allocator has a size of zero,
// i.e. that nothing allocated through it is still outstanding.
func (p *PrimitiveDictionaryTestSuite) TearDownTest() {
	t := p.T()
	p.mem.AssertSize(t, 0)
}
// TestPrimitiveDictionaryBuilders runs PrimitiveDictionaryTestSuite once per
// primitive value type, as a named subtest, pairing each Arrow data type with
// the Go type used for reflective appends.
func TestPrimitiveDictionaryBuilders(t *testing.T) {
	type suiteCase struct {
		name   string
		typ    arrow.DataType
		reftyp reflect.Type
	}

	cases := []suiteCase{
		{name: "int8", typ: arrow.PrimitiveTypes.Int8, reftyp: reflect.TypeOf(int8(0))},
		{name: "uint8", typ: arrow.PrimitiveTypes.Uint8, reftyp: reflect.TypeOf(uint8(0))},
		{name: "int16", typ: arrow.PrimitiveTypes.Int16, reftyp: reflect.TypeOf(int16(0))},
		{name: "uint16", typ: arrow.PrimitiveTypes.Uint16, reftyp: reflect.TypeOf(uint16(0))},
		{name: "int32", typ: arrow.PrimitiveTypes.Int32, reftyp: reflect.TypeOf(int32(0))},
		{name: "uint32", typ: arrow.PrimitiveTypes.Uint32, reftyp: reflect.TypeOf(uint32(0))},
		{name: "int64", typ: arrow.PrimitiveTypes.Int64, reftyp: reflect.TypeOf(int64(0))},
		{name: "uint64", typ: arrow.PrimitiveTypes.Uint64, reftyp: reflect.TypeOf(uint64(0))},
		{name: "float32", typ: arrow.PrimitiveTypes.Float32, reftyp: reflect.TypeOf(float32(0))},
		{name: "float64", typ: arrow.PrimitiveTypes.Float64, reftyp: reflect.TypeOf(float64(0))},
	}

	for i := range cases {
		tc := cases[i]
		t.Run(tc.name, func(t *testing.T) {
			s := &PrimitiveDictionaryTestSuite{typ: tc.typ, reftyp: tc.reftyp}
			suite.Run(t, s)
		})
	}
}
// TestDictionaryBuilderBasic appends 1, 2, 1 and a null to a fresh dictionary
// builder and checks that the resulting array equals a dictionary array with
// dictionary [1, 2] and indices [0, 1, 0, null].
func (p *PrimitiveDictionaryTestSuite) TestDictionaryBuilderBasic() {
	dictType := &arrow.DictionaryType{IndexType: &arrow.Int8Type{}, ValueType: p.typ}
	bldr := array.NewDictionaryBuilder(p.mem, dictType)
	defer bldr.Release()

	// Append is resolved by name through reflection and invoked with the
	// value converted to p.reftyp; its single result must be nil.
	appendFn := reflect.ValueOf(bldr).MethodByName("Append")
	for _, v := range []int{1, 2, 1} {
		out := appendFn.Call([]reflect.Value{reflect.ValueOf(v).Convert(p.reftyp)})
		p.Nil(out[0].Interface())
	}
	bldr.AppendNull()

	p.EqualValues(4, bldr.Len())
	p.EqualValues(1, bldr.NullN())

	got := bldr.NewArray().(*array.Dictionary)
	defer got.Release()
	p.True(arrow.TypeEqual(dictType, got.DataType()))

	wantDict, _, err := array.FromJSON(p.mem, dictType.ValueType, strings.NewReader("[1, 2]"))
	p.NoError(err)
	defer wantDict.Release()

	wantIdx, _, err := array.FromJSON(p.mem, dictType.IndexType, strings.NewReader("[0, 1, 0, null]"))
	p.NoError(err)
	defer wantIdx.Release()

	want := array.NewDictionaryArray(dictType, wantIdx, wantDict)
	defer want.Release()

	p.True(array.ArrayEqual(want, got))
}
// TestDictionaryBuilderInit creates a builder pre-populated with the
// dictionary [1, 2], appends 1, 2, 1 and a null, and checks that the result
// equals a dictionary array over that same dictionary with indices
// [0, 1, 0, null].
func (p *PrimitiveDictionaryTestSuite) TestDictionaryBuilderInit() {
	seed, _, err := array.FromJSON(p.mem, p.typ, strings.NewReader("[1, 2]"))
	p.NoError(err)
	defer seed.Release()

	dt := &arrow.DictionaryType{IndexType: &arrow.Int8Type{}, ValueType: p.typ}
	bldr := array.NewDictionaryBuilderWithDict(p.mem, dt, seed)
	defer bldr.Release()

	// Append is resolved by name through reflection and invoked with the
	// value converted to p.reftyp; its single result must be nil.
	appendFn := reflect.ValueOf(bldr).MethodByName("Append")
	for _, v := range []int{1, 2, 1} {
		out := appendFn.Call([]reflect.Value{reflect.ValueOf(v).Convert(p.reftyp)})
		p.Nil(out[0].Interface())
	}
	bldr.AppendNull()

	p.EqualValues(4, bldr.Len())
	p.EqualValues(1, bldr.NullN())

	got := bldr.NewDictionaryArray()
	defer got.Release()

	wantIdx, _, err := array.FromJSON(p.mem, dt.IndexType, strings.NewReader("[0, 1, 0, null]"))
	p.NoError(err)
	defer wantIdx.Release()

	want := array.NewDictionaryArray(dt, wantIdx, seed)
	defer want.Release()

	p.True(array.ArrayEqual(want, got))
}
// TestDictionaryNewBuilder obtains a builder for a dictionary type through the
// generic array.NewBuilder entry point, appends 1, 2, 1 and a null, and checks
// the result against dictionary [1, 2] with indices [0, 1, 0, null].
func (p *PrimitiveDictionaryTestSuite) TestDictionaryNewBuilder() {
	wantDict, _, err := array.FromJSON(p.mem, p.typ, strings.NewReader("[1, 2]"))
	p.NoError(err)
	defer wantDict.Release()

	dt := &arrow.DictionaryType{IndexType: &arrow.Int8Type{}, ValueType: p.typ}
	bldr := array.NewBuilder(p.mem, dt)
	defer bldr.Release()

	// Append is resolved by name through reflection and invoked with the
	// value converted to p.reftyp; its single result must be nil.
	appendFn := reflect.ValueOf(bldr).MethodByName("Append")
	for _, v := range []int{1, 2, 1} {
		out := appendFn.Call([]reflect.Value{reflect.ValueOf(v).Convert(p.reftyp)})
		p.Nil(out[0].Interface())
	}
	bldr.AppendNull()

	p.EqualValues(4, bldr.Len())
	p.EqualValues(1, bldr.NullN())

	got := bldr.NewArray().(*array.Dictionary)
	defer got.Release()

	wantIdx, _, err := array.FromJSON(p.mem, dt.IndexType, strings.NewReader("[0, 1, 0, null]"))
	p.NoError(err)
	defer wantIdx.Release()

	want := array.NewDictionaryArray(dt, wantIdx, wantDict)
	defer want.Release()

	p.True(array.ArrayEqual(want, got))
}
// TestDictionaryBuilderAppendArr appends the plain array [1, 2, 1] to a
// dictionary builder in one call and checks that the result equals a
// dictionary array with dictionary [1, 2] and indices [0, 1, 0].
func (p *PrimitiveDictionaryTestSuite) TestDictionaryBuilderAppendArr() {
	// fromJSON builds an array from JSON and stops the test on failure, so
	// that a nil array is never released or compared afterwards.
	fromJSON := func(dt arrow.DataType, js string) arrow.Array {
		arr, _, err := array.FromJSON(p.mem, dt, strings.NewReader(js))
		p.Require().NoError(err)
		return arr
	}

	valueType := p.typ
	intermediate := fromJSON(valueType, "[1, 2, 1]")
	defer intermediate.Release()

	expectedType := &arrow.DictionaryType{IndexType: &arrow.Int8Type{}, ValueType: p.typ}
	bldr := array.NewDictionaryBuilder(p.mem, expectedType)
	defer bldr.Release()

	// AppendArray reports failures through its error result; surface them
	// instead of silently dropping the return value.
	p.NoError(bldr.AppendArray(intermediate))

	result := bldr.NewArray()
	defer result.Release()

	expectedDict := fromJSON(expectedType.ValueType, "[1, 2]")
	defer expectedDict.Release()

	expectedIndices := fromJSON(expectedType.IndexType, "[0, 1, 0]")
	defer expectedIndices.Release()

	expected := array.NewDictionaryArray(expectedType, expectedIndices, expectedDict)
	defer expected.Release()

	p.True(array.ArrayEqual(expected, result))
}
// TestDictionaryBuilderDeltaDictionary builds a first array from 1, 2, 1, 2,
// then appends 2, 3, 3, 1, 3 and checks that NewDelta returns indices
// [1, 2, 2, 0, 2] together with a delta dictionary of [3].
func (p *PrimitiveDictionaryTestSuite) TestDictionaryBuilderDeltaDictionary() {
	expectedType := &arrow.DictionaryType{IndexType: &arrow.Int8Type{}, ValueType: p.typ}
	bldr := array.NewDictionaryBuilder(p.mem, expectedType)
	defer bldr.Release()

	// Append is resolved by name through reflection and invoked with each
	// value converted to p.reftyp; its single result must be nil.
	appfn := reflect.ValueOf(bldr).MethodByName("Append")
	appendAll := func(vals ...int) {
		for _, v := range vals {
			p.Nil(appfn.Call([]reflect.Value{reflect.ValueOf(v).Convert(p.reftyp)})[0].Interface())
		}
	}
	// fromJSON builds an array from JSON and stops the test on failure, so
	// that a nil array is never released or compared afterwards.
	fromJSON := func(dt arrow.DataType, js string) arrow.Array {
		arr, _, err := array.FromJSON(p.mem, dt, strings.NewReader(js))
		p.Require().NoError(err)
		return arr
	}

	appendAll(1, 2, 1, 2)
	result := bldr.NewArray()
	defer result.Release()

	exdict := fromJSON(p.typ, "[1, 2]")
	defer exdict.Release()
	exindices := fromJSON(arrow.PrimitiveTypes.Int8, "[0, 1, 0, 1]")
	defer exindices.Release()

	expected := array.NewDictionaryArray(result.DataType().(*arrow.DictionaryType), exindices, exdict)
	defer expected.Release()
	p.True(array.ArrayEqual(expected, result))

	appendAll(2, 3, 3, 1, 3)
	indices, delta, err := bldr.NewDelta()
	p.Require().NoError(err)
	defer indices.Release()
	defer delta.Release()

	// Each defer captures the array held at that moment, so reusing the
	// variable still releases both the earlier and the later array.
	exindices = fromJSON(arrow.PrimitiveTypes.Int8, "[1, 2, 2, 0, 2]")
	defer exindices.Release()
	exdelta := fromJSON(p.typ, "[3]")
	defer exdelta.Release()

	p.True(array.ArrayEqual(exindices, indices))
	p.True(array.ArrayEqual(exdelta, delta))
}
// TestDictionaryBuilderDoubleDeltaDictionary builds a first array from
// 1, 2, 1, 2 and then takes two successive deltas: after appending
// 2, 3, 3, 1, 3 it expects indices [1, 2, 2, 0, 2] with delta [3], and after
// appending 1, 2, 3, 4, 5 it expects indices [0, 1, 2, 3, 4] with delta [4, 5].
func (p *PrimitiveDictionaryTestSuite) TestDictionaryBuilderDoubleDeltaDictionary() {
	expectedType := &arrow.DictionaryType{IndexType: &arrow.Int8Type{}, ValueType: p.typ}
	bldr := array.NewDictionaryBuilder(p.mem, expectedType)
	defer bldr.Release()

	// Append is resolved by name through reflection and invoked with each
	// value converted to p.reftyp; its single result must be nil.
	appfn := reflect.ValueOf(bldr).MethodByName("Append")
	appendAll := func(vals ...int) {
		for _, v := range vals {
			p.Nil(appfn.Call([]reflect.Value{reflect.ValueOf(v).Convert(p.reftyp)})[0].Interface())
		}
	}
	// fromJSON builds an array from JSON and stops the test on failure, so
	// that a nil array is never released or compared afterwards.
	fromJSON := func(dt arrow.DataType, js string) arrow.Array {
		arr, _, err := array.FromJSON(p.mem, dt, strings.NewReader(js))
		p.Require().NoError(err)
		return arr
	}

	appendAll(1, 2, 1, 2)
	result := bldr.NewArray()
	defer result.Release()

	exdict := fromJSON(p.typ, "[1, 2]")
	defer exdict.Release()
	exindices := fromJSON(arrow.PrimitiveTypes.Int8, "[0, 1, 0, 1]")
	defer exindices.Release()

	expected := array.NewDictionaryArray(result.DataType().(*arrow.DictionaryType), exindices, exdict)
	defer expected.Release()
	p.True(array.ArrayEqual(expected, result))

	// First delta.
	appendAll(2, 3, 3, 1, 3)
	indices, delta, err := bldr.NewDelta()
	p.Require().NoError(err)
	defer indices.Release()
	defer delta.Release()

	// Each defer captures the array held at that moment, so reusing the
	// variables below still releases every array that was created.
	exindices = fromJSON(arrow.PrimitiveTypes.Int8, "[1, 2, 2, 0, 2]")
	defer exindices.Release()
	exdelta := fromJSON(p.typ, "[3]")
	defer exdelta.Release()

	p.True(array.ArrayEqual(exindices, indices))
	p.True(array.ArrayEqual(exdelta, delta))

	// Second delta.
	appendAll(1, 2, 3, 4, 5)
	indices, delta, err = bldr.NewDelta()
	p.Require().NoError(err)
	defer indices.Release()
	defer delta.Release()

	exindices = fromJSON(arrow.PrimitiveTypes.Int8, "[0, 1, 2, 3, 4]")
	defer exindices.Release()
	exdelta = fromJSON(p.typ, "[4, 5]")
	defer exdelta.Release()

	p.True(array.ArrayEqual(exindices, indices))
	p.True(array.ArrayEqual(exdelta, delta))
}
// TestNewResetBehavior checks that NewDictionaryArray leaves the builder with
// zero capacity, length and null count, and that a second array built
// afterwards reports a dictionary of four values.
func (p *PrimitiveDictionaryTestSuite) TestNewResetBehavior() {
	dictType := &arrow.DictionaryType{IndexType: &arrow.Int8Type{}, ValueType: p.typ}
	bldr := array.NewDictionaryBuilder(p.mem, dictType)
	defer bldr.Release()

	// Append is resolved by name through reflection and invoked with the
	// value converted to p.reftyp; its single result must be nil.
	appendFn := reflect.ValueOf(bldr).MethodByName("Append")
	appendVal := func(v int) {
		out := appendFn.Call([]reflect.Value{reflect.ValueOf(v).Convert(p.reftyp)})
		p.Nil(out[0].Interface())
	}

	appendVal(1)
	bldr.AppendNull()
	appendVal(1)
	appendVal(2)

	p.Less(0, bldr.Cap())
	p.Less(0, bldr.NullN())
	p.Equal(4, bldr.Len())

	first := bldr.NewDictionaryArray()
	defer first.Release()

	p.Zero(bldr.Cap())
	p.Zero(bldr.Len())
	p.Zero(bldr.NullN())

	appendVal(3)
	bldr.AppendNull()
	appendVal(4)

	second := bldr.NewDictionaryArray()
	defer second.Release()

	p.Equal(4, second.Dictionary().Len())
}
// TestResetFull checks that the dictionary persists across NewDictionaryArray
// calls (appending 3 after 1, 2 yields index [2] over dictionary [1, 2, 3])
// and that ResetFull discards it (appending 4 afterwards yields index [0]
// over dictionary [4]).
func (p *PrimitiveDictionaryTestSuite) TestResetFull() {
	expectedType := &arrow.DictionaryType{IndexType: &arrow.Int32Type{}, ValueType: p.typ}
	bldr := array.NewDictionaryBuilder(p.mem, expectedType)
	defer bldr.Release()

	// Append is resolved by name through reflection and invoked with the
	// value converted to p.reftyp; its single result must be nil.
	appfn := reflect.ValueOf(bldr).MethodByName("Append")
	appendVal := func(v int) {
		p.Nil(appfn.Call([]reflect.Value{reflect.ValueOf(v).Convert(p.reftyp)})[0].Interface())
	}
	// fromJSON builds an array from JSON and stops the test on failure, so
	// that a nil array is never released or compared afterwards.
	fromJSON := func(dt arrow.DataType, js string) arrow.Array {
		arr, _, err := array.FromJSON(p.mem, dt, strings.NewReader(js))
		p.Require().NoError(err)
		return arr
	}

	appendVal(1)
	bldr.AppendNull()
	appendVal(1)
	appendVal(2)
	result := bldr.NewDictionaryArray()
	defer result.Release()

	appendVal(3)
	result = bldr.NewDictionaryArray()
	defer result.Release()

	// Each array is deferred for release right after creation so that a
	// failure building the next one cannot leak it.
	exindices := fromJSON(arrow.PrimitiveTypes.Int32, "[2]")
	defer exindices.Release()
	exdict := fromJSON(p.typ, "[1, 2, 3]")
	defer exdict.Release()

	p.True(array.ArrayEqual(exindices, result.Indices()))
	p.True(array.ArrayEqual(exdict, result.Dictionary()))

	bldr.ResetFull()
	appendVal(4)
	result = bldr.NewDictionaryArray()
	defer result.Release()

	exindices = fromJSON(arrow.PrimitiveTypes.Int32, "[0]")
	defer exindices.Release()
	exdict = fromJSON(p.typ, "[4]")
	defer exdict.Release()

	p.True(array.ArrayEqual(exindices, result.Indices()))
	p.True(array.ArrayEqual(exdict, result.Dictionary()))
}
// TestBasicStringDictionaryBuilder appends "test", "test2", "test" to a string
// dictionary builder and checks that the result equals a dictionary array
// with dictionary ["test", "test2"] and indices [0, 1, 0].
func TestBasicStringDictionaryBuilder(t *testing.T) {
	mem := memory.NewCheckedAllocator(memory.DefaultAllocator)
	defer mem.AssertSize(t, 0)

	// fromJSON builds an array from JSON and stops the test on failure, so
	// that a nil array is never released or compared afterwards.
	fromJSON := func(dt arrow.DataType, js string) arrow.Array {
		arr, _, err := array.FromJSON(mem, dt, strings.NewReader(js))
		require.NoError(t, err)
		return arr
	}

	dictType := &arrow.DictionaryType{IndexType: &arrow.Int8Type{}, ValueType: arrow.BinaryTypes.String}
	bldr := array.NewDictionaryBuilder(mem, dictType)
	defer bldr.Release()

	builder := bldr.(*array.BinaryDictionaryBuilder)
	assert.NoError(t, builder.Append([]byte("test")))
	assert.NoError(t, builder.AppendString("test2"))
	assert.NoError(t, builder.AppendString("test"))

	result := bldr.NewDictionaryArray()
	defer result.Release()

	exdict := fromJSON(arrow.BinaryTypes.String, `["test", "test2"]`)
	defer exdict.Release()
	exint := fromJSON(arrow.PrimitiveTypes.Int8, "[0, 1, 0]")
	defer exint.Release()

	assert.True(t, arrow.TypeEqual(dictType, result.DataType()))
	expected := array.NewDictionaryArray(dictType, exint, exdict)
	defer expected.Release()

	assert.True(t, array.ArrayEqual(expected, result))
}
// TestStringDictionaryInsertValues pre-loads a string dictionary builder with
// ["c", "a", "b", "d"], checks that inserting the same values again succeeds
// and that inserting a Binary-typed array is rejected, then appends values
// and verifies the indices follow the pre-loaded dictionary order.
func TestStringDictionaryInsertValues(t *testing.T) {
	mem := memory.NewCheckedAllocator(memory.DefaultAllocator)
	defer mem.AssertSize(t, 0)

	// fromJSON builds an array from JSON and stops the test on failure, so
	// that a nil array is never released or type-asserted afterwards.
	fromJSON := func(dt arrow.DataType, js string) arrow.Array {
		arr, _, err := array.FromJSON(mem, dt, strings.NewReader(js))
		require.NoError(t, err)
		return arr
	}

	exdict := fromJSON(arrow.BinaryTypes.String, `["c", "a", "b", "d"]`)
	defer exdict.Release()
	invalidDict := fromJSON(arrow.BinaryTypes.Binary, `["ZQ==", "Zg=="]`)
	defer invalidDict.Release()

	dictType := &arrow.DictionaryType{IndexType: &arrow.Int16Type{}, ValueType: arrow.BinaryTypes.String}
	bldr := array.NewDictionaryBuilder(mem, dictType)
	defer bldr.Release()

	builder := bldr.(*array.BinaryDictionaryBuilder)
	assert.NoError(t, builder.InsertStringDictValues(exdict.(*array.String)))
	// inserting again should have no effect
	assert.NoError(t, builder.InsertStringDictValues(exdict.(*array.String)))

	assert.Error(t, builder.InsertDictValues(invalidDict.(*array.Binary)))

	for i := 0; i < 2; i++ {
		assert.NoError(t, builder.AppendString("c"))
		assert.NoError(t, builder.AppendString("a"))
		assert.NoError(t, builder.AppendString("b"))
		builder.AppendNull()
		assert.NoError(t, builder.AppendString("d"))
	}

	assert.Equal(t, 10, bldr.Len())

	result := bldr.NewDictionaryArray()
	defer result.Release()

	exindices := fromJSON(arrow.PrimitiveTypes.Int16, "[0, 1, 2, null, 3, 0, 1, 2, null, 3]")
	defer exindices.Release()
	expected := array.NewDictionaryArray(dictType, exindices, exdict)
	defer expected.Release()
	assert.True(t, array.ArrayEqual(expected, result))
}
// TestStringDictionaryBuilderInit creates a string dictionary builder seeded
// with ["test", "test2"], appends "test", "test2", "test", and checks the
// result against a dictionary array with indices [0, 1, 0] over that seed.
func TestStringDictionaryBuilderInit(t *testing.T) {
	mem := memory.NewCheckedAllocator(memory.DefaultAllocator)
	defer mem.AssertSize(t, 0)

	// fromJSON builds an array from JSON and stops the test on failure, so
	// that a nil array is never released or queried afterwards.
	fromJSON := func(dt arrow.DataType, js string) arrow.Array {
		arr, _, err := array.FromJSON(mem, dt, strings.NewReader(js))
		require.NoError(t, err)
		return arr
	}

	dictArr := fromJSON(arrow.BinaryTypes.String, `["test", "test2"]`)
	defer dictArr.Release()
	intarr := fromJSON(arrow.PrimitiveTypes.Int8, "[0, 1, 0]")
	defer intarr.Release()

	dictType := &arrow.DictionaryType{IndexType: intarr.DataType().(arrow.FixedWidthDataType), ValueType: arrow.BinaryTypes.String}
	bldr := array.NewDictionaryBuilderWithDict(mem, dictType, dictArr)
	defer bldr.Release()

	builder := bldr.(*array.BinaryDictionaryBuilder)
	assert.NoError(t, builder.AppendString("test"))
	assert.NoError(t, builder.AppendString("test2"))
	assert.NoError(t, builder.AppendString("test"))

	result := bldr.NewDictionaryArray()
	defer result.Release()

	expected := array.NewDictionaryArray(dictType, intarr, dictArr)
	defer expected.Release()

	assert.True(t, array.ArrayEqual(expected, result))
}
// TestStringDictionaryBuilderOnlyNull appends a single null to a string
// dictionary builder and checks that the result equals a dictionary array
// with an empty dictionary and indices [null].
func TestStringDictionaryBuilderOnlyNull(t *testing.T) {
	mem := memory.NewCheckedAllocator(memory.DefaultAllocator)
	defer mem.AssertSize(t, 0)

	// fromJSON builds an array from JSON and stops the test on failure, so
	// that a nil array is never released or compared afterwards.
	fromJSON := func(dt arrow.DataType, js string) arrow.Array {
		arr, _, err := array.FromJSON(mem, dt, strings.NewReader(js))
		require.NoError(t, err)
		return arr
	}

	dictType := &arrow.DictionaryType{IndexType: &arrow.Int8Type{}, ValueType: arrow.BinaryTypes.String}
	bldr := array.NewDictionaryBuilder(mem, dictType)
	defer bldr.Release()

	bldr.AppendNull()

	result := bldr.NewDictionaryArray()
	defer result.Release()

	dict := fromJSON(arrow.BinaryTypes.String, "[]")
	defer dict.Release()
	intarr := fromJSON(arrow.PrimitiveTypes.Int8, "[null]")
	defer intarr.Release()

	expected := array.NewDictionaryArray(dictType, intarr, dict)
	defer expected.Release()

	assert.True(t, array.ArrayEqual(expected, result))
}
// TestStringDictionaryBuilderDelta builds a first array from "test", "test2",
// "test", then appends "test2", "test3", "test2" and checks that NewDelta
// returns indices [1, 2, 1] together with a delta dictionary of ["test3"].
func TestStringDictionaryBuilderDelta(t *testing.T) {
	mem := memory.NewCheckedAllocator(memory.DefaultAllocator)
	defer mem.AssertSize(t, 0)

	// fromJSON builds an array from JSON and stops the test on failure, so
	// that a nil array is never released or compared afterwards.
	fromJSON := func(dt arrow.DataType, js string) arrow.Array {
		arr, _, err := array.FromJSON(mem, dt, strings.NewReader(js))
		require.NoError(t, err)
		return arr
	}

	dictType := &arrow.DictionaryType{IndexType: &arrow.Int8Type{}, ValueType: arrow.BinaryTypes.String}
	bldr := array.NewDictionaryBuilder(mem, dictType)
	defer bldr.Release()

	builder := bldr.(*array.BinaryDictionaryBuilder)
	assert.NoError(t, builder.AppendString("test"))
	assert.NoError(t, builder.AppendString("test2"))
	assert.NoError(t, builder.AppendString("test"))

	result := bldr.NewDictionaryArray()
	defer result.Release()

	exdict := fromJSON(arrow.BinaryTypes.String, `["test", "test2"]`)
	defer exdict.Release()
	exint := fromJSON(arrow.PrimitiveTypes.Int8, "[0, 1, 0]")
	defer exint.Release()

	assert.True(t, arrow.TypeEqual(dictType, result.DataType()))
	expected := array.NewDictionaryArray(dictType, exint, exdict)
	defer expected.Release()
	assert.True(t, array.ArrayEqual(expected, result))

	assert.NoError(t, builder.AppendString("test2"))
	assert.NoError(t, builder.AppendString("test3"))
	assert.NoError(t, builder.AppendString("test2"))

	indices, delta, err := builder.NewDelta()
	// Stop here on failure: indices and delta are dereferenced below.
	require.NoError(t, err)
	defer indices.Release()
	defer delta.Release()

	exdelta := fromJSON(arrow.BinaryTypes.String, `["test3"]`)
	defer exdelta.Release()
	// The earlier defer already captured the previous exint array, so
	// reusing the variable still releases both.
	exint = fromJSON(arrow.PrimitiveTypes.Int8, "[1, 2, 1]")
	defer exint.Release()

	assert.True(t, array.ArrayEqual(exdelta, delta))
	assert.True(t, array.ArrayEqual(exint, indices))
}
// TestStringDictionaryBuilderBigDelta fills an Int16-indexed string dictionary
// builder with testlen distinct values ("test0" ... ), verifies the full
// array, and then takes two deltas. Each delta round appends testlen copies
// of an existing value followed by testlen copies of one new value, and
// expects the delta dictionary to contain only that new value.
func TestStringDictionaryBuilderBigDelta(t *testing.T) {
	const testlen = 2048

	mem := memory.NewCheckedAllocator(memory.DefaultAllocator)
	defer mem.AssertSize(t, 0)

	dictType := &arrow.DictionaryType{IndexType: &arrow.Int16Type{}, ValueType: arrow.BinaryTypes.String}
	bldr := array.NewDictionaryBuilder(mem, dictType)
	defer bldr.Release()
	builder := bldr.(*array.BinaryDictionaryBuilder)

	strbldr := array.NewStringBuilder(mem)
	defer strbldr.Release()

	intbldr := array.NewInt16Builder(mem)
	defer intbldr.Release()

	for idx := int16(0); idx < testlen; idx++ {
		val := fmt.Sprintf("test%d", idx)
		assert.NoError(t, builder.AppendString(val))
		strbldr.Append(val)
		intbldr.Append(idx)
	}

	result := bldr.NewDictionaryArray()
	defer result.Release()
	strarr := strbldr.NewStringArray()
	defer strarr.Release()
	intarr := intbldr.NewInt16Array()
	defer intarr.Release()

	expected := array.NewDictionaryArray(dictType, intarr, strarr)
	defer expected.Release()

	assert.True(t, array.ArrayEqual(expected, result))

	// First delta round: "test1" already sits at index 1; the new value is
	// expected at index testlen.
	strbldr2 := array.NewStringBuilder(mem)
	defer strbldr2.Release()
	intbldr2 := array.NewInt16Builder(mem)
	defer intbldr2.Release()

	for idx := int16(0); idx < testlen; idx++ {
		assert.NoError(t, builder.AppendString("test1"))
		intbldr2.Append(1)
	}
	for idx := int16(0); idx < testlen; idx++ {
		assert.NoError(t, builder.AppendString("test_new_value1"))
		intbldr2.Append(testlen)
	}
	strbldr2.Append("test_new_value1")

	indices2, delta2, err := bldr.NewDelta()
	// Stop here on failure: indices2 and delta2 are dereferenced below.
	require.NoError(t, err)
	defer indices2.Release()
	defer delta2.Release()
	strarr2 := strbldr2.NewStringArray()
	defer strarr2.Release()
	intarr2 := intbldr2.NewInt16Array()
	defer intarr2.Release()

	assert.True(t, array.ArrayEqual(intarr2, indices2))
	assert.True(t, array.ArrayEqual(strarr2, delta2))

	// Second delta round: "test2" already sits at index 2; the new value is
	// expected at index testlen + 1.
	strbldr3 := array.NewStringBuilder(mem)
	defer strbldr3.Release()
	intbldr3 := array.NewInt16Builder(mem)
	defer intbldr3.Release()

	for idx := int16(0); idx < testlen; idx++ {
		assert.NoError(t, builder.AppendString("test2"))
		intbldr3.Append(2)
	}
	for idx := int16(0); idx < testlen; idx++ {
		assert.NoError(t, builder.AppendString("test_new_value2"))
		intbldr3.Append(testlen + 1)
	}
	strbldr3.Append("test_new_value2")

	indices3, delta3, err := bldr.NewDelta()
	require.NoError(t, err)
	defer indices3.Release()
	defer delta3.Release()
	strarr3 := strbldr3.NewStringArray()
	defer strarr3.Release()
	intarr3 := intbldr3.NewInt16Array()
	defer intarr3.Release()

	assert.True(t, array.ArrayEqual(intarr3, indices3))
	assert.True(t, array.ArrayEqual(strarr3, delta3))
}
// TestFixedSizeBinaryDictionaryBuilder appends two distinct 4-byte values in
// the order first, second, first and checks that the result equals a
// dictionary array with those two values as dictionary and indices [0, 1, 0].
func TestFixedSizeBinaryDictionaryBuilder(t *testing.T) {
	mem := memory.NewCheckedAllocator(memory.DefaultAllocator)
	defer mem.AssertSize(t, 0)

	valueType := &arrow.FixedSizeBinaryType{ByteWidth: 4}
	dictType := &arrow.DictionaryType{IndexType: &arrow.Int8Type{}, ValueType: valueType}
	bldr := array.NewDictionaryBuilder(mem, dictType)
	defer bldr.Release()

	var (
		first  = []byte{12, 12, 11, 12}
		second = []byte{12, 12, 11, 11}
	)

	dictBldr := bldr.(*array.FixedSizeBinaryDictionaryBuilder)
	for _, v := range [][]byte{first, second, first} {
		assert.NoError(t, dictBldr.Append(v))
	}

	got := dictBldr.NewDictionaryArray()
	defer got.Release()

	// Expected dictionary: the two distinct values in first-seen order.
	valBldr := array.NewFixedSizeBinaryBuilder(mem, dictType.ValueType.(*arrow.FixedSizeBinaryType))
	defer valBldr.Release()
	valBldr.Append(first)
	valBldr.Append(second)
	wantDict := valBldr.NewFixedSizeBinaryArray()
	defer wantDict.Release()

	// Expected indices into that dictionary.
	idxBldr := array.NewInt8Builder(mem)
	defer idxBldr.Release()
	idxBldr.AppendValues([]int8{0, 1, 0}, nil)
	wantIdx := idxBldr.NewInt8Array()
	defer wantIdx.Release()

	want := array.NewDictionaryArray(dictType, wantIdx, wantDict)
	defer want.Release()

	assert.True(t, array.ArrayEqual(want, got))
}
// TestFixedSizeBinaryDictionaryBuilderInit creates a fixed-size-binary
// dictionary builder seeded with ["abcd", "wxyz"], appends "abcd", "wxyz",
// "abcd", and checks the result against indices [0, 1, 0] over that seed.
func TestFixedSizeBinaryDictionaryBuilderInit(t *testing.T) {
	mem := memory.NewCheckedAllocator(memory.DefaultAllocator)
	defer mem.AssertSize(t, 0)

	fsbBldr := array.NewFixedSizeBinaryBuilder(mem, &arrow.FixedSizeBinaryType{ByteWidth: 4})
	defer fsbBldr.Release()

	test, test2 := []byte("abcd"), []byte("wxyz")
	fsbBldr.AppendValues([][]byte{test, test2}, nil)
	dictArr := fsbBldr.NewFixedSizeBinaryArray()
	defer dictArr.Release()

	dictType := &arrow.DictionaryType{IndexType: &arrow.Int8Type{}, ValueType: dictArr.DataType()}
	bldr := array.NewDictionaryBuilderWithDict(mem, dictType, dictArr)
	defer bldr.Release()

	builder := bldr.(*array.FixedSizeBinaryDictionaryBuilder)
	assert.NoError(t, builder.Append(test))
	assert.NoError(t, builder.Append(test2))
	assert.NoError(t, builder.Append(test))

	result := builder.NewDictionaryArray()
	defer result.Release()

	indices, _, err := array.FromJSON(mem, arrow.PrimitiveTypes.Int8, strings.NewReader("[0, 1, 0]"))
	// Stop here on failure: indices would be nil and is dereferenced below.
	require.NoError(t, err)
	defer indices.Release()

	expected := array.NewDictionaryArray(dictType, indices, dictArr)
	defer expected.Release()

	assert.True(t, array.ArrayEqual(expected, result))
}
// TestFixedSizeBinaryDictionaryBuilderMakeBuilder obtains a fixed-size-binary
// dictionary builder through the generic array.NewBuilder entry point,
// appends "abcd", "wxyz", "abcd", and checks the result against indices
// [0, 1, 0] over the dictionary ["abcd", "wxyz"].
func TestFixedSizeBinaryDictionaryBuilderMakeBuilder(t *testing.T) {
	mem := memory.NewCheckedAllocator(memory.DefaultAllocator)
	defer mem.AssertSize(t, 0)

	fsbBldr := array.NewFixedSizeBinaryBuilder(mem, &arrow.FixedSizeBinaryType{ByteWidth: 4})
	defer fsbBldr.Release()

	test, test2 := []byte("abcd"), []byte("wxyz")
	fsbBldr.AppendValues([][]byte{test, test2}, nil)
	dictArr := fsbBldr.NewFixedSizeBinaryArray()
	defer dictArr.Release()

	dictType := &arrow.DictionaryType{IndexType: &arrow.Int8Type{}, ValueType: dictArr.DataType()}
	bldr := array.NewBuilder(mem, dictType)
	defer bldr.Release()

	builder := bldr.(*array.FixedSizeBinaryDictionaryBuilder)
	assert.NoError(t, builder.Append(test))
	assert.NoError(t, builder.Append(test2))
	assert.NoError(t, builder.Append(test))

	result := builder.NewDictionaryArray()
	defer result.Release()

	indices, _, err := array.FromJSON(mem, arrow.PrimitiveTypes.Int8, strings.NewReader("[0, 1, 0]"))
	// Stop here on failure: indices would be nil and is dereferenced below.
	require.NoError(t, err)
	defer indices.Release()

	expected := array.NewDictionaryArray(dictType, indices, dictArr)
	defer expected.Release()

	assert.True(t, array.ArrayEqual(expected, result))
}
// TestFixedSizeBinaryDictionaryBuilderDeltaDictionary builds a first array
// from three appends over two distinct 4-byte values, then appends those two
// values plus a third new one and checks that NewDelta returns indices
// [0, 1, 2] together with a delta dictionary holding only the new value.
func TestFixedSizeBinaryDictionaryBuilderDeltaDictionary(t *testing.T) {
	mem := memory.NewCheckedAllocator(memory.DefaultAllocator)
	defer mem.AssertSize(t, 0)

	valueType := &arrow.FixedSizeBinaryType{ByteWidth: 4}
	dictType := &arrow.DictionaryType{IndexType: &arrow.Int8Type{}, ValueType: valueType}
	bldr := array.NewDictionaryBuilder(mem, dictType)
	defer bldr.Release()

	dictBldr := bldr.(*array.FixedSizeBinaryDictionaryBuilder)

	var (
		first  = []byte{12, 12, 11, 12}
		second = []byte{12, 12, 11, 11}
		third  = []byte{12, 12, 11, 10}
	)

	for _, v := range [][]byte{first, second, first} {
		assert.NoError(t, dictBldr.Append(v))
	}

	got := bldr.NewDictionaryArray()
	defer got.Release()

	valBldr := array.NewFixedSizeBinaryBuilder(mem, dictType.ValueType.(*arrow.FixedSizeBinaryType))
	defer valBldr.Release()
	valBldr.AppendValues([][]byte{first, second}, nil)
	wantDict := valBldr.NewFixedSizeBinaryArray()
	defer wantDict.Release()

	idxBldr := array.NewInt8Builder(mem)
	defer idxBldr.Release()
	idxBldr.AppendValues([]int8{0, 1, 0}, nil)
	wantIdx := idxBldr.NewInt8Array()
	defer wantIdx.Release()

	want := array.NewDictionaryArray(dictType, wantIdx, wantDict)
	defer want.Release()
	assert.True(t, array.ArrayEqual(want, got))

	for _, v := range [][]byte{first, second, third} {
		assert.NoError(t, dictBldr.Append(v))
	}

	gotIdx, gotDelta, err := dictBldr.NewDelta()
	assert.NoError(t, err)
	defer gotIdx.Release()
	defer gotDelta.Release()

	// The value and index builders are reused to assemble the expected
	// delta dictionary and the expected indices for the second batch.
	valBldr.Append(third)
	wantDelta := valBldr.NewFixedSizeBinaryArray()
	defer wantDelta.Release()

	idxBldr.AppendValues([]int8{0, 1, 2}, nil)
	wantDeltaIdx := idxBldr.NewInt8Array()
	defer wantDeltaIdx.Release()

	assert.True(t, array.ArrayEqual(wantDeltaIdx, gotIdx))
	assert.True(t, array.ArrayEqual(wantDelta, gotDelta))
}
// TestDecimalDictionaryBuilderBasic appends the decimal128 values 12, 12, 11,
// 12 to a dictionary builder and checks that the result approximately equals
// a dictionary array with dictionary [12, 11] and indices [0, 0, 1, 0].
func TestDecimalDictionaryBuilderBasic(t *testing.T) {
	mem := memory.NewCheckedAllocator(memory.DefaultAllocator)
	defer mem.AssertSize(t, 0)

	// fromJSON builds an array from JSON and stops the test on failure, so
	// that a nil array is never released or compared afterwards.
	fromJSON := func(dt arrow.DataType, js string) arrow.Array {
		arr, _, err := array.FromJSON(mem, dt, strings.NewReader(js))
		require.NoError(t, err)
		return arr
	}

	test := []decimal128.Num{decimal128.FromI64(12), decimal128.FromI64(12), decimal128.FromI64(11), decimal128.FromI64(12)}
	dictType := &arrow.DictionaryType{IndexType: &arrow.Int8Type{}, ValueType: &arrow.Decimal128Type{Precision: 2, Scale: 0}}
	bldr := array.NewDictionaryBuilder(mem, dictType)
	defer bldr.Release()

	builder := bldr.(*array.Decimal128DictionaryBuilder)
	for _, v := range test {
		assert.NoError(t, builder.Append(v))
	}

	result := bldr.NewDictionaryArray()
	defer result.Release()

	indices := fromJSON(dictType.IndexType, "[0, 0, 1, 0]")
	defer indices.Release()
	dict := fromJSON(dictType.ValueType, "[12, 11]")
	defer dict.Release()

	expected := array.NewDictionaryArray(dictType, indices, dict)
	defer expected.Release()

	assert.True(t, array.ArrayApproxEqual(expected, result))
}
// TestNullDictionaryBuilderBasic appends three nulls individually and three
// more via AppendArray to a null-valued dictionary builder, checking the
// length and null count of the builder after each step and of the final array.
func TestNullDictionaryBuilderBasic(t *testing.T) {
	mem := memory.NewCheckedAllocator(memory.DefaultAllocator)
	defer mem.AssertSize(t, 0)

	dictType := &arrow.DictionaryType{IndexType: &arrow.Int8Type{}, ValueType: arrow.Null}
	bldr := array.NewBuilder(mem, dictType)
	defer bldr.Release()

	builder := bldr.(*array.NullDictionaryBuilder)
	builder.AppendNull()
	builder.AppendNull()
	builder.AppendNull()
	assert.Equal(t, 3, builder.Len())
	assert.Equal(t, 3, builder.NullN())

	nullarr, _, err := array.FromJSON(mem, arrow.Null, strings.NewReader("[null, null, null]"))
	// Stop here on failure: nullarr would be nil and is dereferenced below.
	require.NoError(t, err)
	defer nullarr.Release()

	assert.NoError(t, builder.AppendArray(nullarr))
	assert.Equal(t, 6, bldr.Len())
	assert.Equal(t, 6, bldr.NullN())

	result := builder.NewDictionaryArray()
	defer result.Release()
	assert.Equal(t, 6, result.Len())
	assert.Equal(t, 6, result.NullN())
}
// TestDictionaryEquals checks ArrayEqual and ArraySliceEqual on dictionary
// arrays: identical arrays, arrays differing only at a null position, arrays
// with different dictionaries, arrays with different indices, and slices
// (including a slice of a slice) of the same array.
func TestDictionaryEquals(t *testing.T) {
	mem := memory.NewCheckedAllocator(memory.DefaultAllocator)
	defer mem.AssertSize(t, 0)

	// strArr builds a string array from JSON and stops the test on failure,
	// so that a nil array is never released or wrapped afterwards.
	strArr := func(js string) arrow.Array {
		arr, _, err := array.FromJSON(mem, arrow.BinaryTypes.String, strings.NewReader(js))
		require.NoError(t, err)
		return arr
	}

	// Position 2 is null in every index array built below.
	isValid := []bool{true, true, false, true, true, true}

	dict := strArr(`["foo", "bar", "baz"]`)
	defer dict.Release()
	dictType := &arrow.DictionaryType{IndexType: &arrow.Uint16Type{}, ValueType: arrow.BinaryTypes.String}

	dict2 := strArr(`["foo", "bar", "baz", "qux"]`)
	defer dict2.Release()
	dictType2 := &arrow.DictionaryType{IndexType: &arrow.Uint16Type{}, ValueType: arrow.BinaryTypes.String}

	idxbuilder := array.NewUint16Builder(mem)
	defer idxbuilder.Release()

	idxbuilder.AppendValues([]uint16{1, 2, math.MaxUint16, 0, 2, 0}, isValid)
	indices := idxbuilder.NewArray()
	defer indices.Release()

	idxbuilder.AppendValues([]uint16{1, 2, 0, 0, 2, 0}, isValid)
	indices2 := idxbuilder.NewArray()
	defer indices2.Release()

	idxbuilder.AppendValues([]uint16{1, 1, 0, 0, 2, 0}, isValid)
	indices3 := idxbuilder.NewArray()
	defer indices3.Release()

	var (
		arr  = array.NewDictionaryArray(dictType, indices, dict)
		arr2 = array.NewDictionaryArray(dictType, indices2, dict)
		arr3 = array.NewDictionaryArray(dictType2, indices, dict2)
		arr4 = array.NewDictionaryArray(dictType, indices3, dict)
	)
	defer func() {
		arr.Release()
		arr2.Release()
		arr3.Release()
		arr4.Release()
	}()

	assert.True(t, array.ArrayEqual(arr, arr))
	// equal because the unequal index is masked by null
	assert.True(t, array.ArrayEqual(arr, arr2))
	// unequal dictionaries
	assert.False(t, array.ArrayEqual(arr, arr3))
	// unequal indices
	assert.False(t, array.ArrayEqual(arr, arr4))
	assert.True(t, array.ArraySliceEqual(arr, 3, 6, arr4, 3, 6))
	assert.False(t, array.ArraySliceEqual(arr, 1, 3, arr4, 1, 3))

	sz := arr.Len()
	slice := array.NewSlice(arr, 2, int64(sz))
	defer slice.Release()
	slice2 := array.NewSlice(arr, 2, int64(sz))
	defer slice2.Release()

	assert.Equal(t, sz-2, slice.Len())
	assert.True(t, array.ArrayEqual(slice, slice2))
	assert.True(t, array.ArraySliceEqual(arr, 2, int64(arr.Len()), slice, 0, int64(slice.Len())))

	// chained slice; each defer captures the slice held at that moment, so
	// every intermediate slice is released even though the variable is reused
	slice2 = array.NewSlice(arr, 1, int64(arr.Len()))
	defer slice2.Release()
	slice2 = array.NewSlice(slice2, 1, int64(slice2.Len()))
	defer slice2.Release()

	assert.True(t, array.ArrayEqual(slice, slice2))

	slice = array.NewSlice(arr, 1, 4)
	defer slice.Release()
	slice2 = array.NewSlice(arr, 1, 4)
	defer slice2.Release()

	assert.Equal(t, 3, slice.Len())
	assert.True(t, array.ArrayEqual(slice, slice2))
	assert.True(t, array.ArraySliceEqual(arr, 1, 4, slice, 0, int64(slice.Len())))
}
// TestDictionaryIndexTypes builds a string dictionary once per integer index
// type in the list below and checks the indices emitted by the builder.
func TestDictionaryIndexTypes(t *testing.T) {
	mem := memory.NewCheckedAllocator(memory.DefaultAllocator)
	defer mem.AssertSize(t, 0)

	indexTypes := []arrow.DataType{
		arrow.PrimitiveTypes.Int8, arrow.PrimitiveTypes.Uint8,
		arrow.PrimitiveTypes.Int16, arrow.PrimitiveTypes.Uint16,
		arrow.PrimitiveTypes.Int32, arrow.PrimitiveTypes.Uint32,
		arrow.PrimitiveTypes.Int64, arrow.PrimitiveTypes.Uint64,
	}

	for _, idxType := range indexTypes {
		t.Run(idxType.Name(), func(t *testing.T) {
			// Each subtest must leave the shared allocator at the size it found it.
			scope := memory.NewCheckedAllocatorScope(mem)
			defer scope.CheckSize(t)

			dt := &arrow.DictionaryType{IndexType: idxType, ValueType: arrow.BinaryTypes.String}
			generic := array.NewDictionaryBuilder(mem, dt)
			defer generic.Release()
			bldr := generic.(*array.BinaryDictionaryBuilder)

			// "foo" appears twice, followed by a nil entry.
			for _, s := range []string{"foo", "bar", "foo", "baz"} {
				bldr.AppendString(s)
			}
			bldr.Append(nil)

			assert.Equal(t, 5, bldr.Len())
			assert.Equal(t, 1, bldr.NullN())

			got := bldr.NewDictionaryArray()
			defer got.Release()

			want, _, _ := array.FromJSON(mem, idxType, strings.NewReader("[0, 1, 0, 2, null]"))
			defer want.Release()
			assert.True(t, array.ArrayEqual(want, got.Indices()))
		})
	}
}
// TestDictionaryFromArrays exercises array.NewValidatedDictionaryArray for
// each index type in the list below against a three-entry string dictionary:
// valid indices must be accepted, while the index 3 and a mismatched index
// type must be rejected.
func TestDictionaryFromArrays(t *testing.T) {
	mem := memory.NewCheckedAllocator(memory.DefaultAllocator)
	defer mem.AssertSize(t, 0)

	values, _, _ := array.FromJSON(mem, arrow.BinaryTypes.String, strings.NewReader(`["foo", "bar", "baz"]`))
	defer values.Release()

	indexTypes := []arrow.DataType{
		arrow.PrimitiveTypes.Int8, arrow.PrimitiveTypes.Uint8,
		arrow.PrimitiveTypes.Int16, arrow.PrimitiveTypes.Uint16,
		arrow.PrimitiveTypes.Int32, arrow.PrimitiveTypes.Uint32,
		arrow.PrimitiveTypes.Int64, arrow.PrimitiveTypes.Uint64,
	}

	for _, idxType := range indexTypes {
		t.Run(idxType.Name(), func(t *testing.T) {
			scope := memory.NewCheckedAllocatorScope(mem)
			defer scope.CheckSize(t)

			dt := &arrow.DictionaryType{IndexType: idxType, ValueType: arrow.BinaryTypes.String}

			inRange, _, _ := array.FromJSON(mem, idxType, strings.NewReader("[1, 2, 0, 0, 2, 0]"))
			defer inRange.Release()
			outOfRange, _, _ := array.FromJSON(mem, idxType, strings.NewReader("[1, 2, 0, 3, 2, 0]"))
			defer outOfRange.Release()

			// Every index is below 3, so validation succeeds.
			valid, err := array.NewValidatedDictionaryArray(dt, inRange, values)
			assert.NoError(t, err)
			defer valid.Release()

			// Index 3 is expected to be rejected.
			_, err = array.NewValidatedDictionaryArray(dt, outOfRange, values)
			assert.Error(t, err)

			// This extra case runs for the signed index types only.
			if id := idxType.ID(); id == arrow.INT8 || id == arrow.INT16 || id == arrow.INT32 || id == arrow.INT64 {
				withNulls, _, _ := array.FromJSON(mem, idxType, strings.NewReader("[1, 2, 0, null, 2, 0]"))
				defer withNulls.Release()
				// Clear bit 2 of buffer 0 (presumably the validity bitmap) in
				// addition to the null coming from the JSON input.
				bitutil.ClearBit(withNulls.Data().Buffers()[0].Bytes(), 2)
				masked, err := array.NewValidatedDictionaryArray(dt, withNulls, values)
				assert.NoError(t, err)
				defer masked.Release()
			}

			// A null elsewhere does not excuse the index 3.
			nullAndBad, _, _ := array.FromJSON(mem, idxType, strings.NewReader("[1, 2, null, 3, 2, 0]"))
			defer nullAndBad.Release()
			_, err = array.NewValidatedDictionaryArray(dt, nullAndBad, values)
			assert.Error(t, err)

			// Declare an index type that differs from the type of the indices array.
			otherIdxType := arrow.PrimitiveTypes.Int8
			if idxType.ID() == arrow.INT8 {
				otherIdxType = arrow.PrimitiveTypes.Uint8
			}
			mismatched := &arrow.DictionaryType{IndexType: otherIdxType, ValueType: arrow.BinaryTypes.String}
			_, err = array.NewValidatedDictionaryArray(mismatched, nullAndBad, values)
			assert.Error(t, err)
		})
	}
}
// TestListOfDictionary appends every six-letter combination drawn from the
// alphabets "abc", "def", "ghi", "jkl", "mno" and "pqr" to the dictionary
// value builder of a list builder, then checks that the resulting dictionary
// holds the strings in insertion order.
func TestListOfDictionary(t *testing.T) {
	mem := memory.NewCheckedAllocator(memory.DefaultAllocator)
	defer mem.AssertSize(t, 0)

	rootBuilder := array.NewBuilder(mem, arrow.ListOf(&arrow.DictionaryType{IndexType: arrow.PrimitiveTypes.Int16, ValueType: arrow.BinaryTypes.String}))
	defer rootBuilder.Release()

	listBldr := rootBuilder.(*array.ListBuilder)
	dictBldr := listBldr.ValueBuilder().(*array.BinaryDictionaryBuilder)

	listBldr.Append(true)
	var expected []string
	for _, a := range []byte("abc") {
		for _, d := range []byte("def") {
			for _, g := range []byte("ghi") {
				for _, j := range []byte("jkl") {
					for _, m := range []byte("mno") {
						for _, p := range []byte("pqr") {
							// Start a new list entry whenever the sum of all six
							// letters is a multiple of 16. The sum is parenthesised
							// and widened to int: `%` binds tighter than `+`, so the
							// unparenthesised form reduced only p. With these
							// alphabets the sum spans 627..639 and never reaches a
							// multiple of 16, so the branch does not fire in practice.
							if (int(a)+int(d)+int(g)+int(j)+int(m)+int(p))%16 == 0 {
								listBldr.Append(true)
							}

							str := string([]byte{a, d, g, j, m, p})
							dictBldr.AppendString(str)
							expected = append(expected, str)
						}
					}
				}
			}
		}
	}

	strbldr := array.NewStringBuilder(mem)
	defer strbldr.Release()
	strbldr.AppendValues(expected, nil)

	expectedDict := strbldr.NewStringArray()
	defer expectedDict.Release()

	arr := rootBuilder.NewArray()
	defer arr.Release()

	actualDict := arr.(*array.List).ListValues().(*array.Dictionary)
	assert.True(t, array.ArrayEqual(expectedDict, actualDict.Dictionary()))
}
// TestDictionaryCanCompareIndices checks Dictionary.CanCompareIndices in both
// directions for pairs of dictionary arrays that differ in dictionary
// contents or index type.
func TestDictionaryCanCompareIndices(t *testing.T) {
	mem := memory.NewCheckedAllocator(memory.DefaultAllocator)
	defer mem.AssertSize(t, 0)

	// newDict builds a dictionary array with an empty indices array and the
	// dictionary values parsed from valuesJSON.
	newDict := func(idxType, valueType arrow.DataType, valuesJSON string) *array.Dictionary {
		idx, _, _ := array.FromJSON(mem, idxType, strings.NewReader("[]"))
		defer idx.Release()
		vals, _, _ := array.FromJSON(mem, valueType, strings.NewReader(valuesJSON))
		defer vals.Release()

		dt := &arrow.DictionaryType{IndexType: idxType, ValueType: valueType}
		dictArr, _ := array.NewValidatedDictionaryArray(dt, idx, vals)
		return dictArr
	}

	cases := []struct {
		name      string
		leftIdx   arrow.DataType
		leftJSON  string
		rightIdx  arrow.DataType
		rightJSON string
		want      bool
	}{
		{"same", arrow.PrimitiveTypes.Int16, `["foo", "bar"]`, arrow.PrimitiveTypes.Int16, `["foo", "bar"]`, true},
		{"prefix dict", arrow.PrimitiveTypes.Int16, `["foo", "bar", "quux"]`, arrow.PrimitiveTypes.Int16, `["foo", "bar"]`, true},
		{"indices need cast", arrow.PrimitiveTypes.Int16, `["foo", "bar"]`, arrow.PrimitiveTypes.Int8, `["foo", "bar"]`, false},
		{"non prefix", arrow.PrimitiveTypes.Int16, `["foo", "bar", "quux"]`, arrow.PrimitiveTypes.Int16, `["foo", "blink"]`, false},
	}

	for _, tc := range cases {
		tc := tc
		t.Run(tc.name, func(t *testing.T) {
			left := newDict(tc.leftIdx, arrow.BinaryTypes.String, tc.leftJSON)
			defer left.Release()
			right := newDict(tc.rightIdx, arrow.BinaryTypes.String, tc.rightJSON)
			defer right.Release()

			// The answer must not depend on which side is the receiver.
			assert.Equalf(t, tc.want, left.CanCompareIndices(right), "left: %s\nright: %s\n", left, right)
			assert.Equalf(t, tc.want, right.CanCompareIndices(left), "left: %s\nright: %s\n", right, left)
		})
	}
}
// TestDictionaryGetValueIndex checks Dictionary.GetValueIndex for each index
// type in the list below, on both a full dictionary array and a slice of it
// that starts at offset 1. An int64 parse of the same index list is the
// reference.
func TestDictionaryGetValueIndex(t *testing.T) {
	mem := memory.NewCheckedAllocator(memory.DefaultAllocator)
	defer mem.AssertSize(t, 0)

	const indicesJSON = "[5, 0, 1, 3, 2, 4]"
	reference, _, _ := array.FromJSON(mem, arrow.PrimitiveTypes.Int64, strings.NewReader(indicesJSON))
	defer reference.Release()
	values, _, _ := array.FromJSON(mem, arrow.PrimitiveTypes.Int32, strings.NewReader("[10, 20, 30, 40, 50, 60]"))
	defer values.Release()

	indexTypes := []arrow.DataType{
		arrow.PrimitiveTypes.Int8, arrow.PrimitiveTypes.Uint8,
		arrow.PrimitiveTypes.Int16, arrow.PrimitiveTypes.Uint16,
		arrow.PrimitiveTypes.Int32, arrow.PrimitiveTypes.Uint32,
		arrow.PrimitiveTypes.Int64, arrow.PrimitiveTypes.Uint64,
	}
	want := reference.(*array.Int64)

	for _, idxType := range indexTypes {
		t.Run(idxType.Name(), func(t *testing.T) {
			idx, _, _ := array.FromJSON(mem, idxType, strings.NewReader(indicesJSON))
			defer idx.Release()

			dt := &arrow.DictionaryType{IndexType: idxType, ValueType: arrow.PrimitiveTypes.Int32}
			full := array.NewDictionaryArray(dt, idx, values)
			defer full.Release()

			const skip = 1
			slicedArr := array.NewSlice(full, skip, int64(full.Len()))
			defer slicedArr.Release()
			sliced := slicedArr.(*array.Dictionary)

			for i := 0; i < idx.Len(); i++ {
				assert.EqualValues(t, want.Value(i), full.GetValueIndex(i))
				if i >= sliced.Len() {
					continue
				}
				// Position i of the slice corresponds to position i+skip of the original.
				assert.EqualValues(t, want.Value(i+skip), sliced.GetValueIndex(i))
			}
		})
	}
}
// checkTransposeMap reinterprets the bytes of b as int32 values and asserts
// that they equal exp. It returns the result of the assertion.
func checkTransposeMap(t *testing.T, b *memory.Buffer, exp []int32) bool {
	return assert.Equal(t, exp, arrow.Int32Traits.CastFromBytes(b.Bytes()))
}
// TestDictionaryUnifierNumeric unifies three int64 dictionaries, verifies that
// a dictionary of a different value type is rejected, and checks the unified
// dictionary and the transpose map produced for each input.
func TestDictionaryUnifierNumeric(t *testing.T) {
	mem := memory.NewCheckedAllocator(memory.DefaultAllocator)
	defer mem.AssertSize(t, 0)

	valueType := arrow.PrimitiveTypes.Int64

	first, _, err := array.FromJSON(mem, valueType, strings.NewReader(`[3, 4, 7]`))
	require.NoError(t, err)
	second, _, err := array.FromJSON(mem, valueType, strings.NewReader(`[1, 7, 4, 8]`))
	require.NoError(t, err)
	third, _, err := array.FromJSON(mem, valueType, strings.NewReader(`[1, -200]`))
	require.NoError(t, err)

	wantType := &arrow.DictionaryType{IndexType: arrow.PrimitiveTypes.Int8, ValueType: valueType}
	wantDict, _, err := array.FromJSON(mem, valueType, strings.NewReader(`[3, 4, 7, 1, 8, -200]`))
	require.NoError(t, err)

	inputs := []arrow.Array{first, second, third}
	defer func() {
		for _, in := range inputs {
			in.Release()
		}
		wantDict.Release()
	}()

	unifier, err := array.NewDictionaryUnifier(mem, valueType)
	assert.NoError(t, err)
	defer unifier.Release()

	for _, in := range inputs {
		assert.NoError(t, unifier.Unify(in))
	}

	// An int32 dictionary must be refused by an int64 unifier.
	wrongType, _, _ := array.FromJSON(mem, arrow.PrimitiveTypes.Int32, strings.NewReader(`[1, -200]`))
	defer wrongType.Release()
	assert.EqualError(t, unifier.Unify(wrongType), "dictionary type different from unifier: int32, expected: int64")

	gotType, gotDict, err := unifier.GetResult()
	assert.NoError(t, err)
	// The receiver is bound here, so this releases the first result even
	// though gotDict is reassigned below.
	defer gotDict.Release()

	assert.Truef(t, arrow.TypeEqual(wantType, gotType), "got: %s, expected: %s", gotType, wantType)
	assert.Truef(t, array.Equal(wantDict, gotDict), "got: %s, expected: %s", gotDict, wantDict)

	// Second pass: unify the same inputs again, this time collecting the
	// transpose map of each.
	maps := make([]*memory.Buffer, 0, len(inputs))
	for _, in := range inputs {
		m, err := unifier.UnifyAndTranspose(in)
		assert.NoError(t, err)
		maps = append(maps, m)
	}

	gotType, gotDict, err = unifier.GetResult()
	assert.NoError(t, err)
	defer func() {
		gotDict.Release()
		for _, m := range maps {
			m.Release()
		}
	}()

	assert.Truef(t, arrow.TypeEqual(wantType, gotType), "got: %s, expected: %s", gotType, wantType)
	assert.Truef(t, array.Equal(wantDict, gotDict), "got: %s, expected: %s", gotDict, wantDict)

	wantMaps := [][]int32{
		{0, 1, 2},
		{3, 2, 1, 4},
		{3, 5},
	}
	for i, w := range wantMaps {
		checkTransposeMap(t, maps[i], w)
	}
}
// TestDictionaryUnifierString unifies two string dictionaries that share the
// value "foo" and checks the unified dictionary and both transpose maps.
func TestDictionaryUnifierString(t *testing.T) {
	mem := memory.NewCheckedAllocator(memory.DefaultAllocator)
	defer mem.AssertSize(t, 0)

	valueType := arrow.BinaryTypes.String

	left, _, err := array.FromJSON(mem, valueType, strings.NewReader(`["foo", "bar"]`))
	require.NoError(t, err)
	defer left.Release()

	right, _, err := array.FromJSON(mem, valueType, strings.NewReader(`["quux", "foo"]`))
	require.NoError(t, err)
	defer right.Release()

	wantType := &arrow.DictionaryType{IndexType: arrow.PrimitiveTypes.Int8, ValueType: valueType}
	wantDict, _, _ := array.FromJSON(mem, valueType, strings.NewReader(`["foo", "bar", "quux"]`))
	defer wantDict.Release()

	unifier, err := array.NewDictionaryUnifier(mem, valueType)
	assert.NoError(t, err)
	defer unifier.Release()

	assert.NoError(t, unifier.Unify(left))
	assert.NoError(t, unifier.Unify(right))

	gotType, gotDict, err := unifier.GetResult()
	assert.NoError(t, err)
	// Bound to the first result; gotDict is reassigned further down.
	defer gotDict.Release()

	assert.Truef(t, arrow.TypeEqual(wantType, gotType), "got: %s, expected: %s", gotType, wantType)
	assert.Truef(t, array.Equal(wantDict, gotDict), "got: %s, expected: %s", gotDict, wantDict)

	// Unify both inputs a second time and keep their transpose maps.
	leftMap, err := unifier.UnifyAndTranspose(left)
	assert.NoError(t, err)
	rightMap, err := unifier.UnifyAndTranspose(right)
	assert.NoError(t, err)

	gotType, gotDict, err = unifier.GetResult()
	assert.NoError(t, err)
	defer func() {
		gotDict.Release()
		leftMap.Release()
		rightMap.Release()
	}()

	assert.Truef(t, arrow.TypeEqual(wantType, gotType), "got: %s, expected: %s", gotType, wantType)
	assert.Truef(t, array.Equal(wantDict, gotDict), "got: %s, expected: %s", gotDict, wantDict)

	checkTransposeMap(t, leftMap, []int32{0, 1})
	checkTransposeMap(t, rightMap, []int32{2, 0})
}
// TestDictionaryUnifierFixedSizeBinary unifies two fixed-size-binary
// dictionaries of width 3 that are views over one shared 12-byte buffer, and
// checks the unified dictionary and both transpose maps.
func TestDictionaryUnifierFixedSizeBinary(t *testing.T) {
	mem := memory.NewCheckedAllocator(memory.DefaultAllocator)
	defer mem.AssertSize(t, 0)

	valueType := &arrow.FixedSizeBinaryType{ByteWidth: 3}
	raw := memory.NewBufferBytes([]byte(`foobarbazqux`))

	// First input: two values over the slice (0, 6) of raw.
	data1 := array.NewData(valueType, 2, []*memory.Buffer{nil, memory.SliceBuffer(raw, 0, 6)}, nil, 0, 0)
	defer data1.Release()
	in1 := array.NewFixedSizeBinaryData(data1)

	// Second input: three values over the slice (3, 9) of raw.
	data2 := array.NewData(valueType, 3, []*memory.Buffer{nil, memory.SliceBuffer(raw, 3, 9)}, nil, 0, 0)
	defer data2.Release()
	in2 := array.NewFixedSizeBinaryData(data2)

	// Expected result: four values over the whole buffer.
	dataAll := array.NewData(valueType, 4, []*memory.Buffer{nil, raw}, nil, 0, 0)
	defer dataAll.Release()
	wantDict := array.NewFixedSizeBinaryData(dataAll)

	wantType := &arrow.DictionaryType{IndexType: arrow.PrimitiveTypes.Int8, ValueType: valueType}

	unifier, err := array.NewDictionaryUnifier(mem, valueType)
	assert.NoError(t, err)
	defer func() {
		in1.Release()
		in2.Release()
		wantDict.Release()
		unifier.Release()
	}()

	assert.NoError(t, unifier.Unify(in1))
	assert.NoError(t, unifier.Unify(in2))

	gotType, gotDict, err := unifier.GetResult()
	assert.NoError(t, err)
	// Bound to the first result; gotDict is reassigned further down.
	defer gotDict.Release()

	assert.Truef(t, arrow.TypeEqual(wantType, gotType), "got: %s, expected: %s", gotType, wantType)
	assert.Truef(t, array.Equal(wantDict, gotDict), "got: %s, expected: %s", gotDict, wantDict)

	// Unify both inputs a second time and keep their transpose maps.
	map1, err := unifier.UnifyAndTranspose(in1)
	assert.NoError(t, err)
	map2, err := unifier.UnifyAndTranspose(in2)
	assert.NoError(t, err)

	gotType, gotDict, err = unifier.GetResult()
	assert.NoError(t, err)
	defer func() {
		gotDict.Release()
		map1.Release()
		map2.Release()
	}()

	assert.Truef(t, arrow.TypeEqual(wantType, gotType), "got: %s, expected: %s", gotType, wantType)
	assert.Truef(t, array.Equal(wantDict, gotDict), "got: %s, expected: %s", gotDict, wantDict)

	checkTransposeMap(t, map1, []int32{0, 1})
	checkTransposeMap(t, map2, []int32{1, 2, 3})
}
// TestDictionaryUnifierLarge unifies two int32 dictionaries whose union has
// 140 distinct values and expects an int16 index type in the result, since
// int8 would be too narrow to hold all the values.
func TestDictionaryUnifierLarge(t *testing.T) {
	mem := memory.NewCheckedAllocator(memory.DefaultAllocator)
	defer mem.AssertSize(t, 0)

	bldr := array.NewInt32Builder(mem)
	defer bldr.Release()

	// fill returns an array holding the consecutive values lo, lo+1, ..., hi-1.
	fill := func(lo, hi int32) *array.Int32 {
		bldr.Reserve(int(hi - lo))
		for v := lo; v < hi; v++ {
			bldr.UnsafeAppend(v)
		}
		return bldr.NewInt32Array()
	}

	first := fill(0, 120)
	defer first.Release()
	assert.EqualValues(t, 120, first.Len())

	// Overlaps the first dictionary on 110..119.
	second := fill(110, 140)
	defer second.Release()
	assert.EqualValues(t, 30, second.Len())

	wantDict := fill(0, 140)
	defer wantDict.Release()
	assert.EqualValues(t, 140, wantDict.Len())

	wantType := &arrow.DictionaryType{IndexType: arrow.PrimitiveTypes.Int16, ValueType: arrow.PrimitiveTypes.Int32}

	unifier, err := array.NewDictionaryUnifier(mem, arrow.PrimitiveTypes.Int32)
	assert.NoError(t, err)
	defer unifier.Release()

	assert.NoError(t, unifier.Unify(first))
	assert.NoError(t, unifier.Unify(second))

	gotType, gotDict, err := unifier.GetResult()
	assert.NoError(t, err)
	defer gotDict.Release()

	assert.Truef(t, arrow.TypeEqual(wantType, gotType), "got: %s, expected: %s", gotType, wantType)
	assert.Truef(t, array.Equal(wantDict, gotDict), "got: %s, expected: %s", gotDict, wantDict)
}
// checkDictionaryArray requires arr to be a *array.Dictionary, then asserts
// that its dictionary equals expectedVals and its indices equal
// expectedIndices. The indices are only compared when the dictionary check
// passes. It reports whether both checks passed.
func checkDictionaryArray(t *testing.T, arr, expectedVals, expectedIndices arrow.Array) bool {
	require.IsType(t, (*array.Dictionary)(nil), arr)
	dict := arr.(*array.Dictionary)

	if !assert.Truef(t, array.Equal(expectedVals, dict.Dictionary()), "got: %s, expected: %s", dict.Dictionary(), expectedVals) {
		return false
	}
	return assert.Truef(t, array.Equal(expectedIndices, dict.Indices()), "got: %s, expected: %s", dict.Indices(), expectedIndices)
}
// TestDictionaryUnifierSimpleChunkedArray unifies a chunked array of four
// string-dictionary chunks (one of them empty) and checks that every chunk
// ends up with the same dictionary and the expected indices.
func TestDictionaryUnifierSimpleChunkedArray(t *testing.T) {
	mem := memory.NewCheckedAllocator(memory.DefaultAllocator)
	defer mem.AssertSize(t, 0)

	dictType := &arrow.DictionaryType{IndexType: arrow.PrimitiveTypes.Int8, ValueType: arrow.BinaryTypes.String}

	chunkJSON := []string{
		`["ab", "cd", null, "cd"]`,
		`["ef", "cd", "ef"]`,
		`["ef", "ab", null, "ab"]`,
		`[]`,
	}
	chunks := make([]arrow.Array, len(chunkJSON))
	for i, js := range chunkJSON {
		chunks[i], _, _ = array.FromJSON(mem, dictType, strings.NewReader(js))
	}
	chunked := arrow.NewChunked(dictType, chunks)
	defer func() {
		for _, c := range chunks {
			c.Release()
		}
		chunked.Release()
	}()

	unified, err := array.UnifyChunkedDicts(mem, chunked)
	assert.NoError(t, err)
	defer unified.Release()

	assert.Len(t, unified.Chunks(), 4)

	wantDict, _, _ := array.FromJSON(mem, dictType.ValueType, strings.NewReader(`["ab", "cd", "ef"]`))
	defer wantDict.Release()

	// Expected indices of each chunk into the unified dictionary.
	indexJSON := []string{
		`[0, 1, null, 1]`,
		`[2, 1, 2]`,
		`[2, 0, null, 0]`,
		`[]`,
	}
	wantIndices := make([]arrow.Array, len(indexJSON))
	for i, js := range indexJSON {
		idx, _, _ := array.FromJSON(mem, dictType.IndexType, strings.NewReader(js))
		defer idx.Release()
		wantIndices[i] = idx
	}

	for i, idx := range wantIndices {
		checkDictionaryArray(t, unified.Chunk(i), wantDict, idx)
	}
}
// TestDictionaryUnifierChunkedArrayZeroChunks checks that unifying a chunked
// array with no chunks succeeds and yields a result equal to the input.
func TestDictionaryUnifierChunkedArrayZeroChunks(t *testing.T) {
	mem := memory.NewCheckedAllocator(memory.DefaultAllocator)
	defer mem.AssertSize(t, 0)

	dictType := &arrow.DictionaryType{IndexType: arrow.PrimitiveTypes.Int8, ValueType: arrow.BinaryTypes.String}
	chunked := arrow.NewChunked(dictType, []arrow.Array{})
	// Release both the input and the result, as the sibling one-chunk and
	// no-dict tests do, so no reference is left dangling.
	defer chunked.Release()

	unified, err := array.UnifyChunkedDicts(mem, chunked)
	// Stop here on failure: unified is not usable when err is non-nil.
	require.NoError(t, err)
	defer unified.Release()

	assert.True(t, array.ChunkedEqual(unified, chunked))
}
// TestDictionaryUnifierChunkedArrayOneChunk checks that unifying a chunked
// array with a single chunk hands back the very same *arrow.Chunked.
func TestDictionaryUnifierChunkedArrayOneChunk(t *testing.T) {
	mem := memory.NewCheckedAllocator(memory.DefaultAllocator)
	defer mem.AssertSize(t, 0)

	dt := &arrow.DictionaryType{IndexType: arrow.PrimitiveTypes.Int8, ValueType: arrow.BinaryTypes.String}

	only, _, _ := array.FromJSON(mem, dt, strings.NewReader(`["ab", "cd", null, "cd"]`))
	defer only.Release()

	input := arrow.NewChunked(dt, []arrow.Array{only})
	defer input.Release()

	output, err := array.UnifyChunkedDicts(mem, input)
	assert.NoError(t, err)
	defer output.Release()

	assert.True(t, array.ChunkedEqual(output, input))
	// Pointer identity, not just equal contents.
	assert.Same(t, output, input)
}
// TestDictionaryUnifierChunkedArrayNoDict checks that unifying a chunked array
// of plain int8 chunks (no dictionary type) hands back the very same
// *arrow.Chunked.
func TestDictionaryUnifierChunkedArrayNoDict(t *testing.T) {
	mem := memory.NewCheckedAllocator(memory.DefaultAllocator)
	defer mem.AssertSize(t, 0)

	elemType := arrow.PrimitiveTypes.Int8

	head, _, _ := array.FromJSON(mem, elemType, strings.NewReader(`[1, 1, 2, 3]`))
	defer head.Release()
	tail, _, _ := array.FromJSON(mem, elemType, strings.NewReader(`[5, 8, 13]`))
	defer tail.Release()

	input := arrow.NewChunked(elemType, []arrow.Array{head, tail})
	defer input.Release()

	output, err := array.UnifyChunkedDicts(mem, input)
	assert.NoError(t, err)
	defer output.Release()

	assert.True(t, array.ChunkedEqual(output, input))
	// Pointer identity, not just equal contents.
	assert.Same(t, output, input)
}
// TestDictionaryUnifierChunkedArrayNested unifies a chunked array of
// list<dictionary<int16, string>> chunks and checks, per chunk, the list
// offsets and the dictionary and indices of the list values.
func TestDictionaryUnifierChunkedArrayNested(t *testing.T) {
	mem := memory.NewCheckedAllocator(memory.DefaultAllocator)
	defer mem.AssertSize(t, 0)

	listType := arrow.ListOf(&arrow.DictionaryType{IndexType: arrow.PrimitiveTypes.Int16, ValueType: arrow.BinaryTypes.String})

	first, _, err := array.FromJSON(mem, listType, strings.NewReader(`[["ab", "cd"], ["cd"]]`))
	assert.NoError(t, err)
	second, _, err := array.FromJSON(mem, listType, strings.NewReader(`[[], ["ef", "cd", "ef"]]`))
	assert.NoError(t, err)

	input := arrow.NewChunked(listType, []arrow.Array{first, second})

	output, err := array.UnifyChunkedDicts(mem, input)
	assert.NoError(t, err)
	assert.Len(t, output.Chunks(), 2)

	wantDict, _, _ := array.FromJSON(mem, arrow.BinaryTypes.String, strings.NewReader(`["ab", "cd", "ef"]`))

	list0 := output.Chunk(0).(*array.List)
	assert.Equal(t, []int32{0, 2, 3}, list0.Offsets())
	wantIdx0, _, _ := array.FromJSON(mem, arrow.PrimitiveTypes.Int16, strings.NewReader(`[0, 1, 1]`))
	checkDictionaryArray(t, list0.ListValues(), wantDict, wantIdx0)

	list1 := output.Chunk(1).(*array.List)
	assert.Equal(t, []int32{0, 0, 3}, list1.Offsets())
	wantIdx1, _, _ := array.FromJSON(mem, arrow.PrimitiveTypes.Int16, strings.NewReader(`[2, 1, 2]`))
	checkDictionaryArray(t, list1.ListValues(), wantDict, wantIdx1)

	// All cleanup is registered in one place at the very end, so it only runs
	// when execution has reached this point.
	defer func() {
		wantIdx0.Release()
		wantIdx1.Release()
		wantDict.Release()
		output.Release()
		input.Release()
		second.Release()
		first.Release()
	}()
}
// TestDictionaryUnifierChunkedArrayExtension unifies a chunked array whose
// chunks are of the dictionary extension type from the testing types package.
// It checks that each unified chunk keeps the extension type and that its
// storage carries the unified dictionary with the expected indices.
func TestDictionaryUnifierChunkedArrayExtension(t *testing.T) {
	mem := memory.NewCheckedAllocator(memory.DefaultAllocator)
	defer mem.AssertSize(t, 0)

	dt := types.NewDictExtensionType()
	chunk1, _, err := array.FromJSON(mem, dt, strings.NewReader(`["ab", null, "cd", "ab"]`))
	assert.NoError(t, err)
	defer chunk1.Release()

	chunk2, _, err := array.FromJSON(mem, dt, strings.NewReader(`["ef", "ab", "ab"]`))
	assert.NoError(t, err)
	defer chunk2.Release()

	chunked := arrow.NewChunked(dt, []arrow.Array{chunk1, chunk2})
	defer chunked.Release()

	unified, err := array.UnifyChunkedDicts(mem, chunked)
	assert.NoError(t, err)
	defer unified.Release()
	assert.Len(t, unified.Chunks(), 2)

	expectedDict, _, _ := array.FromJSON(mem, arrow.BinaryTypes.String, strings.NewReader(`["ab", "cd", "ef"]`))
	defer expectedDict.Release()

	unified1 := unified.Chunk(0).(array.ExtensionArray)
	assert.Truef(t, arrow.TypeEqual(dt, unified1.DataType()), "expected: %s, got: %s", dt, unified1.DataType())
	indices1, _, _ := array.FromJSON(mem, arrow.PrimitiveTypes.Int8, strings.NewReader(`[0, null, 1, 0]`))
	defer indices1.Release()
	checkDictionaryArray(t, unified1.Storage(), expectedDict, indices1)

	unified2 := unified.Chunk(1).(array.ExtensionArray)
	// Report the type of the chunk actually under test (unified2) on failure.
	assert.Truef(t, arrow.TypeEqual(dt, unified2.DataType()), "expected: %s, got: %s", dt, unified2.DataType())
	indices2, _, _ := array.FromJSON(mem, arrow.PrimitiveTypes.Int8, strings.NewReader(`[2, 0, 0]`))
	defer indices2.Release()
	checkDictionaryArray(t, unified2.Storage(), expectedDict, indices2)
}
// TestDictionaryUnifierChunkedArrayNestedDict checks that unifying dictionary
// chunks whose value type is itself a list of dictionaries fails with an
// "unimplemented" error and returns a nil result.
func TestDictionaryUnifierChunkedArrayNestedDict(t *testing.T) {
	mem := memory.NewCheckedAllocator(memory.DefaultAllocator)
	defer mem.AssertSize(t, 0)

	// Dictionary value type: list<dictionary<uint32, string>>.
	valueType := arrow.ListOf(&arrow.DictionaryType{IndexType: arrow.PrimitiveTypes.Uint32, ValueType: arrow.BinaryTypes.String})

	values1, _, err := array.FromJSON(mem, valueType, strings.NewReader(`[["ab", "cd"], [], ["cd", null]]`))
	assert.NoError(t, err)
	defer values1.Release()
	idx1, _, _ := array.FromJSON(mem, arrow.PrimitiveTypes.Int32, strings.NewReader(`[2, 1, 0, 1, 2]`))
	defer idx1.Release()
	outerType1 := &arrow.DictionaryType{IndexType: arrow.PrimitiveTypes.Int32, ValueType: valueType}
	first := array.NewDictionaryArray(outerType1, idx1, values1)
	defer first.Release()

	values2, _, err := array.FromJSON(mem, valueType, strings.NewReader(`[["cd", "ef"], ["cd", null], []]`))
	assert.NoError(t, err)
	defer values2.Release()
	idx2, _, _ := array.FromJSON(mem, arrow.PrimitiveTypes.Int32, strings.NewReader(`[1, 2, 2, 0]`))
	defer idx2.Release()
	outerType2 := &arrow.DictionaryType{IndexType: idx2.DataType(), ValueType: valueType}
	second := array.NewDictionaryArray(outerType2, idx2, values2)
	defer second.Release()

	input := arrow.NewChunked(first.DataType(), []arrow.Array{first, second})
	defer input.Release()

	output, err := array.UnifyChunkedDicts(mem, input)
	assert.Nil(t, output)
	assert.EqualError(t, err, "unimplemented dictionary value type, list<item: dictionary<values=utf8, indices=uint32, ordered=false>, nullable>")
}
// TestDictioanryUnifierTableZeroColumns checks that unifying the dictionaries
// of a table with no columns and 42 rows yields a table with an equal schema,
// the same row count, and contents equal to the input.
func TestDictioanryUnifierTableZeroColumns(t *testing.T) {
	mem := memory.NewCheckedAllocator(memory.DefaultAllocator)
	defer mem.AssertSize(t, 0)

	emptySchema := arrow.NewSchema([]arrow.Field{}, nil)
	input := array.NewTable(emptySchema, []arrow.Column{}, 42)
	defer input.Release()

	output, err := array.UnifyTableDicts(mem, input)
	assert.NoError(t, err)

	assert.True(t, emptySchema.Equal(output.Schema()))
	assert.EqualValues(t, 42, output.NumRows())
	assert.True(t, array.TableEqual(input, output))
}