// blob: a6ee85d9687954ae355bffa06ed2b65f37917472 [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package iceberg_test
import (
"bytes"
"fmt"
"reflect"
"strings"
"testing"
"time"
"github.com/apache/arrow-go/v18/arrow/decimal"
"github.com/apache/arrow-go/v18/arrow/decimal128"
"github.com/apache/iceberg-go"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
// TestParseTransform exercises iceberg.ParseTransform with the transform
// names listed below in several letter casings. For each valid input it
// checks the parsed value and that MarshalText yields the lower-cased input;
// for each malformed input it checks that a nil transform is returned along
// with an error wrapping ErrInvalidTransform that mentions the input.
func TestParseTransform(t *testing.T) {
	validCases := []struct {
		input string
		want  iceberg.Transform
	}{
		{input: "identity", want: iceberg.IdentityTransform{}},
		{input: "IdEnTiTy", want: iceberg.IdentityTransform{}},
		{input: "void", want: iceberg.VoidTransform{}},
		{input: "VOId", want: iceberg.VoidTransform{}},
		{input: "year", want: iceberg.YearTransform{}},
		{input: "yEAr", want: iceberg.YearTransform{}},
		{input: "month", want: iceberg.MonthTransform{}},
		{input: "MONtH", want: iceberg.MonthTransform{}},
		{input: "day", want: iceberg.DayTransform{}},
		{input: "DaY", want: iceberg.DayTransform{}},
		{input: "hour", want: iceberg.HourTransform{}},
		{input: "hOuR", want: iceberg.HourTransform{}},
		{input: "bucket[5]", want: iceberg.BucketTransform{NumBuckets: 5}},
		{input: "bucket[100]", want: iceberg.BucketTransform{NumBuckets: 100}},
		{input: "BUCKET[5]", want: iceberg.BucketTransform{NumBuckets: 5}},
		{input: "bUCKeT[100]", want: iceberg.BucketTransform{NumBuckets: 100}},
		{input: "truncate[10]", want: iceberg.TruncateTransform{Width: 10}},
		{input: "truncate[255]", want: iceberg.TruncateTransform{Width: 255}},
		{input: "TRUNCATE[10]", want: iceberg.TruncateTransform{Width: 10}},
		{input: "tRuNCATe[255]", want: iceberg.TruncateTransform{Width: 255}},
	}

	for _, tc := range validCases {
		t.Run(tc.input, func(t *testing.T) {
			parsed, err := iceberg.ParseTransform(tc.input)
			require.NoError(t, err)
			assert.Equal(t, tc.want, parsed)

			// Round trip: the textual form must be the lower-cased input.
			text, err := parsed.MarshalText()
			assert.NoError(t, err)
			assert.Equal(t, strings.ToLower(tc.input), string(text))
		})
	}

	invalidCases := []struct {
		name  string
		input string
	}{
		{name: "foobar", input: "foobar"},
		{name: "bucket no brackets", input: "bucket"},
		{name: "truncate no brackets", input: "truncate"},
		{name: "bucket no val", input: "bucket[]"},
		{name: "truncate no val", input: "truncate[]"},
		{name: "bucket neg", input: "bucket[-1]"},
		{name: "truncate neg", input: "truncate[-1]"},
	}

	for _, tc := range invalidCases {
		t.Run(tc.name, func(t *testing.T) {
			parsed, err := iceberg.ParseTransform(tc.input)
			assert.Nil(t, parsed)
			assert.ErrorIs(t, err, iceberg.ErrInvalidTransform)
			// The error text is expected to echo the offending input.
			assert.ErrorContains(t, err, tc.input)
		})
	}
}
// TestToHumanString checks Transform.ToHumanStr for each transform against a
// table of inputs (including nil) and the exact string each should render to.
func TestToHumanString(t *testing.T) {
	// Fail fast if a decimal fixture cannot be built; otherwise a broken
	// fixture would only surface later as a confusing string mismatch.
	decVal, err := decimal.Decimal128FromString("14.21", 4, 2)
	require.NoError(t, err)
	negDecVal, err := decimal.Decimal128FromString("-1.50", 9, 2)
	require.NoError(t, err)

	tests := []struct {
		transform iceberg.Transform
		input     any
		expected  string
	}{
		{iceberg.YearTransform{}, int32(47), "2017"},
		{iceberg.MonthTransform{}, int32(575), "2017-12"},
		{iceberg.DayTransform{}, int32(17501), "2017-12-01"},
		{iceberg.YearTransform{}, nil, "null"},
		{iceberg.MonthTransform{}, nil, "null"},
		{iceberg.DayTransform{}, nil, "null"},
		{iceberg.HourTransform{}, nil, "null"},
		{iceberg.HourTransform{}, int32(420042), "2017-12-01-18"},
		{iceberg.YearTransform{}, int32(-1), "1969"},
		{iceberg.MonthTransform{}, int32(-1), "1969-12"},
		{iceberg.DayTransform{}, int32(-1), "1969-12-31"},
		{iceberg.HourTransform{}, int32(-1), "1969-12-31-23"},
		{iceberg.YearTransform{}, int32(0), "1970"},
		{iceberg.MonthTransform{}, int32(0), "1970-01"},
		{iceberg.DayTransform{}, int32(0), "1970-01-01"},
		{iceberg.HourTransform{}, int32(0), "1970-01-01-00"},
		{iceberg.VoidTransform{}, nil, "null"},
		{iceberg.IdentityTransform{}, nil, "null"},
		{iceberg.TruncateTransform{Width: 1}, []byte{0x00, 0x01, 0x02, 0x03}, "AAECAw=="},
		{iceberg.TruncateTransform{Width: 1}, iceberg.Decimal{
			Val: decVal, Scale: 2,
		}, "14.21"},
		{iceberg.TruncateTransform{Width: 1}, int32(123), "123"},
		{iceberg.TruncateTransform{Width: 1}, int64(123), "123"},
		{iceberg.TruncateTransform{Width: 1}, "foo", "foo"},
		{iceberg.IdentityTransform{}, nil, "null"},
		{iceberg.IdentityTransform{}, iceberg.Date(17501), "2017-12-01"},
		{iceberg.IdentityTransform{}, iceberg.Time(36775038194), "10:12:55.038194"},
		{iceberg.IdentityTransform{}, iceberg.Timestamp(1512151975038194), "2017-12-01T18:12:55.038194"},
		{iceberg.IdentityTransform{}, int64(-1234567890000), "-1234567890000"},
		{iceberg.IdentityTransform{}, "a/b/c=d", "a/b/c=d"},
		{iceberg.IdentityTransform{}, []byte("foo"), "Zm9v"},
		{iceberg.IdentityTransform{}, iceberg.Decimal{Val: negDecVal, Scale: 2}, "-1.50"},
	}

	for _, tt := range tests {
		// Many cases share an expected string (e.g. "null"), so prefix the
		// subtest name with the transform to make failures attributable.
		name := tt.transform.String() + "/" + tt.expected
		t.Run(name, func(t *testing.T) {
			assert.Equal(t, tt.expected, tt.transform.ToHumanStr(tt.input))
		})
	}
}
// TestManifestPartitionVals is a sanity check that each transform's source
// and result types can be used as partition data in a manifest: the transform
// is applied to a literal, the result is placed in a data file's partition
// map, and the resulting manifest entry is written, read back, and written
// again without error.
func TestManifestPartitionVals(t *testing.T) {
	// 1971-02-10 is 365+40 days after the Unix epoch; the expected hour and
	// day values below are expressed in terms of that offset.
	const daysSinceEpoch = 365 + 40

	instant := time.Date(1971, 2, 10, 10, 20, 30, 4_000_000, time.UTC)
	tsLiteral := iceberg.TimestampLiteral(instant.UnixMicro())

	cases := []struct {
		xform  iceberg.Transform
		source iceberg.Literal
		want   iceberg.Literal
	}{
		{
			xform:  iceberg.HourTransform{},
			source: tsLiteral,
			want:   iceberg.Int32Literal(daysSinceEpoch*24 + 10),
		},
		{
			xform:  iceberg.DayTransform{},
			source: tsLiteral,
			want:   iceberg.Int32Literal(daysSinceEpoch),
		},
		{
			xform:  iceberg.MonthTransform{},
			source: tsLiteral,
			want:   iceberg.Int32Literal(13),
		},
		{
			xform:  iceberg.YearTransform{},
			source: tsLiteral,
			want:   iceberg.Int32Literal(1),
		},
		{
			xform:  iceberg.TruncateTransform{Width: 100},
			source: iceberg.Int64Literal(123456789),
			want:   iceberg.Int64Literal(123456700),
		},
		{
			xform:  iceberg.IdentityTransform{},
			source: iceberg.StringLiteral("foobar"),
			want:   iceberg.StringLiteral("foobar"),
		},
		{
			xform:  iceberg.BucketTransform{NumBuckets: 128},
			source: iceberg.StringLiteral("foobar"),
			want:   iceberg.Int32Literal(61),
		},
	}

	for _, tc := range cases {
		t.Run(reflect.TypeOf(tc.xform).String(), func(t *testing.T) {
			// Step 1: the transform itself must produce the expected literal.
			applied := tc.xform.Apply(iceberg.Optional[iceberg.Literal]{Val: tc.source, Valid: true})
			require.True(t, applied.Valid)
			assert.Equal(t, tc.want, applied.Val)

			// Step 2: build a one-column schema of the source type and a
			// partition spec that applies the transform to that column.
			schema := iceberg.NewSchema(0, iceberg.NestedField{
				ID:   1,
				Name: "abc",
				Type: tc.source.Type(),
			})
			spec := iceberg.NewPartitionSpec(iceberg.PartitionField{
				SourceID:  1,
				FieldID:   1000,
				Name:      "transformed_abc",
				Transform: tc.xform,
			})

			// Step 3: a data file whose partition value for field 1000 is
			// the transformed result.
			builder, err := iceberg.NewDataFileBuilder(
				spec, iceberg.EntryContentData,
				"1234.parquet", iceberg.ParquetFile,
				map[int]any{1000: applied.Val.Any()},
				nil, nil,
				100, 100_000,
			)
			require.NoError(t, err)

			var (
				snapID   = int64(123)
				sequence = int64(456)
			)
			// The same sequence-number pointer is passed for both
			// sequence arguments, as in the original fixture.
			entry := iceberg.NewManifestEntry(
				iceberg.EntryStatusADDED, &snapID, &sequence, &sequence, builder.Build(),
			)

			// Step 4: write the manifest into an in-memory buffer.
			var avroBuf bytes.Buffer
			written, err := iceberg.WriteManifest(
				"abc.avro", &avroBuf,
				2, spec, schema, 123,
				[]iceberg.ManifestEntry{entry},
			)
			require.NoError(t, err)

			// Step 5: make sure the manifest can be deserialized and then
			// re-serialized, too.
			roundTripped, err := iceberg.ReadManifest(written, &avroBuf, false)
			require.NoError(t, err)

			avroBuf.Reset()
			_, err = iceberg.WriteManifest(
				"abc.avro", &avroBuf,
				2, spec, schema, 123,
				roundTripped,
			)
			require.NoError(t, err)
		})
	}
}
// TestCanTransform verifies, for each transform, which source types
// CanTransform reports as supported and which it reports as unsupported.
func TestCanTransform(t *testing.T) {
	cases := []struct {
		transform iceberg.Transform
		accepts   []iceberg.Type
		rejects   []iceberg.Type
	}{
		{
			transform: iceberg.IdentityTransform{},
			accepts: []iceberg.Type{
				iceberg.PrimitiveTypes.Bool,
				iceberg.PrimitiveTypes.Int32, iceberg.PrimitiveTypes.Int64,
				iceberg.PrimitiveTypes.Float32, iceberg.PrimitiveTypes.Float64,
				iceberg.PrimitiveTypes.Date, iceberg.PrimitiveTypes.Time,
				iceberg.PrimitiveTypes.Timestamp, iceberg.PrimitiveTypes.TimestampTz,
				iceberg.PrimitiveTypes.String, iceberg.PrimitiveTypes.Binary, iceberg.PrimitiveTypes.UUID,
				iceberg.DecimalTypeOf(2, 1), iceberg.FixedTypeOf(2),
			},
			rejects: []iceberg.Type{
				&iceberg.StructType{}, &iceberg.ListType{}, &iceberg.MapType{},
			},
		},
		{
			// The void case lists no rejected types.
			transform: iceberg.VoidTransform{},
			accepts: []iceberg.Type{
				iceberg.PrimitiveTypes.Bool,
				iceberg.PrimitiveTypes.Int32, iceberg.PrimitiveTypes.Int64,
				iceberg.PrimitiveTypes.Float32, iceberg.PrimitiveTypes.Float64,
				iceberg.PrimitiveTypes.Date, iceberg.PrimitiveTypes.Time,
				iceberg.PrimitiveTypes.Timestamp, iceberg.PrimitiveTypes.TimestampTz,
				iceberg.PrimitiveTypes.TimestampNs, iceberg.PrimitiveTypes.TimestampTzNs,
				iceberg.PrimitiveTypes.String, iceberg.PrimitiveTypes.Binary, iceberg.PrimitiveTypes.UUID,
				iceberg.DecimalTypeOf(2, 1), iceberg.FixedTypeOf(2),
				&iceberg.StructType{}, &iceberg.ListType{}, &iceberg.MapType{},
			},
		},
		{
			transform: iceberg.BucketTransform{},
			accepts: []iceberg.Type{
				iceberg.PrimitiveTypes.Int32, iceberg.PrimitiveTypes.Int64,
				iceberg.PrimitiveTypes.Date, iceberg.PrimitiveTypes.Time,
				iceberg.PrimitiveTypes.Timestamp, iceberg.PrimitiveTypes.TimestampTz,
				iceberg.PrimitiveTypes.TimestampNs, iceberg.PrimitiveTypes.TimestampTzNs,
				iceberg.DecimalTypeOf(2, 1),
				iceberg.PrimitiveTypes.String,
				iceberg.FixedTypeOf(2),
				iceberg.PrimitiveTypes.Binary,
				iceberg.PrimitiveTypes.UUID,
			},
			rejects: []iceberg.Type{
				iceberg.PrimitiveTypes.Bool,
				iceberg.PrimitiveTypes.Float32, iceberg.PrimitiveTypes.Float64,
				&iceberg.StructType{}, &iceberg.ListType{}, &iceberg.MapType{},
			},
		},
		{
			transform: iceberg.TruncateTransform{},
			accepts: []iceberg.Type{
				iceberg.PrimitiveTypes.Int32, iceberg.PrimitiveTypes.Int64,
				iceberg.PrimitiveTypes.String, iceberg.PrimitiveTypes.Binary,
				iceberg.DecimalTypeOf(2, 1),
			},
			rejects: []iceberg.Type{
				iceberg.PrimitiveTypes.Bool,
				iceberg.PrimitiveTypes.Float32, iceberg.PrimitiveTypes.Float64,
				iceberg.PrimitiveTypes.Date, iceberg.PrimitiveTypes.Time,
				iceberg.PrimitiveTypes.Timestamp, iceberg.PrimitiveTypes.TimestampTz,
				iceberg.PrimitiveTypes.UUID, iceberg.FixedTypeOf(2),
				iceberg.PrimitiveTypes.TimestampNs, iceberg.PrimitiveTypes.TimestampTzNs,
				&iceberg.StructType{}, &iceberg.ListType{}, &iceberg.MapType{},
			},
		},
		{
			transform: iceberg.YearTransform{},
			accepts: []iceberg.Type{
				iceberg.PrimitiveTypes.Date,
				iceberg.PrimitiveTypes.Timestamp, iceberg.PrimitiveTypes.TimestampTz,
				iceberg.PrimitiveTypes.TimestampNs, iceberg.PrimitiveTypes.TimestampTzNs,
			},
			rejects: []iceberg.Type{
				iceberg.PrimitiveTypes.Bool,
				iceberg.PrimitiveTypes.Int32, iceberg.PrimitiveTypes.Int64,
				iceberg.PrimitiveTypes.Float32, iceberg.PrimitiveTypes.Float64,
				iceberg.PrimitiveTypes.Time,
				iceberg.PrimitiveTypes.String, iceberg.PrimitiveTypes.Binary, iceberg.PrimitiveTypes.UUID,
				iceberg.DecimalTypeOf(2, 1), iceberg.FixedTypeOf(2),
				&iceberg.StructType{}, &iceberg.ListType{}, &iceberg.MapType{},
			},
		},
		{
			transform: iceberg.MonthTransform{},
			accepts: []iceberg.Type{
				iceberg.PrimitiveTypes.Date,
				iceberg.PrimitiveTypes.Timestamp, iceberg.PrimitiveTypes.TimestampTz,
				iceberg.PrimitiveTypes.TimestampNs, iceberg.PrimitiveTypes.TimestampTzNs,
			},
			rejects: []iceberg.Type{
				iceberg.PrimitiveTypes.Bool,
				iceberg.PrimitiveTypes.Int32, iceberg.PrimitiveTypes.Int64,
				iceberg.PrimitiveTypes.Float32, iceberg.PrimitiveTypes.Float64,
				iceberg.PrimitiveTypes.Time,
				iceberg.PrimitiveTypes.String, iceberg.PrimitiveTypes.Binary, iceberg.PrimitiveTypes.UUID,
				iceberg.DecimalTypeOf(2, 1), iceberg.FixedTypeOf(2),
				&iceberg.StructType{}, &iceberg.ListType{}, &iceberg.MapType{},
			},
		},
		{
			transform: iceberg.DayTransform{},
			accepts: []iceberg.Type{
				iceberg.PrimitiveTypes.TimestampNs, iceberg.PrimitiveTypes.TimestampTzNs,
				iceberg.PrimitiveTypes.Date,
				iceberg.PrimitiveTypes.Timestamp, iceberg.PrimitiveTypes.TimestampTz,
			},
			rejects: []iceberg.Type{
				iceberg.PrimitiveTypes.Bool,
				iceberg.PrimitiveTypes.Int32, iceberg.PrimitiveTypes.Int64,
				iceberg.PrimitiveTypes.Float32, iceberg.PrimitiveTypes.Float64,
				iceberg.PrimitiveTypes.Time,
				iceberg.PrimitiveTypes.String, iceberg.PrimitiveTypes.Binary, iceberg.PrimitiveTypes.UUID,
				iceberg.DecimalTypeOf(2, 1), iceberg.FixedTypeOf(2),
				&iceberg.StructType{}, &iceberg.ListType{}, &iceberg.MapType{},
			},
		},
		{
			transform: iceberg.HourTransform{},
			accepts: []iceberg.Type{
				iceberg.PrimitiveTypes.TimestampNs, iceberg.PrimitiveTypes.TimestampTzNs,
				iceberg.PrimitiveTypes.Timestamp, iceberg.PrimitiveTypes.TimestampTz,
			},
			rejects: []iceberg.Type{
				iceberg.PrimitiveTypes.Bool,
				iceberg.PrimitiveTypes.Int32, iceberg.PrimitiveTypes.Int64,
				iceberg.PrimitiveTypes.Float32, iceberg.PrimitiveTypes.Float64,
				iceberg.PrimitiveTypes.Time,
				iceberg.PrimitiveTypes.String, iceberg.PrimitiveTypes.Binary, iceberg.PrimitiveTypes.UUID,
				iceberg.PrimitiveTypes.Date,
				iceberg.DecimalTypeOf(2, 1), iceberg.FixedTypeOf(2),
				&iceberg.StructType{}, &iceberg.ListType{}, &iceberg.MapType{},
			},
		},
	}

	for _, tc := range cases {
		// The transform's string form labels every assertion message.
		label := tc.transform.String()

		for _, srcType := range tc.accepts {
			assert.True(t, tc.transform.CanTransform(srcType),
				"%s: expected CanTransform(%T) to be true", label, srcType)
		}

		for _, srcType := range tc.rejects {
			assert.False(t, tc.transform.CanTransform(srcType),
				"%s: expected CanTransform(%T) to be false", label, srcType)
		}
	}
}
// TestTruncateTransform applies TruncateTransform with a given width to
// int32, int64, decimal, string and binary literals and checks the resulting
// literal, including the negative-integer cases listed in the table.
func TestTruncateTransform(t *testing.T) {
	cases := []struct {
		width int
		input iceberg.Literal
		want  iceberg.Literal
	}{
		{width: 10, input: iceberg.Int32Literal(1), want: iceberg.Int32Literal(0)},
		{width: 10, input: iceberg.Int32Literal(-1), want: iceberg.Int32Literal(-10)},
		{width: 10, input: iceberg.Int64Literal(1), want: iceberg.Int64Literal(0)},
		{width: 10, input: iceberg.Int64Literal(-1), want: iceberg.Int64Literal(-10)},
		{
			// Unscaled 1065 at scale 2 is expected to truncate to 1050.
			width: 50,
			input: iceberg.DecimalLiteral{Val: decimal128.FromI64(1065), Scale: 2},
			want:  iceberg.DecimalLiteral{Val: decimal128.FromI64(1050), Scale: 2},
		},
		{
			width: 3,
			input: iceberg.StringLiteral("abcdef"),
			want:  iceberg.StringLiteral("abc"),
		},
		{
			width: 3,
			input: iceberg.BinaryLiteral([]byte{0x01, 0x02, 0x03, 0x04, 0x05}),
			want:  iceberg.BinaryLiteral([]byte{0x01, 0x02, 0x03}),
		},
	}

	for _, tc := range cases {
		subtest := fmt.Sprintf("width=%d value=%v", tc.width, tc.input.Any())
		t.Run(subtest, func(t *testing.T) {
			source := iceberg.Optional[iceberg.Literal]{Val: tc.input, Valid: true}
			truncated := iceberg.TruncateTransform{Width: tc.width}.Apply(source)

			require.True(t, truncated.Valid)
			assert.Equal(t, tc.want, truncated.Val)
		})
	}
}