| // Licensed to the Apache Software Foundation (ASF) under one |
| // or more contributor license agreements. See the NOTICE file |
| // distributed with this work for additional information |
| // regarding copyright ownership. The ASF licenses this file |
| // to you under the Apache License, Version 2.0 (the |
| // "License"); you may not use this file except in compliance |
| // with the License. You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, software |
| // distributed under the License is distributed on an "AS IS" BASIS, |
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| // See the License for the specific language governing permissions and |
| // limitations under the License. |
| |
| package schema_test |
| |
| import ( |
| "log" |
| "os" |
| "reflect" |
| "testing" |
| |
| "github.com/apache/arrow/go/v6/parquet" |
| "github.com/apache/arrow/go/v6/parquet/schema" |
| "github.com/stretchr/testify/assert" |
| ) |
| |
| func ExampleNewSchemaFromStruct_primitives() { |
| type Schema struct { |
| Bool bool |
| Int8 int8 |
| Uint16 uint16 |
| Int32 int32 |
| Int64 int64 |
| Int96 parquet.Int96 |
| Float float32 |
| Double float64 |
| ByteArray string |
| FixedLenByteArray [10]byte |
| } |
| |
| sc, err := schema.NewSchemaFromStruct(Schema{}) |
| if err != nil { |
| log.Fatal(err) |
| } |
| |
| schema.PrintSchema(sc.Root(), os.Stdout, 2) |
| |
| // Output: |
| // repeated group field_id=-1 Schema { |
| // required boolean field_id=-1 Bool; |
| // required int32 field_id=-1 Int8 (Int(bitWidth=8, isSigned=true)); |
| // required int32 field_id=-1 Uint16 (Int(bitWidth=16, isSigned=false)); |
| // required int32 field_id=-1 Int32 (Int(bitWidth=32, isSigned=true)); |
| // required int64 field_id=-1 Int64 (Int(bitWidth=64, isSigned=true)); |
| // required int96 field_id=-1 Int96; |
| // required float field_id=-1 Float; |
| // required double field_id=-1 Double; |
| // required byte_array field_id=-1 ByteArray; |
| // required fixed_len_byte_array field_id=-1 FixedLenByteArray; |
| // } |
| } |
| |
| func ExampleNewSchemaFromStruct_convertedtypes() { |
| type ConvertedSchema struct { |
| Utf8 string `parquet:"name=utf8, converted=UTF8"` |
| Uint32 uint32 `parquet:"converted=INT_32"` |
| Date int32 `parquet:"name=date, converted=date"` |
| TimeMilli int32 `parquet:"name=timemilli, converted=TIME_MILLIS"` |
| TimeMicro int64 `parquet:"name=timemicro, converted=time_micros"` |
| TimeStampMilli int64 `parquet:"converted=timestamp_millis"` |
| TimeStampMicro int64 `parquet:"converted=timestamp_micros"` |
| Interval parquet.Int96 `parquet:"converted=INTERVAL"` |
| Decimal1 int32 `parquet:"converted=decimal, scale=2, precision=9"` |
| Decimal2 int64 `parquet:"converted=decimal, scale=2, precision=18"` |
| Decimal3 [12]byte `parquet:"converted=decimal, scale=2, precision=10"` |
| Decimal4 string `parquet:"converted=decimal, scale=2, precision=20"` |
| } |
| |
| sc, err := schema.NewSchemaFromStruct(&ConvertedSchema{}) |
| if err != nil { |
| log.Fatal(err) |
| } |
| |
| schema.PrintSchema(sc.Root(), os.Stdout, 2) |
| |
| // Output: |
| // repeated group field_id=-1 ConvertedSchema { |
| // required byte_array field_id=-1 utf8 (String); |
| // required int32 field_id=-1 Uint32 (Int(bitWidth=32, isSigned=true)); |
| // required int32 field_id=-1 date (Date); |
| // required int32 field_id=-1 timemilli (Time(isAdjustedToUTC=true, timeUnit=milliseconds)); |
| // required int64 field_id=-1 timemicro (Time(isAdjustedToUTC=true, timeUnit=microseconds)); |
| // required int64 field_id=-1 TimeStampMilli (Timestamp(isAdjustedToUTC=true, timeUnit=milliseconds, is_from_converted_type=true, force_set_converted_type=false)); |
| // required int64 field_id=-1 TimeStampMicro (Timestamp(isAdjustedToUTC=true, timeUnit=microseconds, is_from_converted_type=true, force_set_converted_type=false)); |
| // required int96 field_id=-1 Interval; |
| // required int32 field_id=-1 Decimal1 (Decimal(precision=9, scale=2)); |
| // required int64 field_id=-1 Decimal2 (Decimal(precision=18, scale=2)); |
| // required fixed_len_byte_array field_id=-1 Decimal3 (Decimal(precision=10, scale=2)); |
| // required byte_array field_id=-1 Decimal4 (Decimal(precision=20, scale=2)); |
| // } |
| } |
| |
| func ExampleNewSchemaFromStruct_repetition() { |
| type RepetitionSchema struct { |
| List []int64 `parquet:"fieldid=1"` |
| Repeated []int64 `parquet:"repetition=repeated, fieldid=2"` |
| Optional *int64 `parquet:"fieldid=3"` |
| Required *int64 `parquet:"repetition=REQUIRED, fieldid=4"` |
| Opt int64 `parquet:"repetition=OPTIONAL, fieldid=5"` |
| } |
| |
| sc, err := schema.NewSchemaFromStruct(RepetitionSchema{}) |
| if err != nil { |
| log.Fatal(err) |
| } |
| |
| schema.PrintSchema(sc.Root(), os.Stdout, 2) |
| |
| // Output: |
| // repeated group field_id=-1 RepetitionSchema { |
| // required group field_id=1 List (List) { |
| // repeated group field_id=-1 list { |
| // required int64 field_id=-1 element (Int(bitWidth=64, isSigned=true)); |
| // } |
| // } |
| // repeated int64 field_id=2 Repeated (Int(bitWidth=64, isSigned=true)); |
| // optional int64 field_id=3 Optional (Int(bitWidth=64, isSigned=true)); |
| // required int64 field_id=4 Required (Int(bitWidth=64, isSigned=true)); |
| // optional int64 field_id=5 Opt (Int(bitWidth=64, isSigned=true)); |
| // } |
| } |
| |
| func ExampleNewSchemaFromStruct_logicaltypes() { |
| type LogicalTypes struct { |
| String []byte `parquet:"logical=String"` |
| Enum string `parquet:"logical=enum"` |
| Date int32 `parquet:"logical=date"` |
| Decimal1 int32 `parquet:"logical=decimal, precision=9, scale=2"` |
| Decimal2 int32 `parquet:"logical=decimal, logical.precision=9, scale=2"` |
| Decimal3 int32 `parquet:"logical=decimal, precision=5, logical.precision=9, scale=1, logical.scale=3"` |
| TimeMilliUTC int32 `parquet:"logical=TIME, logical.unit=millis"` |
| TimeMilli int32 `parquet:"logical=Time, logical.unit=millis, logical.isadjustedutc=false"` |
| TimeMicros int64 `parquet:"logical=time, logical.unit=micros, logical.isadjustedutc=false"` |
| TimeMicrosUTC int64 `parquet:"logical=time, logical.unit=micros, logical.isadjustedutc=true"` |
| TimeNanos int64 `parquet:"logical=time, logical.unit=nanos"` |
| TimestampMilli int64 `parquet:"logical=timestamp, logical.unit=millis"` |
| TimestampMicrosNotUTC int64 `parquet:"logical=timestamp, logical.unit=micros, logical.isadjustedutc=false"` |
| TimestampNanos int64 `parquet:"logical=timestamp, logical.unit=nanos"` |
| JSON string `parquet:"logical=json"` |
| BSON []byte `parquet:"logical=BSON"` |
| UUID [16]byte `parquet:"logical=uuid"` |
| } |
| |
| sc, err := schema.NewSchemaFromStruct(LogicalTypes{}) |
| if err != nil { |
| log.Fatal(err) |
| } |
| |
| schema.PrintSchema(sc.Root(), os.Stdout, 2) |
| |
| // Output: |
| // repeated group field_id=-1 LogicalTypes { |
| // required byte_array field_id=-1 String (String); |
| // required byte_array field_id=-1 Enum (Enum); |
| // required int32 field_id=-1 Date (Date); |
| // required int32 field_id=-1 Decimal1 (Decimal(precision=9, scale=2)); |
| // required int32 field_id=-1 Decimal2 (Decimal(precision=9, scale=2)); |
| // required int32 field_id=-1 Decimal3 (Decimal(precision=9, scale=3)); |
| // required int32 field_id=-1 TimeMilliUTC (Time(isAdjustedToUTC=true, timeUnit=milliseconds)); |
| // required int32 field_id=-1 TimeMilli (Time(isAdjustedToUTC=false, timeUnit=milliseconds)); |
| // required int64 field_id=-1 TimeMicros (Time(isAdjustedToUTC=false, timeUnit=microseconds)); |
| // required int64 field_id=-1 TimeMicrosUTC (Time(isAdjustedToUTC=true, timeUnit=microseconds)); |
| // required int64 field_id=-1 TimeNanos (Time(isAdjustedToUTC=true, timeUnit=nanoseconds)); |
| // required int64 field_id=-1 TimestampMilli (Timestamp(isAdjustedToUTC=true, timeUnit=milliseconds, is_from_converted_type=false, force_set_converted_type=false)); |
| // required int64 field_id=-1 TimestampMicrosNotUTC (Timestamp(isAdjustedToUTC=false, timeUnit=microseconds, is_from_converted_type=false, force_set_converted_type=false)); |
| // required int64 field_id=-1 TimestampNanos (Timestamp(isAdjustedToUTC=true, timeUnit=nanoseconds, is_from_converted_type=false, force_set_converted_type=false)); |
| // required byte_array field_id=-1 JSON (JSON); |
| // required byte_array field_id=-1 BSON (BSON); |
| // required fixed_len_byte_array field_id=-1 UUID (UUID); |
| // } |
| } |
| |
| func ExampleNewSchemaFromStruct_physicaltype() { |
| type ChangeTypes struct { |
| Int32 int64 `parquet:"type=int32"` |
| FixedLen string `parquet:"type=fixed_len_byte_array, length=10"` |
| SliceAsFixed []byte `parquet:"type=fixed_len_byte_array, length=12"` |
| Int int `parquet:"type=int32"` |
| } |
| |
| sc, err := schema.NewSchemaFromStruct(ChangeTypes{}) |
| if err != nil { |
| log.Fatal(err) |
| } |
| |
| schema.PrintSchema(sc.Root(), os.Stdout, 2) |
| |
| // Output: |
| // repeated group field_id=-1 ChangeTypes { |
| // required int32 field_id=-1 Int32 (Int(bitWidth=32, isSigned=true)); |
| // required fixed_len_byte_array field_id=-1 FixedLen; |
| // required fixed_len_byte_array field_id=-1 SliceAsFixed; |
| // required int32 field_id=-1 Int (Int(bitWidth=32, isSigned=true)); |
| // } |
| } |
| |
| func ExampleNewSchemaFromStruct_nestedtypes() { |
| type Other struct { |
| OptionalMap *map[string]*string `parquet:"valuerepetition=required, keylogical=String, valueconverted=BSON"` |
| } |
| |
| type MyMap map[int32]string |
| |
| type Nested struct { |
| SimpleMap map[int32]string |
| FixedLenMap map[string][]byte `parquet:"keytype=fixed_len_byte_array, keyfieldid=10, valuefieldid=11, keylength=10"` |
| DecimalMap map[int32]string `parquet:"logical=map, keyconverted=DECIMAL, keyscale=3, keyprecision=7, valuetype=fixed_len_byte_array, valuelength=4, valuelogical=decimal, valuelogical.precision=9, valuescale=2"` |
| OtherList []*Other |
| OtherRepeated []Other `parquet:"repetition=repeated"` |
| DateArray [5]int32 `parquet:"valuelogical=date, logical=list"` |
| DateMap MyMap `parquet:"keylogical=TIME, keylogical.unit=MILLIS, keylogical.isadjustedutc=false, valuelogical=enum"` |
| } |
| |
| sc, err := schema.NewSchemaFromStruct(Nested{}) |
| if err != nil { |
| log.Fatal(err) |
| } |
| |
| schema.PrintSchema(sc.Root(), os.Stdout, 2) |
| |
| // Output: |
| // repeated group field_id=-1 Nested { |
| // required group field_id=-1 SimpleMap (Map) { |
| // repeated group field_id=-1 key_value { |
| // required int32 field_id=-1 key (Int(bitWidth=32, isSigned=true)); |
| // required byte_array field_id=-1 value; |
| // } |
| // } |
| // required group field_id=-1 FixedLenMap (Map) { |
| // repeated group field_id=-1 key_value { |
| // required fixed_len_byte_array field_id=10 key; |
| // required byte_array field_id=11 value; |
| // } |
| // } |
| // required group field_id=-1 DecimalMap (Map) { |
| // repeated group field_id=-1 key_value { |
| // required int32 field_id=-1 key (Decimal(precision=7, scale=3)); |
| // required fixed_len_byte_array field_id=-1 value (Decimal(precision=9, scale=2)); |
| // } |
| // } |
| // required group field_id=-1 OtherList (List) { |
| // repeated group field_id=-1 list { |
| // optional group field_id=-1 element { |
| // optional group field_id=-1 OptionalMap (Map) { |
| // repeated group field_id=-1 key_value { |
| // required byte_array field_id=-1 key (String); |
| // required byte_array field_id=-1 value (BSON); |
| // } |
| // } |
| // } |
| // } |
| // } |
| // repeated group field_id=-1 OtherRepeated { |
| // optional group field_id=-1 OptionalMap (Map) { |
| // repeated group field_id=-1 key_value { |
| // required byte_array field_id=-1 key (String); |
| // required byte_array field_id=-1 value (BSON); |
| // } |
| // } |
| // } |
| // required group field_id=-1 DateArray (List) { |
| // repeated group field_id=-1 list { |
| // required int32 field_id=-1 element (Date); |
| // } |
| // } |
| // required group field_id=-1 DateMap (Map) { |
| // repeated group field_id=-1 key_value { |
| // required int32 field_id=-1 key (Time(isAdjustedToUTC=false, timeUnit=milliseconds)); |
| // required byte_array field_id=-1 value (Enum); |
| // } |
| // } |
| // } |
| } |
| |
| func TestStructFromSchema(t *testing.T) { |
| root, err := schema.NewGroupNode("schema", parquet.Repetitions.Repeated, schema.FieldList{ |
| schema.NewBooleanNode("bool", parquet.Repetitions.Required, -1), |
| schema.NewInt32Node("int32", parquet.Repetitions.Optional, -1), |
| schema.NewInt64Node("int64", parquet.Repetitions.Repeated, -1), |
| schema.NewInt96Node("int96", parquet.Repetitions.Required, -1), |
| schema.NewFloat32Node("float", parquet.Repetitions.Required, -1), |
| schema.NewByteArrayNode("bytearray", parquet.Repetitions.Required, -1), |
| schema.NewFixedLenByteArrayNode("fixedLen", parquet.Repetitions.Required, 10, -1), |
| }, -1) |
| assert.NoError(t, err) |
| |
| sc := schema.NewSchema(root) |
| |
| typ, err := schema.NewStructFromSchema(sc) |
| assert.NoError(t, err) |
| |
| assert.Equal(t, reflect.Struct, typ.Kind()) |
| assert.Equal(t, "struct { bool bool; int32 *int32; int64 []int64; int96 parquet.Int96; float float32; bytearray parquet.ByteArray; fixedLen parquet.FixedLenByteArray }", |
| typ.String()) |
| } |
| |
| func TestStructFromSchemaWithNesting(t *testing.T) { |
| type Other struct { |
| List *[]*float32 |
| } |
| |
| type Nested struct { |
| Nest []int32 |
| OptionalNest []*int64 |
| Mapped map[string]float32 |
| Other []Other |
| Other2 Other |
| } |
| |
| sc, err := schema.NewSchemaFromStruct(Nested{}) |
| assert.NoError(t, err) |
| |
| typ, err := schema.NewStructFromSchema(sc) |
| assert.NoError(t, err) |
| assert.Equal(t, "struct { Nest []int32; OptionalNest []*int64; Mapped map[string]float32; Other []struct { List *[]*float32 }; Other2 struct { List *[]*float32 } }", |
| typ.String()) |
| } |
| |
| func TestStructFromSchemaBackwardsCompatList(t *testing.T) { |
| tests := []struct { |
| name string |
| n schema.Node |
| expected string |
| }{ |
| {"proper list", schema.MustGroup(schema.NewGroupNodeLogical("my_list", parquet.Repetitions.Required, |
| schema.FieldList{ |
| schema.MustGroup(schema.NewGroupNode("list", parquet.Repetitions.Repeated, schema.FieldList{schema.NewBooleanNode("element", parquet.Repetitions.Optional, -1)}, -1)), |
| }, schema.NewListLogicalType(), -1)), "struct { my_list []*bool }"}, |
| {"backward nullable list nonnull ints", schema.MustGroup(schema.NewGroupNodeLogical("my_list", parquet.Repetitions.Optional, schema.FieldList{ |
| schema.NewInt32Node("element", parquet.Repetitions.Repeated, -1), |
| }, schema.NewListLogicalType(), -1)), "struct { my_list *[]int32 }"}, |
| {"backward nullable list tuple string int", schema.MustGroup(schema.NewGroupNodeLogical("my_list", parquet.Repetitions.Optional, schema.FieldList{ |
| schema.MustGroup(schema.NewGroupNode("element", parquet.Repetitions.Repeated, schema.FieldList{ |
| schema.MustPrimitive(schema.NewPrimitiveNodeLogical("str", parquet.Repetitions.Required, schema.StringLogicalType{}, parquet.Types.ByteArray, 0, -1)), |
| schema.NewInt32Node("num", parquet.Repetitions.Required, -1), |
| }, -1)), |
| }, schema.NewListLogicalType(), -1)), "struct { my_list *[]struct { str string; num int32 } }"}, |
| {"list tuple string", schema.MustGroup(schema.NewGroupNodeLogical("my_list", parquet.Repetitions.Required, schema.FieldList{ |
| schema.MustGroup(schema.NewGroupNode("array", parquet.Repetitions.Repeated, schema.FieldList{ |
| schema.NewByteArrayNode("str", parquet.Repetitions.Required, -1), |
| }, -1)), |
| }, schema.NewListLogicalType(), -1)), "struct { my_list []struct { str parquet.ByteArray } }"}, |
| {"list tuple string my_list_tuple", schema.MustGroup(schema.NewGroupNodeLogical("my_list", parquet.Repetitions.Optional, schema.FieldList{ |
| schema.MustGroup(schema.NewGroupNode("my_list_tuple", parquet.Repetitions.Repeated, schema.FieldList{ |
| schema.MustPrimitive(schema.NewPrimitiveNodeLogical("str", parquet.Repetitions.Required, schema.StringLogicalType{}, parquet.Types.ByteArray, 0, -1)), |
| }, -1)), |
| }, schema.NewListLogicalType(), -1)), "struct { my_list *[]struct { str string } }"}, |
| } |
| |
| for _, tt := range tests { |
| t.Run(tt.name, func(t *testing.T) { |
| typ, err := schema.NewStructFromSchema(schema.NewSchema(schema.MustGroup(schema.NewGroupNode("schema", parquet.Repetitions.Repeated, schema.FieldList{tt.n}, -1)))) |
| assert.NoError(t, err) |
| assert.Equal(t, tt.expected, typ.String()) |
| }) |
| } |
| } |
| |
| func TestStructFromSchemaMaps(t *testing.T) { |
| tests := []struct { |
| name string |
| n schema.Node |
| expected string |
| }{ |
| {"map string int", schema.MustGroup(schema.NewGroupNodeLogical("my_map", parquet.Repetitions.Required, schema.FieldList{ |
| schema.MustGroup(schema.NewGroupNode("key_value", parquet.Repetitions.Repeated, schema.FieldList{ |
| schema.MustPrimitive(schema.NewPrimitiveNodeLogical("key", parquet.Repetitions.Required, schema.StringLogicalType{}, parquet.Types.ByteArray, 0, -1)), |
| schema.NewInt32Node("value", parquet.Repetitions.Optional, -1), |
| }, -1)), |
| }, schema.MapLogicalType{}, -1)), "struct { my_map map[string]*int32 }"}, |
| {"nullable map string, int, required values", schema.MustGroup(schema.NewGroupNodeLogical("my_map", parquet.Repetitions.Optional, schema.FieldList{ |
| schema.MustGroup(schema.NewGroupNode("map", parquet.Repetitions.Repeated, schema.FieldList{ |
| schema.NewByteArrayNode("str", parquet.Repetitions.Required, -1), |
| schema.NewInt32Node("num", parquet.Repetitions.Required, -1), |
| }, -1)), |
| }, schema.MapLogicalType{}, -1)), "struct { my_map *map[string]int32 }"}, |
| {"map_key_value with missing value", schema.MustGroup(schema.NewGroupNodeConverted("my_map", parquet.Repetitions.Optional, schema.FieldList{ |
| schema.MustGroup(schema.NewGroupNode("map", parquet.Repetitions.Repeated, schema.FieldList{ |
| schema.NewByteArrayNode("key", parquet.Repetitions.Required, -1), |
| }, -1)), |
| }, schema.ConvertedTypes.MapKeyValue, -1)), "struct { my_map *map[string]bool }"}, |
| } |
| for _, tt := range tests { |
| t.Run(tt.name, func(t *testing.T) { |
| typ, err := schema.NewStructFromSchema(schema.NewSchema(schema.MustGroup(schema.NewGroupNode("schema", parquet.Repetitions.Repeated, schema.FieldList{tt.n}, -1)))) |
| assert.NoError(t, err) |
| assert.Equal(t, tt.expected, typ.String()) |
| }) |
| } |
| } |