blob: 02d0d83e8465e7c176ac81ee8722a8c4d83530ee [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package schema
import (
"fmt"
"strings"
"github.com/apache/arrow/go/v15/parquet"
format "github.com/apache/arrow/go/v15/parquet/internal/gen-go/parquet"
)
// Column encapsulates the information necessary to interpret primitive
// column data in the context of a particular schema.
//
// Reassembling the nested structure from repetition and definition levels
// requires examining the node structure along the column's path to the
// schema root; a Column therefore pairs the primitive node with the maximum
// definition and repetition levels for that path.
type Column struct {
// pnode is the primitive schema node this column wraps; the accessor
// methods on Column read their values from it.
pnode *PrimitiveNode
// maxDefLvl is the maximum definition level in this column.
// If this is > 0 then either this column or a parent column must be optional.
maxDefLvl int16
// maxRepLvl is the maximum repetition level in this column.
// If this is > 0, then either this column or a parent column must be repeated.
// When the repetition level in the column data equals this value, it indicates
// additional elements in the innermost list.
maxRepLvl int16
}
// NewColumn builds a Column that wraps the primitive node n and records the
// supplied maximum definition and repetition levels for it.
func NewColumn(n *PrimitiveNode, maxDefinitionLvl, maxRepetitionLvl int16) *Column {
	return &Column{
		pnode:     n,
		maxDefLvl: maxDefinitionLvl,
		maxRepLvl: maxRepetitionLvl,
	}
}
// Name returns the column's name, as reported by its underlying primitive node.
func (c *Column) Name() string {
	return c.pnode.Name()
}
// ColumnPath reports the full path to this column, starting from the root of
// the schema, as a parquet.ColumnPath.
func (c *Column) ColumnPath() parquet.ColumnPath {
	return c.pnode.columnPath()
}
// Path returns the dot-string version of the column's path; it is equivalent
// to ColumnPath().String().
func (c *Column) Path() string {
	return c.pnode.Path()
}
// TypeLength returns the length of the elements in the column when it is a
// FixedLenByteArray column, and -1 otherwise.
func (c *Column) TypeLength() int {
	return c.pnode.TypeLength()
}
// MaxDefinitionLevel returns the maximum definition level of this column.
// A value greater than zero means this column or one of its parent columns
// is optional.
func (c *Column) MaxDefinitionLevel() int16 {
	return c.maxDefLvl
}
// MaxRepetitionLevel returns the maximum repetition level of this column.
// A value greater than zero means this column or one of its parent columns
// is repeated.
func (c *Column) MaxRepetitionLevel() int16 {
	return c.maxRepLvl
}
// PhysicalType returns the parquet physical type reported by the column's
// underlying primitive node.
func (c *Column) PhysicalType() parquet.Type {
	return c.pnode.PhysicalType()
}
// ConvertedType returns the converted type recorded on the column's
// underlying primitive node.
func (c *Column) ConvertedType() ConvertedType {
	return c.pnode.convertedType
}
// LogicalType returns the logical type recorded on the column's underlying
// primitive node.
func (c *Column) LogicalType() LogicalType {
	return c.pnode.logicalType
}
// ColumnOrder returns the ColumnOrder value stored on the column's underlying
// primitive node.
func (c *Column) ColumnOrder() parquet.ColumnOrder {
	return c.pnode.ColumnOrder
}
// String renders a multi-line, human-readable description of the column.
//
// The output always lists the name, path, physical type, converted type,
// logical type and the maximum definition/repetition levels. A length line is
// appended only for FixedLenByteArray columns, and precision/scale lines only
// when the converted type is Decimal.
func (c *Column) String() string {
	var sb strings.Builder

	sb.WriteString("column descriptor = {\n")
	fmt.Fprintf(&sb, " name: %s,\n", c.Name())
	fmt.Fprintf(&sb, " path: %s,\n", c.Path())
	fmt.Fprintf(&sb, " physical_type: %s,\n", c.PhysicalType())

	converted := c.ConvertedType()
	fmt.Fprintf(&sb, " converted_type: %s,\n", converted)
	fmt.Fprintf(&sb, " logical_type: %s,\n", c.LogicalType())
	fmt.Fprintf(&sb, " max_definition_level: %d,\n", c.MaxDefinitionLevel())
	fmt.Fprintf(&sb, " max_repetition_level: %d,\n", c.MaxRepetitionLevel())

	// The element length is only reported for fixed-length byte arrays.
	if c.PhysicalType() == parquet.Types.FixedLenByteArray {
		fmt.Fprintf(&sb, " length: %d,\n", c.TypeLength())
	}

	// Precision and scale are only reported for decimal columns.
	if converted == ConvertedTypes.Decimal {
		meta := c.pnode.decimalMetaData
		fmt.Fprintf(&sb, " precision: %d,\n scale: %d,\n", meta.Precision, meta.Scale)
	}

	sb.WriteByte('}')
	return sb.String()
}
// Equals reports whether rhs describes the same column as c: the underlying
// node definitions must be equal and both the maximum repetition level and
// the maximum definition level must match.
func (c *Column) Equals(rhs *Column) bool {
	// Compare the nodes first, then the levels, stopping at the first mismatch.
	if !c.pnode.Equals(rhs.pnode) {
		return false
	}
	if c.MaxRepetitionLevel() != rhs.MaxRepetitionLevel() {
		return false
	}
	return c.MaxDefinitionLevel() == rhs.MaxDefinitionLevel()
}
// SchemaNode exposes the column's underlying primitive node as a generic
// schema-tree Node.
func (c *Column) SchemaNode() Node {
	var node Node = c.pnode
	return node
}
// SortOrder returns the sort order of this column's statistics.
//
// When the column has a non-nil logical type the order is obtained from
// GetLogicalSortOrder using that logical type; otherwise it is obtained from
// GetSortOrder using the converted type. Both lookups are also given the
// column's physical type.
func (c *Column) SortOrder() SortOrder {
	logical := c.LogicalType()
	physical := format.Type(c.pnode.PhysicalType())

	if logical == nil {
		return GetSortOrder(c.ConvertedType(), physical)
	}
	return GetLogicalSortOrder(logical, physical)
}