blob: e6fdb9708759cfa0aa0b2226749a0e46f6066c4e [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
//! Contains Rust mappings for Thrift definition.
//! Refer to `parquet.thrift` file to see raw definitions.
use std::{convert, fmt, result, str};
use parquet_format as parquet;
use crate::errors::ParquetError;
// ----------------------------------------------------------------------
// Types from the Thrift definition
// ----------------------------------------------------------------------
// Mirrors `parquet::Type`
/// Types supported by Parquet.
/// These physical types are intended to be used in combination with the encodings to
/// control the on disk storage format.
/// For example INT16 is not included as a type since a good encoding of INT32
/// would handle this.
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum Type {
BOOLEAN,
INT32,
INT64,
INT96,
FLOAT,
DOUBLE,
BYTE_ARRAY,
FIXED_LEN_BYTE_ARRAY,
}
// ----------------------------------------------------------------------
// Mirrors `parquet::ConvertedType`
/// Common types (logical types) used by frameworks when using Parquet.
/// This helps map between types in those frameworks to the base types in Parquet.
/// This is only metadata and not needed to read or write the data.
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum LogicalType {
NONE,
/// A BYTE_ARRAY actually contains UTF8 encoded chars.
UTF8,
/// A map is converted as an optional field containing a repeated key/value pair.
MAP,
/// A key/value pair is converted into a group of two fields.
MAP_KEY_VALUE,
/// A list is converted into an optional field containing a repeated field for its
/// values.
LIST,
/// An enum is converted into a binary field
ENUM,
/// A decimal value.
/// This may be used to annotate binary or fixed primitive types. The
/// underlying byte array stores the unscaled value encoded as two's
/// complement using big-endian byte order (the most significant byte is the
/// zeroth element).
///
/// This must be accompanied by a (maximum) precision and a scale in the
/// SchemaElement. The precision specifies the number of digits in the decimal
/// and the scale stores the location of the decimal point. For example 1.23
/// would have precision 3 (3 total digits) and scale 2 (the decimal point is
/// 2 digits over).
DECIMAL,
/// A date stored as days since Unix epoch, encoded as the INT32 physical type.
DATE,
/// The total number of milliseconds since midnight. The value is stored as an INT32
/// physical type.
TIME_MILLIS,
/// The total number of microseconds since midnight. The value is stored as an INT64
/// physical type.
TIME_MICROS,
/// Date and time recorded as milliseconds since the Unix epoch.
/// Recorded as a physical type of INT64.
TIMESTAMP_MILLIS,
/// Date and time recorded as microseconds since the Unix epoch.
/// The value is stored as an INT64 physical type.
TIMESTAMP_MICROS,
/// An unsigned 8 bit integer value stored as INT32 physical type.
UINT_8,
/// An unsigned 16 bit integer value stored as INT32 physical type.
UINT_16,
/// An unsigned 32 bit integer value stored as INT32 physical type.
UINT_32,
/// An unsigned 64 bit integer value stored as INT64 physical type.
UINT_64,
/// A signed 8 bit integer value stored as INT32 physical type.
INT_8,
/// A signed 16 bit integer value stored as INT32 physical type.
INT_16,
/// A signed 32 bit integer value stored as INT32 physical type.
INT_32,
/// A signed 64 bit integer value stored as INT64 physical type.
INT_64,
/// A JSON document embedded within a single UTF8 column.
JSON,
/// A BSON document embedded within a single BINARY column.
BSON,
/// An interval of time.
///
/// This type annotates data stored as a FIXED_LEN_BYTE_ARRAY of length 12.
/// This data is composed of three separate little endian unsigned integers.
/// Each stores a component of a duration of time. The first integer identifies
/// the number of months associated with the duration, the second identifies
/// the number of days associated with the duration and the third identifies
/// the number of milliseconds associated with the provided duration.
/// This duration of time is independent of any particular timezone or date.
INTERVAL,
}
// ----------------------------------------------------------------------
// Mirrors `parquet::FieldRepetitionType`
/// Representation of field types in schema.
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum Repetition {
/// Field is required (can not be null) and each record has exactly 1 value.
REQUIRED,
/// Field is optional (can be null) and each record has 0 or 1 values.
OPTIONAL,
/// Field is repeated and can contain 0 or more values.
REPEATED,
}
// ----------------------------------------------------------------------
// Mirrors `parquet::Encoding`
/// Encodings supported by Parquet.
/// Not all encodings are valid for all types. These enums are also used to specify the
/// encoding of definition and repetition levels.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum Encoding {
/// Default byte encoding.
/// - BOOLEAN - 1 bit per value, 0 is false; 1 is true.
/// - INT32 - 4 bytes per value, stored as little-endian.
/// - INT64 - 8 bytes per value, stored as little-endian.
/// - FLOAT - 4 bytes per value, stored as little-endian.
/// - DOUBLE - 8 bytes per value, stored as little-endian.
/// - BYTE_ARRAY - 4 byte length stored as little endian, followed by bytes.
/// - FIXED_LEN_BYTE_ARRAY - just the bytes are stored.
PLAIN,
/// **Deprecated** dictionary encoding.
///
/// The values in the dictionary are encoded using PLAIN encoding.
/// Since it is deprecated, RLE_DICTIONARY encoding is used for a data page, and
/// PLAIN encoding is used for dictionary page.
PLAIN_DICTIONARY,
/// Group packed run length encoding.
///
/// Usable for definition/repetition levels encoding and boolean values.
RLE,
/// Bit packed encoding.
///
/// This can only be used if the data has a known max width.
/// Usable for definition/repetition levels encoding.
BIT_PACKED,
/// Delta encoding for integers, either INT32 or INT64.
///
/// Works best on sorted data.
DELTA_BINARY_PACKED,
/// Encoding for byte arrays to separate the length values and the data.
///
/// The lengths are encoded using DELTA_BINARY_PACKED encoding.
DELTA_LENGTH_BYTE_ARRAY,
/// Incremental encoding for byte arrays.
///
/// Prefix lengths are encoded using DELTA_BINARY_PACKED encoding.
/// Suffixes are stored using DELTA_LENGTH_BYTE_ARRAY encoding.
DELTA_BYTE_ARRAY,
/// Dictionary encoding.
///
/// The ids are encoded using the RLE encoding.
RLE_DICTIONARY,
}
// ----------------------------------------------------------------------
// Mirrors `parquet::CompressionCodec`
/// Supported compression algorithms.
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum Compression {
UNCOMPRESSED,
SNAPPY,
GZIP,
LZO,
BROTLI,
LZ4,
ZSTD,
}
// ----------------------------------------------------------------------
// Mirrors `parquet::PageType`
/// Available data pages for Parquet file format.
/// Note that some of the page types may not be supported.
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum PageType {
DATA_PAGE,
INDEX_PAGE,
DICTIONARY_PAGE,
DATA_PAGE_V2,
}
// ----------------------------------------------------------------------
// Mirrors `parquet::ColumnOrder`
/// Sort order for page and column statistics.
///
/// Types are associated with sort orders and column stats are aggregated using a sort
/// order, and a sort order should be considered when comparing values with statistics
/// min/max.
///
/// See reference in
/// https://github.com/apache/parquet-cpp/blob/master/src/parquet/types.h
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum SortOrder {
/// Signed (either value or legacy byte-wise) comparison.
SIGNED,
/// Unsigned (depending on physical type either value or byte-wise) comparison.
UNSIGNED,
/// Comparison is undefined.
UNDEFINED,
}
/// Column order that specifies what method was used to aggregate min/max values for
/// statistics.
///
/// If column order is undefined, then it is the legacy behaviour and all values should
/// be compared as signed values/bytes.
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum ColumnOrder {
/// Column uses the order defined by its logical or physical type
/// (if there is no logical type), parquet-format 2.4.0+.
TYPE_DEFINED_ORDER(SortOrder),
/// Undefined column order, means legacy behaviour before parquet-format 2.4.0.
/// Sort order is always SIGNED.
UNDEFINED,
}
impl ColumnOrder {
/// Returns sort order for a physical/logical type.
pub fn get_sort_order(logical_type: LogicalType, physical_type: Type) -> SortOrder {
match logical_type {
// Unsigned byte-wise comparison.
LogicalType::UTF8
| LogicalType::JSON
| LogicalType::BSON
| LogicalType::ENUM => SortOrder::UNSIGNED,
LogicalType::INT_8
| LogicalType::INT_16
| LogicalType::INT_32
| LogicalType::INT_64 => SortOrder::SIGNED,
LogicalType::UINT_8
| LogicalType::UINT_16
| LogicalType::UINT_32
| LogicalType::UINT_64 => SortOrder::UNSIGNED,
// Signed comparison of the represented value.
LogicalType::DECIMAL => SortOrder::SIGNED,
LogicalType::DATE => SortOrder::SIGNED,
LogicalType::TIME_MILLIS
| LogicalType::TIME_MICROS
| LogicalType::TIMESTAMP_MILLIS
| LogicalType::TIMESTAMP_MICROS => SortOrder::SIGNED,
LogicalType::INTERVAL => SortOrder::UNSIGNED,
LogicalType::LIST | LogicalType::MAP | LogicalType::MAP_KEY_VALUE => {
SortOrder::UNDEFINED
}
// Fall back to physical type.
LogicalType::NONE => Self::get_default_sort_order(physical_type),
}
}
/// Returns default sort order based on physical type.
fn get_default_sort_order(physical_type: Type) -> SortOrder {
match physical_type {
// Order: false, true
Type::BOOLEAN => SortOrder::UNSIGNED,
Type::INT32 | Type::INT64 => SortOrder::SIGNED,
Type::INT96 => SortOrder::UNDEFINED,
// Notes to remember when comparing float/double values:
// If the min is a NaN, it should be ignored.
// If the max is a NaN, it should be ignored.
// If the min is +0, the row group may contain -0 values as well.
// If the max is -0, the row group may contain +0 values as well.
// When looking for NaN values, min and max should be ignored.
Type::FLOAT | Type::DOUBLE => SortOrder::SIGNED,
// unsigned byte-wise comparison
Type::BYTE_ARRAY | Type::FIXED_LEN_BYTE_ARRAY => SortOrder::UNSIGNED,
}
}
/// Returns sort order associated with this column order.
pub fn sort_order(&self) -> SortOrder {
match *self {
ColumnOrder::TYPE_DEFINED_ORDER(order) => order,
ColumnOrder::UNDEFINED => SortOrder::SIGNED,
}
}
}
impl fmt::Display for Type {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "{:?}", self)
}
}
impl fmt::Display for LogicalType {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "{:?}", self)
}
}
impl fmt::Display for Repetition {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "{:?}", self)
}
}
impl fmt::Display for Encoding {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "{:?}", self)
}
}
impl fmt::Display for Compression {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "{:?}", self)
}
}
impl fmt::Display for PageType {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "{:?}", self)
}
}
impl fmt::Display for SortOrder {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "{:?}", self)
}
}
impl fmt::Display for ColumnOrder {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "{:?}", self)
}
}
// ----------------------------------------------------------------------
// parquet::Type <=> Type conversion
impl convert::From<parquet::Type> for Type {
fn from(value: parquet::Type) -> Self {
match value {
parquet::Type::BOOLEAN => Type::BOOLEAN,
parquet::Type::INT32 => Type::INT32,
parquet::Type::INT64 => Type::INT64,
parquet::Type::INT96 => Type::INT96,
parquet::Type::FLOAT => Type::FLOAT,
parquet::Type::DOUBLE => Type::DOUBLE,
parquet::Type::BYTE_ARRAY => Type::BYTE_ARRAY,
parquet::Type::FIXED_LEN_BYTE_ARRAY => Type::FIXED_LEN_BYTE_ARRAY,
}
}
}
impl convert::From<Type> for parquet::Type {
fn from(value: Type) -> Self {
match value {
Type::BOOLEAN => parquet::Type::BOOLEAN,
Type::INT32 => parquet::Type::INT32,
Type::INT64 => parquet::Type::INT64,
Type::INT96 => parquet::Type::INT96,
Type::FLOAT => parquet::Type::FLOAT,
Type::DOUBLE => parquet::Type::DOUBLE,
Type::BYTE_ARRAY => parquet::Type::BYTE_ARRAY,
Type::FIXED_LEN_BYTE_ARRAY => parquet::Type::FIXED_LEN_BYTE_ARRAY,
}
}
}
// ----------------------------------------------------------------------
// parquet::ConvertedType <=> LogicalType conversion
impl convert::From<Option<parquet::ConvertedType>> for LogicalType {
fn from(option: Option<parquet::ConvertedType>) -> Self {
match option {
None => LogicalType::NONE,
Some(value) => match value {
parquet::ConvertedType::UTF8 => LogicalType::UTF8,
parquet::ConvertedType::MAP => LogicalType::MAP,
parquet::ConvertedType::MAP_KEY_VALUE => LogicalType::MAP_KEY_VALUE,
parquet::ConvertedType::LIST => LogicalType::LIST,
parquet::ConvertedType::ENUM => LogicalType::ENUM,
parquet::ConvertedType::DECIMAL => LogicalType::DECIMAL,
parquet::ConvertedType::DATE => LogicalType::DATE,
parquet::ConvertedType::TIME_MILLIS => LogicalType::TIME_MILLIS,
parquet::ConvertedType::TIME_MICROS => LogicalType::TIME_MICROS,
parquet::ConvertedType::TIMESTAMP_MILLIS => LogicalType::TIMESTAMP_MILLIS,
parquet::ConvertedType::TIMESTAMP_MICROS => LogicalType::TIMESTAMP_MICROS,
parquet::ConvertedType::UINT_8 => LogicalType::UINT_8,
parquet::ConvertedType::UINT_16 => LogicalType::UINT_16,
parquet::ConvertedType::UINT_32 => LogicalType::UINT_32,
parquet::ConvertedType::UINT_64 => LogicalType::UINT_64,
parquet::ConvertedType::INT_8 => LogicalType::INT_8,
parquet::ConvertedType::INT_16 => LogicalType::INT_16,
parquet::ConvertedType::INT_32 => LogicalType::INT_32,
parquet::ConvertedType::INT_64 => LogicalType::INT_64,
parquet::ConvertedType::JSON => LogicalType::JSON,
parquet::ConvertedType::BSON => LogicalType::BSON,
parquet::ConvertedType::INTERVAL => LogicalType::INTERVAL,
},
}
}
}
impl convert::From<LogicalType> for Option<parquet::ConvertedType> {
fn from(value: LogicalType) -> Self {
match value {
LogicalType::NONE => None,
LogicalType::UTF8 => Some(parquet::ConvertedType::UTF8),
LogicalType::MAP => Some(parquet::ConvertedType::MAP),
LogicalType::MAP_KEY_VALUE => Some(parquet::ConvertedType::MAP_KEY_VALUE),
LogicalType::LIST => Some(parquet::ConvertedType::LIST),
LogicalType::ENUM => Some(parquet::ConvertedType::ENUM),
LogicalType::DECIMAL => Some(parquet::ConvertedType::DECIMAL),
LogicalType::DATE => Some(parquet::ConvertedType::DATE),
LogicalType::TIME_MILLIS => Some(parquet::ConvertedType::TIME_MILLIS),
LogicalType::TIME_MICROS => Some(parquet::ConvertedType::TIME_MICROS),
LogicalType::TIMESTAMP_MILLIS => {
Some(parquet::ConvertedType::TIMESTAMP_MILLIS)
}
LogicalType::TIMESTAMP_MICROS => {
Some(parquet::ConvertedType::TIMESTAMP_MICROS)
}
LogicalType::UINT_8 => Some(parquet::ConvertedType::UINT_8),
LogicalType::UINT_16 => Some(parquet::ConvertedType::UINT_16),
LogicalType::UINT_32 => Some(parquet::ConvertedType::UINT_32),
LogicalType::UINT_64 => Some(parquet::ConvertedType::UINT_64),
LogicalType::INT_8 => Some(parquet::ConvertedType::INT_8),
LogicalType::INT_16 => Some(parquet::ConvertedType::INT_16),
LogicalType::INT_32 => Some(parquet::ConvertedType::INT_32),
LogicalType::INT_64 => Some(parquet::ConvertedType::INT_64),
LogicalType::JSON => Some(parquet::ConvertedType::JSON),
LogicalType::BSON => Some(parquet::ConvertedType::BSON),
LogicalType::INTERVAL => Some(parquet::ConvertedType::INTERVAL),
}
}
}
// ----------------------------------------------------------------------
// parquet::FieldRepetitionType <=> Repetition conversion
impl convert::From<parquet::FieldRepetitionType> for Repetition {
fn from(value: parquet::FieldRepetitionType) -> Self {
match value {
parquet::FieldRepetitionType::REQUIRED => Repetition::REQUIRED,
parquet::FieldRepetitionType::OPTIONAL => Repetition::OPTIONAL,
parquet::FieldRepetitionType::REPEATED => Repetition::REPEATED,
}
}
}
impl convert::From<Repetition> for parquet::FieldRepetitionType {
fn from(value: Repetition) -> Self {
match value {
Repetition::REQUIRED => parquet::FieldRepetitionType::REQUIRED,
Repetition::OPTIONAL => parquet::FieldRepetitionType::OPTIONAL,
Repetition::REPEATED => parquet::FieldRepetitionType::REPEATED,
}
}
}
// ----------------------------------------------------------------------
// parquet::Encoding <=> Encoding conversion
impl convert::From<parquet::Encoding> for Encoding {
fn from(value: parquet::Encoding) -> Self {
match value {
parquet::Encoding::PLAIN => Encoding::PLAIN,
parquet::Encoding::PLAIN_DICTIONARY => Encoding::PLAIN_DICTIONARY,
parquet::Encoding::RLE => Encoding::RLE,
parquet::Encoding::BIT_PACKED => Encoding::BIT_PACKED,
parquet::Encoding::DELTA_BINARY_PACKED => Encoding::DELTA_BINARY_PACKED,
parquet::Encoding::DELTA_LENGTH_BYTE_ARRAY => {
Encoding::DELTA_LENGTH_BYTE_ARRAY
}
parquet::Encoding::DELTA_BYTE_ARRAY => Encoding::DELTA_BYTE_ARRAY,
parquet::Encoding::RLE_DICTIONARY => Encoding::RLE_DICTIONARY,
}
}
}
impl convert::From<Encoding> for parquet::Encoding {
fn from(value: Encoding) -> Self {
match value {
Encoding::PLAIN => parquet::Encoding::PLAIN,
Encoding::PLAIN_DICTIONARY => parquet::Encoding::PLAIN_DICTIONARY,
Encoding::RLE => parquet::Encoding::RLE,
Encoding::BIT_PACKED => parquet::Encoding::BIT_PACKED,
Encoding::DELTA_BINARY_PACKED => parquet::Encoding::DELTA_BINARY_PACKED,
Encoding::DELTA_LENGTH_BYTE_ARRAY => {
parquet::Encoding::DELTA_LENGTH_BYTE_ARRAY
}
Encoding::DELTA_BYTE_ARRAY => parquet::Encoding::DELTA_BYTE_ARRAY,
Encoding::RLE_DICTIONARY => parquet::Encoding::RLE_DICTIONARY,
}
}
}
// ----------------------------------------------------------------------
// parquet::CompressionCodec <=> Compression conversion
impl convert::From<parquet::CompressionCodec> for Compression {
fn from(value: parquet::CompressionCodec) -> Self {
match value {
parquet::CompressionCodec::UNCOMPRESSED => Compression::UNCOMPRESSED,
parquet::CompressionCodec::SNAPPY => Compression::SNAPPY,
parquet::CompressionCodec::GZIP => Compression::GZIP,
parquet::CompressionCodec::LZO => Compression::LZO,
parquet::CompressionCodec::BROTLI => Compression::BROTLI,
parquet::CompressionCodec::LZ4 => Compression::LZ4,
parquet::CompressionCodec::ZSTD => Compression::ZSTD,
}
}
}
impl convert::From<Compression> for parquet::CompressionCodec {
fn from(value: Compression) -> Self {
match value {
Compression::UNCOMPRESSED => parquet::CompressionCodec::UNCOMPRESSED,
Compression::SNAPPY => parquet::CompressionCodec::SNAPPY,
Compression::GZIP => parquet::CompressionCodec::GZIP,
Compression::LZO => parquet::CompressionCodec::LZO,
Compression::BROTLI => parquet::CompressionCodec::BROTLI,
Compression::LZ4 => parquet::CompressionCodec::LZ4,
Compression::ZSTD => parquet::CompressionCodec::ZSTD,
}
}
}
// ----------------------------------------------------------------------
// parquet::PageType <=> PageType conversion
impl convert::From<parquet::PageType> for PageType {
fn from(value: parquet::PageType) -> Self {
match value {
parquet::PageType::DATA_PAGE => PageType::DATA_PAGE,
parquet::PageType::INDEX_PAGE => PageType::INDEX_PAGE,
parquet::PageType::DICTIONARY_PAGE => PageType::DICTIONARY_PAGE,
parquet::PageType::DATA_PAGE_V2 => PageType::DATA_PAGE_V2,
}
}
}
impl convert::From<PageType> for parquet::PageType {
fn from(value: PageType) -> Self {
match value {
PageType::DATA_PAGE => parquet::PageType::DATA_PAGE,
PageType::INDEX_PAGE => parquet::PageType::INDEX_PAGE,
PageType::DICTIONARY_PAGE => parquet::PageType::DICTIONARY_PAGE,
PageType::DATA_PAGE_V2 => parquet::PageType::DATA_PAGE_V2,
}
}
}
// ----------------------------------------------------------------------
// String conversions for schema parsing.
impl str::FromStr for Repetition {
type Err = ParquetError;
fn from_str(s: &str) -> result::Result<Self, Self::Err> {
match s {
"REQUIRED" => Ok(Repetition::REQUIRED),
"OPTIONAL" => Ok(Repetition::OPTIONAL),
"REPEATED" => Ok(Repetition::REPEATED),
other => Err(general_err!("Invalid repetition {}", other)),
}
}
}
impl str::FromStr for Type {
type Err = ParquetError;
fn from_str(s: &str) -> result::Result<Self, Self::Err> {
match s {
"BOOLEAN" => Ok(Type::BOOLEAN),
"INT32" => Ok(Type::INT32),
"INT64" => Ok(Type::INT64),
"INT96" => Ok(Type::INT96),
"FLOAT" => Ok(Type::FLOAT),
"DOUBLE" => Ok(Type::DOUBLE),
"BYTE_ARRAY" | "BINARY" => Ok(Type::BYTE_ARRAY),
"FIXED_LEN_BYTE_ARRAY" => Ok(Type::FIXED_LEN_BYTE_ARRAY),
other => Err(general_err!("Invalid type {}", other)),
}
}
}
impl str::FromStr for LogicalType {
type Err = ParquetError;
fn from_str(s: &str) -> result::Result<Self, Self::Err> {
match s {
"NONE" => Ok(LogicalType::NONE),
"UTF8" => Ok(LogicalType::UTF8),
"MAP" => Ok(LogicalType::MAP),
"MAP_KEY_VALUE" => Ok(LogicalType::MAP_KEY_VALUE),
"LIST" => Ok(LogicalType::LIST),
"ENUM" => Ok(LogicalType::ENUM),
"DECIMAL" => Ok(LogicalType::DECIMAL),
"DATE" => Ok(LogicalType::DATE),
"TIME_MILLIS" => Ok(LogicalType::TIME_MILLIS),
"TIME_MICROS" => Ok(LogicalType::TIME_MICROS),
"TIMESTAMP_MILLIS" => Ok(LogicalType::TIMESTAMP_MILLIS),
"TIMESTAMP_MICROS" => Ok(LogicalType::TIMESTAMP_MICROS),
"UINT_8" => Ok(LogicalType::UINT_8),
"UINT_16" => Ok(LogicalType::UINT_16),
"UINT_32" => Ok(LogicalType::UINT_32),
"UINT_64" => Ok(LogicalType::UINT_64),
"INT_8" => Ok(LogicalType::INT_8),
"INT_16" => Ok(LogicalType::INT_16),
"INT_32" => Ok(LogicalType::INT_32),
"INT_64" => Ok(LogicalType::INT_64),
"JSON" => Ok(LogicalType::JSON),
"BSON" => Ok(LogicalType::BSON),
"INTERVAL" => Ok(LogicalType::INTERVAL),
other => Err(general_err!("Invalid logical type {}", other)),
}
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_display_type() {
assert_eq!(Type::BOOLEAN.to_string(), "BOOLEAN");
assert_eq!(Type::INT32.to_string(), "INT32");
assert_eq!(Type::INT64.to_string(), "INT64");
assert_eq!(Type::INT96.to_string(), "INT96");
assert_eq!(Type::FLOAT.to_string(), "FLOAT");
assert_eq!(Type::DOUBLE.to_string(), "DOUBLE");
assert_eq!(Type::BYTE_ARRAY.to_string(), "BYTE_ARRAY");
assert_eq!(
Type::FIXED_LEN_BYTE_ARRAY.to_string(),
"FIXED_LEN_BYTE_ARRAY"
);
}
#[test]
fn test_from_type() {
assert_eq!(Type::from(parquet::Type::BOOLEAN), Type::BOOLEAN);
assert_eq!(Type::from(parquet::Type::INT32), Type::INT32);
assert_eq!(Type::from(parquet::Type::INT64), Type::INT64);
assert_eq!(Type::from(parquet::Type::INT96), Type::INT96);
assert_eq!(Type::from(parquet::Type::FLOAT), Type::FLOAT);
assert_eq!(Type::from(parquet::Type::DOUBLE), Type::DOUBLE);
assert_eq!(Type::from(parquet::Type::BYTE_ARRAY), Type::BYTE_ARRAY);
assert_eq!(
Type::from(parquet::Type::FIXED_LEN_BYTE_ARRAY),
Type::FIXED_LEN_BYTE_ARRAY
);
}
#[test]
fn test_into_type() {
assert_eq!(parquet::Type::BOOLEAN, Type::BOOLEAN.into());
assert_eq!(parquet::Type::INT32, Type::INT32.into());
assert_eq!(parquet::Type::INT64, Type::INT64.into());
assert_eq!(parquet::Type::INT96, Type::INT96.into());
assert_eq!(parquet::Type::FLOAT, Type::FLOAT.into());
assert_eq!(parquet::Type::DOUBLE, Type::DOUBLE.into());
assert_eq!(parquet::Type::BYTE_ARRAY, Type::BYTE_ARRAY.into());
assert_eq!(
parquet::Type::FIXED_LEN_BYTE_ARRAY,
Type::FIXED_LEN_BYTE_ARRAY.into()
);
}
#[test]
fn test_from_string_into_type() {
assert_eq!(
Type::BOOLEAN.to_string().parse::<Type>().unwrap(),
Type::BOOLEAN
);
assert_eq!(
Type::INT32.to_string().parse::<Type>().unwrap(),
Type::INT32
);
assert_eq!(
Type::INT64.to_string().parse::<Type>().unwrap(),
Type::INT64
);
assert_eq!(
Type::INT96.to_string().parse::<Type>().unwrap(),
Type::INT96
);
assert_eq!(
Type::FLOAT.to_string().parse::<Type>().unwrap(),
Type::FLOAT
);
assert_eq!(
Type::DOUBLE.to_string().parse::<Type>().unwrap(),
Type::DOUBLE
);
assert_eq!(
Type::BYTE_ARRAY.to_string().parse::<Type>().unwrap(),
Type::BYTE_ARRAY
);
assert_eq!("BINARY".parse::<Type>().unwrap(), Type::BYTE_ARRAY);
assert_eq!(
Type::FIXED_LEN_BYTE_ARRAY
.to_string()
.parse::<Type>()
.unwrap(),
Type::FIXED_LEN_BYTE_ARRAY
);
}
#[test]
fn test_display_logical_type() {
assert_eq!(LogicalType::NONE.to_string(), "NONE");
assert_eq!(LogicalType::UTF8.to_string(), "UTF8");
assert_eq!(LogicalType::MAP.to_string(), "MAP");
assert_eq!(LogicalType::MAP_KEY_VALUE.to_string(), "MAP_KEY_VALUE");
assert_eq!(LogicalType::LIST.to_string(), "LIST");
assert_eq!(LogicalType::ENUM.to_string(), "ENUM");
assert_eq!(LogicalType::DECIMAL.to_string(), "DECIMAL");
assert_eq!(LogicalType::DATE.to_string(), "DATE");
assert_eq!(LogicalType::TIME_MILLIS.to_string(), "TIME_MILLIS");
assert_eq!(LogicalType::DATE.to_string(), "DATE");
assert_eq!(LogicalType::TIME_MICROS.to_string(), "TIME_MICROS");
assert_eq!(
LogicalType::TIMESTAMP_MILLIS.to_string(),
"TIMESTAMP_MILLIS"
);
assert_eq!(
LogicalType::TIMESTAMP_MICROS.to_string(),
"TIMESTAMP_MICROS"
);
assert_eq!(LogicalType::UINT_8.to_string(), "UINT_8");
assert_eq!(LogicalType::UINT_16.to_string(), "UINT_16");
assert_eq!(LogicalType::UINT_32.to_string(), "UINT_32");
assert_eq!(LogicalType::UINT_64.to_string(), "UINT_64");
assert_eq!(LogicalType::INT_8.to_string(), "INT_8");
assert_eq!(LogicalType::INT_16.to_string(), "INT_16");
assert_eq!(LogicalType::INT_32.to_string(), "INT_32");
assert_eq!(LogicalType::INT_64.to_string(), "INT_64");
assert_eq!(LogicalType::JSON.to_string(), "JSON");
assert_eq!(LogicalType::BSON.to_string(), "BSON");
assert_eq!(LogicalType::INTERVAL.to_string(), "INTERVAL");
}
#[test]
fn test_from_logical_type() {
assert_eq!(LogicalType::from(None), LogicalType::NONE);
assert_eq!(
LogicalType::from(Some(parquet::ConvertedType::UTF8)),
LogicalType::UTF8
);
assert_eq!(
LogicalType::from(Some(parquet::ConvertedType::MAP)),
LogicalType::MAP
);
assert_eq!(
LogicalType::from(Some(parquet::ConvertedType::MAP_KEY_VALUE)),
LogicalType::MAP_KEY_VALUE
);
assert_eq!(
LogicalType::from(Some(parquet::ConvertedType::LIST)),
LogicalType::LIST
);
assert_eq!(
LogicalType::from(Some(parquet::ConvertedType::ENUM)),
LogicalType::ENUM
);
assert_eq!(
LogicalType::from(Some(parquet::ConvertedType::DECIMAL)),
LogicalType::DECIMAL
);
assert_eq!(
LogicalType::from(Some(parquet::ConvertedType::DATE)),
LogicalType::DATE
);
assert_eq!(
LogicalType::from(Some(parquet::ConvertedType::TIME_MILLIS)),
LogicalType::TIME_MILLIS
);
assert_eq!(
LogicalType::from(Some(parquet::ConvertedType::TIME_MICROS)),
LogicalType::TIME_MICROS
);
assert_eq!(
LogicalType::from(Some(parquet::ConvertedType::TIMESTAMP_MILLIS)),
LogicalType::TIMESTAMP_MILLIS
);
assert_eq!(
LogicalType::from(Some(parquet::ConvertedType::TIMESTAMP_MICROS)),
LogicalType::TIMESTAMP_MICROS
);
assert_eq!(
LogicalType::from(Some(parquet::ConvertedType::UINT_8)),
LogicalType::UINT_8
);
assert_eq!(
LogicalType::from(Some(parquet::ConvertedType::UINT_16)),
LogicalType::UINT_16
);
assert_eq!(
LogicalType::from(Some(parquet::ConvertedType::UINT_32)),
LogicalType::UINT_32
);
assert_eq!(
LogicalType::from(Some(parquet::ConvertedType::UINT_64)),
LogicalType::UINT_64
);
assert_eq!(
LogicalType::from(Some(parquet::ConvertedType::INT_8)),
LogicalType::INT_8
);
assert_eq!(
LogicalType::from(Some(parquet::ConvertedType::INT_16)),
LogicalType::INT_16
);
assert_eq!(
LogicalType::from(Some(parquet::ConvertedType::INT_32)),
LogicalType::INT_32
);
assert_eq!(
LogicalType::from(Some(parquet::ConvertedType::INT_64)),
LogicalType::INT_64
);
assert_eq!(
LogicalType::from(Some(parquet::ConvertedType::JSON)),
LogicalType::JSON
);
assert_eq!(
LogicalType::from(Some(parquet::ConvertedType::BSON)),
LogicalType::BSON
);
assert_eq!(
LogicalType::from(Some(parquet::ConvertedType::INTERVAL)),
LogicalType::INTERVAL
);
}
#[test]
fn test_into_logical_type() {
let converted_type: Option<parquet::ConvertedType> = None;
assert_eq!(converted_type, LogicalType::NONE.into());
assert_eq!(Some(parquet::ConvertedType::UTF8), LogicalType::UTF8.into());
assert_eq!(Some(parquet::ConvertedType::MAP), LogicalType::MAP.into());
assert_eq!(
Some(parquet::ConvertedType::MAP_KEY_VALUE),
LogicalType::MAP_KEY_VALUE.into()
);
assert_eq!(Some(parquet::ConvertedType::LIST), LogicalType::LIST.into());
assert_eq!(Some(parquet::ConvertedType::ENUM), LogicalType::ENUM.into());
assert_eq!(
Some(parquet::ConvertedType::DECIMAL),
LogicalType::DECIMAL.into()
);
assert_eq!(Some(parquet::ConvertedType::DATE), LogicalType::DATE.into());
assert_eq!(
Some(parquet::ConvertedType::TIME_MILLIS),
LogicalType::TIME_MILLIS.into()
);
assert_eq!(
Some(parquet::ConvertedType::TIME_MICROS),
LogicalType::TIME_MICROS.into()
);
assert_eq!(
Some(parquet::ConvertedType::TIMESTAMP_MILLIS),
LogicalType::TIMESTAMP_MILLIS.into()
);
assert_eq!(
Some(parquet::ConvertedType::TIMESTAMP_MICROS),
LogicalType::TIMESTAMP_MICROS.into()
);
assert_eq!(
Some(parquet::ConvertedType::UINT_8),
LogicalType::UINT_8.into()
);
assert_eq!(
Some(parquet::ConvertedType::UINT_16),
LogicalType::UINT_16.into()
);
assert_eq!(
Some(parquet::ConvertedType::UINT_32),
LogicalType::UINT_32.into()
);
assert_eq!(
Some(parquet::ConvertedType::UINT_64),
LogicalType::UINT_64.into()
);
assert_eq!(
Some(parquet::ConvertedType::INT_8),
LogicalType::INT_8.into()
);
assert_eq!(
Some(parquet::ConvertedType::INT_16),
LogicalType::INT_16.into()
);
assert_eq!(
Some(parquet::ConvertedType::INT_32),
LogicalType::INT_32.into()
);
assert_eq!(
Some(parquet::ConvertedType::INT_64),
LogicalType::INT_64.into()
);
assert_eq!(Some(parquet::ConvertedType::JSON), LogicalType::JSON.into());
assert_eq!(Some(parquet::ConvertedType::BSON), LogicalType::BSON.into());
assert_eq!(
Some(parquet::ConvertedType::INTERVAL),
LogicalType::INTERVAL.into()
);
}
#[test]
fn test_from_string_into_logical_type() {
assert_eq!(
LogicalType::NONE
.to_string()
.parse::<LogicalType>()
.unwrap(),
LogicalType::NONE
);
assert_eq!(
LogicalType::UTF8
.to_string()
.parse::<LogicalType>()
.unwrap(),
LogicalType::UTF8
);
assert_eq!(
LogicalType::MAP.to_string().parse::<LogicalType>().unwrap(),
LogicalType::MAP
);
assert_eq!(
LogicalType::MAP_KEY_VALUE
.to_string()
.parse::<LogicalType>()
.unwrap(),
LogicalType::MAP_KEY_VALUE
);
assert_eq!(
LogicalType::LIST
.to_string()
.parse::<LogicalType>()
.unwrap(),
LogicalType::LIST
);
assert_eq!(
LogicalType::ENUM
.to_string()
.parse::<LogicalType>()
.unwrap(),
LogicalType::ENUM
);
assert_eq!(
LogicalType::DECIMAL
.to_string()
.parse::<LogicalType>()
.unwrap(),
LogicalType::DECIMAL
);
assert_eq!(
LogicalType::DATE
.to_string()
.parse::<LogicalType>()
.unwrap(),
LogicalType::DATE
);
assert_eq!(
LogicalType::TIME_MILLIS
.to_string()
.parse::<LogicalType>()
.unwrap(),
LogicalType::TIME_MILLIS
);
assert_eq!(
LogicalType::TIME_MICROS
.to_string()
.parse::<LogicalType>()
.unwrap(),
LogicalType::TIME_MICROS
);
assert_eq!(
LogicalType::TIMESTAMP_MILLIS
.to_string()
.parse::<LogicalType>()
.unwrap(),
LogicalType::TIMESTAMP_MILLIS
);
assert_eq!(
LogicalType::TIMESTAMP_MICROS
.to_string()
.parse::<LogicalType>()
.unwrap(),
LogicalType::TIMESTAMP_MICROS
);
assert_eq!(
LogicalType::UINT_8
.to_string()
.parse::<LogicalType>()
.unwrap(),
LogicalType::UINT_8
);
assert_eq!(
LogicalType::UINT_16
.to_string()
.parse::<LogicalType>()
.unwrap(),
LogicalType::UINT_16
);
assert_eq!(
LogicalType::UINT_32
.to_string()
.parse::<LogicalType>()
.unwrap(),
LogicalType::UINT_32
);
assert_eq!(
LogicalType::UINT_64
.to_string()
.parse::<LogicalType>()
.unwrap(),
LogicalType::UINT_64
);
assert_eq!(
LogicalType::INT_8
.to_string()
.parse::<LogicalType>()
.unwrap(),
LogicalType::INT_8
);
assert_eq!(
LogicalType::INT_16
.to_string()
.parse::<LogicalType>()
.unwrap(),
LogicalType::INT_16
);
assert_eq!(
LogicalType::INT_32
.to_string()
.parse::<LogicalType>()
.unwrap(),
LogicalType::INT_32
);
assert_eq!(
LogicalType::INT_64
.to_string()
.parse::<LogicalType>()
.unwrap(),
LogicalType::INT_64
);
assert_eq!(
LogicalType::JSON
.to_string()
.parse::<LogicalType>()
.unwrap(),
LogicalType::JSON
);
assert_eq!(
LogicalType::BSON
.to_string()
.parse::<LogicalType>()
.unwrap(),
LogicalType::BSON
);
assert_eq!(
LogicalType::INTERVAL
.to_string()
.parse::<LogicalType>()
.unwrap(),
LogicalType::INTERVAL
);
}
#[test]
fn test_display_repetition() {
assert_eq!(Repetition::REQUIRED.to_string(), "REQUIRED");
assert_eq!(Repetition::OPTIONAL.to_string(), "OPTIONAL");
assert_eq!(Repetition::REPEATED.to_string(), "REPEATED");
}
#[test]
fn test_from_repetition() {
assert_eq!(
Repetition::from(parquet::FieldRepetitionType::REQUIRED),
Repetition::REQUIRED
);
assert_eq!(
Repetition::from(parquet::FieldRepetitionType::OPTIONAL),
Repetition::OPTIONAL
);
assert_eq!(
Repetition::from(parquet::FieldRepetitionType::REPEATED),
Repetition::REPEATED
);
}
#[test]
fn test_into_repetition() {
assert_eq!(
parquet::FieldRepetitionType::REQUIRED,
Repetition::REQUIRED.into()
);
assert_eq!(
parquet::FieldRepetitionType::OPTIONAL,
Repetition::OPTIONAL.into()
);
assert_eq!(
parquet::FieldRepetitionType::REPEATED,
Repetition::REPEATED.into()
);
}
#[test]
fn test_from_string_into_repetition() {
assert_eq!(
Repetition::REQUIRED
.to_string()
.parse::<Repetition>()
.unwrap(),
Repetition::REQUIRED
);
assert_eq!(
Repetition::OPTIONAL
.to_string()
.parse::<Repetition>()
.unwrap(),
Repetition::OPTIONAL
);
assert_eq!(
Repetition::REPEATED
.to_string()
.parse::<Repetition>()
.unwrap(),
Repetition::REPEATED
);
}
#[test]
fn test_display_encoding() {
assert_eq!(Encoding::PLAIN.to_string(), "PLAIN");
assert_eq!(Encoding::PLAIN_DICTIONARY.to_string(), "PLAIN_DICTIONARY");
assert_eq!(Encoding::RLE.to_string(), "RLE");
assert_eq!(Encoding::BIT_PACKED.to_string(), "BIT_PACKED");
assert_eq!(
Encoding::DELTA_BINARY_PACKED.to_string(),
"DELTA_BINARY_PACKED"
);
assert_eq!(
Encoding::DELTA_LENGTH_BYTE_ARRAY.to_string(),
"DELTA_LENGTH_BYTE_ARRAY"
);
assert_eq!(Encoding::DELTA_BYTE_ARRAY.to_string(), "DELTA_BYTE_ARRAY");
assert_eq!(Encoding::RLE_DICTIONARY.to_string(), "RLE_DICTIONARY");
}
#[test]
fn test_from_encoding() {
assert_eq!(Encoding::from(parquet::Encoding::PLAIN), Encoding::PLAIN);
assert_eq!(
Encoding::from(parquet::Encoding::PLAIN_DICTIONARY),
Encoding::PLAIN_DICTIONARY
);
assert_eq!(Encoding::from(parquet::Encoding::RLE), Encoding::RLE);
assert_eq!(
Encoding::from(parquet::Encoding::BIT_PACKED),
Encoding::BIT_PACKED
);
assert_eq!(
Encoding::from(parquet::Encoding::DELTA_BINARY_PACKED),
Encoding::DELTA_BINARY_PACKED
);
assert_eq!(
Encoding::from(parquet::Encoding::DELTA_LENGTH_BYTE_ARRAY),
Encoding::DELTA_LENGTH_BYTE_ARRAY
);
assert_eq!(
Encoding::from(parquet::Encoding::DELTA_BYTE_ARRAY),
Encoding::DELTA_BYTE_ARRAY
);
}
#[test]
fn test_into_encoding() {
assert_eq!(parquet::Encoding::PLAIN, Encoding::PLAIN.into());
assert_eq!(
parquet::Encoding::PLAIN_DICTIONARY,
Encoding::PLAIN_DICTIONARY.into()
);
assert_eq!(parquet::Encoding::RLE, Encoding::RLE.into());
assert_eq!(parquet::Encoding::BIT_PACKED, Encoding::BIT_PACKED.into());
assert_eq!(
parquet::Encoding::DELTA_BINARY_PACKED,
Encoding::DELTA_BINARY_PACKED.into()
);
assert_eq!(
parquet::Encoding::DELTA_LENGTH_BYTE_ARRAY,
Encoding::DELTA_LENGTH_BYTE_ARRAY.into()
);
assert_eq!(
parquet::Encoding::DELTA_BYTE_ARRAY,
Encoding::DELTA_BYTE_ARRAY.into()
);
}
#[test]
fn test_display_compression() {
assert_eq!(Compression::UNCOMPRESSED.to_string(), "UNCOMPRESSED");
assert_eq!(Compression::SNAPPY.to_string(), "SNAPPY");
assert_eq!(Compression::GZIP.to_string(), "GZIP");
assert_eq!(Compression::LZO.to_string(), "LZO");
assert_eq!(Compression::BROTLI.to_string(), "BROTLI");
assert_eq!(Compression::LZ4.to_string(), "LZ4");
assert_eq!(Compression::ZSTD.to_string(), "ZSTD");
}
#[test]
fn test_from_compression() {
assert_eq!(
Compression::from(parquet::CompressionCodec::UNCOMPRESSED),
Compression::UNCOMPRESSED
);
assert_eq!(
Compression::from(parquet::CompressionCodec::SNAPPY),
Compression::SNAPPY
);
assert_eq!(
Compression::from(parquet::CompressionCodec::GZIP),
Compression::GZIP
);
assert_eq!(
Compression::from(parquet::CompressionCodec::LZO),
Compression::LZO
);
assert_eq!(
Compression::from(parquet::CompressionCodec::BROTLI),
Compression::BROTLI
);
assert_eq!(
Compression::from(parquet::CompressionCodec::LZ4),
Compression::LZ4
);
assert_eq!(
Compression::from(parquet::CompressionCodec::ZSTD),
Compression::ZSTD
);
}
#[test]
fn test_into_compression() {
assert_eq!(
parquet::CompressionCodec::UNCOMPRESSED,
Compression::UNCOMPRESSED.into()
);
assert_eq!(
parquet::CompressionCodec::SNAPPY,
Compression::SNAPPY.into()
);
assert_eq!(parquet::CompressionCodec::GZIP, Compression::GZIP.into());
assert_eq!(parquet::CompressionCodec::LZO, Compression::LZO.into());
assert_eq!(
parquet::CompressionCodec::BROTLI,
Compression::BROTLI.into()
);
assert_eq!(parquet::CompressionCodec::LZ4, Compression::LZ4.into());
assert_eq!(parquet::CompressionCodec::ZSTD, Compression::ZSTD.into());
}
#[test]
fn test_display_page_type() {
assert_eq!(PageType::DATA_PAGE.to_string(), "DATA_PAGE");
assert_eq!(PageType::INDEX_PAGE.to_string(), "INDEX_PAGE");
assert_eq!(PageType::DICTIONARY_PAGE.to_string(), "DICTIONARY_PAGE");
assert_eq!(PageType::DATA_PAGE_V2.to_string(), "DATA_PAGE_V2");
}
#[test]
fn test_from_page_type() {
assert_eq!(
PageType::from(parquet::PageType::DATA_PAGE),
PageType::DATA_PAGE
);
assert_eq!(
PageType::from(parquet::PageType::INDEX_PAGE),
PageType::INDEX_PAGE
);
assert_eq!(
PageType::from(parquet::PageType::DICTIONARY_PAGE),
PageType::DICTIONARY_PAGE
);
assert_eq!(
PageType::from(parquet::PageType::DATA_PAGE_V2),
PageType::DATA_PAGE_V2
);
}
#[test]
fn test_into_page_type() {
assert_eq!(parquet::PageType::DATA_PAGE, PageType::DATA_PAGE.into());
assert_eq!(parquet::PageType::INDEX_PAGE, PageType::INDEX_PAGE.into());
assert_eq!(
parquet::PageType::DICTIONARY_PAGE,
PageType::DICTIONARY_PAGE.into()
);
assert_eq!(
parquet::PageType::DATA_PAGE_V2,
PageType::DATA_PAGE_V2.into()
);
}
#[test]
fn test_display_sort_order() {
assert_eq!(SortOrder::SIGNED.to_string(), "SIGNED");
assert_eq!(SortOrder::UNSIGNED.to_string(), "UNSIGNED");
assert_eq!(SortOrder::UNDEFINED.to_string(), "UNDEFINED");
}
#[test]
fn test_display_column_order() {
assert_eq!(
ColumnOrder::TYPE_DEFINED_ORDER(SortOrder::SIGNED).to_string(),
"TYPE_DEFINED_ORDER(SIGNED)"
);
assert_eq!(
ColumnOrder::TYPE_DEFINED_ORDER(SortOrder::UNSIGNED).to_string(),
"TYPE_DEFINED_ORDER(UNSIGNED)"
);
assert_eq!(
ColumnOrder::TYPE_DEFINED_ORDER(SortOrder::UNDEFINED).to_string(),
"TYPE_DEFINED_ORDER(UNDEFINED)"
);
assert_eq!(ColumnOrder::UNDEFINED.to_string(), "UNDEFINED");
}
#[test]
fn test_column_order_get_sort_order() {
// Helper to check the order in a list of values.
// Only logical type is checked.
fn check_sort_order(types: Vec<LogicalType>, expected_order: SortOrder) {
for tpe in types {
assert_eq!(
ColumnOrder::get_sort_order(tpe, Type::BYTE_ARRAY),
expected_order
);
}
}
// Unsigned comparison (physical type does not matter)
let unsigned = vec![
LogicalType::UTF8,
LogicalType::JSON,
LogicalType::BSON,
LogicalType::ENUM,
LogicalType::UINT_8,
LogicalType::UINT_16,
LogicalType::UINT_32,
LogicalType::UINT_64,
LogicalType::INTERVAL,
];
check_sort_order(unsigned, SortOrder::UNSIGNED);
// Signed comparison (physical type does not matter)
let signed = vec![
LogicalType::INT_8,
LogicalType::INT_16,
LogicalType::INT_32,
LogicalType::INT_64,
LogicalType::DECIMAL,
LogicalType::DATE,
LogicalType::TIME_MILLIS,
LogicalType::TIME_MICROS,
LogicalType::TIMESTAMP_MILLIS,
LogicalType::TIMESTAMP_MICROS,
];
check_sort_order(signed, SortOrder::SIGNED);
// Undefined comparison
let undefined = vec![
LogicalType::LIST,
LogicalType::MAP,
LogicalType::MAP_KEY_VALUE,
];
check_sort_order(undefined, SortOrder::UNDEFINED);
// Check None logical type
// This should return a sort order for byte array type.
check_sort_order(vec![LogicalType::NONE], SortOrder::UNSIGNED);
}
#[test]
fn test_column_order_get_default_sort_order() {
// Comparison based on physical type
assert_eq!(
ColumnOrder::get_default_sort_order(Type::BOOLEAN),
SortOrder::UNSIGNED
);
assert_eq!(
ColumnOrder::get_default_sort_order(Type::INT32),
SortOrder::SIGNED
);
assert_eq!(
ColumnOrder::get_default_sort_order(Type::INT64),
SortOrder::SIGNED
);
assert_eq!(
ColumnOrder::get_default_sort_order(Type::INT96),
SortOrder::UNDEFINED
);
assert_eq!(
ColumnOrder::get_default_sort_order(Type::FLOAT),
SortOrder::SIGNED
);
assert_eq!(
ColumnOrder::get_default_sort_order(Type::DOUBLE),
SortOrder::SIGNED
);
assert_eq!(
ColumnOrder::get_default_sort_order(Type::BYTE_ARRAY),
SortOrder::UNSIGNED
);
assert_eq!(
ColumnOrder::get_default_sort_order(Type::FIXED_LEN_BYTE_ARRAY),
SortOrder::UNSIGNED
);
}
#[test]
fn test_column_order_sort_order() {
assert_eq!(
ColumnOrder::TYPE_DEFINED_ORDER(SortOrder::SIGNED).sort_order(),
SortOrder::SIGNED
);
assert_eq!(
ColumnOrder::TYPE_DEFINED_ORDER(SortOrder::UNSIGNED).sort_order(),
SortOrder::UNSIGNED
);
assert_eq!(
ColumnOrder::TYPE_DEFINED_ORDER(SortOrder::UNDEFINED).sort_order(),
SortOrder::UNDEFINED
);
assert_eq!(ColumnOrder::UNDEFINED.sort_order(), SortOrder::SIGNED);
}
}