blob: 411016e7ce8fd6fdf9c66a170d3174929336ad27 [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
//! Contains the `Row` struct and the `Field` enum that are used to represent Parquet records in Rust.
use std::fmt;
use chrono::{TimeZone, Utc};
use num_bigint::{BigInt, Sign};
use crate::basic::{ConvertedType, Type as PhysicalType};
use crate::data_type::{ByteArray, Decimal, Int96};
use crate::errors::{ParquetError, Result};
use crate::schema::types::ColumnDescPtr;
#[cfg(feature = "cli")]
use serde_json::Value;
/// Shortcut macro that panics with a 'not yet implemented' message naming the
/// physical type, the converted type and the value whose conversion is unsupported.
macro_rules! nyi {
    ($descr:ident, $val:ident) => {{
        let physical = $descr.physical_type();
        let converted = $descr.converted_type();
        unimplemented!(
            "Conversion for physical type {}, converted type {}, value {:?}",
            physical,
            converted,
            $val
        );
    }};
}
/// `Row` represents a nested Parquet record.
///
/// A row is an ordered list of `(column name, value)` pairs.
#[derive(Clone, Debug, PartialEq)]
pub struct Row {
// Column name paired with that column's value, kept in the order in which the
// vector was handed to `make_row`.
fields: Vec<(String, Field)>,
}
#[allow(clippy::len_without_is_empty)]
impl Row {
    /// Returns how many fields this row holds.
    pub fn len(&self) -> usize {
        self.fields.len()
    }

    /// Returns an iterator that visits every column of the row in order, yielding
    /// the column name together with its value.
    ///
    /// # Example
    ///
    /// ```no_run
    /// use std::fs::File;
    /// use parquet::record::Row;
    /// use parquet::file::reader::{FileReader, SerializedFileReader};
    ///
    /// let file = File::open("/path/to/file").unwrap();
    /// let reader = SerializedFileReader::new(file).unwrap();
    /// let row: Row = reader.get_row_iter(None).unwrap().next().unwrap();
    /// for (idx, (name, field)) in row.get_column_iter().enumerate() {
    ///     println!("column index: {}, column name: {}, column value: {}", idx, name, field);
    /// }
    /// ```
    pub fn get_column_iter(&self) -> RowColumnIter {
        let fields = &self.fields;
        RowColumnIter {
            fields,
            curr: 0,
            count: fields.len(),
        }
    }

    /// Converts the row into a JSON object keyed by column name.
    #[cfg(feature = "cli")]
    pub fn to_json_value(&self) -> Value {
        let members = self
            .fields
            .iter()
            .map(|(name, field)| (name.clone(), field.to_json_value()));
        Value::Object(members.collect())
    }
}
/// Iterator over the `(column name, value)` pairs of a `Row`, created by
/// `Row::get_column_iter`.
pub struct RowColumnIter<'a> {
// Borrowed field list of the row being iterated.
fields: &'a Vec<(String, Field)>,
// Index of the next pair to yield.
curr: usize,
// Number of pairs to yield in total (set to `fields.len()` on creation).
count: usize,
}
impl<'a> Iterator for RowColumnIter<'a> {
    type Item = (&'a String, &'a Field);

    /// Yields the next `(name, value)` pair, or `None` once `count` pairs were produced.
    fn next(&mut self) -> Option<Self::Item> {
        let position = self.curr;
        if position < self.count {
            self.curr = position + 1;
            let fields: &'a Vec<(String, Field)> = self.fields;
            let (name, value) = &fields[position];
            Some((name, value))
        } else {
            None
        }
    }
}
/// Trait for type-safe convenient access to fields within a Row.
///
/// Every accessor takes the zero-based position `i` of a field. The
/// implementation for `Row` in this file returns the stored value when the field
/// holds the matching `Field` variant and an error otherwise; it indexes the
/// field vector directly, so an out-of-range `i` panics.
pub trait RowAccessor {
/// Returns the field at `i` as `bool`.
fn get_bool(&self, i: usize) -> Result<bool>;
/// Returns the field at `i` as `i8`.
fn get_byte(&self, i: usize) -> Result<i8>;
/// Returns the field at `i` as `i16`.
fn get_short(&self, i: usize) -> Result<i16>;
/// Returns the field at `i` as `i32`.
fn get_int(&self, i: usize) -> Result<i32>;
/// Returns the field at `i` as `i64`.
fn get_long(&self, i: usize) -> Result<i64>;
/// Returns the field at `i` as `u8`.
fn get_ubyte(&self, i: usize) -> Result<u8>;
/// Returns the field at `i` as `u16`.
fn get_ushort(&self, i: usize) -> Result<u16>;
/// Returns the field at `i` as `u32`.
fn get_uint(&self, i: usize) -> Result<u32>;
/// Returns the field at `i` as `u64`.
fn get_ulong(&self, i: usize) -> Result<u64>;
/// Returns the field at `i` as `f32`.
fn get_float(&self, i: usize) -> Result<f32>;
/// Returns the field at `i` as `f64`.
fn get_double(&self, i: usize) -> Result<f64>;
/// Returns the millisecond timestamp stored at `i`.
fn get_timestamp_millis(&self, i: usize) -> Result<u64>;
/// Returns the microsecond timestamp stored at `i`.
fn get_timestamp_micros(&self, i: usize) -> Result<u64>;
/// Borrows the decimal stored at `i`.
fn get_decimal(&self, i: usize) -> Result<&Decimal>;
/// Borrows the string stored at `i`.
fn get_string(&self, i: usize) -> Result<&String>;
/// Borrows the byte array stored at `i`.
fn get_bytes(&self, i: usize) -> Result<&ByteArray>;
/// Borrows the nested group (row) stored at `i`.
fn get_group(&self, i: usize) -> Result<&Row>;
/// Borrows the list stored at `i`.
fn get_list(&self, i: usize) -> Result<&List>;
/// Borrows the map stored at `i`.
fn get_map(&self, i: usize) -> Result<&Map>;
}
/// Trait for formatting fields within a Row.
pub trait RowFormatter {
/// Returns a `Display`-able reference to the field at position `i`.
fn fmt(&self, i: usize) -> &dyn fmt::Display;
}
/// Generates a type-safe `get_xxx` accessor (e.g. `get_bool`, `get_short`) that
/// returns a primitive by value from the field stored at index `i` of a row.
macro_rules! row_primitive_accessor {
    ($METHOD:ident, $VARIANT:ident, $TY:ty) => {
        fn $METHOD(&self, i: usize) -> Result<$TY> {
            // Happy path: the field holds exactly the requested variant.
            if let Field::$VARIANT(value) = self.fields[i].1 {
                return Ok(value);
            }
            Err(general_err!(
                "Cannot access {} as {}",
                self.fields[i].1.get_type_name(),
                stringify!($VARIANT)
            ))
        }
    };
}
/// Generates a type-safe `get_xxx` accessor (e.g. `get_list`, `get_map`) that
/// borrows a non-primitive value from the field stored at index `i` of a row.
macro_rules! row_complex_accessor {
    ($METHOD:ident, $VARIANT:ident, $TY:ty) => {
        fn $METHOD(&self, i: usize) -> Result<&$TY> {
            // Happy path: hand out a reference to the payload of the requested variant.
            if let Field::$VARIANT(ref inner) = self.fields[i].1 {
                return Ok(inner);
            }
            Err(general_err!(
                "Cannot access {} as {}",
                self.fields[i].1.get_type_name(),
                stringify!($VARIANT)
            ))
        }
    };
}
impl RowFormatter for Row {
    /// Borrows the value of the field at position `i` as something displayable.
    fn fmt(&self, i: usize) -> &dyn fmt::Display {
        let (_, value) = &self.fields[i];
        value
    }
}
/// `RowAccessor` implementation for `Row`; every method is generated by one of the
/// accessor macros, mapping a method name to the `Field` variant it accepts.
impl RowAccessor for Row {
// Values returned by copy.
row_primitive_accessor!(get_bool, Bool, bool);
row_primitive_accessor!(get_byte, Byte, i8);
row_primitive_accessor!(get_short, Short, i16);
row_primitive_accessor!(get_int, Int, i32);
row_primitive_accessor!(get_long, Long, i64);
row_primitive_accessor!(get_ubyte, UByte, u8);
row_primitive_accessor!(get_ushort, UShort, u16);
row_primitive_accessor!(get_uint, UInt, u32);
row_primitive_accessor!(get_ulong, ULong, u64);
row_primitive_accessor!(get_float, Float, f32);
row_primitive_accessor!(get_double, Double, f64);
row_primitive_accessor!(get_timestamp_millis, TimestampMillis, u64);
row_primitive_accessor!(get_timestamp_micros, TimestampMicros, u64);
// Values returned by reference.
row_complex_accessor!(get_decimal, Decimal, Decimal);
row_complex_accessor!(get_string, Str, String);
row_complex_accessor!(get_bytes, Bytes, ByteArray);
row_complex_accessor!(get_group, Group, Row);
row_complex_accessor!(get_list, ListInternal, List);
row_complex_accessor!(get_map, MapInternal, Map);
}
/// Constructs a `Row` from the list of `fields` and returns it.
///
/// `fields` holds `(column name, value)` pairs and is stored as given.
#[inline]
pub fn make_row(fields: Vec<(String, Field)>) -> Row {
Row { fields }
}
impl fmt::Display for Row {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "{{")?;
for (i, &(ref key, ref value)) in self.fields.iter().enumerate() {
key.fmt(f)?;
write!(f, ": ")?;
value.fmt(f)?;
if i < self.fields.len() - 1 {
write!(f, ", ")?;
}
}
write!(f, "}}")
}
}
/// `List` represents a list which contains an array of elements.
#[derive(Clone, Debug, PartialEq)]
pub struct List {
// Values of the list, kept in the order in which they were handed to `make_list`.
elements: Vec<Field>,
}
#[allow(clippy::len_without_is_empty)]
impl List {
    /// Get the number of elements in this list.
    pub fn len(&self) -> usize {
        self.elements().len()
    }

    /// Borrow all elements of this list, in order, as a slice.
    pub fn elements(&self) -> &[Field] {
        &self.elements[..]
    }
}
/// Constructs a `List` from the list of `elements` and returns it.
#[inline]
pub fn make_list(elements: Vec<Field>) -> List {
List { elements }
}
/// Trait for type-safe access of an index for a `List`.
/// Note that the get_XXX methods do no bounds checking of their own: the
/// implementations in this file index the underlying vector directly, so an
/// out-of-range `i` panics.
pub trait ListAccessor {
/// Returns the element at `i` as `bool`.
fn get_bool(&self, i: usize) -> Result<bool>;
/// Returns the element at `i` as `i8`.
fn get_byte(&self, i: usize) -> Result<i8>;
/// Returns the element at `i` as `i16`.
fn get_short(&self, i: usize) -> Result<i16>;
/// Returns the element at `i` as `i32`.
fn get_int(&self, i: usize) -> Result<i32>;
/// Returns the element at `i` as `i64`.
fn get_long(&self, i: usize) -> Result<i64>;
/// Returns the element at `i` as `u8`.
fn get_ubyte(&self, i: usize) -> Result<u8>;
/// Returns the element at `i` as `u16`.
fn get_ushort(&self, i: usize) -> Result<u16>;
/// Returns the element at `i` as `u32`.
fn get_uint(&self, i: usize) -> Result<u32>;
/// Returns the element at `i` as `u64`.
fn get_ulong(&self, i: usize) -> Result<u64>;
/// Returns the element at `i` as `f32`.
fn get_float(&self, i: usize) -> Result<f32>;
/// Returns the element at `i` as `f64`.
fn get_double(&self, i: usize) -> Result<f64>;
/// Returns the millisecond timestamp stored at `i`.
fn get_timestamp_millis(&self, i: usize) -> Result<u64>;
/// Returns the microsecond timestamp stored at `i`.
fn get_timestamp_micros(&self, i: usize) -> Result<u64>;
/// Borrows the decimal stored at `i`.
fn get_decimal(&self, i: usize) -> Result<&Decimal>;
/// Borrows the string stored at `i`.
fn get_string(&self, i: usize) -> Result<&String>;
/// Borrows the byte array stored at `i`.
fn get_bytes(&self, i: usize) -> Result<&ByteArray>;
/// Borrows the nested group (row) stored at `i`.
fn get_group(&self, i: usize) -> Result<&Row>;
/// Borrows the nested list stored at `i`.
fn get_list(&self, i: usize) -> Result<&List>;
/// Borrows the map stored at `i`.
fn get_map(&self, i: usize) -> Result<&Map>;
}
/// Generates a type-safe `get_xxx` accessor (e.g. `get_bool`, `get_short`) that
/// returns a primitive by value from the element stored at index `i` of a list.
macro_rules! list_primitive_accessor {
    ($METHOD:ident, $VARIANT:ident, $TY:ty) => {
        fn $METHOD(&self, i: usize) -> Result<$TY> {
            // Happy path: the element holds exactly the requested variant.
            if let Field::$VARIANT(value) = self.elements[i] {
                return Ok(value);
            }
            Err(general_err!(
                "Cannot access {} as {}",
                self.elements[i].get_type_name(),
                stringify!($VARIANT)
            ))
        }
    };
}
/// Generates a type-safe `get_xxx` accessor (e.g. `get_list`, `get_map`) that
/// borrows a non-primitive value from the element stored at index `i`.
macro_rules! list_complex_accessor {
    ($METHOD:ident, $VARIANT:ident, $TY:ty) => {
        fn $METHOD(&self, i: usize) -> Result<&$TY> {
            // Happy path: hand out a reference to the payload of the requested variant.
            if let Field::$VARIANT(ref inner) = self.elements[i] {
                return Ok(inner);
            }
            Err(general_err!(
                "Cannot access {} as {}",
                self.elements[i].get_type_name(),
                stringify!($VARIANT)
            ))
        }
    };
}
/// `ListAccessor` implementation for `List`; every method is generated by one of
/// the list accessor macros, mapping a method name to the `Field` variant it accepts.
impl ListAccessor for List {
// Values returned by copy.
list_primitive_accessor!(get_bool, Bool, bool);
list_primitive_accessor!(get_byte, Byte, i8);
list_primitive_accessor!(get_short, Short, i16);
list_primitive_accessor!(get_int, Int, i32);
list_primitive_accessor!(get_long, Long, i64);
list_primitive_accessor!(get_ubyte, UByte, u8);
list_primitive_accessor!(get_ushort, UShort, u16);
list_primitive_accessor!(get_uint, UInt, u32);
list_primitive_accessor!(get_ulong, ULong, u64);
list_primitive_accessor!(get_float, Float, f32);
list_primitive_accessor!(get_double, Double, f64);
list_primitive_accessor!(get_timestamp_millis, TimestampMillis, u64);
list_primitive_accessor!(get_timestamp_micros, TimestampMicros, u64);
// Values returned by reference.
list_complex_accessor!(get_decimal, Decimal, Decimal);
list_complex_accessor!(get_string, Str, String);
list_complex_accessor!(get_bytes, Bytes, ByteArray);
list_complex_accessor!(get_group, Group, Row);
list_complex_accessor!(get_list, ListInternal, List);
list_complex_accessor!(get_map, MapInternal, Map);
}
/// `Map` represents a map which contains a list of key->value pairs.
#[derive(Clone, Debug, PartialEq)]
pub struct Map {
// `(key, value)` pairs, kept in the order in which they were handed to `make_map`.
entries: Vec<(Field, Field)>,
}
#[allow(clippy::len_without_is_empty)]
impl Map {
    /// Get the number of key->value entries in this map.
    pub fn len(&self) -> usize {
        self.entries().len()
    }

    /// Borrow all `(key, value)` entries of this map, in order, as a slice.
    pub fn entries(&self) -> &[(Field, Field)] {
        &self.entries[..]
    }
}
/// Constructs a `Map` from the list of `entries` and returns it.
///
/// `entries` holds `(key, value)` pairs and is stored as given.
#[inline]
pub fn make_map(entries: Vec<(Field, Field)>) -> Map {
Map { entries }
}
/// Trait for type-safe access of an index for a `Map`
pub trait MapAccessor {
/// Returns a `ListAccessor` over the keys of the map.
fn get_keys<'a>(&'a self) -> Box<dyn ListAccessor + 'a>;
/// Returns a `ListAccessor` over the values of the map.
fn get_values<'a>(&'a self) -> Box<dyn ListAccessor + 'a>;
}
/// List view over borrowed fields; `Map` uses it to expose its keys or its values
/// through `ListAccessor` without cloning them.
struct MapList<'a> {
// References into the entries of the map the view was created from.
elements: Vec<&'a Field>,
}
/// Generates a type-safe `get_xxx` accessor (e.g. `get_bool`, `get_short`) that
/// returns a primitive by value from the borrowed element stored at index `i`.
macro_rules! map_list_primitive_accessor {
    ($METHOD:ident, $VARIANT:ident, $TY:ty) => {
        fn $METHOD(&self, i: usize) -> Result<$TY> {
            // Elements are references, so the payload is copied out through the borrow.
            if let Field::$VARIANT(value) = self.elements[i] {
                return Ok(*value);
            }
            Err(general_err!(
                "Cannot access {} as {}",
                self.elements[i].get_type_name(),
                stringify!($VARIANT)
            ))
        }
    };
}
/// `ListAccessor` implementation for the borrowed `MapList` view; every method is
/// generated by one of the accessor macros.
impl<'a> ListAccessor for MapList<'a> {
// Values returned by copy (the elements are references, hence the dedicated macro).
map_list_primitive_accessor!(get_bool, Bool, bool);
map_list_primitive_accessor!(get_byte, Byte, i8);
map_list_primitive_accessor!(get_short, Short, i16);
map_list_primitive_accessor!(get_int, Int, i32);
map_list_primitive_accessor!(get_long, Long, i64);
map_list_primitive_accessor!(get_ubyte, UByte, u8);
map_list_primitive_accessor!(get_ushort, UShort, u16);
map_list_primitive_accessor!(get_uint, UInt, u32);
map_list_primitive_accessor!(get_ulong, ULong, u64);
map_list_primitive_accessor!(get_float, Float, f32);
map_list_primitive_accessor!(get_double, Double, f64);
map_list_primitive_accessor!(get_timestamp_millis, TimestampMillis, u64);
map_list_primitive_accessor!(get_timestamp_micros, TimestampMicros, u64);
// Values returned by reference (same macro as used for `List`).
list_complex_accessor!(get_decimal, Decimal, Decimal);
list_complex_accessor!(get_string, Str, String);
list_complex_accessor!(get_bytes, Bytes, ByteArray);
list_complex_accessor!(get_group, Group, Row);
list_complex_accessor!(get_list, ListInternal, List);
list_complex_accessor!(get_map, MapInternal, Map);
}
impl MapAccessor for Map {
    /// Returns a list accessor over references to the keys, in entry order.
    fn get_keys<'a>(&'a self) -> Box<dyn ListAccessor + 'a> {
        let mut elements = Vec::with_capacity(self.entries.len());
        for (key, _) in &self.entries {
            elements.push(key);
        }
        Box::new(MapList { elements })
    }

    /// Returns a list accessor over references to the values, in entry order.
    fn get_values<'a>(&'a self) -> Box<dyn ListAccessor + 'a> {
        let mut elements = Vec::with_capacity(self.entries.len());
        for (_, value) in &self.entries {
            elements.push(value);
        }
        Box::new(MapList { elements })
    }
}
/// API to represent a single field in a `Row`.
#[derive(Clone, Debug, PartialEq)]
pub enum Field {
// Primitive types
/// Null value.
Null,
/// Boolean value (`true`, `false`).
Bool(bool),
/// Signed integer INT_8.
Byte(i8),
/// Signed integer INT_16.
Short(i16),
/// Signed integer INT_32.
Int(i32),
/// Signed integer INT_64.
Long(i64),
/// Unsigned integer UINT_8.
UByte(u8),
/// Unsigned integer UINT_16.
UShort(u16),
/// Unsigned integer UINT_32.
UInt(u32),
/// Unsigned integer UINT_64.
ULong(u64),
/// IEEE 32-bit floating point value.
Float(f32),
/// IEEE 64-bit floating point value.
Double(f64),
/// Decimal value.
Decimal(Decimal),
/// UTF-8 encoded character string.
Str(String),
/// General binary value.
Bytes(ByteArray),
/// Date without a time of day, stores the number of days from the
/// Unix epoch, 1 January 1970.
Date(u32),
/// Milliseconds from the Unix epoch, 1 January 1970.
TimestampMillis(u64),
/// Microseconds from the Unix epoch, 1 January 1970.
TimestampMicros(u64),
// ----------------------------------------------------------------------
// Complex types
/// Struct, child elements are tuples of field-value pairs.
Group(Row),
/// List of elements.
ListInternal(List),
/// List of key-value pairs.
MapInternal(Map),
}
impl Field {
/// Get the type name.
///
/// The returned string is the name of the enum variant, e.g. `"Bool"` or `"ListInternal"`.
fn get_type_name(&self) -> &'static str {
match *self {
Field::Null => "Null",
Field::Bool(_) => "Bool",
Field::Byte(_) => "Byte",
Field::Short(_) => "Short",
Field::Int(_) => "Int",
Field::Long(_) => "Long",
Field::UByte(_) => "UByte",
Field::UShort(_) => "UShort",
Field::UInt(_) => "UInt",
Field::ULong(_) => "ULong",
Field::Float(_) => "Float",
Field::Double(_) => "Double",
Field::Decimal(_) => "Decimal",
Field::Date(_) => "Date",
Field::Str(_) => "Str",
Field::Bytes(_) => "Bytes",
Field::TimestampMillis(_) => "TimestampMillis",
Field::TimestampMicros(_) => "TimestampMicros",
Field::Group(_) => "Group",
Field::ListInternal(_) => "ListInternal",
Field::MapInternal(_) => "MapInternal",
}
}
/// Determines if this field represents a primitive value, i.e. anything other
/// than a group, a list or a map.
pub fn is_primitive(&self) -> bool {
!matches!(
*self,
Field::Group(_) | Field::ListInternal(_) | Field::MapInternal(_)
)
}
/// Converts Parquet BOOLEAN type with logical type into `bool` value.
///
/// The column descriptor is not consulted.
#[inline]
pub fn convert_bool(_descr: &ColumnDescPtr, value: bool) -> Self {
Field::Bool(value)
}
/// Converts Parquet INT32 type with converted type into `i32` value.
///
/// The narrowing and unsigned conversions use `as` casts, so values outside the
/// target range wrap/truncate instead of failing (e.g. a negative DATE becomes a
/// large `u32`).
///
/// # Panics
///
/// Panics (via `nyi!`) for any converted type not listed in the match.
#[inline]
pub fn convert_int32(descr: &ColumnDescPtr, value: i32) -> Self {
match descr.converted_type() {
ConvertedType::INT_8 => Field::Byte(value as i8),
ConvertedType::INT_16 => Field::Short(value as i16),
ConvertedType::INT_32 | ConvertedType::NONE => Field::Int(value),
ConvertedType::UINT_8 => Field::UByte(value as u8),
ConvertedType::UINT_16 => Field::UShort(value as u16),
ConvertedType::UINT_32 => Field::UInt(value as u32),
ConvertedType::DATE => Field::Date(value as u32),
ConvertedType::DECIMAL => Field::Decimal(Decimal::from_i32(
value,
descr.type_precision(),
descr.type_scale(),
)),
_ => nyi!(descr, value),
}
}
/// Converts Parquet INT64 type with converted type into `i64` value.
///
/// Unsigned and timestamp conversions use `as u64`, so negative inputs wrap to
/// large unsigned values.
///
/// # Panics
///
/// Panics (via `nyi!`) for any converted type not listed in the match.
#[inline]
pub fn convert_int64(descr: &ColumnDescPtr, value: i64) -> Self {
match descr.converted_type() {
ConvertedType::INT_64 | ConvertedType::NONE => Field::Long(value),
ConvertedType::UINT_64 => Field::ULong(value as u64),
ConvertedType::TIMESTAMP_MILLIS => Field::TimestampMillis(value as u64),
ConvertedType::TIMESTAMP_MICROS => Field::TimestampMicros(value as u64),
ConvertedType::DECIMAL => Field::Decimal(Decimal::from_i64(
value,
descr.type_precision(),
descr.type_scale(),
)),
_ => nyi!(descr, value),
}
}
/// Converts Parquet INT96 (nanosecond timestamps) type and logical type into
/// `TimestampMillis` value.
///
/// The column descriptor is not consulted.
// NOTE(review): relies on `Int96::to_i64` returning milliseconds since the epoch;
// confirm against the `Int96` implementation.
#[inline]
pub fn convert_int96(_descr: &ColumnDescPtr, value: Int96) -> Self {
Field::TimestampMillis(value.to_i64() as u64)
}
/// Converts Parquet FLOAT type with logical type into `f32` value.
///
/// The column descriptor is not consulted.
#[inline]
pub fn convert_float(_descr: &ColumnDescPtr, value: f32) -> Self {
Field::Float(value)
}
/// Converts Parquet DOUBLE type with converted type into `f64` value.
///
/// The column descriptor is not consulted.
#[inline]
pub fn convert_double(_descr: &ColumnDescPtr, value: f64) -> Self {
Field::Double(value)
}
/// Converts Parquet BYTE_ARRAY type with converted type into either UTF8 string or
/// array of bytes.
///
/// FIXED_LEN_BYTE_ARRAY columns are handled as well, but only for the DECIMAL and
/// NONE converted types.
///
/// # Panics
///
/// Panics when a UTF8/ENUM/JSON value is not valid UTF-8 (the conversion is
/// `unwrap`ped), and (via `nyi!`) for any unsupported physical/converted type
/// combination.
#[inline]
pub fn convert_byte_array(descr: &ColumnDescPtr, value: ByteArray) -> Self {
match descr.physical_type() {
PhysicalType::BYTE_ARRAY => match descr.converted_type() {
ConvertedType::UTF8 | ConvertedType::ENUM | ConvertedType::JSON => {
let value = String::from_utf8(value.data().to_vec()).unwrap();
Field::Str(value)
}
ConvertedType::BSON | ConvertedType::NONE => Field::Bytes(value),
ConvertedType::DECIMAL => Field::Decimal(Decimal::from_bytes(
value,
descr.type_precision(),
descr.type_scale(),
)),
_ => nyi!(descr, value),
},
PhysicalType::FIXED_LEN_BYTE_ARRAY => match descr.converted_type() {
ConvertedType::DECIMAL => Field::Decimal(Decimal::from_bytes(
value,
descr.type_precision(),
descr.type_scale(),
)),
ConvertedType::NONE => Field::Bytes(value),
_ => nyi!(descr, value),
},
_ => nyi!(descr, value),
}
}
/// Converts the field into a JSON value.
///
/// Integers become JSON numbers; floats become numbers unless
/// `Number::from_f64` rejects them, in which case `null` is produced. Decimals,
/// dates and timestamps are rendered with the same string helpers as `Display`,
/// byte arrays are base64-encoded, groups and maps become objects and lists
/// become arrays. Map keys that are JSON strings are used as-is; any other key
/// uses its JSON text representation.
#[cfg(feature = "cli")]
pub fn to_json_value(&self) -> Value {
match &self {
Field::Null => Value::Null,
Field::Bool(b) => Value::Bool(*b),
Field::Byte(n) => Value::Number(serde_json::Number::from(*n)),
Field::Short(n) => Value::Number(serde_json::Number::from(*n)),
Field::Int(n) => Value::Number(serde_json::Number::from(*n)),
Field::Long(n) => Value::Number(serde_json::Number::from(*n)),
Field::UByte(n) => Value::Number(serde_json::Number::from(*n)),
Field::UShort(n) => Value::Number(serde_json::Number::from(*n)),
Field::UInt(n) => Value::Number(serde_json::Number::from(*n)),
Field::ULong(n) => Value::Number(serde_json::Number::from(*n)),
Field::Float(n) => serde_json::Number::from_f64(f64::from(*n))
.map(Value::Number)
.unwrap_or(Value::Null),
Field::Double(n) => serde_json::Number::from_f64(*n)
.map(Value::Number)
.unwrap_or(Value::Null),
Field::Decimal(n) => Value::String(convert_decimal_to_string(&n)),
Field::Str(s) => Value::String(s.to_owned()),
Field::Bytes(b) => Value::String(base64::encode(b.data())),
Field::Date(d) => Value::String(convert_date_to_string(*d)),
Field::TimestampMillis(ts) => {
Value::String(convert_timestamp_millis_to_string(*ts))
}
Field::TimestampMicros(ts) => {
Value::String(convert_timestamp_micros_to_string(*ts))
}
Field::Group(row) => row.to_json_value(),
Field::ListInternal(fields) => {
Value::Array(fields.elements.iter().map(|f| f.to_json_value()).collect())
}
Field::MapInternal(map) => Value::Object(
map.entries
.iter()
.map(|(key_field, value_field)| {
let key_val = key_field.to_json_value();
// String keys are used verbatim; other keys fall back to their JSON text.
let key_str = key_val
.as_str()
.map(|s| s.to_owned())
.unwrap_or_else(|| key_val.to_string());
(key_str, value_field.to_json_value())
})
.collect(),
),
}
}
}
impl fmt::Display for Field {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match *self {
Field::Null => write!(f, "null"),
Field::Bool(value) => write!(f, "{}", value),
Field::Byte(value) => write!(f, "{}", value),
Field::Short(value) => write!(f, "{}", value),
Field::Int(value) => write!(f, "{}", value),
Field::Long(value) => write!(f, "{}", value),
Field::UByte(value) => write!(f, "{}", value),
Field::UShort(value) => write!(f, "{}", value),
Field::UInt(value) => write!(f, "{}", value),
Field::ULong(value) => write!(f, "{}", value),
Field::Float(value) => {
if !(1e-15..=1e19).contains(&value) {
write!(f, "{:E}", value)
} else {
write!(f, "{:?}", value)
}
}
Field::Double(value) => {
if !(1e-15..=1e19).contains(&value) {
write!(f, "{:E}", value)
} else {
write!(f, "{:?}", value)
}
}
Field::Decimal(ref value) => {
write!(f, "{}", convert_decimal_to_string(value))
}
Field::Str(ref value) => write!(f, "\"{}\"", value),
Field::Bytes(ref value) => write!(f, "{:?}", value.data()),
Field::Date(value) => write!(f, "{}", convert_date_to_string(value)),
Field::TimestampMillis(value) => {
write!(f, "{}", convert_timestamp_millis_to_string(value))
}
Field::TimestampMicros(value) => {
write!(f, "{}", convert_timestamp_micros_to_string(value))
}
Field::Group(ref fields) => write!(f, "{}", fields),
Field::ListInternal(ref list) => {
let elems = &list.elements;
write!(f, "[")?;
for (i, field) in elems.iter().enumerate() {
field.fmt(f)?;
if i < elems.len() - 1 {
write!(f, ", ")?;
}
}
write!(f, "]")
}
Field::MapInternal(ref map) => {
let entries = &map.entries;
write!(f, "{{")?;
for (i, &(ref key, ref value)) in entries.iter().enumerate() {
key.fmt(f)?;
write!(f, " -> ")?;
value.fmt(f)?;
if i < entries.len() - 1 {
write!(f, ", ")?;
}
}
write!(f, "}}")
}
}
}
}
/// Helper method to convert Parquet date into a string.
/// Input `value` is a number of days since the epoch in UTC.
/// Date is displayed in UTC as `%Y-%m-%d` followed by the UTC offset (`%:z`).
// NOTE(review): `Utc.timestamp` is documented by chrono to panic for out-of-range
// inputs, which very large `value`s can reach — confirm whether callers can pass them.
#[inline]
fn convert_date_to_string(value: u32) -> String {
    const NUM_SECONDS_IN_DAY: i64 = 60 * 60 * 24;
    // Midnight (UTC) of the requested day, expressed in seconds since the epoch.
    let seconds = i64::from(value) * NUM_SECONDS_IN_DAY;
    let dt = Utc.timestamp(seconds, 0).date();
    dt.format("%Y-%m-%d %:z").to_string()
}
/// Helper method to convert Parquet timestamp into a string.
/// Input `value` is a number of milliseconds since the epoch in UTC.
/// Datetime is displayed in UTC with second precision (the sub-second part of
/// `value` is dropped), followed by the UTC offset (`%:z`).
// NOTE(review): `Utc.timestamp` is documented by chrono to panic for out-of-range
// inputs, which very large `value`s can reach — confirm whether callers can pass them.
#[inline]
fn convert_timestamp_millis_to_string(value: u64) -> String {
    const MILLIS_PER_SECOND: u64 = 1000;
    // `u64::MAX / 1000` fits into `i64`, so the cast below is lossless.
    let seconds = (value / MILLIS_PER_SECOND) as i64;
    let dt = Utc.timestamp(seconds, 0);
    dt.format("%Y-%m-%d %H:%M:%S %:z").to_string()
}
/// Helper method to convert Parquet timestamp into a string.
/// Input `value` is a number of microseconds since the epoch in UTC.
/// The value is truncated to whole milliseconds and then rendered by
/// `convert_timestamp_millis_to_string`, which formats with `Utc` (not the local
/// timezone).
#[inline]
fn convert_timestamp_micros_to_string(value: u64) -> String {
    const MICROS_PER_MILLI: u64 = 1000;
    convert_timestamp_millis_to_string(value / MICROS_PER_MILLI)
}
/// Helper method to convert Parquet decimal into a string.
/// We assert that `scale >= 0` and `precision > scale`, but this will be enforced
/// when constructing Parquet schema.
///
/// The unscaled big-endian two's-complement bytes are rendered in plain decimal
/// notation (never scientific) with the decimal point placed `scale` digits from
/// the right.
#[inline]
fn convert_decimal_to_string(decimal: &Decimal) -> String {
    assert!(decimal.scale() >= 0 && decimal.precision() > decimal.scale());
    // Interpreting the bytes as signed resolves the sign as part of the conversion.
    let unscaled = BigInt::from_signed_bytes_be(decimal.data());
    // Number of characters taken by a leading '-' (offset of the first digit).
    let sign_len: usize = if unscaled.sign() == Sign::Minus { 1 } else { 0 };
    let mut text = unscaled.to_string();
    // Count of digits that belong in front of the decimal point.
    let point = text.len() as i32 - decimal.scale() - sign_len as i32;
    if point > 0 {
        // Enough digits are present: just drop the point in.
        text.insert(point as usize + sign_len, '.');
    } else {
        // The value is below one in magnitude: pad with zeros and lead with "0.".
        let missing_zeros = (-point) as usize;
        let prefix = format!("0.{}", "0".repeat(missing_zeros));
        text.insert_str(sign_len, &prefix);
    }
    text
}
#[cfg(test)]
#[allow(clippy::approx_constant, clippy::many_single_char_names)]
mod tests {
use super::*;
use std::sync::Arc;
use crate::schema::types::{ColumnDescriptor, ColumnPath, PrimitiveTypeBuilder};
/// Builds a test column descriptor for a primitive column named "col".
///
/// The two-argument form takes the physical and converted types; the
/// five-argument form additionally sets type length, precision and scale.
macro_rules! make_column_descr {
    ($physical:expr, $converted:expr) => {{
        let primitive = PrimitiveTypeBuilder::new("col", $physical)
            .with_converted_type($converted)
            .build()
            .unwrap();
        let path = ColumnPath::from("col");
        Arc::new(ColumnDescriptor::new(Arc::new(primitive), 0, 0, path))
    }};
    ($physical:expr, $converted:expr, $length:expr, $precision:expr, $scale:expr) => {{
        let primitive = PrimitiveTypeBuilder::new("col", $physical)
            .with_converted_type($converted)
            .with_length($length)
            .with_precision($precision)
            .with_scale($scale)
            .build()
            .unwrap();
        let path = ColumnPath::from("col");
        Arc::new(ColumnDescriptor::new(Arc::new(primitive), 0, 0, path))
    }};
}
#[test]
fn test_row_convert_bool() {
    // BOOLEAN value does not depend on logical type
    let descr = make_column_descr![PhysicalType::BOOLEAN, ConvertedType::NONE];
    for &flag in &[true, false] {
        assert_eq!(Field::convert_bool(&descr, flag), Field::Bool(flag));
    }
}
#[test]
fn test_row_convert_int32() {
    // Converts `input` under the given converted type and compares with `expected`.
    let check = |converted: ConvertedType, input: i32, expected: Field| {
        let descr = make_column_descr![PhysicalType::INT32, converted];
        assert_eq!(Field::convert_int32(&descr, input), expected);
    };
    check(ConvertedType::INT_8, 111, Field::Byte(111));
    check(ConvertedType::INT_16, 222, Field::Short(222));
    check(ConvertedType::INT_32, 333, Field::Int(333));
    check(ConvertedType::UINT_8, -1, Field::UByte(255));
    check(ConvertedType::UINT_16, 256, Field::UShort(256));
    check(ConvertedType::UINT_32, 1234, Field::UInt(1234));
    check(ConvertedType::NONE, 444, Field::Int(444));
    check(ConvertedType::DATE, 14611, Field::Date(14611));

    // DECIMAL needs precision and scale on the descriptor.
    let decimal_descr =
        make_column_descr![PhysicalType::INT32, ConvertedType::DECIMAL, 0, 8, 2];
    assert_eq!(
        Field::convert_int32(&decimal_descr, 444),
        Field::Decimal(Decimal::from_i32(444, 8, 2))
    );
}
#[test]
fn test_row_convert_int64() {
    // Converts `input` under the given converted type and compares with `expected`.
    let check = |converted: ConvertedType, input: i64, expected: Field| {
        let descr = make_column_descr![PhysicalType::INT64, converted];
        assert_eq!(Field::convert_int64(&descr, input), expected);
    };
    check(ConvertedType::INT_64, 1111, Field::Long(1111));
    check(ConvertedType::UINT_64, 78239823, Field::ULong(78239823));
    check(
        ConvertedType::TIMESTAMP_MILLIS,
        1541186529153,
        Field::TimestampMillis(1541186529153),
    );
    check(
        ConvertedType::TIMESTAMP_MICROS,
        1541186529153123,
        Field::TimestampMicros(1541186529153123),
    );
    check(ConvertedType::NONE, 2222, Field::Long(2222));

    // DECIMAL needs precision and scale on the descriptor.
    let decimal_descr =
        make_column_descr![PhysicalType::INT64, ConvertedType::DECIMAL, 0, 8, 2];
    assert_eq!(
        Field::convert_int64(&decimal_descr, 3333),
        Field::Decimal(Decimal::from_i64(3333, 8, 2))
    );
}
#[test]
fn test_row_convert_int96() {
    // INT96 value does not depend on logical type
    let descr = make_column_descr![PhysicalType::INT96, ConvertedType::NONE];
    // (raw INT96 words, expected milliseconds since the epoch)
    let cases: Vec<(Vec<u32>, u64)> = vec![
        (vec![0, 0, 2454923], 1238544000000),
        (vec![4165425152, 13, 2454923], 1238544060000),
    ];
    for (raw, expected_millis) in cases {
        let row = Field::convert_int96(&descr, Int96::from(raw));
        assert_eq!(row, Field::TimestampMillis(expected_millis));
    }
}
#[test]
fn test_row_convert_float() {
    // FLOAT value does not depend on logical type
    let descr = make_column_descr![PhysicalType::FLOAT, ConvertedType::NONE];
    let input: f32 = 2.31;
    assert_eq!(Field::convert_float(&descr, input), Field::Float(2.31));
}
#[test]
fn test_row_convert_double() {
    // DOUBLE value does not depend on logical type
    let descr = make_column_descr![PhysicalType::DOUBLE, ConvertedType::NONE];
    let input: f64 = 1.56;
    assert_eq!(Field::convert_double(&descr, input), Field::Double(1.56));
}
#[test]
fn test_row_convert_byte_array() {
    // Textual converted types (UTF8, ENUM, JSON) must come back as `Field::Str`.
    let expect_str = |converted: ConvertedType, bytes: Vec<u8>, expected: &str| {
        let descr = make_column_descr![PhysicalType::BYTE_ARRAY, converted];
        let row = Field::convert_byte_array(&descr, ByteArray::from(bytes));
        assert_eq!(row, Field::Str(expected.to_string()));
    };
    expect_str(ConvertedType::UTF8, vec![b'A', b'B', b'C', b'D'], "ABCD");
    expect_str(ConvertedType::ENUM, vec![b'1', b'2', b'3'], "123");
    expect_str(
        ConvertedType::JSON,
        vec![b'{', b'"', b'a', b'"', b':', b'1', b'}'],
        "{\"a\":1}",
    );

    // Binary converted types (NONE, BSON) must come back unchanged as `Field::Bytes`.
    let expect_bytes = |converted: ConvertedType, bytes: Vec<u8>| {
        let descr = make_column_descr![PhysicalType::BYTE_ARRAY, converted];
        let value = ByteArray::from(bytes);
        let row = Field::convert_byte_array(&descr, value.clone());
        assert_eq!(row, Field::Bytes(value));
    };
    expect_bytes(ConvertedType::NONE, vec![1, 2, 3, 4, 5]);
    expect_bytes(ConvertedType::BSON, vec![1, 2, 3, 4, 5]);

    // DECIMAL
    let descr =
        make_column_descr![PhysicalType::BYTE_ARRAY, ConvertedType::DECIMAL, 0, 8, 2];
    let value = ByteArray::from(vec![207, 200]);
    assert_eq!(
        Field::convert_byte_array(&descr, value.clone()),
        Field::Decimal(Decimal::from_bytes(value, 8, 2))
    );

    // DECIMAL (FIXED_LEN_BYTE_ARRAY)
    let descr = make_column_descr![
        PhysicalType::FIXED_LEN_BYTE_ARRAY,
        ConvertedType::DECIMAL,
        8,
        17,
        5
    ];
    let value = ByteArray::from(vec![0, 0, 0, 0, 0, 4, 147, 224]);
    assert_eq!(
        Field::convert_byte_array(&descr, value.clone()),
        Field::Decimal(Decimal::from_bytes(value, 17, 5))
    );

    // NONE (FIXED_LEN_BYTE_ARRAY)
    let descr = make_column_descr![
        PhysicalType::FIXED_LEN_BYTE_ARRAY,
        ConvertedType::NONE,
        6,
        0,
        0
    ];
    let value = ByteArray::from(vec![1, 2, 3, 4, 5, 6]);
    assert_eq!(
        Field::convert_byte_array(&descr, value.clone()),
        Field::Bytes(value)
    );
}
#[test]
fn test_convert_date_to_string() {
    // (year, month, day) triples to round-trip through the date helper.
    let dates: [(i32, u32, u32); 5] = [
        (2010, 1, 2),
        (2014, 5, 1),
        (2016, 2, 29),
        (2017, 9, 12),
        (2018, 3, 31),
    ];
    for &(year, month, day) in dates.iter() {
        let midnight = chrono::NaiveDate::from_ymd(year, month, day).and_hms(0, 0, 0);
        let dt = Utc.from_utc_datetime(&midnight);
        let days_since_epoch = (dt.timestamp() / 60 / 60 / 24) as u32;
        let actual = convert_date_to_string(days_since_epoch);
        let expected = format!("{}", dt.format("%Y-%m-%d %:z"));
        assert_eq!(actual, expected);
    }
}
#[test]
fn test_convert_timestamp_to_string() {
    // (year, month, day, hour, minute, second) tuples to round-trip.
    let datetimes: [(i32, u32, u32, u32, u32, u32); 5] = [
        (2010, 1, 2, 13, 12, 54),
        (2011, 1, 3, 8, 23, 1),
        (2012, 4, 5, 11, 6, 32),
        (2013, 5, 12, 16, 38, 0),
        (2014, 11, 28, 21, 15, 12),
    ];
    for &(year, month, day, hour, minute, second) in datetimes.iter() {
        let naive =
            chrono::NaiveDate::from_ymd(year, month, day).and_hms(hour, minute, second);
        let dt = Utc.from_utc_datetime(&naive);
        let actual = convert_timestamp_millis_to_string(dt.timestamp_millis() as u64);
        let expected = format!("{}", dt.format("%Y-%m-%d %H:%M:%S %:z"));
        assert_eq!(actual, expected);
    }
}
#[test]
fn test_convert_float_to_string() {
    // (input, expected rendering)
    let cases: [(f32, &str); 8] = [
        (1.0, "1.0"),
        (9.63, "9.63"),
        (1e-15, "0.000000000000001"),
        (1e-16, "1E-16"),
        (1e19, "10000000000000000000.0"),
        (1e20, "1E20"),
        (1.7976931E30, "1.7976931E30"),
        (-1.7976931E30, "-1.7976931E30"),
    ];
    for &(input, expected) in cases.iter() {
        assert_eq!(format!("{}", Field::Float(input)), expected);
    }
}
#[test]
fn test_convert_double_to_string() {
    // (input, expected rendering)
    let cases: [(f64, &str); 8] = [
        (1.0, "1.0"),
        (9.63, "9.63"),
        (1e-15, "0.000000000000001"),
        (1e-16, "1E-16"),
        (1e19, "10000000000000000000.0"),
        (1e20, "1E20"),
        (1.79769313486E308, "1.79769313486E308"),
        (-1.79769313486E308, "-1.79769313486E308"),
    ];
    for &(input, expected) in cases.iter() {
        assert_eq!(format!("{}", Field::Double(input)), expected);
    }
}
#[test]
fn test_convert_decimal_to_string() {
    // Each case: raw decimal bytes, precision, scale, expected rendering.
    let cases: Vec<(Vec<u8>, i32, i32, &str)> = vec![
        // This example previously used to fail in some engines
        (
            vec![0, 0, 0, 0, 0, 0, 0, 0, 13, 224, 182, 179, 167, 100, 0, 0],
            38,
            18,
            "1.000000000000000000",
        ),
        (
            vec![
                249, 233, 247, 16, 185, 192, 202, 223, 215, 165, 192, 166, 67, 72,
            ],
            36,
            28,
            "-12344.0242342304923409234234293432",
        ),
        (vec![0, 0, 0, 0, 0, 4, 147, 224], 17, 5, "3.00000"),
        (vec![0, 0, 0, 0, 1, 201, 195, 140], 18, 2, "300000.12"),
        // Same bytes at two scales: only the decimal point placement changes.
        (vec![207, 200], 10, 2, "-123.44"),
        (vec![207, 200], 10, 8, "-0.00012344"),
    ];

    for (bytes, precision, scale, expected) in cases {
        let decimal = Decimal::from_bytes(ByteArray::from(bytes), precision, scale);
        assert_eq!(convert_decimal_to_string(&decimal), expected);
    }
}
#[test]
fn test_row_display() {
    // Primitive fields whose `Display` output is a fixed literal.
    let literal_cases: Vec<(Field, &str)> = vec![
        (Field::Null, "null"),
        (Field::Bool(true), "true"),
        (Field::Bool(false), "false"),
        (Field::Byte(1), "1"),
        (Field::Short(2), "2"),
        (Field::Int(3), "3"),
        (Field::Long(4), "4"),
        (Field::UByte(1), "1"),
        (Field::UShort(2), "2"),
        (Field::UInt(3), "3"),
        (Field::ULong(4), "4"),
        (Field::Float(5.0), "5.0"),
        (Field::Float(5.1234), "5.1234"),
        (Field::Double(6.0), "6.0"),
        (Field::Double(6.1234), "6.1234"),
        (Field::Str("abc".to_string()), "\"abc\""),
        (Field::Bytes(ByteArray::from(vec![1, 2, 3])), "[1, 2, 3]"),
    ];
    for (field, expected) in literal_cases {
        assert_eq!(field.to_string(), expected);
    }

    // Primitive fields whose output must match the dedicated conversion helper.
    let delegated_cases: Vec<(Field, String)> = vec![
        (Field::Date(14611), convert_date_to_string(14611)),
        (
            Field::TimestampMillis(1262391174000),
            convert_timestamp_millis_to_string(1262391174000),
        ),
        (
            Field::TimestampMicros(1262391174000000),
            convert_timestamp_micros_to_string(1262391174000000),
        ),
        (
            Field::Decimal(Decimal::from_i32(4, 8, 2)),
            convert_decimal_to_string(&Decimal::from_i32(4, 8, 2)),
        ),
    ];
    for (field, expected) in delegated_cases {
        assert_eq!(field.to_string(), expected);
    }

    // Complex types
    let group = Field::Group(make_row(vec![
        ("x".to_string(), Field::Null),
        ("Y".to_string(), Field::Int(2)),
        ("z".to_string(), Field::Float(3.1)),
        ("a".to_string(), Field::Str("abc".to_string())),
    ]));
    assert_eq!(group.to_string(), "{x: null, Y: 2, z: 3.1, a: \"abc\"}");

    let list = Field::ListInternal(make_list(vec![
        Field::Int(2),
        Field::Int(1),
        Field::Null,
        Field::Int(12),
    ]));
    assert_eq!(list.to_string(), "[2, 1, null, 12]");

    let map = Field::MapInternal(make_map(vec![
        (Field::Int(1), Field::Float(1.2)),
        (Field::Int(2), Field::Float(4.5)),
        (Field::Int(3), Field::Float(2.3)),
    ]));
    assert_eq!(map.to_string(), "{1 -> 1.2, 2 -> 4.5, 3 -> 2.3}");
}
#[test]
fn test_is_primitive() {
    // Every scalar-like variant listed here must report itself as primitive.
    let primitives = vec![
        Field::Null,
        Field::Bool(true),
        Field::Bool(false),
        Field::Byte(1),
        Field::Short(2),
        Field::Int(3),
        Field::Long(4),
        Field::UByte(1),
        Field::UShort(2),
        Field::UInt(3),
        Field::ULong(4),
        Field::Float(5.0),
        Field::Float(5.1234),
        Field::Double(6.0),
        Field::Double(6.1234),
        Field::Str("abc".to_string()),
        Field::Bytes(ByteArray::from(vec![1, 2, 3])),
        Field::TimestampMillis(12345678),
        Field::TimestampMicros(12345678901),
        Field::Decimal(Decimal::from_i32(4, 8, 2)),
    ];
    for field in primitives {
        assert!(field.is_primitive());
    }

    // Groups, lists and maps are containers and must not be primitive.
    let complex = vec![
        Field::Group(make_row(vec![
            ("x".to_string(), Field::Null),
            ("Y".to_string(), Field::Int(2)),
            ("z".to_string(), Field::Float(3.1)),
            ("a".to_string(), Field::Str("abc".to_string())),
        ])),
        Field::ListInternal(make_list(vec![
            Field::Int(2),
            Field::Int(1),
            Field::Null,
            Field::Int(12),
        ])),
        Field::MapInternal(make_map(vec![
            (Field::Int(1), Field::Float(1.2)),
            (Field::Int(2), Field::Float(4.5)),
            (Field::Int(3), Field::Float(2.3)),
        ])),
    ];
    for field in complex {
        assert!(!field.is_primitive());
    }
}
#[test]
fn test_row_primitive_field_fmt() {
    // One value per primitive variant. Column names are the zero-padded
    // positions "00" through "17".
    let values = vec![
        Field::Null,
        Field::Bool(false),
        Field::Byte(3),
        Field::Short(4),
        Field::Int(5),
        Field::Long(6),
        Field::UByte(7),
        Field::UShort(8),
        Field::UInt(9),
        Field::ULong(10),
        Field::Float(11.1),
        Field::Double(12.1),
        Field::Str("abc".to_string()),
        Field::Bytes(ByteArray::from(vec![1, 2, 3, 4, 5])),
        Field::Date(14611),
        Field::TimestampMillis(1262391174000),
        Field::TimestampMicros(1262391174000000),
        Field::Decimal(Decimal::from_i32(4, 7, 2)),
    ];
    let columns: Vec<(String, Field)> = values
        .into_iter()
        .enumerate()
        .map(|(position, value)| (format!("{:02}", position), value))
        .collect();
    let row = make_row(columns);

    // Expected rendering of each column, in column order.
    let expected: Vec<String> = vec![
        "null".to_string(),
        "false".to_string(),
        "3".to_string(),
        "4".to_string(),
        "5".to_string(),
        "6".to_string(),
        "7".to_string(),
        "8".to_string(),
        "9".to_string(),
        "10".to_string(),
        "11.1".to_string(),
        "12.1".to_string(),
        "\"abc\"".to_string(),
        "[1, 2, 3, 4, 5]".to_string(),
        convert_date_to_string(14611),
        convert_timestamp_millis_to_string(1262391174000),
        convert_timestamp_micros_to_string(1262391174000000),
        "0.04".to_string(),
    ];
    for (index, want) in expected.iter().enumerate() {
        assert_eq!(*want, format!("{}", row.fmt(index)));
    }
}
#[test]
fn test_row_complex_field_fmt() {
    // Build one field of each nested kind, then place them in columns "00".."02".
    let group = Field::Group(make_row(vec![
        ("x".to_string(), Field::Null),
        ("Y".to_string(), Field::Int(2)),
    ]));
    let list = Field::ListInternal(make_list(vec![
        Field::Int(2),
        Field::Int(1),
        Field::Null,
        Field::Int(12),
    ]));
    let map = Field::MapInternal(make_map(vec![
        (Field::Int(1), Field::Float(1.2)),
        (Field::Int(2), Field::Float(4.5)),
        (Field::Int(3), Field::Float(2.3)),
    ]));
    let row = make_row(vec![
        ("00".to_string(), group),
        ("01".to_string(), list),
        ("02".to_string(), map),
    ]);

    // Expected rendering of each column, in column order.
    let expected = [
        "{x: null, Y: 2}",
        "[2, 1, null, 12]",
        "{1 -> 1.2, 2 -> 4.5, 3 -> 2.3}",
    ];
    for (index, want) in expected.iter().enumerate() {
        assert_eq!(*want, format!("{}", row.fmt(index)));
    }
}
#[test]
fn test_row_primitive_accessors() {
    // One column per primitive variant. Column 0 holds `Field::Null` and is
    // not read back by any typed getter in this test.
    let row = make_row(vec![
        ("a".to_string(), Field::Null),
        ("b".to_string(), Field::Bool(false)),
        ("c".to_string(), Field::Byte(3)),
        ("d".to_string(), Field::Short(4)),
        ("e".to_string(), Field::Int(5)),
        ("f".to_string(), Field::Long(6)),
        ("g".to_string(), Field::UByte(3)),
        ("h".to_string(), Field::UShort(4)),
        ("i".to_string(), Field::UInt(5)),
        ("j".to_string(), Field::ULong(6)),
        ("k".to_string(), Field::Float(7.1)),
        ("l".to_string(), Field::Double(8.1)),
        ("m".to_string(), Field::Str("abc".to_string())),
        (
            "n".to_string(),
            Field::Bytes(ByteArray::from(vec![1, 2, 3, 4, 5])),
        ),
        ("o".to_string(), Field::Decimal(Decimal::from_i32(4, 7, 2))),
    ]);

    assert!(!row.get_bool(1).unwrap());
    assert_eq!(3, row.get_byte(2).unwrap());
    assert_eq!(4, row.get_short(3).unwrap());
    assert_eq!(5, row.get_int(4).unwrap());
    assert_eq!(6, row.get_long(5).unwrap());
    assert_eq!(3, row.get_ubyte(6).unwrap());
    assert_eq!(4, row.get_ushort(7).unwrap());
    assert_eq!(5, row.get_uint(8).unwrap());
    assert_eq!(6, row.get_ulong(9).unwrap());
    // Compare floats by absolute difference. Without `.abs()` a one-sided
    // check would also accept any value larger than the expected one.
    assert!((row.get_float(10).unwrap() - 7.1).abs() < f32::EPSILON);
    assert!((row.get_double(11).unwrap() - 8.1).abs() < f64::EPSILON);
    assert_eq!("abc", row.get_string(12).unwrap());
    assert_eq!(5, row.get_bytes(13).unwrap().len());
    assert_eq!(7, row.get_decimal(14).unwrap().precision());
}
#[test]
fn test_row_primitive_invalid_accessors() {
    // Shorthand for building a named column.
    let column = |name: &str, field: Field| (name.to_string(), field);

    // A row made only of primitive columns.
    let row = make_row(vec![
        column("a", Field::Null),
        column("b", Field::Bool(false)),
        column("c", Field::Byte(3)),
        column("d", Field::Short(4)),
        column("e", Field::Int(5)),
        column("f", Field::Long(6)),
        column("g", Field::UByte(3)),
        column("h", Field::UShort(4)),
        column("i", Field::UInt(5)),
        column("j", Field::ULong(6)),
        column("k", Field::Float(7.1)),
        column("l", Field::Double(8.1)),
        column("m", Field::Str("abc".to_string())),
        column("n", Field::Bytes(ByteArray::from(vec![1, 2, 3, 4, 5]))),
        column("o", Field::Decimal(Decimal::from_i32(4, 7, 2))),
    ]);

    // None of the primitive columns may be read back as a group.
    (0..row.len()).for_each(|index| assert!(row.get_group(index).is_err()));
}
#[test]
fn test_row_complex_accessors() {
    // One column of each nested kind: group (2 fields), list (4 elements),
    // map (3 entries).
    let group = Field::Group(make_row(vec![
        ("x".to_string(), Field::Null),
        ("Y".to_string(), Field::Int(2)),
    ]));
    let list = Field::ListInternal(make_list(vec![
        Field::Int(2),
        Field::Int(1),
        Field::Null,
        Field::Int(12),
    ]));
    let map = Field::MapInternal(make_map(vec![
        (Field::Int(1), Field::Float(1.2)),
        (Field::Int(2), Field::Float(4.5)),
        (Field::Int(3), Field::Float(2.3)),
    ]));
    let row = make_row(vec![
        ("a".to_string(), group),
        ("b".to_string(), list),
        ("c".to_string(), map),
    ]);

    // Each typed accessor must hand back a container of the original size.
    assert_eq!(2, row.get_group(0).unwrap().len());
    assert_eq!(4, row.get_list(1).unwrap().len());
    assert_eq!(3, row.get_map(2).unwrap().len());
}
#[test]
fn test_row_complex_invalid_accessors() {
    // One column of each nested kind, in the order group, list, map.
    let group = Field::Group(make_row(vec![
        ("x".to_string(), Field::Null),
        ("Y".to_string(), Field::Int(2)),
    ]));
    let list = Field::ListInternal(make_list(vec![
        Field::Int(2),
        Field::Int(1),
        Field::Null,
        Field::Int(12),
    ]));
    let map = Field::MapInternal(make_map(vec![
        (Field::Int(1), Field::Float(1.2)),
        (Field::Int(2), Field::Float(4.5)),
        (Field::Int(3), Field::Float(2.3)),
    ]));
    let row = make_row(vec![
        ("a".to_string(), group),
        ("b".to_string(), list),
        ("c".to_string(), map),
    ]);

    // Reading a nested column as a float must fail with an error naming the
    // actual variant.
    let expected_errors = [
        (0, "Cannot access Group as Float"),
        (1, "Cannot access ListInternal as Float"),
        (2, "Cannot access MapInternal as Float"),
    ];
    for &(index, message) in expected_errors.iter() {
        assert_eq!(
            ParquetError::General(message.to_string()),
            row.get_float(index).unwrap_err()
        );
    }
}
#[test]
fn test_list_primitive_accessors() {
    // For each primitive variant, build a small homogeneous list and read one
    // element back through the matching typed getter.
    let list = make_list(vec![Field::Bool(false)]);
    assert!(!list.get_bool(0).unwrap());
    let list = make_list(vec![Field::Byte(3), Field::Byte(4)]);
    assert_eq!(4, list.get_byte(1).unwrap());
    let list = make_list(vec![Field::Short(4), Field::Short(5), Field::Short(6)]);
    assert_eq!(6, list.get_short(2).unwrap());
    let list = make_list(vec![Field::Int(5)]);
    assert_eq!(5, list.get_int(0).unwrap());
    let list = make_list(vec![Field::Long(6), Field::Long(7)]);
    assert_eq!(7, list.get_long(1).unwrap());
    let list = make_list(vec![Field::UByte(3), Field::UByte(4)]);
    assert_eq!(4, list.get_ubyte(1).unwrap());
    let list = make_list(vec![Field::UShort(4), Field::UShort(5), Field::UShort(6)]);
    assert_eq!(6, list.get_ushort(2).unwrap());
    let list = make_list(vec![Field::UInt(5)]);
    assert_eq!(5, list.get_uint(0).unwrap());
    let list = make_list(vec![Field::ULong(6), Field::ULong(7)]);
    assert_eq!(7, list.get_ulong(1).unwrap());
    let list = make_list(vec![
        Field::Float(8.1),
        Field::Float(9.2),
        Field::Float(10.3),
    ]);
    // Compare floats by absolute difference. Without `.abs()` a one-sided
    // check would also accept any value larger than the expected one.
    assert!((list.get_float(2).unwrap() - 10.3).abs() < f32::EPSILON);
    let list = make_list(vec![Field::Double(3.1415)]);
    assert!((list.get_double(0).unwrap() - 3.1415).abs() < f64::EPSILON);
    let list = make_list(vec![Field::Str("abc".to_string())]);
    assert_eq!(&"abc".to_string(), list.get_string(0).unwrap());
    let list = make_list(vec![Field::Bytes(ByteArray::from(vec![1, 2, 3, 4, 5]))]);
    assert_eq!(&[1, 2, 3, 4, 5], list.get_bytes(0).unwrap().data());
    let list = make_list(vec![Field::Decimal(Decimal::from_i32(4, 5, 2))]);
    assert_eq!(&[0, 0, 0, 4], list.get_decimal(0).unwrap().data());
}
#[test]
fn test_list_primitive_invalid_accessors() {
    // Every list is read through a getter for a different type than it holds,
    // so each call must return an error.

    // Signed integers and bool.
    let bools = make_list(vec![Field::Bool(false)]);
    assert!(bools.get_byte(0).is_err());
    let bytes = make_list(vec![Field::Byte(3), Field::Byte(4)]);
    assert!(bytes.get_short(1).is_err());
    let shorts = make_list(vec![Field::Short(4), Field::Short(5), Field::Short(6)]);
    assert!(shorts.get_int(2).is_err());
    let ints = make_list(vec![Field::Int(5)]);
    assert!(ints.get_long(0).is_err());
    let longs = make_list(vec![Field::Long(6), Field::Long(7)]);
    assert!(longs.get_float(1).is_err());

    // Unsigned integers.
    let ubytes = make_list(vec![Field::UByte(3), Field::UByte(4)]);
    assert!(ubytes.get_short(1).is_err());
    let ushorts = make_list(vec![Field::UShort(4), Field::UShort(5), Field::UShort(6)]);
    assert!(ushorts.get_int(2).is_err());
    let uints = make_list(vec![Field::UInt(5)]);
    assert!(uints.get_long(0).is_err());
    let ulongs = make_list(vec![Field::ULong(6), Field::ULong(7)]);
    assert!(ulongs.get_float(1).is_err());

    // Floating point.
    let floats = make_list(vec![
        Field::Float(8.1),
        Field::Float(9.2),
        Field::Float(10.3),
    ]);
    assert!(floats.get_double(2).is_err());
    let doubles = make_list(vec![Field::Double(3.1415)]);
    assert!(doubles.get_string(0).is_err());

    // Strings, raw bytes and decimals.
    let strings = make_list(vec![Field::Str("abc".to_string())]);
    assert!(strings.get_bytes(0).is_err());
    let byte_arrays = make_list(vec![Field::Bytes(ByteArray::from(vec![1, 2, 3, 4, 5]))]);
    assert!(byte_arrays.get_bool(0).is_err());
    let decimals = make_list(vec![Field::Decimal(Decimal::from_i32(4, 5, 2))]);
    assert!(decimals.get_bool(0).is_err());
}
#[test]
fn test_list_complex_accessors() {
    // A list holding a single group with two fields.
    let inner_group = make_row(vec![
        ("x".to_string(), Field::Null),
        ("Y".to_string(), Field::Int(2)),
    ]);
    let groups = make_list(vec![Field::Group(inner_group)]);
    assert_eq!(2, groups.get_group(0).unwrap().len());

    // A list holding a single nested list with four elements.
    let inner_list = make_list(vec![
        Field::Int(2),
        Field::Int(1),
        Field::Null,
        Field::Int(12),
    ]);
    let lists = make_list(vec![Field::ListInternal(inner_list)]);
    assert_eq!(4, lists.get_list(0).unwrap().len());

    // A list holding a single map with three entries.
    let inner_map = make_map(vec![
        (Field::Int(1), Field::Float(1.2)),
        (Field::Int(2), Field::Float(4.5)),
        (Field::Int(3), Field::Float(2.3)),
    ]);
    let maps = make_list(vec![Field::MapInternal(inner_map)]);
    assert_eq!(3, maps.get_map(0).unwrap().len());
}
#[test]
fn test_list_complex_invalid_accessors() {
    // Each case pairs a single-element list of a nested kind with the error
    // expected when that element is read as a float.
    let cases = vec![
        (
            make_list(vec![Field::Group(make_row(vec![
                ("x".to_string(), Field::Null),
                ("Y".to_string(), Field::Int(2)),
            ]))]),
            general_err!("Cannot access Group as Float".to_string()),
        ),
        (
            make_list(vec![Field::ListInternal(make_list(vec![
                Field::Int(2),
                Field::Int(1),
                Field::Null,
                Field::Int(12),
            ]))]),
            general_err!("Cannot access ListInternal as Float".to_string()),
        ),
        (
            make_list(vec![Field::MapInternal(make_map(vec![
                (Field::Int(1), Field::Float(1.2)),
                (Field::Int(2), Field::Float(4.5)),
                (Field::Int(3), Field::Float(2.3)),
            ]))]),
            general_err!("Cannot access MapInternal as Float".to_string()),
        ),
    ];

    for (list, expected) in cases {
        assert_eq!(expected, list.get_float(0).unwrap_err());
    }
}
#[test]
fn test_map_accessors() {
    // a map from int to string; the same table drives construction and checks
    let pairs = [(1, "a"), (2, "b"), (3, "c"), (4, "d"), (5, "e")];
    let entries: Vec<(Field, Field)> = pairs
        .iter()
        .map(|&(key, value)| (Field::Int(key), Field::Str(value.to_string())))
        .collect();
    let map = make_map(entries);
    assert_eq!(pairs.len(), map.len());

    // Keys and values must come back positionally, in insertion order.
    for (index, &(key, value)) in pairs.iter().enumerate() {
        assert_eq!(key, map.get_keys().get_int(index).unwrap());
        assert_eq!(value, map.get_values().get_string(index).unwrap());
    }
}
#[test]
#[cfg(feature = "cli")]
fn test_to_json_value() {
    // Shorthand constructors for the expected JSON numbers.
    let int = |n: i32| Value::Number(serde_json::Number::from(n));
    let float = |x: f64| Value::Number(serde_json::Number::from_f64(x).unwrap());

    // Null and booleans map onto their JSON counterparts.
    assert_eq!(Field::Null.to_json_value(), Value::Null);
    for &flag in [true, false].iter() {
        assert_eq!(Field::Bool(flag).to_json_value(), Value::Bool(flag));
    }

    // Every integer width, signed or unsigned, becomes a JSON number.
    let integer_cases = vec![
        (Field::Byte(1), 1),
        (Field::Short(2), 2),
        (Field::Int(3), 3),
        (Field::Long(4), 4),
        (Field::UByte(1), 1),
        (Field::UShort(2), 2),
        (Field::UInt(3), 3),
        (Field::ULong(4), 4),
    ];
    for (field, expected) in integer_cases {
        assert_eq!(field.to_json_value(), int(expected));
    }

    // f32 values are expected as the widened f64 of the same f32 literal.
    let float_cases = vec![
        (Field::Float(5.0), f64::from(5.0_f32)),
        (Field::Float(5.1234), f64::from(5.1234_f32)),
        (Field::Double(6.0), 6.0_f64),
        (Field::Double(6.1234), 6.1234_f64),
    ];
    for (field, expected) in float_cases {
        assert_eq!(field.to_json_value(), float(expected));
    }

    // Strings, decimals, byte arrays and timestamps are emitted as JSON strings.
    let string_cases = vec![
        (Field::Str("abc".to_string()), String::from("abc")),
        (
            Field::Decimal(Decimal::from_i32(4, 8, 2)),
            String::from("0.04"),
        ),
        (
            Field::Bytes(ByteArray::from(vec![1, 2, 3])),
            String::from("AQID"),
        ),
        (
            Field::TimestampMillis(12345678),
            "1970-01-01 03:25:45 +00:00".to_string(),
        ),
        (
            Field::TimestampMicros(12345678901),
            convert_timestamp_micros_to_string(12345678901),
        ),
    ];
    for (field, expected) in string_cases {
        assert_eq!(field.to_json_value(), Value::String(expected));
    }

    // A group becomes a JSON object keyed by column name.
    let group = Field::Group(make_row(vec![
        ("X".to_string(), Field::Int(1)),
        ("Y".to_string(), Field::Double(2.2)),
        ("Z".to_string(), Field::Str("abc".to_string())),
    ]));
    assert_eq!(
        group.to_json_value(),
        serde_json::json!({"X": 1, "Y": 2.2, "Z": "abc"})
    );

    // A list becomes a JSON array, with nulls preserved in place.
    let list = Field::ListInternal(make_list(vec![
        Field::Int(1),
        Field::Int(12),
        Field::Null,
    ]));
    let expected_array = vec![int(1), int(12), Value::Null];
    assert_eq!(list.to_json_value(), Value::Array(expected_array));

    // A map with string keys becomes a JSON object.
    let map = Field::MapInternal(make_map(vec![
        (Field::Str("k1".to_string()), Field::Double(1.2)),
        (Field::Str("k2".to_string()), Field::Double(3.4)),
        (Field::Str("k3".to_string()), Field::Double(4.5)),
    ]));
    assert_eq!(
        map.to_json_value(),
        serde_json::json!({"k1": 1.2, "k2": 3.4, "k3": 4.5})
    );
}
}
#[cfg(test)]
#[allow(clippy::approx_constant, clippy::many_single_char_names)]
mod api_tests {
    // Only the helper constructors and `Field` are imported, so these tests
    // use the record types without a glob import of the parent module.
    use super::{make_list, make_map, make_row};
    use crate::record::Field;

    /// A nested group reached through the column iterator can be matched and
    /// compared against an identically built row.
    #[test]
    fn test_field_visibility() {
        // Builds the nested group used both as the column value and as the
        // expectation.
        let nested = || {
            make_row(vec![
                ("x".to_string(), Field::Null),
                ("Y".to_string(), Field::Int(2)),
            ])
        };
        let row = make_row(vec![("a".to_string(), Field::Group(nested()))]);

        let column = match row.get_column_iter().next() {
            Some(column) => column,
            None => panic!("Expected at least one column"),
        };
        assert_eq!("a", column.0);
        match column.1 {
            Field::Group(inner) => assert_eq!(&nested(), inner),
            _ => panic!("Expected the first column to be Field::Group"),
        }
    }

    /// `elements()` exposes the list contents as a slice in insertion order.
    #[test]
    fn test_list_element_access() {
        let items = vec![
            Field::Int(1),
            Field::Group(make_row(vec![
                ("x".to_string(), Field::Null),
                ("Y".to_string(), Field::Int(2)),
            ])),
        ];
        let list = make_list(items.clone());
        assert_eq!(&items[..], list.elements());
    }

    /// `entries()` exposes the map contents as a slice of key/value pairs.
    #[test]
    fn test_map_entry_access() {
        let pairs = vec![
            (Field::Str("one".to_owned()), Field::Int(1)),
            (Field::Str("two".to_owned()), Field::Int(2)),
        ];
        let map = make_map(pairs.clone());
        assert_eq!(&pairs[..], map.entries());
    }
}