blob: 92b0024bfd2d901959ce2c12385736f359b1e845 [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
//! Extension types.
//!
//! <div class="warning">This module is experimental. There might be breaking changes between minor releases.</div>
#[cfg(feature = "canonical_extension_types")]
mod canonical;
#[cfg(feature = "canonical_extension_types")]
pub use canonical::*;
use crate::{ArrowError, DataType};
/// The metadata key for the string name identifying an [`ExtensionType`].
///
/// The value stored under this key in the field metadata is the extension
/// type's [`ExtensionType::NAME`].
pub const EXTENSION_TYPE_NAME_KEY: &str = "ARROW:extension:name";
/// The metadata key for a serialized representation of the [`ExtensionType`]
/// necessary to reconstruct the custom type.
///
/// The value stored under this key is the string returned by
/// [`ExtensionType::serialize_metadata`], and it is the input handed to
/// [`ExtensionType::deserialize_metadata`]. Extension types that define no
/// metadata do not use this key.
pub const EXTENSION_TYPE_METADATA_KEY: &str = "ARROW:extension:metadata";
/// Extension types.
///
/// User-defined “extension” types can be defined by setting certain key-value
/// pairs in the [`Field`] metadata structure. These extension keys are:
/// - [`EXTENSION_TYPE_NAME_KEY`]
/// - [`EXTENSION_TYPE_METADATA_KEY`]
///
/// Support for canonical extension types in this crate requires the
/// `canonical_extension_types` feature.
///
/// Extension types may or may not use the [`EXTENSION_TYPE_METADATA_KEY`]
/// field.
///
/// # Example
///
/// The example below demonstrates how to implement this trait for a `Uuid`
/// type. Note this is not the canonical extension type for `Uuid`, which does
/// not include information about the `Uuid` version.
///
/// ```
/// # use arrow_schema::ArrowError;
/// # fn main() -> Result<(), ArrowError> {
/// use arrow_schema::{DataType, extension::ExtensionType, Field};
/// use std::{fmt, str::FromStr};
///
/// /// The different Uuid versions.
/// #[derive(Clone, Copy, Debug, PartialEq)]
/// enum UuidVersion {
/// V1,
/// V2,
/// V3,
/// V4,
/// V5,
/// V6,
/// V7,
/// V8,
/// }
///
/// // We'll use `Display` to serialize.
/// impl fmt::Display for UuidVersion {
/// fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
/// write!(
/// f,
/// "{}",
/// match self {
/// Self::V1 => "V1",
/// Self::V2 => "V2",
/// Self::V3 => "V3",
/// Self::V4 => "V4",
/// Self::V5 => "V5",
/// Self::V6 => "V6",
/// Self::V7 => "V7",
/// Self::V8 => "V8",
/// }
/// )
/// }
/// }
///
/// // And `FromStr` to deserialize.
/// impl FromStr for UuidVersion {
/// type Err = ArrowError;
///
/// fn from_str(s: &str) -> Result<Self, Self::Err> {
/// match s {
/// "V1" => Ok(Self::V1),
/// "V2" => Ok(Self::V2),
/// "V3" => Ok(Self::V3),
/// "V4" => Ok(Self::V4),
/// "V5" => Ok(Self::V5),
/// "V6" => Ok(Self::V6),
/// "V7" => Ok(Self::V7),
/// "V8" => Ok(Self::V8),
/// _ => Err(ArrowError::ParseError("Invalid UuidVersion".to_owned())),
/// }
/// }
/// }
///
/// /// This is the extension type, not the container for Uuid values. It
/// /// stores the Uuid version (this is the metadata of this extension type).
/// #[derive(Clone, Copy, Debug, PartialEq)]
/// struct Uuid(UuidVersion);
///
/// impl ExtensionType for Uuid {
/// // We use a namespace as suggested by the specification.
/// const NAME: &'static str = "myorg.example.uuid";
///
/// // The metadata type is the Uuid version.
/// type Metadata = UuidVersion;
///
/// // We just return a reference to the Uuid version.
/// fn metadata(&self) -> &Self::Metadata {
/// &self.0
/// }
///
/// // We use the `Display` implementation to serialize the Uuid
/// // version.
/// fn serialize_metadata(&self) -> Option<String> {
/// Some(self.0.to_string())
/// }
///
/// // We use the `FromStr` implementation to deserialize the Uuid
/// // version.
/// fn deserialize_metadata(metadata: Option<&str>) -> Result<Self::Metadata, ArrowError> {
/// metadata.map_or_else(
/// || {
/// Err(ArrowError::InvalidArgumentError(
/// "Uuid extension type metadata missing".to_owned(),
/// ))
/// },
/// str::parse,
/// )
/// }
///
/// // The only supported data type is `FixedSizeBinary(16)`.
/// fn supports_data_type(&self, data_type: &DataType) -> Result<(), ArrowError> {
/// match data_type {
/// DataType::FixedSizeBinary(16) => Ok(()),
/// data_type => Err(ArrowError::InvalidArgumentError(format!(
/// "Uuid data type mismatch, expected FixedSizeBinary(16), found {data_type}"
/// ))),
/// }
/// }
///
/// // We should always check if the data type is supported before
/// // constructing the extension type.
/// fn try_new(data_type: &DataType, metadata: Self::Metadata) -> Result<Self, ArrowError> {
/// let uuid = Self(metadata);
/// uuid.supports_data_type(data_type)?;
/// Ok(uuid)
/// }
/// }
///
/// // We can now construct the extension type.
/// let uuid_v1 = Uuid(UuidVersion::V1);
///
/// // And add it to a field.
/// let mut field =
/// Field::new("", DataType::FixedSizeBinary(16), false).with_extension_type(uuid_v1);
///
/// // And extract it from this field.
/// assert_eq!(field.try_extension_type::<Uuid>()?, uuid_v1);
///
/// // When we try to add this to a field with an unsupported data type we
/// // get an error.
/// let result = Field::new("", DataType::Null, false).try_with_extension_type(uuid_v1);
/// assert!(result.is_err());
/// # Ok(()) }
/// ```
///
/// <https://arrow.apache.org/docs/format/Columnar.html#extension-types>
///
/// [`Field`]: crate::Field
pub trait ExtensionType: Sized {
/// The name identifying this extension type.
///
/// This is the string value that is used for the
/// [`EXTENSION_TYPE_NAME_KEY`] in the [`Field::metadata`] of a [`Field`]
/// to identify this extension type.
///
/// We recommend that you use a “namespace”-style prefix for extension
/// type names to minimize the possibility of conflicts with multiple Arrow
/// readers and writers in the same application. For example, use
/// `myorg.name_of_type` instead of simply `name_of_type`.
///
/// Extension names beginning with `arrow.` are reserved for canonical
/// extension types; they should not be used for third-party extension
/// types.
///
/// Extension names are case-sensitive.
///
/// [`Field`]: crate::Field
/// [`Field::metadata`]: crate::Field::metadata
const NAME: &'static str;
/// The metadata type of this extension type.
///
/// Implementations can use strongly or loosely typed data structures here
/// depending on the complexity of the metadata.
///
/// Implementations can also use `Self` here if the extension type can be
/// constructed directly from its metadata.
///
/// If an extension type defines no metadata it should use `()` to indicate
/// this.
type Metadata;
/// Returns a reference to the metadata of this extension type, or `&()`
/// if this extension type defines no metadata (`Self::Metadata=()`).
fn metadata(&self) -> &Self::Metadata;
/// Returns the serialized representation of the metadata of this extension
/// type, or `None` if this extension type defines no metadata
/// (`Self::Metadata=()`).
///
/// This is the string value that is used for the
/// [`EXTENSION_TYPE_METADATA_KEY`] in the [`Field::metadata`] of a
/// [`Field`].
///
/// [`Field`]: crate::Field
/// [`Field::metadata`]: crate::Field::metadata
fn serialize_metadata(&self) -> Option<String>;
/// Deserialize the metadata of this extension type from the serialized
/// representation of the metadata.
///
/// `metadata` is `None` when no serialized metadata is present. An
/// extension type that defines no metadata should expect `None` for the
/// serialized metadata and return `Ok(())`.
///
/// # Errors
///
/// This function should return an error when
/// - expected metadata is missing (for extension types with non-optional
/// metadata)
/// - unexpected metadata is set (for extension types without metadata)
/// - deserialization of metadata fails
fn deserialize_metadata(metadata: Option<&str>) -> Result<Self::Metadata, ArrowError>;
/// Returns `Ok(())` iff the given data type is supported by this extension
/// type.
///
/// # Errors
///
/// Returns an error if `data_type` is not supported by this extension
/// type.
fn supports_data_type(&self, data_type: &DataType) -> Result<(), ArrowError>;
/// Construct this extension type for a field with the given data type and
/// metadata.
///
/// Implementations should check that the data type is supported (see
/// [`Self::supports_data_type`]) before returning the constructed
/// extension type.
///
/// # Errors
///
/// This should return an error if the given data type is not supported by
/// this extension type.
fn try_new(data_type: &DataType, metadata: Self::Metadata) -> Result<Self, ArrowError>;
}