blob: 3a10cc2b42f9f7b98c09b66e8f573f6ca1eb0068 [file]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
use std::{collections::BTreeMap, sync::Arc};
use arrow::datatypes::{DataType, Field};
use hashbrown::HashMap;
use crate::{error::_plan_err, DataFusionError, ScalarValue};
/// A [`ScalarValue`] with optional [`FieldMetadata`]
///
/// Both fields are public; the accessor methods on this type return
/// borrowed views of the same data.
#[derive(Debug, Clone)]
pub struct ScalarAndMetadata {
/// The scalar value that stores the data
pub value: ScalarValue,
/// Metadata attached to the value, or `None` when no metadata is attached
pub metadata: Option<FieldMetadata>,
}
impl ScalarAndMetadata {
/// Create a new Literal from a scalar value with optional [`FieldMetadata`]
pub fn new(value: ScalarValue, metadata: Option<FieldMetadata>) -> Self {
Self { value, metadata }
}
/// Access the underlying [ScalarValue] storage
pub fn value(&self) -> &ScalarValue {
&self.value
}
/// Access the [FieldMetadata] attached to this value, if any
pub fn metadata(&self) -> Option<&FieldMetadata> {
self.metadata.as_ref()
}
/// Consume self and return components
pub fn into_inner(self) -> (ScalarValue, Option<FieldMetadata>) {
(self.value, self.metadata)
}
/// Cast this values's storage type
///
/// This operation assumes that if the underlying [ScalarValue] can be casted
/// to a given type that any extension type represented by the metadata is also
/// valid.
pub fn cast_storage_to(
&self,
target_type: &DataType,
) -> Result<Self, DataFusionError> {
let new_value = self.value().cast_to(target_type)?;
Ok(Self::new(new_value, self.metadata.clone()))
}
}
/// Wraps a bare [`ScalarValue`] in a [`ScalarAndMetadata`] that carries no
/// metadata
impl From<ScalarValue> for ScalarAndMetadata {
    fn from(value: ScalarValue) -> Self {
        Self {
            value,
            metadata: None,
        }
    }
}
/// Verify that two data types agree on both storage type and field metadata
///
/// Each side is a `(storage type, optional metadata)` pair. Absent metadata
/// (e.g., the side came from a bare `DataType`) and empty metadata (e.g., the
/// side came from a field that had no metadata) are treated as equal. When
/// both sides carry metadata the maps are compared with `==`, i.e. keys and
/// values must match byte-for-byte, even though this is potentially too strict
/// for some cases (e.g., extension types where extension metadata is
/// represented by JSON, or cases where field metadata is orthogonal to the
/// interpretation of the data type).
///
/// `what` names the item being checked and `context` is appended verbatim to
/// the end of the error message.
///
/// # Errors
///
/// Returns a planning error with suitably formatted type representations if
/// the storage types differ or the metadata does not compare equal.
pub fn check_metadata_with_storage_equal(
    actual: (
        &DataType,
        Option<&std::collections::HashMap<String, String>>,
    ),
    expected: (
        &DataType,
        Option<&std::collections::HashMap<String, String>>,
    ),
    what: &str,
    context: &str,
) -> Result<(), DataFusionError> {
    let (actual_type, actual_metadata) = actual;
    let (expected_type, expected_metadata) = expected;

    let metadata_matches = match (actual_metadata, expected_metadata) {
        (Some(lhs), Some(rhs)) => lhs == rhs,
        // At most one side is present here: a missing map is interchangeable
        // with an empty one, so the pair matches iff the present map (if any)
        // is empty.
        (lhs, rhs) => lhs.or(rhs).map_or(true, |present| present.is_empty()),
    };

    // A storage mismatch and a metadata mismatch produce the same message.
    if actual_type != expected_type || !metadata_matches {
        return _plan_err!(
            "Expected {what} of type {}, got {}{context}",
            format_type_and_metadata(expected_type, expected_metadata),
            format_type_and_metadata(actual_type, actual_metadata)
        );
    }

    Ok(())
}
/// Given a data type represented by storage and optional metadata, generate
/// a user-facing string
///
/// This function exists to reduce the number of Field debug strings that are
/// used to communicate type information in error messages and plan explain
/// renderings.
pub fn format_type_and_metadata(
data_type: &DataType,
metadata: Option<&std::collections::HashMap<String, String>>,
) -> String {
match metadata {
Some(metadata) if !metadata.is_empty() => {
format!("{data_type}<{metadata:?}>")
}
_ => data_type.to_string(),
}
}
/// Literal metadata
///
/// Stores metadata associated with a literal expression
/// and is designed to be fast to `clone`.
///
/// This structure is used to store metadata associated with a literal expression, and it
/// corresponds to the `metadata` field on [`Field`].
///
/// # Example: Create [`FieldMetadata`] from a [`Field`]
/// ```
/// # use std::collections::HashMap;
/// # use datafusion_common::metadata::FieldMetadata;
/// # use arrow::datatypes::{Field, DataType};
/// # let field = Field::new("c1", DataType::Int32, true)
/// # .with_metadata(HashMap::from([("foo".to_string(), "bar".to_string())]));
/// // Create a new `FieldMetadata` instance from a `Field`
/// let metadata = FieldMetadata::new_from_field(&field);
/// // There is also a `From` impl:
/// let metadata = FieldMetadata::from(&field);
/// ```
///
/// # Example: Update a [`Field`] with [`FieldMetadata`]
/// ```
/// # use datafusion_common::metadata::FieldMetadata;
/// # use arrow::datatypes::{Field, DataType};
/// # let field = Field::new("c1", DataType::Int32, true);
/// # let metadata = FieldMetadata::new_from_field(&field);
/// // Add any metadata from `FieldMetadata` to `Field`
/// let updated_field = metadata.add_to_field(field);
/// ```
#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)]
pub struct FieldMetadata {
/// The inner metadata of a literal expression, which is a map of string
/// keys to string values.
///
/// Note this is a `BTreeMap` rather than a `HashMap` because `HashMap` does
/// not implement traits such as `Hash` and `PartialOrd`, which are derived
/// for this struct.
///
/// The map is wrapped in an [`Arc`], so cloning a `FieldMetadata` clones the
/// pointer rather than the map contents.
inner: Arc<BTreeMap<String, String>>,
}
impl Default for FieldMetadata {
fn default() -> Self {
Self::new_empty()
}
}
impl FieldMetadata {
    /// Build a metadata instance that holds no key-value pairs.
    pub fn new_empty() -> Self {
        Self::new(BTreeMap::new())
    }

    /// Combines two optional `FieldMetadata` instances into one, with entries
    /// from `n` replacing entries from `m` that share the same key.
    ///
    /// This function is commonly used in alias operations, particularly for literals
    /// with metadata. When creating an alias expression, the metadata from the original
    /// expression (such as a literal) is combined with any metadata specified on the alias.
    ///
    /// # Arguments
    ///
    /// * `m` - The first metadata (typically from the original expression like a literal)
    /// * `n` - The second metadata (typically from the alias definition)
    ///
    /// # Merge Strategy
    ///
    /// - If both are present, the result holds the entries of both, and `n`
    ///   wins whenever the same key appears on both sides
    /// - If only one is present, a clone of it is returned unchanged
    /// - If neither is present, `None` is returned
    ///
    /// # Example usage
    /// ```rust
    /// use datafusion_common::metadata::FieldMetadata;
    /// use std::collections::BTreeMap;
    ///
    /// // Create metadata for a literal expression
    /// let literal_metadata = Some(FieldMetadata::from(BTreeMap::from([
    ///     ("source".to_string(), "constant".to_string()),
    ///     ("type".to_string(), "int".to_string()),
    /// ])));
    ///
    /// // Create metadata for an alias
    /// let alias_metadata = Some(FieldMetadata::from(BTreeMap::from([
    ///     ("description".to_string(), "answer".to_string()),
    ///     ("source".to_string(), "user".to_string()), // This will override literal's "source"
    /// ])));
    ///
    /// // Merge the metadata
    /// let merged = FieldMetadata::merge_options(
    ///     literal_metadata.as_ref(),
    ///     alias_metadata.as_ref(),
    /// );
    ///
    /// // Result contains: {"source": "user", "type": "int", "description": "answer"}
    /// assert!(merged.is_some());
    /// ```
    pub fn merge_options(
        m: Option<&FieldMetadata>,
        n: Option<&FieldMetadata>,
    ) -> Option<FieldMetadata> {
        match (m, n) {
            (None, None) => None,
            (Some(only), None) | (None, Some(only)) => Some(only.clone()),
            (Some(base), Some(overrides)) => {
                let mut combined = base.clone();
                combined.extend(overrides.clone());
                Some(combined)
            }
        }
    }

    /// Build a metadata instance by copying every entry of a `Field`'s metadata.
    pub fn new_from_field(field: &Field) -> Self {
        let entries: BTreeMap<String, String> = field
            .metadata()
            .iter()
            .map(|(key, value)| (key.clone(), value.clone()))
            .collect();
        Self::new(entries)
    }

    /// Build a metadata instance that takes ownership of the given map of
    /// string keys to string values.
    pub fn new(inner: BTreeMap<String, String>) -> Self {
        let inner = Arc::new(inner);
        Self { inner }
    }

    /// Borrow the underlying `BTreeMap`.
    pub fn inner(&self) -> &BTreeMap<String, String> {
        self.inner.as_ref()
    }

    /// Consume `self` and return the shared pointer to the underlying map.
    pub fn into_inner(self) -> Arc<BTreeMap<String, String>> {
        let Self { inner } = self;
        inner
    }

    /// Inserts every entry of `other` into `self`; on a key collision the
    /// value from `other` replaces the existing one.
    pub fn extend(&mut self, other: Self) {
        // Skip the work below entirely when `other` contributes no entries.
        if !other.is_empty() {
            // Reuse `other`'s map when it is uniquely owned, otherwise clone it.
            let incoming = Arc::unwrap_or_clone(other.into_inner());
            // `make_mut` clones our own map first if it is shared elsewhere.
            Arc::make_mut(&mut self.inner).extend(incoming);
        }
    }

    /// Returns true if there are no key-value pairs.
    pub fn is_empty(&self) -> bool {
        self.inner.is_empty()
    }

    /// Returns how many key-value pairs are stored.
    pub fn len(&self) -> usize {
        self.inner.len()
    }

    /// Copy the entries into a new `HashMap<String, String>`
    pub fn to_hashmap(&self) -> std::collections::HashMap<String, String> {
        self.inner
            .iter()
            .map(|(key, value)| (key.clone(), value.clone()))
            .collect()
    }

    /// Applies this metadata to `field`; when this metadata is empty the
    /// field is returned untouched.
    ///
    /// NOTE(review): the non-empty case goes through `Field::with_metadata`,
    /// which presumably replaces any metadata already on the field rather than
    /// merging with it — confirm against the arrow documentation.
    pub fn add_to_field(&self, field: Field) -> Field {
        if self.is_empty() {
            field
        } else {
            field.with_metadata(self.to_hashmap())
        }
    }
}
impl From<&Field> for FieldMetadata {
fn from(field: &Field) -> Self {
Self::new_from_field(field)
}
}
impl From<BTreeMap<String, String>> for FieldMetadata {
fn from(inner: BTreeMap<String, String>) -> Self {
Self::new(inner)
}
}
impl From<std::collections::HashMap<String, String>> for FieldMetadata {
fn from(map: std::collections::HashMap<String, String>) -> Self {
Self::new(map.into_iter().collect())
}
}
/// From reference
impl From<&std::collections::HashMap<String, String>> for FieldMetadata {
fn from(map: &std::collections::HashMap<String, String>) -> Self {
let inner = map
.iter()
.map(|(k, v)| (k.to_string(), v.to_string()))
.collect();
Self::new(inner)
}
}
/// From hashbrown map
impl From<HashMap<String, String>> for FieldMetadata {
fn from(map: HashMap<String, String>) -> Self {
let inner = map.into_iter().collect();
Self::new(inner)
}
}
/// Builds [`FieldMetadata`] from a borrowed hashbrown `HashMap`, cloning
/// every key and value
impl From<&HashMap<String, String>> for FieldMetadata {
    fn from(map: &HashMap<String, String>) -> Self {
        // `iter()` states the borrow explicitly (calling `into_iter()` on a
        // reference does not consume the map) and matches the equivalent
        // conversion from a borrowed standard-library `HashMap`.
        let inner = map
            .iter()
            .map(|(k, v)| (k.clone(), v.clone()))
            .collect();
        Self::new(inner)
    }
}