blob: 1857e6af9b5594a9c3d61c03dd8c7022ed2159a4 [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
use std::path::PathBuf;
use arrow_schema::SchemaRef;
use datafusion::error::{DataFusionError, Result};
pub mod cars;
pub mod regex;
/// Describes example datasets used across DataFusion examples.
///
/// This enum provides a single, discoverable place to define
/// dataset-specific metadata such as file paths and schemas.
///
/// The enum carries no data, so it is cheap to copy and can be compared,
/// hashed, and used as a map/set key.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum ExampleDataset {
    /// The `cars` dataset (file stem `"cars"`).
    Cars,
    /// The `regex` dataset (file stem `"regex"`).
    Regex,
}
impl ExampleDataset {
    /// Returns the dataset's base file name, without directory or extension.
    pub fn file_stem(&self) -> &'static str {
        match self {
            Self::Cars => "cars",
            Self::Regex => "regex",
        }
    }

    /// Returns the path of the dataset's CSV file:
    /// `<CARGO_MANIFEST_DIR>/data/csv/<file_stem>.csv`.
    ///
    /// `CARGO_MANIFEST_DIR` is expanded at compile time by `env!`, so the
    /// result is anchored to the crate directory used for the build. This
    /// method only builds the path; it does not check that the file exists.
    pub fn path(&self) -> PathBuf {
        PathBuf::from(env!("CARGO_MANIFEST_DIR"))
            .join("data")
            .join("csv")
            .join(format!("{}.csv", self.file_stem()))
    }

    /// Returns [`Self::path`] as an owned `String`.
    ///
    /// # Errors
    ///
    /// Returns [`DataFusionError::Execution`] if the path is not valid UTF-8.
    pub fn path_str(&self) -> Result<String> {
        let path = self.path();
        path.to_str().map(String::from).ok_or_else(|| {
            // The path names a CSV file (not a directory), so say so.
            DataFusionError::Execution(format!(
                "CSV file path is not valid UTF-8: {}",
                path.display()
            ))
        })
    }

    /// Returns the schema for this dataset, as produced by the matching
    /// submodule's `schema()` function (`cars::schema` or `regex::schema`).
    pub fn schema(&self) -> SchemaRef {
        match self {
            Self::Cars => cars::schema(),
            Self::Regex => regex::schema(),
        }
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use arrow::datatypes::{DataType, TimeUnit};

    /// Flattens a schema into `(column name, data type)` pairs, in field order.
    fn field_summary(schema: &SchemaRef) -> Vec<(&str, DataType)> {
        let mut summary = Vec::new();
        for field in schema.fields().iter() {
            summary.push((field.name().as_str(), field.data_type().clone()));
        }
        summary
    }

    /// Each dataset reports the expected file stem.
    #[test]
    fn example_dataset_file_stem() {
        let cases = [
            (ExampleDataset::Cars, "cars"),
            (ExampleDataset::Regex, "regex"),
        ];
        for (dataset, stem) in cases {
            assert_eq!(dataset.file_stem(), stem);
        }
    }

    /// Each dataset path ends in `data/csv/<stem>.csv`.
    #[test]
    fn example_dataset_path_points_to_csv() {
        let cases = [
            (ExampleDataset::Cars, "data/csv/cars.csv"),
            (ExampleDataset::Regex, "data/csv/regex.csv"),
        ];
        for (dataset, tail) in cases {
            let location = dataset.path();
            assert!(location.ends_with(tail));
        }
    }

    /// The string form of each path is available and ends in the file name.
    #[test]
    fn example_dataset_path_str_is_valid_utf8() {
        let cases = [
            (ExampleDataset::Cars, "cars.csv"),
            (ExampleDataset::Regex, "regex.csv"),
        ];
        for (dataset, file_name) in cases {
            let rendered = dataset.path_str().unwrap();
            assert!(rendered.ends_with(file_name));
        }
    }

    /// Pins the column names and types of the `cars` schema.
    #[test]
    fn cars_schema_is_stable() {
        let expected = vec![
            ("car", DataType::Utf8),
            ("speed", DataType::Float64),
            ("time", DataType::Timestamp(TimeUnit::Nanosecond, None)),
        ];
        let schema = ExampleDataset::Cars.schema();
        assert_eq!(field_summary(&schema), expected);
    }

    /// Pins the column names and types of the `regex` schema.
    #[test]
    fn regex_schema_is_stable() {
        let expected = vec![
            ("values", DataType::Utf8),
            ("patterns", DataType::Utf8),
            ("replacement", DataType::Utf8),
            ("flags", DataType::Utf8),
        ];
        let schema = ExampleDataset::Regex.schema();
        assert_eq!(field_summary(&schema), expected);
    }
}