blob: 11416d141eb74f0d8a2c1f71a7dcdd4acd0e3a9b [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
//! Domain model for DataFusion example documentation.
//!
//! This module defines the core data structures used to represent
//! example groups, individual examples, and their categorization
//! as parsed from `main.rs` documentation comments.
use std::path::Path;
use datafusion::error::{DataFusionError, Result};
use crate::utils::example_metadata::parse_main_rs_docs;
/// Well-known abbreviations used to preserve correct capitalization
/// when generating human-readable documentation titles.
const ABBREVIATIONS: &[(&str, &str)] = &[
("dataframe", "DataFrame"),
("io", "IO"),
("sql", "SQL"),
("udf", "UDF"),
];
/// A group of related examples (e.g. `builtin_functions`, `udf`).
///
/// Each group corresponds to a directory containing a `main.rs` file
/// with structured documentation comments.
#[derive(Debug)]
pub struct ExampleGroup {
pub name: GroupName,
pub examples: Vec<ExampleEntry>,
pub category: Category,
}
impl ExampleGroup {
/// Parses an example group from its directory.
///
/// The group name is derived from the directory name, and example
/// entries are extracted from `main.rs`.
pub fn from_dir(dir: &Path, category: Category) -> Result<Self> {
let raw_name = dir
.file_name()
.and_then(|s| s.to_str())
.ok_or_else(|| {
DataFusionError::Execution("Invalid example group dir".to_string())
})?
.to_string();
let name = GroupName::from_dir_name(raw_name);
let main_rs = dir.join("main.rs");
let examples = parse_main_rs_docs(&main_rs)?;
Ok(Self {
name,
examples,
category,
})
}
}
/// Represents an example group name in both raw and human-readable forms.
///
/// For example:
/// - raw: `builtin_functions`
/// - title: `Builtin Functions`
#[derive(Debug)]
pub struct GroupName {
raw: String,
title: String,
}
impl GroupName {
/// Creates a group name from a directory name.
pub fn from_dir_name(raw: String) -> Self {
let title = raw
.split('_')
.map(format_part)
.collect::<Vec<_>>()
.join(" ");
Self { raw, title }
}
/// Returns the raw group name (directory name).
pub fn raw(&self) -> &str {
&self.raw
}
/// Returns a title-cased name for documentation.
pub fn title(&self) -> &str {
&self.title
}
}
/// A single runnable example within a group.
///
/// Each entry corresponds to a subcommand documented in `main.rs`.
#[derive(Debug)]
pub struct ExampleEntry {
/// CLI subcommand name.
pub subcommand: String,
/// Rust source file name.
pub file: String,
/// Human-readable description.
pub desc: String,
}
/// Execution category of an example group.
#[derive(Debug, Default)]
pub enum Category {
/// Runs in a single process.
#[default]
SingleProcess,
/// Requires a distributed setup.
Distributed,
}
impl Category {
/// Returns the display name used in documentation.
pub fn name(&self) -> &str {
match self {
Self::SingleProcess => "Single Process",
Self::Distributed => "Distributed",
}
}
/// Determines the category for a group by name.
pub fn for_group(name: &str) -> Self {
match name {
"flight" => Category::Distributed,
_ => Category::SingleProcess,
}
}
}
/// Formats a single group-name segment for display.
///
/// This function applies DataFusion-specific capitalization rules:
/// - Known abbreviations (e.g. `sql`, `io`, `udf`) are rendered in all caps
/// - All other segments fall back to standard Title Case
fn format_part(part: &str) -> String {
let lower = part.to_ascii_lowercase();
if let Some((_, replacement)) = ABBREVIATIONS.iter().find(|(k, _)| *k == lower) {
return replacement.to_string();
}
let mut chars = part.chars();
match chars.next() {
Some(first) => first.to_uppercase().collect::<String>() + chars.as_str(),
None => String::new(),
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::utils::example_metadata::test_utils::{
assert_exec_err_contains, example_group_from_docs,
};
use std::fs;
use tempfile::TempDir;
#[test]
fn category_for_group_works() {
assert!(matches!(
Category::for_group("flight"),
Category::Distributed
));
assert!(matches!(
Category::for_group("anything_else"),
Category::SingleProcess
));
}
#[test]
fn all_subcommand_is_ignored() -> Result<()> {
let group = example_group_from_docs(
r#"
//! - `all` — run all examples included in this module
//!
//! - `foo`
//! (file: foo.rs, desc: foo example)
"#,
)?;
assert_eq!(group.examples.len(), 1);
assert_eq!(group.examples[0].subcommand, "foo");
Ok(())
}
#[test]
fn metadata_without_subcommand_fails() {
let err = example_group_from_docs("//! (file: foo.rs, desc: missing subcommand)")
.unwrap_err();
assert_exec_err_contains(err, "Metadata without preceding subcommand");
}
#[test]
fn group_name_handles_abbreviations() {
assert_eq!(
GroupName::from_dir_name("dataframe".to_string()).title(),
"DataFrame"
);
assert_eq!(
GroupName::from_dir_name("data_io".to_string()).title(),
"Data IO"
);
assert_eq!(
GroupName::from_dir_name("sql_ops".to_string()).title(),
"SQL Ops"
);
assert_eq!(GroupName::from_dir_name("udf".to_string()).title(), "UDF");
}
#[test]
fn group_name_title_cases() {
let cases = [
("very_long_group_name", "Very Long Group Name"),
("foo", "Foo"),
("dataframe", "DataFrame"),
("data_io", "Data IO"),
("sql_ops", "SQL Ops"),
("udf", "UDF"),
];
for (input, expected) in cases {
let name = GroupName::from_dir_name(input.to_string());
assert_eq!(name.title(), expected);
}
}
#[test]
fn parse_group_example_works() -> Result<()> {
let tmp = TempDir::new().unwrap();
// Simulate: examples/builtin_functions/
let group_dir = tmp.path().join("builtin_functions");
fs::create_dir(&group_dir)?;
// Write a fake main.rs with docs
let main_rs = group_dir.join("main.rs");
fs::write(
&main_rs,
r#"
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
//
//! # These are miscellaneous function-related examples
//!
//! These examples demonstrate miscellaneous function-related features.
//!
//! ## Usage
//! ```bash
//! cargo run --example builtin_functions -- [all|date_time|function_factory|regexp]
//! ```
//!
//! Each subcommand runs a corresponding example:
//! - `all` — run all examples included in this module
//!
//! - `date_time`
//! (file: date_time.rs, desc: Examples of date-time related functions and queries)
//!
//! - `function_factory`
//! (file: function_factory.rs, desc: Register `CREATE FUNCTION` handler to implement SQL macros)
//!
//! - `regexp`
//! (file: regexp.rs, desc: Examples of using regular expression functions)
"#,
)?;
let group = ExampleGroup::from_dir(&group_dir, Category::SingleProcess)?;
// Assert group-level data
assert_eq!(group.name.title(), "Builtin Functions");
assert_eq!(group.examples.len(), 3);
// Assert 1 example
assert_eq!(group.examples[0].subcommand, "date_time");
assert_eq!(group.examples[0].file, "date_time.rs");
assert_eq!(
group.examples[0].desc,
"Examples of date-time related functions and queries"
);
// Assert 2 example
assert_eq!(group.examples[1].subcommand, "function_factory");
assert_eq!(group.examples[1].file, "function_factory.rs");
assert_eq!(
group.examples[1].desc,
"Register `CREATE FUNCTION` handler to implement SQL macros"
);
// Assert 3 example
assert_eq!(group.examples[2].subcommand, "regexp");
assert_eq!(group.examples[2].file, "regexp.rs");
assert_eq!(
group.examples[2].desc,
"Examples of using regular expression functions"
);
Ok(())
}
#[test]
fn duplicate_metadata_without_repeating_subcommand_fails() {
let err = example_group_from_docs(
r#"
//! - `foo`
//! (file: a.rs, desc: first)
//! (file: b.rs, desc: second)
"#,
)
.unwrap_err();
assert_exec_err_contains(err, "Metadata without preceding subcommand");
}
#[test]
fn duplicate_metadata_for_same_subcommand_fails() {
let err = example_group_from_docs(
r#"
//! - `foo`
//! (file: a.rs, desc: first)
//!
//! - `foo`
//! (file: b.rs, desc: second)
"#,
)
.unwrap_err();
assert_exec_err_contains(err, "Duplicate metadata for subcommand `foo`");
}
#[test]
fn metadata_must_follow_subcommand() {
let err = example_group_from_docs(
r#"
//! - `foo`
//! some unrelated comment
//! (file: foo.rs, desc: test)
"#,
)
.unwrap_err();
assert_exec_err_contains(err, "Metadata without preceding subcommand");
}
#[test]
fn preserves_example_order_from_main_rs() -> Result<()> {
let group = example_group_from_docs(
r#"
//! - `second`
//! (file: second.rs, desc: second example)
//!
//! - `first`
//! (file: first.rs, desc: first example)
//!
//! - `third`
//! (file: third.rs, desc: third example)
"#,
)?;
let subcommands: Vec<&str> = group
.examples
.iter()
.map(|e| e.subcommand.as_str())
.collect();
assert_eq!(
subcommands,
vec!["second", "first", "third"],
"examples must preserve the order defined in main.rs"
);
Ok(())
}
#[test]
fn metadata_can_follow_blank_doc_line() -> Result<()> {
let group = example_group_from_docs(
r#"
//! - `foo`
//!
//! (file: foo.rs, desc: test)
"#,
)?;
assert_eq!(group.examples.len(), 1);
Ok(())
}
}