| // Licensed to the Apache Software Foundation (ASF) under one |
| // or more contributor license agreements. See the NOTICE file |
| // distributed with this work for additional information |
| // regarding copyright ownership. The ASF licenses this file |
| // to you under the Apache License, Version 2.0 (the |
| // "License"); you may not use this file except in compliance |
| // with the License. You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, |
| // software distributed under the License is distributed on an |
| // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| // KIND, either express or implied. See the License for the |
| // specific language governing permissions and limitations |
| // under the License. |
| |
| //! Parser for example metadata embedded in `main.rs` documentation comments. |
| //! |
| //! This module scans `//!` doc comments to extract example subcommands |
| //! and their associated metadata (file name and description), enforcing |
| //! a strict ordering and structure to avoid ambiguous documentation. |
| |
| use std::{collections::HashSet, fs, path::Path}; |
| |
| use datafusion::common::exec_err; |
| use datafusion::error::Result; |
| use nom::{ |
| Err, IResult, Parser, |
| bytes::complete::{tag, take_until, take_while}, |
| character::complete::multispace0, |
| combinator::all_consuming, |
| error::{Error, ErrorKind}, |
| sequence::{delimited, preceded}, |
| }; |
| |
| use crate::utils::example_metadata::ExampleEntry; |
| |
/// Parsing state machine used while scanning `main.rs` docs.
///
/// This makes the "subcommand - metadata" relationship explicit:
/// a metadata line is only valid while a subcommand is pending.
///
/// The lifetime `'a` is that of the borrowed subcommand name held by
/// [`ParserState::SeenSubcommand`]. The type is `Copy`, so matching on a
/// state value never moves it, and `Debug`/`PartialEq` make it usable in
/// diagnostics and assertions.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum ParserState<'a> {
    /// Not currently expecting metadata.
    Idle,
    /// A subcommand was just parsed; the next valid metadata (if any)
    /// must belong to this subcommand.
    SeenSubcommand(&'a str),
}
| |
| /// Parses a subcommand declaration line from `main.rs` docs. |
| /// |
| /// Expected format: |
| /// ```text |
| /// //! - `<subcommand>` |
| /// ``` |
| fn parse_subcommand_line(input: &str) -> IResult<&str, &str> { |
| let parser = preceded( |
| multispace0, |
| delimited(tag("//! - `"), take_until("`"), tag("`")), |
| ); |
| all_consuming(parser).parse(input) |
| } |
| |
| /// Parses example metadata (file name and description) from `main.rs` docs. |
| /// |
| /// Expected format: |
| /// ```text |
| /// //! (file: <file>.rs, desc: <description>) |
| /// ``` |
| fn parse_metadata_line(input: &str) -> IResult<&str, (&str, &str)> { |
| let parser = preceded( |
| multispace0, |
| preceded(tag("//!"), preceded(multispace0, take_while(|_| true))), |
| ); |
| let (rest, payload) = all_consuming(parser).parse(input)?; |
| |
| let content = payload |
| .strip_prefix("(") |
| .and_then(|s| s.strip_suffix(")")) |
| .ok_or_else(|| Err::Error(Error::new(payload, ErrorKind::Tag)))?; |
| |
| let (file, desc) = content |
| .strip_prefix("file:") |
| .ok_or_else(|| Err::Error(Error::new(payload, ErrorKind::Tag)))? |
| .split_once(", desc:") |
| .ok_or_else(|| Err::Error(Error::new(payload, ErrorKind::Tag)))?; |
| |
| Ok((rest, (file.trim(), desc.trim()))) |
| } |
| |
| /// Parses example entries from a group's `main.rs` file. |
| pub fn parse_main_rs_docs(path: &Path) -> Result<Vec<ExampleEntry>> { |
| let content = fs::read_to_string(path)?; |
| let mut entries = vec![]; |
| let mut state = ParserState::Idle; |
| let mut seen_subcommands = HashSet::new(); |
| |
| for (line_no, raw_line) in content.lines().enumerate() { |
| let line = raw_line.trim(); |
| |
| // Try parsing subcommand, excluding `all` because it's not used in README |
| if let Ok((_, sub)) = parse_subcommand_line(line) { |
| state = if sub == "all" { |
| ParserState::Idle |
| } else { |
| ParserState::SeenSubcommand(sub) |
| }; |
| continue; |
| } |
| |
| // Try parsing metadata |
| if let Ok((_, (file, desc))) = parse_metadata_line(line) { |
| let subcommand = match state { |
| ParserState::SeenSubcommand(s) => s, |
| ParserState::Idle => { |
| return exec_err!( |
| "Metadata without preceding subcommand at {}:{}", |
| path.display(), |
| line_no + 1 |
| ); |
| } |
| }; |
| |
| if !seen_subcommands.insert(subcommand) { |
| return exec_err!("Duplicate metadata for subcommand `{subcommand}`"); |
| } |
| |
| entries.push(ExampleEntry { |
| subcommand: subcommand.to_string(), |
| file: file.to_string(), |
| desc: desc.to_string(), |
| }); |
| |
| state = ParserState::Idle; |
| continue; |
| } |
| |
| // If a non-blank doc line interrupts a pending subcommand, reset the state |
| if let ParserState::SeenSubcommand(_) = state |
| && is_non_blank_doc_line(line) |
| { |
| state = ParserState::Idle; |
| } |
| } |
| |
| Ok(entries) |
| } |
| |
/// Returns `true` for non-blank Rust doc comment lines (`//!`).
///
/// Used to detect when a subcommand is interrupted by unrelated documentation,
/// so metadata is only accepted immediately after a subcommand (blank doc lines
/// are allowed in between).
///
/// `line` must start with `//!` directly (no leading whitespace is skipped),
/// and the text after that single marker must contain at least one
/// non-whitespace character.
fn is_non_blank_doc_line(line: &str) -> bool {
    // `strip_prefix` removes exactly one `//!` marker. `trim_start_matches`
    // would strip every repeated marker, so a doc line whose text is itself
    // `//!` (e.g. `//!//!`) would wrongly be classified as blank.
    line.strip_prefix("//!")
        .is_some_and(|text| !text.trim().is_empty())
}
| |
#[cfg(test)]
mod tests {
    use super::*;

    use tempfile::TempDir;

    /// Writes `contents` to a `main.rs` inside a fresh temporary directory
    /// and runs [`parse_main_rs_docs`] on it.
    fn parse_docs(contents: &str) -> Result<Vec<ExampleEntry>> {
        let tmp = TempDir::new()?;
        let main_rs = tmp.path().join("main.rs");
        fs::write(&main_rs, contents)?;
        parse_main_rs_docs(&main_rs)
    }

    /// Asserts that parsing `contents` fails with a message containing `needle`.
    fn assert_parse_error(contents: &str, needle: &str) {
        match parse_docs(contents) {
            Ok(_) => panic!("expected an error containing `{needle}`"),
            Err(e) => {
                let msg = e.to_string();
                assert!(msg.contains(needle), "unexpected error message: {msg}");
            }
        }
    }

    #[test]
    fn parse_subcommand_line_accepts_valid_input() {
        let line = "//! - `date_time`";
        let sub = parse_subcommand_line(line);
        assert_eq!(sub, Ok(("", "date_time")));
    }

    #[test]
    fn parse_subcommand_line_invalid_inputs() {
        let err_lines = [
            "//! - ",
            "//! - foo",
            "//! - `foo` bar",
            "//! --",
            "//!-",
            "//!--",
            "//!",
            "//",
            "/",
            "",
        ];
        for line in err_lines {
            assert!(
                parse_subcommand_line(line).is_err(),
                "expected error for input: {line}"
            );
        }
    }

    #[test]
    fn parse_metadata_line_accepts_valid_input() {
        let line =
            "//! (file: date_time.rs, desc: Examples of date-time related functions)";
        let res = parse_metadata_line(line);
        assert_eq!(
            res,
            Ok((
                "",
                ("date_time.rs", "Examples of date-time related functions")
            ))
        );

        // Commas inside the description are preserved.
        let line = "//! (file: foo.rs, desc: Foo, bar, baz)";
        let res = parse_metadata_line(line);
        assert_eq!(res, Ok(("", ("foo.rs", "Foo, bar, baz"))));

        // Parentheses inside the description are preserved.
        let line = "//! (file: foo.rs, desc: Foo(FOO))";
        let res = parse_metadata_line(line);
        assert_eq!(res, Ok(("", ("foo.rs", "Foo(FOO)"))));
    }

    #[test]
    fn parse_metadata_line_invalid_inputs() {
        let bad_lines = [
            "//! (file: foo.rs)",
            "//! (desc: missing file)",
            "//! file: foo.rs, desc: test",
            "//! file: foo.rs,desc: test",
            "//! (file: foo.rs desc: test)",
            "//! (file: foo.rs,desc: test)",
            "//! (desc: test, file: foo.rs)",
            "//! ()",
            "//! (file: foo.rs, desc: test) extra",
            "",
        ];
        for line in bad_lines {
            assert!(
                parse_metadata_line(line).is_err(),
                "expected error for input: {line}"
            );
        }
    }

    #[test]
    fn is_non_blank_doc_line_classifies_lines() {
        assert!(is_non_blank_doc_line("//! some text"));
        assert!(is_non_blank_doc_line("//!text"));
        assert!(!is_non_blank_doc_line("//!"));
        assert!(!is_non_blank_doc_line("//!   "));
        assert!(!is_non_blank_doc_line("// regular comment"));
        assert!(!is_non_blank_doc_line(""));
    }

    #[test]
    fn parse_main_rs_docs_extracts_entries() -> Result<()> {
        let entries = parse_docs(
            r#"
//! - `foo`
//! (file: foo.rs, desc: first example)
//!
//! - `bar`
//! (file: bar.rs, desc: second example)
"#,
        )?;

        assert_eq!(entries.len(), 2);

        assert_eq!(entries[0].subcommand, "foo");
        assert_eq!(entries[0].file, "foo.rs");
        assert_eq!(entries[0].desc, "first example");

        assert_eq!(entries[1].subcommand, "bar");
        assert_eq!(entries[1].file, "bar.rs");
        assert_eq!(entries[1].desc, "second example");
        Ok(())
    }

    #[test]
    fn parse_main_rs_docs_skips_all_subcommand() -> Result<()> {
        let entries = parse_docs(
            r#"
//! - `all`
//!
//! - `foo`
//! (file: foo.rs, desc: only example)
"#,
        )?;

        assert_eq!(entries.len(), 1);
        assert_eq!(entries[0].subcommand, "foo");
        Ok(())
    }

    #[test]
    fn parse_main_rs_docs_allows_blank_doc_line_before_metadata() -> Result<()> {
        let entries = parse_docs(
            r#"
//! - `foo`
//!
//! (file: foo.rs, desc: separated by a blank doc line)
"#,
        )?;

        assert_eq!(entries.len(), 1);
        assert_eq!(entries[0].subcommand, "foo");
        assert_eq!(entries[0].file, "foo.rs");
        Ok(())
    }

    #[test]
    fn parse_main_rs_docs_rejects_metadata_without_subcommand() {
        // The metadata sits on the first line, so the error points at line 1.
        assert_parse_error(
            "//! (file: foo.rs, desc: orphan)\n",
            "Metadata without preceding subcommand",
        );
        assert_parse_error("//! (file: foo.rs, desc: orphan)\n", "main.rs:1");
    }

    #[test]
    fn parse_main_rs_docs_rejects_metadata_after_interrupting_doc_line() {
        assert_parse_error(
            r#"
//! - `foo`
//! some unrelated text
//! (file: foo.rs, desc: too late)
"#,
            "Metadata without preceding subcommand",
        );
    }

    #[test]
    fn parse_main_rs_docs_rejects_duplicate_subcommand() {
        assert_parse_error(
            r#"
//! - `foo`
//! (file: foo.rs, desc: first)
//! - `foo`
//! (file: other.rs, desc: second)
"#,
            "Duplicate metadata for subcommand `foo`",
        );
    }
}