blob: 4ead3e5a2ae9f60321a8767e504706cfb3696c36 [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
//! Parser for example metadata embedded in `main.rs` documentation comments.
//!
//! This module scans `//!` doc comments to extract example subcommands
//! and their associated metadata (file name and description), enforcing
//! a strict ordering and structure to avoid ambiguous documentation.
use std::{collections::HashSet, fs, path::Path};
use datafusion::common::exec_err;
use datafusion::error::Result;
use nom::{
Err, IResult, Parser,
bytes::complete::{tag, take_until, take_while},
character::complete::multispace0,
combinator::all_consuming,
error::{Error, ErrorKind},
sequence::{delimited, preceded},
};
use crate::utils::example_metadata::ExampleEntry;
/// Parsing state machine used while scanning `main.rs` docs.
///
/// This makes the "subcommand - metadata" relationship explicit:
/// metadata is only valid immediately after a subcommand has been seen.
enum ParserState<'a> {
/// Not currently expecting metadata.
Idle,
/// A subcommand was just parsed; the next valid metadata (if any)
/// must belong to this subcommand.
SeenSubcommand(&'a str),
}
/// Parses a subcommand declaration line from `main.rs` docs.
///
/// Expected format:
/// ```text
/// //! - `<subcommand>`
/// ```
fn parse_subcommand_line(input: &str) -> IResult<&str, &str> {
let parser = preceded(
multispace0,
delimited(tag("//! - `"), take_until("`"), tag("`")),
);
all_consuming(parser).parse(input)
}
/// Parses example metadata (file name and description) from `main.rs` docs.
///
/// Expected format:
/// ```text
/// //! (file: <file>.rs, desc: <description>)
/// ```
fn parse_metadata_line(input: &str) -> IResult<&str, (&str, &str)> {
let parser = preceded(
multispace0,
preceded(tag("//!"), preceded(multispace0, take_while(|_| true))),
);
let (rest, payload) = all_consuming(parser).parse(input)?;
let content = payload
.strip_prefix("(")
.and_then(|s| s.strip_suffix(")"))
.ok_or_else(|| Err::Error(Error::new(payload, ErrorKind::Tag)))?;
let (file, desc) = content
.strip_prefix("file:")
.ok_or_else(|| Err::Error(Error::new(payload, ErrorKind::Tag)))?
.split_once(", desc:")
.ok_or_else(|| Err::Error(Error::new(payload, ErrorKind::Tag)))?;
Ok((rest, (file.trim(), desc.trim())))
}
/// Parses example entries from a group's `main.rs` file.
pub fn parse_main_rs_docs(path: &Path) -> Result<Vec<ExampleEntry>> {
let content = fs::read_to_string(path)?;
let mut entries = vec![];
let mut state = ParserState::Idle;
let mut seen_subcommands = HashSet::new();
for (line_no, raw_line) in content.lines().enumerate() {
let line = raw_line.trim();
// Try parsing subcommand, excluding `all` because it's not used in README
if let Ok((_, sub)) = parse_subcommand_line(line) {
state = if sub == "all" {
ParserState::Idle
} else {
ParserState::SeenSubcommand(sub)
};
continue;
}
// Try parsing metadata
if let Ok((_, (file, desc))) = parse_metadata_line(line) {
let subcommand = match state {
ParserState::SeenSubcommand(s) => s,
ParserState::Idle => {
return exec_err!(
"Metadata without preceding subcommand at {}:{}",
path.display(),
line_no + 1
);
}
};
if !seen_subcommands.insert(subcommand) {
return exec_err!("Duplicate metadata for subcommand `{subcommand}`");
}
entries.push(ExampleEntry {
subcommand: subcommand.to_string(),
file: file.to_string(),
desc: desc.to_string(),
});
state = ParserState::Idle;
continue;
}
// If a non-blank doc line interrupts a pending subcommand, reset the state
if let ParserState::SeenSubcommand(_) = state
&& is_non_blank_doc_line(line)
{
state = ParserState::Idle;
}
}
Ok(entries)
}
/// Returns `true` for non-blank Rust doc comment lines (`//!`).
///
/// Used to detect when a subcommand is interrupted by unrelated documentation,
/// so metadata is only accepted immediately after a subcommand (blank doc lines
/// are allowed in between).
fn is_non_blank_doc_line(line: &str) -> bool {
line.starts_with("//!") && !line.trim_start_matches("//!").trim().is_empty()
}
#[cfg(test)]
mod tests {
use super::*;
use tempfile::TempDir;
#[test]
fn parse_subcommand_line_accepts_valid_input() {
let line = "//! - `date_time`";
let sub = parse_subcommand_line(line);
assert_eq!(sub, Ok(("", "date_time")));
}
#[test]
fn parse_subcommand_line_invalid_inputs() {
let err_lines = [
"//! - ",
"//! - foo",
"//! - `foo` bar",
"//! --",
"//!-",
"//!--",
"//!",
"//",
"/",
"",
];
for line in err_lines {
assert!(
parse_subcommand_line(line).is_err(),
"expected error for input: {line}"
);
}
}
#[test]
fn parse_metadata_line_accepts_valid_input() {
let line =
"//! (file: date_time.rs, desc: Examples of date-time related functions)";
let res = parse_metadata_line(line);
assert_eq!(
res,
Ok((
"",
("date_time.rs", "Examples of date-time related functions")
))
);
let line = "//! (file: foo.rs, desc: Foo, bar, baz)";
let res = parse_metadata_line(line);
assert_eq!(res, Ok(("", ("foo.rs", "Foo, bar, baz"))));
let line = "//! (file: foo.rs, desc: Foo(FOO))";
let res = parse_metadata_line(line);
assert_eq!(res, Ok(("", ("foo.rs", "Foo(FOO)"))));
}
#[test]
fn parse_metadata_line_invalid_inputs() {
let bad_lines = [
"//! (file: foo.rs)",
"//! (desc: missing file)",
"//! file: foo.rs, desc: test",
"//! file: foo.rs,desc: test",
"//! (file: foo.rs desc: test)",
"//! (file: foo.rs,desc: test)",
"//! (desc: test, file: foo.rs)",
"//! ()",
"//! (file: foo.rs, desc: test) extra",
"",
];
for line in bad_lines {
assert!(
parse_metadata_line(line).is_err(),
"expected error for input: {line}"
);
}
}
#[test]
fn parse_main_rs_docs_extracts_entries() -> Result<()> {
let tmp = TempDir::new().unwrap();
let main_rs = tmp.path().join("main.rs");
fs::write(
&main_rs,
r#"
//! - `foo`
//! (file: foo.rs, desc: first example)
//!
//! - `bar`
//! (file: bar.rs, desc: second example)
"#,
)?;
let entries = parse_main_rs_docs(&main_rs)?;
assert_eq!(entries.len(), 2);
assert_eq!(entries[0].subcommand, "foo");
assert_eq!(entries[0].file, "foo.rs");
assert_eq!(entries[0].desc, "first example");
assert_eq!(entries[1].subcommand, "bar");
assert_eq!(entries[1].file, "bar.rs");
assert_eq!(entries[1].desc, "second example");
Ok(())
}
}