// blob: 2484eddb47fc650c4fd3d7f431974d74101a4844 [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
use anyhow::{Context, anyhow};
use anyhow::{Result, bail};
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use std::fs;
use std::fs::read_dir;
use std::str::FromStr;
use syn::{
Expr, ExprLit, Field, GenericArgument, Item, Lit, LitStr, Meta, PathArguments, Type, TypePath,
};
pub type Services = HashMap<String, Service>;
/// Keep the services accepted by `test` and sort each kept service's configs by name.
///
/// `test` is called with the service name (the map key); a service is dropped when it
/// returns `false`. Only the config list inside each service is ordered: the returned
/// container is still a `HashMap`, so the services themselves have no defined order.
pub fn sorted_services(services: Services, test: fn(&str) -> bool) -> Services {
    services
        .into_iter()
        .filter(|(service_name, _)| test(service_name.as_str()))
        .map(|(service_name, service)| {
            let mut config = service.config;
            // Lexicographic order on the config name.
            config.sort_by(|lhs, rhs| lhs.name.cmp(&rhs.name));
            (service_name, Service { config })
        })
        .collect()
}
/// Service represents a service supported by opendal core, like `s3` and `fs`.
///
/// It carries the list of configuration items parsed from the service's config struct.
#[derive(Debug, Clone, Eq, PartialEq, Ord, PartialOrd, Serialize, Deserialize)]
pub struct Service {
/// All configurations for this service.
pub config: Vec<Config>,
}
/// Config represents a configuration item for a service.
///
/// One `Config` is produced per field of the service's config struct.
#[derive(Debug, Clone, Eq, PartialEq, Ord, PartialOrd, Serialize, Deserialize)]
pub struct Config {
/// The name of this config, for example, `access_key_id` and `secret_access_key`.
pub name: String,
/// The value type of this config.
pub value: ConfigType,
/// Whether the given config is optional.
///
/// The parser sets this for fields wrapped in `Option` and for `bool` fields.
pub optional: bool,
/// If this field is deprecated, the content of its `#[deprecated]` attribute.
pub deprecated: Option<AttrDeprecated>,
/// The comments for this config.
///
/// Every doc line is trimmed of surrounding whitespace and the lines are joined
/// with `\n`; an empty doc line is kept as an empty line.
pub comments: String,
}
/// The value type of a config item.
///
/// Each variant corresponds to the Rust type name written on the config field; see the
/// `FromStr` implementation for the exact names that are accepted.
#[derive(Debug, Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Serialize, Deserialize)]
pub enum ConfigType {
/// Mapping to rust's `bool`
Bool,
/// Mapping to rust's `String`
String,
/// Mapping to rust's `Duration`
Duration,
/// Mapping to rust's `usize`
Usize,
/// Mapping to rust's `u64`
U64,
/// Mapping to rust's `i64`
I64,
/// Mapping to rust's `u32`
U32,
/// Mapping to rust's `u16`
U16,
/// Mapping to rust's `Vec`
///
/// Please note: every `Vec` in a config is written as a `,` separated string.
Vec,
}
impl FromStr for ConfigType {
    type Err = anyhow::Error;

    /// Map a Rust type name (for example `usize` or `String`) to its [`ConfigType`].
    ///
    /// The comparison is exact and case-sensitive; any other name is reported as an
    /// "unsupported config type" error.
    fn from_str(s: &str) -> Result<Self> {
        // Every supported type name next to the variant it maps to.
        const SUPPORTED: [(&str, ConfigType); 9] = [
            ("bool", ConfigType::Bool),
            ("String", ConfigType::String),
            ("Duration", ConfigType::Duration),
            ("usize", ConfigType::Usize),
            ("u64", ConfigType::U64),
            ("i64", ConfigType::I64),
            ("u32", ConfigType::U32),
            ("u16", ConfigType::U16),
            ("Vec", ConfigType::Vec),
        ];

        match SUPPORTED.iter().find(|(type_name, _)| *type_name == s) {
            Some((_, config_type)) => Ok(*config_type),
            None => bail!("unsupported config type {v}", v = s),
        }
    }
}
/// The deprecated attribute for a field.
///
/// For given field:
///
/// ```text
/// #[deprecated(
/// since = "0.52.0",
/// note = "Please use `delete_max_size` instead of `batch_max_operations`"
/// )]
/// pub batch_max_operations: Option<usize>,
/// ```
///
/// We will have:
///
/// ```text
/// AttrDeprecated {
/// since: "0.52.0",
/// note: "Please use `delete_max_size` instead of `batch_max_operations`"
/// }
/// ```
///
/// A key that is absent from the attribute leaves the matching field as an empty string.
#[derive(Debug, Default, Clone, Eq, PartialEq, Ord, PartialOrd, Serialize, Deserialize)]
pub struct AttrDeprecated {
/// The `since` of this deprecated field.
pub since: String,
/// The `note` for this deprecated field.
pub note: String,
}
/// List and parse given path to a `Services` struct.
pub fn parse(path: &str) -> Result<Services> {
let mut map = HashMap::default();
for dir in read_dir(path)? {
let dir = dir?;
if dir.file_type()?.is_file() {
continue;
}
let path = dir.path().join("config.rs");
let content = fs::read_to_string(&path)?;
let parser = ServiceParser {
service: dir.file_name().to_string_lossy().to_string(),
path: path.to_string_lossy().to_string(),
content,
};
let service = parser.parse().context(format!("path: {path:?}"))?;
map.insert(parser.service, service);
}
Ok(map)
}
/// ServiceParser is used to parse a service config file.
pub struct ServiceParser {
// Name of the service being parsed; it appears in the debug log messages.
service: String,
// Path of the config file; it appears in the error raised when no config struct is found.
path: String,
// Rust source text of the config file, handed to `syn::parse_file`.
content: String,
}
/// A typical service config will look like this:
///
/// ```
/// #[derive(Default, Serialize, Deserialize, Clone, PartialEq, Eq)]
/// #[serde(default)]
/// #[non_exhaustive]
/// pub struct S3Config {
/// /// root of this backend.
/// ///
/// /// All operations will happen under this root.
/// ///
/// /// default to `/` if not set.
/// pub root: Option<String>,
/// /// bucket name of this backend.
/// ///
/// /// required.
/// pub bucket: String,
/// /// is bucket versioning enabled for this bucket
/// pub enable_versioning: bool,
/// }
/// ```
impl ServiceParser {
/// Parse the content of this service.
fn parse(&self) -> Result<Service> {
log::debug!("service {} parse started", self.service);
let ast = syn::parse_file(&self.content)?;
let config_struct = ast
.items
.iter()
.find_map(|v| {
if let Item::Struct(v) = v {
if v.ident.to_string().contains("Config") {
return Some(v.clone());
}
}
None
})
.ok_or_else(|| anyhow!("there is no Config in {}", &self.path))?;
let mut config = Vec::with_capacity(config_struct.fields.len());
for field in config_struct.fields {
let field = Self::parse_field(field)?;
config.push(field);
}
log::debug!("service {} parse finished", self.service);
Ok(Service { config })
}
/// TODO: Add comment parse support.
fn parse_field(field: Field) -> Result<Config> {
let name = field
.ident
.clone()
.ok_or_else(|| anyhow!("field name is missing for {:?}", &field))?;
let deprecated = Self::parse_attr_deprecated(&field)?;
let comments = Self::parse_attr_comments(&field)?;
let (cfg_type, optional) = match &field.ty {
Type::Path(TypePath { path, .. }) => {
let segment = path
.segments
.last()
.ok_or_else(|| anyhow!("config type must be provided for {field:?}"))?;
let optional = segment.ident == "Option";
let type_name = if optional {
if let PathArguments::AngleBracketed(args) = &segment.arguments {
if let Some(GenericArgument::Type(Type::Path(inner_path))) =
args.args.first()
{
if let Some(inner_segment) = inner_path.path.segments.last() {
inner_segment.ident.to_string()
} else {
unreachable!("Option must have segment")
}
} else {
unreachable!("Option must have GenericArgument")
}
} else {
unreachable!("Option must have angle bracketed arguments")
}
} else {
segment.ident.to_string()
};
let typ = type_name.as_str().parse()?;
let optional = optional || typ == ConfigType::Bool;
(typ, optional)
}
v => return Err(anyhow!("unsupported config type {v:?}")),
};
Ok(Config {
name: name.to_string(),
value: cfg_type,
optional,
deprecated,
comments,
})
}
/// Parse the comments attr from the field.
///
/// This comment may have multiple lines.
fn parse_attr_comments(field: &Field) -> Result<String> {
Ok(field
.attrs
.iter()
.filter(|attr| attr.path().is_ident("doc"))
.filter_map(|attr| {
if let Meta::NameValue(meta) = &attr.meta {
if let Expr::Lit(ExprLit {
lit: Lit::Str(s), ..
}) = &meta.value
{
return Some(s.value().trim().to_string());
}
}
None
})
.collect::<Vec<_>>()
.join("\n"))
}
/// Parse the deprecated attr from the field.
///
/// ```text
/// #[deprecated(
/// since = "0.52.0",
/// note = "Please use `delete_max_size` instead of `batch_max_operations`"
/// )]
/// pub batch_max_operations: Option<usize>,
/// ```
fn parse_attr_deprecated(field: &Field) -> Result<Option<AttrDeprecated>> {
let deprecated: Vec<_> = field
.attrs
.iter()
.filter(|attr| attr.path().is_ident("deprecated"))
.collect();
if deprecated.len() > 1 {
return Err(anyhow!("only one deprecated attribute is allowed"));
}
let Some(attr) = deprecated.first() else {
return Ok(None);
};
let mut result = AttrDeprecated::default();
attr.parse_nested_meta(|meta| {
// this parses the `since`
if meta.path.is_ident("since") {
// this parses the `=`
let value = meta.value()?;
// this parses the value
let s: LitStr = value.parse()?;
result.since = s.value();
}
// this parses the `note`
if meta.path.is_ident("note") {
// this parses the `=`
let value = meta.value()?;
// this parses the value
let s: LitStr = value.parse()?;
result.note = s.value();
}
Ok(())
})?;
Ok(Some(result))
}
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::workspace_dir;
    use pretty_assertions::assert_eq;
    use syn::ItemStruct;

    /// Plain and `Option`-wrapped fields map to the expected type and optionality.
    #[test]
    fn test_parse_field() {
        let cases = vec![
            (
                "pub root: Option<String>",
                Config {
                    name: "root".to_string(),
                    value: ConfigType::String,
                    optional: true,
                    deprecated: None,
                    comments: "".to_string(),
                },
            ),
            (
                "root: String",
                Config {
                    name: "root".to_string(),
                    value: ConfigType::String,
                    optional: false,
                    deprecated: None,
                    comments: "".to_string(),
                },
            ),
        ];
        for (input, expected) in cases {
            let input = format!("struct Test {{ {input} }}");
            let x: ItemStruct = syn::parse_str(&input).unwrap();
            let actual =
                ServiceParser::parse_field(x.fields.iter().next().unwrap().clone()).unwrap();
            assert_eq!(actual, expected);
        }
    }

    /// Doc lines are trimmed and joined with `\n`; an empty `///` line stays as an empty
    /// line, hence the `\n\n` in the expected text.
    #[test]
    fn test_parse_field_comments() {
        let cases = vec![(
            r"
/// root of this backend.
///
/// All operations will happen under this root.
///
/// default to `/` if not set.
pub root: Option<String>
",
            "root of this backend.\n\nAll operations will happen under this root.\n\ndefault to `/` if not set.",
        )];
        for (input, expected) in cases {
            let input = format!("struct Test {{ {input} }}");
            let x: ItemStruct = syn::parse_str(&input).unwrap();
            let actual =
                ServiceParser::parse_attr_comments(&x.fields.iter().next().unwrap().clone())
                    .unwrap();
            assert_eq!(actual, expected);
        }
    }

    /// Parse a full, realistic service config and spot-check two of its fields.
    #[test]
    fn test_parse_service() {
        let content = r#"
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
use std::fmt::Debug;
use std::fmt::Formatter;
use serde::Deserialize;
use serde::Serialize;
/// Config for Aws S3 and compatible services (including minio, digitalocean space, Tencent Cloud Object Storage(COS) and so on) support.
#[derive(Default, Serialize, Deserialize, Clone, PartialEq, Eq)]
#[serde(default)]
#[non_exhaustive]
pub struct S3Config {
/// root of this backend.
///
/// All operations will happen under this root.
///
/// default to `/` if not set.
pub root: Option<String>,
/// bucket name of this backend.
///
/// required.
pub bucket: String,
/// is bucket versioning enabled for this bucket
pub enable_versioning: bool,
/// endpoint of this backend.
///
/// Endpoint must be full uri, e.g.
///
/// - AWS S3: `https://s3.amazonaws.com` or `https://s3.{region}.amazonaws.com`
/// - Cloudflare R2: `https://<ACCOUNT_ID>.r2.cloudflarestorage.com`
/// - Aliyun OSS: `https://{region}.aliyuncs.com`
/// - Tencent COS: `https://cos.{region}.myqcloud.com`
/// - Minio: `http://127.0.0.1:9000`
///
/// If user inputs endpoint without scheme like "s3.amazonaws.com", we
/// will prepend "https://" before it.
///
/// - If endpoint is set, we will take user's input first.
/// - If not, we will try to load it from environment.
/// - If still not set, default to `https://s3.amazonaws.com`.
pub endpoint: Option<String>,
/// Region represent the signing region of this endpoint. This is required
/// if you are using the default AWS S3 endpoint.
///
/// If using a custom endpoint,
/// - If region is set, we will take user's input first.
/// - If not, we will try to load it from environment.
pub region: Option<String>,
/// access_key_id of this backend.
///
/// - If access_key_id is set, we will take user's input first.
/// - If not, we will try to load it from environment.
pub access_key_id: Option<String>,
/// secret_access_key of this backend.
///
/// - If secret_access_key is set, we will take user's input first.
/// - If not, we will try to load it from environment.
pub secret_access_key: Option<String>,
/// session_token (aka, security token) of this backend.
///
/// This token will expire after sometime, it's recommended to set session_token
/// by hand.
pub session_token: Option<String>,
/// role_arn for this backend.
///
/// If `role_arn` is set, we will use already known config as source
/// credential to assume role with `role_arn`.
pub role_arn: Option<String>,
/// external_id for this backend.
pub external_id: Option<String>,
/// role_session_name for this backend.
pub role_session_name: Option<String>,
/// Disable config load so that opendal will not load config from
/// environment.
///
/// For examples:
///
/// - envs like `AWS_ACCESS_KEY_ID`
/// - files like `~/.aws/config`
pub disable_config_load: bool,
/// Disable load credential from ec2 metadata.
///
/// This option is used to disable the default behavior of opendal
/// to load credential from ec2 metadata, a.k.a, IMDSv2
pub disable_ec2_metadata: bool,
/// Allow anonymous will allow opendal to send request without signing
/// when credential is not loaded.
pub allow_anonymous: bool,
/// server_side_encryption for this backend.
///
/// Available values: `AES256`, `aws:kms`.
pub server_side_encryption: Option<String>,
/// server_side_encryption_aws_kms_key_id for this backend
///
/// - If `server_side_encryption` set to `aws:kms`, and `server_side_encryption_aws_kms_key_id`
/// is not set, S3 will use aws managed kms key to encrypt data.
/// - If `server_side_encryption` set to `aws:kms`, and `server_side_encryption_aws_kms_key_id`
/// is a valid kms key id, S3 will use the provided kms key to encrypt data.
/// - If the `server_side_encryption_aws_kms_key_id` is invalid or not found, an error will be
/// returned.
/// - If `server_side_encryption` is not `aws:kms`, setting `server_side_encryption_aws_kms_key_id`
/// is a noop.
pub server_side_encryption_aws_kms_key_id: Option<String>,
/// server_side_encryption_customer_algorithm for this backend.
///
/// Available values: `AES256`.
pub server_side_encryption_customer_algorithm: Option<String>,
/// server_side_encryption_customer_key for this backend.
///
/// # Value
///
/// base64 encoded key that matches algorithm specified in
/// `server_side_encryption_customer_algorithm`.
pub server_side_encryption_customer_key: Option<String>,
/// Set server_side_encryption_customer_key_md5 for this backend.
///
/// # Value
///
/// MD5 digest of key specified in `server_side_encryption_customer_key`.
pub server_side_encryption_customer_key_md5: Option<String>,
/// default storage_class for this backend.
///
/// Available values:
/// - `DEEP_ARCHIVE`
/// - `GLACIER`
/// - `GLACIER_IR`
/// - `INTELLIGENT_TIERING`
/// - `ONEZONE_IA`
/// - `OUTPOSTS`
/// - `REDUCED_REDUNDANCY`
/// - `STANDARD`
/// - `STANDARD_IA`
///
/// S3 compatible services don't support all of them
pub default_storage_class: Option<String>,
/// Enable virtual host style so that opendal will send API requests
/// in virtual host style instead of path style.
///
/// - By default, opendal will send API to `https://s3.us-east-1.amazonaws.com/bucket_name`
/// - Enabled, opendal will send API to `https://bucket_name.s3.us-east-1.amazonaws.com`
pub enable_virtual_host_style: bool,
/// Set maximum batch operations of this backend.
///
/// Some compatible services have a limit on the number of operations in a batch request.
/// For example, R2 could return `Internal Error` while batch delete 1000 files.
///
/// Please tune this value based on services' document.
#[deprecated(
since = "0.52.0",
note = "Please use `delete_max_size` instead of `batch_max_operations`"
)]
pub batch_max_operations: Option<usize>,
/// Set the maximum delete size of this backend.
///
/// Some compatible services have a limit on the number of operations in a batch request.
/// For example, R2 could return `Internal Error` while batch delete 1000 files.
///
/// Please tune this value based on services' document.
pub delete_max_size: Option<usize>,
/// Disable stat with override so that opendal will not send stat request with override queries.
///
/// For example, R2 doesn't support stat with `response_content_type` query.
pub disable_stat_with_override: bool,
/// Checksum Algorithm to use when sending checksums in HTTP headers.
/// This is necessary when writing to AWS S3 Buckets with Object Lock enabled for example.
///
/// Available options:
/// - "crc32c"
pub checksum_algorithm: Option<String>,
/// Disable write with if match so that opendal will not send write request with if match headers.
///
/// For example, Ceph RADOS S3 doesn't support write with if match.
pub disable_write_with_if_match: bool,
}
impl Debug for S3Config {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
let mut d = f.debug_struct("S3Config");
d.field("root", &self.root)
.field("bucket", &self.bucket)
.field("endpoint", &self.endpoint)
.field("region", &self.region);
d.finish_non_exhaustive()
}
}
"#;
        let parser = ServiceParser {
            service: "s3".to_string(),
            path: "test".to_string(),
            content: content.to_string(),
        };
        let service = parser.parse().unwrap();
        assert_eq!(service.config.len(), 26);

        // Expected comments are spelled line by line; the empty entries are the bare
        // `///` lines of the fixture, which the parser keeps as empty lines.
        assert_eq!(
            service.config[21],
            Config {
                name: "batch_max_operations".to_string(),
                value: ConfigType::Usize,
                optional: true,
                deprecated: Some(AttrDeprecated {
                    since: "0.52.0".to_string(),
                    note: "Please use `delete_max_size` instead of `batch_max_operations`".into(),
                }),
                comments: [
                    "Set maximum batch operations of this backend.",
                    "",
                    "Some compatible services have a limit on the number of operations in a batch request.",
                    "For example, R2 could return `Internal Error` while batch delete 1000 files.",
                    "",
                    "Please tune this value based on services' document.",
                ]
                .join("\n"),
            },
        );
        assert_eq!(
            service.config[25],
            Config {
                name: "disable_write_with_if_match".to_string(),
                value: ConfigType::Bool,
                optional: true,
                deprecated: None,
                comments: [
                    "Disable write with if match so that opendal will not send write request with if match headers.",
                    "",
                    "For example, Ceph RADOS S3 doesn't support write with if match.",
                ]
                .join("\n"),
            },
        );
    }

    /// Parsing the real services directory of the workspace must succeed.
    #[test]
    fn test_parse() {
        let path = workspace_dir()
            .join("core/core/src/services")
            .canonicalize()
            .unwrap();
        // Parse should just pass.
        let _ = parse(&path.to_string_lossy()).unwrap();
    }
}