blob: e73b184fc15c90f12fcc970c3060931921f1f40c [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
use percent_encoding::{percent_decode, percent_encode, AsciiSet, CONTROLS};
use std::borrow::Cow;
use crate::path::DELIMITER_BYTE;
use snafu::Snafu;
/// Error returned by [`PathPart::parse`]
#[derive(Debug, Snafu)]
#[snafu(display("Invalid path segment - got \"{}\" expected: \"{}\"", actual, expected))]
#[allow(missing_copy_implementations)]
pub struct InvalidPart {
actual: String,
expected: String,
}
/// The PathPart type exists to validate the directory/file names that form part
/// of a path.
///
/// A PathPart instance is guaranteed to to contain no illegal characters (e.g. `/`)
/// as it can only be constructed by going through the `from` impl.
#[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Debug, Default, Hash)]
pub struct PathPart<'a> {
pub(super) raw: Cow<'a, str>,
}
impl<'a> PathPart<'a> {
/// Parse the provided path segment as a [`PathPart`] returning an error if invalid
pub fn parse(segment: &'a str) -> Result<Self, InvalidPart> {
let decoded: Cow<'a, [u8]> = percent_decode(segment.as_bytes()).into();
let part = PathPart::from(decoded.as_ref());
if segment != part.as_ref() {
return Err(InvalidPart {
actual: segment.to_string(),
expected: part.raw.to_string(),
});
}
Ok(Self {
raw: segment.into(),
})
}
}
/// Characters we want to encode.
const INVALID: &AsciiSet = &CONTROLS
// The delimiter we are reserving for internal hierarchy
.add(DELIMITER_BYTE)
// Characters AWS recommends avoiding for object keys
// https://docs.aws.amazon.com/AmazonS3/latest/dev/UsingMetadata.html
.add(b'\\')
.add(b'{')
.add(b'^')
.add(b'}')
.add(b'%')
.add(b'`')
.add(b']')
.add(b'"') // " <-- my editor is confused about double quotes within single quotes
.add(b'>')
.add(b'[')
.add(b'~')
.add(b'<')
.add(b'#')
.add(b'|')
// Characters Google Cloud Storage recommends avoiding for object names
// https://cloud.google.com/storage/docs/naming-objects
.add(b'\r')
.add(b'\n')
.add(b'*')
.add(b'?');
impl<'a> From<&'a [u8]> for PathPart<'a> {
fn from(v: &'a [u8]) -> Self {
let inner = match v {
// We don't want to encode `.` generally, but we do want to disallow parts of paths
// to be equal to `.` or `..` to prevent file system traversal shenanigans.
b"." => "%2E".into(),
b".." => "%2E%2E".into(),
other => percent_encode(other, INVALID).into(),
};
Self { raw: inner }
}
}
impl<'a> From<&'a str> for PathPart<'a> {
fn from(v: &'a str) -> Self {
Self::from(v.as_bytes())
}
}
impl From<String> for PathPart<'static> {
fn from(s: String) -> Self {
Self {
raw: Cow::Owned(PathPart::from(s.as_str()).raw.into_owned()),
}
}
}
impl<'a> AsRef<str> for PathPart<'a> {
fn as_ref(&self) -> &str {
self.raw.as_ref()
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn path_part_delimiter_gets_encoded() {
let part: PathPart<'_> = "foo/bar".into();
assert_eq!(part.raw, "foo%2Fbar");
}
#[test]
fn path_part_given_already_encoded_string() {
let part: PathPart<'_> = "foo%2Fbar".into();
assert_eq!(part.raw, "foo%252Fbar");
}
#[test]
fn path_part_cant_be_one_dot() {
let part: PathPart<'_> = ".".into();
assert_eq!(part.raw, "%2E");
}
#[test]
fn path_part_cant_be_two_dots() {
let part: PathPart<'_> = "..".into();
assert_eq!(part.raw, "%2E%2E");
}
}