blob: 7233f0f2b6b20ef8a07504f6541697e40991b4db [file] [log] [blame]
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "utils/file/FilePattern.h"
#include "utils/file/FileUtils.h"
#include "utils/StringUtils.h"
#include "core/logging/LoggerConfiguration.h"
namespace org::apache::nifi::minifi::utils::file {
/**
 * Tells whether a piece of pattern text contains a glob wildcard.
 * @param pattern the text to inspect
 * @return true if at least one '?' or '*' character occurs in pattern
 */
static bool isGlobPattern(const std::string& pattern) {
  for (const char ch : pattern) {
    if (ch == '?' || ch == '*') {
      return true;
    }
  }
  return false;
}
/**
 * Parses one entry of a file pattern.
 *
 * The text is trimmed, a leading '!' marks the segment as excluding, and the rest is
 * resolved against the executable directory. The resulting path is then split into a
 * directory pattern and a file name pattern; a trailing "**" is kept in the directory
 * pattern and the file name pattern becomes "*".
 *
 * @param pattern raw text of the segment
 * @throws FilePatternSegmentError if the pattern is empty, if it is relative while the
 *         executable directory is not absolute, if the file name part is empty, "." or "..",
 *         or if a ".." directory segment follows a segment containing a wildcard
 */
FilePattern::FilePatternSegment::FilePatternSegment(std::string pattern) {
  pattern = utils::StringUtils::trim(pattern);
  excluding_ = !pattern.empty() && pattern[0] == '!';
  if (excluding_) {
    // drop the negation marker and any whitespace that followed it
    pattern = utils::StringUtils::trim(pattern.substr(1));
  }
  if (pattern.empty()) {
    throw FilePatternSegmentError("Empty pattern");
  }

  const std::filesystem::path exe_dir = get_executable_dir();
  const std::filesystem::path requested(pattern);
  if (requested.is_relative() && !exe_dir.is_absolute()) {
    throw FilePatternSegmentError("Couldn't determine executable dir, relative pattern not supported");
  }
  const std::filesystem::path full_path = exe_dir / requested;

  file_pattern_ = full_path.filename().string();
  if (file_pattern_.empty()) {
    throw FilePatternSegmentError("Empty file pattern");
  }

  const bool ends_with_tree_wildcard = file_pattern_ == "**";
  if (ends_with_tree_wildcard) {
    // the "**" stays part of the directory pattern, any file below it is accepted
    file_pattern_ = "*";
    directory_pattern_ = full_path;
  } else {
    directory_pattern_ = full_path.parent_path();
  }

  if (file_pattern_ == "." || file_pattern_ == "..") {
    throw FilePatternSegmentError("Invalid file pattern '" + file_pattern_ + "'");
  }

  bool wildcard_seen = false;
  for (const auto& dir_segment : directory_pattern_) {
    if (wildcard_seen && dir_segment == "..") {
      throw FilePatternSegmentError("Parent accessor is not supported after wildcards");
    }
    wildcard_seen = wildcard_seen || isGlobPattern(dir_segment.string());
  }
}
/**
 * Returns the leading part of the directory pattern that contains no wildcards.
 * @return the directory pattern segments preceding the first segment that contains
 *         '?' or '*', joined into a path (empty if the first segment is already a glob)
 */
std::filesystem::path FilePattern::FilePatternSegment::getBaseDirectory() const {
  std::filesystem::path base_dir;
  // stop at the first wildcard segment, everything from there on is not a fixed directory
  for (auto it = directory_pattern_.begin(); it != directory_pattern_.end() && !isGlobPattern(it->string()); ++it) {
    base_dir /= *it;
  }
  return base_dir;
}
/**
 * Builds a file pattern from a comma-separated list of segments.
 * @param pattern the comma-separated pattern text
 * @param error_handler called with the offending segment text and the error message for
 *        every segment that fails to parse; such segments are not added to the pattern
 */
FilePattern::FilePattern(const std::string &pattern, const ErrorHandler& error_handler) {
  const auto raw_segments = StringUtils::split(pattern, ",");
  for (const auto& raw_segment : raw_segments) {
    try {
      segments_.emplace_back(raw_segment);
    } catch (const FilePatternSegmentError& parse_error) {
      // report the bad segment and carry on with the remaining ones
      error_handler(raw_segment, parse_error.what());
    }
  }
}
/**
 * Increments it unless it already equals end.
 * @param it position to advance, modified in place
 * @param end position that must not be stepped past
 * @return true if it was incremented, false if it was already equal to end
 */
template<typename It>
static bool advance_if_not_equal(It& it, const It& end) {
  const bool at_end = (it == end);
  if (!at_end) {
    ++it;
  }
  return !at_end;
}
/**
 * Tells whether a path segment refers to the current directory.
 * @param dir the path segment to check
 * @return true if dir is empty or equal to "."
 */
static bool is_this_dir(const std::filesystem::path& dir) {
  if (dir.empty()) {
    return true;
  }
  return dir == ".";
}
/**
 * Advances it past every leading element for which fn returns true.
 * @param it position to advance, modified in place; left at the first element that
 *        fn rejects, or at end if there is none
 * @param end end of the range
 * @param fn predicate invoked with the dereferenced position
 */
template<typename It, typename Fn>
static void skip_if(It& it, const It& end, const Fn& fn) {
  for (; it != end; ++it) {
    if (!fn(*it)) {
      return;
    }
  }
}
/**
 * Matches a value against a glob pattern.
 *
 * In the pattern '*' matches any (possibly empty) run of characters and '?' matches exactly
 * one character; every other character must match literally. The whole value has to be
 * consumed for the match to succeed.
 *
 * The matcher is iterative: on a mismatch it only backtracks to the most recent '*' and lets
 * that star swallow one more character. Earlier stars never need to be revisited, so the
 * worst case is O(pattern.length() * value.length()) instead of growing exponentially with
 * the number of '*' characters.
 *
 * @param pattern the glob pattern
 * @param value the text to test
 * @return true if value matches pattern in its entirety
 */
static bool matchGlob(std::string_view pattern, std::string_view value) {
  constexpr size_t no_star = std::string_view::npos;
  size_t pattern_idx = 0;
  size_t value_idx = 0;
  // index of the most recent '*' in the pattern, and the value index where the text
  // following that star is currently assumed to start
  size_t star_pattern_idx = no_star;
  size_t star_value_idx = 0;

  while (value_idx < value.length()) {
    if (pattern_idx < pattern.length() && pattern[pattern_idx] == '*') {
      // first try to let the star match nothing
      star_pattern_idx = pattern_idx;
      star_value_idx = value_idx;
      ++pattern_idx;
    } else if (pattern_idx < pattern.length() && (pattern[pattern_idx] == '?' || pattern[pattern_idx] == value[value_idx])) {
      ++pattern_idx;
      ++value_idx;
    } else if (star_pattern_idx != no_star) {
      // mismatch: extend the most recent star by one character and retry after it
      ++star_value_idx;
      value_idx = star_value_idx;
      pattern_idx = star_pattern_idx + 1;
    } else {
      return false;
    }
  }

  // the value is used up, the rest of the pattern may only consist of stars
  while (pattern_idx < pattern.length() && pattern[pattern_idx] == '*') {
    ++pattern_idx;
  }
  return pattern_idx == pattern.length();
}
/**
 * Matches the segments of a directory path against the segments of a directory pattern.
 *
 * Empty and "." segments are ignored on both sides. A "**" pattern segment stands for any
 * number of nested directories, every other pattern segment is matched against exactly one
 * directory segment with matchGlob.
 *
 * @param pattern_it current position in the pattern
 * @param pattern_end end of the pattern
 * @param value_it current position in the directory being tested
 * @param value_end end of the directory being tested
 * @return TREE if the pattern ends in a "**" that was reached,
 *         EXACT if pattern and directory are used up together,
 *         PARENT if the directory is used up while pattern segments remain,
 *         NONE otherwise
 */
auto FilePattern::FilePatternSegment::matchDirectory(DirIt pattern_it, const DirIt& pattern_end, DirIt value_it, const DirIt& value_end) -> DirMatchResult {
  while (pattern_it != pattern_end) {
    if (is_this_dir(*pattern_it)) {
      ++pattern_it;
      continue;
    }
    const DirIt rest_of_pattern = std::next(pattern_it);

    if (*pattern_it == "**") {
      if (rest_of_pattern == pattern_end) {
        // a trailing "**" accepts everything below this point
        return DirMatchResult::TREE;
      }
      // let "**" stand for zero, one, two, ... directories and try the rest of the pattern each time
      bool parent_seen = false;
      while (true) {
        skip_if(value_it, value_end, is_this_dir);
        const auto outcome = matchDirectory(rest_of_pattern, pattern_end, value_it, value_end);
        if (outcome == DirMatchResult::TREE || outcome == DirMatchResult::EXACT) {
          return outcome;
        }
        // a parent match is remembered only, a later attempt may still yield an exact or tree match
        parent_seen = parent_seen || outcome == DirMatchResult::PARENT;
        if (value_it == value_end) {
          break;
        }
        ++value_it;
      }
      return parent_seen ? DirMatchResult::PARENT : DirMatchResult::NONE;
    }

    skip_if(value_it, value_end, is_this_dir);
    if (value_it == value_end) {
      // the directory ran out while the pattern still has segments left
      return DirMatchResult::PARENT;
    }
    if (!matchGlob(pattern_it->string(), value_it->string())) {
      return DirMatchResult::NONE;
    }
    ++value_it;
    pattern_it = rest_of_pattern;
  }

  skip_if(value_it, value_end, is_this_dir);
  // the pattern is used up: it is an exact match only if the directory is used up as well
  return value_it == value_end ? DirMatchResult::EXACT : DirMatchResult::NONE;
}
/**
 * Decides how this segment relates to a directory.
 * @param directory the directory to test against the directory pattern
 * @return for an including segment INCLUDE unless the directory match is NONE;
 *         for an excluding segment EXCLUDE only if the directory is a TREE match and the
 *         file pattern is "*"; NOT_MATCHING in every other case
 */
auto FilePattern::FilePatternSegment::matchDir(const std::filesystem::path& directory) const -> MatchResult {
  const auto dir_result = matchDirectory(directory_pattern_.begin(), directory_pattern_.end(), directory.begin(), directory.end());
  if (!excluding_) {
    return dir_result == DirMatchResult::NONE ? MatchResult::NOT_MATCHING : MatchResult::INCLUDE;
  }
  // an excluding segment rules out a directory only when every file in it is excluded
  const bool every_file_excluded = dir_result == DirMatchResult::TREE && file_pattern_ == "*";
  return every_file_excluded ? MatchResult::EXCLUDE : MatchResult::NOT_MATCHING;
}
/**
 * Decides how this segment relates to a file.
 * @param directory the directory containing the file
 * @param filename the name of the file inside directory
 * @return NOT_MATCHING unless the directory is an EXACT or TREE match and the file name
 *         matches the file pattern; otherwise EXCLUDE for an excluding segment and
 *         INCLUDE for an including one
 */
auto FilePattern::FilePatternSegment::matchFile(const std::filesystem::path& directory, const std::filesystem::path& filename) const -> MatchResult {
  const auto dir_result = matchDirectory(directory_pattern_.begin(), directory_pattern_.end(), directory.begin(), directory.end());
  // a file can only match when its directory matches fully, a parent match is not enough
  const bool directory_matches = dir_result == DirMatchResult::EXACT || dir_result == DirMatchResult::TREE;
  if (!directory_matches || !matchGlob(file_pattern_, filename.string())) {
    return MatchResult::NOT_MATCHING;
  }
  return excluding_ ? MatchResult::EXCLUDE : MatchResult::INCLUDE;
}
/**
 * Matches a path against this segment.
 * @param path the path to test; handled as a file if it has a file name component,
 *        otherwise its parent path is handled as a directory
 * @return the result of matchFile or matchDir respectively
 */
auto FilePattern::FilePatternSegment::match(const std::filesystem::path& path) const -> MatchResult {
  const std::filesystem::path parent = path.parent_path();
  if (!path.has_filename()) {
    return matchDir(parent);
  }
  return matchFile(parent, path.filename());
}
// File-local logger obtained for FilePattern; the free match() function below passes it to list_dir().
static std::shared_ptr<core::logging::Logger> logger = core::logging::LoggerFactory<FilePattern>::getLogger();
std::set<std::filesystem::path> match(const FilePattern& pattern) {
using FilePatternSegment = FilePattern::FilePatternSegment;
std::set<std::filesystem::path> files;
for (auto it = pattern.segments_.begin(); it != pattern.segments_.end(); ++it) {
if (it->isExcluding()) continue;
const auto match_file = [&] (const std::filesystem::path& dir, const std::filesystem::path& file) -> bool {
if (it->matchFile(dir, file) != FilePatternSegment::MatchResult::INCLUDE) {
// our main pattern does not explicitly command us to process this file
// keep iterating
return true;
}
// check all subsequent patterns in reverse (later ones have higher precedence)
for (auto rit = pattern.segments_.rbegin(); rit.base() != it + 1; ++rit) {
const auto result = rit->matchFile(dir, file);
if (result == FilePatternSegment::MatchResult::INCLUDE) {
break;
} else if (result == FilePatternSegment::MatchResult::EXCLUDE) {
// keep on processing the rest of the files in the current directory
return true;
}
}
files.insert(dir / file);
return true;
};
const auto descend_into_directory = [&] (const std::filesystem::path& dir) -> bool {
if (it->matchDir(dir) != FilePatternSegment::MatchResult::INCLUDE) {
// our main pattern does not explicitly command us to process this directory
// do not descend into this directory
return false;
}
// check all subsequent patterns in reverse (later ones have higher precedence)
for (auto rit = pattern.segments_.rbegin(); rit.base() != it + 1; ++rit) {
const auto result = rit->matchDir(dir);
if (result == FilePatternSegment::MatchResult::INCLUDE) {
break;
} else if (result == FilePatternSegment::MatchResult::EXCLUDE) {
// do not descend into this directory
return false;
}
}
return true;
};
list_dir(it->getBaseDirectory().string(), match_file, logger, descend_into_directory);
}
return files;
}
} // namespace org::apache::nifi::minifi::utils::file