blob: b2bc42d5e700a34bd2219bb963b5560dfbac7260 [file] [log] [blame]
/*
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
/**
* @file configs.cc
* @brief Plugin configuration.
*/
#include <fstream> /* std::ifstream */
#include <sstream> /* std::istringstream */
#include <getopt.h> /* getopt_long() */
#include <strings.h> /* strncasecmp() */
#include <cstring> /* strlen() */
#include "configs.h"
template <typename ContainerType>
static void
commaSeparateString(ContainerType &c, const String &input)
{
std::istringstream istr(input);
String token;
while (std::getline(istr, token, ',')) {
c.insert(c.end(), token);
}
}
/**
 * @brief Interprets an option value as a boolean.
 *
 * A missing value (null pointer) counts as true. Otherwise the value is true when it starts,
 * case-insensitively, with "true", "1" or "yes".
 *
 * @param arg option value, may be a null pointer.
 * @return true if the value is considered "true", false otherwise.
 */
static bool
isTrue(const char *arg)
{
  if (nullptr == arg) {
    return true;
  }
  if (0 == strncasecmp("true", arg, 4)) {
    return true;
  }
  if (0 == strncasecmp("1", arg, 1)) {
    return true;
  }
  return 0 == strncasecmp("yes", arg, 3);
}
/**
 * @brief Adds the elements of a comma-separated list to the exclusion list.
 * @param arg comma-separated element names; a null pointer (option given without a value) is ignored.
 */
void
ConfigElements::setExclude(const char *arg)
{
  /* Converting a null pointer to String is undefined behavior, so there is nothing to add in that case. */
  if (nullptr == arg) {
    return;
  }
  ::commaSeparateString<StringSet>(_exclude, arg);
}
/**
 * @brief Adds the elements of a comma-separated list to the inclusion list.
 * @param arg comma-separated element names; a null pointer (option given without a value) is ignored.
 */
void
ConfigElements::setInclude(const char *arg)
{
  /* Converting a null pointer to String is undefined behavior, so there is nothing to add in that case. */
  if (nullptr == arg) {
    return;
  }
  ::commaSeparateString<StringSet>(_include, arg);
}
/**
 * @brief Compiles a pattern and adds it to a multi-pattern.
 *
 * The pattern is added only if its initialization succeeds, otherwise it is discarded.
 *
 * @param multiPattern multi-pattern receiving the new pattern.
 * @param arg pattern definition; a null pointer (option given without a value) is reported and ignored.
 */
static void
setPattern(MultiPattern &multiPattern, const char *arg)
{
  if (nullptr == arg) {
    CacheKeyError("missing pattern definition, ignoring");
    return;
  }

  /* A plain new-expression reports failure by throwing, it never yields a null pointer, so no null check is needed. */
  Pattern *p = new Pattern();
  if (p->init(arg)) {
    multiPattern.add(p);
  } else {
    delete p;
  }
}
bool
ConfigElements::setCapture(const String &name, const String &pattern)
{
auto it = _captures.find(name);
if (_captures.end() == it) {
auto mp = new MultiPattern(name);
if (nullptr != mp) {
_captures[name] = mp;
} else {
return false;
}
}
setPattern(*_captures[name], pattern.c_str());
CacheKeyDebug("added capture pattern '%s' for element '%s'", pattern.c_str(), name.c_str());
return true;
}
void
ConfigElements::addCapture(const char *arg)
{
StringView args(arg);
StringView::size_type pos = args.find_first_of(':');
if (StringView::npos != pos) {
String name(args.substr(0, pos));
if (!name.empty()) {
String pattern(args.substr(pos + 1));
if (!pattern.empty()) {
if (!setCapture(name, pattern)) {
CacheKeyError("failed to add capture: '%s'", arg);
}
} else {
CacheKeyError("missing pattern in capture: '%s'", arg);
}
} else {
CacheKeyError("missing element name in capture: %s", arg);
}
} else {
CacheKeyError("invalid capture: %s, should be 'name:<capture_definition>", arg);
}
}
void
ConfigElements::setExcludePatterns(const char *arg)
{
setPattern(_excludePatterns, arg);
}
void
ConfigElements::setIncludePatterns(const char *arg)
{
setPattern(_includePatterns, arg);
}
/**
 * @brief Sets the "sort" flag from an option value.
 * @param arg option value as interpreted by isTrue(); a null pointer counts as true.
 */
void
ConfigElements::setSort(const char *arg)
{
  const bool enabled = ::isTrue(arg);
  _sort              = enabled;
}
/**
 * @brief Sets the "remove" flag from an option value.
 * @param arg option value as interpreted by isTrue(); a null pointer counts as true.
 */
void
ConfigElements::setRemove(const char *arg)
{
  const bool enabled = ::isTrue(arg);
  _remove            = enabled;
}
bool
ConfigElements::toBeRemoved() const
{
return _remove;
}
bool
ConfigElements::toBeSkipped() const
{
return _skip;
}
bool
ConfigElements::toBeSorted() const
{
return _sort;
}
/**
 * @brief Decides whether an element should become part of the cache key.
 *
 * The element is added when it satisfies the 'include' rule and does not satisfy the 'exclude' rule.
 *
 * @param element element name to be checked against the include / exclude lists and patterns.
 * @return true if the element should be added, false otherwise.
 */
bool
ConfigElements::toBeAdded(const String &element) const
{
  /* Exclude the element if it is in the exclusion list. If the list is empty don't exclude anything. */
  bool exclude = (!_exclude.empty() && _exclude.find(element) != _exclude.end()) ||
                 (!_excludePatterns.empty() && _excludePatterns.match(element));
  CacheKeyDebug("%s '%s' %s the 'exclude' rule", name().c_str(), element.c_str(), exclude ? "matches" : "does not match");

  /* Include the element only if it is in the inclusion list. If the list is empty include everything. */
  bool include =
    ((_include.empty() && _includePatterns.empty()) || _include.find(element) != _include.end()) || _includePatterns.match(element);
  CacheKeyDebug("%s '%s' %s the 'include' rule", name().c_str(), element.c_str(), include ? "matches" : "does not match");

  if (include && !exclude) {
    CacheKeyDebug("%s '%s' should be added to cache key", name().c_str(), element.c_str());
    return true;
  }

  CacheKeyDebug("%s '%s' should not be added to cache key", name().c_str(), element.c_str());
  return false;
}
/**
 * @brief Tells whether no include / exclude rule of any kind has been configured.
 * @return true if the exclude list, exclude patterns, include list and include patterns are all empty.
 */
inline bool
ConfigElements::noIncludeExcludeRules() const
{
  if (!_exclude.empty() || !_excludePatterns.empty()) {
    return false;
  }
  return _include.empty() && _includePatterns.empty();
}
/**
 * @brief Destroys the capture multi-patterns registered in _captures.
 */
ConfigElements::~ConfigElements()
{
  for (auto entry = _captures.begin(); entry != _captures.end(); ++entry) {
    delete entry->second;
  }
}
/**
 * @brief Finalizes the query parameters related configuration.
 *
 * When there are no inclusions or exclusions and no sorting was requested there is nothing to do
 * with the query string, so the "skip" flag is set; the whole original query is then meant to be
 * included in the cache key.
 *
 * @return always true.
 */
bool
ConfigQuery::finalize()
{
  const bool hasRules = !noIncludeExcludeRules();
  _skip               = !(hasRules || _sort);
  return true;
}
/** Element name reported by ConfigQuery::name(). */
const String ConfigQuery::_NAME("query parameter");

/**
 * @brief Accessor for the name of this kind of element.
 * @return reference to the static name string.
 */
inline const String &
ConfigQuery::name() const
{
  return ConfigQuery::_NAME;
}
/**
 * @brief Finalizes the headers related configuration.
 *
 * If all include and exclude lists, patterns included, are empty then there are no headers to be
 * included, which is recorded by setting the "remove" flag.
 *
 * @return always true.
 */
bool
ConfigHeaders::finalize()
{
  const bool nothingConfigured = noIncludeExcludeRules();
  _remove                      = nothingConfigured;
  return true;
}
/** Element name reported by ConfigHeaders::name(). */
const String ConfigHeaders::_NAME("header");

/**
 * @brief Accessor for the name of this kind of element.
 * @return reference to the static name string.
 */
inline const String &
ConfigHeaders::name() const
{
  return ConfigHeaders::_NAME;
}
/**
 * @brief Finalizes the cookies related configuration.
 *
 * If all include and exclude lists, patterns included, are empty then there are no cookies to be
 * included, which is recorded by setting the "remove" flag.
 *
 * @return always true.
 */
bool
ConfigCookies::finalize()
{
  const bool nothingConfigured = noIncludeExcludeRules();
  _remove                      = nothingConfigured;
  return true;
}
/** Element name reported by ConfigCookies::name(). */
const String ConfigCookies::_NAME("cookie");

/**
 * @brief Accessor for the name of this kind of element.
 * @return reference to the static name string.
 */
inline const String &
ConfigCookies::name() const
{
  return ConfigCookies::_NAME;
}
/**
 * @brief Accessor for the include list, provided only for the headers configuration.
 *
 * This hole in the design would not be needed if the traffic server API offered an efficient way
 * to iterate through the headers (the inefficiency is mentioned in ts/ts.h); iterating through the
 * "include" list should be a good enough work-around.
 *
 * @return reference to the set of included header names.
 */
const StringSet &
ConfigHeaders::getInclude() const
{
  return this->_include;
}
/**
 * @brief Rebases a relative path onto the configuration directory.
 * @param path file path; returned unchanged when it is empty or starts with '/'.
 * @return the path itself, or the directory returned by TSConfigDirGet() followed by '/' and the path.
 */
static String
makeConfigPath(const String &path)
{
  const bool relative = !path.empty() && path[0] != '/';
  if (!relative) {
    return path;
  }

  String rebased(TSConfigDirGet());
  rebased += "/";
  rebased += path;
  return rebased;
}
/**
* @brief a helper function which loads the classifier from files.
* @param args classname + filename in '<classname>:<filename>' format.
* @param denylist true - load as a denylist classifier, false - allowlist.
* @return true if successful, false otherwise.
*/
bool
Configs::loadClassifiers(const String &args, bool denylist)
{
static const char *EXPECTED_FORMAT = "<classname>:<filename>";
std::size_t d = args.find(':');
if (String::npos == d) {
CacheKeyError("failed to parse classifier string '%s', expected format: '%s'", optarg ? optarg : "null", EXPECTED_FORMAT);
return false;
}
String classname(optarg, 0, d);
String filename(optarg, d + 1, String::npos);
if (classname.empty() || filename.empty()) {
CacheKeyError("'<classname>' and '<filename>' in '%s' cannot be empty, expected format: '%s'", optarg ? optarg : "null",
EXPECTED_FORMAT);
return false;
}
String path(makeConfigPath(filename));
std::ifstream ifstr;
String regex;
unsigned lineno = 0;
ifstr.open(path.c_str());
if (!ifstr) {
CacheKeyError("failed to load classifier '%s' from '%s'", classname.c_str(), path.c_str());
return false;
}
MultiPattern *multiPattern;
if (denylist) {
multiPattern = new NonMatchingMultiPattern(classname);
} else {
multiPattern = new MultiPattern(classname);
}
if (nullptr == multiPattern) {
CacheKeyError("failed to allocate classifier '%s'", classname.c_str());
return false;
}
CacheKeyDebug("loading classifier '%s' from '%s'", classname.c_str(), path.c_str());
while (std::getline(ifstr, regex)) {
Pattern *p;
String::size_type pos;
++lineno;
// Allow #-prefixed comments.
pos = regex.find_first_of('#');
if (pos != String::npos) {
regex.resize(pos);
}
if (regex.empty()) {
continue;
}
p = new Pattern();
if (nullptr != p && p->init(regex)) {
if (denylist) {
CacheKeyDebug("Added pattern '%s' to deny list '%s'", regex.c_str(), classname.c_str());
multiPattern->add(p);
} else {
CacheKeyDebug("Added pattern '%s' to allow list '%s'", regex.c_str(), classname.c_str());
multiPattern->add(p);
}
} else {
CacheKeyError("%s:%u: failed to parse regex '%s'", path.c_str(), lineno, regex.c_str());
delete p;
}
}
ifstr.close();
if (!multiPattern->empty()) {
_classifier.add(multiPattern);
} else {
delete multiPattern;
}
return true;
}
/**
 * @brief initializes plugin configuration.
 *
 * All options are declared with optional_argument, so 'optarg' is a null pointer when an option
 * is given without a value. Options handled directly here that need a value treat a missing one
 * as a failure (capture patterns, classifiers) or report and ignore it (static prefix).
 *
 * @param argc number of plugin parameters
 * @param argv plugin parameters
 * @param perRemapConfig boolean showing if this is per-remap config (vs global config).
 * @return true if all options were processed successfully and the configuration was finalized, false otherwise.
 */
bool
Configs::init(int argc, const char *argv[], bool perRemapConfig)
{
  static const struct option longopt[] = {
    {const_cast<char *>("exclude-params"), optional_argument, nullptr, 'a'},
    {const_cast<char *>("include-params"), optional_argument, nullptr, 'b'},
    {const_cast<char *>("include-match-params"), optional_argument, nullptr, 'c'},
    {const_cast<char *>("exclude-match-params"), optional_argument, nullptr, 'd'},
    {const_cast<char *>("sort-params"), optional_argument, nullptr, 'e'},
    {const_cast<char *>("remove-all-params"), optional_argument, nullptr, 'f'},
    {const_cast<char *>("include-headers"), optional_argument, nullptr, 'g'},
    {const_cast<char *>("include-cookies"), optional_argument, nullptr, 'h'},
    {const_cast<char *>("ua-capture"), optional_argument, nullptr, 'i'},
    {const_cast<char *>("ua-allowlist"), optional_argument, nullptr, 'j'},
    {const_cast<char *>("ua-denylist"), optional_argument, nullptr, 'k'},
    {const_cast<char *>("static-prefix"), optional_argument, nullptr, 'l'},
    {const_cast<char *>("capture-prefix"), optional_argument, nullptr, 'm'},
    {const_cast<char *>("capture-prefix-uri"), optional_argument, nullptr, 'n'},
    {const_cast<char *>("capture-path"), optional_argument, nullptr, 'o'},
    {const_cast<char *>("capture-path-uri"), optional_argument, nullptr, 'p'},
    {const_cast<char *>("remove-prefix"), optional_argument, nullptr, 'q'},
    {const_cast<char *>("remove-path"), optional_argument, nullptr, 'r'},
    {const_cast<char *>("separator"), optional_argument, nullptr, 's'},
    {const_cast<char *>("uri-type"), optional_argument, nullptr, 't'},
    {const_cast<char *>("key-type"), optional_argument, nullptr, 'u'},
    {const_cast<char *>("capture-header"), optional_argument, nullptr, 'v'},
    {const_cast<char *>("canonical-prefix"), optional_argument, nullptr, 'w'},
    /* reserve 'z' for 'config' files */
    {nullptr, 0, nullptr, 0},
  };

  bool status = true;

  /* For remap.config: argv contains the "to" and "from" URLs. Skip the first so that the second one poses as the program name.
   * For plugin.config: argv contains the plugin shared object name. Don't skip any */
  if (perRemapConfig) {
    argc--;
    argv++;
  }

  for (;;) {
    int opt;
    opt = getopt_long(argc, const_cast<char *const *>(argv), "", longopt, nullptr);

    if (opt == -1) {
      break;
    }
    CacheKeyDebug("processing %s", argv[optind - 1]);

    switch (opt) {
    case 'a': /* exclude-params */
      _query.setExclude(optarg);
      break;
    case 'b': /* include-params */
      _query.setInclude(optarg);
      break;
    case 'c': /* include-match-params */
      _query.setIncludePatterns(optarg);
      break;
    case 'd': /* exclude-match-params */
      _query.setExcludePatterns(optarg);
      break;
    case 'e': /* sort-params */
      _query.setSort(optarg);
      break;
    case 'f': /* remove-all-params */
      _query.setRemove(optarg);
      break;
    case 'g': /* include-headers */
      _headers.setInclude(optarg);
      break;
    case 'h': /* include-cookies */
      _cookies.setInclude(optarg);
      break;
    case 'i': /* ua-capture */
      /* The null check comes first so that a missing value never reaches init(). */
      if (nullptr == optarg || !_uaCapture.init(optarg)) {
        CacheKeyError("failed to initialize User-Agent capture pattern '%s'", optarg ? optarg : "null");
        status = false;
      }
      break;
    case 'j': /* ua-allowlist */
      if (nullptr == optarg || !loadClassifiers(optarg, /* denylist = */ false)) {
        CacheKeyError("failed to load User-Agent pattern allow-list '%s'", optarg ? optarg : "null");
        status = false;
      }
      break;
    case 'k': /* ua-denylist */
      if (nullptr == optarg || !loadClassifiers(optarg, /* denylist = */ true)) {
        CacheKeyError("failed to load User-Agent pattern deny-list '%s'", optarg ? optarg : "null");
        status = false;
      }
      break;
    case 'l': /* static-prefix */
      if (nullptr != optarg) {
        _prefix.assign(optarg);
        CacheKeyDebug("prefix='%s'", _prefix.c_str());
      } else {
        CacheKeyError("found an empty static prefix, ignoring it");
      }
      break;
    case 'm': /* capture-prefix */
      if (nullptr == optarg || !_prefixCapture.init(optarg)) {
        CacheKeyError("failed to initialize prefix URI host:port capture pattern '%s'", optarg ? optarg : "null");
        status = false;
      }
      break;
    case 'n': /* capture-prefix-uri */
      if (nullptr == optarg || !_prefixCaptureUri.init(optarg)) {
        CacheKeyError("failed to initialize prefix URI capture pattern '%s'", optarg ? optarg : "null");
        status = false;
      }
      break;
    case 'o': /* capture-path */
      if (nullptr == optarg || !_pathCapture.init(optarg)) {
        CacheKeyError("failed to initialize path capture pattern '%s'", optarg ? optarg : "null");
        status = false;
      }
      break;
    case 'p': /* capture-path-uri */
      if (nullptr == optarg || !_pathCaptureUri.init(optarg)) {
        CacheKeyError("failed to initialize path URI capture pattern '%s'", optarg ? optarg : "null");
        status = false;
      }
      break;
    case 'q': /* remove-prefix */
      _prefixToBeRemoved = isTrue(optarg);
      break;
    case 'r': /* remove-path */
      _pathToBeRemoved = isTrue(optarg);
      break;
    case 's': /* separator */
      setSeparator(optarg);
      break;
    case 't': /* uri-type */
      setUriType(optarg);
      break;
    case 'u': /* key-type */
      setKeyType(optarg);
      break;
    case 'v': /* capture-header */
      _headers.addCapture(optarg);
      break;
    case 'w': /* canonical-prefix */
      _canonicalPrefix = isTrue(optarg);
      break;
    }
  }

  status &= finalize();

  return status;
}
/**
 * @brief provides means for post-processing of the plugin parameters to finalize the configuration or to "cache" some of the
 * decisions for later use.
 *
 * If no key type was configured CACHE_KEY is used. The query, headers and cookies configurations
 * are then finalized in that order, stopping at the first one that fails.
 *
 * @return true if successful, false if failure.
 */
bool
Configs::finalize()
{
  if (_keyTypes.empty()) {
    CacheKeyDebug("setting cache key");
    _keyTypes.insert(CACHE_KEY);
  }

  if (!_query.finalize()) {
    return false;
  }
  if (!_headers.finalize()) {
    return false;
  }
  return _cookies.finalize();
}
bool
Configs::prefixToBeRemoved()
{
return _prefixToBeRemoved;
}
bool
Configs::pathToBeRemoved()
{
return _pathToBeRemoved;
}
bool
Configs::canonicalPrefix()
{
return _canonicalPrefix;
}
/**
 * @brief Sets the separator string.
 * @param arg new separator; a null pointer leaves the current separator unchanged.
 */
void
Configs::setSeparator(const char *arg)
{
  if (nullptr == arg) {
    return;
  }
  _separator.assign(arg);
}
/**
 * @brief Accessor for the separator string.
 * @return reference to the current separator.
 */
const String &
Configs::getSeparator()
{
  return this->_separator;
}
/**
 * @brief Sets the URI type from an option value.
 *
 * Recognized values, compared case-insensitively, are "remap" and "pristine". A missing or
 * unrecognized value is reported and the current URI type is left unchanged.
 *
 * @param arg URI type name, may be a null pointer.
 */
void
Configs::setUriType(const char *arg)
{
  if (nullptr == arg) {
    CacheKeyError("found an empty URI type, using default 'remap'");
    return;
  }

  if (0 == strcasecmp(arg, "remap")) {
    _uriType = CacheKeyUriType::REMAP;
    CacheKeyDebug("using remap URI type");
    return;
  }

  if (0 == strcasecmp(arg, "pristine")) {
    _uriType = CacheKeyUriType::PRISTINE;
    CacheKeyDebug("using pristine URI type");
    return;
  }

  CacheKeyError("unrecognized URI type '%s', using default 'remap'", arg);
}
/**
 * @brief Adds key types from a comma-separated option value.
 *
 * Recognized types, compared case-insensitively, are "cache_key" and "parent_selection_url".
 * Each unrecognized type is reported individually and skipped.
 *
 * @param arg comma-separated list of key types, may be a null pointer.
 */
void
Configs::setKeyType(const char *arg)
{
  if (nullptr == arg) {
    CacheKeyError("found an empty key type, using default 'cache_key'");
    return;
  }

  StringVector types;
  ::commaSeparateString<StringVector>(types, arg);

  /* Iterate by reference, there is no need to copy each token. */
  for (const auto &type : types) {
    if (9 == type.length() && 0 == strncasecmp(type.c_str(), "cache_key", 9)) {
      _keyTypes.insert(CacheKeyKeyType::CACHE_KEY);
      CacheKeyDebug("setting cache key");
    } else if (20 == type.length() && 0 == strncasecmp(type.c_str(), "parent_selection_url", 20)) {
      _keyTypes.insert(CacheKeyKeyType::PARENT_SELECTION_URL);
      CacheKeyDebug("setting parent selection URL");
    } else {
      /* Report the offending token, not the whole list. */
      CacheKeyError("unrecognized key type '%s', using default 'cache_key'", type.c_str());
    }
  }
}
/**
 * @brief Accessor for the configured URI type.
 * @return the current value of _uriType.
 */
CacheKeyUriType
Configs::getUriType()
{
  return this->_uriType;
}
/**
 * @brief Accessor for the configured set of key types.
 * @return reference to _keyTypes.
 */
CacheKeyKeyTypeSet &
Configs::getKeyType()
{
  return this->_keyTypes;
}
/**
 * @brief Maps a URI type to its printable name.
 * @param type URI type.
 * @return "remap", "pristine" or, for any other value, "unknown".
 */
const char *
getCacheKeyUriTypeName(CacheKeyUriType type)
{
  if (REMAP == type) {
    return "remap";
  }
  if (PRISTINE == type) {
    return "pristine";
  }
  return "unknown";
}
/**
 * @brief Maps a key type to its printable name.
 * @param type key type.
 * @return "cache key", "parent selection url" or, for any other value, "unknown".
 */
const char *
getCacheKeyKeyTypeName(CacheKeyKeyType type)
{
  if (CACHE_KEY == type) {
    return "cache key";
  }
  if (PARENT_SELECTION_URL == type) {
    return "parent selection url";
  }
  return "unknown";
}