// blob: 497d944368ba0e36a742fe525425c39e21897bec [file] [log] [blame]
/*
* Copyright 2017 Google Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
// Author: morlovich@google.com (Maksim Orlovich)
//
// This provides basic parsing and evaluation of a (subset of)
// Content-Security-Policy that's relevant for PageSpeed Automatic.
// CspContext is the main class.
//
// Limitations versus the full spec:
// 1) We don't fully parse some kinds of source expressions, like nonce and
// hash ones.
// 2) Only some of the directives are parsed.
// 3) URL matching doesn't support WebSocket (ws: and wss:) schemes, since
// mod_pagespeed doesn't, and they make for some really ugly conditionals.
#ifndef NET_INSTAWEB_REWRITER_PUBLIC_CSP_H_
#define NET_INSTAWEB_REWRITER_PUBLIC_CSP_H_
#include <memory>
#include <string>
#include <vector>
#include "net/instaweb/rewriter/public/csp_directive.h"
#include "pagespeed/kernel/base/string.h"
#include "pagespeed/kernel/base/string_util.h"
#include "pagespeed/kernel/http/google_url.h"
namespace net_instaweb {
// One entry of a CSP source list. Each instance carries a Kind tag and,
// allocated only on first access, the URL components (UrlData) associated
// with it.
class CspSourceExpression {
 public:
  // Category of the expression. The numeric values are emitted by
  // DebugString(), so the declaration order is significant.
  enum Kind {
    kSelf,
    kSchemeSource,
    kHostSource,
    kUnsafeInline,
    kUnsafeEval,
    kStrictDynamic,
    kUnsafeHashedAttributes,
    kHashOrNonce,
    kUnknown
  };

  // Pre-digested pieces of a URL-like source expression.
  struct UrlData {
    // Everything empty, prefix (non-exact) path matching.
    UrlData() : path_exact_match(false) {}

    // Convenience constructor meant for tests. The inputs are taken verbatim,
    // i.e. they must already be normalized. in_path is broken up on '/' and
    // stored piecewise in path_part (the trailing `true` handed to
    // SplitStringPieceToVector is presumably its omit-empty-pieces flag ---
    // confirm against string_util.h).
    UrlData(StringPiece in_scheme, StringPiece in_host,
            StringPiece in_port, StringPiece in_path,
            bool exact_match = false)
        : scheme_part(in_scheme.as_string()),
          host_part(in_host.as_string()),
          port_part(in_port.as_string()),
          path_exact_match(exact_match) {
      StringPieceVector segments;
      SplitStringPieceToVector(in_path, "/", &segments, true);
      for (const StringPiece& segment : segments) {
        path_part.push_back(segment.as_string());
      }
    }

    // The fields below mirror the representation GoogleUrl uses for the
    // corresponding URL portions, so that comparing against incoming URLs
    // stays simple:
    // 1) Scheme and host are case-insensitive and kept lowercased.
    // 2) The path is case-sensitive, so its case is left alone, but its
    //    % escaping is normalized. It is stored already split, because
    //    matching has to proceed component by component.
    GoogleString scheme_part;  // without the ':'
    GoogleString host_part;
    GoogleString port_part;
    // Path components, i.e. the pieces that '/' separated.
    std::vector<GoogleString> path_part;
    bool path_exact_match;

    // Human-readable dump of every field.
    GoogleString DebugString() const {
      const GoogleString joined_path = JoinCollection(path_part, "/");
      return StrCat("scheme:", scheme_part, " host:", host_part,
                    " port:", port_part,
                    " path:", joined_path,
                    " path_exact_match:", BoolToString(path_exact_match));
    }

    // Field-by-field equality; exists to make unit tests convenient.
    bool operator==(const UrlData& other) const {
      if (scheme_part != other.scheme_part) {
        return false;
      }
      if (host_part != other.host_part) {
        return false;
      }
      if (port_part != other.port_part) {
        return false;
      }
      if (path_part != other.path_part) {
        return false;
      }
      return path_exact_match == other.path_exact_match;
    }
  };

  // Defaults to kUnknown with no URL data.
  CspSourceExpression() : kind_(kUnknown) {}

  // Expression of the given kind with no URL data.
  explicit CspSourceExpression(Kind kind) : kind_(kind) {}

  // Expression of the given kind holding a private copy of url_data.
  CspSourceExpression(Kind kind, const UrlData& url_data)
      : kind_(kind), url_data_(new UrlData(url_data)) {}

  // Builds an expression from its textual form.
  static CspSourceExpression Parse(StringPiece input);

  // Presumably reports whether 'url', loaded from a page at 'origin_url',
  // is covered by this expression; the definition is not in this header.
  bool Matches(const GoogleUrl& origin_url, const GoogleUrl& url) const;

  // Human-readable dump: the numeric kind followed by the URL data.
  GoogleString DebugString() const {
    const GoogleString url_dump = url_data().DebugString();
    return StrCat("kind:", IntegerToString(kind_),
                  " url_data:{", url_dump, "}");
  }

  // Equal when both the kind and the (possibly default) URL data agree.
  bool operator==(const CspSourceExpression& other) const {
    if (kind_ != other.kind_) {
      return false;
    }
    return url_data() == other.url_data();
  }

  Kind kind() const { return kind_; }

  // Read access to the URL data. Note that a default-constructed UrlData is
  // allocated on first use, even though this accessor is const.
  const UrlData& url_data() const { return *LazyUrlData(); }

 private:
  // 'input' is the text between the quotes, and must not be empty.
  static CspSourceExpression ParseQuoted(StringPiece input);

  // True if 'input' matches the base64-value production of the CSP spec.
  static bool ParseBase64(StringPiece input);

  // Checks whether 'input' is either a complete scheme-source or the
  // scheme-part of a host-source, recording the result in
  // url_data->scheme_part. The return value is true only for the
  // scheme-source case.
  bool TryParseScheme(StringPiece* input);

  static bool HasDefaultPortForScheme(const GoogleUrl& url);

  // Write access to the URL data, allocated on first use.
  UrlData* mutable_url_data() { return LazyUrlData(); }

  // Common allocate-on-first-use step behind url_data() and
  // mutable_url_data(). Usable from const methods since url_data_ is mutable.
  UrlData* LazyUrlData() const {
    if (!url_data_) {
      url_data_.reset(new UrlData());
    }
    return url_data_.get();
  }

  Kind kind_;
  // Null until first accessed through one of the accessors above.
  mutable std::unique_ptr<UrlData> url_data_;
};
// A parsed source list: the individual source expressions plus a handful of
// "saw_..." flags. The flags all start out false; presumably Parse() raises
// them when it meets the matching keyword/expression kind (the definition is
// not in this header).
class CspSourceList {
 public:
  // An empty list with every flag cleared.
  CspSourceList() {}

  // Builds a source list from its textual form.
  static std::unique_ptr<CspSourceList> Parse(StringPiece input);

  // The expressions held by this list.
  const std::vector<CspSourceExpression>& expressions() const {
    return expressions_;
  }

  // Flag accessors.
  bool saw_unsafe_inline() const {
    return saw_unsafe_inline_;
  }
  bool saw_unsafe_eval() const {
    return saw_unsafe_eval_;
  }
  bool saw_strict_dynamic() const {
    return saw_strict_dynamic_;
  }
  bool saw_unsafe_hashed_attributes() const {
    return saw_unsafe_hashed_attributes_;
  }
  bool saw_hash_or_nonce() const {
    return saw_hash_or_nonce_;
  }

  // Presumably reports whether 'url', loaded from a page at 'origin_url',
  // is covered by this list; the definition is not in this header.
  bool Matches(const GoogleUrl& origin_url, const GoogleUrl& url) const;

 private:
  std::vector<CspSourceExpression> expressions_;
  bool saw_unsafe_inline_ = false;
  bool saw_unsafe_eval_ = false;
  bool saw_strict_dynamic_ = false;
  bool saw_unsafe_hashed_attributes_ = false;
  bool saw_hash_or_nonce_ = false;
};
// A single policy. Keep in mind that a page is constrained by the
// intersection of however many of these apply to it.
class CspPolicy {
 public:
  CspPolicy();

  // Builds a policy from its textual form. The result may be null.
  static std::unique_ptr<CspPolicy> Parse(StringPiece input);

  // Looks up the source list stored for 'directive'. The result may be null.
  // 'directive' is used directly as an index into policies_, without any
  // range check here.
  const CspSourceList* SourceListFor(CspDirective directive) const {
    const int slot = static_cast<int>(directive);
    const std::unique_ptr<CspSourceList>& entry = policies_[slot];
    return entry.get();
  }

  // Simple yes/no queries about what this policy allows.
  bool PermitsEval() const;
  bool PermitsInlineScript() const;
  bool PermitsInlineScriptAttribute() const;
  bool PermitsInlineStyle() const;
  bool PermitsInlineStyleAttribute() const;

  // Checks whether 'url' may be loaded within 'origin_url' in the capacity
  // given by 'role', which should be one of kStyleSrc, kScriptSrc or kImgSrc.
  bool CanLoadUrl(CspDirective role, const GoogleUrl& origin_url,
                  const GoogleUrl& url) const;

  bool IsBasePermitted(const GoogleUrl& previous_origin,
                       const GoogleUrl& base_candidate) const;

 private:
  // Indexed by CspDirective (see SourceListFor). Some of the entries are
  // expected to be null.
  std::vector<std::unique_ptr<CspSourceList>> policies_;
};
// A set of all policies (maybe none!) on the page. Note that we do not track
// those with report disposition, only those that actually enforce --- reporting
// seems like it would keep the page author informed about our effects as it is.
class CspContext {
public:
bool PermitsEval() const {
return AllPermit(&CspPolicy::PermitsEval);
}
bool PermitsInlineScript() const {
return AllPermit(&CspPolicy::PermitsInlineScript);
}
bool PermitsInlineScriptAttribute() const {
return AllPermit(&CspPolicy::PermitsInlineScriptAttribute);
}
bool PermitsInlineStyle() const {
return AllPermit(&CspPolicy::PermitsInlineStyle);
}
bool PermitsInlineStyleAttribute() const {
return AllPermit(&CspPolicy::PermitsInlineStyleAttribute);
}
bool CanLoadUrl(CspDirective role, const GoogleUrl& origin_url,
const GoogleUrl& url) {
// All policies must OK, with base case being 'true'.
for (const auto& policy : policies_) {
if (!policy->CanLoadUrl(role, origin_url, url)) {
return false;
}
}
return true;
}
bool IsBasePermitted(const GoogleUrl& previous_origin,
const GoogleUrl& base_candidate) const {
for (const auto& policy : policies_) {
if (!policy->IsBasePermitted(previous_origin, base_candidate)) {
return false;
}
}
return true;
}
bool HasDirective(CspDirective directive) const {
for (const auto& policy : policies_) {
if (policy->SourceListFor(directive) != nullptr) {
return true;
}
}
return false;
}
bool HasDirectiveOrDefaultSrc(CspDirective directive) const {
for (const auto& policy : policies_) {
if (policy->SourceListFor(directive) != nullptr ||
policy->SourceListFor(CspDirective::kDefaultSrc) != nullptr) {
return true;
}
}
return false;
}
void AddPolicy(std::unique_ptr<CspPolicy> policy);
void Clear() { policies_.clear(); }
size_t policies_size() const { return policies_.size(); }
bool empty() const { return policies_.empty(); }
private:
typedef bool (CspPolicy::*SimplePredicateFn)() const;
bool AllPermit(SimplePredicateFn predicate) const {
// Note that empty policies_ means "true" --- there is no policy whatsoever,
// so everything is permitted. If there is more than that, all policies
// must agree, too.
for (const auto& policy : policies_) {
if (!(policy.get()->*predicate)()) {
return false;
}
}
return true;
}
std::vector<std::unique_ptr<CspPolicy>> policies_;
};
} // namespace net_instaweb
#endif // NET_INSTAWEB_REWRITER_PUBLIC_CSP_H_