blob: 24597a48ac8dbd018a9f9ec8aab9b19756692852 [file] [log] [blame]
/**
* Copyright 2010 Google Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
// Author: jmarantz@google.com (Joshua Marantz)
#include "net/instaweb/rewriter/public/url_partnership.h"
#include <algorithm> // for std::min
#include <string>
#include "net/instaweb/rewriter/public/domain_lawyer.h"
#include "net/instaweb/rewriter/public/rewrite_options.h"
#include "net/instaweb/util/public/message_handler.h"
#include "net/instaweb/util/public/stl_util.h"
namespace net_instaweb {
// Creates a partnership that resolves resource URLs against the directory
// portion (everything except the leaf) of original_request.  When
// original_request is not valid, original_origin_and_path_ is not assigned
// here.
UrlPartnership::UrlPartnership(const RewriteOptions* rewrite_options,
                               const GURL& original_request)
    : rewrite_options_(rewrite_options) {
  if (!original_request.is_valid()) {
    return;
  }
  const std::string origin_and_path =
      GoogleUrl::AllExceptLeaf(original_request);
  original_origin_and_path_ = GoogleUrl::Create(origin_and_path);
}
// Releases every heap-allocated GURL held in gurl_vector_ (they are created
// with `new` in AddUrl) and then empties the vector.
UrlPartnership::~UrlPartnership() {
  for (size_t i = 0; i < gurl_vector_.size(); ++i) {
    delete gurl_vector_[i];
  }
  gurl_vector_.clear();
}
// Attempts to add a resource URL to the partnership.
//
// The URL is whitespace-trimmed, resolved against the original request's
// origin-and-path, checked against the rewrite options, and mapped to a
// domain by the DomainLawyer.  The first URL accepted fixes the partnership's
// domain; later URLs are accepted only when they map to that same domain.
//
// Returns true if the URL was added.  Each early rejection logs a kInfo
// message to handler and returns false.
//
// NOTE(review): the previous comment stated that AddUrl cannot be called
// after Resolve (CHECK failure); no such check is visible in this function --
// confirm against the class declaration.
bool UrlPartnership::AddUrl(const StringPiece& untrimmed_resource_url,
                            MessageHandler* handler) {
  std::string resource_url;
  TrimWhitespace(untrimmed_resource_url, &resource_url);

  if (resource_url.empty()) {
    handler->Message(kInfo, "Cannot rewrite empty URL relative to %s",
                     original_origin_and_path_.possibly_invalid_spec().c_str());
    return false;
  }

  if (!original_origin_and_path_.is_valid()) {
    handler->Message(kInfo, "Cannot rewrite %s relative to invalid url %s",
                     resource_url.c_str(),
                     original_origin_and_path_.possibly_invalid_spec().c_str());
    return false;
  }

  // Resolve against the original request first, so that the options can be
  // consulted about the fully-qualified URL.
  GURL resolved_request = GoogleUrl::Resolve(original_origin_and_path_,
                                             resource_url);
  if (!resolved_request.is_valid()) {
    handler->Message(
        kInfo, "URL %s cannot be resolved relative to base URL %s",
        resource_url.c_str(), original_origin_and_path_.spec().c_str());
    return false;
  }

  if (!rewrite_options_->IsAllowed(GoogleUrl::Spec(resolved_request))) {
    handler->Message(kInfo,
                     "Rewriting URL %s is disallowed via configuration",
                     GoogleUrl::Spec(resolved_request).c_str());
    return false;
  }

  // The lawyer may rewrite resolved_request and reports the mapped domain.
  std::string mapped_domain_name;
  if (!rewrite_options_->domain_lawyer()->MapRequestToDomain(
          original_origin_and_path_, resource_url, &mapped_domain_name,
          &resolved_request, handler)) {
    return false;
  }

  if (gurl_vector_.empty()) {
    // First partner: it establishes the domain for the whole partnership.
    domain_.swap(mapped_domain_name);
    domain_gurl_ = GoogleUrl::Create(domain_).Resolve(
        GoogleUrl::Path(original_origin_and_path_));
  } else if (domain_ != mapped_domain_name) {
    // Later partners must map to the already-established domain.
    return false;
  }

  gurl_vector_.push_back(new GURL(resolved_request));
  IncrementalResolve(static_cast<int>(gurl_vector_.size()) - 1);
  return true;
}
// Drops the most recently added URL from the partnership.  CHECK-fails if the
// partnership is empty.
void UrlPartnership::RemoveLast() {
  CHECK(!gurl_vector_.empty());
  delete gurl_vector_.back();
  gurl_vector_.pop_back();

  // Recompute the common components from scratch, in the absence of the
  // removed partner, by replaying the remaining GURLs in order.
  common_components_.clear();
  const int remaining = gurl_vector_.size();
  for (int index = 0; index < remaining; ++index) {
    IncrementalResolve(index);
  }
}
void UrlPartnership::IncrementalResolve(int index) {
CHECK_LE(0, index);
CHECK_LT(index, static_cast<int>(gurl_vector_.size()));
// When tokenizing a URL, we don't want to omit empty segments
// because we need to avoid aliasing "http://x" with "/http:/x".
bool omit_empty = false;
std::vector<StringPiece> components;
if (index == 0) {
std::string base = GoogleUrl::AllExceptLeaf(*gurl_vector_[0]);
SplitStringPieceToVector(base, "/", &components, omit_empty);
components.pop_back(); // base ends with "/"
CHECK_LE(3U, components.size()); // expect {"http:", "", "x"...}
for (size_t i = 0; i < components.size(); ++i) {
const StringPiece& sp = components[i];
common_components_.push_back(std::string(sp.data(), sp.size()));
}
} else {
// Split each string on / boundaries, then compare these path elements
// until one doesn't match, then shortening common_components.
std::string all_but_leaf = GoogleUrl::AllExceptLeaf(*gurl_vector_[index]);
SplitStringPieceToVector(all_but_leaf, "/", &components, omit_empty);
components.pop_back(); // base ends with "/"
CHECK_LE(3U, components.size()); // expect {"http:", "", "x"...}
if (components.size() < common_components_.size()) {
common_components_.resize(components.size());
}
for (size_t c = 0; c < common_components_.size(); ++c) {
if (common_components_[c] != components[c]) {
common_components_.resize(c);
break;
}
}
}
}
std::string UrlPartnership::ResolvedBase() const {
std::string ret;
if (!common_components_.empty()) {
for (size_t c = 0; c < common_components_.size(); ++c) {
const std::string& component = common_components_[c];
ret += component;
ret += "/"; // initial segment is "http" with no leading /
}
}
return ret;
}
// Returns the relative path of a particular URL that was added into
// the partnership. This requires that Resolve() be called first.
std::string UrlPartnership::RelativePath(int index) const {
std::string resolved_base = ResolvedBase();
std::string spec = gurl_vector_[index]->spec();
CHECK_GE(spec.size(), resolved_base.size());
CHECK_EQ(StringPiece(spec.data(), resolved_base.size()),
StringPiece(resolved_base));
return std::string(spec.data() + resolved_base.size(),
spec.size() - resolved_base.size());
}
} // namespace net_instaweb