// blob: a1b944c4c9647d3c2e9a41b737290ad330a8892a
/*
* Copyright 2010 Google Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
// Author: jmarantz@google.com (Joshua Marantz)
#include "net/instaweb/rewriter/public/url_partnership.h"
#include <cstddef>
#include "base/logging.h"
#include "net/instaweb/rewriter/public/domain_lawyer.h"
#include "net/instaweb/rewriter/public/rewrite_driver.h"
#include "net/instaweb/rewriter/public/rewrite_options.h"
#include "net/instaweb/rewriter/public/server_context.h"
#include "net/instaweb/rewriter/public/url_namer.h"
#include "pagespeed/kernel/base/message_handler.h"
#include "pagespeed/kernel/base/scoped_ptr.h"
#include "pagespeed/kernel/base/stl_util.h"
#include "pagespeed/kernel/base/string.h"
#include "pagespeed/kernel/base/string_util.h"
#include "pagespeed/kernel/http/google_url.h"
namespace net_instaweb {
// Builds an empty partnership bound to |driver|'s configuration.  The
// constructor stores the pointers returned by driver->options() and by
// driver->server_context()->url_namer(); it does not keep |driver| itself.
UrlPartnership::UrlPartnership(const RewriteDriver* driver)
    : rewrite_options_(driver->options()),
      url_namer_(driver->server_context()->url_namer()) {
}
// The partnership owns the GoogleUrl objects stored in url_vector_ (AddUrl
// releases them into the vector), so they are deleted here.
UrlPartnership::~UrlPartnership() {
  STLDeleteElements(&url_vector_);
}
// Adds a URL to a combination. If it can be legally added, consulting
// the DomainLawyer, then true is returned. AddUrl cannot be called
// after Resolve (CHECK failure).
bool UrlPartnership::AddUrl(const StringPiece& untrimmed_resource_url,
MessageHandler* handler) {
GoogleString resource_url, mapped_domain_name;
bool ret = false;
TrimWhitespace(untrimmed_resource_url, &resource_url);
if (resource_url.empty()) {
handler->Message(
kInfo, "Cannot rewrite empty URL relative to %s",
original_origin_and_path_.spec_c_str());
} else if (!original_origin_and_path_.IsWebValid()) {
handler->Message(
kInfo, "Cannot rewrite %s relative to invalid url %s",
resource_url.c_str(),
original_origin_and_path_.spec_c_str());
} else {
// First resolve the original request to ensure that it is allowed by the
// options.
scoped_ptr<GoogleUrl> resolved_request(
new GoogleUrl(original_origin_and_path_, resource_url));
if (!resolved_request->IsWebValid()) {
handler->Message(
kInfo, "URL %s cannot be resolved relative to base URL %s",
resource_url.c_str(),
original_origin_and_path_.spec_c_str());
} else if (!rewrite_options_->IsAllowed(resolved_request->Spec())) {
handler->Message(kInfo,
"Rewriting URL %s is disallowed via configuration",
resolved_request->spec_c_str());
} else if (FindResourceDomain(original_origin_and_path_,
url_namer_,
rewrite_options_,
resolved_request.get(),
&mapped_domain_name,
handler)) {
if (url_vector_.empty()) {
domain_and_path_prefix_.swap(mapped_domain_name);
ret = true;
} else {
GoogleUrl domain_url(domain_and_path_prefix_);
GoogleUrl mapped_url(mapped_domain_name);
ret = (domain_url.Origin() == mapped_url.Origin());
if (ret && !rewrite_options_->combine_across_paths()) {
ret = (ResolvedBase() == resolved_request->AllExceptLeaf());
}
}
if (ret) {
url_vector_.push_back(resolved_request.release());
int index = url_vector_.size() - 1;
IncrementalResolve(index);
}
}
}
return ret;
}
bool UrlPartnership::FindResourceDomain(const GoogleUrl& base_url,
const UrlNamer* url_namer,
const RewriteOptions* rewrite_options,
GoogleUrl* resource,
GoogleString* domain,
MessageHandler* handler) {
bool ret = false;
GoogleString resource_url;
if (url_namer->Decode(*resource, rewrite_options, NULL, &resource_url)) {
resource->Reset(resource_url);
ret = resource->IsWebValid();
resource->Origin().CopyToString(domain);
} else {
ret = rewrite_options->domain_lawyer()->MapRequestToDomain(
base_url, resource->Spec(), domain,
resource, handler);
}
return ret;
}
// Drops the most recently added URL and recomputes the common path
// components from the URLs that remain.  CHECK-fails if the partnership is
// empty.
void UrlPartnership::RemoveLast() {
  CHECK(!url_vector_.empty());
  delete url_vector_.back();
  url_vector_.pop_back();

  // Re-resolve the entire partnership in the absence of the influence of the
  // ex-partner, by re-adding the remaining URLs one at a time.
  common_components_.clear();
  const int remaining = static_cast<int>(url_vector_.size());
  for (int i = 0; i < remaining; ++i) {
    IncrementalResolve(i);
  }
}
// Discards every URL in the partnership and the common components derived
// from them, then rebases the partnership on the directory
// (AllExceptLeaf()) of |original_request|.
//
// NOTE(review): when |original_request| is not web-valid,
// original_origin_and_path_ is left at whatever value it had before this
// call -- confirm that keeping the previous base is intended.
void UrlPartnership::Reset(const GoogleUrl& original_request) {
  STLDeleteElements(&url_vector_);
  url_vector_.clear();
  common_components_.clear();

  if (!original_request.IsWebValid()) {
    return;
  }
  original_origin_and_path_.Reset(original_request.AllExceptLeaf());
}
void UrlPartnership::IncrementalResolve(int index) {
CHECK_LE(0, index);
CHECK_LT(index, static_cast<int>(url_vector_.size()));
// When tokenizing a URL, we don't want to omit empty segments
// because we need to avoid aliasing "http://x" with "/http:/x".
bool omit_empty = false;
StringPieceVector components;
if (index == 0) {
StringPiece base = url_vector_[0]->AllExceptLeaf();
SplitStringPieceToVector(base, "/", &components, omit_empty);
components.pop_back(); // base ends with "/"
CHECK_LE(3U, components.size()); // expect {"http:", "", "x"...}
for (size_t i = 0; i < components.size(); ++i) {
const StringPiece& sp = components[i];
common_components_.push_back(GoogleString(sp.data(), sp.size()));
}
} else {
// Split each string on / boundaries, then compare these path elements
// until one doesn't match, then shortening common_components.
StringPiece all_but_leaf = url_vector_[index]->AllExceptLeaf();
SplitStringPieceToVector(all_but_leaf, "/", &components, omit_empty);
components.pop_back(); // base ends with "/"
CHECK_LE(3U, components.size()); // expect {"http:", "", "x"...}
if (components.size() < common_components_.size()) {
common_components_.resize(components.size());
}
for (size_t c = 0; c < common_components_.size(); ++c) {
if (common_components_[c] != components[c]) {
common_components_.resize(c);
break;
}
}
}
}
// Rebuilds the shared base URL by concatenating every entry of
// common_components_, each followed by "/".  Returns an empty string when
// there are no common components.
GoogleString UrlPartnership::ResolvedBase() const {
  GoogleString base;
  const size_t num_components = common_components_.size();
  for (size_t i = 0; i < num_components; ++i) {
    // The first component is the scheme (e.g. "http:") with no leading "/",
    // so the separator goes after each component rather than before it.
    base += common_components_[i];
    base += "/";
  }
  return base;
}
// Returns the path of the |index|-th URL in the partnership relative to
// ResolvedBase(), i.e. the part of that URL's spec that follows the resolved
// base.
//
// |index| must identify a URL previously accepted by AddUrl.  CHECK-fails if
// |index| is out of range, or if the URL's spec does not begin with
// ResolvedBase().
GoogleString UrlPartnership::RelativePath(int index) const {
  // Validate the index the same way IncrementalResolve does, so a bad index
  // fails loudly instead of reading outside url_vector_.
  CHECK_LE(0, index);
  CHECK_LT(index, static_cast<int>(url_vector_.size()));

  GoogleString resolved_base = ResolvedBase();
  StringPiece spec = url_vector_[index]->Spec();

  // The resolved base must be a prefix of the spec; check the length before
  // building the prefix view so the comparison never reads past the spec.
  CHECK_GE(spec.size(), resolved_base.size());
  CHECK_EQ(StringPiece(spec.data(), resolved_base.size()),
           StringPiece(resolved_base));

  return GoogleString(spec.data() + resolved_base.size(),
                      spec.size() - resolved_base.size());
}
} // namespace net_instaweb