// blob: 95dde6a2ad5589a9a8486ed03cc177bb19eab34d [file] [log] [blame]
// Copyright 2010 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Author: jmarantz@google.com (Joshua Marantz)
#include "base/logging.h"
#include "net/instaweb/http/public/async_fetch.h"
#include "net/instaweb/http/public/cache_url_async_fetcher.h"
#include "net/instaweb/http/public/http_dump_url_fetcher.h"
#include "net/instaweb/http/public/request_context.h"
#include "net/instaweb/http/public/url_async_fetcher.h"
#include "net/instaweb/public/global_constants.h"
#include "net/instaweb/rewriter/public/domain_lawyer.h"
#include "net/instaweb/rewriter/public/domain_rewrite_filter.h"
#include "net/instaweb/rewriter/public/iframe_fetcher.h"
#include "net/instaweb/rewriter/public/rewrite_options.h"
#include "pagespeed/apache/apache_config.h"
#include "pagespeed/apache/apache_server_context.h"
#include "pagespeed/apache/apache_writer.h"
#include "pagespeed/apache/instaweb_handler.h"
#include "pagespeed/kernel/base/abstract_mutex.h"
#include "pagespeed/kernel/base/basictypes.h"
#include "pagespeed/kernel/base/chunking_writer.h"
#include "pagespeed/kernel/base/condvar.h"
#include "pagespeed/kernel/base/message_handler.h"
#include "pagespeed/kernel/base/ref_counted_ptr.h"
#include "pagespeed/kernel/base/scoped_ptr.h"
#include "pagespeed/kernel/base/string.h"
#include "pagespeed/kernel/base/string_util.h"
#include "pagespeed/kernel/base/thread_system.h"
#include "pagespeed/kernel/base/timer.h"
#include "pagespeed/kernel/html/html_keywords.h"
#include "pagespeed/kernel/http/google_url.h"
#include "pagespeed/kernel/http/http_names.h"
#include "pagespeed/kernel/http/request_headers.h"
#include "pagespeed/kernel/http/response_headers.h"
#include "pagespeed/system/system_rewrite_options.h"
// The Apache headers must be after instaweb headers. Otherwise, the
// compiler will complain
// "strtoul_is_not_a_portable_function_use_strtol_instead".
// Also note the use of 'OK' as an Instaweb enum and as an Apache #define.
#include "http_protocol.h"
#include "httpd.h"
namespace net_instaweb {
namespace {
// Some of the sites we are trying to slurp have pagespeed enabled already.
// We actually want to start with the non-pagespeed-enabled site. But we'd
// rather not send ModPagespeed=off to servers that are not expecting it.
//
// StrippingFetch performs a blocking fetch of a URL, and -- if the first
// response indicates the origin already runs mod_pagespeed -- retries once
// with ModPagespeed=off appended as a query parameter.
class StrippingFetch : public StringAsyncFetch {
 public:
  // Does not take ownership of any pointer argument; all must outlive
  // this object.  'ctx' is handed to the base StringAsyncFetch.
  StrippingFetch(const GoogleString& url_input,
                 const DomainLawyer* lawyer,
                 UrlAsyncFetcher* fetcher,
                 ThreadSystem* thread_system,
                 const RequestContextPtr& ctx,
                 MessageHandler* message_handler)
      : StringAsyncFetch(ctx),
        fetcher_(fetcher),
        lawyer_(lawyer),
        url_(url_input),
        message_handler_(message_handler),
        stripped_(false),
        mutex_(thread_system->NewMutex()),
        condvar_(mutex_->NewCondvar()) {
  }

  // Blocking fetch: kicks off the asynchronous fetch, then waits on the
  // condvar until HandleDone() marks completion (possibly after a second,
  // "stripping" pass).  Returns whether the final fetch succeeded.
  bool Fetch() {
    // To test sharding domains from a slurp of a site that does not support
    // sharded domains, we apply mapping origin domain here. Simply map all
    // the shards back into the origin domain in pagespeed.conf.
    request_headers()->Add(kPageSpeedHeader, "off");
    request_headers()->Add(kModPagespeedHeader, "off");

    GoogleString origin_url;
    GoogleString host_header;
    bool is_proxy = false;
    if (lawyer_->MapOrigin(url_, &origin_url, &host_header, &is_proxy)) {
      url_ = origin_url;
      request_headers()->Replace(HttpAttributes::kHost, host_header);
    }

    fetcher_->Fetch(url_, message_handler_, this);
    {
      ScopedMutex lock(mutex_.get());
      while (!done()) {
        // TimedWait rather than Wait: re-check done() periodically so a
        // missed signal or spurious wakeup cannot hang us forever.
        condvar_->TimedWait(Timer::kSecondMs);
      }
    }
    return success();
  }

  virtual void HandleDone(bool success) {
    bool done = true;
    if (!success) {
      set_success(false);
    } else if (stripped_) {
      // Second pass -- declare completion.
      set_success(true);
    } else if ((response_headers()->Lookup1(kModPagespeedHeader) != NULL) ||
               (response_headers()->Lookup1(kPageSpeedHeader) != NULL)) {
      // First pass -- the slurped site evidently had mod_pagespeed already
      // enabled. Turn it off and re-fetch.
      // Note that the request headers might be sanitized as we are entering
      // the origin network, so there's no guarantees. We might need to
      // send in the query-params after all.
      LOG(ERROR) << "URL " << url_ << " already has mod_pagespeed. Stripping.";
      Reset();  // Clears output buffer and response_headers.
      GoogleString::size_type question = url_.find('?');
      if (question == GoogleString::npos) {
        url_ += "?ModPagespeed=off";
      } else {
        url_ += "&ModPagespeed=off";
      }
      stripped_ = true;
      fetcher_->Fetch(url_, message_handler_, this);
      done = false;
    } else {
      // First-pass -- the origin site did not have mod_pagespeed so no need
      // for a second pass.
      set_success(true);
    }
    if (done) {
      ScopedMutex lock(mutex_.get());
      set_done(true);
      condvar_->Signal();
      // Don't "delete this" -- this is allocated on the stack in ApacheSlurp.
    }
  }

 private:
  UrlAsyncFetcher* fetcher_;
  const DomainLawyer* lawyer_;
  GoogleString url_;
  MessageHandler* message_handler_;
  bool stripped_;  // True once the ModPagespeed=off re-fetch was issued.
  scoped_ptr<ThreadSystem::CondvarCapableMutex> mutex_;
  scoped_ptr<ThreadSystem::Condvar> condvar_;

  DISALLOW_COPY_AND_ASSIGN(StrippingFetch);
};
} // namespace
// Proxies the request's origin content back through Apache.  Returns false
// only when the URL's host does not end in the configured proxy_suffix, so
// another Apache handler can claim the request; otherwise the request is
// fully handled here (success or error) and we return true.
bool InstawebHandler::ProxyUrl() {
  GoogleString origin_host;
  GoogleString stripped_url = stripped_gurl_.Spec().as_string();
  const DomainLawyer* lawyer = options()->domain_lawyer();
  const GoogleString& proxy_suffix = lawyer->proxy_suffix();
  if (!proxy_suffix.empty() &&
      !lawyer->StripProxySuffix(stripped_gurl_, &stripped_url, &origin_host)) {
    // This is simply a request for a URL whose host does not end in
    // proxy_suffix.  Let another Apache handler handle it; it might
    // be a static asset, mod_pagespeed_example, etc.  Note that this
    // typically only happens when testing via forward proxy setting
    // in the browser.
    return false;
  }

  // An auth failure has already been answered (403 or redirect) inside
  // AuthenticateProxy, so the request counts as handled either way.
  if (!AuthenticateProxy()) {
    return true;
  }

  // Figure out if we should be using a slurp fetcher rather than the default
  // system fetcher.
  UrlAsyncFetcher* fetcher = server_context_->DefaultSystemFetcher();
  scoped_ptr<UrlAsyncFetcher> fetcher_storage;

  if (options()->test_proxy() && !options()->test_proxy_slurp().empty()) {
    // Serve from a previously recorded slurp directory instead of the net.
    fetcher_storage.reset(new HttpDumpUrlFetcher(
        options()->test_proxy_slurp(), server_context_->file_system(),
        server_context_->timer()));
    fetcher = fetcher_storage.get();
  } else if (!proxy_suffix.empty() && options()->mob_iframe() &&
             !options()->mob_config()) {
    // Reset stripped_url to its original state and let the iframe-fetcher
    // do the stripping again so it can redirect to the origin if the UA
    // cannot support iframe mobilization.
    stripped_url = stripped_gurl_.Spec().as_string();
    fetcher_storage.reset(new IframeFetcher(
        options(), server_context_->user_agent_matcher(), fetcher));
    fetcher = fetcher_storage.get();
  } else if (!proxy_suffix.empty()) {
    // Do some extra caching when using proxy_suffix (but we don't want it in
    // other modes since they are used for things like loadtesting)

    // Passing the 'fetcher' explicitly here rather than calling
    // CreateCacheFetcher() avoids getting the driver's loopback
    // fetcher.  We don't want the loopback fetcher because we
    // are proxying an external site.
    const GoogleString& fragment = options()->cache_fragment().empty()
        ? request_context_->minimal_private_suffix()
        : options()->cache_fragment();
    // Note that the cache fetcher is aware of request methods, so it won't
    // cache POSTs improperly.
    CacheUrlAsyncFetcher* cache_url_async_fetcher =
        server_context_->CreateCustomCacheFetcher(options(), fragment,
                                                  NULL, fetcher);
    cache_url_async_fetcher->set_ignore_recent_fetch_failed(true);
    fetcher_storage.reset(cache_url_async_fetcher);
    fetcher = fetcher_storage.get();
  }

  MessageHandler* handler = server_context_->message_handler();
  RequestContextPtr request_context(
      // TODO(sligocki): Do we want custom options here?
      new RequestContext(options()->ComputeHttpOptions(),
                         server_context_->thread_system()->NewMutex(),
                         server_context_->timer()));
  // Stack-allocated: StrippingFetch::Fetch() blocks until completion, so
  // the object cannot outlive this frame.
  StrippingFetch fetch(stripped_url, options()->domain_lawyer(),
                       fetcher, server_context_->thread_system(),
                       request_context, handler);
  // Borrows request_headers_; the fetch must not outlive this handler.
  fetch.set_request_headers(request_headers_.get());

  // Handle a POST if needed: forward the request body to the origin.
  if (request_->method_number == M_POST) {
    apr_status_t ret;
    GoogleString payload;
    if (!parse_body_from_post(request_, &payload, &ret)) {
      handler->Message(kInfo, "Trouble parsing POST of %s.",
                       stripped_url.c_str());
      request_->status = HttpStatus::kBadRequest;
      ap_send_error_response(request_, 0);
      return true;
    } else {
      fetch.request_headers()->set_method(RequestHeaders::kPost);
      fetch.request_headers()->set_message_body(payload);
    }
  }

  if (!origin_host.empty()) {
    // origin_host has proxy_suffix (if any) stripped out, allowing us
    // to fetch the origin content.
    fetch.request_headers()->Replace(HttpAttributes::kHost, origin_host);
  }

  bool fetch_succeeded = fetch.Fetch();
  if (fetch_succeeded) {
    if (fetch.response_headers()->status_code() != HttpStatus::kOK) {
      // For redirects, we will need to update the Location: header.
      // We have to do it here rather than relying on normal rewriting
      // via DomainRewriteFilter since Apache 2.4's implementation of
      // AddOutputFilterByType doesn't apply to non-200s, and the check
      // doesn't appear to be possible to disable just for us.
      //
      // Similarly other non-200s may have cookies, so may also need patching.
      // (200s will get handled by DomainRewriteFilter via normal rewriting).
      DomainRewriteFilter::UpdateDomainHeaders(
          stripped_gurl_, server_context_, server_context_->global_options(),
          fetch.response_headers());
    }

    // We always disable downstream header filters when sending out
    // slurped resources, since we've captured them from the origin
    // in the fetch we did to write the slurp.
    ApacheWriter apache_writer(request_, server_context_->thread_system());

    // TODO(jmarantz): This is a bit of a hack, but we need to be able to
    // tweak headers from pagespeed.conf while experimenting with mobilization.
    // Longer term we might not need this, or we might want to make this
    // a separate option.
    if (!options()->Enabled(RewriteOptions::kMobilize)) {
      apache_writer.set_disable_downstream_header_filters(true);
    }
    // ChunkingWriter flushes to Apache every slurp_flush_limit bytes.
    ChunkingWriter chunking_writer(
        &apache_writer, options()->slurp_flush_limit());
    apache_writer.OutputHeaders(fetch.response_headers());
    chunking_writer.Write(fetch.buffer(), handler);
  } else {
    handler->Message(kInfo, "mod_pagespeed: slurp of url %s failed.\n"
                     "Request Headers: %s\n\nResponse Headers: %s",
                     stripped_url.c_str(),
                     fetch.request_headers()->ToString().c_str(),
                     fetch.response_headers()->ToString().c_str());
  }

  if (!fetch_succeeded || fetch.response_headers()->IsErrorStatus()) {
    server_context_->ReportSlurpNotFound(stripped_url, request_);
  }
  return true;
}
// Checks the proxy-authentication cookie configured via the ProxyAuth
// directive, if any.  Returns true when the request may proceed (no auth
// is configured, or the cookie matches).  On failure, writes the response
// (403 Forbidden, or a redirect to the configured URL) and returns false.
bool InstawebHandler::AuthenticateProxy() {
  StringPiece cookie_name, cookie_value, redirect;
  const ApacheConfig* config = ApacheConfig::DynamicCast(options());
  if (config->GetProxyAuth(&cookie_name, &cookie_value, &redirect)) {
    // An empty configured value means "cookie merely has to be present".
    bool ok = cookie_value.empty()
        ? request_headers_->HasCookie(cookie_name)
        : request_headers_->HasCookieValue(cookie_name, cookie_value);
    if (!ok) {
      ResponseHeaders response_headers;
      if (redirect.empty()) {
        // Bug fix: the 403 headers were previously constructed but never
        // written out, leaving the client with an empty default response.
        response_headers.SetStatusAndReason(HttpStatus::kForbidden);
        send_out_headers_and_body(request_, response_headers, "");
      } else {
        response_headers.SetStatusAndReason(HttpStatus::kTemporaryRedirect);
        response_headers.Add(HttpAttributes::kContentType, "text/html");
        response_headers.Add(HttpAttributes::kLocation, redirect);
        // Escape the redirect target before embedding it in HTML.
        GoogleString redirect_escaped;
        HtmlKeywords::Escape(redirect, &redirect_escaped);
        send_out_headers_and_body(request_, response_headers, StrCat(
            "Redirecting to ", redirect_escaped));
      }
      return false;
    }
  }
  return true;
}
} // namespace net_instaweb