// blob: e76bac6175b2054d22b8bc937047c8832a3ad5b1 [file] [log] [blame]
// Copyright 2010 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Author: lsong@google.com (Libo Song)
// jmarantz@google.com (Joshua Marantz)
#include "net/instaweb/apache/instaweb_handler.h"
#include "apr_strings.h"
#include "base/basictypes.h"
#include "base/string_util.h"
#include "net/instaweb/apache/apache_slurp.h"
#include "net/instaweb/apache/apr_statistics.h"
#include "net/instaweb/apache/apr_timer.h"
#include "net/instaweb/apache/header_util.h"
#include "net/instaweb/apache/instaweb_context.h"
#include "net/instaweb/apache/serf_url_async_fetcher.h"
#include "net/instaweb/apache/mod_instaweb.h"
#include "net/instaweb/rewriter/public/add_instrumentation_filter.h"
#include "net/instaweb/rewriter/public/rewrite_driver.h"
#include "net/instaweb/util/public/google_message_handler.h"
#include "net/instaweb/util/public/message_handler.h"
#include "net/instaweb/util/public/simple_meta_data.h"
#include "net/instaweb/util/public/string_util.h"
#include "net/instaweb/util/public/string_writer.h"
#include "http_config.h"
#include "http_core.h"
#include "http_log.h"
#include "http_protocol.h"
namespace net_instaweb {
namespace {
// Name of a temporary response header used to hand a Cache-Control value
// from send_out_headers_and_body() to the repair_caching_header() output
// filter.  The filter removes this header again and writes its value back
// into Cache-Control.
const char kRepairCachingHeader[] = "X-Mod-Pagespeed-Repair";
// Reports whether a response with the given Content-Type header value should
// be compressed.
//
// Any parameters after the first ';' (e.g. "; charset=utf-8") are ignored,
// as is whitespace around the media type.  Matching is case-insensitive,
// because media type names are case-insensitive.
//
// Returns true for every "text/*" type and for the application/* types that
// carry script, JSON or XML payloads; returns false for everything else,
// including a NULL or empty content type.
bool IsCompressibleContentType(const char* content_type) {
  if (content_type == NULL) {
    return false;
  }
  std::string type = content_type;

  // Strip the parameter list, if any.
  const size_t separator_idx = type.find(';');
  if (separator_idx != std::string::npos) {
    type.erase(separator_idx);
  }

  // Trim blanks so that "text/html ; charset=..." and " text/html" match.
  const char kBlanks[] = " \t";
  const size_t first = type.find_first_not_of(kBlanks);
  if (first == std::string::npos) {
    return false;  // Empty or all-blank media type.
  }
  const size_t last = type.find_last_not_of(kBlanks);
  type = type.substr(first, last - first + 1);

  // Media types are ASCII, so a plain ASCII fold is sufficient (and is
  // independent of the process locale).
  for (size_t i = 0; i < type.size(); ++i) {
    const char c = type[i];
    if (c >= 'A' && c <= 'Z') {
      type[i] = static_cast<char>(c - 'A' + 'a');
    }
  }

  // compare() only inspects the prefix; find(...) == 0 would scan the whole
  // string on a mismatch.
  if (type.compare(0, 5, "text/") == 0) {
    return true;
  }
  if (type.compare(0, 12, "application/") != 0) {
    return false;
  }
  return type.find("javascript") != std::string::npos ||
         type.find("json") != std::string::npos ||
         type.find("ecmascript") != std::string::npos ||
         type == "application/livescript" ||
         type == "application/js" ||
         type == "application/jscript" ||
         type == "application/x-js" ||
         type == "application/xhtml+xml" ||
         type == "application/xml";
}
// Default handler when the file is not found
void instaweb_default_handler(const std::string& url, request_rec* request) {
request->status = HTTP_NOT_FOUND;
ap_set_content_type(request, "text/html; charset=utf-8");
ap_rputs("<html><head><title>Not Found</title></head>", request);
ap_rputs("<body><h1>Apache server with mod_pagespeed</h1>OK", request);
ap_rputs("<hr>NOT FOUND:", request);
ap_rputs(url.c_str(), request);
ap_rputs("</body></html>", request);
}
// predeclare to minimize diffs for now. TODO(jmarantz): reorder
//
// Copies |response_headers| into |request|'s outgoing headers and writes
// |output| as the response body.  Declared here because
// handle_as_resource() calls it before its definition further down.
void send_out_headers_and_body(
request_rec* request,
const SimpleMetaData& response_headers,
const std::string& output);
// Attempts to serve |url| as a mod_pagespeed resource.
//
// The return value only reports whether the rewrite driver recognized the
// URL as a mod_pagespeed resource (i.e. the result of FetchResource()); it
// does not say whether the fetch worked.  Whenever the URL is recognized a
// response is written to |request|: the fetched headers and body on success,
// or the default not-found page (after bumping the factory's 404 counter)
// on failure.  The outcome is also visible in the logged status code.
bool handle_as_resource(ApacheRewriteDriverFactory* factory,
                        request_rec* request,
                        const std::string& url) {
  RewriteDriver* driver = factory->GetRewriteDriver();
  SimpleMetaData request_headers;
  SimpleMetaData response_headers;
  std::string body;  // TODO(jmarantz): quit buffering resource output
  StringWriter body_writer(&body);
  MessageHandler* log = factory->message_handler();
  SerfAsyncCallback* fetch_done = new SerfAsyncCallback();

  const bool is_resource = driver->FetchResource(
      url, request_headers, &response_headers, &body_writer, log, fetch_done);

  if (is_resource) {
    log->Message(kInfo, "Fetching resource %s...", url.c_str());

    // The fetch may still be in flight; drive the async fetcher until the
    // callback fires or the configured timeout has elapsed.
    if (!fetch_done->done()) {
      SerfUrlAsyncFetcher* fetcher = factory->serf_url_async_fetcher();
      AprTimer timer;
      const int64 timeout_ms = factory->fetcher_time_out_ms();
      const int64 begin_ms = timer.NowMs();
      int64 elapsed_ms = 0;
      while (!fetch_done->done() && elapsed_ms < timeout_ms) {
        // NOTE(review): the remaining time is computed in milliseconds; the
        // unit Poll() expects is not visible here -- confirm.
        fetcher->Poll(timeout_ms - elapsed_ms);
        elapsed_ms = timer.NowMs() - begin_ms;
      }
      if (!fetch_done->done()) {
        log->Message(kError, "Timeout on url %s", url.c_str());
      }
    }

    if (!fetch_done->success()) {
      log->Message(kError, "Fetch failed for %s, status=%d",
                   url.c_str(), response_headers.status_code());
      factory->Increment404Count();
      instaweb_default_handler(url, request);
    } else {
      log->Message(kInfo, "Fetch succeeded for %s, status=%d",
                   url.c_str(), response_headers.status_code());
      send_out_headers_and_body(request, response_headers, body);
    }
  }

  // Hand back the callback and the driver whether or not the URL was ours.
  fetch_done->Release();
  factory->ReleaseRewriteDriver(driver);
  return is_resource;
}
// Transfers |response_headers| onto |request| and then writes |output| as the
// response body.
//
//  - Content-Type is installed through ap_set_content_type().
//  - Cache-Control is copied like any other header, and is additionally
//    stashed under kRepairCachingHeader with the MOD_PAGESPEED_REPAIR_HEADERS
//    output filter attached, so it can be restored late in the filter chain.
//  - Every other header is appended to request->headers_out.
//  - The DEFLATE output filter is attached for 200 responses whose content
//    type is compressible.
//  - Content-Length is reset to the size of |output|.
void send_out_headers_and_body(
    request_rec* request,
    const SimpleMetaData& response_headers,
    const std::string& output) {
  for (int i = 0; i < response_headers.NumAttributes(); ++i) {
    const char* header_name = response_headers.Name(i);
    const char* header_value = response_headers.Value(i);

    if (strcasecmp(header_name, HttpAttributes::kContentType) == 0) {
      // ap_set_content_type keeps the pointer it is given rather than
      // copying the string, so pass it a copy owned by the request pool.
      ap_set_content_type(request, apr_pstrdup(request->pool, header_value));
      continue;
    }

    if (strcasecmp(header_name, HttpAttributes::kCacheControl) == 0) {
      // Apache configuration directives may set up their own caching
      // headers, so remember the value we want and let a late-running
      // output filter put it back on our cache-extended resources.
      // TODO(jmarantz): Do not use headers_out as our message passing
      // mechanism.  Switch to configuration vectors or something like that
      // instead.
      apr_table_add(request->headers_out, kRepairCachingHeader, header_value);
      ap_add_output_filter("MOD_PAGESPEED_REPAIR_HEADERS", NULL, request,
                           request->connection);
    }

    // apr_table_add copies both the key and the value, so no explicit
    // duplication is needed here.
    apr_table_add(request->headers_out, header_name, header_value);
  }

  // Make sure compression is enabled for successful, compressible responses.
  const bool is_ok = (response_headers.status_code() == HttpStatus::kOK);
  if (is_ok && IsCompressibleContentType(request->content_type)) {
    ap_add_output_filter("DEFLATE", NULL, request, request->connection);
  }

  // The content may have changed, so recompute the content-length before
  // sending the body.
  ap_set_content_length(request, output.size());
  ap_rwrite(output.c_str(), output.size(), request);
}
} // namespace
// Output filter that restores the Cache-Control value stashed under
// kRepairCachingHeader by send_out_headers_and_body().
//
// If the stash header is found among the outgoing headers it is removed and
// its first value is written to Cache-Control, replacing whatever is there.
// The filter then removes itself from the chain and passes the brigade on
// to the next filter, returning that filter's status.
apr_status_t repair_caching_header(ap_filter_t *filter,
                                   apr_bucket_brigade *bb) {
  request_rec* req = filter->r;

  SimpleMetaData outgoing_headers;
  ApacheHeaderToMetaData(req->headers_out, req->status, req->proto_num,
                         &outgoing_headers);

  CharStarVector stashed_values;
  const bool has_stashed_value =
      (outgoing_headers.Lookup(kRepairCachingHeader, &stashed_values) == 1);
  if (has_stashed_value) {
    apr_table_unset(req->headers_out, kRepairCachingHeader);
    apr_table_set(req->headers_out, HttpAttributes::kCacheControl,
                  stashed_values[0]);
  }

  ap_remove_output_filter(filter);
  return ap_pass_brigade(filter->next, bb);
}
// Apache content handler for mod_pagespeed.
//
// Dispatch order:
//   1. Non-GET requests are logged and declined.
//   2. The "mod_pagespeed_statistics" handler dumps the factory's statistics
//      (if any) as the response body.
//   3. The "mod_pagespeed_beacon" handler forwards the request URI to the
//      instrumentation filter and answers HTTP_NO_CONTENT if it accepts the
//      beacon, DECLINED otherwise.
//   4. Anything else is tried as a mod_pagespeed resource, then handed to the
//      slurper if slurping is enabled, and declined otherwise.
//
// Returns OK, DECLINED or HTTP_NO_CONTENT as described above.
int instaweb_handler(request_rec* request) {
  ApacheRewriteDriverFactory* factory =
      InstawebContext::Factory(request->server);

  // Only handle GET requests.
  if (request->method_number != M_GET) {
    ap_log_rerror(APLOG_MARK, APLOG_WARNING, APR_SUCCESS, request,
                  "Not GET request: %d.", request->method_number);
    return DECLINED;
  }

  if (strcmp(request->handler, "mod_pagespeed_statistics") == 0) {
    std::string stats_text;
    SimpleMetaData empty_headers;
    StringWriter stats_writer(&stats_text);
    AprStatistics* stats = factory->statistics();
    if (stats) {
      stats->Dump(&stats_writer, factory->message_handler());
    }
    send_out_headers_and_body(request, empty_headers, stats_text);
    return OK;
  }

  if (strcmp(request->handler, "mod_pagespeed_beacon") == 0) {
    RewriteDriver* beacon_driver = factory->GetRewriteDriver();
    AddInstrumentationFilter* instrumentation =
        beacon_driver->add_instrumentation_filter();
    const int beacon_status =
        (instrumentation &&
         instrumentation->HandleBeacon(request->unparsed_uri))
            ? HTTP_NO_CONTENT
            : DECLINED;
    factory->ReleaseRewriteDriver(beacon_driver);
    return beacon_status;
  }

  // request->unparsed_uri is sometimes relative and sometimes complete: with
  // mod_slurp the rewritten HTML contains complete URLs, and running those
  // through ap_construct_url would yield the host:port prefix twice.  So a
  // URI that already starts with "http://" is used as-is.
  //
  // TODO(jmarantz): Figure out how to do this correctly at all times.
  const bool already_absolute =
      (strncmp(request->unparsed_uri, "http://", 7) == 0);
  const char* full_url =
      already_absolute
          ? request->unparsed_uri
          : ap_construct_url(request->pool, request->unparsed_uri, request);
  std::string url;
  url = full_url;

  if (handle_as_resource(factory, request, url)) {
    return OK;
  }

  if (!factory->slurping_enabled()) {
    ap_log_rerror(APLOG_MARK, APLOG_INFO, APR_SUCCESS, request,
                  "mod_pagespeed: Declined request %s", url.c_str());
    return DECLINED;
  }

  SlurpUrl(url, factory, request);
  if (request->status == HTTP_NOT_FOUND) {
    factory->IncrementSlurpCount();
  }
  return OK;
}
} // namespace net_instaweb