| // Copyright 2010 Google Inc. |
| // |
| // Licensed under the Apache License, Version 2.0 (the "License"); |
| // you may not use this file except in compliance with the License. |
| // You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, software |
| // distributed under the License is distributed on an "AS IS" BASIS, |
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| // See the License for the specific language governing permissions and |
| // limitations under the License. |
| // |
| // Author: lsong@google.com (Libo Song) |
| // jmarantz@google.com (Joshua Marantz) |
| |
| #include "net/instaweb/apache/instaweb_handler.h" |
| |
| #include "apr_strings.h" |
| #include "base/basictypes.h" |
| #include "base/scoped_ptr.h" |
| #include "net/instaweb/apache/apache_slurp.h" |
| #include "net/instaweb/apache/apr_statistics.h" |
| #include "net/instaweb/apache/apr_timer.h" |
| #include "net/instaweb/apache/header_util.h" |
| #include "net/instaweb/apache/instaweb_context.h" |
| #include "net/instaweb/apache/serf_async_callback.h" |
| #include "net/instaweb/apache/serf_url_async_fetcher.h" |
| #include "net/instaweb/apache/mod_instaweb.h" |
| #include "net/instaweb/rewriter/public/add_instrumentation_filter.h" |
| #include "net/instaweb/rewriter/public/output_resource.h" |
| #include "net/instaweb/rewriter/public/rewrite_driver.h" |
| #include "net/instaweb/util/public/google_message_handler.h" |
| #include "net/instaweb/util/public/message_handler.h" |
| #include "net/instaweb/util/public/simple_meta_data.h" |
| #include "net/instaweb/util/public/string_util.h" |
| #include "net/instaweb/util/public/string_writer.h" |
| #include "http_config.h" |
| #include "http_core.h" |
| #include "http_log.h" |
| #include "http_protocol.h" |
| |
| namespace net_instaweb { |
| |
| namespace { |
| |
| const char kStatisticsHandler[] = "mod_pagespeed_statistics"; |
| const char kBeaconHandler[] = "mod_pagespeed_beacon"; |
| const char kResourceUrlNote[] = "mod_pagespeed_resource"; |
| |
// Reports whether a response carrying the given Content-Type header value
// should be compressed.
//
// content_type: the raw header value (may be NULL, may carry parameters such
//     as "; charset=utf-8").
// Returns true for every "text/*" type and for the script, JSON and XML
// flavored "application/*" types listed below; false otherwise, including
// for a NULL or empty value.
//
// Media types are case-insensitive and optional whitespace may precede the
// ';' that introduces parameters, so the type is trimmed and lower-cased
// (ASCII only) before it is matched.
bool IsCompressibleContentType(const char* content_type) {
  if (content_type == NULL) {
    return false;
  }

  // Keep only the "type/subtype" part: drop parameters, then any whitespace
  // that separated the type from the ';'.
  std::string type = content_type;
  const size_t separator_idx = type.find(';');
  if (separator_idx != std::string::npos) {
    type.erase(separator_idx);
  }
  const size_t last_idx = type.find_last_not_of(" \t");
  type.erase(last_idx == std::string::npos ? 0 : last_idx + 1);

  // ASCII-only lower-casing; deliberately locale independent.
  for (size_t i = 0; i < type.size(); ++i) {
    const char c = type[i];
    if (c >= 'A' && c <= 'Z') {
      type[i] = static_cast<char>(c - 'A' + 'a');
    }
  }

  // Prefix checks via compare() so that only the start of the string is
  // examined.
  if (type.compare(0, 5, "text/") == 0) {
    return true;
  }
  if (type.compare(0, 12, "application/") != 0) {
    return false;
  }
  return type.find("javascript") != std::string::npos ||
         type.find("json") != std::string::npos ||
         type.find("ecmascript") != std::string::npos ||
         type == "application/livescript" ||
         type == "application/js" ||
         type == "application/jscript" ||
         type == "application/x-js" ||
         type == "application/xhtml+xml" ||
         type == "application/xml";
}
| |
| // Default handler when the file is not found |
| void instaweb_default_handler(const std::string& url, request_rec* request) { |
| request->status = HTTP_NOT_FOUND; |
| ap_set_content_type(request, "text/html; charset=utf-8"); |
| ap_rputs("<html><head><title>Not Found</title></head>", request); |
| ap_rputs("<body><h1>Apache server with mod_pagespeed</h1>OK", request); |
| ap_rputs("<hr>NOT FOUND:", request); |
| ap_rputs(url.c_str(), request); |
| ap_rputs("</body></html>", request); |
| } |
| |
| // predeclare to minimize diffs for now. TODO(jmarantz): reorder |
| void send_out_headers_and_body( |
| request_rec* request, |
| const SimpleMetaData& response_headers, |
| const std::string& output); |
| |
| // Determines whether the url can be handled as a mod_pagespeed resource, |
| // and handles it, returning true. A 'true' routine means that this |
| // method believed the URL was a mod_pagespeed resource -- it does not |
| // imply that it was handled successfully. That information will be |
| // in the status code in the response headers. |
| bool handle_as_resource(ApacheRewriteDriverFactory* factory, |
| request_rec* request, |
| const std::string& url) { |
| RewriteDriver* rewrite_driver = factory->NewRewriteDriver(); |
| |
| SimpleMetaData request_headers, response_headers; |
| int n = arraysize(RewriteDriver::kPassThroughRequestAttributes); |
| for (int i = 0; i < n; ++i) { |
| const char* value = apr_table_get( |
| request->headers_in, |
| RewriteDriver::kPassThroughRequestAttributes[i]); |
| if (value != NULL) { |
| request_headers.Add(RewriteDriver::kPassThroughRequestAttributes[i], |
| value); |
| } |
| } |
| std::string output; // TODO(jmarantz): quit buffering resource output |
| StringWriter writer(&output); |
| MessageHandler* message_handler = factory->message_handler(); |
| SerfAsyncCallback* callback = new SerfAsyncCallback( |
| &response_headers, &writer); |
| bool handled = rewrite_driver->FetchResource( |
| url, request_headers, callback->response_headers(), callback->writer(), |
| message_handler, callback); |
| if (handled) { |
| message_handler->Message(kInfo, "Fetching resource %s...", url.c_str()); |
| if (!callback->done()) { |
| UrlPollableAsyncFetcher* sub_resource_fetcher = |
| factory->SubResourceFetcher(); |
| AprTimer timer; |
| int64 max_ms = factory->fetcher_time_out_ms(); |
| for (int64 start_ms = timer.NowMs(), now_ms = start_ms; |
| !callback->done() && now_ms - start_ms < max_ms; |
| now_ms = timer.NowMs()) { |
| int64 remaining_us = max_ms - (now_ms - start_ms); |
| sub_resource_fetcher->Poll(remaining_us); |
| } |
| |
| if (!callback->done()) { |
| message_handler->Message(kError, "Timeout on url %s", url.c_str()); |
| } |
| } |
| if (callback->success()) { |
| message_handler->Message(kInfo, "Fetch succeeded for %s, status=%d", |
| url.c_str(), response_headers.status_code()); |
| send_out_headers_and_body(request, response_headers, output); |
| } else { |
| message_handler->Message(kError, "Fetch failed for %s, status=%d", |
| url.c_str(), response_headers.status_code()); |
| factory->Increment404Count(); |
| instaweb_default_handler(url, request); |
| } |
| } else { |
| callback->Done(false); |
| } |
| callback->Release(); |
| factory->ReleaseRewriteDriver(rewrite_driver); |
| return handled; |
| } |
| |
| void send_out_headers_and_body( |
| request_rec* request, |
| const SimpleMetaData& response_headers, |
| const std::string& output) { |
| if (response_headers.status_code() != 0) { |
| request->status = response_headers.status_code(); |
| } |
| for (int idx = 0; idx < response_headers.NumAttributes(); ++idx) { |
| const char* name = response_headers.Name(idx); |
| const char* value = response_headers.Value(idx); |
| if (strcasecmp(name, HttpAttributes::kContentType) == 0) { |
| // ap_set_content_type does not make a copy of the string, we need |
| // to duplicate it. |
| char* ptr = apr_pstrdup(request->pool, value); |
| ap_set_content_type(request, ptr); |
| } else { |
| if (strcasecmp(name, HttpAttributes::kCacheControl) == 0) { |
| SetupCacheRepair(value, request); |
| } |
| // apr_table_add makes copies of both head key and value, so we do not |
| // have to duplicate them. |
| apr_table_add(request->headers_out, name, value); |
| } |
| } |
| if (response_headers.status_code() == HttpStatus::kOK && |
| IsCompressibleContentType(request->content_type)) { |
| // Make sure compression is enabled for this response. |
| ap_add_output_filter("DEFLATE", NULL, request, request->connection); |
| } |
| |
| // Recompute the content-length, because the content may have changed. |
| ap_set_content_length(request, output.size()); |
| // Send the body |
| ap_rwrite(output.c_str(), output.size(), request); |
| } |
| |
| } // namespace |
| |
| apr_status_t repair_caching_header(ap_filter_t *filter, |
| apr_bucket_brigade *bb) { |
| request_rec* request = filter->r; |
| RepairCachingHeaders(request); |
| ap_remove_output_filter(filter); |
| return ap_pass_brigade(filter->next, bb); |
| } |
| |
| apr_status_t instaweb_handler(request_rec* request) { |
| apr_status_t ret = DECLINED; |
| const char* url = apr_table_get(request->notes, kResourceUrlNote); |
| if (url != NULL) { |
| ApacheRewriteDriverFactory* factory = |
| InstawebContext::Factory(request->server); |
| ret = OK; |
| |
| // Only handle GET request |
| if (request->method_number != M_GET) { |
| ap_log_rerror(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, request, |
| "Not GET request: %d.", request->method_number); |
| ret = DECLINED; |
| } else if (strcmp(request->handler, kStatisticsHandler) == 0) { |
| std::string output; |
| SimpleMetaData response_headers; |
| StringWriter writer(&output); |
| AprStatistics* statistics = factory->statistics(); |
| if (statistics) { |
| statistics->Dump(&writer, factory->message_handler()); |
| } |
| send_out_headers_and_body(request, response_headers, output); |
| } else if (strcmp(request->handler, kBeaconHandler) == 0) { |
| RewriteDriver* driver = factory->NewRewriteDriver(); |
| AddInstrumentationFilter* aif = driver->add_instrumentation_filter(); |
| if (aif && aif->HandleBeacon(request->unparsed_uri)) { |
| ret = HTTP_NO_CONTENT; |
| } else { |
| ret = DECLINED; |
| } |
| factory->ReleaseRewriteDriver(driver); |
| } else { |
| if (!handle_as_resource(factory, request, url)) { |
| if (factory->slurping_enabled()) { |
| SlurpUrl(url, factory, request); |
| if (request->status == HTTP_NOT_FOUND) { |
| factory->IncrementSlurpCount(); |
| } |
| } else { |
| ret = DECLINED; |
| } |
| } |
| } |
| } |
| return ret; |
| } |
| |
| // This translator must be inserted into the translate_name chain |
| // prior to mod_rewrite. By responding "OK" we prevent mod_rewrite |
| // from running on this request and borking URL names that need to be |
| // handled by mod_pagespeed. |
| // |
| // This hack seems to be the most robust way to immunize mod_pagespeed |
| // from when mod_rewrite rewrites the URL. We still need mod_rewrite |
| // to do required complex processing of the filename (e.g. prepending |
| // the DocumentRoot) so mod_authz_host is happy. |
| // |
| // One alternative strategy is to return OK to bypass mod_rewrite |
| // entirely, but then we'd have to duplicate the functionality in |
| // mod_rewrite that prepends the DocumentRoot, which is itself |
| // complex. See mod_rewrite.c:hook_fixup(), and look for calls to |
| // ap_document_root(). |
| // |
| // Or we could return DECLINED but set a note "mod_rewrite_rewritten" |
| // to try to convince mod_rewrite to leave our URLs alone. |
| // |
| // Another strategy is to return OK but leave request->filename NULL. |
| // In that case, the server kernel generates an ominious 'info' |
| // message: |
| // |
| // [info] [client ::1] Module bug? Request filename is missing for URI |
| // /mod_pagespeed_statistics |
| // |
| // This is generated by httpd/src/server/request.c line 486, and right |
| // above that is this comment: |
| // |
| // "OK" as a response to a real problem is not _OK_, but to |
| // allow broken modules to proceed, we will permit the |
| // not-a-path filename to pass the following two tests. This |
| // behavior may be revoked in future versions of Apache. We |
| // still must catch it later if it's heading for the core |
| // handler. Leave INFO notes here for module debugging. |
| // |
| // It seems like the simplest, most robust approach is to squirrel |
| // away the original URL *before* mod_rewrite sees it in |
| // kResourceUrlNote "mod_pagespeed_url" and use *that* rather than |
| // request->unparsed_uri (which mod_rewrite might have mangled) when |
| // procesing the request. |
| apr_status_t save_url_for_instaweb_handler(request_rec *request) { |
| char* url = NULL; |
| bool need_copy = true; |
| |
| /* |
| * In some contexts we are seeing relative URLs passed |
| * into request->unparsed_uri. But when using mod_slurp, the rewritten |
| * HTML contains complete URLs, so this construction yields the host:port |
| * prefix twice. |
| * |
| * TODO(jmarantz): Figure out how to do this correctly at all times. |
| */ |
| if (strncmp(request->unparsed_uri, "http://", 7) == 0) { |
| url = request->unparsed_uri; |
| } else { |
| url = ap_construct_url(request->pool, request->unparsed_uri, request); |
| need_copy = false; |
| } |
| |
| StringPiece url_piece(url); |
| bool bypass_mod_rewrite = false; |
| if (url_piece.ends_with(kStatisticsHandler) || |
| url_piece.ends_with(kBeaconHandler)) { |
| bypass_mod_rewrite = true; |
| } else { |
| ApacheRewriteDriverFactory* factory = |
| InstawebContext::Factory(request->server); |
| RewriteDriver* rewrite_driver = factory->NewRewriteDriver(); |
| RewriteFilter* filter; |
| scoped_ptr<OutputResource> output_resource( |
| rewrite_driver->DecodeOutputResource(url, &filter)); |
| if (output_resource.get() != NULL) { |
| bypass_mod_rewrite = true; |
| } |
| factory->ReleaseRewriteDriver(rewrite_driver); |
| } |
| |
| if (bypass_mod_rewrite) { |
| if (need_copy) { |
| apr_table_set(request->notes, kResourceUrlNote, url); |
| } else { |
| apr_table_setn(request->notes, kResourceUrlNote, url); |
| } |
| } |
| return DECLINED; |
| } |
| |
| } // namespace net_instaweb |