| // Copyright 2010 Google Inc. |
| // |
| // Licensed under the Apache License, Version 2.0 (the "License"); |
| // you may not use this file except in compliance with the License. |
| // You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, software |
| // distributed under the License is distributed on an "AS IS" BASIS, |
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| // See the License for the specific language governing permissions and |
| // limitations under the License. |
| // |
| // Author: lsong@google.com (Libo Song) |
| // jmarantz@google.com (Joshua Marantz) |
| |
| #include "net/instaweb/apache/instaweb_handler.h" |
| |
| #include <cstddef> |
| #include <set> |
| #include <vector> |
| |
| #include "net/instaweb/apache/apache_config.h" |
| #include "net/instaweb/apache/apache_message_handler.h" |
| #include "net/instaweb/apache/apache_request_context.h" |
| #include "net/instaweb/apache/apache_rewrite_driver_factory.h" |
| #include "net/instaweb/apache/apache_server_context.h" |
| #include "net/instaweb/apache/apache_slurp.h" |
| #include "net/instaweb/apache/apache_writer.h" |
| #include "net/instaweb/apache/apr_timer.h" |
| #include "net/instaweb/apache/header_util.h" |
| #include "net/instaweb/apache/in_place_resource_recorder.h" |
| #include "net/instaweb/apache/instaweb_context.h" |
| #include "net/instaweb/apache/mod_instaweb.h" |
| #include "net/instaweb/automatic/public/proxy_fetch.h" |
| #include "net/instaweb/htmlparse/public/html_keywords.h" |
| #include "net/instaweb/http/public/async_fetch.h" |
| #include "net/instaweb/http/public/cache_url_async_fetcher.h" |
| #include "net/instaweb/http/public/content_type.h" |
| #include "net/instaweb/http/public/meta_data.h" |
| #include "net/instaweb/http/public/request_context.h" |
| #include "net/instaweb/http/public/request_headers.h" |
| #include "net/instaweb/http/public/response_headers.h" |
| #include "net/instaweb/http/public/sync_fetcher_adapter_callback.h" |
| #include "net/instaweb/public/global_constants.h" |
| #include "net/instaweb/rewriter/public/domain_lawyer.h" |
| #include "net/instaweb/rewriter/public/resource_fetch.h" |
| #include "net/instaweb/rewriter/public/rewrite_driver.h" |
| #include "net/instaweb/rewriter/public/rewrite_options.h" |
| #include "net/instaweb/rewriter/public/rewrite_stats.h" |
| #include "net/instaweb/rewriter/public/server_context.h" |
| #include "net/instaweb/rewriter/public/static_asset_manager.h" |
| #include "net/instaweb/system/public/handlers.h" |
| #include "net/instaweb/system/public/system_caches.h" |
| #include "net/instaweb/util/public/abstract_mutex.h" |
| #include "net/instaweb/util/public/basictypes.h" |
| #include "net/instaweb/util/public/condvar.h" |
| #include "net/instaweb/util/public/escaping.h" |
| #include "net/instaweb/util/public/google_url.h" |
| #include "net/instaweb/util/public/message_handler.h" |
| #include "net/instaweb/util/public/query_params.h" |
| #include "net/instaweb/util/public/ref_counted_ptr.h" |
| #include "net/instaweb/util/public/scoped_ptr.h" |
| #include "net/instaweb/util/public/statistics.h" |
| #include "net/instaweb/util/public/statistics_logger.h" |
| #include "net/instaweb/util/public/string.h" |
| #include "net/instaweb/util/public/string_util.h" |
| #include "net/instaweb/util/public/string_writer.h" |
| #include "net/instaweb/util/public/thread_system.h" |
| #include "net/instaweb/util/public/timer.h" |
| #include "net/instaweb/util/public/writer.h" |
| |
| #include "http_config.h" |
| #include "http_core.h" |
| #include "http_protocol.h" |
| #include "http_request.h" |
| #include "net/instaweb/apache/apache_logging_includes.h" |
| |
| namespace net_instaweb { |
| |
| extern const char* JS_mod_pagespeed_console_js; |
| extern const char* CSS_mod_pagespeed_console_css; |
| extern const char* HTML_mod_pagespeed_console_body; |
| |
| namespace { |
| |
| const char kStatisticsHandler[] = "mod_pagespeed_statistics"; |
| const char kTempStatisticsGraphsHandler[] = |
| "mod_pagespeed_temp_statistics_graphs"; |
| const char kConsoleHandler[] = "pagespeed_console"; |
| const char kGlobalStatisticsHandler[] = "mod_pagespeed_global_statistics"; |
| const char kMessageHandler[] = "mod_pagespeed_message"; |
| const char kLogRequestHeadersHandler[] = "mod_pagespeed_log_request_headers"; |
| const char kGenerateResponseWithOptionsHandler[] = |
| "mod_pagespeed_response_options_handler"; |
| const char kResourceUrlNote[] = "mod_pagespeed_resource"; |
| const char kResourceUrlNo[] = "<NO>"; |
| const char kResourceUrlYes[] = "<YES>"; |
| |
| // Set the maximum size we allow for processing a POST body. The limit of 128k |
| // is based on a best guess for the maximum size of beacons required for |
| // critical CSS. |
| // TODO(jud): Factor this out, potentially into an option, and pass the value to |
| // any filters using beacons with POST requests (CriticalImagesBeaconFilter for |
| // instance). |
| const size_t kMaxPostSizeBytes = 131072; |
| |
| // Links an apache request_rec* to an AsyncFetch, adding the ability to |
| // block based on a condition variable. |
| // |
| // TODO(jmarantz): consider refactoring to share code with ProxyFetch, though |
| // this implementation does not imply any rewriting; it's just a caching |
| // proxy. |
| class ApacheProxyFetch : public AsyncFetchUsingWriter { |
| public: |
| ApacheProxyFetch(const GoogleString& mapped_url, ThreadSystem* thread_system, |
| RewriteDriver* driver, request_rec* request) |
| : AsyncFetchUsingWriter(driver->request_context(), &apache_writer_), |
| mapped_url_(mapped_url), |
| apache_writer_(request), |
| driver_(driver), |
| mutex_(thread_system->NewMutex()), |
| condvar_(mutex_->NewCondvar()), |
| done_(false), |
| handle_error_(true), |
| status_ok_(false) { |
| // We are proxying content, and the caching in the http configuration |
| // should not apply; we want to use the caching from the proxy. |
| apache_writer_.set_disable_downstream_header_filters(true); |
| apache_writer_.set_strip_cookies(true); |
| ApacheRequestToRequestHeaders(*request, request_headers()); |
| request_headers()->RemoveAll(HttpAttributes::kCookie); |
| request_headers()->RemoveAll(HttpAttributes::kCookie2); |
| } |
| |
| virtual ~ApacheProxyFetch() { |
| } |
| |
| // When used for in-place resource optimization in mod_pagespeed, we have |
| // disabled fetching resources that are not in cache, otherwise we may wind |
| // up doing a loopback fetch to the same Apache server. So the |
| // CacheUrlAsyncFetcher will return a 501 or 404 but we do not want to |
| // send that to the client. So for ipro we suppress resporting errors |
| // in this flow. |
| // |
| // TODO(jmarantz): consider allowing serf fetches in ipro when running as |
| // a reverse-proxy. |
| void set_handle_error(bool x) { handle_error_ = x; } |
| |
| virtual void HandleHeadersComplete() { |
| int status_code = response_headers()->status_code(); |
| status_ok_ = (status_code != 0) && (status_code < 400); |
| if (handle_error_ || status_ok_) { |
| // TODO(sligocki): Add X-Mod-Pagespeed header. |
| if (content_length_known()) { |
| apache_writer_.set_content_length(content_length()); |
| } |
| apache_writer_.OutputHeaders(response_headers()); |
| } |
| } |
| |
| virtual void HandleDone(bool success) { |
| ScopedMutex lock(mutex_.get()); |
| done_ = true; |
| if (status_ok_ && !success) { |
| driver_->message_handler()->Message( |
| kWarning, |
| "Response for url %s issued with status %d %s but " |
| "failed to complete.", |
| mapped_url_.c_str(), response_headers()->status_code(), |
| response_headers()->reason_phrase()); |
| } |
| condvar_->Signal(); |
| } |
| |
| // Blocks indefinitely waiting for the proxy fetch to complete. |
| // Every 'blocking_fetch_timeout_ms', log a message so that if |
| // we get stuck there's noise in the logs, but we don't expect this |
| // to happen because underlying fetch/cache timeouts should fire. |
| // |
| // Note that enforcing a timeout in this function makes debugging |
| // difficult. |
| void Wait() { |
| int64 timeout_ms = driver_->options()->blocking_fetch_timeout_ms(); |
| ServerContext* server_context = driver_->server_context(); |
| MessageHandler* handler = server_context->message_handler(); |
| Timer* timer = server_context->timer(); |
| int64 start_ms = timer->NowMs(); |
| { |
| ScopedMutex lock(mutex_.get()); |
| while (!done_) { |
| condvar_->TimedWait(timeout_ms); |
| if (!done_) { |
| int64 elapsed_ms = timer->NowMs() - start_ms; |
| handler->Message( |
| kWarning, "Waiting for in-place ProxyFetch on URL %s for %g sec", |
| mapped_url_.c_str(), elapsed_ms / 1000.0); |
| } |
| } |
| } |
| } |
| |
| bool status_ok() const { return status_ok_; } |
| |
| private: |
| GoogleString mapped_url_; |
| ApacheWriter apache_writer_; |
| RewriteDriver* driver_; |
| scoped_ptr<ThreadSystem::CondvarCapableMutex> mutex_; |
| scoped_ptr<ThreadSystem::Condvar> condvar_; |
| bool done_; |
| bool handle_error_; |
| bool status_ok_; |
| |
| DISALLOW_COPY_AND_ASSIGN(ApacheProxyFetch); |
| }; |
| |
| bool IsCompressibleContentType(const char* content_type) { |
| if (content_type == NULL) { |
| return false; |
| } |
| GoogleString type = content_type; |
| size_t separator_idx = type.find(";"); |
| if (separator_idx != GoogleString::npos) { |
| type.erase(separator_idx); |
| } |
| |
| bool res = false; |
| if (type.find("text/") == 0) { |
| res = true; |
| } else if (type.find("application/") == 0) { |
| if (type.find("javascript") != type.npos || |
| type.find("json") != type.npos || |
| type.find("ecmascript") != type.npos || |
| type == "application/livescript" || |
| type == "application/js" || |
| type == "application/jscript" || |
| type == "application/x-js" || |
| type == "application/xhtml+xml" || |
| type == "application/xml") { |
| res = true; |
| } |
| } |
| |
| return res; |
| } |
| |
| // Default handler when the file is not found |
| void instaweb_404_handler(const GoogleString& url, request_rec* request) { |
| request->status = HTTP_NOT_FOUND; |
| ap_set_content_type(request, "text/html; charset=utf-8"); |
| ap_rputs("<html><head><title>Not Found</title></head>", request); |
| ap_rputs("<body><h1>Apache server with mod_pagespeed</h1>OK", request); |
| ap_rputs("<hr>NOT FOUND:", request); |
| ap_rputs(url.c_str(), request); |
| ap_rputs("</body></html>", request); |
| } |
| |
| void send_out_headers_and_body(request_rec* request, |
| const ResponseHeaders& response_headers, |
| const GoogleString& output) { |
| // We always disable downstream header filters when sending out |
| // pagespeed resources, since we've captured them in the origin fetch. |
| ResponseHeadersToApacheRequest(response_headers, |
| true, // Disable downstream header filters. |
| request); |
| if (response_headers.status_code() == HttpStatus::kOK && |
| IsCompressibleContentType(request->content_type)) { |
| // Make sure compression is enabled for this response. |
| ap_add_output_filter("DEFLATE", NULL, request, request->connection); |
| } |
| |
| // Recompute the content-length, because the content may have changed. |
| ap_set_content_length(request, output.size()); |
| // Send the body |
| ap_rwrite(output.c_str(), output.size(), request); |
| } |
| |
| // Evaluate custom_options based upon global_options, directory-specific |
| // options and query-param/request-header options. Returns computed |
| // custom options (or NULL if global_options should be used). |
| // |
| // Caller takes ownership of options. |
| RewriteOptions* get_custom_options(ApacheServerContext* server_context, |
| request_rec* request, |
| GoogleUrl* gurl, |
| RequestHeaders* request_headers, |
| RewriteOptions* global_options) { |
| // Set directory specific options. These will be the options for the |
| // directory the resource is in, which under some configurations will be |
| // different from the options for the directory that the referencing html is |
| // in. This can lead to us using different options here when regenerating |
| // the resource than would be used if the resource were generated as part of |
| // a rewrite kicked off by a request for the referencing html file. This is |
| // hard to fix, so instead we're documenting that you must make sure the |
| // configuration for your resources matches the configuration for your html |
| // files. |
| RewriteOptions* custom_options = NULL; |
| ApacheConfig* directory_options = static_cast<ApacheConfig*> |
| ap_get_module_config(request->per_dir_config, &pagespeed_module); |
| if ((directory_options != NULL) && directory_options->modified()) { |
| custom_options = server_context->apache_factory()->NewRewriteOptions(); |
| custom_options->Merge(*global_options); |
| directory_options->Freeze(); |
| custom_options->Merge(*directory_options); |
| } |
| |
| // TODO(sligocki): Move inside PSOL. |
| // Merge in query-param or header-based options. |
| // Note: We do not generally get response headers in the resource flow, |
| // so NULL is passed in instead. |
| ServerContext::OptionsBoolPair query_options_success = |
| server_context->GetQueryOptions(gurl, request_headers, NULL); |
| if (!query_options_success.second) { |
| server_context->message_handler()->Message( |
| kWarning, "Invalid PageSpeed query params or headers for " |
| "request %s. Serving with default options.", gurl->spec_c_str()); |
| } |
| if (query_options_success.first != NULL) { |
| if (custom_options == NULL) { |
| custom_options = server_context->apache_factory()->NewRewriteOptions(); |
| custom_options->Merge(*global_options); |
| } |
| custom_options->Merge(*query_options_success.first); |
| delete query_options_success.first; |
| // Don't run any experiments if we're handling a customized request. |
| custom_options->set_running_experiment(false); |
| } |
| |
| return custom_options; |
| } |
| |
| // Handle url as .pagespeed. rewritten resource. |
| void handle_as_pagespeed_resource(const RequestContextPtr& request_context, |
| GoogleUrl* gurl, |
| const GoogleString& url, |
| RewriteOptions* custom_options, |
| ApacheServerContext* server_context, |
| RequestHeaders* request_headers, |
| request_rec* request) { |
| RewriteDriver* driver = ResourceFetch::GetDriver( |
| *gurl, custom_options, server_context, request_context); |
| |
| MessageHandler* message_handler = server_context->message_handler(); |
| message_handler->Message(kInfo, "Fetching resource %s...", url.c_str()); |
| |
| GoogleString output; // TODO(jmarantz): Quit buffering resource output. |
| StringWriter writer(&output); |
| |
| SyncFetcherAdapterCallback* callback = new SyncFetcherAdapterCallback( |
| server_context->thread_system(), &writer, request_context); |
| callback->SetRequestHeadersTakingOwnership(request_headers); |
| |
| if (ResourceFetch::BlockingFetch(*gurl, server_context, driver, callback)) { |
| ResponseHeaders* response_headers = callback->response_headers(); |
| // TODO(sligocki): Check that this is already done in ResourceFetch |
| // and remove redundant setting here. |
| response_headers->SetDate(server_context->timer()->NowMs()); |
| // ResourceFetch adds X-Page-Speed header, old mod_pagespeed code |
| // did not. For now, we remove that header for consistency. |
| // TODO(sligocki): Consistently use X- headers in MPS and PSOL. |
| // I think it would be good to change X-Mod-Pagespeed -> X-Page-Speed |
| // and use that for all HTML and resource requests. |
| response_headers->RemoveAll(kPageSpeedHeader); |
| message_handler->Message(kInfo, "Fetch succeeded for %s, status=%d", |
| url.c_str(), response_headers->status_code()); |
| send_out_headers_and_body(request, *response_headers, output); |
| } else { |
| server_context->ReportResourceNotFound(url, request); |
| } |
| |
| callback->Release(); |
| } |
| |
| // Handle url with In Place Resource Optimization (IPRO) flow. |
| bool handle_as_in_place(const RequestContextPtr& request_context, |
| GoogleUrl* gurl, |
| const GoogleString& url, |
| RewriteOptions* custom_options, |
| ApacheServerContext* server_context, |
| RequestHeaders* owned_headers, |
| request_rec* request) { |
| scoped_ptr<RequestHeaders> request_headers(owned_headers); |
| bool handled = false; |
| |
| RewriteDriver* driver = ResourceFetch::GetDriver( |
| *gurl, custom_options, server_context, request_context); |
| |
| MessageHandler* message_handler = server_context->message_handler(); |
| message_handler->Message(kInfo, "Trying to serve rewritten resource " |
| "in-place: %s", url.c_str()); |
| |
| ApacheProxyFetch fetch( |
| url, server_context->thread_system(), driver, request); |
| fetch.set_handle_error(false); |
| driver->FetchInPlaceResource(*gurl, false /* proxy_mode */, &fetch); |
| |
| fetch.Wait(); |
| if (fetch.status_ok()) { |
| server_context->rewrite_stats()->ipro_served()->Add(1); |
| message_handler->Message(kInfo, "Serving rewritten resource in-place: %s", |
| url.c_str()); |
| handled = true; |
| } else if (fetch.response_headers()->status_code() == |
| CacheUrlAsyncFetcher::kNotInCacheStatus) { |
| server_context->rewrite_stats()->ipro_not_in_cache()->Add(1); |
| message_handler->Message(kInfo, "Could not rewrite resource in-place " |
| "because URL is not in cache: %s", |
| url.c_str()); |
| // This URL was not found in cache (neither the input resource nor |
| // a ResourceNotCacheable entry) so we need to get it into cache |
| // (or at least a note that it cannot be cached stored there). |
| // We do that using an Apache output filter. |
| InPlaceResourceRecorder* recorder = new InPlaceResourceRecorder( |
| url, request_headers.release(), driver->options()->respect_vary(), |
| server_context->http_cache(), server_context->statistics(), |
| message_handler); |
| ap_add_output_filter(kModPagespeedInPlaceFilterName, recorder, |
| request, request->connection); |
| ap_add_output_filter(kModPagespeedInPlaceCheckHeadersName, recorder, |
| request, request->connection); |
| } else { |
| server_context->rewrite_stats()->ipro_not_rewritable()->Add(1); |
| message_handler->Message(kInfo, "Could not rewrite resource in-place: %s", |
| url.c_str()); |
| } |
| driver->Cleanup(); |
| |
| return handled; |
| } |
| |
| bool handle_as_proxy(ApacheServerContext* server_context, |
| request_rec* request, |
| const RequestContextPtr& request_context, |
| GoogleUrl* gurl, |
| RewriteOptions* options, |
| scoped_ptr<RewriteOptions>* custom_options) { |
| bool handled = false; |
| // Consider Issue 609: proxying an external CSS file via MapProxyDomain, and |
| // the CSS file makes reference to a font file, which mod_pagespeed does not |
| // know anything about, and does not know how to absolutify. We need to |
| // handle the request for the external font file here, even if IPRO (in place |
| // resource optimization) is off. |
| bool is_proxy = false; |
| GoogleString mapped_url; |
| if (options->domain_lawyer()->MapOriginUrl(*gurl, &mapped_url, &is_proxy) && |
| is_proxy) { |
| RewriteDriver* driver = ResourceFetch::GetDriver( |
| *gurl, custom_options->release(), server_context, request_context); |
| ApacheProxyFetch apache_proxy_fetch( |
| mapped_url, server_context->thread_system(), driver, request); |
| driver->SetRequestHeaders(*apache_proxy_fetch.request_headers()); |
| server_context->proxy_fetch_factory()->StartNewProxyFetch( |
| mapped_url, &apache_proxy_fetch, driver, NULL, NULL); |
| apache_proxy_fetch.Wait(); |
| handled = true; |
| } |
| |
| return handled; |
| } |
| |
| // Determines whether the url can be handled as a mod_pagespeed or in-place |
| // optimized resource, and handles it, returning true. Success status is |
| // written to the status code in the response headers. |
| bool handle_as_resource(ApacheServerContext* server_context, |
| request_rec* request, |
| GoogleUrl* gurl, |
| const GoogleString& url) { |
| if (!gurl->is_valid()) { |
| return false; |
| } |
| |
| // Flushing the cache mutates global_options, so this has to happen before we |
| // construct the options that we use to decide whether IPRO is enabled. |
| server_context->FlushCacheIfNecessary(); |
| |
| ApacheRequestContext* apache_request_context = new ApacheRequestContext( |
| server_context->thread_system()->NewMutex(), |
| server_context->timer(), |
| request); |
| apache_request_context->set_url(url); |
| RequestContextPtr request_context(apache_request_context); |
| bool using_spdy = request_context->using_spdy(); |
| RewriteOptions* global_options = server_context->global_options(); |
| if (using_spdy && (server_context->SpdyConfig() != NULL)) { |
| global_options = server_context->SpdyConfig(); |
| } |
| |
| scoped_ptr<RequestHeaders> request_headers(new RequestHeaders); |
| // Filter limited request headers into backend fetch. |
| // TODO(sligocki): Put this filtering in ResourceFetch and instead use: |
| // ApacheRequestToRequestHeaders(*request, request_headers.get()); |
| for (int i = 0, n = arraysize(RewriteDriver::kPassThroughRequestAttributes); |
| i < n; ++i) { |
| const char* value = apr_table_get( |
| request->headers_in, |
| RewriteDriver::kPassThroughRequestAttributes[i]); |
| if (value != NULL) { |
| request_headers->Add( |
| RewriteDriver::kPassThroughRequestAttributes[i], value); |
| } |
| } |
| |
| scoped_ptr<RewriteOptions> custom_options(get_custom_options( |
| server_context, request, gurl, request_headers.get(), global_options)); |
| |
| RewriteOptions* options = custom_options.get(); // Options for this request. |
| if (custom_options.get() == NULL) { |
| options = global_options; |
| } |
| |
| // Finally, do the actual handling. |
| bool handled = false; |
| if (server_context->IsPagespeedResource(*gurl)) { |
| handled = true; |
| handle_as_pagespeed_resource(request_context, gurl, url, |
| custom_options.release(), server_context, |
| request_headers.release(), request); |
| } else if (handle_as_proxy(server_context, request, request_context, gurl, |
| options, &custom_options)) { |
| handled = true; |
| } else if (options->in_place_rewriting_enabled() && options->enabled() && |
| options->IsAllowed(url)) { |
| handled = handle_as_in_place(request_context, gurl, url, |
| custom_options.release(), server_context, |
| request_headers.release(), request); |
| } |
| |
| return handled; |
| } |
| |
| // Write response headers and send out headers and output, including the option |
| // for a custom Content-Type. |
| void write_handler_response(const StringPiece& output, |
| request_rec* request, |
| ContentType content_type, |
| const StringPiece& cache_control) { |
| ResponseHeaders response_headers; |
| response_headers.SetStatusAndReason(HttpStatus::kOK); |
| response_headers.set_major_version(1); |
| response_headers.set_minor_version(1); |
| |
| response_headers.Add(HttpAttributes::kContentType, content_type.mime_type()); |
| // http://msdn.microsoft.com/en-us/library/ie/gg622941(v=vs.85).aspx |
| // Script and styleSheet elements will reject responses with |
| // incorrect MIME types if the server sends the response header |
| // "X-Content-Type-Options: nosniff". This is a security feature |
| // that helps prevent attacks based on MIME-type confusion. |
| response_headers.Add("X-Content-Type-Options", "nosniff"); |
| AprTimer timer; |
| int64 now_ms = timer.NowMs(); |
| response_headers.SetDate(now_ms); |
| response_headers.SetLastModified(now_ms); |
| response_headers.Add(HttpAttributes::kCacheControl, cache_control); |
| send_out_headers_and_body(request, response_headers, output.as_string()); |
| } |
| |
| void write_handler_response(const StringPiece& output, request_rec* request) { |
| write_handler_response(output, request, |
| kContentTypeHtml, HttpAttributes::kNoCacheMaxAge0); |
| } |
| |
| // Returns request URL if it was a .pagespeed. rewritten resource URL. |
| // Otherwise returns NULL. Since other Apache modules can change request->uri, |
| // we stow the original request URL in a note. This method reads that note |
| // and thus should return the URL that the browser actually requested (rather |
| // than a mod_rewrite altered URL). |
| const char* get_instaweb_resource_url(request_rec* request, |
| ApacheServerContext* server_context) { |
| const char* resource = apr_table_get(request->notes, kResourceUrlNote); |
| |
| // If our translate_name hook, save_url_hook, failed to run because some |
| // other module's translate_hook returned OK first, then run it now. The |
| // main reason we try to do this early is to save our URL before mod_rewrite |
| // mutates it. |
| if (resource == NULL) { |
| save_url_in_note(request, server_context); |
| resource = apr_table_get(request->notes, kResourceUrlNote); |
| } |
| |
| if (resource != NULL && strcmp(resource, kResourceUrlNo) == 0) { |
| return NULL; |
| } |
| |
| const char* url = apr_table_get(request->notes, kPagespeedOriginalUrl); |
| return url; |
| } |
| |
| // Used by log_request_headers for testing only. |
| struct HeaderLoggingData { |
| HeaderLoggingData(StringWriter* writer_in, MessageHandler* handler_in) |
| : writer(writer_in), handler(handler_in) {} |
| StringWriter* writer; |
| MessageHandler* handler; |
| }; |
| |
| // Helper function to support the LogRequestHeadersHandler. Called once for |
| // each header to write header data in a form suitable for javascript inlining. |
| // Used only for tests. |
| int log_request_headers(void* logging_data, |
| const char* key, const char* value) { |
| HeaderLoggingData* hld = static_cast<HeaderLoggingData*>(logging_data); |
| StringWriter* writer = hld->writer; |
| MessageHandler* handler = hld->handler; |
| |
| GoogleString escaped_key; |
| GoogleString escaped_value; |
| |
| EscapeToJsStringLiteral(key, false, &escaped_key); |
| EscapeToJsStringLiteral(value, false, &escaped_value); |
| |
| writer->Write("alert(\"", handler); |
| writer->Write(escaped_key, handler); |
| writer->Write("=", handler); |
| writer->Write(escaped_value, handler); |
| writer->Write("\");\n", handler); |
| |
| return 1; // Continue iteration. |
| } |
| |
| void instaweb_static_handler(request_rec* request, |
| ApacheServerContext* server_context) { |
| StaticAssetManager* static_asset_manager = |
| server_context->static_asset_manager(); |
| StringPiece request_uri_path = request->parsed_uri.path; |
| // Strip out the common prefix url before sending to StaticAssetManager. |
| StringPiece file_name = |
| request_uri_path.substr( |
| strlen(ApacheRewriteDriverFactory::kStaticAssetPrefix)); |
| StringPiece file_contents; |
| StringPiece cache_header; |
| ContentType content_type; |
| if (static_asset_manager->GetAsset( |
| file_name, &file_contents, &content_type, &cache_header)) { |
| write_handler_response(file_contents, request, content_type, cache_header); |
| } else { |
| server_context->ReportResourceNotFound(request->parsed_uri.path, request); |
| } |
| } |
| |
| // TODO(sligocki): This handler is currently unused, integrate this into |
| // the pagespeed_console. |
| apr_status_t instaweb_statistics_graphs_handler( |
| request_rec* request, ApacheConfig* config, |
| ApacheMessageHandler* message_handler) { |
| GoogleString output; |
| StringWriter writer(&output); |
| writer.Write("<!DOCTYPE html>" |
| "<title>mod_pagespeed console</title>", |
| message_handler); |
| writer.Write("<style>", message_handler); |
| writer.Write(CSS_mod_pagespeed_console_css, message_handler); |
| writer.Write("</style>", message_handler); |
| writer.Write(HTML_mod_pagespeed_console_body, message_handler); |
| writer.Write("<script>", message_handler); |
| if (config->statistics_logging_charts_js().size() > 0 && |
| config->statistics_logging_charts_css().size() > 0) { |
| writer.Write("var chartsOfflineJS = '", message_handler); |
| writer.Write(config->statistics_logging_charts_js(), message_handler); |
| writer.Write("';", message_handler); |
| writer.Write("var chartsOfflineCSS = '", message_handler); |
| writer.Write(config->statistics_logging_charts_css(), message_handler); |
| writer.Write("';", message_handler); |
| } else { |
| if (config->statistics_logging_charts_js().size() > 0 || |
| config->statistics_logging_charts_css().size() > 0) { |
| message_handler->Message(kWarning, "Using online Charts API."); |
| } |
| writer.Write("var chartsOfflineJS, chartsOfflineCSS;", message_handler); |
| } |
| writer.Write(JS_mod_pagespeed_console_js, message_handler); |
| writer.Write("</script>", message_handler); |
| write_handler_response(output, request); |
| return OK; |
| } |
| |
| apr_status_t instaweb_statistics_handler( |
| request_rec* request, ApacheServerContext* server_context, |
| ApacheRewriteDriverFactory* factory, MessageHandler* message_handler) { |
| bool general_stats_request = |
| (strcmp(request->handler, kStatisticsHandler) == 0); |
| bool global_stats_request = |
| (strcmp(request->handler, kGlobalStatisticsHandler) == 0); |
| |
| int64 start_time, end_time, granularity_ms; |
| std::set<GoogleString> var_titles; |
| if (general_stats_request && !factory->use_per_vhost_statistics()) { |
| global_stats_request = true; |
| } |
| |
| // Choose the correct statistics. |
| Statistics* statistics = global_stats_request ? |
| factory->statistics() : server_context->statistics(); |
| |
| QueryParams params; |
| params.Parse(request->args); |
| |
| // Parse various mode query params. |
| bool print_normal_config = params.Has("config"); |
| bool print_spdy_config = params.Has("spdy_config"); |
| |
| // JSON statistics handling is done only if we have a console logger. |
| bool json = false; |
| if (statistics->console_logger() != NULL) { |
| // Default values for start_time, end_time, and granularity_ms in case the |
| // query does not include these parameters. |
| start_time = 0; |
| end_time = server_context->timer()->NowMs(); |
| // Granularity is the difference in ms between data points. If it is not |
| // specified by the query, the default value is 3000 ms, the same as the |
| // default logging granularity. |
| granularity_ms = 3000; |
| for (int i = 0; i < params.size(); ++i) { |
| const GoogleString value = |
| (params.value(i) == NULL) ? "" : *params.value(i); |
| const char* name = params.name(i); |
| if (strcmp(name, "json") == 0) { |
| json = true; |
| } else if (strcmp(name, "start_time") == 0) { |
| StringToInt64(value, &start_time); |
| } else if (strcmp(name, "end_time") == 0) { |
| StringToInt64(value, &end_time); |
| } else if (strcmp(name, "var_titles") == 0) { |
| std::vector<StringPiece> variable_names; |
| SplitStringPieceToVector(value, ",", &variable_names, true); |
| for (size_t i = 0; i < variable_names.size(); ++i) { |
| var_titles.insert(variable_names[i].as_string()); |
| } |
| } else if (strcmp(name, "granularity") == 0) { |
| StringToInt64(value, &granularity_ms); |
| } |
| } |
| } else { |
| if (params.Has("json")) { |
| request->status = HTTP_NOT_FOUND; |
| ap_set_content_type(request, "text/html"); |
| ap_rputs("<p>console_logger must be enabled to use '?json' query " |
| "parameter.</p>", request); |
| return OK; |
| } |
| } |
| GoogleString output; |
| StringWriter writer(&output); |
| if (json) { |
| statistics->console_logger()->DumpJSON(var_titles, start_time, end_time, |
| granularity_ms, &writer, |
| message_handler); |
| } else { |
| // Generate some navigational links to the right to help |
| // our users get to other modes. |
| writer.Write( |
| "<div style='float:right'>View " |
| "<a href='?config'>Configuration</a>, " |
| "<a href='?spdy_config'>SPDY Configuration</a>, " |
| "<a href='?'>Statistics</a> " |
| "(<a href='?memcached'>with memcached Stats</a>). " |
| "</div>", |
| message_handler); |
| |
| // Only print stats or configuration, not both. |
| if (!print_normal_config && !print_spdy_config) { |
| writer.Write(global_stats_request ? |
| "Global Statistics" : "VHost-Specific Statistics", |
| message_handler); |
| |
| // Write <pre></pre> for Dump to keep good format. |
| writer.Write("<pre>", message_handler); |
| statistics->Dump(&writer, message_handler); |
| writer.Write("</pre>", message_handler); |
| statistics->RenderHistograms(&writer, message_handler); |
| |
| int flags = SystemCaches::kDefaultStatFlags; |
| if (global_stats_request) { |
| flags |= SystemCaches::kGlobalView; |
| } |
| |
| if (params.Has("memcached")) { |
| flags |= SystemCaches::kIncludeMemcached; |
| } |
| |
| GoogleString backend_stats; |
| factory->caches()->PrintCacheStats( |
| static_cast<SystemCaches::StatFlags>(flags), &backend_stats); |
| if (!backend_stats.empty()) { |
| HtmlKeywords::WritePre(backend_stats, &writer, message_handler); |
| } |
| } |
| |
| if (print_normal_config) { |
| writer.Write("Configuration:<br>", message_handler); |
| HtmlKeywords::WritePre(server_context->config()->OptionsToString(), |
| &writer, message_handler); |
| } |
| |
| if (print_spdy_config) { |
| ApacheConfig* spdy_config = server_context->SpdyConfig(); |
| if (spdy_config == NULL) { |
| writer.Write("SPDY-specific configuration missing, using default.", |
| message_handler); |
| } else { |
| writer.Write("SPDY-specific configuration:<br>", message_handler); |
| HtmlKeywords::WritePre(spdy_config->OptionsToString(), |
| &writer, message_handler); |
| } |
| } |
| } |
| |
| if (json) { |
| write_handler_response(output, request, |
| kContentTypeJson, HttpAttributes::kNoCacheMaxAge0); |
| } else { |
| write_handler_response(output, request); |
| } |
| return OK; |
| } |
| |
| // Append the query params from a request into data. This just parses the query |
| // params from a request URL. For parsing the query params from a POST body, use |
| // parse_body_from_post(). Return true if successful, otherwise, returns false |
| // and sets ret to the appropriate status. |
| bool parse_query_params(const request_rec* request, GoogleString* data, |
| apr_status_t* ret) { |
| // Add a dummy host (www.example.com) to the request URL to make it absolute |
| // so that GoogleUrl can be used for parsing. |
| GoogleUrl base("http://www.example.com"); |
| GoogleUrl url(base, request->unparsed_uri); |
| |
| if (!url.is_valid() || !url.has_query()) { |
| *ret = HTTP_BAD_REQUEST; |
| return false; |
| } |
| |
| url.Query().AppendToString(data); |
| return true; |
| } |
| |
| // Read the body from a POST request and append to data. Return true if |
| // successful, otherwise, returns false and sets ret to the appropriate status. |
| bool parse_body_from_post(const request_rec* request, GoogleString* data, |
| apr_status_t* ret) { |
| if (request->method_number != M_POST) { |
| *ret = HTTP_METHOD_NOT_ALLOWED; |
| return false; |
| } |
| |
| // Verify that the request has the correct content type for a form POST |
| // submission. Ideally, we could use request->content_type here, but that is |
| // coming back as NULL, even when the header was set correctly. |
| const char* content_type = apr_table_get(request->headers_in, |
| HttpAttributes::kContentType); |
| if (content_type == NULL) { |
| *ret = HTTP_BAD_REQUEST; |
| return false; |
| } |
| GoogleString mime_type; |
| GoogleString charset; |
| if (!ParseContentType(content_type, &mime_type, &charset)) { |
| *ret = HTTP_BAD_REQUEST; |
| return false; |
| } |
| if (!StringCaseEqual(mime_type, "application/x-www-form-urlencoded")) { |
| *ret = HTTP_BAD_REQUEST; |
| return false; |
| } |
| |
| // Setup the number of bytes to try to read from the POST body. If the |
| // Content-Length header is set, use it, otherwise try to pull up to |
| // kMaxPostSizeBytes. |
| int content_len = kMaxPostSizeBytes; |
| const char* content_len_str = apr_table_get(request->headers_in, |
| HttpAttributes::kContentLength); |
| if (content_len_str != NULL) { |
| if (!StringToInt(content_len_str, &content_len)) { |
| *ret = HTTP_BAD_REQUEST; |
| return false; |
| } |
| if (static_cast<size_t>(content_len) > kMaxPostSizeBytes) { |
| *ret = HTTP_REQUEST_ENTITY_TOO_LARGE; |
| return false; |
| } |
| } |
| |
| // Parse the incoming brigade and add the contents to data. In apache 2.4 we |
| // could just use ap_parse_form_data. See the example at |
| // http://httpd.apache.org/docs/2.4/developer/modguide.html#snippets. |
| apr_bucket_brigade* bbin = |
| apr_brigade_create(request->pool, request->connection->bucket_alloc); |
| |
| bool eos = false; |
| |
| while (!eos) { |
| apr_status_t rv = ap_get_brigade(request->input_filters, bbin, |
| AP_MODE_READBYTES, APR_BLOCK_READ, |
| content_len); |
| if (rv != APR_SUCCESS) { |
| // Form input read failed. |
| *ret = HTTP_INTERNAL_SERVER_ERROR; |
| return false; |
| } |
| for (apr_bucket* bucket = APR_BRIGADE_FIRST(bbin); |
| bucket != APR_BRIGADE_SENTINEL(bbin); |
| bucket = APR_BUCKET_NEXT(bucket) ) { |
| if (!APR_BUCKET_IS_METADATA(bucket)) { |
| const char* buf = NULL; |
| size_t bytes = 0; |
| rv = apr_bucket_read(bucket, &buf, &bytes, APR_BLOCK_READ); |
| if (rv != APR_SUCCESS) { |
| *ret = HTTP_INTERNAL_SERVER_ERROR; |
| return false; |
| } |
| if (data->length() + bytes > kMaxPostSizeBytes) { |
| *ret = HTTP_REQUEST_ENTITY_TOO_LARGE; |
| return false; |
| } |
| data->append(buf, bytes); |
| } else if (APR_BUCKET_IS_EOS(bucket)) { |
| eos = true; |
| break; |
| } |
| } |
| apr_brigade_cleanup(bbin); |
| } |
| |
| // No need to modify ret as it is only used if reading the POST failed. |
| return true; |
| } |
| |
| apr_status_t instaweb_beacon_handler(request_rec* request, |
| ApacheServerContext* server_context) { |
| GoogleString data; |
| apr_status_t ret = DECLINED; |
| if (request->method_number == M_GET) { |
| if (!parse_query_params(request, &data, &ret)) { |
| return ret; |
| } |
| } else if (request->method_number == M_POST) { |
| GoogleString query_param_data, post_data; |
| // Even if the beacon is a POST, the originating url should be in the query |
| // params, not the POST body. |
| if (!parse_query_params(request, &query_param_data, &ret)) { |
| return ret; |
| } |
| if (!parse_body_from_post(request, &post_data, &ret)) { |
| return ret; |
| } |
| StrAppend(&data, query_param_data, "&", post_data); |
| } else { |
| return HTTP_METHOD_NOT_ALLOWED; |
| } |
| RequestContextPtr request_context(new ApacheRequestContext( |
| server_context->thread_system()->NewMutex(), |
| server_context->timer(), |
| request)); |
| StringPiece user_agent = apr_table_get(request->headers_in, |
| HttpAttributes::kUserAgent); |
| server_context->HandleBeacon(data, user_agent, request_context); |
| apr_table_set(request->headers_out, HttpAttributes::kCacheControl, |
| HttpAttributes::kNoCacheMaxAge0); |
| return HTTP_NO_CONTENT; |
| } |
| |
| bool IsBeaconUrl(const RewriteOptions::BeaconUrl& beacons, |
| const GoogleUrl& gurl) { |
| // Check if the full path without query parameters equals the beacon URL, |
| // either the http or https version (we're too lazy to check specifically). |
| // This handles both GETs, which include query parameters, and POSTs, |
| // which will only have the originating url in the query params. |
| if (!gurl.is_valid()) { |
| return false; |
| } |
| // Ignore query params in the beacon URLs. Normally the beacon URL won't have |
| // a query param, but it could have been added using ModPagespeedBeaconUrl. |
| return (gurl.PathSansQuery() == beacons.http_in || |
| gurl.PathSansQuery() == beacons.https_in); |
| } |
| |
| } // namespace |
| |
| bool is_pagespeed_subrequest(request_rec* request) { |
| StringPiece user_agent = apr_table_get(request->headers_in, |
| HttpAttributes::kUserAgent); |
| return (user_agent.find(kModPagespeedSubrequestUserAgent) != user_agent.npos); |
| } |
| |
| apr_status_t instaweb_handler(request_rec* request) { |
| apr_status_t ret = DECLINED; |
| ApacheServerContext* server_context = |
| InstawebContext::ServerContextFromServerRec(request->server); |
| ApacheConfig* config = server_context->config(); |
| // Escape ASAP if we're in unplugged mode. |
| if (config->unplugged()) { |
| return DECLINED; |
| } |
| |
| ApacheRewriteDriverFactory* factory = server_context->apache_factory(); |
| ApacheMessageHandler* message_handler = factory->apache_message_handler(); |
| StringPiece request_handler_str = request->handler; |
| |
| // mod_pagespeed_statistics or mod_pagespeed_global_statistics. |
| if (request_handler_str == kStatisticsHandler || |
| request_handler_str == kGlobalStatisticsHandler) { |
| ret = instaweb_statistics_handler(request, server_context, factory, |
| message_handler); |
| |
| // TODO(sligocki): Merge this into kConsoleHandler. |
| } else if (request_handler_str == kTempStatisticsGraphsHandler) { |
| ret = instaweb_statistics_graphs_handler(request, config, message_handler); |
| |
| } else if (request_handler_str == kConsoleHandler) { |
| GoogleString output; |
| StringWriter writer(&output); |
| ConsoleHandler(config, &writer, message_handler); |
| write_handler_response(output, request); |
| ret = OK; |
| |
| } else if (request_handler_str == kMessageHandler) { |
| // Request for page /mod_pagespeed_message. |
| GoogleString html, log; |
| StringWriter html_writer(&html), log_writer(&log); |
| if (message_handler->Dump(&log_writer)) { |
| // Write pre-tag for Dump to keep good format. |
| HtmlKeywords::WritePre(log, &html_writer, message_handler); |
| } else { |
| html = |
| "Writing to mod_pagespeed_message failed. \n" |
| "Please check if it's enabled in pagespeed.conf.\n"; |
| } |
| write_handler_response(html, request); |
| ret = OK; |
| |
| } else if (request_handler_str == kLogRequestHeadersHandler) { |
| // For testing CustomFetchHeader. |
| GoogleString output; |
| StringWriter writer(&output); |
| HeaderLoggingData header_logging_data(&writer, message_handler); |
| apr_table_do(&log_request_headers, &header_logging_data, |
| request->headers_in, NULL); |
| |
| write_handler_response(output, request, kContentTypeJavascript, "public"); |
| ret = OK; |
| |
| } else if (strcmp(request->handler, kGenerateResponseWithOptionsHandler) == 0 |
| && request->uri != NULL) { |
| // This handler is only needed for apache_system_test. It adds headers to |
| // headers_out and/or err_headers_out to test handling of parameters in |
| // those resources. |
| if (strstr(request->parsed_uri.query, "headers_out") != NULL) { |
| apr_table_add(request->headers_out, "PageSpeed", "off"); |
| } else if (strstr(request->parsed_uri.query, "headers_errout") != NULL) { |
| apr_table_add(request->err_headers_out, "PageSpeed", "off"); |
| } else if (strstr(request->parsed_uri.query, "headers_override") != NULL) { |
| apr_table_add(request->headers_out, "PageSpeed", "off"); |
| apr_table_add(request->headers_out, "PageSpeedFilters", |
| "-remove_comments"); |
| apr_table_add(request->err_headers_out, "PageSpeed", "on"); |
| apr_table_add(request->err_headers_out, "PageSpeedFilters", |
| "+remove_comments"); |
| } else if (strstr(request->parsed_uri.query, "headers_combine") != NULL) { |
| apr_table_add(request->headers_out, "PageSpeed", "on"); |
| apr_table_add(request->err_headers_out, "PageSpeedFilters", |
| "+remove_comments"); |
| } |
| |
| } else { |
| const char* url = InstawebContext::MakeRequestUrl(*config, request); |
| // Do not try to rewrite our own sub-request. |
| if (url != NULL && !is_pagespeed_subrequest(request)) { |
| GoogleUrl gurl(url); |
| // For the beacon accept any method; for all others only allow GETs. |
| if (IsBeaconUrl(server_context->global_options()->beacon_url(), gurl)) { |
| ret = instaweb_beacon_handler(request, server_context); |
| } else if (request->method_number != M_GET) { |
| ap_log_rerror(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, request, |
| "Not rewriting non-GET %d of %s", |
| request->method_number, gurl.spec_c_str()); |
| } else if (!gurl.is_valid()) { |
| ap_log_rerror(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, request, |
| "Ignoring invalid URL: %s", gurl.spec_c_str()); |
| } else if (gurl.PathSansLeaf() == |
| ApacheRewriteDriverFactory::kStaticAssetPrefix) { |
| instaweb_static_handler(request, server_context); |
| ret = OK; |
| } else if (handle_as_resource(server_context, request, &gurl, url)) { |
| ret = OK; |
| } |
| } |
| |
| if (ret != OK && (config->slurping_enabled() || config->test_proxy())) { |
| SlurpUrl(server_context, request); |
| ret = OK; |
| } |
| } |
| return ret; |
| } |
| |
| // This translator must be inserted into the translate_name chain |
| // prior to mod_rewrite. By saving the original URL in a |
| // request->notes and using that in our handler, we prevent |
| // mod_rewrite from borking URL names that need to be handled by |
| // mod_pagespeed. |
| // |
| // This hack seems to be the most robust way to immunize mod_pagespeed |
| // from when mod_rewrite rewrites the URL. We still need mod_rewrite |
| // to do required complex processing of the filename (e.g. prepending |
| // the DocumentRoot) so mod_authz_host is happy, so we return DECLINED |
| // even for mod_pagespeed resources. |
| // |
| // One alternative strategy is to return OK to bypass mod_rewrite |
| // entirely, but then we'd have to duplicate the functionality in |
| // mod_rewrite that prepends the DocumentRoot, which is itself |
| // complex. See mod_rewrite.c:hook_fixup(), and look for calls to |
| // ap_document_root(). |
| // |
| // Or we could return DECLINED but set a note "mod_rewrite_rewritten" |
| // to try to convince mod_rewrite to leave our URLs alone, which seems |
| // fragile as that's an internal string literal in mod_rewrite.c and |
| // is not documented anywhere. |
| // |
| // Another strategy is to return OK but leave request->filename NULL. |
| // In that case, the server kernel generates an ominious 'info' |
| // message: |
| // |
| // [info] [client ::1] Module bug? Request filename is missing for URI |
| // /mod_pagespeed_statistics |
| // |
| // This is generated by httpd/src/server/request.c line 486, and right |
| // above that is this comment: |
| // |
| // "OK" as a response to a real problem is not _OK_, but to |
| // allow broken modules to proceed, we will permit the |
| // not-a-path filename to pass the following two tests. This |
| // behavior may be revoked in future versions of Apache. We |
| // still must catch it later if it's heading for the core |
| // handler. Leave INFO notes here for module debugging. |
| // |
| // It seems like the simplest, most robust approach is to squirrel |
| // away the original URL *before* mod_rewrite sees it in |
| // kPagespeedOriginalUrl "mod_pagespeed_url" and use *that* rather than |
| // request->unparsed_uri (which mod_rewrite might have mangled) when |
| // procesing the request. |
| // |
| // Additionally we store whether or not this request is a pagespeed |
| // resource or not in kResourceUrlNote. |
| apr_status_t save_url_hook(request_rec *request) { |
| ApacheServerContext* server_context = |
| InstawebContext::ServerContextFromServerRec(request->server); |
| return save_url_in_note(request, server_context); |
| } |
| |
| apr_status_t save_url_in_note(request_rec *request, |
| ApacheServerContext* server_context) { |
| // Escape ASAP if we're in unplugged mode. |
| if (server_context->config()->unplugged()) { |
| return DECLINED; |
| } |
| |
| // This call to MakeRequestUrl() not only returns the url but also |
| // saves it for future use so that if another module changes the |
| // url in the request, we still have the original one. |
| const char* url = InstawebContext::MakeRequestUrl( |
| *server_context->global_options(), request); |
| GoogleUrl gurl(url); |
| |
| bool bypass_mod_rewrite = false; |
| if (gurl.is_valid()) { |
| // Note: We cannot use request->handler because it may not be set yet :( |
| // TODO(sligocki): Make this robust to custom statistics and beacon URLs. |
| StringPiece leaf = gurl.LeafSansQuery(); |
| if (leaf == kStatisticsHandler || leaf == kConsoleHandler || |
| leaf == kGlobalStatisticsHandler || leaf == kMessageHandler || |
| gurl.PathSansLeaf() == ApacheRewriteDriverFactory::kStaticAssetPrefix || |
| IsBeaconUrl(server_context->global_options()->beacon_url(), gurl) || |
| server_context->IsPagespeedResource(gurl)) { |
| bypass_mod_rewrite = true; |
| } |
| } |
| |
| if (bypass_mod_rewrite) { |
| apr_table_set(request->notes, kResourceUrlNote, kResourceUrlYes); |
| } else { |
| // Leave behind a note for non-instaweb requests that says that |
| // our handler got called and we decided to pass. This gives us |
| // one final chance at serving resources in the presence of a |
| // module that intercepted 'translate_name' before mod_pagespeed. |
| // The absence of this marker indicates that translate_name did |
| // not get a chance to run, and thus we should try to look at |
| // the URI directly. |
| apr_table_set(request->notes, kResourceUrlNote, kResourceUrlNo); |
| } |
| return DECLINED; |
| } |
| |
| // Override core_map_to_storage for pagespeed resources. |
| apr_status_t instaweb_map_to_storage(request_rec* request) { |
| if (request->proxyreq == PROXYREQ_REVERSE) { |
| // If Apache is acting as a reverse proxy for this request there is no |
| // point in walking the directory because it doesn't apply to this |
| // server's htdocs tree, it applies to the server we are proxying to. |
| // This can result in it raising a 403 because some path doesn't exist. |
| // Note that experimenting shows that it doesn't matter if we return OK |
| // or DECLINED here, at least with URLs that aren't overly long; also, |
| // we actually fetch the DECODED URL (no .pagespeed. etc) from the proxy |
| // server and we rewrite it ourselves. |
| return DECLINED; |
| } |
| |
| if (request->filename == NULL) { |
| // We set filename to NULL below, and it appears other modules do too |
| // (the WebSphere plugin for example; see issue 610), so to prevent a |
| // dereference of NULL. |
| return DECLINED; |
| } |
| |
| ApacheServerContext* server_context = |
| InstawebContext::ServerContextFromServerRec(request->server); |
| if (server_context->config()->unplugged()) { |
| // If we're in unplugged mode then none of our hooks apply so escape ASAP. |
| return DECLINED; |
| } |
| |
| if (get_instaweb_resource_url(request, server_context) == NULL) { |
| return DECLINED; |
| } |
| |
| // core_map_to_storage does at least two things: |
| // 1) checks filename length limits |
| // 2) determines directory specific options |
| // We want (2) but not (1). If we simply return OK we will keep |
| // core_map_to_storage from running and let through our long filenames but |
| // resource requests that require regeneration will not respect directory |
| // specific options. |
| // |
| // To fix this we need to be more dependent on apache internals than we |
| // would like. core_map_to_storage always calls ap_directory_walk(request), |
| // which does both (1) and (2) and appears to work entirely off of |
| // request->filename. But ap_directory_walk doesn't care whether the last |
| // request->segment of the path actually exists. So if we change the |
| // request->filename from something like: |
| // /var/www/path/to/LEAF_WHICH_MAY_BE_HUGE.pagespeed.FILTER.HASH.EXT |
| // to: |
| // /var/www/path/to/A |
| // then we will bypass the filename length limit without harming the load of |
| // directory specific options. |
| // |
| // So: modify request->filename in place to cut it off after the last '/' |
| // character and replace the whole leaf with 'A', and then call |
| // ap_directory_walk to figure out custom options. |
| char* filename_starting_at_last_slash = strrchr(request->filename, '/'); |
| if (filename_starting_at_last_slash != NULL && |
| filename_starting_at_last_slash[1] != '\0') { |
| filename_starting_at_last_slash[1] = 'A'; |
| filename_starting_at_last_slash[2] = '\0'; |
| } |
| ap_directory_walk(request); |
| |
| // mod_speling, if enabled, looks for the filename on the file system, |
| // and tries to "correct" the spelling. This is not desired for |
| // mod_pagesped resources, but mod_speling will not do this damage |
| // when request->filename == NULL. See line 219 of |
| // http://svn.apache.org/viewvc/httpd/httpd/trunk/modules/mappers/ |
| // mod_speling.c?revision=983065&view=markup |
| // |
| // Note that mod_speling runs 'hook_fixups' at APR_HOOK_LAST, and |
| // we are currently running instaweb_map_to_storage in map_to_storage |
| // HOOK_FIRST-2, which is a couple of phases before hook_fixups. |
| // |
| // If at some point we stop NULLing the filename here we need to modify the |
| // code above that mangles it to use a temporary buffer instead. |
| request->filename = NULL; |
| |
| // While setting request->filename helps get mod_speling (as well as |
| // mod_mime and mod_mime_magic) out of our hair, it causes crashes |
| // in mod_negotiation (if on) when finfo.filetype is APR_NOFILE. |
| // So we give it a type that's something other than APR_NOFILE (plus we |
| // also don't want APR_DIR, since that would make mod_mime to set the |
| // mimetype to httpd/unix-directory). |
| request->finfo.filetype = APR_UNKFILE; |
| |
| // Keep core_map_to_storage from running and rejecting our long filenames. |
| return OK; |
| } |
| |
| } // namespace net_instaweb |