Backport: Factor out handling for errors, stats, and logs. Add nosniff protection.
diff --git a/src/DEPS b/src/DEPS
index 982de7b..1931434 100644
--- a/src/DEPS
+++ b/src/DEPS
@@ -58,7 +58,7 @@
"opencv_src": "http://github.com/Itseez/opencv.git",
"opencv_revision": "@2.3.1",
- "jsoncpp_src": "https://jsoncpp.svn.sourceforge.net/svnroot/jsoncpp/trunk/",
+ "jsoncpp_src": "https://svn.code.sf.net/p/jsoncpp/code/trunk",
"jsoncpp_revision": "@247",
"gflags_root": "http://google-gflags.googlecode.com/svn/tags/gflags-1.5/src",
diff --git a/src/net/instaweb/apache/apache_server_context.cc b/src/net/instaweb/apache/apache_server_context.cc
index 8333916..73a4259 100644
--- a/src/net/instaweb/apache/apache_server_context.cc
+++ b/src/net/instaweb/apache/apache_server_context.cc
@@ -16,7 +16,8 @@
#include "net/instaweb/apache/apache_server_context.h"
-#include "httpd.h"
+#include "httpd.h" // NOLINT
+#include "http_protocol.h" // NOLINT
#include "base/logging.h"
#include "net/instaweb/apache/apache_config.h"
#include "net/instaweb/apache/apache_request_context.h"
@@ -34,6 +35,7 @@
#include "net/instaweb/system/public/system_caches.h"
#include "net/instaweb/util/public/basictypes.h"
#include "net/instaweb/util/public/file_system.h"
+#include "net/instaweb/util/public/message_handler.h"
#include "net/instaweb/util/public/shared_mem_statistics.h"
#include "net/instaweb/util/public/split_statistics.h"
#include "net/instaweb/util/public/statistics.h"
@@ -188,6 +190,18 @@
ApacheRewriteDriverFactory::InitStats(split_statistics_.get());
}
+void ApacheServerContext::ReportNotFoundHelper(StringPiece error_message,
+ request_rec* request,
+ Variable* error_count) {
+ error_count->Add(1);
+ request->status = HttpStatus::kNotFound;
+ ap_send_error_response(request, 0);
+ message_handler()->Message(kWarning, "%s %s: not found (404)",
+ (error_message.empty() ? "(null)" :
+ error_message.as_string().c_str()),
+ error_count->GetName().as_string().c_str());
+}
+
void ApacheServerContext::ChildInit() {
DCHECK(!initialized_);
if (!initialized_) {
diff --git a/src/net/instaweb/apache/apache_server_context.h b/src/net/instaweb/apache/apache_server_context.h
index c2e399d..167be22 100644
--- a/src/net/instaweb/apache/apache_server_context.h
+++ b/src/net/instaweb/apache/apache_server_context.h
@@ -19,12 +19,14 @@
#include "net/instaweb/apache/apache_config.h"
#include "net/instaweb/http/public/request_context.h"
+#include "net/instaweb/rewriter/public/rewrite_stats.h"
#include "net/instaweb/system/public/system_server_context.h"
#include "net/instaweb/util/public/basictypes.h"
#include "net/instaweb/util/public/scoped_ptr.h"
#include "net/instaweb/util/public/string.h"
#include "net/instaweb/util/public/string_util.h"
+struct request_rec;
struct server_rec;
namespace net_instaweb {
@@ -138,9 +140,37 @@
// let mod_pagespeed behave as an origin fetcher.
virtual bool ProxiesHtml() const { return false; }
+ // Reports an error status to the HTTP resource request, and logs
+ // the error as a Warning to the log file, and bumps a stat as
+ // needed.
+ void ReportResourceNotFound(StringPiece error_message, request_rec* request) {
+ ReportNotFoundHelper(error_message, request,
+ rewrite_stats()->resource_404_count());
+ }
+
+ // Reports an error status to the HTTP statistics request, and logs
+ // the error as a Warning to the log file, and bumps a stat as
+ // needed.
+ void ReportStatisticsNotFound(StringPiece error_message,
+ request_rec* request) {
+ ReportNotFoundHelper(error_message, request, statistics_404_count());
+ }
+
+ // Reports an error status to the HTTP slurp request, and logs
+ // the error as a Warning to the log file, and bumps a stat as
+ // needed.
+ void ReportSlurpNotFound(StringPiece error_message, request_rec* request) {
+ ReportNotFoundHelper(error_message, request,
+ rewrite_stats()->slurp_404_count());
+ }
+
private:
virtual bool UpdateCacheFlushTimestampMs(int64 timestamp_ms);
+ void ReportNotFoundHelper(StringPiece url,
+ request_rec* request,
+ Variable* error_count);
+
ApacheRewriteDriverFactory* apache_factory_;
server_rec* server_rec_;
GoogleString version_;
diff --git a/src/net/instaweb/apache/apache_slurp.cc b/src/net/instaweb/apache/apache_slurp.cc
index dd4e9fa..46f512d 100644
--- a/src/net/instaweb/apache/apache_slurp.cc
+++ b/src/net/instaweb/apache/apache_slurp.cc
@@ -259,7 +259,8 @@
request_context, handler);
ApacheRequestToRequestHeaders(*r, fetch.request_headers());
- if (fetch.Fetch()) {
+ bool fetch_succeeded = fetch.Fetch();
+ if (fetch_succeeded) {
// We always disable downstream header filters when sending out
// slurped resources, since we've captured them from the origin
// in the fetch we did to write the slurp.
@@ -275,7 +276,10 @@
stripped_url.c_str(),
fetch.request_headers()->ToString().c_str(),
fetch.response_headers()->ToString().c_str());
- SlurpDefaultHandler(r);
+ }
+
+ if (!fetch_succeeded || fetch.response_headers()->IsErrorStatus()) {
+ server_context->ReportSlurpNotFound(stripped_url, r);
}
}
diff --git a/src/net/instaweb/apache/instaweb_handler.cc b/src/net/instaweb/apache/instaweb_handler.cc
index d77054c..d9f6179 100644
--- a/src/net/instaweb/apache/instaweb_handler.cc
+++ b/src/net/instaweb/apache/instaweb_handler.cc
@@ -34,6 +34,7 @@
#include "net/instaweb/apache/instaweb_context.h"
#include "net/instaweb/apache/mod_instaweb.h"
#include "net/instaweb/automatic/public/proxy_fetch.h"
+#include "net/instaweb/htmlparse/public/html_keywords.h"
#include "net/instaweb/http/public/async_fetch.h"
#include "net/instaweb/http/public/cache_url_async_fetcher.h"
#include "net/instaweb/http/public/content_type.h"
@@ -366,9 +367,7 @@
url.c_str(), response_headers->status_code());
send_out_headers_and_body(request, *response_headers, output);
} else {
- RewriteStats* stats = server_context->rewrite_stats();
- stats->resource_404_count()->Add(1);
- instaweb_404_handler(url, request);
+ server_context->ReportResourceNotFound(url, request);
}
callback->Release();
@@ -532,7 +531,7 @@
}
// Write response headers and send out headers and output, including the option
-// for a custom Content-Type.
+// for a custom Content-Type.
void write_handler_response(const StringPiece& output,
request_rec* request,
ContentType content_type,
@@ -543,6 +542,12 @@
response_headers.set_minor_version(1);
response_headers.Add(HttpAttributes::kContentType, content_type.mime_type());
+ // http://msdn.microsoft.com/en-us/library/ie/gg622941(v=vs.85).aspx
+ // Script and styleSheet elements will reject responses with
+ // incorrect MIME types if the server sends the response header
+ // "X-Content-Type-Options: nosniff". This is a security feature
+ // that helps prevent attacks based on MIME-type confusion.
+ response_headers.Add("X-Content-Type-Options", "nosniff");
AprTimer timer;
int64 now_ms = timer.NowMs();
response_headers.SetDate(now_ms);
@@ -614,13 +619,6 @@
return 1; // Continue iteration.
}
-// Writes text wrapped in a <pre> block
-void WritePre(StringPiece str, Writer* writer, MessageHandler* handler) {
- writer->Write("<pre>\n", handler);
- writer->Write(str, handler);
- writer->Write("</pre>\n", handler);
-}
-
void instaweb_static_handler(request_rec* request,
ApacheServerContext* server_context) {
StaticAssetManager* static_asset_manager =
@@ -637,7 +635,7 @@
file_name, &file_contents, &content_type, &cache_header)) {
write_handler_response(file_contents, request, content_type, cache_header);
} else {
- instaweb_404_handler(request->parsed_uri.path, request);
+ server_context->ReportResourceNotFound(request->parsed_uri.path, request);
}
}
@@ -785,14 +783,14 @@
factory->caches()->PrintCacheStats(
static_cast<SystemCaches::StatFlags>(flags), &backend_stats);
if (!backend_stats.empty()) {
- WritePre(backend_stats, &writer, message_handler);
+ HtmlKeywords::WritePre(backend_stats, &writer, message_handler);
}
}
if (print_normal_config) {
writer.Write("Configuration:<br>", message_handler);
- WritePre(server_context->config()->OptionsToString(),
- &writer, message_handler);
+ HtmlKeywords::WritePre(server_context->config()->OptionsToString(),
+ &writer, message_handler);
}
if (print_spdy_config) {
@@ -802,7 +800,8 @@
message_handler);
} else {
writer.Write("SPDY-specific configuration:<br>", message_handler);
- WritePre(spdy_config->OptionsToString(), &writer, message_handler);
+ HtmlKeywords::WritePre(spdy_config->OptionsToString(),
+ &writer, message_handler);
}
}
}
@@ -1017,17 +1016,17 @@
} else if (request_handler_str == kMessageHandler) {
// Request for page /mod_pagespeed_message.
- GoogleString output;
- StringWriter writer(&output);
- // Write <pre></pre> for Dump to keep good format.
- writer.Write("<pre>", message_handler);
- if (!message_handler->Dump(&writer)) {
- writer.Write("Writing to mod_pagespeed_message failed. \n"
- "Please check if it's enabled in pagespeed.conf.\n",
- message_handler);
+ GoogleString html, log;
+ StringWriter html_writer(&html), log_writer(&log);
+ if (message_handler->Dump(&log_writer)) {
+ // Write pre-tag for Dump to keep good format.
+ HtmlKeywords::WritePre(log, &html_writer, message_handler);
+ } else {
+ html =
+ "Writing to mod_pagespeed_message failed. \n"
+ "Please check if it's enabled in pagespeed.conf.\n";
}
- writer.Write("</pre>", message_handler);
- write_handler_response(output, request);
+ write_handler_response(html, request);
ret = OK;
} else if (request_handler_str == kLogRequestHeadersHandler) {
@@ -1089,10 +1088,6 @@
if (ret != OK && (config->slurping_enabled() || config->test_proxy())) {
SlurpUrl(server_context, request);
- if (request->status == HTTP_NOT_FOUND) {
- RewriteStats* stats = server_context->rewrite_stats();
- stats->slurp_404_count()->Add(1);
- }
ret = OK;
}
}
diff --git a/src/net/instaweb/system/public/system_server_context.h b/src/net/instaweb/system/public/system_server_context.h
index 9bf6b8c..97158e6 100644
--- a/src/net/instaweb/system/public/system_server_context.h
+++ b/src/net/instaweb/system/public/system_server_context.h
@@ -49,6 +49,8 @@
static void InitStats(Statistics* statistics);
+ Variable* statistics_404_count();
+
protected:
// Flush the cache by updating the cache flush timestamp in the global
// options. This will change its signature, which is part of the cache key,
diff --git a/src/net/instaweb/system/system_caches.cc b/src/net/instaweb/system/system_caches.cc
index ba56c8b..812e1fb 100644
--- a/src/net/instaweb/system/system_caches.cc
+++ b/src/net/instaweb/system/system_caches.cc
@@ -22,6 +22,7 @@
#include <utility>
#include "base/logging.h"
+#include "net/instaweb/htmlparse/public/html_keywords.h"
#include "net/instaweb/http/public/http_cache.h"
#include "net/instaweb/http/public/write_through_http_cache.h"
#include "net/instaweb/rewriter/public/server_context.h"
@@ -43,6 +44,7 @@
#include "net/instaweb/util/public/slow_worker.h"
#include "net/instaweb/util/public/string.h"
#include "net/instaweb/util/public/string_util.h"
+#include "net/instaweb/util/public/string_writer.h"
#include "net/instaweb/util/public/thread_system.h"
#include "net/instaweb/util/public/write_through_cache.h"
@@ -470,8 +472,9 @@
if (cache_info->cache_backend != NULL) {
StrAppend(out, "Shared memory metadata cache '", p->first,
"' statistics:<br>");
- StrAppend(out, "<pre>", cache_info->cache_backend->DumpStats(),
- "</pre>");
+ StringWriter writer(out);
+ HtmlKeywords::WritePre(cache_info->cache_backend->DumpStats(),
+ &writer, factory_->message_handler());
}
}
}
diff --git a/src/net/instaweb/system/system_server_context.cc b/src/net/instaweb/system/system_server_context.cc
index 51d0858..38240c7 100644
--- a/src/net/instaweb/system/system_server_context.cc
+++ b/src/net/instaweb/system/system_server_context.cc
@@ -35,6 +35,7 @@
const char kCacheFlushCount[] = "cache_flush_count";
const char kCacheFlushTimestampMs[] = "cache_flush_timestamp_ms";
+const char kStatistics404Count[] = "statistics_404_count";
} // namespace
@@ -139,6 +140,11 @@
void SystemServerContext::InitStats(Statistics* statistics) {
statistics->AddVariable(kCacheFlushCount);
statistics->AddVariable(kCacheFlushTimestampMs);
+ statistics->AddVariable(kStatistics404Count);
+}
+
+Variable* SystemServerContext::statistics_404_count() {
+ return statistics()->GetVariable(kStatistics404Count);
}
} // namespace net_instaweb
diff --git a/src/pagespeed/kernel/base/statistics_logger.cc b/src/pagespeed/kernel/base/statistics_logger.cc
index 1345c4e..c7bb473 100644
--- a/src/pagespeed/kernel/base/statistics_logger.cc
+++ b/src/pagespeed/kernel/base/statistics_logger.cc
@@ -23,6 +23,7 @@
#include <utility> // for pair
#include <vector>
+#include "net/instaweb/util/public/escaping.h"
#include "pagespeed/kernel/base/abstract_mutex.h"
#include "pagespeed/kernel/base/basictypes.h"
#include "pagespeed/kernel/base/file_writer.h"
@@ -33,6 +34,7 @@
#include "pagespeed/kernel/base/string_util.h"
#include "pagespeed/kernel/base/timer.h"
#include "pagespeed/kernel/base/writer.h"
+#include "pagespeed/kernel/html/html_keywords.h"
namespace net_instaweb {
@@ -208,10 +210,12 @@
if (iterator != parsed_var_data.begin()) {
writer->Write(",", message_handler);
}
- // StringPrintf not used so that StringPiece not converted to GoogleString.
- writer->Write("\"", message_handler);
- writer->Write(var_name, message_handler);
- writer->Write("\": [", message_handler);
+ GoogleString html_name, json_name;
+ HtmlKeywords::Escape(var_name, &html_name);
+ EscapeToJsStringLiteral(html_name, true /* add_quotes*/, &json_name);
+
+ writer->Write(json_name, message_handler);
+ writer->Write(": [", message_handler);
for (size_t i = 0; i < info.size(); ++i) {
writer->Write(info[i], message_handler);
// If we are at the last recorded variable, we do not want the
diff --git a/src/pagespeed/kernel/base/statistics_logger_test.cc b/src/pagespeed/kernel/base/statistics_logger_test.cc
index 6ebf548..b348f59 100644
--- a/src/pagespeed/kernel/base/statistics_logger_test.cc
+++ b/src/pagespeed/kernel/base/statistics_logger_test.cc
@@ -32,6 +32,7 @@
#include "pagespeed/kernel/base/thread_system.h"
#include "pagespeed/kernel/base/timer.h"
#include "pagespeed/kernel/base/scoped_ptr.h"
+#include "pagespeed/kernel/html/html_keywords.h"
#include "pagespeed/kernel/util/platform.h"
namespace net_instaweb {
@@ -60,6 +61,10 @@
NULL /* timestamp_var */, &handler_,
NULL /* statistics */, &file_system_, &timer_) {}
+ static void SetUpTestCase() {
+ HtmlKeywords::Init();
+ }
+
GoogleString CreateVariableDataResponse(bool has_unused_variable,
bool first) {
GoogleString var_data;
diff --git a/src/pagespeed/kernel/html/html_keywords.cc b/src/pagespeed/kernel/html/html_keywords.cc
index 44bd92a..f95ba34 100644
--- a/src/pagespeed/kernel/html/html_keywords.cc
+++ b/src/pagespeed/kernel/html/html_keywords.cc
@@ -27,6 +27,7 @@
#include "pagespeed/kernel/base/basictypes.h"
#include "pagespeed/kernel/base/string.h"
#include "pagespeed/kernel/base/string_util.h"
+#include "pagespeed/kernel/base/writer.h"
#include "pagespeed/kernel/html/html_name.h"
namespace net_instaweb {
@@ -657,8 +658,9 @@
// This function, unfortunately, does not know what quoting was used.
// TODO(jmarantz): in remove_quotes filter, switch between ' and " for
// quoting based on whatever is in the attr value.
- if ((ch > 127) || (ch < 32) || (ch == '"') || (ch == '\'') || (ch == '&') ||
- (ch == '<') || (ch == '>')) {
+ if (!IsHtmlSpace(ch) &&
+ ((ch > 127) || (ch < 32) || (ch == '"') || (ch == '\'') ||
+ (ch == '&') || (ch == '<') || (ch == '>'))) {
char_to_escape.clear();
char_to_escape += ch;
StringStringSparseHashMapSensitive::const_iterator p =
@@ -799,4 +801,13 @@
PrepareForBinarySearch(&optionally_closed_);
}
+bool HtmlKeywords::WritePre(StringPiece str, Writer* writer,
+ MessageHandler* handler) {
+ bool ret = writer->Write("<pre>\n", handler);
+ GoogleString escaped;
+ ret &= writer->Write(HtmlKeywords::Escape(str, &escaped), handler);
+ ret &= writer->Write("</pre>\n", handler);
+ return ret;
+}
+
} // namespace net_instaweb
diff --git a/src/pagespeed/kernel/html/html_keywords.h b/src/pagespeed/kernel/html/html_keywords.h
index d29897a..6be4ebb 100644
--- a/src/pagespeed/kernel/html/html_keywords.h
+++ b/src/pagespeed/kernel/html/html_keywords.h
@@ -31,6 +31,9 @@
namespace net_instaweb {
+class MessageHandler;
+class Writer;
+
// Helper class for HtmlParser to recognize HTML keywords, handle escaping
// and unescaping, and assist the lexer in understanding how to interpret
// unbalanced tags.
@@ -109,6 +112,11 @@
keyword);
}
+ // Wraps text in a pre-tag and sends it to writer, returning false if the
+ // writer failed.
+ static bool WritePre(StringPiece text, Writer* writer,
+ MessageHandler* handler);
+
private:
typedef int32 KeywordPair; // Encoded via shift & OR.
typedef std::vector<KeywordPair> KeywordPairVec;
diff --git a/src/pagespeed/kernel/html/html_keywords_test.cc b/src/pagespeed/kernel/html/html_keywords_test.cc
index 1f0cd05..36921ba 100644
--- a/src/pagespeed/kernel/html/html_keywords_test.cc
+++ b/src/pagespeed/kernel/html/html_keywords_test.cc
@@ -41,6 +41,13 @@
EXPECT_STREQ(unescaped, Unescape(escaped, &buf));
}
+ // Validates that the provided text is not altered by escaping it.
+ void Unchanged(const GoogleString& text) {
+ GoogleString buf;
+ EXPECT_STREQ(text, HtmlKeywords::Escape(text, &buf));
+ EXPECT_STREQ(text, Unescape(text, &buf));
+ }
+
void TestEscape(const GoogleString& symbolic_code, char value) {
GoogleString symbolic_escaped = StrCat("&", symbolic_code, ";");
GoogleString numeric_escaped = StringPrintf(
@@ -276,4 +283,12 @@
BiTest("&ocircoooo", "ôoooo");
}
+TEST_F(HtmlKeywordsTest, WhitespaceNotChanged) {
+ Unchanged("a b");
+ Unchanged("a\nb");
+ Unchanged("a\rb");
+ Unchanged("a\tb");
+ Unchanged("a\fb");
+}
+
} // namespace net_instaweb