Backport: Factor out handling for errors, stats, and logs. Add nosniff protection.

diff --git a/src/DEPS b/src/DEPS
index 982de7b..1931434 100644
--- a/src/DEPS
+++ b/src/DEPS
@@ -58,7 +58,7 @@
   "opencv_src": "http://github.com/Itseez/opencv.git",
   "opencv_revision": "@2.3.1",
 
-  "jsoncpp_src": "https://jsoncpp.svn.sourceforge.net/svnroot/jsoncpp/trunk/",
+  "jsoncpp_src": "https://svn.code.sf.net/p/jsoncpp/code/trunk",
   "jsoncpp_revision": "@247",
 
   "gflags_root": "http://google-gflags.googlecode.com/svn/tags/gflags-1.5/src",
diff --git a/src/net/instaweb/apache/apache_server_context.cc b/src/net/instaweb/apache/apache_server_context.cc
index 8333916..73a4259 100644
--- a/src/net/instaweb/apache/apache_server_context.cc
+++ b/src/net/instaweb/apache/apache_server_context.cc
@@ -16,7 +16,8 @@
 
 #include "net/instaweb/apache/apache_server_context.h"
 
-#include "httpd.h"
+#include "httpd.h"                  // NOLINT
+#include "http_protocol.h"          // NOLINT
 #include "base/logging.h"
 #include "net/instaweb/apache/apache_config.h"
 #include "net/instaweb/apache/apache_request_context.h"
@@ -34,6 +35,7 @@
 #include "net/instaweb/system/public/system_caches.h"
 #include "net/instaweb/util/public/basictypes.h"
 #include "net/instaweb/util/public/file_system.h"
+#include "net/instaweb/util/public/message_handler.h"
 #include "net/instaweb/util/public/shared_mem_statistics.h"
 #include "net/instaweb/util/public/split_statistics.h"
 #include "net/instaweb/util/public/statistics.h"
@@ -188,6 +190,18 @@
   ApacheRewriteDriverFactory::InitStats(split_statistics_.get());
 }
 
+void ApacheServerContext::ReportNotFoundHelper(StringPiece error_message,
+                                               request_rec* request,
+                                               Variable* error_count) {
+  error_count->Add(1);
+  request->status = HttpStatus::kNotFound;
+  ap_send_error_response(request, 0);
+  message_handler()->Message(kWarning, "%s %s: not found (404)",
+                             (error_message.empty() ? "(null)" :
+                              error_message.as_string().c_str()),
+                             error_count->GetName().as_string().c_str());
+}
+
 void ApacheServerContext::ChildInit() {
   DCHECK(!initialized_);
   if (!initialized_) {
diff --git a/src/net/instaweb/apache/apache_server_context.h b/src/net/instaweb/apache/apache_server_context.h
index c2e399d..167be22 100644
--- a/src/net/instaweb/apache/apache_server_context.h
+++ b/src/net/instaweb/apache/apache_server_context.h
@@ -19,12 +19,14 @@
 
 #include "net/instaweb/apache/apache_config.h"
 #include "net/instaweb/http/public/request_context.h"
+#include "net/instaweb/rewriter/public/rewrite_stats.h"
 #include "net/instaweb/system/public/system_server_context.h"
 #include "net/instaweb/util/public/basictypes.h"
 #include "net/instaweb/util/public/scoped_ptr.h"
 #include "net/instaweb/util/public/string.h"
 #include "net/instaweb/util/public/string_util.h"
 
+struct request_rec;
 struct server_rec;
 
 namespace net_instaweb {
@@ -138,9 +140,37 @@
   // let mod_pagespeed behave as an origin fetcher.
   virtual bool ProxiesHtml() const { return false; }
 
+  // Reports an error status to the HTTP resource request, and logs
+  // the error as a Warning to the log file, and bumps a stat as
+  // needed.
+  void ReportResourceNotFound(StringPiece error_message, request_rec* request) {
+    ReportNotFoundHelper(error_message, request,
+                         rewrite_stats()->resource_404_count());
+  }
+
+  // Reports an error status to the HTTP statistics request, and logs
+  // the error as a Warning to the log file, and bumps a stat as
+  // needed.
+  void ReportStatisticsNotFound(StringPiece error_message,
+                                request_rec* request) {
+    ReportNotFoundHelper(error_message, request, statistics_404_count());
+  }
+
+  // Reports an error status to the HTTP slurp request, and logs
+  // the error as a Warning to the log file, and bumps a stat as
+  // needed.
+  void ReportSlurpNotFound(StringPiece error_message, request_rec* request) {
+    ReportNotFoundHelper(error_message, request,
+                         rewrite_stats()->slurp_404_count());
+  }
+
  private:
   virtual bool UpdateCacheFlushTimestampMs(int64 timestamp_ms);
 
+  void ReportNotFoundHelper(StringPiece url,
+                            request_rec* request,
+                            Variable* error_count);
+
   ApacheRewriteDriverFactory* apache_factory_;
   server_rec* server_rec_;
   GoogleString version_;
diff --git a/src/net/instaweb/apache/apache_slurp.cc b/src/net/instaweb/apache/apache_slurp.cc
index dd4e9fa..46f512d 100644
--- a/src/net/instaweb/apache/apache_slurp.cc
+++ b/src/net/instaweb/apache/apache_slurp.cc
@@ -259,7 +259,8 @@
                        request_context, handler);
   ApacheRequestToRequestHeaders(*r, fetch.request_headers());
 
-  if (fetch.Fetch()) {
+  bool fetch_succeeded = fetch.Fetch();
+  if (fetch_succeeded) {
     // We always disable downstream header filters when sending out
     // slurped resources, since we've captured them from the origin
     // in the fetch we did to write the slurp.
@@ -275,7 +276,10 @@
                      stripped_url.c_str(),
                      fetch.request_headers()->ToString().c_str(),
                      fetch.response_headers()->ToString().c_str());
-    SlurpDefaultHandler(r);
+  }
+
+  if (!fetch_succeeded || fetch.response_headers()->IsErrorStatus()) {
+    server_context->ReportSlurpNotFound(stripped_url, r);
   }
 }
 
diff --git a/src/net/instaweb/apache/instaweb_handler.cc b/src/net/instaweb/apache/instaweb_handler.cc
index d77054c..d9f6179 100644
--- a/src/net/instaweb/apache/instaweb_handler.cc
+++ b/src/net/instaweb/apache/instaweb_handler.cc
@@ -34,6 +34,7 @@
 #include "net/instaweb/apache/instaweb_context.h"
 #include "net/instaweb/apache/mod_instaweb.h"
 #include "net/instaweb/automatic/public/proxy_fetch.h"
+#include "net/instaweb/htmlparse/public/html_keywords.h"
 #include "net/instaweb/http/public/async_fetch.h"
 #include "net/instaweb/http/public/cache_url_async_fetcher.h"
 #include "net/instaweb/http/public/content_type.h"
@@ -366,9 +367,7 @@
                              url.c_str(), response_headers->status_code());
     send_out_headers_and_body(request, *response_headers, output);
   } else {
-    RewriteStats* stats = server_context->rewrite_stats();
-    stats->resource_404_count()->Add(1);
-    instaweb_404_handler(url, request);
+    server_context->ReportResourceNotFound(url, request);
   }
 
   callback->Release();
@@ -532,7 +531,7 @@
 }
 
 // Write response headers and send out headers and output, including the option
-//     for a custom Content-Type.
+// for a custom Content-Type.
 void write_handler_response(const StringPiece& output,
                             request_rec* request,
                             ContentType content_type,
@@ -543,6 +542,12 @@
   response_headers.set_minor_version(1);
 
   response_headers.Add(HttpAttributes::kContentType, content_type.mime_type());
+  // http://msdn.microsoft.com/en-us/library/ie/gg622941(v=vs.85).aspx
+  // Script and styleSheet elements will reject responses with
+  // incorrect MIME types if the server sends the response header
+  // "X-Content-Type-Options: nosniff". This is a security feature
+  // that helps prevent attacks based on MIME-type confusion.
+  response_headers.Add("X-Content-Type-Options", "nosniff");
   AprTimer timer;
   int64 now_ms = timer.NowMs();
   response_headers.SetDate(now_ms);
@@ -614,13 +619,6 @@
   return 1;  // Continue iteration.
 }
 
-// Writes text wrapped in a <pre> block
-void WritePre(StringPiece str, Writer* writer, MessageHandler* handler) {
-  writer->Write("<pre>\n", handler);
-  writer->Write(str, handler);
-  writer->Write("</pre>\n", handler);
-}
-
 void instaweb_static_handler(request_rec* request,
                              ApacheServerContext* server_context) {
   StaticAssetManager* static_asset_manager =
@@ -637,7 +635,7 @@
       file_name, &file_contents, &content_type, &cache_header)) {
     write_handler_response(file_contents, request, content_type, cache_header);
   } else {
-    instaweb_404_handler(request->parsed_uri.path, request);
+    server_context->ReportResourceNotFound(request->parsed_uri.path, request);
   }
 }
 
@@ -785,14 +783,14 @@
       factory->caches()->PrintCacheStats(
           static_cast<SystemCaches::StatFlags>(flags), &backend_stats);
       if (!backend_stats.empty()) {
-        WritePre(backend_stats, &writer, message_handler);
+        HtmlKeywords::WritePre(backend_stats, &writer, message_handler);
       }
     }
 
     if (print_normal_config) {
       writer.Write("Configuration:<br>", message_handler);
-      WritePre(server_context->config()->OptionsToString(),
-               &writer, message_handler);
+      HtmlKeywords::WritePre(server_context->config()->OptionsToString(),
+                             &writer, message_handler);
     }
 
     if (print_spdy_config) {
@@ -802,7 +800,8 @@
                      message_handler);
       } else {
         writer.Write("SPDY-specific configuration:<br>", message_handler);
-        WritePre(spdy_config->OptionsToString(), &writer, message_handler);
+        HtmlKeywords::WritePre(spdy_config->OptionsToString(),
+                               &writer, message_handler);
       }
     }
   }
@@ -1017,17 +1016,17 @@
 
   } else if (request_handler_str == kMessageHandler) {
     // Request for page /mod_pagespeed_message.
-    GoogleString output;
-    StringWriter writer(&output);
-    // Write <pre></pre> for Dump to keep good format.
-    writer.Write("<pre>", message_handler);
-    if (!message_handler->Dump(&writer)) {
-      writer.Write("Writing to mod_pagespeed_message failed. \n"
-                   "Please check if it's enabled in pagespeed.conf.\n",
-                   message_handler);
+    GoogleString html, log;
+    StringWriter html_writer(&html), log_writer(&log);
+    if (message_handler->Dump(&log_writer)) {
+      // Write pre-tag for Dump to keep good format.
+      HtmlKeywords::WritePre(log, &html_writer, message_handler);
+    } else {
+      html =
+          "Writing to mod_pagespeed_message failed. \n"
+          "Please check if it's enabled in pagespeed.conf.\n";
     }
-    writer.Write("</pre>", message_handler);
-    write_handler_response(output, request);
+    write_handler_response(html, request);
     ret = OK;
 
   } else if (request_handler_str == kLogRequestHeadersHandler) {
@@ -1089,10 +1088,6 @@
 
     if (ret != OK && (config->slurping_enabled() || config->test_proxy())) {
       SlurpUrl(server_context, request);
-      if (request->status == HTTP_NOT_FOUND) {
-        RewriteStats* stats = server_context->rewrite_stats();
-        stats->slurp_404_count()->Add(1);
-      }
       ret = OK;
     }
   }
diff --git a/src/net/instaweb/system/public/system_server_context.h b/src/net/instaweb/system/public/system_server_context.h
index 9bf6b8c..97158e6 100644
--- a/src/net/instaweb/system/public/system_server_context.h
+++ b/src/net/instaweb/system/public/system_server_context.h
@@ -49,6 +49,8 @@
 
   static void InitStats(Statistics* statistics);
 
+  Variable* statistics_404_count();
+
  protected:
   // Flush the cache by updating the cache flush timestamp in the global
   // options.  This will change its signature, which is part of the cache key,
diff --git a/src/net/instaweb/system/system_caches.cc b/src/net/instaweb/system/system_caches.cc
index ba56c8b..812e1fb 100644
--- a/src/net/instaweb/system/system_caches.cc
+++ b/src/net/instaweb/system/system_caches.cc
@@ -22,6 +22,7 @@
 #include <utility>
 
 #include "base/logging.h"
+#include "net/instaweb/htmlparse/public/html_keywords.h"
 #include "net/instaweb/http/public/http_cache.h"
 #include "net/instaweb/http/public/write_through_http_cache.h"
 #include "net/instaweb/rewriter/public/server_context.h"
@@ -43,6 +44,7 @@
 #include "net/instaweb/util/public/slow_worker.h"
 #include "net/instaweb/util/public/string.h"
 #include "net/instaweb/util/public/string_util.h"
+#include "net/instaweb/util/public/string_writer.h"
 #include "net/instaweb/util/public/thread_system.h"
 #include "net/instaweb/util/public/write_through_cache.h"
 
@@ -470,8 +472,9 @@
       if (cache_info->cache_backend != NULL) {
         StrAppend(out, "Shared memory metadata cache '", p->first,
                   "' statistics:<br>");
-        StrAppend(out, "<pre>", cache_info->cache_backend->DumpStats(),
-                  "</pre>");
+        StringWriter writer(out);
+        HtmlKeywords::WritePre(cache_info->cache_backend->DumpStats(),
+                               &writer, factory_->message_handler());
       }
     }
   }
diff --git a/src/net/instaweb/system/system_server_context.cc b/src/net/instaweb/system/system_server_context.cc
index 51d0858..38240c7 100644
--- a/src/net/instaweb/system/system_server_context.cc
+++ b/src/net/instaweb/system/system_server_context.cc
@@ -35,6 +35,7 @@
 
 const char kCacheFlushCount[] = "cache_flush_count";
 const char kCacheFlushTimestampMs[] = "cache_flush_timestamp_ms";
+const char kStatistics404Count[] = "statistics_404_count";
 
 }  // namespace
 
@@ -139,6 +140,11 @@
 void SystemServerContext::InitStats(Statistics* statistics) {
   statistics->AddVariable(kCacheFlushCount);
   statistics->AddVariable(kCacheFlushTimestampMs);
+  statistics->AddVariable(kStatistics404Count);
+}
+
+Variable* SystemServerContext::statistics_404_count() {
+  return statistics()->GetVariable(kStatistics404Count);
 }
 
 }  // namespace net_instaweb
diff --git a/src/pagespeed/kernel/base/statistics_logger.cc b/src/pagespeed/kernel/base/statistics_logger.cc
index 1345c4e..c7bb473 100644
--- a/src/pagespeed/kernel/base/statistics_logger.cc
+++ b/src/pagespeed/kernel/base/statistics_logger.cc
@@ -23,6 +23,7 @@
 #include <utility>                      // for pair
 #include <vector>
 
+#include "net/instaweb/util/public/escaping.h"
 #include "pagespeed/kernel/base/abstract_mutex.h"
 #include "pagespeed/kernel/base/basictypes.h"
 #include "pagespeed/kernel/base/file_writer.h"
@@ -33,6 +34,7 @@
 #include "pagespeed/kernel/base/string_util.h"
 #include "pagespeed/kernel/base/timer.h"
 #include "pagespeed/kernel/base/writer.h"
+#include "pagespeed/kernel/html/html_keywords.h"
 
 namespace net_instaweb {
 
@@ -208,10 +210,12 @@
     if (iterator != parsed_var_data.begin()) {
       writer->Write(",", message_handler);
     }
-    // StringPrintf not used so that StringPiece not converted to GoogleString.
-    writer->Write("\"", message_handler);
-    writer->Write(var_name, message_handler);
-    writer->Write("\": [", message_handler);
+    GoogleString html_name, json_name;
+    HtmlKeywords::Escape(var_name, &html_name);
+    EscapeToJsStringLiteral(html_name, true /* add_quotes*/, &json_name);
+
+    writer->Write(json_name, message_handler);
+    writer->Write(": [", message_handler);
     for (size_t i = 0; i < info.size(); ++i) {
       writer->Write(info[i], message_handler);
       // If we are at the last recorded variable, we do not want the
diff --git a/src/pagespeed/kernel/base/statistics_logger_test.cc b/src/pagespeed/kernel/base/statistics_logger_test.cc
index 6ebf548..b348f59 100644
--- a/src/pagespeed/kernel/base/statistics_logger_test.cc
+++ b/src/pagespeed/kernel/base/statistics_logger_test.cc
@@ -32,6 +32,7 @@
 #include "pagespeed/kernel/base/thread_system.h"
 #include "pagespeed/kernel/base/timer.h"
 #include "pagespeed/kernel/base/scoped_ptr.h"
+#include "pagespeed/kernel/html/html_keywords.h"
 #include "pagespeed/kernel/util/platform.h"
 
 namespace net_instaweb {
@@ -60,6 +61,10 @@
                 NULL /* timestamp_var */, &handler_,
                 NULL /* statistics */, &file_system_, &timer_) {}
 
+  static void SetUpTestCase() {
+    HtmlKeywords::Init();
+  }
+
   GoogleString CreateVariableDataResponse(bool has_unused_variable,
                                           bool first) {
     GoogleString var_data;
diff --git a/src/pagespeed/kernel/html/html_keywords.cc b/src/pagespeed/kernel/html/html_keywords.cc
index 44bd92a..f95ba34 100644
--- a/src/pagespeed/kernel/html/html_keywords.cc
+++ b/src/pagespeed/kernel/html/html_keywords.cc
@@ -27,6 +27,7 @@
 #include "pagespeed/kernel/base/basictypes.h"
 #include "pagespeed/kernel/base/string.h"
 #include "pagespeed/kernel/base/string_util.h"
+#include "pagespeed/kernel/base/writer.h"
 #include "pagespeed/kernel/html/html_name.h"
 
 namespace net_instaweb {
@@ -657,8 +658,9 @@
     // This function, unfortunately, does not know what quoting was used.
     // TODO(jmarantz): in remove_quotes filter, switch between ' and " for
     // quoting based on whatever is in the attr value.
-    if ((ch > 127) || (ch < 32) || (ch == '"') || (ch == '\'') || (ch == '&') ||
-        (ch == '<') || (ch == '>')) {
+    if (!IsHtmlSpace(ch) &&
+        ((ch > 127) || (ch < 32) || (ch == '"') || (ch == '\'') ||
+         (ch == '&') || (ch == '<') || (ch == '>'))) {
       char_to_escape.clear();
       char_to_escape += ch;
       StringStringSparseHashMapSensitive::const_iterator p =
@@ -799,4 +801,13 @@
   PrepareForBinarySearch(&optionally_closed_);
 }
 
+bool HtmlKeywords::WritePre(StringPiece str, Writer* writer,
+                            MessageHandler* handler) {
+  bool ret = writer->Write("<pre>\n", handler);
+  GoogleString escaped;
+  ret &= writer->Write(HtmlKeywords::Escape(str, &escaped), handler);
+  ret &= writer->Write("</pre>\n", handler);
+  return ret;
+}
+
 }  // namespace net_instaweb
diff --git a/src/pagespeed/kernel/html/html_keywords.h b/src/pagespeed/kernel/html/html_keywords.h
index d29897a..6be4ebb 100644
--- a/src/pagespeed/kernel/html/html_keywords.h
+++ b/src/pagespeed/kernel/html/html_keywords.h
@@ -31,6 +31,9 @@
 
 namespace net_instaweb {
 
+class MessageHandler;
+class Writer;
+
 // Helper class for HtmlParser to recognize HTML keywords, handle escaping
 // and unescaping, and assist the lexer in understanding how to interpret
 // unbalanced tags.
@@ -109,6 +112,11 @@
                               keyword);
   }
 
+  // Wraps text in a pre-tag and sends it to writer, returning false if the
+  // writer failed.
+  static bool WritePre(StringPiece text, Writer* writer,
+                       MessageHandler* handler);
+
  private:
   typedef int32 KeywordPair;  // Encoded via shift & OR.
   typedef std::vector<KeywordPair> KeywordPairVec;
diff --git a/src/pagespeed/kernel/html/html_keywords_test.cc b/src/pagespeed/kernel/html/html_keywords_test.cc
index 1f0cd05..36921ba 100644
--- a/src/pagespeed/kernel/html/html_keywords_test.cc
+++ b/src/pagespeed/kernel/html/html_keywords_test.cc
@@ -41,6 +41,13 @@
     EXPECT_STREQ(unescaped, Unescape(escaped, &buf));
   }
 
+  // Validates that the provided text is not altered by escaping it.
+  void Unchanged(const GoogleString& text) {
+    GoogleString buf;
+    EXPECT_STREQ(text, HtmlKeywords::Escape(text, &buf));
+    EXPECT_STREQ(text, Unescape(text, &buf));
+  }
+
   void TestEscape(const GoogleString& symbolic_code, char value) {
     GoogleString symbolic_escaped = StrCat("&", symbolic_code, ";");
     GoogleString numeric_escaped = StringPrintf(
@@ -276,4 +283,12 @@
   BiTest("&amp;ocircoooo", "&ocircoooo");
 }
 
+TEST_F(HtmlKeywordsTest, WhitespaceNotChanged) {
+  Unchanged("a b");
+  Unchanged("a\nb");
+  Unchanged("a\rb");
+  Unchanged("a\tb");
+  Unchanged("a\fb");
+}
+
 }  // namespace net_instaweb