// src/net/instaweb/util/http_dump_url_writer.cc - incubator-pagespeed-mod - Git at Google

 /**
  * Copyright 2010 Google Inc.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
  * You may obtain a copy of the License at
  *
  *      http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */

 // Author: jmarantz@google.com (Joshua Marantz)

 #include "net/instaweb/util/public/http_dump_url_writer.h"
 #include "net/instaweb/util/public/file_writer.h"
 #include "net/instaweb/util/public/google_url.h"
 #include "net/instaweb/util/public/gzip_inflater.h"
 #include "net/instaweb/util/public/message_handler.h"
 #include "net/instaweb/util/public/meta_data.h"
 #include "net/instaweb/util/public/simple_meta_data.h"
 #include "net/instaweb/util/public/string_writer.h"
 #include "net/instaweb/util/stack_buffer.h"

 namespace net_instaweb {

 // Out-of-line destructor definition; the body performs no work of its own.
 HttpDumpUrlWriter::~HttpDumpUrlWriter() {}

 // Serves |url| out of the HTTP dump, first populating the dump from
 // base_fetcher_ when no dump file for the URL exists yet.
 //
 // Steps:
 //   1. Map the URL to a dump filename via dump_fetcher_.  If that fails, log
 //      an error and return false.
 //   2. If the dump file does not exist, fetch the URL into memory with
 //      base_fetcher_ (requesting gzip when accept_gzip_ is set), rewrite
 //      Content-Length to the number of bytes actually fetched, and store
 //      headers + body in a temp file that is renamed onto the final name only
 //      after it has been written and closed successfully.
 //   3. If step 2 failed, copy the fetched headers and body straight to the
 //      caller, since there is no dump file to read them from.
 //   4. Otherwise let dump_fetcher_ produce the response from the dump file.
 //
 // Params:
 //   url:              URL to fetch.
 //   request_headers:  headers of the incoming request.
 //   response_headers: receives the response headers.
 //   response_writer:  receives the response body.
 //   handler:          receives error and info messages.
 //
 // Returns true only if every step above succeeded, including the final
 // dump_fetcher_ fetch.
 bool HttpDumpUrlWriter::StreamingFetchUrl(const std::string& url,
                                           const MetaData& request_headers,
                                           MetaData* response_headers,
                                           Writer* response_writer,
                                           MessageHandler* handler) {
   bool ret = true;
   std::string filename;

   if (!dump_fetcher_.GetFilename(GURL(url), &filename, handler)) {
     handler->Message(kError, "Invalid url: %s", url.c_str());
     ret = false;
   } else if (!file_system_->Exists(filename.c_str(), handler).is_true()) {
     // Do the fetch first, before opening the output file, so that a failed
     // fetch does not leave an empty file behind.
     //
     // TODO(jmarantz): Re-integrate the use of SplitWriter.  We'll have
     // to do a lazy-open of the OutputFile* in a custom writer, though, to
     // avoid opening up a zero-size file when the URL fetch fails.
     std::string contents;
     StringWriter string_writer(&contents);
     // TODO(sligocki): Have this actually stream to response_writer.

     // In general we want to always ask the origin for gzipped output, but
     // this is kept as a variable so that it can be overridden by the
     // instantiator of the DumpUrlWriter.
     SimpleMetaData compress_headers, compressed_response;
     compress_headers.CopyFrom(request_headers);
     if (accept_gzip_) {
       compress_headers.RemoveAll(HttpAttributes::kAcceptEncoding);
       compress_headers.Add(HttpAttributes::kAcceptEncoding,
                            HttpAttributes::kGzip);
     }

     ret = base_fetcher_->StreamingFetchUrl(url, compress_headers,
                                            &compressed_response, &string_writer,
                                            handler);
     // Make Content-Length describe the bytes we actually hold.
     compressed_response.RemoveAll(HttpAttributes::kContentLength);
     compressed_response.Add(HttpAttributes::kContentLength,
                             IntegerToString(contents.size()).c_str());
     compressed_response.ComputeCaching();

     // Do not write an empty file if the fetch failed.
     if (ret) {
       // Check whether a response marked as gzipped is really unzippable.  If
       // it is not, drop the Content-Encoding header so the stored dump does
       // not claim to be gzipped.
       if (compressed_response.IsGzipped()) {
         GzipInflater inflater(GzipInflater::kGzip);
         inflater.Init();
         // The body comes from the network, so an input that the inflater
         // refuses is treated as "not unzippable" rather than aborting the
         // whole process.
         bool unzippable = inflater.SetInput(contents.data(), contents.size());
         while (unzippable && inflater.HasUnconsumedInput()) {
           char buf[kStackBufferSize];
           if ((inflater.InflateBytes(buf, sizeof(buf)) == 0) ||
               inflater.error()) {
             unzippable = false;
           }
         }
         if (!unzippable) {
           compressed_response.RemoveAll(HttpAttributes::kContentEncoding);
         }
       }

       FileSystem::OutputFile* file = file_system_->OpenTempFile(
           filename + ".temp", handler);
       if (file != NULL) {
         handler->Message(kInfo, "Storing %s as %s", url.c_str(),
                          filename.c_str());
         // Remember the temp name now; |file| is handed to Close() below.
         std::string temp_filename = file->filename();
         FileWriter file_writer(file);
         ret = compressed_response.Write(&file_writer, handler) &&
             file->Write(contents, handler);
         // Always close the file, even if writing failed.
         if (!file_system_->Close(file, handler)) {
           ret = false;
         }
         // Only install the dump under its final name when it was written and
         // closed successfully.  Renaming a partially written file would make
         // the Exists() check above succeed on later calls and serve a
         // truncated dump.
         // NOTE(review): on failure the temp file is left behind; consider
         // deleting it if FileSystem offers a removal operation.
         if (ret) {
           ret = file_system_->RenameFile(temp_filename.c_str(),
                                          filename.c_str(), handler);
         }
       } else {
         ret = false;
       }
     }

     // We are not going to be able to read the response from the file
     // system, so pass whatever was fetched (or an error) through directly.
     if (!ret) {
       response_headers->CopyFrom(compressed_response);
       if (!response_headers->headers_complete()) {
         response_headers->SetStatusAndReason(HttpStatus::kNotFound);
         response_headers->ComputeCaching();
         response_headers->set_headers_complete(true);
       }
       response_writer->Write(contents, handler);
     }
   }

   // Always use the HttpDumpUrlFetcher, even if we are reading the file
   // ourselves.  Thus the problem of inflating gzipped requests for requesters
   // that want cleartext only is solved only in that file.
   return ret && dump_fetcher_.StreamingFetchUrl(
       url, request_headers, response_headers, response_writer, handler);
 }

 }  // namespace net_instaweb
	/**
	* Copyright 2010 Google Inc.
	*
	* Licensed under the Apache License, Version 2.0 (the "License");
	* you may not use this file except in compliance with the License.
	* You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/

	// Author: jmarantz@google.com (Joshua Marantz)

	#include "net/instaweb/util/public/http_dump_url_writer.h"
	#include "net/instaweb/util/public/file_writer.h"
	#include "net/instaweb/util/public/google_url.h"
	#include "net/instaweb/util/public/gzip_inflater.h"
	#include "net/instaweb/util/public/message_handler.h"
	#include "net/instaweb/util/public/meta_data.h"
	#include "net/instaweb/util/public/simple_meta_data.h"
	#include "net/instaweb/util/public/string_writer.h"
	#include "net/instaweb/util/stack_buffer.h"

	namespace net_instaweb {

	// Out-of-line destructor definition; the body performs no work of its own.
	HttpDumpUrlWriter::~HttpDumpUrlWriter() {}

	// Serves |url| out of the HTTP dump, first populating the dump from
	// base_fetcher_ when no dump file for the URL exists yet.
	//
	// Steps:
	//   1. Map the URL to a dump filename via dump_fetcher_.  If that fails, log
	//      an error and return false.
	//   2. If the dump file does not exist, fetch the URL into memory with
	//      base_fetcher_ (requesting gzip when accept_gzip_ is set), rewrite
	//      Content-Length to the number of bytes actually fetched, and store
	//      headers + body in a temp file that is renamed onto the final name only
	//      after it has been written and closed successfully.
	//   3. If step 2 failed, copy the fetched headers and body straight to the
	//      caller, since there is no dump file to read them from.
	//   4. Otherwise let dump_fetcher_ produce the response from the dump file.
	//
	// Params:
	//   url:              URL to fetch.
	//   request_headers:  headers of the incoming request.
	//   response_headers: receives the response headers.
	//   response_writer:  receives the response body.
	//   handler:          receives error and info messages.
	//
	// Returns true only if every step above succeeded, including the final
	// dump_fetcher_ fetch.
	bool HttpDumpUrlWriter::StreamingFetchUrl(const std::string& url,
	                                          const MetaData& request_headers,
	                                          MetaData* response_headers,
	                                          Writer* response_writer,
	                                          MessageHandler* handler) {
	  bool ret = true;
	  std::string filename;

	  if (!dump_fetcher_.GetFilename(GURL(url), &filename, handler)) {
	    handler->Message(kError, "Invalid url: %s", url.c_str());
	    ret = false;
	  } else if (!file_system_->Exists(filename.c_str(), handler).is_true()) {
	    // Do the fetch first, before opening the output file, so that a failed
	    // fetch does not leave an empty file behind.
	    //
	    // TODO(jmarantz): Re-integrate the use of SplitWriter.  We'll have
	    // to do a lazy-open of the OutputFile* in a custom writer, though, to
	    // avoid opening up a zero-size file when the URL fetch fails.
	    std::string contents;
	    StringWriter string_writer(&contents);
	    // TODO(sligocki): Have this actually stream to response_writer.

	    // In general we want to always ask the origin for gzipped output, but
	    // this is kept as a variable so that it can be overridden by the
	    // instantiator of the DumpUrlWriter.
	    SimpleMetaData compress_headers, compressed_response;
	    compress_headers.CopyFrom(request_headers);
	    if (accept_gzip_) {
	      compress_headers.RemoveAll(HttpAttributes::kAcceptEncoding);
	      compress_headers.Add(HttpAttributes::kAcceptEncoding,
	                           HttpAttributes::kGzip);
	    }

	    ret = base_fetcher_->StreamingFetchUrl(url, compress_headers,
	                                           &compressed_response, &string_writer,
	                                           handler);
	    // Make Content-Length describe the bytes we actually hold.
	    compressed_response.RemoveAll(HttpAttributes::kContentLength);
	    compressed_response.Add(HttpAttributes::kContentLength,
	                            IntegerToString(contents.size()).c_str());
	    compressed_response.ComputeCaching();

	    // Do not write an empty file if the fetch failed.
	    if (ret) {
	      // Check whether a response marked as gzipped is really unzippable.  If
	      // it is not, drop the Content-Encoding header so the stored dump does
	      // not claim to be gzipped.
	      if (compressed_response.IsGzipped()) {
	        GzipInflater inflater(GzipInflater::kGzip);
	        inflater.Init();
	        // The body comes from the network, so an input that the inflater
	        // refuses is treated as "not unzippable" rather than aborting the
	        // whole process.
	        bool unzippable = inflater.SetInput(contents.data(), contents.size());
	        while (unzippable && inflater.HasUnconsumedInput()) {
	          char buf[kStackBufferSize];
	          if ((inflater.InflateBytes(buf, sizeof(buf)) == 0) ||
	              inflater.error()) {
	            unzippable = false;
	          }
	        }
	        if (!unzippable) {
	          compressed_response.RemoveAll(HttpAttributes::kContentEncoding);
	        }
	      }

	      FileSystem::OutputFile* file = file_system_->OpenTempFile(
	          filename + ".temp", handler);
	      if (file != NULL) {
	        handler->Message(kInfo, "Storing %s as %s", url.c_str(),
	                         filename.c_str());
	        // Remember the temp name now; |file| is handed to Close() below.
	        std::string temp_filename = file->filename();
	        FileWriter file_writer(file);
	        ret = compressed_response.Write(&file_writer, handler) &&
	            file->Write(contents, handler);
	        // Always close the file, even if writing failed.
	        if (!file_system_->Close(file, handler)) {
	          ret = false;
	        }
	        // Only install the dump under its final name when it was written and
	        // closed successfully.  Renaming a partially written file would make
	        // the Exists() check above succeed on later calls and serve a
	        // truncated dump.
	        // NOTE(review): on failure the temp file is left behind; consider
	        // deleting it if FileSystem offers a removal operation.
	        if (ret) {
	          ret = file_system_->RenameFile(temp_filename.c_str(),
	                                         filename.c_str(), handler);
	        }
	      } else {
	        ret = false;
	      }
	    }

	    // We are not going to be able to read the response from the file
	    // system, so pass whatever was fetched (or an error) through directly.
	    if (!ret) {
	      response_headers->CopyFrom(compressed_response);
	      if (!response_headers->headers_complete()) {
	        response_headers->SetStatusAndReason(HttpStatus::kNotFound);
	        response_headers->ComputeCaching();
	        response_headers->set_headers_complete(true);
	      }
	      response_writer->Write(contents, handler);
	    }
	  }

	  // Always use the HttpDumpUrlFetcher, even if we are reading the file
	  // ourselves.  Thus the problem of inflating gzipped requests for requesters
	  // that want cleartext only is solved only in that file.
	  return ret && dump_fetcher_.StreamingFetchUrl(
	      url, request_headers, response_headers, response_writer, handler);
	}

	} // namespace net_instaweb