blob: cdba56c68f571e6526f712c50ebeaab71eba1ecf [file] [log] [blame]
/**
* Copyright 2010 Google Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
// Author: jmarantz@google.com (Joshua Marantz)
#include "net/instaweb/util/public/http_dump_url_writer.h"
#include "net/instaweb/util/public/file_writer.h"
#include "net/instaweb/util/public/google_url.h"
#include "net/instaweb/util/public/gzip_inflater.h"
#include "net/instaweb/util/public/message_handler.h"
#include "net/instaweb/util/public/meta_data.h"
#include "net/instaweb/util/public/simple_meta_data.h"
#include "net/instaweb/util/public/string_writer.h"
#include "net/instaweb/util/stack_buffer.h"
namespace net_instaweb {
// Destructor. The body is intentionally empty: no explicit cleanup is
// performed here.
HttpDumpUrlWriter::~HttpDumpUrlWriter() {}
// Serves |url| from the HTTP dump, first populating the dump from
// base_fetcher_ if no dump file exists for the URL yet.
//
// Steps:
//   1. Ask dump_fetcher_ for the dump filename corresponding to |url|.
//   2. If that file does not exist, fetch the URL with base_fetcher_ into an
//      in-memory buffer (forcing "Accept-Encoding: gzip" when accept_gzip_ is
//      set), rewrite Content-Length to the buffered size, and store headers
//      plus body into a temp file that is renamed to the final filename.
//   3. If everything so far succeeded, delegate to dump_fetcher_ to produce
//      the response from the dump file.
//
// Parameters:
//   url              - the URL to fetch.
//   request_headers  - the caller's request headers; copied, never modified.
//   response_headers - receives the response headers.
//   response_writer  - receives the response body.
//   handler          - receives diagnostics.
//
// Returns true only if every step succeeded, including the final read via
// dump_fetcher_.  When populating the dump fails, the headers and whatever
// body was buffered from base_fetcher_ are passed straight through to
// |response_headers| / |response_writer| and false is returned.
bool HttpDumpUrlWriter::StreamingFetchUrl(const std::string& url,
                                          const MetaData& request_headers,
                                          MetaData* response_headers,
                                          Writer* response_writer,
                                          MessageHandler* handler) {
  std::string filename;
  if (!dump_fetcher_.GetFilename(GURL(url), &filename, handler)) {
    handler->Message(kError, "Invalid url: %s", url.c_str());
    return false;
  }

  bool ret = true;
  if (!file_system_->Exists(filename.c_str(), handler).is_true()) {
    // Do the Fetch first, before opening the output file, so that if the
    // fetch fails, do not make an empty file.
    //
    // TODO(jmarantz): Re-integrate the use of SplitWriter.  We'll have
    // to do a lazy-open of the OutputFile* in a custom writer, though, to
    // avoid opening up a zero-size file when the URL fetch fails.
    std::string contents;
    StringWriter string_writer(&contents);
    // TODO(sligocki): Have this actually stream to response_writer.

    // In general we will want to always ask the origin for gzipped output,
    // but we are leaving in variable so this could be overridden by the
    // instantiator of the DumpUrlWriter.
    SimpleMetaData compress_headers, compressed_response;
    compress_headers.CopyFrom(request_headers);
    if (accept_gzip_) {
      compress_headers.RemoveAll(HttpAttributes::kAcceptEncoding);
      compress_headers.Add(HttpAttributes::kAcceptEncoding,
                           HttpAttributes::kGzip);
    }

    ret = base_fetcher_->StreamingFetchUrl(url, compress_headers,
                                           &compressed_response,
                                           &string_writer, handler);

    // Make Content-Length agree with the number of bytes actually buffered.
    compressed_response.RemoveAll(HttpAttributes::kContentLength);
    compressed_response.Add(HttpAttributes::kContentLength,
                            IntegerToString(contents.size()).c_str());
    compressed_response.ComputeCaching();

    // Do not write an empty file if the fetch failed.
    if (ret) {
      // Check whether a response marked as gzipped can really be inflated.
      // If it cannot, drop the Content-Encoding header so the stored headers
      // do not claim an encoding the body does not have.
      if (compressed_response.IsGzipped()) {
        GzipInflater inflater(GzipInflater::kGzip);
        inflater.Init();
        CHECK(inflater.SetInput(contents.data(), contents.size()));
        while (inflater.HasUnconsumedInput()) {
          // The inflated bytes are discarded; this is only a validity probe.
          char buf[kStackBufferSize];
          if ((inflater.InflateBytes(buf, sizeof(buf)) == 0) ||
              inflater.error()) {
            compressed_response.RemoveAll(HttpAttributes::kContentEncoding);
            break;
          }
        }
      }

      // Write to a temp file and rename it into place, so the final filename
      // only ever appears with complete contents.
      FileSystem::OutputFile* file = file_system_->OpenTempFile(
          filename + ".temp", handler);
      if (file != NULL) {
        handler->Message(kInfo, "Storing %s as %s", url.c_str(),
                         filename.c_str());
        // Capture the temp name before Close(); |file| is presumably not
        // usable afterwards.
        std::string temp_filename = file->filename();
        FileWriter file_writer(file);
        const bool written =
            compressed_response.Write(&file_writer, handler) &&
            file->Write(contents, handler);
        // Close is attempted even after a failed write, as before.
        const bool closed = file_system_->Close(file, handler);
        ret = written && closed;
        // Only publish the dump under its final name when it was written and
        // closed successfully.  Renaming a partially written file would make
        // Exists() succeed on later calls, which would then keep replaying
        // the truncated dump.
        // NOTE(review): on failure the temp file is left on disk; consider
        // deleting it if the FileSystem interface offers a way to do so.
        if (ret) {
          ret = file_system_->RenameFile(temp_filename.c_str(),
                                         filename.c_str(), handler);
        }
      } else {
        ret = false;
      }
    }

    // We are not going to be able to read the response from the file
    // system so we better pass the error message through.
    if (!ret) {
      response_headers->CopyFrom(compressed_response);
      if (!response_headers->headers_complete()) {
        // The fetch never produced complete headers: report a 404.
        response_headers->SetStatusAndReason(HttpStatus::kNotFound);
        response_headers->ComputeCaching();
        response_headers->set_headers_complete(true);
      }
      response_writer->Write(contents, handler);
    }
  }

  // Always use the HttpDumpUrlFetcher, even if we are reading the file
  // ourselves.  Thus the problem of inflating gzipped requests for requesters
  // that want cleartext only is solved only in that file.
  return ret && dump_fetcher_.StreamingFetchUrl(
      url, request_headers, response_headers, response_writer, handler);
}
} // namespace net_instaweb