// blob: e97823613a6b84985108399e8831f792ca8e15ce [file] [log] [blame]
/*
* Copyright 2010 Google Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
// Author: jmarantz@google.com (Joshua Marantz)
//
// Implementation of ResourceCombiner, a helper for filters that combine
// multiple resources. Also contains CombinerCallback, which is used to collect
// input resources when doing a ResourceCombiner::Fetch.
#include "net/instaweb/rewriter/public/resource_combiner.h"
#include <cstddef>
#include "base/logging.h"
#include "net/instaweb/rewriter/cached_result.pb.h"
#include "net/instaweb/rewriter/public/output_resource.h"
#include "net/instaweb/rewriter/public/output_resource_kind.h"
#include "net/instaweb/rewriter/public/resource.h"
#include "net/instaweb/rewriter/public/resource_namer.h"
#include "net/instaweb/rewriter/public/rewrite_driver.h"
#include "net/instaweb/rewriter/public/rewrite_filter.h"
#include "net/instaweb/rewriter/public/rewrite_options.h"
#include "net/instaweb/rewriter/public/server_context.h"
#include "net/instaweb/rewriter/public/url_partnership.h"
#include "pagespeed/kernel/base/hasher.h"
#include "pagespeed/kernel/base/message_handler.h"
#include "pagespeed/kernel/base/ref_counted_ptr.h"
#include "pagespeed/kernel/base/string.h"
#include "pagespeed/kernel/base/string_util.h"
#include "pagespeed/kernel/base/string_writer.h"
#include "pagespeed/kernel/base/writer.h"
#include "pagespeed/kernel/http/response_headers.h"
#include "pagespeed/kernel/util/url_escaper.h"
#include "pagespeed/kernel/util/url_multipart_encoder.h"
namespace net_instaweb {
// Creates a combiner attached to |driver| and |filter|.
//
// |extension| is not retained; only its length is used, to estimate how many
// characters of every generated URL are taken up by naming overhead.
ResourceCombiner::ResourceCombiner(RewriteDriver* driver,
                                   const StringPiece& extension,
                                   RewriteFilter* filter)
    : server_context_(driver->server_context()),
      rewrite_driver_(driver),
      partnership_(driver),
      prev_num_components_(0),
      accumulated_leaf_size_(0),
      // Fixed per-URL overhead: filter id + ResourceNamer overhead +
      // extension length.
      //
      // TODO(jmarantz): This overhead computation is arguably fragile.
      // One alternative is to CHECK that the final URL, once the resource
      // naming has been applied, does not exceed the limit.  Another is to
      // instantiate a ResourceNamer and a hasher, fill in the correct ID and
      // EXT while leaving the name blank, and take the size of the result.
      url_overhead_(strlen(filter->id()) + ResourceNamer::kOverhead +
                    extension.size()),
      filter_(filter) {
  // A RewriteDriver is constructed with its server_context_ == NULL, so make
  // sure the one handed to us has already been given a ServerContext.
  // TODO(sligocki): Construct RewriteDriver with a ServerContext, to avoid
  // worrying about it not getting initialized.
  CHECK(server_context_ != NULL);
}
// Empties resources_ and multipart_encoder_urls_ (via Clear()) on
// destruction.
ResourceCombiner::~ResourceCombiner() {
  Clear();
}
// Attempts to append |resource| to the combination being assembled.
//
// Returns a TimedBool whose |value| is true only when the resource was
// accepted.  |expiration_ms| is taken from resource->CacheExpirationTimeMs()
// (it stays 0 only if a CHECK fires before that point).  A resource is
// rejected, with a kInfo message sent to |handler|, when:
//   - ResourceCombinable() vetoes it,
//   - the URL partnership refuses its URL, or
//   - after tentatively adding it, ContentSizeTooBig() or UrlTooBig()
//     reports true; in that case the tentative add is rolled back through
//     RemoveLastResource().
TimedBool ResourceCombiner::AddResourceNoFetch(const ResourcePtr& resource,
                                               MessageHandler* handler) {
  TimedBool result = {0, false};

  // The URL count, resources_ and multipart_encoder_urls_ are kept in
  // lock-step; verify that before touching any of them.
  CHECK_EQ(num_urls(), static_cast<int>(resources_.size()));
  CHECK_EQ(num_urls(), static_cast<int>(multipart_encoder_urls_.size()));
  if (num_urls() == 0) {
    // First resource of a new combination: (re)initialize the base URL.
    Reset();
  }

  // Whatever we answer below stays valid until the resource itself changes.
  result.expiration_ms = resource->CacheExpirationTimeMs();

  // Give the concrete combiner a chance to veto resources it cannot combine
  // safely.
  GoogleString not_combinable_reason;
  if (!ResourceCombinable(resource.get(), &not_combinable_reason, handler)) {
    handler->Message(
        kInfo, "Cannot combine %s: resource not combinable, reason: %s",
        resource->url().c_str(), not_combinable_reason.c_str());
    return result;
  }

  // URL and policy checks.
  if (!partnership_.AddUrl(resource->url(), handler)) {
    handler->MessageS(kInfo, "Cannot combine: partnership forbids");
    return result;
  }

  const int new_index = num_urls() - 1;
  if (partnership_.NumCommonComponents() != prev_num_components_) {
    // The common base moved, so previously recorded relative paths are stale.
    UpdateResolvedBase();
  }
  const GoogleString relative_path = partnership_.RelativePath(new_index);
  multipart_encoder_urls_.push_back(relative_path);

  // A leaf size of zero means "not computed yet" (or invalidated); otherwise
  // only the new URL's contribution needs to be added.
  if (accumulated_leaf_size_ != 0) {
    AccumulateLeafSize(relative_path);
  } else {
    ComputeLeafSize();
  }

  AccumulateCombinedSize(resource);
  resources_.push_back(resource);

  // Now that the resource is tentatively in, see whether any limit is
  // exceeded.
  const char* limit_exceeded = NULL;
  if (ContentSizeTooBig()) {
    limit_exceeded = "combined contents too big.";
  } else if (UrlTooBig()) {
    limit_exceeded = "combined url too long.";
  }

  if (limit_exceeded == NULL) {
    result.value = true;
    return result;
  }

  handler->Message(
      kInfo, "Cannot combine %s: %s",
      resource->url().c_str(), limit_exceeded);
  RemoveLastResource();
  return result;
}
void ResourceCombiner::RemoveLastResource() {
partnership_.RemoveLast();
resources_.pop_back();
multipart_encoder_urls_.pop_back();
if (partnership_.NumCommonComponents() != prev_num_components_) {
UpdateResolvedBase();
}
}
// Returns the relative paths in multipart_encoder_urls_ encoded into a single
// string by UrlMultipartEncoder.  No second (data) argument is supplied to
// the encoder.
GoogleString ResourceCombiner::UrlSafeId() const {
  UrlMultipartEncoder multipart_encoder;
  GoogleString encoded_id;
  multipart_encoder.Encode(multipart_encoder_urls_, NULL, &encoded_id);
  return encoded_id;
}
void ResourceCombiner::ComputeLeafSize() {
GoogleString segment = UrlSafeId();
accumulated_leaf_size_ = segment.size() + url_overhead_
+ server_context_->hasher()->HashSizeInChars();
}
void ResourceCombiner::AccumulateLeafSize(const StringPiece& url) {
GoogleString segment;
UrlEscaper::EncodeToUrlSegment(url, &segment);
const int kMultipartOverhead = 1; // for the '+'
accumulated_leaf_size_ += segment.size() + kMultipartOverhead;
}
// Reports whether the URL of the combination, as currently accumulated,
// would exceed either configured limit:
//   - the leaf alone against max_url_segment_size(), or
//   - the leaf plus the resolved base against max_url_size().
// kUrlSlack is added to the leaf in both checks so that other filters, which
// might add to the URL length, can still run after this one.
bool ResourceCombiner::UrlTooBig() {
  const int padded_leaf_size =
      accumulated_leaf_size_ + ResourceCombiner::kUrlSlack;
  if (padded_leaf_size > rewrite_driver_->options()->max_url_segment_size()) {
    return true;
  }
  const int padded_url_size =
      padded_leaf_size + static_cast<int>(resolved_base_.size());
  return padded_url_size > rewrite_driver_->options()->max_url_size();
}
// Default combinability policy: accept every resource.  None of the
// arguments are examined and the failure-reason string is left untouched.
bool ResourceCombiner::ResourceCombinable(Resource* /*resource*/,
                                          GoogleString* /*failure_reason*/,
                                          MessageHandler* /*handler*/) {
  return true;
}
void ResourceCombiner::UpdateResolvedBase() {
// If the addition of this URL changes the base path,
// then we will have to recompute the multi-part encoding.
// This is n^2 in the pathological case and if this code
// gets used for image spriting then this
// algorithm should be revisited. For CSS and JS we expect N to
// be relatively small.
prev_num_components_ = partnership_.NumCommonComponents();
resolved_base_ = ResolvedBase();
multipart_encoder_urls_.clear();
for (size_t i = 0; i < resources_.size(); ++i) {
multipart_encoder_urls_.push_back(partnership_.RelativePath(i));
}
accumulated_leaf_size_ = 0;
}
// Produces the combined output resource for everything added so far.
//
// Returns a null OutputResourcePtr when there are fewer than two resources,
// when the output resource cannot be created, or when writing the
// combination fails.  Otherwise returns the output resource, either because
// its cached result is already marked optimizable or because it was just
// written successfully.
OutputResourcePtr ResourceCombiner::Combine(MessageHandler* handler) {
  OutputResourcePtr combination;
  if (resources_.size() <= 1) {
    // Combining zero or one resource is pointless.
    return combination;
  }

  // The name of the new resource is derived from the names of the inputs.
  GoogleString url_safe_id = UrlSafeId();

  // Start building the combination.  We are not yet committed to it,
  // because the 'write' below can still fail.
  // TODO(jmaessen, jmarantz): encode based on partnership
  GoogleString resolved_base = ResolvedBase();
  GoogleString failure_reason;
  combination.reset(rewrite_driver_->CreateOutputResourceWithMappedPath(
      resolved_base, resolved_base, filter_->id(), url_safe_id,
      kRewrittenResource, &failure_reason));
  if (combination.get() == NULL) {
    // TODO(sligocki): Note failure_reason somewhere.
    return combination;
  }

  // An optimizable cached result means we already know what the output would
  // be, so use it as-is.
  if (combination->cached_result() != NULL &&
      combination->cached_result()->optimizable()) {
    return combination;
  }

  // Otherwise the combination has to be computed and written now.
  if (WriteCombination(resources_, combination, handler)
      && combination->IsWritten()) {
    return combination;
  }

  // No dice.
  combination.clear();
  return combination;
}
bool ResourceCombiner::WriteCombination(
const ResourceVector& combine_resources,
const OutputResourcePtr& combination,
MessageHandler* handler) {
bool written = true;
// TODO(sligocki): Write directly to a temp file rather than doing the extra
// string copy.
GoogleString combined_contents;
StringWriter writer(&combined_contents);
for (int i = 0, n = combine_resources.size(); written && (i < n); ++i) {
ResourcePtr input(combine_resources[i]);
written = WritePiece(i, n, input.get(),
combination.get(), &writer, handler);
}
if (written) {
// Intersect the response headers from each input.
ResponseHeaders* output_headers = combination->response_headers();
DCHECK_EQ(0, output_headers->NumAttributes());
// We don't copy over all the resources from [0] because we don't
// want the input cache-control. The output cache-control is set via
// RewriteDriver::Write when it calls
// RewriteDriver::SetDefaultLongCacheHeaders.
server_context_->MergeNonCachingResponseHeaders(
*combine_resources[0]->response_headers(), output_headers);
for (int i = 1, n = combine_resources.size(); i < n; ++i) {
output_headers->RemoveIfNotIn(*combine_resources[i]->response_headers());
}
// TODO(morlovich): Fix combiners to deal with charsets.
written =
rewrite_driver_->Write(
combine_resources, combined_contents, CombinationContentType(),
StringPiece() /* not computing charset for now */,
combination.get());
}
return written;
}
// Default piece writer: sends the uncompressed contents of |input| to
// |writer| unchanged and returns the result of that write.  The piece index,
// the piece count and the output resource are not used by this
// implementation.
bool ResourceCombiner::WritePiece(int /*index*/,
                                  int /*num_pieces*/,
                                  const Resource* input,
                                  OutputResource* /*combination*/,
                                  Writer* writer,
                                  MessageHandler* handler) {
  return writer->Write(input->ExtractUncompressedContents(), handler);
}
// Empties the resource list and the matching list of relative URLs.  The
// partnership and the cached base/size bookkeeping are left alone; Reset()
// takes care of those.
void ResourceCombiner::Clear() {
  resources_.clear();
  multipart_encoder_urls_.clear();
}
// Returns the combiner to its initial, empty state: clears the collected
// resources and URLs, re-seeds the partnership with the driver's current
// base URL, and zeroes the cached component count, leaf size and resolved
// base.
void ResourceCombiner::Reset() {
  Clear();
  partnership_.Reset(rewrite_driver_->base_url());

  // Cached values derived from the (now empty) partnership.
  resolved_base_.clear();
  accumulated_leaf_size_ = 0;
  prev_num_components_ = 0;
}
} // namespace net_instaweb