blob: 24f715cfddfd9de3aa0ccdea2e5e9cc0318c9fe2 [file] [log] [blame]
/*
* Copyright 2013 Google Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
// Author: matterbury@google.com (Matt Atterbury)
#include "net/instaweb/rewriter/public/dedup_inlined_images_filter.h"
#include <map>
#include "base/logging.h"
#include "net/instaweb/rewriter/public/request_properties.h"
#include "net/instaweb/rewriter/public/rewrite_driver.h"
#include "net/instaweb/rewriter/public/server_context.h"
#include "net/instaweb/rewriter/public/static_asset_manager.h"
#include "pagespeed/kernel/base/hasher.h"
#include "pagespeed/kernel/base/statistics.h"
#include "pagespeed/kernel/base/string.h"
#include "pagespeed/kernel/html/html_element.h"
#include "pagespeed/kernel/html/html_name.h"
#include "pagespeed/kernel/http/data_url.h"
#include "pagespeed/kernel/http/request_headers.h"
namespace net_instaweb {
const unsigned int DedupInlinedImagesFilter::kMinimumImageCutoff = 185;
const char DedupInlinedImagesFilter::kDiiInitializer[] =
"pagespeed.dedupInlinedImagesInit();";
const char DedupInlinedImagesFilter::kCandidatesFound[] =
"num_dedup_inlined_images_candidates_found";
const char DedupInlinedImagesFilter::kCandidatesReplaced[] =
"num_dedup_inlined_images_candidates_replaced";
DedupInlinedImagesFilter::DedupInlinedImagesFilter(RewriteDriver* driver)
: CommonFilter(driver),
script_inserted_(false),
snippet_id_(0) {
Statistics* stats = server_context()->statistics();
num_dedup_inlined_images_candidates_found_ =
stats->GetVariable(kCandidatesFound);
num_dedup_inlined_images_candidates_replaced_ =
stats->GetVariable(kCandidatesReplaced);
}
DedupInlinedImagesFilter::~DedupInlinedImagesFilter() {
hash_to_id_map_.clear();
}
void DedupInlinedImagesFilter::InitStats(Statistics* statistics) {
statistics->AddVariable(DedupInlinedImagesFilter::kCandidatesFound);
statistics->AddVariable(DedupInlinedImagesFilter::kCandidatesReplaced);
}
void DedupInlinedImagesFilter::DetermineEnabled(GoogleString* disabled_reason) {
// We are treating this filter like a version of lazyload images because
// they both replace an image with JavaScript, and in both cases we need
// to disable the filter for certain classes of UA.
if (!driver()->request_properties()->SupportsLazyloadImages() ||
driver()->flushing_early() ||
(driver()->request_headers() != NULL &&
driver()->request_headers()->IsXmlHttpRequest())) {
set_is_enabled(false);
}
}
void DedupInlinedImagesFilter::StartDocumentImpl() {
script_inserted_ = false;
snippet_id_ = 0;
}
void DedupInlinedImagesFilter::EndDocument() {
hash_to_id_map_.clear();
}
void DedupInlinedImagesFilter::StartElementImpl(HtmlElement* element) {
// If this is an inlined image that we've seen before, we will replace it
// with JS in EndElementImpl. Before we do that for the first time we need
// to insert our JS script of functions, though not if we're inside a
// <noscript> as that would be dumb.
if (!script_inserted_) {
StringPiece src;
if (IsDedupCandidate(element, &src)) {
GoogleString hash = server_context()->hasher()->Hash(src);
if (hash_to_id_map_.find(hash) != hash_to_id_map_.end()) {
InsertOurScriptElement(element);
}
}
}
}
void DedupInlinedImagesFilter::EndElementImpl(HtmlElement* element) {
StringPiece src;
if (IsDedupCandidate(element, &src)) {
num_dedup_inlined_images_candidates_found_->Add(1);
// Whether this is the source or destination, we need it to have an id.
// TODO(matterbury): We could check if an id is used more than once and
// refuse to deduplicate it if so. We'd need to check all images at least,
// though to be correct we should check all tags; this seems like a lot
// of work to cater for something people tend not to do (because it's
// such a bad idea basically).
GoogleString hash = server_context()->hasher()->Hash(src);
GoogleString element_id;
const char* id = element->AttributeValue(HtmlName::kId);
if (id == NULL || id[0] == '\0') {
element_id = StrCat("pagespeed_img_", hash,
IntegerToString(++snippet_id_));
driver()->AddAttribute(element, HtmlName::kId, element_id);
} else {
element_id = id;
}
if (hash_to_id_map_.find(hash) == hash_to_id_map_.end()) {
// This is the first time we've seen this particular image.
hash_to_id_map_[hash] = element_id;
} else {
// A subsequent use of an already inlined image: dedup it!
DCHECK(script_inserted_);
num_dedup_inlined_images_candidates_replaced_->Add(1);
GoogleString from_img_id = hash_to_id_map_[hash];
GoogleString script_id = StrCat("pagespeed_script_",
IntegerToString(++snippet_id_));
// NOTE: If you change this you need to update kMinimumImageCutoff,
// which is currently set to 185, slightly less than this snippet:
// <script type="text/javascript" id="pagespeed_script_1"
// data-pagespeed-no-defer>
// pagespeed.dedupInlinedImages.inlineImg("pagespeed_img_12345678",
// "pagespeed_img_87654321",
// "pagespeed_script_1");
// </script>
GoogleString snippet("pagespeed.dedupInlinedImages.");
StrAppend(&snippet, "inlineImg('", from_img_id, "','",
element_id, "','", script_id, "');");
HtmlElement* script = driver()->NewElement(element, HtmlName::kScript);
driver()->InsertElementAfterElement(element, script);
AddJsToElement(snippet, script);
driver()->AddAttribute(script, HtmlName::kId, script_id);
driver()->AddAttribute(script, HtmlName::kDataPagespeedNoDefer, NULL);
element->DeleteAttribute(HtmlName::kSrc);
}
}
}
bool DedupInlinedImagesFilter::IsDedupCandidate(HtmlElement* element,
StringPiece* src_iff_true) {
// Ignore images inside a <noscript> as inserting any JS is pointless.
// Ignore images that aren't inlined (a data URI).
// Ignore images that are smaller than the cutoff, current set to roughly
// the size of the JS snippet we insert (ignoring the functions JS overhead).
// TODO(matterbury): Also handle input tags.
if (noscript_element() == NULL && element->keyword() == HtmlName::kImg) {
const StringPiece src(element->AttributeValue(HtmlName::kSrc));
if (IsDataImageUrl(src) && src.size() > kMinimumImageCutoff) {
*src_iff_true = src;
return true;
}
}
return false;
}
void DedupInlinedImagesFilter::InsertOurScriptElement(HtmlElement* before) {
StaticAssetManager* static_asset_manager =
server_context()->static_asset_manager();
StringPiece dedup_inlined_images_js =
static_asset_manager->GetAsset(
StaticAssetEnum::DEDUP_INLINED_IMAGES_JS, driver()->options());
const GoogleString& initialized_js = StrCat(dedup_inlined_images_js,
kDiiInitializer);
HtmlElement* script_element = driver()->NewElement(before->parent(),
HtmlName::kScript);
driver()->InsertElementBeforeElement(before, script_element);
AddJsToElement(initialized_js, script_element);
driver()->AddAttribute(script_element, HtmlName::kDataPagespeedNoDefer, NULL);
script_inserted_ = true;
}
} // namespace net_instaweb