// blob: 3691e7c86dff0d5d248bf26a0af506bbc09ec226 [file] [log] [blame]
/*
* Copyright 2010 Google Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
// Author: jmaessen@google.com (Jan Maessen)
#ifndef NET_INSTAWEB_REWRITER_PUBLIC_IMAGE_REWRITE_FILTER_H_
#define NET_INSTAWEB_REWRITER_PUBLIC_IMAGE_REWRITE_FILTER_H_
#include <map>
#include "net/instaweb/rewriter/cached_result.pb.h"
#include "net/instaweb/rewriter/public/image.h"
#include "net/instaweb/rewriter/public/image_url_encoder.h"
#include "net/instaweb/rewriter/public/resource.h"
#include "net/instaweb/rewriter/public/resource_slot.h"
#include "net/instaweb/rewriter/public/rewrite_context.h"
#include "net/instaweb/rewriter/public/rewrite_driver.h"
#include "net/instaweb/rewriter/public/rewrite_filter.h"
#include "net/instaweb/rewriter/public/rewrite_options.h"
#include "net/instaweb/rewriter/public/rewrite_result.h"
#include "net/instaweb/rewriter/public/server_context.h"
#include "pagespeed/kernel/base/basictypes.h"
#include "pagespeed/kernel/base/printf_format.h"
#include "pagespeed/kernel/base/statistics.h"
#include "pagespeed/kernel/base/string.h"
#include "pagespeed/kernel/base/string_util.h"
#include "pagespeed/kernel/html/html_element.h"
#include "pagespeed/kernel/http/content_type.h"
#include "pagespeed/kernel/http/image_types.pb.h"
#include "pagespeed/kernel/util/url_segment_encoder.h"
namespace net_instaweb {
// Outcome of an attempt to inline an image. See MessageForInlineResult for
// enum meanings. The numeric values are spelled out explicitly; they are the
// same values the enumerators receive from implicit numbering.
enum InlineResult {
  INLINE_SUCCESS = 0,
  INLINE_UNSUPPORTED_DEVICE = 1,
  INLINE_NOT_CRITICAL = 2,
  INLINE_NO_DATA = 3,
  INLINE_TOO_LARGE = 4,
  INLINE_CACHE_SMALL_IMAGES_UNREWRITTEN = 5,
  // Image should not be inlined because it is part of a responsive image.
  INLINE_RESPONSIVE = 6,
  INLINE_SHORTCUT = 7,
  INLINE_INTERNAL_ERROR = 8,
};
// Identifies img tags in HTML and optimizes them.
// TODO(jmaessen): Big open question: how best to link pulled-in resources to
// rewritten urls, when in general those urls will be in a different domain.
class ImageRewriteFilter : public RewriteFilter {
 public:
  typedef std::map<GoogleString, AssociatedImageInfo> AssociatedImageInfoMap;

  // Names of the statistics.
  static const char kImageNoRewritesHighResolution[];
  static const char kImageOngoingRewrites[];
  static const char kImageResizedUsingRenderedDimensions[];
  static const char kImageRewriteLatencyFailedMs[];
  static const char kImageRewriteLatencyOkMs[];
  static const char kImageRewriteLatencyTotalMs[];
  static const char kImageRewritesDroppedDecodeFailure[];
  static const char kImageRewritesDroppedDueToLoad[];
  static const char kImageRewritesDroppedMIMETypeUnknown[];
  static const char kImageRewritesDroppedNoSavingNoResize[];
  static const char kImageRewritesDroppedNoSavingResize[];
  static const char kImageRewritesDroppedServerWriteFail[];
  static const char kImageRewritesSquashingForMobileScreen[];
  static const char kImageRewrites[];
  static const char kImageWebpRewrites[];
  static const char kImageWebpFromGifFailureMs[];
  static const char kImageWebpFromGifSuccessMs[];
  static const char kImageWebpFromGifTimeouts[];
  static const char kImageWebpFromJpegFailureMs[];
  static const char kImageWebpFromJpegSuccessMs[];
  static const char kImageWebpFromJpegTimeouts[];
  static const char kImageWebpFromPngFailureMs[];
  static const char kImageWebpFromPngSuccessMs[];
  static const char kImageWebpFromPngTimeouts[];
  static const char kImageWebpOpaqueFailureMs[];
  static const char kImageWebpOpaqueSuccessMs[];
  static const char kImageWebpOpaqueTimeouts[];
  static const char kImageWebpWithAlphaFailureMs[];
  static const char kImageWebpWithAlphaSuccessMs[];
  static const char kImageWebpWithAlphaTimeouts[];
  static const char kImageWebpFromGifAnimatedFailureMs[];
  static const char kImageWebpFromGifAnimatedSuccessMs[];
  static const char kImageWebpFromGifAnimatedTimeouts[];

  // Name of the property cache property that stores the URLs discovered when
  // image_inlining_identify_and_cache_without_rewriting() is set in the
  // RewriteOptions.
  static const char kInlinableImageUrlsPropertyName[];

  static const RewriteOptions::Filter kRelatedFilters[];
  static const int kRelatedFiltersSize;

  explicit ImageRewriteFilter(RewriteDriver* driver);
  virtual ~ImageRewriteFilter();

  static void InitStats(Statistics* statistics);
  static void Initialize();
  static void Terminate();
  static void AddRelatedOptions(StringPieceVector* target);

  virtual void StartDocumentImpl();
  virtual void EndDocument();
  virtual void RenderDone();
  // Defined inline with an empty body.
  virtual void StartElementImpl(HtmlElement* element) {}
  virtual void EndElementImpl(HtmlElement* element);
  virtual const char* Name() const { return "ImageRewrite"; }
  virtual const char* id() const {
    return RewriteOptions::kImageCompressionId;
  }
  virtual void EncodeUserAgentIntoResourceContext(
      ResourceContext* context) const;

  // Records image information associated with a URL, for use by
  // experiment_collect_mob_image_info. Must be called from a DOM-safe
  // context: the parser thread or a Render() method.
  void RegisterImageInfo(const AssociatedImageInfo& image_info);

  // Attempts to pull the information stored by the image rewrite filter out
  // of *in and to place it in *out. Returns whether that succeeded.
  //
  // The context is used to try to find a name for the image if it was not
  // optimized.
  static bool ExtractAssociatedImageInfo(const CachedResult* in,
                                         RewriteContext* context,
                                         AssociatedImageInfo* out);

  // Decides whether the resource can be inlined. If it can, its contents are
  // encoded into data_url; otherwise data_url is left alone.
  InlineResult TryInline(bool is_html,
                         bool is_critical,
                         int64 image_inline_max_bytes,
                         const CachedResult* cached_result,
                         ResourceSlot* slot,
                         GoogleString* data_url);

  // Handles the contents of a dimension attribute on an image element.
  // NOTE(review): presumably returns whether a usable pixel count was stored
  // in *value -- confirm against the implementation.
  //
  // The valid contents of such an attribute have one of the following forms:
  //   "45%" "45%px" "+45.0%"
  //       [45% of browser width; we can't handle this]
  //   "45", "+45", "45px", "45arbitraryjunk" "45px%"
  //       [45 pixels regardless of junk]
  // Technically 0 is an invalid dimension, so those are rejected as well;
  // note that 0 dimensions occur in the wild and Safari and Chrome at least
  // don't display anything.
  //
  // The arbitraryjunk cases are actually rejected, as older browsers (eg FF9,
  // which isn't *that* old) don't deal with them at all. So the only trailing
  // stuff allowed is px, possibly with some white space. Note that some older
  // browsers (like FF9) accept other units such as "in" or "pt" as synonyms
  // for px!
  //
  // Fractions are rounded, as fractional pixels appear to be rounded in
  // practice (and our image resize algorithms require integer pixel sizes).
  //
  // Far more detail in the spec at:
  // http://www.whatwg.org/specs/web-apps/current-work/multipage/
  // common-microsyntaxes.html#percentages-and-dimensions
  static bool ParseDimensionAttribute(const char* position, int* value);

  // Builds and returns a nested rewrite for an image inside a CSS file, using
  // the given parent and slot. The result is not registered with the parent.
  RewriteContext* MakeNestedRewriteContextForCss(
      int64 css_image_inline_max_bytes,
      RewriteContext* parent,
      const ResourceSlotPtr& slot);

  // Builds and returns a nested rewrite for the given parent and slot. The
  // result is not registered with the parent.
  virtual RewriteContext* MakeNestedRewriteContext(
      RewriteContext* parent,
      const ResourceSlotPtr& slot);

  // Updates the desired image dimensions if necessary. Returns true if they
  // were updated.
  bool UpdateDesiredImageDimsIfNecessary(
      const ImageDim& image_dim,
      const ResourceContext& resource_context,
      ImageDim* desired_dim);

  // Determines, based on the current options, whether an image should be
  // resized.
  //
  // The dimensions to resize to are returned in *desired_dimensions.
  bool ShouldResize(const ResourceContext& context,
                    const GoogleString& url,
                    Image* image,
                    ImageDim* desired_dimensions);

  // Resizes the image if necessary. Returns true if the resizing succeeds and
  // false if it is unnecessary or fails.
  bool ResizeImageIfNecessary(const RewriteContext* rewrite_context,
                              const GoogleString& url,
                              ResourceContext* context,
                              Image* image,
                              CachedResult* cached);

  // Allocates and initializes a CompressionOptions object based on
  // RewriteOptions and ResourceContext.
  // NOTE(review): a raw pointer to the allocated object is returned;
  // presumably the caller takes ownership -- confirm.
  Image::CompressionOptions* ImageOptionsForLoadedResource(
      const ResourceContext& context,
      const ResourcePtr& input_resource);

  virtual const RewriteOptions::Filter* RelatedFilters(
      int* num_filters) const;
  // Returns the static related_options_ pointer.
  virtual const StringPieceVector* RelatedOptions() const {
    return related_options_;
  }

  // Disables, in options, every filter listed in kRelatedFilters.
  static void DisableRelatedFilters(RewriteOptions* options);

  // Updates stats for a rewrite dropped due to load.
  void ReportDroppedRewrite();

 protected:
  virtual const UrlSegmentEncoder* encoder() const;
  virtual RewriteContext* MakeRewriteContext();

 private:
  class Context;
  friend class Context;

  // Helper methods.
  void InfoAndTrace(Context* context, const char* format, ...)
      INSTAWEB_PRINTF_FORMAT(3, 4);
  const ContentType* ImageToContentType(const GoogleString& origin_url,
                                        Image* image);
  void BeginRewriteImageUrl(HtmlElement* element,
                            HtmlElement::Attribute* src);
  RewriteResult RewriteLoadedResourceImpl(Context* context,
                                          const ResourcePtr& input_resource,
                                          const OutputResourcePtr& result);

  // Returns true if it rewrote (ie inlined) the URL.
  bool FinishRewriteCssImageUrl(int64 css_image_inline_max_bytes,
                                const CachedResult* cached,
                                ResourceSlot* slot,
                                InlineResult* inline_result);

  // Returns true if it rewrote the URL.
  bool FinishRewriteImageUrl(const CachedResult* cached,
                             const ResourceContext* resource_context,
                             HtmlElement* element,
                             HtmlElement::Attribute* src,
                             int image_index,
                             HtmlResourceSlot* slot,
                             InlineResult* inline_result);

  // Saves the image contents in cached if the image is inlinable.
  void SaveIfInlinable(const StringPiece& contents,
                       const ImageType image_type,
                       CachedResult* cached);

  // Fills in width and height, taken either from the attributes specified in
  // the image tag (including in an inline style attribute) or from the
  // rendered dimensions. Sets is_resized_using_rendered_dimensions to true if
  // the dimensions are taken from rendered dimensions.
  void GetDimensions(HtmlElement* element,
                     ImageDim* page_dim,
                     const HtmlElement::Attribute* src,
                     bool* is_resized_using_rendered_dimensions);

  // Returns true if either a width or a height attribute is specified, even
  // if it is not parsable.
  bool HasAnyDimensions(HtmlElement* element);

  // Resizes the low quality image. This further reduces the size of the
  // inlined low quality image for mobile.
  void ResizeLowQualityImage(Image* low_image,
                             const ResourcePtr& input_resource,
                             CachedResult* cached);

  // Checks whether the image is critical, to decide on generating a low res
  // image for it. An image is considered critical if it is in the critical
  // list as determined by CriticalImagesFinder. Images are considered critical
  // if the platform lacks a CriticalImageFinder implementation.
  bool IsHtmlCriticalImage(StringPiece image_url) const;

  // Persists a URL that would have been inlined to the property cache, if
  // options()->image_inlining_identify_and_cache_without_rewriting(). Returns
  // true if a PropertyValue was written.
  bool StoreUrlInPropertyCache(const StringPiece& url);

  void SaveDebugMessageToCache(const GoogleString& message,
                               Context* rewrite_context,
                               CachedResult* cached_result);

  // Statistics.

  // # of images rewritten successfully.
  Variable* image_rewrites_;

  // # of images resized using rendered dimensions.
  Variable* image_resized_using_rendered_dimensions_;

  // # of images that we decided not to rewrite because of size constraint.
  Variable* image_norewrites_high_resolution_;

  // # of images that we decided not to serve rewritten. Possible reasons
  // include: the rewrite failed, recompression wasn't effective enough, the
  // image couldn't be resized because it had an alpha-channel, etc.
  // Note: This overlaps with most of the other image_rewrites_dropped_* vars.
  Variable* image_rewrites_dropped_intentionally_;

  // # of images not rewritten because we failed to decode them.
  Variable* image_rewrites_dropped_decode_failure_;

  // # of images not rewritten because the image MIME type is unknown.
  Variable* image_rewrites_dropped_mime_type_unknown_;

  // # of images not rewritten because the server fails to write the merged
  // html files.
  Variable* image_rewrites_dropped_server_write_fail_;

  // # of images not rewritten because the rewriting does not reduce the data
  // size by a certain threshold. The image is resized in this case.
  Variable* image_rewrites_dropped_nosaving_resize_;

  // # of images not rewritten because the rewriting does not reduce the data
  // size by a certain threshold. The image is not resized in this case.
  Variable* image_rewrites_dropped_nosaving_noresize_;

  // # of images not rewritten because of load.
  TimedVariable* image_rewrites_dropped_due_to_load_;

  // # of image squashing for mobile screen initiated. This may not be the
  // actual # of images squashed, as squashing may fail or the rewritten image
  // size may be larger.
  TimedVariable* image_rewrites_squashing_for_mobile_screen_;

  // # of bytes saved from image rewriting. (Note: This is computed at rewrite
  // time, not at serve time, so the number of bytes saved in transmission
  // should be larger than this.)
  Variable* image_rewrite_total_bytes_saved_;

  // Sum of original sizes of all successfully rewritten images.
  // image_rewrite_total_bytes_saved_ / image_rewrite_total_original_bytes_
  // is the average percentage reduction in image size.
  Variable* image_rewrite_total_original_bytes_;

  // # of uses of rewritten images (updating <img> src= attributes in HTML or
  // url()s in CSS).
  Variable* image_rewrite_uses_;

  // # of inlines of images (into HTML or CSS).
  Variable* image_inline_count_;

  // # of images rewritten into WebP format.
  Variable* image_webp_rewrites_;

  // # of images being rewritten right now.
  UpDownCounter* image_ongoing_rewrites_;

  // Total number of milliseconds spent rewriting images since server start.
  Variable* image_rewrite_latency_total_ms_;

  // Delay in microseconds of successful image rewrites.
  // NOTE(review): the member name ends in _ms_ while this comment says
  // microseconds -- confirm which unit is actually recorded.
  Histogram* image_rewrite_latency_ok_ms_;

  // Delay in microseconds of failed image rewrites.
  // NOTE(review): the member name ends in _ms_ while this comment says
  // microseconds -- confirm which unit is actually recorded.
  Histogram* image_rewrite_latency_failed_ms_;

  ImageUrlEncoder encoder_;

  // Counter that helps associate each <img> tag in the HTML with a unique
  // index, for use in determining whether the image should be previewed.
  int image_counter_;

  // The set of inlinable URLs, populated as the page is parsed, if
  // image_inlining_identify_and_cache_without_rewriting() is set in the
  // RewriteOptions.
  StringSet inlinable_urls_;

  // Sets of variables and histograms for various conversions to WebP.
  Image::ConversionVariables webp_conversion_variables_;

  // The options related to this filter.
  static StringPieceVector* related_options_;

  // NOTE(review): presumably the image information registered through
  // RegisterImageInfo -- confirm key and usage against the implementation.
  AssociatedImageInfoMap image_info_;

  // Used to figure out which RenderDone() call is the last one.
  bool saw_end_document_;

  DISALLOW_COPY_AND_ASSIGN(ImageRewriteFilter);
};
} // namespace net_instaweb
#endif // NET_INSTAWEB_REWRITER_PUBLIC_IMAGE_REWRITE_FILTER_H_