// blob: 2530e15832f6029a9a9197ebfb2f173f66781e06 [file] [log] [blame]
/*
 * Copyright 2010 Google Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
// Author: (Jan Maessen)
#include "net/instaweb/rewriter/public/rewrite_filter.h"
#include "base/basictypes.h"
#include "base/scoped_ptr.h"
#include "net/instaweb/htmlparse/public/html_element.h"
#include "net/instaweb/rewriter/public/img_tag_scanner.h"
#include "net/instaweb/rewriter/public/resource.h"
#include "net/instaweb/util/public/atom.h"
#include <string>
#include "net/instaweb/util/public/url_segment_encoder.h"
namespace net_instaweb {
// Forward declarations of types that the classes below use only through
// pointers or references, so their full definitions are not needed here.
class ContentType;
class FileSystem;
class HtmlParse;
class Image;
class ImageDim;
class OutputResource;
class ResourceManager;
class UrlEscaper;
class Variable;
// This class supports the encoding of image urls with optional
// additional dimension metadata.  The passed-in stored_dim is used as
// the source and/or destination of this metadata during encode/decode
// respectively.
//
// NOTE(review): ownership of url_escaper and stored_dim is not visible from
// this header; presumably both are borrowed and must outlive the encoder --
// confirm against the implementation file.
class ImageUrlEncoder : public UrlSegmentEncoder {
 public:
  // Stores the given escaper and dimension pointers for use by the
  // encode/decode methods below.
  ImageUrlEncoder(UrlEscaper* url_escaper, ImageDim* stored_dim);
  virtual ~ImageUrlEncoder();

  // Encode an origin_url and stored_dim from origin page to a rewritten_url.
  virtual void EncodeToUrlSegment(
      const StringPiece& origin_url, std::string* rewritten_url);

  // Decode an origin_url and stored_dim from a rewritten_url, returning false
  // on parse failure (invalidating output vars).
  virtual bool DecodeFromUrlSegment(const StringPiece& rewritten_url,
                                    std::string* origin_url);

 private:
  UrlEscaper* url_escaper_;
  // Source of the dimension metadata on encode, destination on decode.
  ImageDim* stored_dim_;
};
// Identify img tags in html and optimize them.
// TODO(jmaessen): See which ones have immediately-obvious size info.
// TODO(jmaessen): Provide alternate resources at rewritten urls
//     asynchronously somehow.
// TODO(jmaessen): Big open question: how best to link pulled-in resources to
//     rewritten urls, when in general those urls will be in a different domain.
class ImgRewriteFilter : public RewriteFilter {
 public:
  // log_image_elements: log each image element as it is encountered.
  // insert_image_dimensions: insert image dimensions into html if absent.
  // img_inline_max_bytes: threshold size (in bytes) below which images are
  //     inlined.
  // NOTE(review): the meaning of path_prefix is not visible in this header;
  // presumably it is forwarded to RewriteFilter -- confirm.
  ImgRewriteFilter(RewriteDriver* driver,
                   bool log_image_elements,
                   bool insert_image_dimensions,
                   StringPiece path_prefix,
                   size_t img_inline_max_bytes);

  // NOTE(review): presumably registers the statistics variables held in
  // rewrite_count_, inline_count_ and rewrite_saved_bytes_ -- confirm.
  static void Initialize(Statistics* statistics);

  // Document start and element start need no work in this filter; both are
  // intentionally empty.
  virtual void StartDocumentImpl() {}
  virtual void StartElementImpl(HtmlElement* element) {}
  virtual void EndElementImpl(HtmlElement* element);
  virtual void Flush();
  virtual bool Fetch(OutputResource* resource,
                     Writer* writer,
                     const MetaData& request_header,
                     MetaData* response_headers,
                     UrlAsyncFetcher* fetcher,
                     MessageHandler* message_handler,
                     UrlAsyncFetcher::Callback* callback);
  virtual const char* Name() const { return "ImgRewrite"; }

  // Can we inline resource?  If so, encode its contents into the data_url,
  // otherwise leave data_url alone.
  // NOTE(review): img_inline_max_bytes is an int here but the corresponding
  // member img_inline_max_bytes_ is a size_t; callers should watch for
  // signed/unsigned conversion.
  static bool CanInline(
      int img_inline_max_bytes, const StringPiece& contents,
      const ContentType* content_type, std::string* data_url);

 private:
  // Helper methods.
  Image* GetImage(const StringPiece& origin_url, Resource* img_resource);
  OutputResource* ImageOutputResource(const std::string& url_string,
                                      Image* image);
  const ContentType* ImageToContentType(const std::string& origin_url,
                                        Image* image);
  void OptimizeImage(const Resource& input_resource, const ImageDim& page_dim,
                     Image* image, OutputResource* result);
  bool OptimizedImageFor(
      const StringPiece& origin_url, const ImageDim& page_dim,
      Resource* img_resource, OutputResource* output);
  void RewriteImageUrl(HtmlElement* element, HtmlElement::Attribute* src);
  void UpdateTargetElement(const Resource& input_resource,
                           const OutputResource& output_resource,
                           const ImageDim& page_dim, const ImageDim& actual_dim,
                           HtmlElement* element, HtmlElement::Attribute* src);

  FileSystem* file_system_;
  HtmlParse* html_parse_;
  scoped_ptr<ImgTagScanner> img_filter_;
  ResourceManager* resource_manager_;
  // Threshold size (in bytes) below which we should just inline images
  // encountered.
  // TODO(jmaessen): Heuristic must be more sophisticated.  Does this image
  // touch a fresh domain?  Require opening a new connection?  If so we can
  // afford to inline quite large images (basically anything we could transmit
  // in the resulting RTTs)---but of course we don't know about RTT here.  In
  // the absence of such information, we ought to inline if header length + url
  // size can be saved by inlining image, without increasing the size in packets
  // of the html.  Otherwise we end up loading the image in favor of the html,
  // which might be a lose.  More work is needed here to figure out the exact
  // tradeoffs involved, especially as we also undermine image cacheability.
  size_t img_inline_max_bytes_;
  // Should we log each image element as we encounter it?  Handy for debug.
  bool log_image_elements_;
  // Should we insert image dimensions into html if they are absent?
  bool insert_image_dimensions_;
  // NOTE(review): presumably the interned "width" and "height" attribute
  // names -- confirm against the constructor.
  const Atom s_width_;
  const Atom s_height_;
  // Statistics variables; see Initialize().
  Variable* rewrite_count_;
  Variable* inline_count_;
  Variable* rewrite_saved_bytes_;
};
} // namespace net_instaweb