| /* |
| * Copyright 2012 Google Inc. |
| * |
| * Licensed under the Apache License, Version 2.0 (the "License"); |
| * you may not use this file except in compliance with the License. |
| * You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| // Author: pulkitg@google.com (Pulkit Goyal) |
| |
| #include "net/instaweb/rewriter/public/critical_images_finder.h" |
| |
| #include <map> |
| |
| #include "base/logging.h" |
| #include "net/instaweb/http/public/log_record.h" |
| #include "net/instaweb/http/public/request_context.h" |
| #include "net/instaweb/rewriter/critical_images.pb.h" |
| #include "net/instaweb/rewriter/public/critical_finder_support_util.h" |
| #include "net/instaweb/rewriter/public/property_cache_util.h" |
| #include "net/instaweb/rewriter/public/rewrite_driver.h" |
| #include "net/instaweb/rewriter/public/rewrite_options.h" |
| #include "net/instaweb/rewriter/public/server_context.h" |
| #include "net/instaweb/rewriter/rendered_image.pb.h" |
| #include "net/instaweb/util/public/fallback_property_page.h" |
| #include "net/instaweb/util/public/property_cache.h" |
| #include "pagespeed/kernel/base/json.h" |
| #include "pagespeed/kernel/base/message_handler.h" |
| #include "pagespeed/kernel/base/proto_util.h" |
| #include "pagespeed/kernel/base/scoped_ptr.h" |
| #include "pagespeed/kernel/base/statistics.h" |
| #include "pagespeed/kernel/base/string_util.h" |
| #include "pagespeed/kernel/http/google_url.h" |
| |
| namespace net_instaweb { |
| |
| namespace { |
| |
// Keys looked up in each per-image object of the rendered-dimensions JSON
// parsed by JsonMapToRenderedImagesMap.
const char kRenderedImageJsonWidthKey[] = "rw";
const char kRenderedImageJsonHeightKey[] = "rh";
const char kOriginalImageJsonWidthKey[] = "ow";
const char kOriginalImageJsonHeightKey[] = "oh";

// Written to the property cache in place of an empty serialized
// CriticalImages proto, because the property cache does not store empty
// values.
const char kEmptyValuePlaceholder[] = "\n";
| |
| // Create CriticalImagesInfo object from the value of property_value. NULL if |
| // no value is found, or if the property value reflects that no results are |
| // available. Result is owned by caller. |
| CriticalImagesInfo* CriticalImagesInfoFromPropertyValue( |
| int percent_seen_for_critical, |
| const PropertyValue* property_value) { |
| DCHECK(property_value != NULL); |
| scoped_ptr<CriticalImagesInfo> info(new CriticalImagesInfo()); |
| if (!CriticalImagesFinder::PopulateCriticalImagesFromPropertyValue( |
| property_value, &info->proto)) { |
| return NULL; |
| } |
| // Fill in map fields based on proto value so that image lookups are O(lg n). |
| GetCriticalKeysFromProto(percent_seen_for_critical, |
| info->proto.html_critical_image_support(), |
| &info->html_critical_images); |
| GetCriticalKeysFromProto(percent_seen_for_critical, |
| info->proto.css_critical_image_support(), |
| &info->css_critical_images); |
| return info.release(); |
| } |
| |
| // Setup a map for RenderedImages and their dimensions. |
| void SetupRenderedImageDimensionsMap( |
| const RenderedImages& rendered_images, |
| RenderedImageDimensionsMap* map) { |
| for (int i = 0; i < rendered_images.image_size(); ++i) { |
| const RenderedImages_Image& images = rendered_images.image(i); |
| // In case of beacons returning these rendered dimensions, images.src() |
| // will be a hash of the image url. Hence when we do a lookup in |
| // rendered_images_map we need to hash the url. |
| (*map)[images.src()] = std::make_pair( |
| images.rendered_width(), images.rendered_height()); |
| } |
| } |
| |
| } // namespace |
| |
| const char CriticalImagesFinder::kCriticalImagesPropertyName[] = |
| "critical_images"; |
| |
| const char CriticalImagesFinder::kCriticalImagesValidCount[] = |
| "critical_images_valid_count"; |
| |
| const char CriticalImagesFinder::kCriticalImagesExpiredCount[] = |
| "critical_images_expired_count"; |
| |
| const char CriticalImagesFinder::kCriticalImagesNotFoundCount[] = |
| "critical_images_not_found_count"; |
| |
| const char CriticalImagesFinder::kRenderedImageDimensionsProperty[] = |
| "rendered_image_dimensions"; |
| |
| CriticalImagesFinder::CriticalImagesFinder(const PropertyCache::Cohort* cohort, |
| Statistics* statistics) |
| : cohort_(cohort) { |
| critical_images_valid_count_ = statistics->GetVariable( |
| kCriticalImagesValidCount); |
| critical_images_expired_count_ = statistics->GetVariable( |
| kCriticalImagesExpiredCount); |
| critical_images_not_found_count_ = statistics->GetVariable( |
| kCriticalImagesNotFoundCount); |
| } |
| |
| CriticalImagesFinder::~CriticalImagesFinder() { |
| } |
| |
| void CriticalImagesFinder::InitStats(Statistics* statistics) { |
| statistics->AddVariable(kCriticalImagesValidCount); |
| statistics->AddVariable(kCriticalImagesExpiredCount); |
| statistics->AddVariable(kCriticalImagesNotFoundCount); |
| } |
| |
| namespace { |
| |
| bool IsCriticalImage(const GoogleString& image_url, |
| const StringSet& critical_images_set) { |
| return (critical_images_set.find(image_url) != critical_images_set.end()); |
| } |
| |
| } // namespace |
| |
| bool CriticalImagesFinder::IsHtmlCriticalImage(StringPiece image_url, |
| RewriteDriver* driver) { |
| return IsCriticalImage(GetKeyForUrl(image_url), |
| GetHtmlCriticalImages(driver)); |
| } |
| |
| bool CriticalImagesFinder::IsCssCriticalImage(StringPiece image_url, |
| RewriteDriver* driver) { |
| return IsCriticalImage(GetKeyForUrl(image_url), |
| GetCssCriticalImages(driver)); |
| } |
| |
| bool CriticalImagesFinder::GetRenderedImageDimensions( |
| RewriteDriver* driver, |
| const GoogleUrl& image_src_gurl, |
| std::pair<int32, int32>* dimensions) { |
| UpdateCriticalImagesSetInDriver(driver); |
| const CriticalImagesInfo* info = driver->critical_images_info(); |
| CHECK(info != NULL); |
| RenderedImageDimensionsMap::const_iterator iterator = |
| info->rendered_images_map.find( |
| GetKeyForUrl(image_src_gurl.spec_c_str())); |
| if (iterator != info->rendered_images_map.end()) { |
| *dimensions = iterator->second; |
| return true; |
| } |
| return false; |
| } |
| |
| const StringSet& CriticalImagesFinder::GetHtmlCriticalImages( |
| RewriteDriver* driver) { |
| UpdateCriticalImagesSetInDriver(driver); |
| const CriticalImagesInfo* info = driver->critical_images_info(); |
| CHECK(info != NULL); |
| |
| return info->html_critical_images; |
| } |
| |
| const StringSet& CriticalImagesFinder::GetCssCriticalImages( |
| RewriteDriver* driver) { |
| UpdateCriticalImagesSetInDriver(driver); |
| const CriticalImagesInfo* info = driver->critical_images_info(); |
| CHECK(info != NULL); |
| |
| return info->css_critical_images; |
| } |
| |
| StringSet* CriticalImagesFinder::mutable_html_critical_images( |
| RewriteDriver* driver) { |
| DCHECK(driver != NULL); |
| CriticalImagesInfo* driver_info = driver->critical_images_info(); |
| // Preserve CSS critical images if they have been updated already. |
| if (driver_info == NULL) { |
| driver_info = new CriticalImagesInfo; |
| driver->set_critical_images_info(driver_info); |
| } |
| return &driver_info->html_critical_images; |
| } |
| |
| StringSet* CriticalImagesFinder::mutable_css_critical_images( |
| RewriteDriver* driver) { |
| DCHECK(driver != NULL); |
| CriticalImagesInfo* driver_info = driver->critical_images_info(); |
| // Preserve CSS critical images if they have been updated already. |
| if (driver_info == NULL) { |
| driver_info = new CriticalImagesInfo; |
| driver->set_critical_images_info(driver_info); |
| } |
| return &driver_info->css_critical_images; |
| } |
| |
| // Copy the critical images for this request from the property cache into the |
| // RewriteDriver. The critical images are not stored in CriticalImageFinder |
| // because the ServerContext holds the CriticalImageFinder and hence is shared |
| // between requests. |
| void CriticalImagesFinder::UpdateCriticalImagesSetInDriver( |
| RewriteDriver* driver) { |
| // Don't update critical_images_info if it's already been set. |
| if (driver->critical_images_info() != NULL) { |
| return; |
| } |
| CriticalImagesInfo* info = NULL; |
| // Fallback properties can be used for critical images. |
| AbstractPropertyPage* page = driver->fallback_property_page(); |
| if (page != NULL && cohort() != NULL) { |
| PropertyValue* property_value = page->GetProperty( |
| cohort(), kCriticalImagesPropertyName); |
| info = ExtractCriticalImagesFromCache(driver, property_value); |
| if (info != NULL) { |
| info->is_critical_image_info_present = true; |
| if (driver->request_context().get() != NULL) { |
| driver->log_record()->SetNumHtmlCriticalImages( |
| info->html_critical_images.size()); |
| driver->log_record()->SetNumCssCriticalImages( |
| info->css_critical_images.size()); |
| } |
| } |
| } |
| |
| // Store an empty CriticalImagesInfo back into the driver if we don't have any |
| // beacon results yet. |
| if (info == NULL) { |
| info = new CriticalImagesInfo; |
| } |
| |
| if (driver->options()->Enabled( |
| RewriteOptions::kResizeToRenderedImageDimensions)) { |
| scoped_ptr<RenderedImages> rendered_images( |
| ExtractRenderedImageDimensionsFromCache(driver)); |
| if (rendered_images != NULL) { |
| SetupRenderedImageDimensionsMap(*rendered_images, |
| &info->rendered_images_map); |
| } |
| } |
| |
| driver->set_critical_images_info(info); |
| } |
| |
| bool CriticalImagesFinder::UpdateCriticalImagesCacheEntryFromDriver( |
| const StringSet* html_critical_images_set, |
| const StringSet* css_critical_images_set, |
| RewriteDriver* driver) { |
| // Update property cache if above the fold critical images are successfully |
| // determined. |
| // Fallback properties will be updated for critical images. |
| AbstractPropertyPage* page = driver->fallback_property_page(); |
| return UpdateCriticalImagesCacheEntry( |
| html_critical_images_set, css_critical_images_set, |
| NULL /* RenderedImages Proto */, |
| SupportInterval(), cohort(), page); |
| } |
| |
| // Setup the HTML and CSS critical image sets in *critical_images using |
| // *property_value. Return true if property_value had a value, and |
| // deserialization of it succeeded. |
| bool CriticalImagesFinder::PopulateCriticalImagesFromPropertyValue( |
| const PropertyValue* property_value, |
| CriticalImages* critical_images) { |
| DCHECK(property_value != NULL); |
| DCHECK(critical_images != NULL); |
| if (!property_value->has_value()) { |
| return false; |
| } |
| // Check if we have the placeholder string value, indicating an empty value. |
| // This will be stored when we have an empty set of critical images, since the |
| // property cache doesn't store empty values. |
| if (property_value->value() == kEmptyValuePlaceholder) { |
| critical_images->Clear(); |
| return true; |
| } |
| // Having dealt with the unusual cases, parse the proto. |
| ArrayInputStream input(property_value->value().data(), |
| property_value->value().size()); |
| return critical_images->ParseFromZeroCopyStream(&input); |
| } |
| |
| bool CriticalImagesFinder::UpdateCriticalImagesCacheEntry( |
| const StringSet* html_critical_images_set, |
| const StringSet* css_critical_images_set, |
| const RenderedImages* rendered_images_set, |
| int support_interval, |
| const PropertyCache::Cohort* cohort, |
| AbstractPropertyPage* page) { |
| // Update property cache if above the fold critical images are successfully |
| // determined. |
| if (page == NULL) { |
| return false; |
| } |
| if (cohort == NULL) { |
| LOG(WARNING) << "Critical Images Cohort is NULL."; |
| return false; |
| } |
| PropertyValue* property_value = page->GetProperty( |
| cohort, kCriticalImagesPropertyName); |
| // Read in the current critical images, and preserve the current HTML or |
| // CSS critical images if they are not being updated. |
| CriticalImages critical_images; |
| PopulateCriticalImagesFromPropertyValue(property_value, &critical_images); |
| return UpdateAndWriteBackCriticalImagesCacheEntry( |
| html_critical_images_set, css_critical_images_set, rendered_images_set, |
| support_interval, cohort, page, &critical_images); |
| } |
| |
| bool CriticalImagesFinder::UpdateAndWriteBackCriticalImagesCacheEntry( |
| const StringSet* html_critical_images_set, |
| const StringSet* css_critical_images_set, |
| const RenderedImages* rendered_images_set, |
| int support_interval, |
| const PropertyCache::Cohort* cohort, |
| AbstractPropertyPage* page, |
| CriticalImages* critical_images) { |
| // Update RenderedImages proto in property Cache. |
| if (rendered_images_set != NULL) { |
| UpdateInPropertyCache( |
| *rendered_images_set, cohort, kRenderedImageDimensionsProperty, |
| false /* don't write cohort */, page); |
| } |
| if (!UpdateCriticalImages( |
| html_critical_images_set, css_critical_images_set, |
| support_interval, critical_images)) { |
| return false; |
| } |
| |
| GoogleString buf; |
| if (!critical_images->SerializeToString(&buf)) { |
| LOG(WARNING) << "Serialization of critical images protobuf failed."; |
| return false; |
| } |
| // The property cache won't store an empty value, which is what an |
| // empty CriticalImages will serialize to. If buf is an empty string, |
| // repalce with a placeholder that we can then handle when decoding |
| // the property_cache value in |
| // PopulateCriticalImagesFromPropertyValue. |
| if (buf.empty()) { |
| buf = kEmptyValuePlaceholder; |
| } |
| page->UpdateValue(cohort, kCriticalImagesPropertyName, buf); |
| return true; |
| } |
| |
| bool CriticalImagesFinder::UpdateCriticalImages( |
| const StringSet* html_critical_images, |
| const StringSet* css_critical_images, |
| int support_interval, |
| CriticalImages* critical_images) { |
| DCHECK(critical_images != NULL); |
| if (html_critical_images != NULL) { |
| UpdateCriticalKeys( |
| false /* require_prior_support */, |
| *html_critical_images, |
| support_interval, |
| critical_images->mutable_html_critical_image_support()); |
| } |
| if (css_critical_images != NULL) { |
| UpdateCriticalKeys( |
| false /* require_prior_support */, |
| *css_critical_images, |
| support_interval, |
| critical_images->mutable_css_critical_image_support()); |
| } |
| // We updated if either StringSet* was set. |
| return (html_critical_images != NULL || css_critical_images != NULL); |
| } |
| |
| RenderedImages* CriticalImagesFinder::ExtractRenderedImageDimensionsFromCache( |
| RewriteDriver* driver) { |
| PropertyCacheDecodeResult pcache_status; |
| scoped_ptr<RenderedImages> dimensions( |
| DecodeFromPropertyCache<RenderedImages>( |
| driver, |
| cohort(), |
| kRenderedImageDimensionsProperty, |
| driver->options()->finder_properties_cache_expiration_time_ms(), |
| &pcache_status)); |
| if (pcache_status == kPropertyCacheDecodeParseError) { |
| driver->message_handler()->Message( |
| kWarning, "Unable to parse Critical RenderedImage PropertyValue for %s", |
| driver->url()); |
| } |
| return dimensions.release(); |
| } |
| |
| RenderedImages* CriticalImagesFinder::JsonMapToRenderedImagesMap( |
| const GoogleString& str, |
| const RewriteOptions* options) { |
| Json::Reader reader; |
| Json::Value json_rendered_image_map; |
| if (!reader.parse(str, json_rendered_image_map)) { |
| LOG(WARNING) << "Unable to parse Json data for rendered images"; |
| return NULL; |
| } |
| // Parse json data into a map. |
| if (json_rendered_image_map.isNull() || !json_rendered_image_map.isObject()) { |
| LOG(WARNING) << "Bad Json rendered image dimensions map"; |
| return NULL; |
| } |
| // Put the extracted map into RenderedImages proto data. |
| RenderedImages* rendered_images = new RenderedImages(); |
| Json::Value::Members imgs = json_rendered_image_map.getMemberNames(); |
| for (int i = 0, n = imgs.size(); i < n; ++i) { |
| const GoogleString& img_src = imgs[i]; |
| int original_width = json_rendered_image_map[img_src].get( |
| kOriginalImageJsonWidthKey, 0).asInt(); |
| int original_height = json_rendered_image_map[img_src].get( |
| kOriginalImageJsonHeightKey, 0).asInt(); |
| int rendered_width = json_rendered_image_map[img_src].get( |
| kRenderedImageJsonWidthKey, 0).asInt(); |
| int rendered_height = json_rendered_image_map[img_src].get( |
| kRenderedImageJsonHeightKey, 0).asInt(); |
| int original_area = (original_width * original_height); |
| int rendered_area = (rendered_width * rendered_height); |
| // Store renderedWidth and renderedHeight for the image only if |
| // the rendered sizes are lower than the original sizes by at least the |
| // percentage threshold set. |
| if (100 * rendered_area < original_area * |
| options->image_limit_rendered_area_percent()) { |
| RenderedImages_Image* images = rendered_images->add_image(); |
| images->set_src(img_src); |
| images->set_rendered_width(rendered_width); |
| images->set_rendered_height(rendered_height); |
| } |
| } |
| return rendered_images; |
| } |
| |
| CriticalImagesInfo* CriticalImagesFinder::ExtractCriticalImagesFromCache( |
| RewriteDriver* driver, |
| const PropertyValue* property_value) { |
| CriticalImagesInfo* critical_images_info = NULL; |
| // Don't track stats if we are flushing early, since we will already be |
| // counting this when we are rewriting the full page. |
| bool track_stats = !driver->flushing_early(); |
| const PropertyCache* page_property_cache = |
| driver->server_context()->page_property_cache(); |
| int64 cache_ttl_ms = |
| driver->options()->finder_properties_cache_expiration_time_ms(); |
| // Check if the cache value exists and is not expired. |
| if (property_value->has_value()) { |
| const bool is_valid = |
| !page_property_cache->IsExpired(property_value, cache_ttl_ms); |
| if (is_valid) { |
| critical_images_info = |
| CriticalImagesInfoFromPropertyValue(PercentSeenForCritical(), |
| property_value); |
| if (track_stats) { |
| if (critical_images_info == NULL) { |
| critical_images_not_found_count_->Add(1); |
| } else { |
| critical_images_valid_count_->Add(1); |
| } |
| } |
| } else if (track_stats) { |
| critical_images_expired_count_->Add(1); |
| } |
| } else if (track_stats) { |
| critical_images_not_found_count_->Add(1); |
| } |
| return critical_images_info; |
| } |
| |
| CriticalImagesFinder::Availability CriticalImagesFinder::Available( |
| RewriteDriver* driver) { |
| UpdateCriticalImagesSetInDriver(driver); |
| CriticalImagesInfo* info = driver->critical_images_info(); |
| if (info != NULL && info->is_critical_image_info_present && |
| info->proto.has_html_critical_image_support() && |
| IsBeaconDataAvailable(info->proto.html_critical_image_support())) { |
| return kAvailable; |
| } else { |
| return kNoDataYet; |
| } |
| } |
| |
| bool CriticalImagesFinder::IsCriticalImageInfoPresent(RewriteDriver* driver) { |
| UpdateCriticalImagesSetInDriver(driver); |
| return driver->critical_images_info()->is_critical_image_info_present; |
| } |
| |
| void CriticalImagesFinder::AddHtmlCriticalImage( |
| const GoogleString& url, |
| RewriteDriver* driver) { |
| mutable_html_critical_images(driver)->insert(GetKeyForUrl(url)); |
| } |
| |
| } // namespace net_instaweb |