// blob: 1a28c4932efd8b025ff26a037f09b65e7548a3e0 (gitiles header; not part of the source)
/*
* Copyright 2013 Google Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
// Author: jud@google.com (Jud Porter)
#include "net/instaweb/rewriter/public/critical_finder_support_util.h"
#include <algorithm>
#include <map>
#include <set>
#include <utility>
#include "base/logging.h"
#include "net/instaweb/public/global_constants.h"
#include "net/instaweb/rewriter/critical_keys.pb.h"
#include "net/instaweb/rewriter/public/property_cache_util.h"
#include "net/instaweb/rewriter/public/rewrite_driver.h"
#include "net/instaweb/rewriter/public/rewrite_options.h"
#include "net/instaweb/util/public/property_cache.h"
#include "pagespeed/kernel/base/base64_util.h"
#include "pagespeed/kernel/base/message_handler.h"
#include "pagespeed/kernel/base/scoped_ptr.h"
#include "pagespeed/kernel/base/string_util.h"
#include "pagespeed/kernel/base/timer.h"
#include "pagespeed/kernel/http/request_headers.h"
#include "pagespeed/kernel/util/nonce_generator.h"
namespace net_instaweb {
namespace {
typedef std::map<GoogleString, int> SupportMap;
// *dest += addend, saturating at kint32max instead of overflowing.
inline void SaturatingAddTo(int32 addend, int32* dest) {
  // Do the addition in 64 bits so it cannot overflow, then clamp.
  const int64 sum = static_cast<int64>(*dest) + addend;
  if (sum > kint32max) {
    *dest = kint32max;
  } else {
    *dest = static_cast<int32>(sum);
  }
}
bool LessBySupportMapValue(const SupportMap::value_type& pair1,
const SupportMap::value_type& pair2) {
return (pair1.second < pair2.second);
}
// Build a SupportMap (key -> support) from the evidence entries stored in
// critical_keys.  Entries with an empty key are skipped.
SupportMap ConvertCriticalKeysProtoToSupportMap(
    const CriticalKeys& critical_keys) {
  SupportMap support_map;
  const int num_entries = critical_keys.key_evidence_size();
  for (int i = 0; i < num_entries; ++i) {
    const CriticalKeys::KeyEvidence& evidence = critical_keys.key_evidence(i);
    if (evidence.key().empty()) {
      continue;  // Ignore keyless (corrupt or blank) evidence.
    }
    // Aggregate with saturation in case corrupt data duplicates a key.
    SaturatingAddTo(evidence.support(), &support_map[evidence.key()]);
  }
  return support_map;
}
void WriteSupportMapToCriticalKeysProto(const SupportMap& support_map,
CriticalKeys* critical_keys) {
// Clean out the existing evidence and inject the fresh evidence.
critical_keys->clear_key_evidence();
for (SupportMap::const_iterator entry = support_map.begin();
entry != support_map.end(); ++entry) {
CriticalKeys::KeyEvidence* evidence = critical_keys->add_key_evidence();
evidence->set_key(entry->first);
evidence->set_support(entry->second);
}
}
// Decay a single support value by multiplying it by k / (k+1), rounding down.
// Fractional arithmetic done in int64 to avoid int overflow.
inline int Decay(int support_interval, int64 support_value) {
support_value *= support_interval;
support_value /= support_interval + 1;
return support_value;
}
// Decay support, deleting elements whose support drops to 0.
//
// Each entry's support is scaled by support_interval / (support_interval + 1),
// rounding down.  Entries whose support decays from a positive value to 0 are
// erased; entries already at 0 (candidate keys seeded by beaconing, per the
// comment in UpdateCriticalKeys) are deliberately kept.
void DecaySupportMap(int support_interval, SupportMap* support_map) {
  SupportMap::iterator next = support_map->begin(), end = support_map->end();
  while (next != end) {
    // We increment at the top of the loop so that we can erase curr
    // without invalidating next. See:
    // http://stackoverflow.com/questions/2874441/deleting-elements-from-stl-set-while-iterating
    SupportMap::iterator curr = next++;
    // Multiply support_value by the fraction
    // support_interval / (support_interval + 1)
    // using int64 to avoid overflow.
    int support_value = Decay(support_interval, curr->second);
    if (support_value == 0 && curr->second > 0) {
      // Remove entry when its support falls to 0 (this will expire entries that
      // should not be candidates; if curr should still be a candidate, we will
      // re-insert it as part of beaconing).
      support_map->erase(curr);
    } else {
      curr->second = static_cast<int32>(support_value);
    }
  }
}
// Expire pending-beacon nonces older than kBeaconTimeoutIntervalMs.  Expired
// slots are blanked (nonce + timestamp cleared) so AddNonceToCriticalSelectors
// can reuse them, and the recent-expiration counter is bumped.  If no valid
// entry remains afterwards, the whole pending list is dropped.
void ClearInvalidNonces(const int64 now_ms, CriticalKeys* critical_keys) {
  bool have_valid_entry = false;
  const int num_entries = critical_keys->pending_nonce_size();
  for (int i = 0; i < num_entries; ++i) {
    CriticalKeys::PendingNonce* entry = critical_keys->mutable_pending_nonce(i);
    if (!entry->has_nonce()) {
      continue;  // Unoccupied slot; nothing to expire.
    }
    if ((entry->timestamp_ms() + kBeaconTimeoutIntervalMs) < now_ms) {
      // Timed out: blank the slot and record the expiration.
      entry->clear_timestamp_ms();
      entry->clear_nonce();
      critical_keys->set_nonces_recently_expired(
          critical_keys->nonces_recently_expired() + 1);
    } else {
      have_valid_entry = true;
    }
  }
  if (!have_valid_entry) {
    critical_keys->clear_pending_nonce();
  }
}
// Generate a nonce and record the existence of a beacon with that nonce sent
// at timestamp_ms, updating *nonce with the new value.
void AddNonceToCriticalSelectors(const int64 timestamp_ms,
                                 NonceGenerator* nonce_generator,
                                 CriticalKeys* critical_keys,
                                 GoogleString* nonce) {
  CHECK(nonce_generator != NULL);
  // Web64-encode a fresh 64-bit nonce value.
  uint64 nonce_value = nonce_generator->NewNonce();
  StringPiece nonce_piece(reinterpret_cast<char*>(&nonce_value),
                          sizeof(nonce_value));
  Web64Encode(nonce_piece, nonce);
  if (nonce->size() > 11) {
    // 11 web64 characters hold 66 bits, enough for the 64-bit source value;
    // drop anything beyond that (encoding padding).
    nonce->resize(11);
  }
  ClearInvalidNonces(timestamp_ms, critical_keys);
  // Prefer recycling a blanked (expired) slot over growing the list.
  CriticalKeys::PendingNonce* pending_nonce = NULL;
  const int num_entries = critical_keys->pending_nonce_size();
  for (int i = 0; i < num_entries && pending_nonce == NULL; ++i) {
    CriticalKeys::PendingNonce* entry = critical_keys->mutable_pending_nonce(i);
    if (!entry->has_nonce()) {
      pending_nonce = entry;
    }
  }
  if (pending_nonce == NULL) {
    // No free slot; append a new entry.
    pending_nonce = critical_keys->add_pending_nonce();
  }
  pending_nonce->set_timestamp_ms(timestamp_ms);
  pending_nonce->set_nonce(*nonce);
}
} // namespace
// Check whether the given nonce is valid, invalidating any expired nonce
// entries we might encounter. To avoid the need to copy and clear the nonce
// list, we invalidate the entry used and any expired entries by clearing the
// nonce value and timestamp. These entries will be reused by
// AddNonceToCriticalSelectors.
bool ValidateAndExpireNonce(int64 now_ms, StringPiece nonce,
                            CriticalKeys* critical_keys) {
  // An empty nonce cannot match anything; treat it as a bogus beacon result.
  if (nonce.empty()) {
    return false;
  }
  ClearInvalidNonces(now_ms, critical_keys);
  // Scan the pending list for a match, consuming the entry on success so the
  // same nonce cannot be replayed.
  const int num_entries = critical_keys->pending_nonce_size();
  for (int i = 0; i < num_entries; ++i) {
    CriticalKeys::PendingNonce* entry = critical_keys->mutable_pending_nonce(i);
    if (nonce == entry->nonce()) {
      entry->clear_timestamp_ms();
      entry->clear_nonce();
      return true;
    }
  }
  return false;
}
// Insert into *keys every key whose support reaches support_percentage
// percent of maximum_possible_support.  A support_percentage of 0 selects
// every key with any positive support.
void GetCriticalKeysFromProto(int64 support_percentage,
                              const CriticalKeys& critical_keys,
                              StringSet* keys) {
  // The threshold is compared against support * 100 so no fractional
  // arithmetic is needed; everything stays in int64 to avoid overflow.
  int64 support_threshold = 1;
  if (support_percentage != 0) {
    support_threshold =
        support_percentage * critical_keys.maximum_possible_support();
  }
  for (int i = 0; i < critical_keys.key_evidence_size(); ++i) {
    const CriticalKeys::KeyEvidence& evidence = critical_keys.key_evidence(i);
    const int64 scaled_support = static_cast<int64>(evidence.support()) * 100;
    if (!evidence.key().empty() && scaled_support >= support_threshold) {
      keys->insert(evidence.key());
    }
  }
}
// Merge the given set into the existing critical key proto by adding
// support for new_set to existing support.
//
// Steps:
//   1. Decay existing support (and maximum_possible_support) by the factor
//      support_value / (support_value + 1); entries that expire are dropped.
//   2. Add support_value of fresh support for each key in new_set.  When
//      require_prior_support is set, only keys already present in the map
//      (seeded with support 0 by candidate detection) may gain support.
//   3. Bump valid_beacons_received and reset nonces_recently_expired.
void UpdateCriticalKeys(bool require_prior_support,
                        const StringSet& new_set, int support_value,
                        CriticalKeys* critical_keys) {
  DCHECK(critical_keys != NULL);
  SupportMap support_map = ConvertCriticalKeysProtoToSupportMap(*critical_keys);
  DecaySupportMap(support_value, &support_map);
  // Update maximum_possible_support. Initial value must account for legacy
  // data (presumably protos written before this field existed — the fallback
  // uses the largest support present in the map, or 0 if the map is empty).
  int maximum_support;
  if (critical_keys->has_maximum_possible_support()) {
    maximum_support =
        Decay(support_value, critical_keys->maximum_possible_support());
  } else if (support_map.empty()) {
    maximum_support = 0;
  } else {
    maximum_support = std::max_element(support_map.begin(), support_map.end(),
                                       LessBySupportMapValue)->second;
  }
  // This beacon contributes support_value of additional possible support.
  SaturatingAddTo(support_value, &maximum_support);
  critical_keys->set_maximum_possible_support(maximum_support);
  // Actually add the new_set to the support_map.
  if (require_prior_support) {
    for (StringSet::const_iterator s = new_set.begin();
         s != new_set.end(); ++s) {
      // Only add entries that are already in the support_map
      // (critical_css_beacon_filter initializes candidate entries to have
      // support 0). This avoids a cache-fill DoS with spurious beacon data.
      SupportMap::iterator entry = support_map.find(*s);
      if (entry != support_map.end()) {
        SaturatingAddTo(support_value, &entry->second);
      }
    }
  } else {
    // Unconditionally add entries to support_map.
    for (StringSet::const_iterator s = new_set.begin();
         s != new_set.end(); ++s) {
      SaturatingAddTo(support_value, &support_map[*s]);
    }
  }
  critical_keys->set_valid_beacons_received(
      critical_keys->valid_beacons_received() + 1);
  critical_keys->set_nonces_recently_expired(0);
  WriteSupportMapToCriticalKeysProto(support_map, critical_keys);
}
// Merge new_keys into the CriticalKeys proto stored in the property cache and
// write the result back.
//
// Unless kReplacePriorResult is set in flags, the current proto is first read
// from the pcache and the beacon nonce is validated (skipped when
// kSkipNonceCheck is set); an invalid nonce aborts the write.  No-op when
// page is NULL or when the cohort is missing from the pcache configuration.
void WriteCriticalKeysToPropertyCache(
    const StringSet& new_keys, StringPiece nonce, int support_interval,
    CriticalKeysWriteFlags flags,
    StringPiece property_name, const PropertyCache* cache,
    const PropertyCache::Cohort* cohort, AbstractPropertyPage* page,
    MessageHandler* message_handler, Timer* timer) {
  // We can't do anything here if page is NULL, so bail out early.
  if (page == NULL) {
    return;
  }
  scoped_ptr<CriticalKeys> critical_keys;
  // TODO(jud): Consider refactoring this into the subclasses as part of the
  // WriteCriticalSelectors refactoring that's ongoing. Note that this may
  // break slamm's tests at the bottom of critical_selector_finder_test.cc
  // depending on how subclassing is done, so some care will be required.
  if (flags & kReplacePriorResult) {
    // Caller wants a clean slate: skip both the pcache read and nonce check.
    critical_keys.reset(new CriticalKeys);
  } else {
    // We first need to read the current critical keys in the property cache,
    // then update it with the new set if it exists, or create it if it doesn't.
    PropertyCacheDecodeResult decode_result;
    critical_keys.reset(DecodeFromPropertyCache<CriticalKeys>(
        cache, page, cohort, property_name, -1, &decode_result));
    switch (decode_result) {
      case kPropertyCacheDecodeOk:
        // We successfully decoded the property cache value, so use the returned
        // CriticalKeys.
        break;
      case kPropertyCacheDecodeNotFound:
        // We either got here because the property cache is not set up correctly
        // (the cohort doesn't exist), or we just don't have a value already.
        // For the former, bail out since there is no use trying to update the
        // property cache if it is not setup. For the later, create a new
        // CriticalKeys, since we just haven't written a value before.
        if (cohort == NULL) {
          return;
        }
        FALLTHROUGH_INTENDED;
      case kPropertyCacheDecodeExpired:
      case kPropertyCacheDecodeParseError:
        // We can proceed here, but we need to create a new CriticalKeys.
        critical_keys.reset(new CriticalKeys);
        break;
    }
    // Reject beacon results whose nonce is missing, expired, or already used.
    if (!(flags & kSkipNonceCheck) &&
        !ValidateAndExpireNonce(timer->NowMs(), nonce, critical_keys.get())) {
      return;
    }
  }
  UpdateCriticalKeys(flags & kRequirePriorSupport, new_keys, support_interval,
                     critical_keys.get());
  PropertyCacheUpdateResult result = UpdateInPropertyCache(
      *critical_keys, cohort, property_name, false /* write_cohort */, page);
  switch (result) {
    case kPropertyCacheUpdateNotFound:
      message_handler->MessageS(kWarning,
                                "Unable to get Critical keys set for update.");
      break;
    case kPropertyCacheUpdateEncodeError:
      message_handler->MessageS(kWarning, "Trouble marshaling CriticalKeys!?");
      break;
    case kPropertyCacheUpdateOk:
      // Nothing more to do.
      break;
  }
}
// Decide whether this request should trigger beacon instrumentation: either
// the rebeaconing interval has elapsed, or (with downstream caching plus a
// configured rebeaconing key) the request explicitly asks for it.
bool ShouldBeacon(int64 next_beacon_timestamp_ms, const RewriteDriver& driver) {
  const RewriteOptions* options = driver.options();
  // When downstream cache integration is enabled, and there is a rebeaconing
  // key already specified in the config, we should only rebeacon when there
  // is a matching key in the beacon requesting header.
  if (options->IsDownstreamCacheIntegrationEnabled() &&
      options->IsDownstreamCacheRebeaconingKeyConfigured()) {
    return options->MatchesDownstreamCacheRebeaconingKey(
        driver.request_headers()->Lookup1(kPsaShouldBeacon));
  }
  // Otherwise, beacon once the reinstrumentation timestamp has passed.
  return driver.timer()->NowMs() >= next_beacon_timestamp_ms;
}
// Decide whether to instrument this page view with a beacon.  On success,
// sets result->status to kBeaconWithNonce and fills result->nonce; otherwise
// leaves status at kDoNotBeacon.  Also advances proto's
// next_beacon_timestamp_ms and records the pending nonce.
void PrepareForBeaconInsertionHelper(CriticalKeys* proto,
                                     NonceGenerator* nonce_generator,
                                     RewriteDriver* driver,
                                     bool using_candidate_key_detection,
                                     BeaconMetadata* result) {
  result->status = kDoNotBeacon;
  CHECK(proto);
  if (!ShouldBeacon(proto->next_beacon_timestamp_ms(), *driver)) {
    return;
  }
  // Downstream caching without a rebeaconing key: warn about the
  // misconfiguration, but still proceed with beaconing below.
  if (driver->options()->IsDownstreamCacheIntegrationEnabled() &&
      !driver->options()->IsDownstreamCacheRebeaconingKeyConfigured()) {
    // Note that we do not put out this message on a per request basis, because
    // it will clutter up the logs. Instead we do it only once every beaconing
    // interval.
    driver->message_handler()->Message(
        kWarning,
        "You seem to have downstream caching configured on your server. "
        "DownstreamCacheRebeaconingKey should also be set for this to work "
        "correctly. Refer to "
        "https://developers.google.com/speed/pagespeed/module/"
        "downstream-caching#beaconing "
        "for more details.");
  }
  // We need to rebeacon so update the timestamp for the next time to
  // rebeacon. If we are using candidate key detection, then check how many
  // valid beacons we have received since the last time the candidate keys
  // changed to determine if we are doing high frequency vs low frequency
  // beaconing.
  // TODO(jmaessen): Add noise to inter-beacon interval. How? Currently first
  // visit to page after next_beacon_timestamp_ms will beacon.
  int64 beacon_reinstrument_time_ms =
      driver->options()->beacon_reinstrument_time_sec() * Timer::kSecondMs;
  // Switch to the longer (low-frequency) interval when too many nonces have
  // expired recently, or when enough valid beacons have arrived since the
  // candidate keys last changed.
  if ((proto->nonces_recently_expired() > kNonceExpirationLimit) ||
      (using_candidate_key_detection &&
       (proto->valid_beacons_received() >= kHighFreqBeaconCount))) {
    beacon_reinstrument_time_ms *= kLowFreqBeaconMult;
  }
  int64 now_ms = driver->timer()->NowMs();
  proto->set_next_beacon_timestamp_ms(now_ms + beacon_reinstrument_time_ms);
  AddNonceToCriticalSelectors(now_ms, nonce_generator, proto, &result->nonce);
  result->status = kBeaconWithNonce;
}
// Merge the observed candidate keys into the proto.  Previously-unknown
// candidates are inserted with a support of 0, marking them as valid targets
// for future beacon results (unknown keys in beacon responses are ignored,
// avoiding a pcache DoS).  When the candidate set changes, resets the
// valid-beacon count (forcing high-frequency beaconing), optionally clears
// the rebeacon timestamp, writes the proto, and returns true; otherwise
// returns false and leaves the proto untouched.
bool UpdateCandidateKeys(const StringSet& keys, CriticalKeys* proto,
                         bool clear_rebeacon_timestamp) {
  // Check to see if candidate keys are already known to pcache. Insert
  // previously-unknown candidates with a support of 0, to indicate that beacon
  // results for those keys will be considered valid. Other keys returned in a
  // beacon result will simply be ignored, avoiding DoSing the pcache. New
  // candidate keys cause us to re-beacon.
  SupportMap support_map = ConvertCriticalKeysProtoToSupportMap(*proto);
  bool support_map_changed = false;
  for (StringSet::const_iterator i = keys.begin(), end = keys.end(); i != end;
       ++i) {
    // std::make_pair rather than an unqualified pair<GoogleString, int>(...),
    // which only compiled via a using-declaration leaking from a header.
    if (support_map.insert(std::make_pair(*i, 0)).second) {
      support_map_changed = true;
    }
  }
  if (support_map_changed) {
    // The candidate keys changed, so we need to go into high frequency
    // beaconing mode. Reset the number of beacons received to signal this.
    proto->set_valid_beacons_received(0);
    // Clear the rebeaconing timestamp to force rebeaconing if requested.
    if (clear_rebeacon_timestamp) {
      proto->clear_next_beacon_timestamp_ms();
    }
    // Update the proto value with the new set of keys. Note that we are not
    // changing the calculated set of critical keys, so we don't need to
    // update the state in the RewriteDriver.
    WriteSupportMapToCriticalKeysProto(support_map, proto);
    return true;
  }
  return false;
}
} // namespace net_instaweb