blob: 15d1b276d6a748a949d3a4b3e264e22772ba53e3 [file] [log] [blame]
/*
* Copyright 2011 Google Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
// Author: jmarantz@google.com (Joshua Marantz)
//
#include "net/instaweb/rewriter/public/input_info_utils.h"
#include "base/logging.h"
#include "net/instaweb/rewriter/public/rewrite_options.h"
#include "net/instaweb/rewriter/public/server_context.h"
#include "pagespeed/kernel/base/file_system.h"
#include "pagespeed/kernel/base/hasher.h"
#include "pagespeed/kernel/base/proto_util.h"
#include "pagespeed/kernel/base/shared_string.h"
#include "pagespeed/kernel/base/string.h"
#include "pagespeed/kernel/base/string_util.h"
#include "pagespeed/kernel/base/timer.h"
#include "pagespeed/kernel/cache/cache_interface.h"
namespace net_instaweb {
namespace input_info_utils {
namespace {
// Returns true iff the filesystem metadata cache entry |fsmdc_info| carries
// both a last-modified time and a content hash, its last-modified time equals
// |mtime_ms|, and its content hash equals the one recorded in |input_info|.
bool MatchesFileSystemMetadataCacheEntry(
    const InputInfo& input_info,
    const InputInfo& fsmdc_info,
    int64 mtime_ms) {
  // An entry missing either field cannot vouch for anything.
  if (!fsmdc_info.has_last_modified_time_ms() ||
      !fsmdc_info.has_input_content_hash()) {
    return false;
  }
  // The entry's timestamp must equal the supplied one.
  if (fsmdc_info.last_modified_time_ms() != mtime_ms) {
    return false;
  }
  return fsmdc_info.input_content_hash() == input_info.input_content_hash();
}
// Looks up |file_key| in the filesystem metadata cache (FSMDC) |fsmdc| and
// reports whether the stored entry confirms that |input_info| is still valid.
//
// Returns true only when all of the following hold:
//   - the lookup reports CacheInterface::kAvailable,
//   - the cached bytes parse as an InputInfo,
//   - that InputInfo has a timestamp equal to |mtime_ms| and a content hash
//     equal to the content hash in |input_info|.
// In every other case (no entry, unparsable entry, stale timestamp, differing
// hash) false is returned, since validity cannot be established from the
// cache.
bool IsFilesystemMetadataCacheCurrent(CacheInterface* fsmdc,
                                      const GoogleString& file_key,
                                      const InputInfo& input_info,
                                      int64 mtime_ms) {
  CacheInterface::SynchronousCallback lookup;
  fsmdc->Get(file_key, &lookup);
  // The callback is expected to have run by the time Get() returns.
  DCHECK(lookup.called());
  if (lookup.state() != CacheInterface::kAvailable) {
    return false;
  }

  StringPiece serialized = lookup.value().Value();
  ArrayInputStream stream(serialized.data(), serialized.size());
  InputInfo cached_entry;
  if (!cached_entry.ParseFromZeroCopyStream(&stream)) {
    return false;
  }

  // We have a usable FSMDC entry: the input is valid iff the entry's
  // timestamp and content hash agree with the file and the metadata cache.
  return MatchesFileSystemMetadataCacheEntry(input_info, cached_entry,
                                             mtime_ms);
}
// Refreshes the filesystem metadata cache entry stored under |file_key|.
//
// Reads the file named by |input_info.filename()| from disk, hashes its
// contents with the server context's contents hasher, fills |*fsmdc_info|
// with type FILE_BASED, |mtime_ms| and that hash, then serializes
// |*fsmdc_info| and writes it into |fsmdc|.
//
// Returns false (leaving |*fsmdc_info| and the cache untouched) if the file
// cannot be read; returns true otherwise.
bool UpdateFilesystemMetadataCache(ServerContext* server_context,
                                   const GoogleString& file_key,
                                   const InputInfo& input_info,
                                   int64 mtime_ms,
                                   CacheInterface* fsmdc,
                                   InputInfo* fsmdc_info) {
  GoogleString file_contents;
  const bool read_ok = server_context->file_system()->ReadFile(
      input_info.filename().c_str(), &file_contents,
      server_context->message_handler());
  if (!read_ok) {
    return false;
  }

  const GoogleString hash_of_contents =
      server_context->contents_hasher()->Hash(file_contents);

  // A non-positive modification time is not expected here.
  DCHECK_LT(0, mtime_ms);
  fsmdc_info->set_last_modified_time_ms(mtime_ms);
  fsmdc_info->set_input_content_hash(hash_of_contents);
  fsmdc_info->set_type(InputInfo::FILE_BASED);

  GoogleString serialized;
  {
    // The stream MUST go out of scope before |serialized| is used: its
    // destructor is what finalizes the string.
    StringOutputStream out(&serialized);
    fsmdc_info->SerializeToZeroCopyStream(&out);
  }
  fsmdc->PutSwappingString(file_key, &serialized);
  return true;
}
} // namespace
// Checks whether the given input is still unchanged.
//
// The check depends on |input_info.type()|:
//   CACHED:       Invalid if |options| reports the URL's cache entry as no
//                 longer valid (in which case |*purged| is set to true), or
//                 if there is no expiration time. Otherwise valid while
//                 |now_ms| is before the expiration time. An expired input is
//                 still accepted, with |*stale_rewrite| set to true, when
//                 |nested_rewrite| is false and the expiry is within
//                 options->metadata_cache_staleness_threshold_ms().
//   FILE_BASED:   Valid if the file's current modification time matches the
//                 recorded one. When the server context has a filesystem
//                 metadata cache, the content hash must match as well; that
//                 cache is consulted first and refreshed from disk if it
//                 cannot confirm validity.
//   ALWAYS_VALID: Always valid.
//
// |*purged| and |*stale_rewrite| are only ever set to true here; they are
// left untouched on all other paths.
bool IsInputValid(
    ServerContext* server_context, const RewriteOptions* options,
    bool nested_rewrite, const InputInfo& input_info,
    int64 now_ms, bool* purged, bool* stale_rewrite) {
  switch (input_info.type()) {
    case InputInfo::CACHED: {
      // It is invalid if cacheable inputs have expired or ...
      DCHECK(input_info.has_expiration_time_ms());
      if (input_info.has_url()) {
        // We do not search wildcards when validating metadata because
        // that would require N wildcard matches (not even a
        // FastWildcardGroup) per input dependency.
        if (!options->IsUrlCacheValid(input_info.url(), input_info.date_ms(),
                                      false /* search_wildcards */)) {
          *purged = true;
          return false;
        }
      }
      if (!input_info.has_expiration_time_ms()) {
        return false;
      }
      int64 ttl_ms = input_info.expiration_time_ms() - now_ms;
      if (ttl_ms > 0) {
        return true;
      } else if (
          !nested_rewrite &&
          ttl_ms + options->metadata_cache_staleness_threshold_ms() > 0) {
        // Expired, but recently enough that the result may still be used.
        *stale_rewrite = true;
        return true;
      }
      return false;
    }
    case InputInfo::FILE_BASED: {
      // ... if file-based inputs have changed.
      DCHECK(input_info.has_last_modified_time_ms() &&
             input_info.has_filename());
      if (!input_info.has_last_modified_time_ms() ||
          !input_info.has_filename()) {
        return false;
      }
      // Initialize the out-parameter and honor the result of Mtime(): if the
      // modification time cannot be obtained we must not go on to compare an
      // indeterminate value against the recorded timestamps.
      // NOTE(review): assumes FileSystem::Mtime returns false on failure --
      // confirm against the FileSystem interface.
      int64 mtime_sec = 0;
      if (!server_context->file_system()->Mtime(
              input_info.filename(), &mtime_sec,
              server_context->message_handler())) {
        return false;
      }
      int64 mtime_ms = mtime_sec * Timer::kSecondMs;

      CacheInterface* fsmdc = server_context->filesystem_metadata_cache();
      if (fsmdc != nullptr) {
        CHECK(fsmdc->IsBlocking());
        if (!input_info.has_input_content_hash()) {
          return false;
        }
        // Construct a host-specific key. The format is somewhat arbitrary,
        // all it needs to do is differentiate the same path on different
        // hosts. If the size of the key becomes a concern we can hash it
        // and hope.
        GoogleString file_key;
        StrAppend(&file_key, "file://", server_context->hostname(),
                  input_info.filename());
        if (IsFilesystemMetadataCacheCurrent(fsmdc, file_key, input_info,
                                             mtime_ms)) {
          return true;
        }
        InputInfo fsmdc_info;
        if (!UpdateFilesystemMetadataCache(server_context, file_key,
                                           input_info, mtime_ms, fsmdc,
                                           &fsmdc_info)) {
          return false;
        }
        // Check again now that we KNOW we have the most up-to-date data
        // in the filesystem metadata cache.
        return MatchesFileSystemMetadataCacheEntry(
            input_info, fsmdc_info, mtime_ms);
      } else {
        DCHECK_LT(0, input_info.last_modified_time_ms());
        return (mtime_ms == input_info.last_modified_time_ms());
      }
    }
    case InputInfo::ALWAYS_VALID:
      return true;
  }

  // Reached only when type() is none of the enumerators handled above.
  LOG(DFATAL) << "Corrupt InputInfo object !?";
  return false;
}
} // namespace input_info_utils
} // namespace net_instaweb