| /* |
| * Copyright 2010 Google Inc. |
| * |
| * Licensed under the Apache License, Version 2.0 (the "License"); |
| * you may not use this file except in compliance with the License. |
| * You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| // Author: lsong@google.com (Libo Song) |
| |
| #include "pagespeed/kernel/cache/file_cache.h" |
| |
| #include <algorithm> |
| #include <vector> |
| #include <stdint.h> |
| |
| #include "base/logging.h" |
| #include "pagespeed/kernel/base/abstract_mutex.h" |
| #include "pagespeed/kernel/base/basictypes.h" |
| #include "pagespeed/kernel/base/file_system.h" |
| #include "pagespeed/kernel/base/function.h" |
| #include "pagespeed/kernel/base/hasher.h" |
| #include "pagespeed/kernel/base/message_handler.h" |
| #include "pagespeed/kernel/base/null_message_handler.h" |
| #include "pagespeed/kernel/base/scoped_ptr.h" |
| #include "pagespeed/kernel/base/shared_string.h" |
| #include "pagespeed/kernel/base/statistics.h" |
| #include "pagespeed/kernel/base/string.h" |
| #include "pagespeed/kernel/base/string_util.h" |
| #include "pagespeed/kernel/base/thread_system.h" |
| #include "pagespeed/kernel/base/timer.h" |
| #include "pagespeed/kernel/cache/cache_interface.h" |
| #include "pagespeed/kernel/thread/slow_worker.h" |
| #include "pagespeed/kernel/util/url_to_filename_encoder.h" |
| |
| namespace net_instaweb { |
| |
| namespace { // For structs used only in Clean(). |
| |
| struct CompareByAtime { |
| public: |
| // Sort by ascending atime. |
| bool operator()(const FileSystem::FileInfo& one, |
| const FileSystem::FileInfo& two) const { |
| return one.atime_sec < two.atime_sec; |
| } |
| }; |
| |
| } // namespace |
| |
| class FileCache::CacheCleanFunction : public Function { |
| public: |
| CacheCleanFunction(FileCache* cache, int64 next_clean_time_ms) |
| : cache_(cache), |
| next_clean_time_ms_(next_clean_time_ms) {} |
| virtual ~CacheCleanFunction() {} |
| virtual void Run() { cache_->CleanWithLocking(next_clean_time_ms_); } |
| |
| private: |
| FileCache* cache_; |
| int64 next_clean_time_ms_; |
| DISALLOW_COPY_AND_ASSIGN(CacheCleanFunction); |
| }; |
| |
| const char FileCache::kBytesFreedInCleanup[] = |
| "file_cache_bytes_freed_in_cleanup"; |
| const char FileCache::kCleanups[] = "file_cache_cleanups"; |
| const char FileCache::kDiskChecks[] = "file_cache_disk_checks"; |
| const char FileCache::kEvictions[] = "file_cache_evictions"; |
| const char FileCache::kWriteErrors[] = "file_cache_write_errors"; |
| |
| // Filenames for the next scheduled clean time and the lockfile. In |
| // order to prevent these from colliding with actual cachefiles, they |
| // contain characters that our filename encoder would escape. |
| const char FileCache::kCleanTimeName[] = "!clean!time!"; |
| const char FileCache::kCleanLockName[] = "!clean!lock!"; |
| |
| // TODO(abliss): remove policy from constructor; provide defaults here |
| // and setters below. |
| FileCache::FileCache(const GoogleString& path, FileSystem* file_system, |
| ThreadSystem* thread_system, SlowWorker* worker, |
| CachePolicy* policy, Statistics* stats, |
| MessageHandler* handler) |
| : path_(path), |
| file_system_(file_system), |
| worker_(worker), |
| message_handler_(handler), |
| cache_policy_(policy), |
| mutex_(thread_system->NewMutex()), |
| next_clean_ms_(INT64_MAX), |
| path_length_limit_(file_system_->MaxPathLength(path)), |
| clean_time_path_(path), |
| clean_lock_path_(path), |
| disk_checks_(stats->GetVariable(kDiskChecks)), |
| cleanups_(stats->GetVariable(kCleanups)), |
| evictions_(stats->GetVariable(kEvictions)), |
| bytes_freed_in_cleanup_(stats->GetVariable(kBytesFreedInCleanup)), |
| write_errors_(stats->GetVariable(kWriteErrors)) { |
| if (policy->cleaning_enabled()) { |
| next_clean_ms_ = policy->timer->NowMs() + policy->clean_interval_ms / 2; |
| } |
| EnsureEndsInSlash(&clean_time_path_); |
| StrAppend(&clean_time_path_, kCleanTimeName); |
| EnsureEndsInSlash(&clean_lock_path_); |
| StrAppend(&clean_lock_path_, kCleanLockName); |
| } |
| |
| FileCache::~FileCache() { |
| } |
| |
| void FileCache::InitStats(Statistics* statistics) { |
| statistics->AddVariable(kBytesFreedInCleanup); |
| statistics->AddVariable(kCleanups); |
| statistics->AddVariable(kDiskChecks); |
| statistics->AddVariable(kEvictions); |
| statistics->AddVariable(kWriteErrors); |
| } |
| |
| void FileCache::Get(const GoogleString& key, Callback* callback) { |
| GoogleString filename; |
| bool ret = EncodeFilename(key, &filename); |
| if (ret) { |
| // Suppress read errors. Note that we want to show Write errors, |
| // as they likely indicate a permissions or disk-space problem |
| // which is best not eaten. It's cheap enough to construct |
| // a NullMessageHandler on the stack when we want one. |
| NullMessageHandler null_handler; |
| GoogleString buf; |
| ret = file_system_->ReadFile(filename.c_str(), &buf, &null_handler); |
| callback->value()->SwapWithString(&buf); |
| } |
| ValidateAndReportResult(key, ret ? kAvailable : kNotFound, callback); |
| } |
| |
| void FileCache::Put(const GoogleString& key, SharedString* value) { |
| GoogleString filename; |
| if (EncodeFilename(key, &filename) && |
| !file_system_->WriteFileAtomic(filename, value->Value(), |
| message_handler_)) { |
| write_errors_->Add(1); |
| } |
| CleanIfNeeded(); |
| } |
| |
| void FileCache::Delete(const GoogleString& key) { |
| GoogleString filename; |
| if (!EncodeFilename(key, &filename)) { |
| return; |
| } |
| NullMessageHandler null_handler; // Do not emit messages on delete failures. |
| file_system_->RemoveFile(filename.c_str(), &null_handler); |
| } |
| |
| bool FileCache::EncodeFilename(const GoogleString& key, |
| GoogleString* filename) { |
| GoogleString prefix = path_; |
| // TODO(abliss): unify and make explicit everyone's assumptions |
| // about trailing slashes. |
| EnsureEndsInSlash(&prefix); |
| UrlToFilenameEncoder::EncodeSegment(prefix, key, '/', filename); |
| |
| // Make sure the length isn't too big for filesystem to handle; if it is |
| // just name the object using a hash. |
| if (static_cast<int>(filename->length()) > path_length_limit_) { |
| UrlToFilenameEncoder::EncodeSegment( |
| prefix, cache_policy_->hasher->Hash(key), '/', filename); |
| } |
| |
| return true; |
| } |
| |
| namespace { |
| // The minimum age an empty directory needs to be before cache cleaning will |
| // delete it. This is to prevent cache cleaning from removing file lock |
| // directories that StdioFileSystem uses and is set to be double |
| // ServerContext::kBreakLockMs / kSecondMs. |
| const int64 kEmptyDirCleanAgeSec = 60; |
| } // namespace |
| |
| bool FileCache::Clean(int64 target_size_bytes, int64 target_inode_count) { |
| DCHECK(cache_policy_->cleaning_enabled()); |
| // TODO(jud): this function can delete .lock and .outputlock files, is this |
| // problematic? |
| message_handler_->Message(kInfo, |
| "Checking cache size against target %s and inode " |
| "count against target %s", |
| Integer64ToString(target_size_bytes).c_str(), |
| Integer64ToString(target_inode_count).c_str()); |
| disk_checks_->Add(1); |
| |
| bool everything_ok = true; |
| |
| // Get the contents of the cache |
| FileSystem::DirInfo dir_info; |
| file_system_->GetDirInfo(path_, &dir_info, message_handler_); |
| |
| // Check to see if cache size or inode count exceeds our limits. |
| // target_inode_count of 0 indicates no inode limit. |
| int64 cache_size = dir_info.size_bytes; |
| int64 cache_inode_count = dir_info.inode_count; |
| if (cache_size < target_size_bytes && |
| (target_inode_count == 0 || |
| cache_inode_count < target_inode_count)) { |
| message_handler_->Message(kInfo, |
| "File cache size is %s and contains %s inodes; " |
| "no cleanup needed.", |
| Integer64ToString(cache_size).c_str(), |
| Integer64ToString(cache_inode_count).c_str()); |
| return true; |
| } |
| |
| message_handler_->Message(kInfo, |
| "File cache size is %s and contains %s inodes; " |
| "beginning cleanup.", |
| Integer64ToString(cache_size).c_str(), |
| Integer64ToString(cache_inode_count).c_str()); |
| cleanups_->Add(1); |
| |
| // Remove empty directories. |
| StringVector::iterator it; |
| for (it = dir_info.empty_dirs.begin(); it != dir_info.empty_dirs.end(); |
| ++it) { |
| // StdioFileSystem uses an empty directory as a file lock. Avoid deleting |
| // these file locks by not removing the file cache clean lock file, and |
| // making sure empty directories are at least n seconds old before removing |
| // them, where n is double ServerContext::kBreakLockMs. |
| int64 timestamp_sec; |
| file_system_->Mtime(*it, ×tamp_sec, message_handler_); |
| const int64 now_sec = cache_policy_->timer->NowMs() / Timer::kSecondMs; |
| int64 age_sec = now_sec - timestamp_sec; |
| if (age_sec > kEmptyDirCleanAgeSec && |
| clean_lock_path_.compare(it->c_str()) != 0) { |
| everything_ok &= file_system_->RemoveDir(it->c_str(), message_handler_); |
| } |
| // Decrement cache_inode_count even if RemoveDir failed. This is likely |
| // because the directory has already been removed. |
| --cache_inode_count; |
| } |
| |
| // Save original cache size to track how many bytes we've cleaned up. |
| int64 orig_cache_size = cache_size; |
| |
| // Sort files by atime in ascending order to remove oldest files first. |
| std::sort(dir_info.files.begin(), dir_info.files.end(), CompareByAtime()); |
| |
| // Set the target size to clean to. |
| target_size_bytes = (target_size_bytes * 3) / 4; |
| target_inode_count = (target_inode_count * 3) / 4; |
| |
| // Delete files until we are under our targets. |
| std::vector<FileSystem::FileInfo>::iterator file_itr = dir_info.files.begin(); |
| while (file_itr != dir_info.files.end() && |
| (cache_size > target_size_bytes || |
| (target_inode_count != 0 && |
| cache_inode_count > target_inode_count))) { |
| FileSystem::FileInfo file = *file_itr; |
| ++file_itr; |
| // Don't clean the clean_time or clean_lock files! They ought to be the |
| // newest files (and very small) so they would normally not be deleted |
| // anyway. But on some systems (e.g. mounted noatime?) they were getting |
| // deleted. |
| if (clean_time_path_.compare(file.name) == 0 || |
| clean_lock_path_.compare(file.name) == 0) { |
| continue; |
| } |
| cache_size -= file.size_bytes; |
| // Decrement inode_count even if RemoveFile fails. This is likely because |
| // the file has already been removed. |
| --cache_inode_count; |
| everything_ok &= file_system_->RemoveFile(file.name.c_str(), |
| message_handler_); |
| evictions_->Add(1); |
| } |
| |
| int64 bytes_freed = orig_cache_size - cache_size; |
| message_handler_->Message(kInfo, |
| "File cache cleanup complete; freed %s bytes", |
| Integer64ToString(bytes_freed).c_str()); |
| bytes_freed_in_cleanup_->Add(bytes_freed); |
| return everything_ok; |
| } |
| |
| void FileCache::CleanWithLocking(int64 next_clean_time_ms) { |
| if (file_system_->TryLockWithTimeout( |
| clean_lock_path_, Timer::kHourMs, cache_policy_->timer, |
| message_handler_).is_true()) { |
| // Update the timestamp file. |
| { |
| ScopedMutex lock(mutex_.get()); |
| next_clean_ms_ = next_clean_time_ms; |
| } |
| if (!file_system_->WriteFileAtomic(clean_time_path_, |
| Integer64ToString(next_clean_time_ms), |
| message_handler_)) { |
| write_errors_->Add(1); |
| } |
| |
| // Now actually clean. |
| Clean(cache_policy_->target_size_bytes, cache_policy_->target_inode_count); |
| file_system_->Unlock(clean_lock_path_, message_handler_); |
| } |
| } |
| |
| bool FileCache::ShouldClean(int64* suggested_next_clean_time_ms) { |
| if (!cache_policy_->cleaning_enabled()) { |
| return false; |
| } |
| |
| bool to_return = false; |
| const int64 now_ms = cache_policy_->timer->NowMs(); |
| { |
| ScopedMutex lock(mutex_.get()); |
| if (now_ms < next_clean_ms_) { |
| *suggested_next_clean_time_ms = next_clean_ms_; // No change yet. |
| return false; |
| } |
| } |
| |
| GoogleString clean_time_str; |
| int64 clean_time_ms = 0; |
| int64 new_clean_time_ms = now_ms + cache_policy_->clean_interval_ms; |
| NullMessageHandler null_handler; |
| if (file_system_->ReadFile(clean_time_path_.c_str(), &clean_time_str, |
| &null_handler)) { |
| StringToInt64(clean_time_str, &clean_time_ms); |
| } else { |
| message_handler_->Message( |
| kWarning, "Failed to read cache clean timestamp %s. " |
| " Doing an extra cache clean to be safe.", clean_time_path_.c_str()); |
| } |
| |
| // If the "clean time" written in the file is older than now, we clean. |
| if (clean_time_ms < now_ms) { |
| message_handler_->Message( |
| kInfo, "Need to check cache size against target %s", |
| Integer64ToString(cache_policy_->target_size_bytes).c_str()); |
| to_return = true; |
| } |
| // If the "clean time" is later than now plus one interval, something |
| // went wrong (like the system clock moving backwards or the file |
| // getting corrupt) so we clean and reset it. |
| if (clean_time_ms > new_clean_time_ms) { |
| message_handler_->Message(kError, |
| "Next scheduled file cache clean time %s" |
| " is implausibly remote. Cleaning now.", |
| Integer64ToString(clean_time_ms).c_str()); |
| to_return = true; |
| } |
| |
| *suggested_next_clean_time_ms = new_clean_time_ms; |
| if (!to_return) { |
| ScopedMutex lock(mutex_.get()); |
| next_clean_ms_ = new_clean_time_ms; |
| } |
| return to_return; |
| } |
| |
| void FileCache::CleanIfNeeded() { |
| DCHECK(worker_ != NULL); |
| if (worker_ != NULL) { |
| int64 suggested_next_clean_time_ms; |
| if (ShouldClean(&suggested_next_clean_time_ms)) { |
| worker_->Start(); |
| worker_->RunIfNotBusy( |
| new CacheCleanFunction(this, suggested_next_clean_time_ms)); |
| } |
| } |
| } |
| |
| } // namespace net_instaweb |