| /* |
| * Copyright 2013 Google Inc. |
| * |
| * Licensed under the Apache License, Version 2.0 (the "License"); |
| * you may not use this file except in compliance with the License. |
| * You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| // Author: morlovich@google.com (Maksim Orlovich) |
| |
| // This module provides a shared memory cache implementation. |
| // |
| // ---------------------------------------------------------------------------- |
| // Cache data structures and memory layout. |
| // ---------------------------------------------------------------------------- |
| // |
| // We first re-key everything via cryptographic (or other 'very very good') |
| // hashes by using a Hasher object, so the key bits are very random. |
| // |
| // The cache is partitioned into sectors. The sectors are completely independent |
| // --- no operation ever touches more than one, and keys are statically |
| // partitioned between them. |
| // |
| // When we access an entry, we first select a sector number based off its key, |
| // and then within the sector we choose kAssociativity (4) possible |
| // directory entries storing it, and the appropriate directory entry then |
| // points to some number of blocks containing the object's payload. |
| // |
| // In each sector we store: |
| // |
| // 1) Freelist --- block number of a free block, or -1 (kInvalidBlock) if there |
| // are none. Further free blocks are linked via the block successor list. |
| // |
| // 2) LRU front/rear links into the cache directory. |
| // |
| // 3) Various statistics (see struct SectorStats in shared_mem_cache_data.h |
| // for the list) |
| // |
| // Padding to align to 8. |
| // |
| // 4) Sector mutex that is used to protect the metadata (but is released while |
| // copying the payload). |
| // |
| // Padding to align to 8. |
| // |
| // 5) Block successor list. This is used to both link the blocks in each file |
| // and to link together the freelist. This is encoded as an array, indexed |
| // by block number, with values being successors, or -1 (kInvalidBlock) |
| // for end of list. |
| // |
| // Padding to align to 8. |
| // |
| // 6) The cache directory. This is an array of CacheEntry structures. |
| // (But note that the size of the hash portion is dependent on the Hasher; |
| // and the struct is padded to be 8-aligned). |
| // |
| // Padding to align to block size. |
| // |
| // 7) The data blocks. These contain the actual payload. |
| // |
| // ---------------------------------------------------------------------------- |
| // Cache directory usage |
| // ---------------------------------------------------------------------------- |
| // |
// Presently we operate in a 4-way skew-associative fashion:
// each key determines 4 (very rarely identical) positions in the directory
// that may be used to store it. We check all of them for lookup/overwrite, and
// use timestamps to determine replacement candidates. (Experiments have shown
// that 2-way produced way too many extra conflicts).
| // |
| // ---------------------------------------------------------------------------- |
| // Cache entry format |
| // ---------------------------------------------------------------------------- |
| // |
| // The hash_bytes field contains the object key. As noted above, its |
| // length may vary with the hasher in use. |
| // |
| // last_use_timestamp_ms denotes when the entry was last touched, for |
| // associativity replacement. |
| // |
| // byte_size is the size of the actual payload in bytes (not counting |
| // internal fragmentation or our bookkeeping overhead). |
| // |
| // lru_next/lru_prev are used to form an inline doubly-linked LRU chain |
| // of non-free entries in case we need to free up some blocks on insertion |
| // because the freelist doesn't have enough. |
| // |
| // first_block is the block number of the first block (kInvalidBlock if the |
| // entry is 0-byte or not used for data). Later blocks can be found by |
| // following the block successor list. |
| // |
// TODO(morlovich): What if we try to outline a 500MiB file, which would make
// ::Put() fail, but the filter would proceed anyway, as it has no way of
// knowing?
| // |
| // creating and open_count are used to lock the particular entry for |
| // reading or writing while the main sector lock is released. |
| // |
| // The following are the possible combinations: |
| // Creating? Open_count |
| // False 0 Entry unlocked --- can read, write, etc. freely |
| // False n > 0 n processes reading. More readers can freely join. |
| // True n > 0 n processes reading. Writer waiting for them to |
| // finish. Readers can't join. |
| // True 0 Writer working. |
| // |
| // For now, writers wait in sleep loop, while readers simply fail/miss. |
| // |
| // TODO(morlovich): Evaluate using chaining and one more layer of indirection |
| // instead, as it should hopefully produce much better utilization and avoid |
| // conflict misses entirely. |
| |
| #include "pagespeed/kernel/sharedmem/shared_mem_cache.h" |
| |
| #include <cstddef> // for size_t |
| #include <cstring> |
| #include <map> |
| #include <vector> |
| #include <utility> // for pair |
| |
| #include "base/logging.h" |
| #include "pagespeed/kernel/base/abstract_mutex.h" |
| #include "pagespeed/kernel/base/abstract_shared_mem.h" |
| #include "pagespeed/kernel/base/base64_util.h" |
| #include "pagespeed/kernel/base/basictypes.h" |
| #include "pagespeed/kernel/base/cache_interface.h" |
| #include "pagespeed/kernel/base/hasher.h" |
| #include "pagespeed/kernel/base/message_handler.h" |
| #include "pagespeed/kernel/base/proto_util.h" |
| #include "pagespeed/kernel/base/scoped_ptr.h" |
| #include "pagespeed/kernel/base/shared_string.h" |
| #include "pagespeed/kernel/base/stl_util.h" |
| #include "pagespeed/kernel/base/string.h" |
| #include "pagespeed/kernel/base/string_util.h" |
| #include "pagespeed/kernel/base/timer.h" |
| #include "pagespeed/kernel/sharedmem/shared_mem_cache_data.h" |
| #include "pagespeed/kernel/sharedmem/shared_mem_cache_snapshot.pb.h" |
| |
| namespace net_instaweb { |
| |
| using SharedMemCacheData::BlockNum; |
| using SharedMemCacheData::BlockVector; |
| using SharedMemCacheData::CacheEntry; |
| using SharedMemCacheData::EntryNum; |
| using SharedMemCacheData::Sector; |
| using SharedMemCacheData::SectorStats; |
| using SharedMemCacheData::kInvalidBlock; |
| using SharedMemCacheData::kInvalidEntry; |
| using SharedMemCacheData::kHashSize; |
| |
| namespace { |
| |
| bool IsAllNil(const StringPiece& raw_hash) { |
| bool all_nil = true; |
| for (size_t c = 0; c < raw_hash.length(); ++c) { |
| if (raw_hash[c] != '\0') { |
| all_nil = false; |
| break; |
| } |
| } |
| return all_nil; |
| } |
| |
| GoogleString FormatSize(size_t size) { |
| return Integer64ToString(static_cast<int64>(size)); |
| } |
| |
| // A couple of debug helpers. |
| #ifndef NDEBUG |
| |
| GoogleString DebugTextHash(const GoogleString& raw_hash) { |
| GoogleString out; |
| Web64Encode(raw_hash, &out); |
| return out; |
| } |
| |
| GoogleString DebugTextHash(CacheEntry* entry, size_t size) { |
| return DebugTextHash(StringPiece(entry->hash_bytes, size).as_string()); |
| } |
| |
| #endif // NDEBUG |
| |
| } // namespace |
| |
// Records the cache geometry and collaborators. No shared memory is created
// or touched here --- that happens in Initialize()/Attach(). The pointer
// arguments (shm runtime, timer, hasher, handler) are stored but not owned.
template<size_t kBlockSize>
SharedMemCache<kBlockSize>::SharedMemCache(
    AbstractSharedMem* shm_runtime, const GoogleString& filename,
    Timer* timer, const Hasher* hasher, int sectors, int entries_per_sector,
    int blocks_per_sector, MessageHandler* handler)
    : shm_runtime_(shm_runtime),
      hasher_(hasher),
      timer_(timer),
      filename_(filename),
      num_sectors_(sectors),
      entries_per_sector_(entries_per_sector),
      blocks_per_sector_(blocks_per_sector),
      handler_(handler) {
}
| |
| template<size_t kBlockSize> |
| GoogleString SharedMemCache<kBlockSize>::FormatName() { |
| return StringPrintf("SharedMemCache<%d>", static_cast<int>(kBlockSize)); |
| } |
| |
| template<size_t kBlockSize> |
| SharedMemCache<kBlockSize>::~SharedMemCache() { |
| STLDeleteElements(§ors_); |
| } |
| |
// Common setup for Initialize() (parent == true, creates the shared-memory
// segment) and Attach() (parent == false, attaches to an existing one).
// Builds one Sector object per sector over the segment. Returns false and
// logs via handler_ on any failure.
template<size_t kBlockSize>
bool SharedMemCache<kBlockSize>::InitCache(bool parent) {
  size_t sector_size =
      Sector<kBlockSize>::RequiredSize(shm_runtime_, entries_per_sector_,
                                       blocks_per_sector_);
  size_t size = num_sectors_ * sector_size;

  if (parent) {
    segment_.reset(shm_runtime_->CreateSegment(filename_, size, handler_));
  } else {
    segment_.reset(shm_runtime_->AttachToSegment(filename_, size, handler_));
  }

  if (segment_.get() == NULL) {
    handler_->Message(
        kError, "SharedMemCache: can't %s segment %s of size %s",
        parent ? "create" : "attach",
        filename_.c_str(), FormatSize(size).c_str());
    return false;
  }

  // Drop any sector objects from a previous call before rebuilding.
  STLDeleteElements(&sectors_);
  sectors_.clear();
  for (int s = 0; s < num_sectors_; ++s) {
    // Sector s occupies the segment slice starting at offset s * sector_size.
    scoped_ptr<Sector<kBlockSize> > sec(
        new Sector<kBlockSize>(segment_.get(), s * sector_size,
                               entries_per_sector_, blocks_per_sector_));
    bool ok;
    if (parent) {
      ok = sec->Initialize(handler_);
    } else {
      ok = sec->Attach(handler_);
    }

    if (!ok) {
      handler_->Message(
          kError, "SharedMemCache: can't %s sector %d of cache %s",
          parent ? "create" : "attach", s, filename_.c_str());
      return false;
    }
    sectors_.push_back(sec.release());
  }

  // Only the parent logs the summary, so it appears once per cache.
  if (parent) {
    handler_->Message(
        kInfo, "SharedMemCache: %s, sectors = %d, entries/sector = %d, "
        " %d-byte blocks/sector = %d, total footprint: %s", filename_.c_str(),
        num_sectors_, entries_per_sector_, static_cast<int>(kBlockSize),
        blocks_per_sector_, FormatSize(size).c_str());
  }
  return true;
}
| |
// Creates the shared-memory segment and initializes all sectors. Call this
// in the parent process, before forking. Returns false on failure.
template<size_t kBlockSize>
bool SharedMemCache<kBlockSize>::Initialize() {
  return InitCache(true);
}
| |
// Attaches to an already-created segment. Call this in child processes,
// after the parent has run Initialize(). Returns false on failure.
template<size_t kBlockSize>
bool SharedMemCache<kBlockSize>::Attach() {
  return InitCache(false);
}
| |
// Static helper: destroys the named shared-memory segment backing a cache.
// Intended for final teardown after all users have detached.
template<size_t kBlockSize>
void SharedMemCache<kBlockSize>::GlobalCleanup(
    AbstractSharedMem* shm_runtime, const GoogleString& filename,
    MessageHandler* message_handler) {
  shm_runtime->DestroySegment(filename, message_handler);
}
| |
| template<size_t kBlockSize> |
| void SharedMemCache<kBlockSize>::ComputeDimensions( |
| int64 size_kb, |
| int block_entry_ratio, |
| int sectors, |
| int* entries_per_sector_out, |
| int* blocks_per_sector_out, |
| int64* size_cap_out) { |
| int64 size = size_kb * 1024; |
| const int kEntrySize = sizeof(CacheEntry); |
| // Footprint of an entry is kEntrySize bytes. Block is kBlockSize + 4 |
| // bytes for successor list. We ignore sector headers for the math since |
| // negligible. So: |
| // |
| // Size = (kBlockSize + 4) * Blocks + kEntrySize * Entries |
| // = (kBlockSize + 4) * (Blocks/Entries) * Entries + kEntrySize * Entries |
| // Entries = Size / ((kBlockSize + 4) * (Blocks/Entries) + kEntrySize) |
| // Blocks = Entries * (Blocks/Entries) |
| // |
| // We also divide things up into some number of sectors to lower contention, |
| // which reduces 'size' proportionally. |
| size /= sectors; |
| *entries_per_sector_out = |
| size / ((kBlockSize + 4) * block_entry_ratio + kEntrySize); |
| *blocks_per_sector_out = *entries_per_sector_out * block_entry_ratio; |
| |
| // The cache sets a size cap of 1/8'th of a sector for object size; let our |
| // client know. |
| *size_cap_out = *blocks_per_sector_out * kBlockSize / 8; |
| } |
| |
| template<size_t kBlockSize> |
| GoogleString SharedMemCache<kBlockSize>::DumpStats() { |
| SectorStats aggregate; |
| for (size_t c = 0; c < sectors_.size(); ++c) { |
| sectors_[c]->mutex()->Lock(); |
| aggregate.Add(*sectors_[c]->sector_stats()); |
| sectors_[c]->mutex()->Unlock(); |
| } |
| |
| return aggregate.Dump(entries_per_sector_* num_sectors_, |
| blocks_per_sector_ * num_sectors_); |
| } |
| |
// Appends the contents of sector 'sector_num' to *dest, walking the LRU
// chain starting at the oldest entry. The sector lock is held for the whole
// walk, so each copied payload is internally consistent.
template<size_t kBlockSize>
void SharedMemCache<kBlockSize>::AddSectorToSnapshot(
    int sector_num, SharedMemCacheDump* dest) {
  CHECK_LE(0, sector_num);
  CHECK_LT(sector_num, num_sectors_);

  Sector<kBlockSize>* sector = sectors_[sector_num];
  sector->mutex()->Lock();

  EntryNum cur = sector->OldestEntryNum();
  while (cur != kInvalidEntry) {
    CacheEntry* cur_entry = sector->EntryAt(cur);

    // It's possible that the sector got unlocked while a Put is
    // updating the payload for an entry. In that case, the entry will
    // have its creating bit set (but the metadata will be valid).
    // We skip those.
    if (!cur_entry->creating) {
      SharedMemCacheDumpEntry* dump_entry = dest->add_entry();
      dump_entry->set_raw_key(cur_entry->hash_bytes, kHashSize);
      dump_entry->set_last_use_timestamp_ms(cur_entry->last_use_timestamp_ms);

      // Gather value by concatenating the entry's blocks; the last block
      // may be partially filled, which BytesInPortion accounts for.
      BlockVector blocks;
      sector->BlockListForEntry(cur_entry, &blocks);

      size_t total_blocks = blocks.size();
      for (size_t b = 0; b < total_blocks; ++b) {
        int bytes = sector->BytesInPortion(cur_entry->byte_size, b,
                                           total_blocks);
        dump_entry->mutable_value()->append(
            sector->BlockBytes(blocks[b]), bytes);
      }
    }
    // Step along the LRU chain via the entry's lru_prev link.
    cur = cur_entry->lru_prev;
  }

  sector->mutex()->Unlock();
}
| |
| template<size_t kBlockSize> |
| void SharedMemCache<kBlockSize>::RestoreSnapshot( |
| const SharedMemCacheDump& dump) { |
| for (int i = 0; i < dump.entry_size(); ++i) { |
| const SharedMemCacheDumpEntry& entry = dump.entry(i); |
| |
| // The code below assumes that the raw hash is the right size, so make sure |
| // to detect this particular corruption to avoid crashing. |
| if (entry.raw_key().size() != kHashSize) { |
| return; |
| } |
| |
| SharedString value(entry.value()); |
| PutRawHash(entry.raw_key(), entry.last_use_timestamp_ms(), &value); |
| } |
| } |
| |
// Serializes 'dump' into *out. The previous contents of *out are discarded.
// Note that StringOutputStream only finalizes *out when the stream object is
// destroyed, which here happens as this function returns.
template<size_t kBlockSize>
void SharedMemCache<kBlockSize>::MarshalSnapshot(
    const SharedMemCacheDump& dump, GoogleString* out) {
  out->clear();
  StringOutputStream sstream(out);  // finalizes *out in destructor
  dump.SerializeToZeroCopyStream(&sstream);
}
| |
| |
// Parses a string produced by MarshalSnapshot back into *out. The parse
// result is deliberately unchecked here; RestoreSnapshot validates entries
// defensively before use.
template<size_t kBlockSize>
void SharedMemCache<kBlockSize>::DemarshalSnapshot(
    const GoogleString& marshaled, SharedMemCacheDump* out) {
  ArrayInputStream input(marshaled.data(), marshaled.size());
  out->ParseFromZeroCopyStream(&input);
}
| |
| template<size_t kBlockSize> |
| void SharedMemCache<kBlockSize>::Put(const GoogleString& key, |
| SharedString* value) { |
| int64 now_ms = timer_->NowMs(); |
| GoogleString raw_hash = ToRawHash(key); |
| PutRawHash(raw_hash, now_ms, value); |
| } |
| |
// Inserts or updates the value for an already-hashed key. Takes the sector
// lock; every path releases it before returning (on the successful paths
// PutIntoEntry performs the unlock as part of its work). Insertions may be
// silently dropped: if the value is too large, or if all candidate slots are
// busy with concurrent readers/writers.
template<size_t kBlockSize>
void SharedMemCache<kBlockSize>::PutRawHash(
    const GoogleString& raw_hash,
    int64 last_use_timestamp_ms,
    SharedString* value) {
  // See also ::ComputeDimensions
  const size_t kMaxSize = MaxValueSize();

  // Values above the size cap are rejected outright (see the TODO at the top
  // of this file about callers not learning of this).
  size_t value_size = static_cast<size_t>(value->size());
  if (value_size > kMaxSize) {
    handler_->Message(
        kInfo, "Unable to insert object of size: %s, cache limit is: %s",
        FormatSize(value->size()).c_str(),
        FormatSize(kMaxSize).c_str());
    return;
  }

  Position pos;
  ExtractPosition(raw_hash, &pos);

  Sector<kBlockSize>* sector = sectors_[pos.sector];
  SectorStats* stats = sector->sector_stats();
  sector->mutex()->Lock();
  ++stats->num_put;

  // See if our key already exists. Note that if it does, we will attempt to
  // write even if there are readers (we will wait for them to finish);
  // but not if there is another writer, in which case we just give up.
  // It is important, however, that we always exit if the key matches,
  // so we don't end up creating a second copy!
  for (int p = 0; p < kAssociativity; ++p) {
    EntryNum cand_key = pos.keys[p];
    CacheEntry* cand = sector->EntryAt(cand_key);
    if (KeyMatch(cand, raw_hash)) {
      if (!cand->creating) {
        ++stats->num_put_update;
        EnsureReadyForWriting(sector, cand);
        // PutIntoEntry releases the sector lock.
        PutIntoEntry(sector, cand_key, last_use_timestamp_ms, value);
      } else {
        // Another writer owns this entry right now; drop our Put.
        ++stats->num_put_concurrent_create;
        sector->mutex()->Unlock();
      }
      return;
    }
  }

  // We don't have a current entry with our key, but see if we can overwrite
  // something unrelated. In this case, we even give up if there are only
  // readers, as it's unclear that they are any less important than us.
  // Among the writable candidates, pick the least-recently-used one.
  EntryNum best_key = kInvalidEntry;
  CacheEntry* best = NULL;
  for (int p = 0; p < kAssociativity; ++p) {
    EntryNum cand_key = pos.keys[p];
    CacheEntry* cand = sector->EntryAt(cand_key);
    if (Writeable(cand)) {
      if ((best_key == kInvalidEntry) ||
          (cand->last_use_timestamp_ms < best->last_use_timestamp_ms)) {
        best = cand;
        best_key = cand_key;
      }
    }
  }

  if (best_key == kInvalidEntry) {
    // All slots busy. Giving up.
    ++stats->num_put_concurrent_full_set;
    sector->mutex()->Unlock();
    return;
  }

  // Count it as a replacement only if the victim slot actually held data
  // (non-zero size, or a non-nil key for a zero-byte object).
  if (best->byte_size != 0 ||
      !IsAllNil(StringPiece(best->hash_bytes, kHashSize))) {
    ++stats->num_put_replace;
  }

  // Wait for readers before touching the key.
  EnsureReadyForWriting(sector, best);
  std::memcpy(best->hash_bytes, raw_hash.data(), kHashSize);
  // PutIntoEntry releases the sector lock.
  PutIntoEntry(sector, best_key, last_use_timestamp_ms, value);
}
| |
// Writes *value into entry_num. Preconditions: the sector lock is held, the
// entry has its creating bit set, and there are no readers (open_count == 0)
// --- EnsureReadyForWriting establishes these. Always releases the sector
// lock before returning; the payload copy itself happens with the lock
// dropped, relying on the creating bit to keep the entry pinned.
template<size_t kBlockSize>
void SharedMemCache<kBlockSize>::PutIntoEntry(
    Sector<kBlockSize>* sector, EntryNum entry_num,
    int64 last_use_timestamp_ms, SharedString* value) {
  const char* data = value->data();

  CacheEntry* entry = sector->EntryAt(entry_num);
  DCHECK(entry->creating);
  DCHECK_EQ(0u, entry->open_count);

  // Adjust space allocation: start from whatever blocks the entry already
  // holds (e.g. when overwriting a previous value).
  size_t want_blocks = sector->DataBlocksForSize(value->size());
  BlockVector blocks;
  sector->BlockListForEntry(entry, &blocks);

  // Grab more room if needed.
  if (blocks.size() < want_blocks) {
    if (!TryAllocateBlocks(sector, want_blocks - blocks.size(), &blocks)) {
      // Allocation failed. We torpedo the entry, free all the blocks
      // (both those it has originally and any the above call picked up),
      // and fail the insertion. This should be pretty much impossible.
      // TODO(morlovich): log warning?
      sector->ReturnBlocksToFreeList(blocks);
      entry->creating = false;
      MarkEntryFree(sector, entry_num);
      sector->mutex()->Unlock();
      return;
    }
  }

  // Free up any room we don't need.
  if (blocks.size() > want_blocks) {
    BlockVector extras;
    while (blocks.size() > want_blocks) {
      extras.push_back(blocks.back());
      blocks.pop_back();
    }
    sector->ReturnBlocksToFreeList(extras);
  }

  entry->byte_size = value->size();
  TouchEntry(sector, last_use_timestamp_ms, entry_num);

  // Write out successor list for the blocks we use, and point the entry to it.
  sector->LinkBlockSuccessors(blocks);

  if (!blocks.empty()) {
    entry->first_block = blocks[0];
  } else {
    // Zero-byte values use no blocks at all.
    entry->first_block = kInvalidBlock;
  }

  sector->mutex()->Unlock();

  // Now we can write out the data. We can do it w/o the lock held
  // since we've already removed them from the freelist, and the LRU/directory
  // entry is locked, so can't be concurrently freed.

  for (size_t b = 0; b < want_blocks; ++b) {
    size_t bytes = sector->BytesInPortion(entry->byte_size, b, want_blocks);
    std::memcpy(sector->BlockBytes(blocks[b]), data + b * kBlockSize, bytes);
  }

  // We're done, clear creating bit.
  sector->mutex()->Lock();
  entry->creating = false;
  sector->mutex()->Unlock();
}
| |
// Looks up 'key', reporting kAvailable (with the value) or kNotFound through
// 'callback'. Takes the sector lock; on a hit, lock ownership passes to
// GetFromEntry, which releases it. On a miss we release it here.
template<size_t kBlockSize>
void SharedMemCache<kBlockSize>::Get(const GoogleString& key,
                                     Callback* callback) {
  GoogleString raw_hash = ToRawHash(key);
  Position pos;
  ExtractPosition(raw_hash, &pos);
  Sector<kBlockSize>* sector = sectors_[pos.sector];
  sector->mutex()->Lock();
  SectorStats* stats = sector->sector_stats();
  ++stats->num_get;

  // Probe all kAssociativity candidate slots for our key.
  for (int p = 0; p < kAssociativity; ++p) {
    EntryNum cand_key = pos.keys[p];
    CacheEntry* cand = sector->EntryAt(cand_key);
    if (KeyMatch(cand, raw_hash)) {
      ++stats->num_get_hit;
      GetFromEntry(key, sector, cand_key, callback);
      return;
    }
  }

  sector->mutex()->Unlock();
  ValidateAndReportResult(key, kNotFound, callback);
}
| |
// Copies out the payload of a matched entry and reports it via 'callback'.
// Called with the sector lock held; always releases it. The copy itself runs
// with the lock dropped, pinning the entry via open_count so a writer cannot
// reclaim it mid-read.
template<size_t kBlockSize>
void SharedMemCache<kBlockSize>::GetFromEntry(
    const GoogleString& key,
    Sector<kBlockSize>* sector,
    EntryNum entry_num,
    Callback* callback) {
  CacheEntry* entry = sector->EntryAt(entry_num);
  if (entry->creating) {
    // For now, consider concurrent creation a miss.
    sector->mutex()->Unlock();
    ValidateAndReportResult(key, kNotFound, callback);
    return;
  }
  // Register ourselves as a reader; writers spin until this drops to 0.
  ++entry->open_count;

  TouchEntry(sector, timer_->NowMs(), entry_num);

  BlockVector blocks;
  sector->BlockListForEntry(entry, &blocks);

  // Drop the lock as the entry is now opened for reading.
  sector->mutex()->Unlock();

  // Collect the contents block-by-block into the callback's value buffer.
  SharedString* out = callback->value();
  out->DetachAndClear();
  out->Extend(entry->byte_size);

  size_t total_blocks = blocks.size();
  int pos = 0;
  for (size_t b = 0; b < total_blocks; ++b) {
    int bytes = sector->BytesInPortion(entry->byte_size, b, total_blocks);
    out->WriteAt(pos, sector->BlockBytes(blocks[b]), bytes);
    pos += bytes;
  }

  // Now reduce the reference count.
  // TODO(morlovich): atomic ops?
  sector->mutex()->Lock();
  --entry->open_count;
  sector->mutex()->Unlock();
  ValidateAndReportResult(key, kAvailable, callback);
}
| |
| template<size_t kBlockSize> |
| void SharedMemCache<kBlockSize>::Delete(const GoogleString& key) { |
| GoogleString raw_hash = ToRawHash(key); |
| Position pos; |
| ExtractPosition(raw_hash, &pos); |
| |
| Sector<kBlockSize>* sector = sectors_[pos.sector]; |
| sector->mutex()->Lock(); |
| |
| for (int p = 0; p < kAssociativity; ++p) { |
| EntryNum cand_key = pos.keys[p]; |
| if (KeyMatch(sector->EntryAt(cand_key), raw_hash)) { |
| DeleteEntry(sector, cand_key); |
| return; |
| } |
| } |
| |
| sector->mutex()->Unlock(); |
| } |
| |
// Frees a matched directory entry: returns its blocks to the freelist and
// zeroes its metadata. Called with the sector lock held; always releases it.
// If another writer currently owns the entry (creating bit set), this Delete
// is dropped.
template<size_t kBlockSize>
void SharedMemCache<kBlockSize>::DeleteEntry(Sector<kBlockSize>* sector,
                                             EntryNum entry_num) {
  CacheEntry* entry = sector->EntryAt(entry_num);
  if (entry->creating) {
    // A multiple writers (Put or Delete) race. Let the other one proceed,
    // drop this one. (Call to EnsureReadyForWriting below will deal with any
    // outstanding readers).
    sector->mutex()->Unlock();
    return;
  }
  // Sets the creating bit and waits for readers to drain.
  EnsureReadyForWriting(sector, entry);
  BlockVector blocks;
  sector->BlockListForEntry(entry, &blocks);
  sector->ReturnBlocksToFreeList(blocks);
  entry->creating = false;
  MarkEntryFree(sector, entry_num);
  sector->mutex()->Unlock();
}
| |
| template<size_t kBlockSize> |
| void SharedMemCache<kBlockSize>::SanityCheck() { |
| for (int i = 0; i < num_sectors_; ++i) { |
| Sector<kBlockSize>* sector = sectors_[i]; |
| sector->mutex()->Lock(); |
| |
| // Make sure that all blocks are accounted for exactly once. |
| |
| // First collect all blocks referred to from entries |
| std::map<BlockNum, int> block_occur; |
| for (EntryNum e = 0; e < entries_per_sector_; ++e) { |
| CacheEntry* entry = sector->EntryAt(e); |
| BlockVector blocks; |
| sector->BlockListForEntry(entry, &blocks); |
| for (size_t i = 0; i < blocks.size(); ++i) { |
| ++block_occur[blocks[i]]; |
| } |
| } |
| |
| // Now from freelist. We re-use the API for convenience. |
| BlockVector freelist_blocks; |
| sector->AllocBlocksFromFreeList(blocks_per_sector_, &freelist_blocks); |
| for (size_t i = 0; i < freelist_blocks.size(); ++i) { |
| ++block_occur[freelist_blocks[i]]; |
| } |
| sector->ReturnBlocksToFreeList(freelist_blocks); |
| |
| CHECK(block_occur.size() == static_cast<size_t>(blocks_per_sector_)); |
| for (std::map<BlockNum, int>::iterator i = block_occur.begin(); |
| i != block_occur.end(); ++i) { |
| CHECK_EQ(1, i->second); |
| } |
| |
| sector->mutex()->Unlock(); |
| } |
| } |
| |
// Tries to gather at least 'goal' additional blocks into *blocks: first from
// the freelist, then by evicting writable entries starting from the cold end
// of the LRU. Returns true on success. On failure, any blocks gathered so
// far stay in *blocks --- the caller is responsible for disposing of them.
// Called with the sector lock held; does not release it.
template<size_t kBlockSize>
bool SharedMemCache<kBlockSize>::TryAllocateBlocks(
    Sector<kBlockSize>* sector, int goal, BlockVector* blocks) {
  // See how much we have in freelist.
  int got = sector->AllocBlocksFromFreeList(goal, blocks);

  // If not enough, start walking back in LRU and take blocks from those files.
  EntryNum entry_num = sector->OldestEntryNum();
  while ((entry_num != kInvalidEntry) && (got < goal)) {
    CacheEntry* entry = sector->EntryAt(entry_num);
    if (Writeable(entry)) {
      // Evict: claim this entry's blocks and restart from the (new) oldest.
      got += sector->BlockListForEntry(entry, blocks);
      MarkEntryFree(sector, entry_num);
      entry_num = sector->OldestEntryNum();
    } else {
      // Entry is busy (readers or a writer); skip past it in the chain.
      entry_num = entry->lru_prev;
    }
  }

  return (got >= goal);
}
| |
| template<size_t kBlockSize> |
| void SharedMemCache<kBlockSize>::MarkEntryFree(Sector<kBlockSize>* sector, |
| EntryNum entry_num) { |
| sector->UnlinkEntryFromLRU(entry_num); |
| CacheEntry* entry = sector->EntryAt(entry_num); |
| CHECK(Writeable(entry)); |
| std::memset(entry->hash_bytes, 0, kHashSize); |
| entry->last_use_timestamp_ms = 0; |
| entry->byte_size = 0; |
| entry->first_block = kInvalidBlock; |
| } |
| |
| template<size_t kBlockSize> |
| void SharedMemCache<kBlockSize>::TouchEntry(Sector<kBlockSize>* sector, |
| int64 last_use_timestamp_ms, |
| EntryNum entry_num) { |
| CacheEntry* entry = sector->EntryAt(entry_num); |
| sector->UnlinkEntryFromLRU(entry_num); |
| sector->InsertEntryIntoLRU(entry_num); |
| entry->last_use_timestamp_ms = last_use_timestamp_ms; |
| } |
| |
| template<size_t kBlockSize> |
| bool SharedMemCache<kBlockSize>::Writeable(const CacheEntry* entry) { |
| return (entry->open_count == 0) && !entry->creating; |
| } |
| |
| template<size_t kBlockSize> |
| bool SharedMemCache<kBlockSize>::KeyMatch(CacheEntry* entry, |
| const GoogleString& raw_hash) { |
| DCHECK_EQ(kHashSize, raw_hash.size()); |
| return 0 == std::memcmp(entry->hash_bytes, raw_hash.data(), kHashSize); |
| } |
| |
| template<size_t kBlockSize> |
| GoogleString SharedMemCache<kBlockSize>::ToRawHash(const GoogleString& key) { |
| GoogleString raw_hash = hasher_->RawHash(key); |
| DCHECK(raw_hash.size() >= kHashSize); |
| if (raw_hash.size() > kHashSize) { |
| raw_hash.resize(kHashSize); |
| } |
| |
| // Avoid all 0x00, that's special |
| if (IsAllNil(raw_hash)) { |
| raw_hash[0] = ' '; |
| } |
| return raw_hash; |
| } |
| |
| template<size_t kBlockSize> |
| void SharedMemCache<kBlockSize>::ExtractPosition( |
| const GoogleString& raw_hash, |
| SharedMemCache<kBlockSize>::Position* out_pos) { |
| // We need at least 13 bytes of hash in code below, as we split it as follows: |
| // keys[0] from hash[0..3] |
| // keys[1] from hash[4..7] |
| // keys[2] from hash[8..11] |
| // sector number (hash[12]) |
| DCHECK_GE(raw_hash.length(), 13u); |
| |
| // Should also be consistent with out config |
| DCHECK_EQ(raw_hash.length(), kHashSize); |
| |
| // This implementation only supports associativity 4, so it will need to be |
| // readjusted if we decide to use an another setting. |
| COMPILE_ASSERT(kAssociativity == 4, need_different_code_for_other_assoc); |
| |
| // Get the sector # from the [12]th byte, being careful not to sign-extend; |
| // we have to watch out for negatives for % |
| int raw_sector = static_cast<int>(static_cast<unsigned char>(raw_hash[12])); |
| out_pos->sector = (raw_sector % sectors_.size()); |
| |
| const uint32* keys = reinterpret_cast<const uint32*>(raw_hash.data()); |
| out_pos->keys[0] = static_cast<EntryNum>(keys[0] % entries_per_sector_); |
| out_pos->keys[1] = static_cast<EntryNum>(keys[1] % entries_per_sector_); |
| out_pos->keys[2] = static_cast<EntryNum>(keys[2] % entries_per_sector_); |
| |
| // For entry 3, we potentially already used lower bits of key[3] word for |
| // sector, so instead use higher-bits from keys[0] as lower ones. |
| uint32 key3 = (keys[0] >> 16) | (keys[1] << 16); |
| out_pos->keys[3] = static_cast<EntryNum>(key3 % entries_per_sector_); |
| } |
| |
// Claims exclusive write access to 'entry'. Called with the sector lock
// held; returns with it still held, but may release and reacquire it while
// sleeping in the spin loop below.
template<size_t kBlockSize>
void SharedMemCache<kBlockSize>::EnsureReadyForWriting(
    Sector<kBlockSize>* sector, CacheEntry* entry) {
  // It is possible that as we are starting to write, some other processes
  // are still in the middle of copying in read data for this entry, so we have
  // to make sure they finish up first.
  //
  // First, make sure no other readers or writers can join. With ->creating set
  // to true they will both avoid this entry. (And there are no other writers
  // as if there were, we would have given up ourselves).
  //
  entry->creating = true;

  // Now just wait for previous readers to leave. The lock must be dropped
  // while sleeping so the readers can actually make progress and decrement
  // open_count.
  while (entry->open_count > 0) {
    ++sector->sector_stats()->num_put_spins;
    sector->mutex()->Unlock();
    timer_->SleepUs(50);
    sector->mutex()->Lock();
  }
}
| |
// Explicit instantiations for the block sizes actually used.
template class SharedMemCache<64>;  // metadata ("rname") cache
template class SharedMemCache<512>;  // testing
template class SharedMemCache<4096>;  // HTTP cache
| |
| } // namespace net_instaweb |