| // Copyright (c) 2011-present, Facebook, Inc. All rights reserved. |
| // This source code is licensed under both the GPLv2 (found in the |
| // COPYING file in the root directory) and Apache 2.0 License |
| // (found in the LICENSE.Apache file in the root directory). |
| // |
| #pragma once |
| |
| #include <string> |
| #include <list> |
| #include <vector> |
| #include <set> |
| #include <deque> |
| |
| #include "db/dbformat.h" |
| #include "db/memtable.h" |
| #include "db/range_del_aggregator.h" |
| #include "monitoring/instrumented_mutex.h" |
| #include "rocksdb/db.h" |
| #include "rocksdb/iterator.h" |
| #include "rocksdb/options.h" |
| #include "rocksdb/types.h" |
| #include "util/autovector.h" |
| #include "util/filename.h" |
| #include "util/log_buffer.h" |
| |
| namespace rocksdb { |
| |
| class ColumnFamilyData; |
| class InternalKeyComparator; |
| class InstrumentedMutex; |
| class MergeIteratorBuilder; |
| |
| // keeps a list of immutable memtables in a vector. the list is immutable |
| // if refcount is bigger than one. It is used as a state for Get() and |
| // Iterator code paths |
| // |
| // This class is not thread-safe. External synchronization is required |
| // (such as holding the db mutex or being on the write thread). |
| class MemTableListVersion { |
| public: |
| explicit MemTableListVersion(size_t* parent_memtable_list_memory_usage, |
| MemTableListVersion* old = nullptr); |
| explicit MemTableListVersion(size_t* parent_memtable_list_memory_usage, |
| int max_write_buffer_number_to_maintain); |
| |
| void Ref(); |
| void Unref(autovector<MemTable*>* to_delete = nullptr); |
| |
| // Search all the memtables starting from the most recent one. |
| // Return the most recent value found, if any. |
| // |
| // If any operation was found for this key, its most recent sequence number |
| // will be stored in *seq on success (regardless of whether true/false is |
| // returned). Otherwise, *seq will be set to kMaxSequenceNumber. |
| bool Get(const LookupKey& key, std::string* value, Status* s, |
| MergeContext* merge_context, RangeDelAggregator* range_del_agg, |
| SequenceNumber* seq, const ReadOptions& read_opts, |
| bool* is_blob_index = nullptr); |
| |
| bool Get(const LookupKey& key, std::string* value, Status* s, |
| MergeContext* merge_context, RangeDelAggregator* range_del_agg, |
| const ReadOptions& read_opts, bool* is_blob_index = nullptr) { |
| SequenceNumber seq; |
| return Get(key, value, s, merge_context, range_del_agg, &seq, read_opts, |
| is_blob_index); |
| } |
| |
| // Similar to Get(), but searches the Memtable history of memtables that |
| // have already been flushed. Should only be used from in-memory only |
| // queries (such as Transaction validation) as the history may contain |
| // writes that are also present in the SST files. |
| bool GetFromHistory(const LookupKey& key, std::string* value, Status* s, |
| MergeContext* merge_context, |
| RangeDelAggregator* range_del_agg, SequenceNumber* seq, |
| const ReadOptions& read_opts, |
| bool* is_blob_index = nullptr); |
| bool GetFromHistory(const LookupKey& key, std::string* value, Status* s, |
| MergeContext* merge_context, |
| RangeDelAggregator* range_del_agg, |
| const ReadOptions& read_opts, |
| bool* is_blob_index = nullptr) { |
| SequenceNumber seq; |
| return GetFromHistory(key, value, s, merge_context, range_del_agg, &seq, |
| read_opts, is_blob_index); |
| } |
| |
| Status AddRangeTombstoneIterators(const ReadOptions& read_opts, Arena* arena, |
| RangeDelAggregator* range_del_agg); |
| Status AddRangeTombstoneIterators( |
| const ReadOptions& read_opts, |
| std::vector<InternalIterator*>* range_del_iters); |
| |
| void AddIterators(const ReadOptions& options, |
| std::vector<InternalIterator*>* iterator_list, |
| Arena* arena); |
| |
| void AddIterators(const ReadOptions& options, |
| MergeIteratorBuilder* merge_iter_builder); |
| |
| uint64_t GetTotalNumEntries() const; |
| |
| uint64_t GetTotalNumDeletes() const; |
| |
| MemTable::MemTableStats ApproximateStats(const Slice& start_ikey, |
| const Slice& end_ikey); |
| |
| // Returns the value of MemTable::GetEarliestSequenceNumber() on the most |
| // recent MemTable in this list or kMaxSequenceNumber if the list is empty. |
| // If include_history=true, will also search Memtables in MemTableList |
| // History. |
| SequenceNumber GetEarliestSequenceNumber(bool include_history = false) const; |
| |
| private: |
| // REQUIRE: m is an immutable memtable |
| void Add(MemTable* m, autovector<MemTable*>* to_delete); |
| // REQUIRE: m is an immutable memtable |
| void Remove(MemTable* m, autovector<MemTable*>* to_delete); |
| |
| void TrimHistory(autovector<MemTable*>* to_delete); |
| |
| bool GetFromList(std::list<MemTable*>* list, const LookupKey& key, |
| std::string* value, Status* s, MergeContext* merge_context, |
| RangeDelAggregator* range_del_agg, SequenceNumber* seq, |
| const ReadOptions& read_opts, bool* is_blob_index = nullptr); |
| |
| void AddMemTable(MemTable* m); |
| |
| void UnrefMemTable(autovector<MemTable*>* to_delete, MemTable* m); |
| |
| friend class MemTableList; |
| |
| // Immutable MemTables that have not yet been flushed. |
| std::list<MemTable*> memlist_; |
| |
| // MemTables that have already been flushed |
| // (used during Transaction validation) |
| std::list<MemTable*> memlist_history_; |
| |
| // Maximum number of MemTables to keep in memory (including both flushed |
| // and not-yet-flushed tables). |
| const int max_write_buffer_number_to_maintain_; |
| |
| int refs_ = 0; |
| |
| size_t* parent_memtable_list_memory_usage_; |
| }; |
| |
| // This class stores references to all the immutable memtables. |
| // The memtables are flushed to L0 as soon as possible and in |
| // any order. If there are more than one immutable memtable, their |
| // flushes can occur concurrently. However, they are 'committed' |
| // to the manifest in FIFO order to maintain correctness and |
| // recoverability from a crash. |
| // |
| // |
| // Other than imm_flush_needed, this class is not thread-safe and requires |
| // external synchronization (such as holding the db mutex or being on the |
| // write thread.) |
| class MemTableList { |
| public: |
| // A list of memtables. |
| explicit MemTableList(int min_write_buffer_number_to_merge, |
| int max_write_buffer_number_to_maintain) |
| : imm_flush_needed(false), |
| min_write_buffer_number_to_merge_(min_write_buffer_number_to_merge), |
| current_(new MemTableListVersion(¤t_memory_usage_, |
| max_write_buffer_number_to_maintain)), |
| num_flush_not_started_(0), |
| commit_in_progress_(false), |
| flush_requested_(false) { |
| current_->Ref(); |
| current_memory_usage_ = 0; |
| } |
| |
| // Should not delete MemTableList without making sure MemTableList::current() |
| // is Unref()'d. |
| ~MemTableList() {} |
| |
| MemTableListVersion* current() { return current_; } |
| |
| // so that background threads can detect non-nullptr pointer to |
| // determine whether there is anything more to start flushing. |
| std::atomic<bool> imm_flush_needed; |
| |
| // Returns the total number of memtables in the list that haven't yet |
| // been flushed and logged. |
| int NumNotFlushed() const; |
| |
| // Returns total number of memtables in the list that have been |
| // completely flushed and logged. |
| int NumFlushed() const; |
| |
| // Returns true if there is at least one memtable on which flush has |
| // not yet started. |
| bool IsFlushPending() const; |
| |
| // Returns the earliest memtables that needs to be flushed. The returned |
| // memtables are guaranteed to be in the ascending order of created time. |
| void PickMemtablesToFlush(autovector<MemTable*>* mems); |
| |
| // Reset status of the given memtable list back to pending state so that |
| // they can get picked up again on the next round of flush. |
| void RollbackMemtableFlush(const autovector<MemTable*>& mems, |
| uint64_t file_number); |
| |
| // Commit a successful flush in the manifest file |
| Status InstallMemtableFlushResults( |
| ColumnFamilyData* cfd, const MutableCFOptions& mutable_cf_options, |
| const autovector<MemTable*>& m, VersionSet* vset, InstrumentedMutex* mu, |
| uint64_t file_number, autovector<MemTable*>* to_delete, |
| Directory* db_directory, LogBuffer* log_buffer); |
| |
| // New memtables are inserted at the front of the list. |
| // Takes ownership of the referenced held on *m by the caller of Add(). |
| void Add(MemTable* m, autovector<MemTable*>* to_delete); |
| |
| // Returns an estimate of the number of bytes of data in use. |
| size_t ApproximateMemoryUsage(); |
| |
| // Returns an estimate of the number of bytes of data used by |
| // the unflushed mem-tables. |
| size_t ApproximateUnflushedMemTablesMemoryUsage(); |
| |
| // Returns an estimate of the timestamp of the earliest key. |
| uint64_t ApproximateOldestKeyTime() const; |
| |
| // Request a flush of all existing memtables to storage. This will |
| // cause future calls to IsFlushPending() to return true if this list is |
| // non-empty (regardless of the min_write_buffer_number_to_merge |
| // parameter). This flush request will persist until the next time |
| // PickMemtablesToFlush() is called. |
| void FlushRequested() { flush_requested_ = true; } |
| |
| bool HasFlushRequested() { return flush_requested_; } |
| |
| // Copying allowed |
| // MemTableList(const MemTableList&); |
| // void operator=(const MemTableList&); |
| |
| size_t* current_memory_usage() { return ¤t_memory_usage_; } |
| |
| uint64_t GetMinLogContainingPrepSection(); |
| |
| private: |
| // DB mutex held |
| void InstallNewVersion(); |
| |
| const int min_write_buffer_number_to_merge_; |
| |
| MemTableListVersion* current_; |
| |
| // the number of elements that still need flushing |
| int num_flush_not_started_; |
| |
| // committing in progress |
| bool commit_in_progress_; |
| |
| // Requested a flush of all memtables to storage |
| bool flush_requested_; |
| |
| // The current memory usage. |
| size_t current_memory_usage_; |
| }; |
| |
| } // namespace rocksdb |