| // Copyright 2013 Facebook |
| /** |
| * RedisListIterator: |
| * An abstraction over the "list" concept (e.g.: for redis lists). |
| * Provides functionality to read, traverse, edit, and write these lists. |
| * |
| * Upon construction, the RedisListIterator is given a block of list data. |
| * Internally, it stores a pointer to the data and a pointer to current item. |
| * It also stores a "result" list that will be mutated over time. |
| * |
| * Traversal and mutation are done by "forward iteration". |
| * The Push() and Skip() methods will advance the iterator to the next item. |
| * However, Push() will also "write the current item to the result". |
| * Skip() will simply move to next item, causing current item to be dropped. |
| * |
| * Upon completion, the result (accessible by WriteResult()) will be saved. |
| * All "skipped" items will be gone; all "pushed" items will remain. |
| * |
| * @throws Any of the operations may throw a RedisListException if an invalid |
| * operation is performed or if the data is found to be corrupt. |
| * |
| * @notes By default, if WriteResult() is called part-way through iteration, |
| * it will automatically advance the iterator to the end, and Keep() |
| * all items that haven't been traversed yet. This may be subject |
| * to review. |
| * |
| * @notes Can access the "current" item via GetCurrent(), and other |
| * list-specific information such as Length(). |
| * |
| * @notes The internal representation is due to change at any time. Presently, |
| * the list is represented as follows: |
| * - 32-bit integer header: the number of items in the list |
| * - For each item: |
| * - 32-bit int (n): the number of bytes representing this item |
| * - n bytes of data: the actual data. |
| * |
| * @author Deon Nicholas (dnicholas@fb.com) |
| */ |
| |
| #pragma once |
| #ifndef ROCKSDB_LITE |
| |
| #include <string> |
| |
| #include "redis_list_exception.h" |
| #include "rocksdb/slice.h" |
| #include "util/coding.h" |
| |
| namespace rocksdb { |
| |
| /// An abstraction over the "list" concept. |
| /// All operations may throw a RedisListException |
| class RedisListIterator { |
| public: |
| /// Construct a redis-list-iterator based on data. |
| /// If the data is non-empty, it must formatted according to @notes above. |
| /// |
| /// If the data is valid, we can assume the following invariant(s): |
| /// a) length_, num_bytes_ are set correctly. |
| /// b) cur_byte_ always refers to the start of the current element, |
| /// just before the bytes that specify element length. |
| /// c) cur_elem_ is always the index of the current element. |
| /// d) cur_elem_length_ is always the number of bytes in current element, |
| /// excluding the 4-byte header itself. |
| /// e) result_ will always contain data_[0..cur_byte_) and a header |
| /// f) Whenever corrupt data is encountered or an invalid operation is |
| /// attempted, a RedisListException will immediately be thrown. |
| explicit RedisListIterator(const std::string& list_data) |
| : data_(list_data.data()), |
| num_bytes_(static_cast<uint32_t>(list_data.size())), |
| cur_byte_(0), |
| cur_elem_(0), |
| cur_elem_length_(0), |
| length_(0), |
| result_() { |
| // Initialize the result_ (reserve enough space for header) |
| InitializeResult(); |
| |
| // Parse the data only if it is not empty. |
| if (num_bytes_ == 0) { |
| return; |
| } |
| |
| // If non-empty, but less than 4 bytes, data must be corrupt |
| if (num_bytes_ < sizeof(length_)) { |
| ThrowError("Corrupt header."); // Will break control flow |
| } |
| |
| // Good. The first bytes specify the number of elements |
| length_ = DecodeFixed32(data_); |
| cur_byte_ = sizeof(length_); |
| |
| // If we have at least one element, point to that element. |
| // Also, read the first integer of the element (specifying the size), |
| // if possible. |
| if (length_ > 0) { |
| if (cur_byte_ + sizeof(cur_elem_length_) <= num_bytes_) { |
| cur_elem_length_ = DecodeFixed32(data_+cur_byte_); |
| } else { |
| ThrowError("Corrupt data for first element."); |
| } |
| } |
| |
| // At this point, we are fully set-up. |
| // The invariants described in the header should now be true. |
| } |
| |
| /// Reserve some space for the result_. |
| /// Equivalent to result_.reserve(bytes). |
| void Reserve(int bytes) { |
| result_.reserve(bytes); |
| } |
| |
| /// Go to next element in data file. |
| /// Also writes the current element to result_. |
| RedisListIterator& Push() { |
| WriteCurrentElement(); |
| MoveNext(); |
| return *this; |
| } |
| |
| /// Go to next element in data file. |
| /// Drops/skips the current element. It will not be written to result_. |
| RedisListIterator& Skip() { |
| MoveNext(); |
| --length_; // One less item |
| --cur_elem_; // We moved one forward, but index did not change |
| return *this; |
| } |
| |
| /// Insert elem into the result_ (just BEFORE the current element / byte) |
| /// Note: if Done() (i.e.: iterator points to end), this will append elem. |
| void InsertElement(const Slice& elem) { |
| // Ensure we are in a valid state |
| CheckErrors(); |
| |
| const int kOrigSize = static_cast<int>(result_.size()); |
| result_.resize(kOrigSize + SizeOf(elem)); |
| EncodeFixed32(result_.data() + kOrigSize, |
| static_cast<uint32_t>(elem.size())); |
| memcpy(result_.data() + kOrigSize + sizeof(uint32_t), elem.data(), |
| elem.size()); |
| ++length_; |
| ++cur_elem_; |
| } |
| |
| /// Access the current element, and save the result into *curElem |
| void GetCurrent(Slice* curElem) { |
| // Ensure we are in a valid state |
| CheckErrors(); |
| |
| // Ensure that we are not past the last element. |
| if (Done()) { |
| ThrowError("Invalid dereferencing."); |
| } |
| |
| // Dereference the element |
| *curElem = Slice(data_+cur_byte_+sizeof(cur_elem_length_), |
| cur_elem_length_); |
| } |
| |
| // Number of elements |
| int Length() const { |
| return length_; |
| } |
| |
| // Number of bytes in the final representation (i.e: WriteResult().size()) |
| int Size() const { |
| // result_ holds the currently written data |
| // data_[cur_byte..num_bytes-1] is the remainder of the data |
| return static_cast<int>(result_.size() + (num_bytes_ - cur_byte_)); |
| } |
| |
| // Reached the end? |
| bool Done() const { |
| return cur_byte_ >= num_bytes_ || cur_elem_ >= length_; |
| } |
| |
| /// Returns a string representing the final, edited, data. |
| /// Assumes that all bytes of data_ in the range [0,cur_byte_) have been read |
| /// and that result_ contains this data. |
| /// The rest of the data must still be written. |
| /// So, this method ADVANCES THE ITERATOR TO THE END before writing. |
| Slice WriteResult() { |
| CheckErrors(); |
| |
| // The header should currently be filled with dummy data (0's) |
| // Correctly update the header. |
| // Note, this is safe since result_ is a vector (guaranteed contiguous) |
| EncodeFixed32(&result_[0],length_); |
| |
| // Append the remainder of the data to the result. |
| result_.insert(result_.end(),data_+cur_byte_, data_ +num_bytes_); |
| |
| // Seek to end of file |
| cur_byte_ = num_bytes_; |
| cur_elem_ = length_; |
| cur_elem_length_ = 0; |
| |
| // Return the result |
| return Slice(result_.data(),result_.size()); |
| } |
| |
| public: // Static public functions |
| |
| /// An upper-bound on the amount of bytes needed to store this element. |
| /// This is used to hide representation information from the client. |
| /// E.G. This can be used to compute the bytes we want to Reserve(). |
| static uint32_t SizeOf(const Slice& elem) { |
| // [Integer Length . Data] |
| return static_cast<uint32_t>(sizeof(uint32_t) + elem.size()); |
| } |
| |
| private: // Private functions |
| |
| /// Initializes the result_ string. |
| /// It will fill the first few bytes with 0's so that there is |
| /// enough space for header information when we need to write later. |
| /// Currently, "header information" means: the length (number of elements) |
| /// Assumes that result_ is empty to begin with |
| void InitializeResult() { |
| assert(result_.empty()); // Should always be true. |
| result_.resize(sizeof(uint32_t),0); // Put a block of 0's as the header |
| } |
| |
| /// Go to the next element (used in Push() and Skip()) |
| void MoveNext() { |
| CheckErrors(); |
| |
| // Check to make sure we are not already in a finished state |
| if (Done()) { |
| ThrowError("Attempting to iterate past end of list."); |
| } |
| |
| // Move forward one element. |
| cur_byte_ += sizeof(cur_elem_length_) + cur_elem_length_; |
| ++cur_elem_; |
| |
| // If we are at the end, finish |
| if (Done()) { |
| cur_elem_length_ = 0; |
| return; |
| } |
| |
| // Otherwise, we should be able to read the new element's length |
| if (cur_byte_ + sizeof(cur_elem_length_) > num_bytes_) { |
| ThrowError("Corrupt element data."); |
| } |
| |
| // Set the new element's length |
| cur_elem_length_ = DecodeFixed32(data_+cur_byte_); |
| |
| return; |
| } |
| |
| /// Append the current element (pointed to by cur_byte_) to result_ |
| /// Assumes result_ has already been reserved appropriately. |
| void WriteCurrentElement() { |
| // First verify that the iterator is still valid. |
| CheckErrors(); |
| if (Done()) { |
| ThrowError("Attempting to write invalid element."); |
| } |
| |
| // Append the cur element. |
| result_.insert(result_.end(), |
| data_+cur_byte_, |
| data_+cur_byte_+ sizeof(uint32_t) + cur_elem_length_); |
| } |
| |
| /// Will ThrowError() if necessary. |
| /// Checks for common/ubiquitous errors that can arise after most operations. |
| /// This method should be called before any reading operation. |
| /// If this function succeeds, then we are guaranteed to be in a valid state. |
| /// Other member functions should check for errors and ThrowError() also |
| /// if an error occurs that is specific to it even while in a valid state. |
| void CheckErrors() { |
| // Check if any crazy thing has happened recently |
| if ((cur_elem_ > length_) || // Bad index |
| (cur_byte_ > num_bytes_) || // No more bytes |
| (cur_byte_ + cur_elem_length_ > num_bytes_) || // Item too large |
| (cur_byte_ == num_bytes_ && cur_elem_ != length_) || // Too many items |
| (cur_elem_ == length_ && cur_byte_ != num_bytes_)) { // Too many bytes |
| ThrowError("Corrupt data."); |
| } |
| } |
| |
| /// Will throw an exception based on the passed-in message. |
| /// This function is guaranteed to STOP THE CONTROL-FLOW. |
| /// (i.e.: you do not have to call "return" after calling ThrowError) |
| void ThrowError(const char* const msg = NULL) { |
| // TODO: For now we ignore the msg parameter. This can be expanded later. |
| throw RedisListException(); |
| } |
| |
| private: |
| const char* const data_; // A pointer to the data (the first byte) |
| const uint32_t num_bytes_; // The number of bytes in this list |
| |
| uint32_t cur_byte_; // The current byte being read |
| uint32_t cur_elem_; // The current element being read |
| uint32_t cur_elem_length_; // The number of bytes in current element |
| |
| uint32_t length_; // The number of elements in this list |
| std::vector<char> result_; // The output data |
| }; |
| |
| } // namespace rocksdb |
| #endif // ROCKSDB_LITE |