// blob: a3519739c218d0f1e44f9cc00841d4b7e4340e2b [file] [log] [blame]
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
#pragma once
#ifndef ROCKSDB_LITE
#include <atomic>
#include <cassert>
#include <cstdint>
#include <memory>
#include <string>
#include <vector>

#include "rocksdb/comparator.h"
#include "rocksdb/db.h"
#include "rocksdb/status.h"
namespace rocksdb {
// Forward declarations of types that this header only uses through pointers
// (return types and parameters of Transaction's methods).
class Iterator;
class TransactionDB;
class WriteBatchWithIndex;
// String type used for a transaction's name; see Transaction::SetName() and
// Transaction::GetName().
using TransactionName = std::string;
// Integer type used for transaction identifiers; see Transaction::GetID() and
// Transaction::GetWaitingTxns().
using TransactionID = uint64_t;
// Callback interface that can be handed to
// Transaction::SetSnapshotOnNextOperation() so that the caller is told when
// the deferred snapshot actually gets created.
class TransactionNotifier {
 public:
  virtual ~TransactionNotifier() = default;

  // Override this to be notified of the snapshot that was requested through
  // SetSnapshotOnNextOperation(). `newSnapshot` is the snapshot that was
  // created.
  virtual void SnapshotCreated(const Snapshot* newSnapshot) = 0;
};
// Provides BEGIN/COMMIT/ROLLBACK transactions.
//
// To use transactions, you must first create either an OptimisticTransactionDB
// or a TransactionDB. See examples/[optimistic_]transaction_example.cc for
// more information.
//
// To create a transaction, use [Optimistic]TransactionDB::BeginTransaction().
//
// It is up to the caller to synchronize access to this object.
//
// See examples/transaction_example.cc for some simple examples.
//
// TODO(agiardullo): Not yet implemented
// -PerfContext statistics
// -Support for using Transactions with DBWithTTL
class Transaction {
 public:
  virtual ~Transaction() {}

  // If a transaction has a snapshot set, the transaction will ensure that
  // any keys successfully written (or fetched via GetForUpdate()) have not
  // been modified outside of this transaction since the time the snapshot was
  // set.
  // If a snapshot has not been set, the transaction guarantees that keys have
  // not been modified since the time each key was first written (or fetched via
  // GetForUpdate()).
  //
  // Using SetSnapshot() will provide stricter isolation guarantees at the
  // expense of potentially more transaction failures due to conflicts with
  // other writes.
  //
  // Calling SetSnapshot() has no effect on keys written before this function
  // has been called.
  //
  // SetSnapshot() may be called multiple times if you would like to change
  // the snapshot used for different operations in this transaction.
  //
  // Calling SetSnapshot will not affect the version of Data returned by Get()
  // methods.  See Transaction::Get() for more details.
  virtual void SetSnapshot() = 0;

  // Similar to SetSnapshot(), but will not change the current snapshot
  // until Put/Merge/Delete/GetForUpdate/MultiGetForUpdate is called.
  // By calling this function, the transaction will essentially call
  // SetSnapshot() for you right before performing the next write/GetForUpdate.
  //
  // Calling SetSnapshotOnNextOperation() will not affect what snapshot is
  // returned by GetSnapshot() until the next write/GetForUpdate is executed.
  //
  // When the snapshot is created the notifier's SnapshotCreated method will
  // be called so that the caller can get access to the snapshot.
  //
  // This is an optimization to reduce the likelihood of conflicts that
  // could occur in between the time SetSnapshot() is called and the first
  // write/GetForUpdate operation.  Eg, this prevents the following
  // race-condition:
  //
  //   txn1->SetSnapshot();
  //                             txn2->Put("A", ...);
  //                             txn2->Commit();
  //   txn1->GetForUpdate(opts, "A", ...);  // FAIL!
  virtual void SetSnapshotOnNextOperation(
      std::shared_ptr<TransactionNotifier> notifier = nullptr) = 0;

  // Returns the Snapshot created by the last call to SetSnapshot().
  //
  // REQUIRED: The returned Snapshot is only valid up until the next time
  // SetSnapshot()/SetSnapshotOnNextOperation() is called, ClearSnapshot()
  // is called, or the Transaction is deleted.
  virtual const Snapshot* GetSnapshot() const = 0;

  // Clears the current snapshot (i.e. no snapshot will be 'set')
  //
  // This removes any snapshot that currently exists or is set to be created
  // on the next update operation (SetSnapshotOnNextOperation).
  //
  // Calling ClearSnapshot() has no effect on keys written before this function
  // has been called.
  //
  // If a reference to a snapshot was retrieved via GetSnapshot(), it will no
  // longer be valid and should be discarded after a call to ClearSnapshot().
  virtual void ClearSnapshot() = 0;

  // Prepare the current transaction for 2PC (two phase commit).
  virtual Status Prepare() = 0;

  // Write all batched keys to the db atomically.
  //
  // Returns OK on success.
  //
  // May return any error status that could be returned by DB::Write().
  //
  // If this transaction was created by an OptimisticTransactionDB(),
  // Status::Busy() may be returned if the transaction could not guarantee
  // that there are no write conflicts.  Status::TryAgain() may be returned
  // if the memtable history size is not large enough
  //  (See max_write_buffer_number_to_maintain).
  //
  // If this transaction was created by a TransactionDB(), Status::Expired()
  // may be returned if this transaction has lived for longer than
  // TransactionOptions.expiration.
  virtual Status Commit() = 0;

  // Discard all batched writes in this transaction.
  virtual Status Rollback() = 0;

  // Records the state of the transaction for future calls to
  // RollbackToSavePoint().  May be called multiple times to set multiple save
  // points.
  virtual void SetSavePoint() = 0;

  // Undo all operations in this transaction (Put, Merge, Delete, PutLogData)
  // since the most recent call to SetSavePoint() and removes the most recent
  // SetSavePoint().
  // If there is no previous call to SetSavePoint(), returns Status::NotFound()
  virtual Status RollbackToSavePoint() = 0;

  // This function is similar to DB::Get() except it will also read pending
  // changes in this transaction.  Currently, this function will return
  // Status::MergeInProgress if the most recent write to the queried key in
  // this batch is a Merge.
  //
  // If read_options.snapshot is not set, the current version of the key will
  // be read.  Calling SetSnapshot() does not affect the version of the data
  // returned.
  //
  // Note that setting read_options.snapshot will affect what is read from the
  // DB but will NOT change which keys are read from this transaction (the keys
  // in this transaction do not yet belong to any snapshot and will be fetched
  // regardless).
  virtual Status Get(const ReadOptions& options,
                     ColumnFamilyHandle* column_family, const Slice& key,
                     std::string* value) = 0;

  // An overload of the above method that receives a PinnableSlice.
  // For backward compatibility a default implementation is provided: it reads
  // into the slice's own string buffer via the std::string* overload and then
  // pins that buffer.
  //
  // REQUIRED: pinnable_val != nullptr.
  virtual Status Get(const ReadOptions& options,
                     ColumnFamilyHandle* column_family, const Slice& key,
                     PinnableSlice* pinnable_val) {
    assert(pinnable_val != nullptr);
    auto s = Get(options, column_family, key, pinnable_val->GetSelf());
    pinnable_val->PinSelf();
    return s;
  }

  // Same as the Get() overloads above, but without an explicit column family
  // argument.
  virtual Status Get(const ReadOptions& options, const Slice& key,
                     std::string* value) = 0;
  virtual Status Get(const ReadOptions& options, const Slice& key,
                     PinnableSlice* pinnable_val) {
    assert(pinnable_val != nullptr);
    auto s = Get(options, key, pinnable_val->GetSelf());
    pinnable_val->PinSelf();
    return s;
  }

  // Multi-key variants of Get().  One Status is returned per requested key.
  virtual std::vector<Status> MultiGet(
      const ReadOptions& options,
      const std::vector<ColumnFamilyHandle*>& column_family,
      const std::vector<Slice>& keys, std::vector<std::string>* values) = 0;

  virtual std::vector<Status> MultiGet(const ReadOptions& options,
                                       const std::vector<Slice>& keys,
                                       std::vector<std::string>* values) = 0;

  // Read this key and ensure that this transaction will only
  // be able to be committed if this key is not written outside this
  // transaction after it has first been read (or after the snapshot if a
  // snapshot is set in this transaction).  The transaction behavior is the
  // same regardless of whether the key exists or not.
  //
  // Note: Currently, this function will return Status::MergeInProgress
  // if the most recent write to the queried key in this batch is a Merge.
  //
  // The values returned by this function are similar to Transaction::Get().
  // If value==nullptr, then this function will not read any data, but will
  // still ensure that this key cannot be written to by outside of this
  // transaction.
  //
  // If this transaction was created by an OptimisticTransactionDB,
  // GetForUpdate() could cause Commit() to fail.  Otherwise, it could return
  // any error that could be returned by DB::Get().
  //
  // If this transaction was created by a TransactionDB, it can return
  // Status::OK() on success,
  // Status::Busy() if there is a write conflict,
  // Status::TimedOut() if a lock could not be acquired,
  // Status::TryAgain() if the memtable history size is not large enough
  //  (See max_write_buffer_number_to_maintain)
  // Status::MergeInProgress() if merge operations cannot be resolved.
  // or other errors if this key could not be read.
  virtual Status GetForUpdate(const ReadOptions& options,
                              ColumnFamilyHandle* column_family,
                              const Slice& key, std::string* value,
                              bool exclusive = true) = 0;

  // An overload of the above method that receives a PinnableSlice.
  // For backward compatibility a default implementation is provided.
  //
  // The default implementation forwards to the std::string* overload for the
  // SAME column family and with the SAME `exclusive` flag.  A null
  // pinnable_val is allowed and is forwarded as a null std::string*, i.e. the
  // "do not read any data" mode described above.
  virtual Status GetForUpdate(const ReadOptions& options,
                              ColumnFamilyHandle* column_family,
                              const Slice& key, PinnableSlice* pinnable_val,
                              bool exclusive = true) {
    if (pinnable_val == nullptr) {
      // Typed null so that overload resolution picks the std::string* variant.
      std::string* null_str = nullptr;
      return GetForUpdate(options, column_family, key, null_str, exclusive);
    }
    auto s = GetForUpdate(options, column_family, key,
                          pinnable_val->GetSelf(), exclusive);
    pinnable_val->PinSelf();
    return s;
  }

  // Same as GetForUpdate() above, but without an explicit column family
  // argument.
  virtual Status GetForUpdate(const ReadOptions& options, const Slice& key,
                              std::string* value, bool exclusive = true) = 0;

  // Multi-key variants of GetForUpdate().  One Status is returned per
  // requested key.
  virtual std::vector<Status> MultiGetForUpdate(
      const ReadOptions& options,
      const std::vector<ColumnFamilyHandle*>& column_family,
      const std::vector<Slice>& keys, std::vector<std::string>* values) = 0;

  virtual std::vector<Status> MultiGetForUpdate(
      const ReadOptions& options, const std::vector<Slice>& keys,
      std::vector<std::string>* values) = 0;

  // Returns an iterator that will iterate on all keys in the default
  // column family including both keys in the DB and uncommitted keys in this
  // transaction.
  //
  // Setting read_options.snapshot will affect what is read from the
  // DB but will NOT change which keys are read from this transaction (the keys
  // in this transaction do not yet belong to any snapshot and will be fetched
  // regardless).
  //
  // Caller is responsible for deleting the returned Iterator.
  //
  // The returned iterator is only valid until Commit(), Rollback(), or
  // RollbackToSavePoint() is called.
  virtual Iterator* GetIterator(const ReadOptions& read_options) = 0;

  // Same as above, but for the given column family.
  virtual Iterator* GetIterator(const ReadOptions& read_options,
                                ColumnFamilyHandle* column_family) = 0;

  // Put, Merge, Delete, and SingleDelete behave similarly to the corresponding
  // functions in WriteBatch, but will also do conflict checking on the
  // keys being written.
  //
  // If this Transaction was created on an OptimisticTransactionDB, these
  // functions should always return Status::OK().
  //
  // If this Transaction was created on a TransactionDB, the status returned
  // can be:
  // Status::OK() on success,
  // Status::Busy() if there is a write conflict,
  // Status::TimedOut() if a lock could not be acquired,
  // Status::TryAgain() if the memtable history size is not large enough
  //  (See max_write_buffer_number_to_maintain)
  // or other errors on unexpected failures.
  virtual Status Put(ColumnFamilyHandle* column_family, const Slice& key,
                     const Slice& value) = 0;
  virtual Status Put(const Slice& key, const Slice& value) = 0;
  virtual Status Put(ColumnFamilyHandle* column_family, const SliceParts& key,
                     const SliceParts& value) = 0;
  virtual Status Put(const SliceParts& key, const SliceParts& value) = 0;

  virtual Status Merge(ColumnFamilyHandle* column_family, const Slice& key,
                       const Slice& value) = 0;
  virtual Status Merge(const Slice& key, const Slice& value) = 0;

  virtual Status Delete(ColumnFamilyHandle* column_family,
                        const Slice& key) = 0;
  virtual Status Delete(const Slice& key) = 0;
  virtual Status Delete(ColumnFamilyHandle* column_family,
                        const SliceParts& key) = 0;
  virtual Status Delete(const SliceParts& key) = 0;

  virtual Status SingleDelete(ColumnFamilyHandle* column_family,
                              const Slice& key) = 0;
  virtual Status SingleDelete(const Slice& key) = 0;
  virtual Status SingleDelete(ColumnFamilyHandle* column_family,
                              const SliceParts& key) = 0;
  virtual Status SingleDelete(const SliceParts& key) = 0;

  // PutUntracked() will write a Put to the batch of operations to be committed
  // in this transaction.  This write will only happen if this transaction
  // gets committed successfully.  But unlike Transaction::Put(),
  // no conflict checking will be done for this key.
  //
  // If this Transaction was created on a TransactionDB, this function will
  // still acquire locks necessary to make sure this write doesn't cause
  // conflicts in other transactions and may return Status::Busy().
  virtual Status PutUntracked(ColumnFamilyHandle* column_family,
                              const Slice& key, const Slice& value) = 0;
  virtual Status PutUntracked(const Slice& key, const Slice& value) = 0;
  virtual Status PutUntracked(ColumnFamilyHandle* column_family,
                              const SliceParts& key,
                              const SliceParts& value) = 0;
  virtual Status PutUntracked(const SliceParts& key,
                              const SliceParts& value) = 0;

  virtual Status MergeUntracked(ColumnFamilyHandle* column_family,
                                const Slice& key, const Slice& value) = 0;
  virtual Status MergeUntracked(const Slice& key, const Slice& value) = 0;

  virtual Status DeleteUntracked(ColumnFamilyHandle* column_family,
                                 const Slice& key) = 0;
  virtual Status DeleteUntracked(const Slice& key) = 0;
  virtual Status DeleteUntracked(ColumnFamilyHandle* column_family,
                                 const SliceParts& key) = 0;
  virtual Status DeleteUntracked(const SliceParts& key) = 0;

  // Similar to WriteBatch::PutLogData
  virtual void PutLogData(const Slice& blob) = 0;

  // By default, all Put/Merge/Delete operations will be indexed in the
  // transaction so that Get/GetForUpdate/GetIterator can search for these
  // keys.
  //
  // If the caller does not want to fetch the keys about to be written,
  // they may want to avoid indexing as a performance optimization.
  // Calling DisableIndexing() will turn off indexing for all future
  // Put/Merge/Delete operations until EnableIndexing() is called.
  //
  // If a key is Put/Merge/Deleted after DisableIndexing is called and then
  // is fetched via Get/GetForUpdate/GetIterator, the result of the fetch is
  // undefined.
  virtual void DisableIndexing() = 0;
  virtual void EnableIndexing() = 0;

  // Returns the number of distinct Keys being tracked by this transaction.
  // If this transaction was created by a TransactionDB, this is the number of
  // keys that are currently locked by this transaction.
  // If this transaction was created by an OptimisticTransactionDB, this is the
  // number of keys that need to be checked for conflicts at commit time.
  virtual uint64_t GetNumKeys() const = 0;

  // Returns the number of Puts/Deletes/Merges that have been applied to this
  // transaction so far.
  virtual uint64_t GetNumPuts() const = 0;
  virtual uint64_t GetNumDeletes() const = 0;
  virtual uint64_t GetNumMerges() const = 0;

  // Returns the elapsed time in milliseconds since this Transaction began.
  virtual uint64_t GetElapsedTime() const = 0;

  // Fetch the underlying write batch that contains all pending changes to be
  // committed.
  //
  // Note:  You should not write or delete anything from the batch directly and
  // should only use the functions in the Transaction class to
  // write to this transaction.
  virtual WriteBatchWithIndex* GetWriteBatch() = 0;

  // Change the value of TransactionOptions.lock_timeout (in milliseconds) for
  // this transaction.
  // Has no effect on OptimisticTransactions.
  virtual void SetLockTimeout(int64_t timeout) = 0;

  // Return the WriteOptions that will be used during Commit()
  virtual WriteOptions* GetWriteOptions() = 0;

  // Reset the WriteOptions that will be used during Commit().
  virtual void SetWriteOptions(const WriteOptions& write_options) = 0;

  // If this key was previously fetched in this transaction using
  // GetForUpdate/MultiGetForUpdate(), calling UndoGetForUpdate will tell
  // the transaction that it no longer needs to do any conflict checking
  // for this key.
  //
  // If a key has been fetched N times via GetForUpdate/MultiGetForUpdate(),
  // then UndoGetForUpdate will only have an effect if it is also called N
  // times.  If this key has been written to in this transaction,
  // UndoGetForUpdate() will have no effect.
  //
  // If SetSavePoint() has been called after the GetForUpdate(),
  // UndoGetForUpdate() will not have any effect.
  //
  // If this Transaction was created by an OptimisticTransactionDB,
  // calling UndoGetForUpdate can affect whether this key is conflict checked
  // at commit time.
  // If this Transaction was created by a TransactionDB,
  // calling UndoGetForUpdate may release any held locks for this key.
  virtual void UndoGetForUpdate(ColumnFamilyHandle* column_family,
                                const Slice& key) = 0;
  virtual void UndoGetForUpdate(const Slice& key) = 0;

  // NOTE(review): presumably re-applies the operations recorded in src_batch
  // to this transaction -- confirm against the implementations.
  virtual Status RebuildFromWriteBatch(WriteBatch* src_batch) = 0;

  // NOTE(review): presumably a batch that is written together with the
  // transaction at commit time -- confirm against the implementations.
  virtual WriteBatch* GetCommitTimeWriteBatch() = 0;

  // Accessors for log_number_, the log in which the prepared section for this
  // transaction resides (used for two phase commit).
  virtual void SetLogNumber(uint64_t log) { log_number_ = log; }

  virtual uint64_t GetLogNumber() const { return log_number_; }

  virtual Status SetName(const TransactionName& name) = 0;

  // Returns a copy of the name stored in name_.
  virtual TransactionName GetName() const { return name_; }

  // The default implementation returns 0.
  virtual TransactionID GetID() const { return 0; }

  // The default implementation returns false.
  virtual bool IsDeadlockDetect() const { return false; }

  // The default implementation is not expected to be called: it asserts in
  // debug builds and otherwise returns an empty vector without touching the
  // output arguments.
  virtual std::vector<TransactionID> GetWaitingTxns(
      uint32_t* /*column_family_id*/, std::string* /*key*/) const {
    assert(false);
    return std::vector<TransactionID>();
  }

  // Execution states a transaction can be in; stored in txn_state_.
  enum TransactionState {
    STARTED = 0,
    AWAITING_PREPARE = 1,
    PREPARED = 2,
    AWAITING_COMMIT = 3,
    COMMITED = 4,
    AWAITING_ROLLBACK = 5,
    ROLLEDBACK = 6,
    LOCKS_STOLEN = 7,
  };

  TransactionState GetState() const { return txn_state_; }
  void SetState(TransactionState state) { txn_state_ = state; }

 protected:
  // Both constructors start the transaction with log number 0 and in the
  // STARTED state so that GetLogNumber()/GetState() never read an
  // uninitialized value.  The TransactionDB argument is unused here.
  explicit Transaction(const TransactionDB* /*db*/)
      : log_number_(0), txn_state_(STARTED) {}
  Transaction() : log_number_(0), txn_state_(STARTED) {}

  // the log in which the prepared section for this txn resides
  // (for two phase commit)
  uint64_t log_number_;
  TransactionName name_;

  // Execution status of the transaction.
  std::atomic<TransactionState> txn_state_;

 private:
  // No copying allowed
  Transaction(const Transaction&) = delete;
  void operator=(const Transaction&) = delete;
};
} // namespace rocksdb
#endif // ROCKSDB_LITE