// blob: 88866ac19c1dffc78b3b73ec5ec3675050ec2f19 [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#ifndef KUDU_CLIENT_CLIENT_H
#define KUDU_CLIENT_CLIENT_H
#include <stdint.h>
#include <string>
#include <vector>
#include "kudu/client/row_result.h"
#include "kudu/client/scan_batch.h"
#include "kudu/client/scan_predicate.h"
#include "kudu/client/schema.h"
#include "kudu/client/shared_ptr.h"
#ifdef KUDU_HEADERS_NO_STUBS
#include <gtest/gtest_prod.h>
#include "kudu/gutil/macros.h"
#include "kudu/gutil/port.h"
#else
#include "kudu/client/stubs.h"
#endif
#include "kudu/client/write_op.h"
#include "kudu/util/kudu_export.h"
#include "kudu/util/monotime.h"
#include "kudu/util/status.h"
#if _GLIBCXX_USE_CXX11_ABI
#error \
"Kudu will not function properly if built with gcc 5's new ABI. " \
"Please modify your application to set -D_GLIBCXX_USE_CXX11_ABI=0. " \
"For more information about the new ABI, see " \
"https://gcc.gnu.org/onlinedocs/libstdc++/manual/using_dual_abi.html."
#endif
namespace kudu {
class LinkedListTester;
class PartitionSchema;
namespace client {
class KuduLoggingCallback;
class KuduSession;
class KuduStatusCallback;
class KuduTable;
class KuduTableAlterer;
class KuduTableCreator;
class KuduTabletServer;
class KuduValue;
class KuduWriteOperation;
namespace internal {
class Batcher;
class GetTableSchemaRpc;
class LookupRpc;
class MetaCache;
class RemoteTablet;
class RemoteTabletServer;
class WriteRpc;
} // namespace internal
// Installs a callback for internal client logging. It is invoked for a
// log event of any severity, across any KuduClient instance.
//
// Only the first invocation has any effect; subsequent invocations are
// a no-op. The library does not take ownership of 'cb': the caller must
// ensure that 'cb' stays alive until UninstallLoggingCallback() is called.
//
// Before a callback is registered, all internal client log events are
// logged to stderr.
void KUDU_EXPORT InstallLoggingCallback(KuduLoggingCallback* cb);
// Removes a callback installed via InstallLoggingCallback().
//
// Only the first invocation has any effect; subsequent invocations are
// a no-op.
//
// The callback passed to InstallLoggingCallback() must stay alive until this
// function has been called.
//
// Should be called before unloading the client library.
void KUDU_EXPORT UninstallLoggingCallback();
// Set the logging verbosity of the client library to 'level'. By default, this
// is 0. Logs become progressively more verbose as the level is increased.
// Empirically, the highest verbosity level used in Kudu is 6, which includes
// very fine-grained tracing information. Most useful logging is enabled at
// level 1 or 2, with the higher levels used only in rare circumstances.
//
// Logs are emitted to stderr, or to the configured log callback at SEVERITY_INFO.
//
// This may be called safely at any point during usage of the library.
void KUDU_EXPORT SetVerboseLogLevel(int level);
// The Kudu client library uses signals internally in some cases. By default, it uses
// SIGUSR2. If your application makes use of SIGUSR2, this advanced API can help
// work around conflicts.
//
// 'signum' is the signal number the library should use instead. The returned
// Status reports the outcome of the change.
Status KUDU_EXPORT SetInternalSignalNumber(int signum);
// Return a short version string identifying the Kudu client. See
// GetAllVersionInfo() for a longer, multi-line variant.
std::string KUDU_EXPORT GetShortVersionString();
// Return a longer multi-line version string identifying the client, including
// build time, etc. See GetShortVersionString() for the short form.
std::string KUDU_EXPORT GetAllVersionInfo();
// Builder which creates a new KuduClient with the desired options.
//
// Every option setter returns a KuduClientBuilder reference (presumably this
// builder, so that calls can be chained -- confirm against the implementation).
// Call Build() once the options have been set.
//
// Note that KuduClients are shared amongst multiple threads and, as such,
// are stored in shared pointers.
class KUDU_EXPORT KuduClientBuilder {
public:
KuduClientBuilder();
~KuduClientBuilder();
// NOTE(review): undocumented in the original header. Judging by the name,
// this presumably discards all master RPC addresses added so far -- confirm
// against the implementation.
KuduClientBuilder& clear_master_server_addrs();
// Add RPC addresses of multiple masters.
KuduClientBuilder& master_server_addrs(const std::vector<std::string>& addrs);
// Add an RPC address of a master. At least one master is required.
KuduClientBuilder& add_master_server_addr(const std::string& addr);
// The default timeout used for administrative operations (e.g. CreateTable,
// AlterTable, ...). Optional.
//
// If not provided, defaults to 10s.
KuduClientBuilder& default_admin_operation_timeout(const MonoDelta& timeout);
// The default timeout for individual RPCs. Optional.
//
// If not provided, defaults to 5s.
KuduClientBuilder& default_rpc_timeout(const MonoDelta& timeout);
// Creates the client.
//
// 'client' is an out-parameter which receives the newly created client
// (presumably only on success -- confirm against the implementation).
//
// The return value may indicate an error in the create operation, or a
// misuse of the builder; in the latter case, only the last error is
// returned.
Status Build(sp::shared_ptr<KuduClient>* client);
private:
// Private implementation state; declared here, defined elsewhere.
class KUDU_NO_EXPORT Data;
// Owned.
Data* data_;
DISALLOW_COPY_AND_ASSIGN(KuduClientBuilder);
};
// The KuduClient represents a connection to a cluster. From the user
// perspective, they should only need to create one of these in their
// application, likely a singleton -- but it's not a singleton in Kudu in any
// way. Different Client objects do not interact with each other -- no
// connection pooling, etc. Each KuduClient instance is sandboxed with no
// global cross-client state.
//
// In the implementation, the client holds various pieces of common
// infrastructure which are not table-specific:
//
// - RPC messenger: reactor threads and RPC connections are pooled here
// - Authentication: the client is initialized with some credentials, and
// all accesses through it share those credentials.
// - Caches: caches of table schemas, tablet locations, tablet server IP
// addresses, etc are shared per-client.
//
// In order to actually access data on the cluster, callers must first
// create a KuduSession object using NewSession(). A KuduClient may
// have several associated sessions.
//
// The constructor is private; instances are obtained through
// KuduClientBuilder::Build().
//
// TODO: Cluster administration functions are likely to be in this class
// as well.
//
// This class is thread-safe.
class KUDU_EXPORT KuduClient : public sp::enable_shared_from_this<KuduClient> {
public:
~KuduClient();
// Creates a KuduTableCreator; it is the caller's responsibility to free it.
KuduTableCreator* NewTableCreator();
// Sets 'create_in_progress' to true if a CreateTable operation is in-progress
// for the table given by 'table_name'.
Status IsCreateTableInProgress(const std::string& table_name,
bool *create_in_progress);
// Deletes the table given by 'table_name'.
Status DeleteTable(const std::string& table_name);
// Creates a KuduTableAlterer; it is the caller's responsibility to free it.
KuduTableAlterer* NewTableAlterer(const std::string& table_name);
// Sets 'alter_in_progress' to true if an AlterTable operation is in-progress
// for the table given by 'table_name'.
Status IsAlterTableInProgress(const std::string& table_name,
bool *alter_in_progress);
// Retrieves the schema of the table given by 'table_name' into 'schema'.
// NOTE(review): presumably 'schema' is only modified on success -- confirm.
Status GetTableSchema(const std::string& table_name,
KuduSchema* schema);
// Lists the tablet servers into 'tablet_servers'.
// NOTE(review): the vector holds raw pointers and ownership is not stated
// here; presumably the caller must free the elements -- confirm against the
// implementation.
Status ListTabletServers(std::vector<KuduTabletServer*>* tablet_servers);
// List only those tables whose names pass a substring match on 'filter'.
//
// 'tables' is appended to only on success.
Status ListTables(std::vector<std::string>* tables,
const std::string& filter = "");
// Check if the table given by 'table_name' exists.
//
// 'exists' is set only on success.
Status TableExists(const std::string& table_name, bool* exists);
// Open the table with the given name. If the table has not been opened before
// in this client, this will do an RPC to ensure that the table exists and
// look up its schema.
//
// TODO: should we offer an async version of this as well?
// TODO: probably should have a configurable timeout in KuduClientBuilder?
Status OpenTable(const std::string& table_name,
sp::shared_ptr<KuduTable>* table);
// Create a new session for interacting with the cluster.
// User is responsible for destroying the session object.
// NOTE(review): the session is returned in a shared pointer, so the sentence
// above may predate that return type -- confirm the intended ownership rule.
// This is a fully local operation (no RPCs or blocking).
sp::shared_ptr<KuduSession> NewSession();
// Policy with which to choose amongst multiple replicas.
enum ReplicaSelection {
// Select the LEADER replica.
LEADER_ONLY,
// Select the closest replica to the client, or a random one if all
// replicas are equidistant.
CLOSEST_REPLICA,
// Select the first replica in the list.
FIRST_REPLICA
};
// NOTE(review): undocumented in the original header; presumably returns true
// if this client was configured with more than one master -- confirm.
bool IsMultiMaster() const;
// The default timeouts in effect for this client. See the setters of the
// same names on KuduClientBuilder.
const MonoDelta& default_admin_operation_timeout() const;
const MonoDelta& default_rpc_timeout() const;
// Value for the latest observed timestamp when none has been observed or set.
static const uint64_t kNoTimestamp;
// Returns highest HybridTime timestamp observed by the client.
// The latest observed timestamp can be used to start a snapshot scan on a
// table which is guaranteed to contain all data written or previously read by
// this client. See KuduScanner for more details on timestamps.
uint64_t GetLatestObservedTimestamp() const;
// Sets the latest observed HybridTime timestamp, encoded in the HybridTime format.
// This is only useful when forwarding timestamps between clients to enforce
// external consistency when using KuduSession::CLIENT_PROPAGATED external consistency
// mode.
// To use this the user must obtain the HybridTime encoded timestamp from the first
// client with KuduClient::GetLatestObservedTimestamp() and then set it in the new
// client with this method.
void SetLatestObservedTimestamp(uint64_t ht_timestamp);
private:
// Private implementation state; declared here, defined elsewhere.
class KUDU_NO_EXPORT Data;
// Classes granted access to the private members of KuduClient.
friend class KuduClientBuilder;
friend class KuduScanner;
friend class KuduTable;
friend class KuduTableAlterer;
friend class KuduTableCreator;
friend class internal::Batcher;
friend class internal::GetTableSchemaRpc;
friend class internal::LookupRpc;
friend class internal::MetaCache;
friend class internal::RemoteTablet;
friend class internal::RemoteTabletServer;
friend class internal::WriteRpc;
// Tests granted access to the private members of KuduClient.
FRIEND_TEST(ClientTest, TestGetTabletServerBlacklist);
FRIEND_TEST(ClientTest, TestMasterDown);
FRIEND_TEST(ClientTest, TestMasterLookupPermits);
FRIEND_TEST(ClientTest, TestReplicatedMultiTabletTableFailover);
FRIEND_TEST(ClientTest, TestReplicatedTabletWritesWithLeaderElection);
FRIEND_TEST(ClientTest, TestScanFaultTolerance);
FRIEND_TEST(ClientTest, TestScanTimeout);
FRIEND_TEST(ClientTest, TestWriteWithDeadMaster);
FRIEND_TEST(MasterFailoverTest, TestPauseAfterCreateTableIssued);
// Private: clients are created via KuduClientBuilder (a friend of this class).
KuduClient();
// Owned.
Data* data_;
DISALLOW_COPY_AND_ASSIGN(KuduClient);
};
// Creates a new table with the desired options.
//
// The constructor is private; instances are obtained through
// KuduClient::NewTableCreator().
class KUDU_EXPORT KuduTableCreator {
public:
~KuduTableCreator();
// Sets the name to give the table. It is copied. Required.
KuduTableCreator& table_name(const std::string& name);
// Sets the schema with which to create the table. Must remain valid for
// the lifetime of the builder. Required.
KuduTableCreator& schema(const KuduSchema* schema);
// Adds a set of hash partitions to the table.
//
// For each set of hash partitions added to the table, the total number of
// table partitions is multiplied by the number of buckets. For example, if a
// table is created with 3 split rows, and two hash partitions with 4 and 5
// buckets respectively, the total number of table partitions will be 80
// (4 range partitions * 4 hash buckets * 5 hash buckets).
KuduTableCreator& add_hash_partitions(const std::vector<std::string>& columns,
int32_t num_buckets);
// Adds a set of hash partitions to the table.
//
// This overload takes a seed value, which can be used to randomize the
// mapping of rows to hash buckets. Setting the seed may provide some
// amount of protection against denial of service attacks when the hashed
// columns contain user provided values.
KuduTableCreator& add_hash_partitions(const std::vector<std::string>& columns,
int32_t num_buckets, int32_t seed);
// Sets the columns on which the table will be range-partitioned.
//
// Every column must be a part of the table's primary key. If not set, the
// table will be created with the primary-key columns as the range-partition
// columns. If called with an empty vector, the table will be created without
// range partitioning.
//
// Optional.
KuduTableCreator& set_range_partition_columns(const std::vector<std::string>& columns);
// Sets the rows on which to pre-split the table.
// The table creator takes ownership of the rows.
//
// If any provided row is missing a value for any of the range partition
// columns, the logical minimum value for that column type will be used by
// default.
//
// If not provided, no range-based pre-splitting is performed.
//
// Optional.
KuduTableCreator& split_rows(const std::vector<const KuduPartialRow*>& split_rows);
// Sets the number of replicas for each tablet in the table.
// This should be an odd number. Optional.
//
// If not provided (or if <= 0), falls back to the server-side default.
KuduTableCreator& num_replicas(int n_replicas);
// Set the timeout for the operation. This includes any waiting
// after the create has been submitted (i.e. if the create is slow
// to be performed for a large table, it may time out and then
// later be successful).
KuduTableCreator& timeout(const MonoDelta& timeout);
// Wait for the table to be fully created before returning.
// Optional.
//
// If not provided, defaults to true.
KuduTableCreator& wait(bool wait);
// Creates the table.
//
// The return value may indicate an error in the create table operation,
// or a misuse of the builder; in the latter case, only the last error is
// returned.
Status Create();
private:
// Private implementation state; declared here, defined elsewhere.
class KUDU_NO_EXPORT Data;
friend class KuduClient;
// Private: instances are created by KuduClient (a friend of this class).
explicit KuduTableCreator(KuduClient* client);
// Owned.
Data* data_;
DISALLOW_COPY_AND_ASSIGN(KuduTableCreator);
};
// A KuduTable represents a table on a particular cluster. It holds the current
// schema of the table. Any given KuduTable instance belongs to a specific KuduClient
// instance.
//
// Upon construction, the table is looked up in the catalog (or catalog cache),
// and the schema fetched for introspection.
//
// The constructor is private; KuduClient is a friend and exposes
// KuduClient::OpenTable() for obtaining instances.
//
// This class is thread-safe.
class KUDU_EXPORT KuduTable : public sp::enable_shared_from_this<KuduTable> {
public:
~KuduTable();
// Return the table's name.
const std::string& name() const;
// Return the table's ID. This is an internal identifier which uniquely
// identifies a table. If the table is deleted and recreated with the same
// name, the ID will distinguish the old table from the new.
const std::string& id() const;
// Return the table's schema.
const KuduSchema& schema() const;
// Create a new write operation (insert, update or delete) for this table.
// It is the caller's responsibility to free it, unless it is passed to
// KuduSession::Apply().
KuduInsert* NewInsert();
KuduUpdate* NewUpdate();
KuduDelete* NewDelete();
// Create a new comparison predicate which can be used for scanners
// on this table.
//
// The type of 'value' must correspond to the type of the column to which
// the predicate is to be applied. For example, if the given column is
// any type of integer, the KuduValue should also be an integer, with its
// value in the valid range for the column type. No attempt is made to cast
// between floating point and integer values, or numeric and string values.
//
// The caller owns the result until it is passed into KuduScanner::AddConjunctPredicate().
// The returned predicate takes ownership of 'value'.
//
// In the case of an error (e.g. an invalid column name), a non-NULL value
// is still returned. The error will be returned when attempting to add this
// predicate to a KuduScanner.
KuduPredicate* NewComparisonPredicate(const Slice& col_name,
KuduPredicate::ComparisonOp op,
KuduValue* value);
// Return the client this table belongs to.
KuduClient* client() const;
// Return the table's partition schema.
const PartitionSchema& partition_schema() const;
private:
// Private implementation state; declared here, defined elsewhere.
class KUDU_NO_EXPORT Data;
friend class KuduClient;
// Private: instances are created by KuduClient (a friend of this class).
KuduTable(const sp::shared_ptr<KuduClient>& client,
const std::string& name,
const std::string& table_id,
const KuduSchema& schema,
const PartitionSchema& partition_schema);
// Owned.
Data* data_;
DISALLOW_COPY_AND_ASSIGN(KuduTable);
};
// Alters an existing table based on the provided steps.
//
// The constructor is private; instances are obtained through
// KuduClient::NewTableAlterer(), and the caller must free them.
//
// Sample usage:
// KuduTableAlterer* alterer = client->NewTableAlterer("table-name");
// alterer->AddColumn("foo")->Type(KuduColumnSchema::INT32)->NotNull();
// alterer->AlterColumn("bar")->Compression(KuduColumnStorageAttributes::LZ4);
// Status s = alterer->Alter();
// delete alterer;
class KUDU_EXPORT KuduTableAlterer {
public:
~KuduTableAlterer();
// Renames the table.
KuduTableAlterer* RenameTo(const std::string& new_name);
// Adds a new column to the table.
//
// When adding a column, you must specify the default value of the new
// column using KuduColumnSpec::DefaultValue(...).
//
// The returned KuduColumnSpec is used to describe the new column, as in the
// sample usage above.
KuduColumnSpec* AddColumn(const std::string& name);
// Alter an existing column.
//
// The returned KuduColumnSpec is used to describe the changes, as in the
// sample usage above.
KuduColumnSpec* AlterColumn(const std::string& name);
// Drops an existing column from the table.
KuduTableAlterer* DropColumn(const std::string& name);
// Set the timeout for the operation. This includes any waiting
// after the alter has been submitted (i.e. if the alter is slow
// to be performed on a large table, it may time out and then
// later be successful).
KuduTableAlterer* timeout(const MonoDelta& timeout);
// Wait for the table to be fully altered before returning.
//
// If not provided, defaults to true.
KuduTableAlterer* wait(bool wait);
// Alters the table.
//
// The return value may indicate an error in the alter operation, or a
// misuse of the builder (e.g. AddColumn() without a default value); in
// the latter case, only the last error is returned.
Status Alter();
private:
// Private implementation state; declared here, defined elsewhere.
class KUDU_NO_EXPORT Data;
friend class KuduClient;
// Private: instances are created by KuduClient (a friend of this class).
KuduTableAlterer(KuduClient* client,
const std::string& name);
// Owned.
Data* data_;
DISALLOW_COPY_AND_ASSIGN(KuduTableAlterer);
};
// An error which occurred in a given operation. This tracks the operation
// which caused the error, along with whatever the actual error was.
//
// The constructor is private; only the friends declared below (the internal
// Batcher and KuduSession) can create instances.
class KUDU_EXPORT KuduError {
public:
~KuduError();
// Return the actual error which occurred.
const Status& status() const;
// Return the operation which failed.
const KuduWriteOperation& failed_op() const;
// Release the operation that failed. The caller takes ownership. Must only
// be called once.
KuduWriteOperation* release_failed_op();
// In some cases, it's possible that the server did receive and successfully
// perform the requested operation, but the client can't tell whether or not
// it was successful. For example, if the call times out, the server may still
// succeed in processing at a later time.
//
// This function returns true if there is some chance that the server did
// process the operation, and false if it can guarantee that the operation
// did not succeed.
bool was_possibly_successful() const;
private:
// Private implementation state; declared here, defined elsewhere.
class KUDU_NO_EXPORT Data;
friend class internal::Batcher;
friend class KuduSession;
// 'failed_op' is the operation which failed and 'error' is the failure itself.
KuduError(KuduWriteOperation* failed_op, const Status& error);
// Owned.
Data* data_;
DISALLOW_COPY_AND_ASSIGN(KuduError);
};
// A KuduSession belongs to a specific KuduClient, and represents a context in
// which all read/write data access should take place. Within a session,
// multiple operations may be accumulated and batched together for better
// efficiency. Settings like timeouts, priorities, and trace IDs are also set
// per session.
//
// A KuduSession's main purpose is for grouping multiple data-access
// operations together into batches or transactions. It is important to note
// the distinction between these two:
//
// * A batch is a set of operations which are grouped together in order to
// amortize fixed costs such as RPC call overhead and round trip times.
// A batch DOES NOT imply any ACID-like guarantees. Within a batch, some
// operations may succeed while others fail, and concurrent readers may see
// partial results. If the client crashes mid-batch, it is possible that some
// of the operations will be made durable while others were lost.
//
// * In contrast, a transaction is a set of operations which are treated as an
// indivisible semantic unit, per the usual definitions of database transactions
// and isolation levels.
//
// NOTE: Kudu does not currently support transactions! They are only mentioned
// in the above documentation to clarify that batches are not transactional and
// should only be used for efficiency.
//
// KuduSession is separate from KuduTable because a given batch or transaction
// may span multiple tables. This is particularly important in the future when
// we add ACID support, but even in the context of batching, we may be able to
// coalesce writes to different tables hosted on the same server into the same
// RPC.
//
// KuduSession is separate from KuduClient because, in a multi-threaded
// application, different threads may need to concurrently execute
// transactions. Similar to a JDBC "session", transaction boundaries will be
// delineated on a per-session basis -- in between a "BeginTransaction" and
// "Commit" call on a given session, all operations will be part of the same
// transaction. Meanwhile another concurrent Session object can safely run
// non-transactional work or other transactions without interfering.
//
// Additionally, there is a guarantee that writes from different sessions do not
// get batched together into the same RPCs -- this means that latency-sensitive
// clients can run through the same KuduClient object as throughput-oriented
// clients, perhaps by setting the latency-sensitive session's timeouts low and
// priorities high. Without the separation of batches, a latency-sensitive
// single-row insert might get batched along with 10MB worth of inserts from the
// batch writer, thus delaying the response significantly.
//
// Though we currently do not have transactional support, users will be forced
// to use a KuduSession to instantiate reads as well as writes. This will make
// it more straight-forward to add RW transactions in the future without
// significant modifications to the API.
//
// Users who are familiar with the Hibernate ORM framework should find this
// concept of a Session familiar.
//
// The constructor is private; instances are obtained through
// KuduClient::NewSession().
//
// This class is not thread-safe except where otherwise specified.
class KUDU_EXPORT KuduSession : public sp::enable_shared_from_this<KuduSession> {
public:
~KuduSession();
// Controls when writes applied to the session are sent to the server.
enum FlushMode {
// Every write will be sent to the server in-band with the Apply()
// call. No batching will occur. In this mode, the Flush() call never
// has any effect, since each Apply() call has already flushed the
// buffer. This is the default flush mode.
AUTO_FLUSH_SYNC,
// Apply() calls will return immediately, but the writes will be sent in
// the background, potentially batched together with other writes from
// the same session. If there is not sufficient buffer space, then Apply()
// may block for buffer space to be available.
//
// Because writes are applied in the background, any errors will be stored
// in a session-local buffer. Call CountPendingErrors() or GetPendingErrors()
// to retrieve them.
// TODO: provide an API for the user to specify a callback to do their own
// error reporting.
// TODO: specify which threads the background activity runs on (probably the
// messenger IO threads?)
//
// NOTE: This is not implemented yet, see KUDU-456.
//
// The Flush() call can be used to block until the buffer is empty.
AUTO_FLUSH_BACKGROUND,
// Apply() calls will return immediately, and the writes will not be
// sent until the user calls Flush(). If the buffer runs past the
// configured space limit, then Apply() will return an error.
MANUAL_FLUSH
};
// Set the flush mode.
// REQUIRES: there should be no pending writes -- call Flush() first to ensure.
Status SetFlushMode(FlushMode m) WARN_UNUSED_RESULT;
// The possible external consistency modes on which Kudu operates.
enum ExternalConsistencyMode {
// The response to any write will contain a timestamp. Any further calls from the same
// client to other servers will update those servers with that timestamp. Following
// write operations from the same client will be assigned timestamps that are strictly
// higher, enforcing external consistency without having to wait or incur any latency
// penalties.
//
// In order to maintain external consistency for writes between two different clients
// in this mode, the user must forward the timestamp from the first client to the
// second by using KuduClient::GetLatestObservedTimestamp() and
// KuduClient::SetLatestObservedTimestamp().
//
// WARNING: Failure to propagate timestamp information through back-channels between
// two different clients will negate any external consistency guarantee under this
// mode.
//
// This is the default mode.
CLIENT_PROPAGATED,
// The server will guarantee that write operations from the same or from other clients
// are externally consistent, without the need to propagate timestamps across clients.
// This is done by making write operations wait until there is certainty that all
// follow up write operations (operations that start after the previous one finishes)
// will be assigned a timestamp that is strictly higher, enforcing external consistency.
//
// WARNING: Depending on the clock synchronization state of TabletServers this may
// imply considerable latency. Moreover operations in COMMIT_WAIT external consistency
// mode will outright fail if TabletServer clocks are either unsynchronized or
// synchronized but with a maximum error which surpasses a pre-configured threshold.
COMMIT_WAIT
};
// Set the new external consistency mode for this session.
Status SetExternalConsistencyMode(ExternalConsistencyMode m) WARN_UNUSED_RESULT;
// Set the amount of buffer space used by this session for outbound writes.
// The effect of the buffer size varies based on the flush mode of the
// session:
//
// AUTO_FLUSH_SYNC:
// since no buffering is done, this has no effect
// AUTO_FLUSH_BACKGROUND:
// if the buffer space is exhausted, then write calls will block until there
// is space available in the buffer.
// MANUAL_FLUSH:
// if the buffer space is exhausted, then write calls will return an error.
Status SetMutationBufferSpace(size_t size) WARN_UNUSED_RESULT;
// Set the timeout for writes made in this session, in milliseconds.
void SetTimeoutMillis(int millis);
// TODO: add "doAs" ability here for proxy servers to be able to act on behalf of
// other users, assuming access rights.
// Apply the write operation. Transfers the write_op's ownership to the KuduSession.
//
// The behavior of this function depends on the current flush mode. Regardless
// of flush mode, however, Apply may begin to perform processing in the background
// for the call (e.g. looking up the tablet, etc). Given that, an error may be
// queued into the PendingErrors structure prior to flushing, even in MANUAL_FLUSH
// mode.
//
// In case of any error, which may occur during flushing or because the write_op
// is malformed, the write_op is stored in the session's error collector which
// may be retrieved at any time.
//
// This is thread safe.
Status Apply(KuduWriteOperation* write_op) WARN_UNUSED_RESULT;
// Similar to the above, except never blocks. Even in the flush modes that
// return immediately, 'cb' is triggered with the result. The callback may be
// called by a reactor thread, or in some cases may be called inline by the
// same thread which calls ApplyAsync(). 'cb' must remain valid until it is called.
//
// TODO: not yet implemented.
void ApplyAsync(KuduWriteOperation* write_op, KuduStatusCallback* cb);
// Flush any pending writes.
//
// Returns a bad status if there are any pending errors after the rows have
// been flushed. Callers should then use GetPendingErrors to determine which
// specific operations failed.
//
// In AUTO_FLUSH_SYNC mode, this has no effect, since every Apply() call flushes
// itself inline.
//
// In the case that the async version of this method is used, then the callback
// will be called upon completion of the operations which were buffered since the
// last flush. In other words, in the following sequence:
//
// session->Apply(a);
// session->FlushAsync(callback_1);
// session->Apply(b);
// session->FlushAsync(callback_2);
//
// ... 'callback_2' will be triggered once 'b' has been inserted, regardless of whether
// 'a' has completed or not.
//
// Note that this also means that, if FlushAsync is called twice in succession, with
// no intervening operations, the second flush will return immediately. For example:
//
// session->Apply(a);
// session->FlushAsync(callback_1); // called when 'a' is inserted
// session->FlushAsync(callback_2); // called immediately!
//
// Note that, as in all other async functions in Kudu, the callback may be called
// either from an IO thread or the same thread which calls FlushAsync. The callback
// should not block.
//
// For FlushAsync, 'cb' must remain valid until it is invoked.
//
// This function is thread-safe.
Status Flush() WARN_UNUSED_RESULT;
void FlushAsync(KuduStatusCallback* cb);
// Close the session.
// Returns an error if there are unflushed or in-flight operations.
Status Close() WARN_UNUSED_RESULT;
// Return true if there are operations which have not yet been delivered to the
// cluster. This may include buffered operations (i.e. those that have not yet been
// flushed) as well as in-flight operations (i.e. those that are in the process of
// being sent to the servers).
// TODO: maybe "incomplete" or "undelivered" is clearer?
//
// This function is thread-safe.
bool HasPendingOperations() const;
// Return the number of buffered operations. These are operations that have
// not yet been flushed - i.e. they are not en-route yet.
//
// Note that this is different than HasPendingOperations() above, which includes
// operations which have been sent and not yet responded to.
//
// This is only relevant in MANUAL_FLUSH mode, where the result will not
// decrease except for after a manual Flush, after which point it will be 0.
// In the other flush modes, data is immediately put en-route to the destination,
// so this will return 0.
//
// This function is thread-safe.
int CountBufferedOperations() const;
// Return the number of errors which are pending. Errors may accumulate when
// using the AUTO_FLUSH_BACKGROUND mode.
//
// This function is thread-safe.
int CountPendingErrors() const;
// Return any errors from previous calls. If there were more errors
// than could be held in the session's error storage, then sets *overflowed to true.
//
// Caller takes ownership of the returned errors.
//
// This function is thread-safe.
void GetPendingErrors(std::vector<KuduError*>* errors, bool* overflowed);
// Return the client this session belongs to.
KuduClient* client() const;
private:
// Private implementation state; declared here, defined elsewhere.
class KUDU_NO_EXPORT Data;
friend class KuduClient;
friend class internal::Batcher;
// Private: instances are created by KuduClient (a friend of this class).
explicit KuduSession(const sp::shared_ptr<KuduClient>& client);
// Owned.
Data* data_;
DISALLOW_COPY_AND_ASSIGN(KuduSession);
};
// A single scanner. This class is not thread-safe, though different
// scanners on different threads may share a single KuduTable object.
//
// Usage, as suggested by the methods below: construct the scanner on a table,
// optionally configure it (projection, predicates, bounds, read mode, ...),
// call Open(), and then call NextBatch() for as long as HasMoreRows() is true.
class KUDU_EXPORT KuduScanner {
public:
// The possible read modes for scanners.
enum ReadMode {
// When READ_LATEST is specified the server will always return committed writes at
// the time the request was received. This type of read does not return a snapshot
// timestamp and is not repeatable.
//
// In ACID terms this corresponds to Isolation mode: "Read Committed"
//
// This is the default mode.
READ_LATEST,
// When READ_AT_SNAPSHOT is specified the server will attempt to perform a read
// at the provided timestamp. If no timestamp is provided the server will take the
// current time as the snapshot timestamp. In this mode reads are repeatable, i.e.
// all future reads at the same timestamp will yield the same data. This is
// performed at the expense of waiting for in-flight transactions whose timestamp
// is lower than the snapshot's timestamp to complete, so it might incur a latency
// penalty.
//
// In ACID terms this, by itself, corresponds to Isolation mode "Repeatable
// Read". If all writes to the scanned tablet are made externally consistent,
// then this corresponds to Isolation mode "Strict-Serializable".
//
// Note: there are currently "holes", which happen in rare edge conditions, by which
// writes are sometimes not externally consistent even when action was taken to make
// them so. In these cases Isolation may degenerate to mode "Read Committed".
// See KUDU-430.
READ_AT_SNAPSHOT
};
// Whether the rows should be returned in order. This affects the fault-tolerance properties
// of a scanner.
enum OrderMode {
// Rows will be returned in an arbitrary order determined by the tablet server.
// This is efficient, but unordered scans are not fault-tolerant and cannot be resumed
// in the case of tablet server failure.
//
// This is the default mode.
UNORDERED,
// Rows will be returned ordered by primary key. Sorting the rows imposes additional overhead
// on the tablet server, but means that scans are fault-tolerant and will be resumed at
// another tablet server in the case of failure.
ORDERED
};
// Default scanner timeout, in milliseconds (15 seconds).
// This is set to 3x the default RPC timeout (see KuduClientBuilder::default_rpc_timeout()).
enum { kScanTimeoutMillis = 15000 };
// Initialize the scanner. The given 'table' object must remain valid
// for the lifetime of this scanner object.
// TODO: should table be a const pointer?
explicit KuduScanner(KuduTable* table);
~KuduScanner();
// Set the projection used for this scanner by passing the column names to read.
//
// This overrides any previous call to SetProjectedColumnNames (or to the
// deprecated SetProjectedColumns).
Status SetProjectedColumnNames(const std::vector<std::string>& col_names) WARN_UNUSED_RESULT;
// Set the projection used for this scanner by passing the column indexes to read.
//
// This overrides any previous call to SetProjectedColumns/SetProjectedColumnIndexes.
Status SetProjectedColumnIndexes(const std::vector<int>& col_indexes) WARN_UNUSED_RESULT;
// DEPRECATED: See SetProjectedColumnNames
Status SetProjectedColumns(const std::vector<std::string>& col_names) WARN_UNUSED_RESULT;
// Add a predicate to this scanner.
//
// The predicates act as conjunctions -- i.e., they all must pass for
// a row to be returned.
//
// The Scanner takes ownership of 'pred', even if a bad Status is returned,
// so the caller must not delete 'pred' after this call.
Status AddConjunctPredicate(KuduPredicate* pred) WARN_UNUSED_RESULT;
// Add a lower bound (inclusive) primary key for the scan.
// If any bound is already added, this bound is intersected with that one.
//
// The scanner does not take ownership of 'key'; the caller may free it afterward.
Status AddLowerBound(const KuduPartialRow& key);
// Like AddLowerBound(), but the encoded primary key is an opaque slice of data
// obtained elsewhere.
//
// DEPRECATED: use AddLowerBound
Status AddLowerBoundRaw(const Slice& key);
// Add an upper bound (exclusive) primary key for the scan.
// If any bound is already added, this bound is intersected with that one.
//
// The scanner makes a copy of 'key'; the caller may free it afterward.
Status AddExclusiveUpperBound(const KuduPartialRow& key);
// Like AddExclusiveUpperBound(), but the encoded primary key is an opaque slice of data
// obtained elsewhere.
//
// DEPRECATED: use AddExclusiveUpperBound
Status AddExclusiveUpperBoundRaw(const Slice& key);
// Add a lower bound (inclusive) partition key for the scan.
//
// The scanner makes a copy of 'partition_key'; the caller may free it afterward.
//
// This method is unstable, and for internal use only.
Status AddLowerBoundPartitionKeyRaw(const Slice& partition_key);
// Add an upper bound (exclusive) partition key for the scan.
//
// The scanner makes a copy of 'partition_key'; the caller may free it afterward.
//
// This method is unstable, and for internal use only.
Status AddExclusiveUpperBoundPartitionKeyRaw(const Slice& partition_key);
// Set the block caching policy for this scanner. If true, scanned data blocks will be cached
// in memory and made available for future scans. Default is true.
Status SetCacheBlocks(bool cache_blocks);
// Begin scanning.
Status Open();
// Keeps the current remote scanner alive on the tablet server for an additional
// time-to-live (set by a configuration flag on the tablet server).
// This is useful if the interval between NextBatch() calls is big enough that the
// remote scanner might be garbage collected (the default TTL is 60 seconds).
// This does not invalidate any previously fetched results.
// This returns a non-OK status if the scanner was already garbage collected or if
// the tablet server was unreachable, for any reason.
//
// NOTE: A non-OK status returned by this method should not be taken as indication that
// the scan has failed. Subsequent calls to NextBatch() might still be successful,
// particularly if SetFaultTolerant() was called.
Status KeepAlive();
// Close the scanner.
// This releases resources on the server.
//
// This call does not block, and will not ever fail, even if the server
// cannot be contacted.
//
// NOTE: the scanner is reset to its initial state by this function.
// You'll have to re-add any projection, predicates, etc if you want
// to reuse this Scanner object.
void Close();
// Return true if there may be rows to be fetched from this scanner.
//
// Note: will be true provided there's at least one more tablet left to
// scan, even if that tablet has no data (we'll only know once we scan it).
// It will also be true after initially opening the scanner, before
// NextBatch is called for the first time.
bool HasMoreRows() const;
// Clears 'rows' and populates it with the next batch of rows from the tablet server.
// A call to NextBatch() invalidates all previously fetched results which might
// now be pointing to garbage memory.
//
// DEPRECATED: Use NextBatch(KuduScanBatch*) instead.
Status NextBatch(std::vector<KuduRowResult>* rows);
// Fetches the next batch of results for this scanner.
//
// A single KuduScanBatch instance may be reused. Each subsequent call replaces the data
// from the previous call, and invalidates any KuduScanBatch::RowPtr objects previously
// obtained from the batch.
Status NextBatch(KuduScanBatch* batch);
// Get the KuduTabletServer that is currently handling the scan.
// More concretely, this is the server that handled the most recent Open or NextBatch
// RPC made by this scanner.
//
// NOTE(review): ownership of the object stored in '*server' is not stated here --
// presumably the caller owns it and must delete it; confirm against the implementation.
Status GetCurrentServer(KuduTabletServer** server);
// Set the hint for the size of the next batch in bytes.
// If setting to 0 before calling Open(), it means that the first call
// to the tablet server won't return data.
Status SetBatchSizeBytes(uint32_t batch_size);
// Sets the replica selection policy while scanning.
//
// TODO: kill this in favor of a consistency-level-based API
Status SetSelection(KuduClient::ReplicaSelection selection) WARN_UNUSED_RESULT;
// Sets the ReadMode. Default is READ_LATEST.
Status SetReadMode(ReadMode read_mode) WARN_UNUSED_RESULT;
// DEPRECATED: use SetFaultTolerant.
Status SetOrderMode(OrderMode order_mode) WARN_UNUSED_RESULT;
// Scans are by default non fault-tolerant, and scans will fail if scanning an
// individual tablet fails (for example, if a tablet server crashes in the
// middle of a tablet scan).
//
// If this method is called, the scan will be resumed at another tablet server
// in the case of failure.
//
// Fault tolerant scans typically have lower throughput than non
// fault-tolerant scans. Fault tolerant scans use READ_AT_SNAPSHOT mode;
// if no snapshot timestamp is provided, the server will pick one.
Status SetFaultTolerant() WARN_UNUSED_RESULT;
// Sets the snapshot timestamp, in microseconds since the epoch, for scans in
// READ_AT_SNAPSHOT mode.
Status SetSnapshotMicros(uint64_t snapshot_timestamp_micros) WARN_UNUSED_RESULT;
// Sets the snapshot timestamp in raw encoded form (i.e. as returned by a
// previous call to a server), for scans in READ_AT_SNAPSHOT mode.
Status SetSnapshotRaw(uint64_t snapshot_timestamp) WARN_UNUSED_RESULT;
// Sets the maximum time, in milliseconds, that Open() and NextBatch() are allowed to take.
// NOTE(review): presumably the default is kScanTimeoutMillis -- confirm.
Status SetTimeoutMillis(int millis);
// Returns the schema of the projection being scanned.
KuduSchema GetProjectionSchema() const;
// Returns a string representation of this scan.
std::string ToString() const;
private:
// Implementation class, forward-declared here and marked KUDU_NO_EXPORT.
class KUDU_NO_EXPORT Data;
// Tests which are granted access to the private members of the scanner.
FRIEND_TEST(ClientTest, TestScanCloseProxy);
FRIEND_TEST(ClientTest, TestScanFaultTolerance);
FRIEND_TEST(ClientTest, TestScanNoBlockCaching);
FRIEND_TEST(ClientTest, TestScanTimeout);
// Owned.
Data* data_;
// Scanners may be neither copied nor assigned.
DISALLOW_COPY_AND_ASSIGN(KuduScanner);
};
// In-memory representation of a remote tablet server.
//
// Instances cannot be constructed directly by users: the constructor is
// private and only the friend classes KuduClient and KuduScanner have access
// to it. The destructor is public.
class KUDU_EXPORT KuduTabletServer {
public:
~KuduTabletServer();
// Returns the UUID of this tablet server. Is globally unique and
// guaranteed not to change for the lifetime of the tablet server.
//
// NOTE(review): the result is returned by reference; presumably it remains
// valid only as long as this KuduTabletServer object -- confirm.
const std::string& uuid() const;
// Returns the hostname of the first RPC address that this tablet server
// is listening on.
//
// NOTE(review): the result is returned by reference; presumably it remains
// valid only as long as this KuduTabletServer object -- confirm.
const std::string& hostname() const;
private:
// Implementation class, forward-declared here and marked KUDU_NO_EXPORT.
class KUDU_NO_EXPORT Data;
// KuduClient and KuduScanner are granted access to the private members,
// including the private constructor below.
friend class KuduClient;
friend class KuduScanner;
KuduTabletServer();
// Owned.
Data* data_;
// Tablet server objects may be neither copied nor assigned.
DISALLOW_COPY_AND_ASSIGN(KuduTabletServer);
};
} // namespace client
} // namespace kudu
#endif