src/kudu/client/client.h - kudu - Git at Google

 // Licensed to the Apache Software Foundation (ASF) under one
 // or more contributor license agreements.  See the NOTICE file
 // distributed with this work for additional information
 // regarding copyright ownership.  The ASF licenses this file
 // to you under the Apache License, Version 2.0 (the
 // "License"); you may not use this file except in compliance
 // with the License.  You may obtain a copy of the License at
 //
 //   http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing,
 // software distributed under the License is distributed on an
 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 // KIND, either express or implied.  See the License for the
 // specific language governing permissions and limitations
 // under the License.
 #ifndef KUDU_CLIENT_CLIENT_H
 #define KUDU_CLIENT_CLIENT_H

 #include <stdint.h>
 #include <string>
 #include <vector>

 #include "kudu/client/row_result.h"
 #include "kudu/client/scan_batch.h"
 #include "kudu/client/scan_predicate.h"
 #include "kudu/client/schema.h"
 #include "kudu/client/shared_ptr.h"
 #ifdef KUDU_HEADERS_NO_STUBS
 #include <gtest/gtest_prod.h>
 #include "kudu/gutil/macros.h"
 #include "kudu/gutil/port.h"
 #else
 #include "kudu/client/stubs.h"
 #endif
 #include "kudu/client/write_op.h"
 #include "kudu/util/kudu_export.h"
 #include "kudu/util/monotime.h"
 #include "kudu/util/status.h"

 #if _GLIBCXX_USE_CXX11_ABI
 #error \
   "Kudu will not function properly if built with gcc 5's new ABI. " \
   "Please modify your application to set -D_GLIBCXX_USE_CXX11_ABI=0. " \
   "For more information about the new ABI, see " \
   "https://gcc.gnu.org/onlinedocs/libstdc++/manual/using_dual_abi.html."
 #endif

 namespace kudu {

 class LinkedListTester;
 class PartitionSchema;

 namespace client {

 class KuduLoggingCallback;
 class KuduSession;
 class KuduStatusCallback;
 class KuduTable;
 class KuduTableAlterer;
 class KuduTableCreator;
 class KuduTabletServer;
 class KuduValue;
 class KuduWriteOperation;

 namespace internal {
 class Batcher;
 class GetTableSchemaRpc;
 class LookupRpc;
 class MetaCache;
 class RemoteTablet;
 class RemoteTabletServer;
 class WriteRpc;
 } // namespace internal

 // Installs a callback for internal client logging. It is invoked for a
 // log event of any severity, across any KuduClient instance.
 //
 // Only the first invocation has any effect; subsequent invocations are
 // a no-op. The caller must ensure that 'cb' stays alive until
 // UninstallLoggingCallback() is called.
 //
 // Before a callback is registered, all internal client log events are
 // logged to stderr.
 void KUDU_EXPORT InstallLoggingCallback(KuduLoggingCallback* cb);

 // Removes a callback installed via InstallLoggingCallback().
 //
 // Only the first invocation has any effect; subsequent invocations are
 // a no-op.
 //
 // Should be called before unloading the client library.
 void KUDU_EXPORT UninstallLoggingCallback();

 // Set the logging verbosity of the client library. By default, this is 0. Logs become
 // progressively more verbose as the level is increased. Empirically, the highest
 // verbosity level used in Kudu is 6, which includes very fine-grained tracing
 // information. Most useful logging is enabled at level 1 or 2, with the higher levels
 // used only in rare circumstances.
 //
 // Logs are emitted to stderr, or to the configured log callback at SEVERITY_INFO.
 //
 // This may be called safely at any point during usage of the library.
 void KUDU_EXPORT SetVerboseLogLevel(int level);

 // The Kudu client library uses signals internally in some cases. By default, it uses
 // SIGUSR2. If your application makes use of SIGUSR2, this advanced API can help
 // workaround conflicts.
 Status KUDU_EXPORT SetInternalSignalNumber(int signum);

 // Return a single-version string identifying the Kudu client.
 std::string KUDU_EXPORT GetShortVersionString();

 // Return a longer multi-line version string identifying the client, including
 // build time, etc.
 std::string KUDU_EXPORT GetAllVersionInfo();

 // Creates a new KuduClient with the desired options.
 //
 // Note that KuduClients are shared amongst multiple threads and, as such,
 // are stored in shared pointers.
 class KUDU_EXPORT KuduClientBuilder {
  public:
   KuduClientBuilder();
   ~KuduClientBuilder();

   KuduClientBuilder& clear_master_server_addrs();

   // Add RPC addresses of multiple masters.
   KuduClientBuilder& master_server_addrs(const std::vector<std::string>& addrs);

   // Add an RPC address of a master. At least one master is required.
   KuduClientBuilder& add_master_server_addr(const std::string& addr);

   // The default timeout used for administrative operations (e.g. CreateTable,
   // AlterTable, ...). Optional.
   //
   // If not provided, defaults to 10s.
   KuduClientBuilder& default_admin_operation_timeout(const MonoDelta& timeout);

   // The default timeout for individual RPCs. Optional.
   //
   // If not provided, defaults to 5s.
   KuduClientBuilder& default_rpc_timeout(const MonoDelta& timeout);

   // Creates the client.
   //
   // The return value may indicate an error in the create operation, or a
   // misuse of the builder; in the latter case, only the last error is
   // returned.
   Status Build(sp::shared_ptr<KuduClient>* client);
  private:
   class KUDU_NO_EXPORT Data;

   // Owned.
   Data* data_;

   DISALLOW_COPY_AND_ASSIGN(KuduClientBuilder);
 };

 // The KuduClient represents a connection to a cluster. From the user
 // perspective, they should only need to create one of these in their
 // application, likely a singleton -- but it's not a singleton in Kudu in any
 // way. Different Client objects do not interact with each other -- no
 // connection pooling, etc. Each KuduClient instance is sandboxed with no
 // global cross-client state.
 //
 // In the implementation, the client holds various pieces of common
 // infrastructure which is not table-specific:
 //
 // - RPC messenger: reactor threads and RPC connections are pooled here
 // - Authentication: the client is initialized with some credentials, and
 //   all accesses through it share those credentials.
 // - Caches: caches of table schemas, tablet locations, tablet server IP
 //   addresses, etc are shared per-client.
 //
 // In order to actually access data on the cluster, callers must first
 // create a KuduSession object using NewSession(). A KuduClient may
 // have several associated sessions.
 //
 // TODO: Cluster administration functions are likely to be in this class
 // as well.
 //
 // This class is thread-safe.
 class KUDU_EXPORT KuduClient : public sp::enable_shared_from_this<KuduClient> {
  public:
   ~KuduClient();

   // Creates a KuduTableCreator; it is the caller's responsibility to free it.
   KuduTableCreator* NewTableCreator();

   // set 'create_in_progress' to true if a CreateTable operation is in-progress
   Status IsCreateTableInProgress(const std::string& table_name,
                                  bool *create_in_progress);

   Status DeleteTable(const std::string& table_name);

   // Creates a KuduTableAlterer; it is the caller's responsibility to free it.
   KuduTableAlterer* NewTableAlterer(const std::string& table_name);

   // set 'alter_in_progress' to true if an AlterTable operation is in-progress
   Status IsAlterTableInProgress(const std::string& table_name,
                                 bool *alter_in_progress);

   Status GetTableSchema(const std::string& table_name,
                         KuduSchema* schema);

   Status ListTabletServers(std::vector<KuduTabletServer*>* tablet_servers);

   // List only those tables whose names pass a substring match on 'filter'.
   //
   // 'tables' is appended to only on success.
   Status ListTables(std::vector<std::string>* tables,
                     const std::string& filter = "");

   // Check if the table given by 'table_name' exists.
   //
   // 'exists' is set only on success.
   Status TableExists(const std::string& table_name, bool* exists);

   // Open the table with the given name. If the table has not been opened before
   // in this client, this will do an RPC to ensure that the table exists and
   // look up its schema.
   //
   // TODO: should we offer an async version of this as well?
   // TODO: probably should have a configurable timeout in KuduClientBuilder?
   Status OpenTable(const std::string& table_name,
                    sp::shared_ptr<KuduTable>* table);

   // Create a new session for interacting with the cluster.
   // User is responsible for destroying the session object.
   // This is a fully local operation (no RPCs or blocking).
   sp::shared_ptr<KuduSession> NewSession();

   // Policy with which to choose amongst multiple replicas.
   enum ReplicaSelection {
     // Select the LEADER replica.
     LEADER_ONLY,

     // Select the closest replica to the client, or a random one if all
     // replicas are equidistant.
     CLOSEST_REPLICA,

     // Select the first replica in the list.
     FIRST_REPLICA
   };

   bool IsMultiMaster() const;

   const MonoDelta& default_admin_operation_timeout() const;
   const MonoDelta& default_rpc_timeout() const;

   // Value for the latest observed timestamp when none has been observed or set.
   static const uint64_t kNoTimestamp;

   // Returns highest HybridTime timestamp observed by the client.
   // The latest observed timestamp can be used to start a snapshot scan on a
   // table which is guaranteed to contain all data written or previously read by
   // this client. See KuduScanner for more details on timestamps.
   uint64_t GetLatestObservedTimestamp() const;

   // Sets the latest observed HybridTime timestamp, encoded in the HybridTime format.
   // This is only useful when forwarding timestamps between clients to enforce
   // external consistency when using KuduSession::CLIENT_PROPAGATED external consistency
   // mode.
   // To use this the user must obtain the HybridTime encoded timestamp from the first
   // client with KuduClient::GetLatestObservedTimestamp() and the set it in the new
   // client with this method.
   void SetLatestObservedTimestamp(uint64_t ht_timestamp);

  private:
   class KUDU_NO_EXPORT Data;

   friend class KuduClientBuilder;
   friend class KuduScanner;
   friend class KuduTable;
   friend class KuduTableAlterer;
   friend class KuduTableCreator;
   friend class internal::Batcher;
   friend class internal::GetTableSchemaRpc;
   friend class internal::LookupRpc;
   friend class internal::MetaCache;
   friend class internal::RemoteTablet;
   friend class internal::RemoteTabletServer;
   friend class internal::WriteRpc;

   FRIEND_TEST(ClientTest, TestGetTabletServerBlacklist);
   FRIEND_TEST(ClientTest, TestMasterDown);
   FRIEND_TEST(ClientTest, TestMasterLookupPermits);
   FRIEND_TEST(ClientTest, TestReplicatedMultiTabletTableFailover);
   FRIEND_TEST(ClientTest, TestReplicatedTabletWritesWithLeaderElection);
   FRIEND_TEST(ClientTest, TestScanFaultTolerance);
   FRIEND_TEST(ClientTest, TestScanTimeout);
   FRIEND_TEST(ClientTest, TestWriteWithDeadMaster);
   FRIEND_TEST(MasterFailoverTest, TestPauseAfterCreateTableIssued);

   KuduClient();

   // Owned.
   Data* data_;

   DISALLOW_COPY_AND_ASSIGN(KuduClient);
 };

 // Creates a new table with the desired options.
 class KUDU_EXPORT KuduTableCreator {
  public:
   ~KuduTableCreator();

   // Sets the name to give the table. It is copied. Required.
   KuduTableCreator& table_name(const std::string& name);

   // Sets the schema with which to create the table. Must remain valid for
   // the lifetime of the builder. Required.
   KuduTableCreator& schema(const KuduSchema* schema);

   // Adds a set of hash partitions to the table.
   //
   // For each set of hash partitions added to the table, the total number of
   // table partitions is multiplied by the number of buckets. For example, if a
   // table is created with 3 split rows, and two hash partitions with 4 and 5
   // buckets respectively, the total number of table partitions will be 80
   // (4 range partitions * 4 hash buckets * 5 hash buckets).
   KuduTableCreator& add_hash_partitions(const std::vector<std::string>& columns,
                                         int32_t num_buckets);

   // Adds a set of hash partitions to the table.
   //
   // This constructor takes a seed value, which can be used to randomize the
   // mapping of rows to hash buckets. Setting the seed may provide some
   // amount of protection against denial of service attacks when the hashed
   // columns contain user provided values.
   KuduTableCreator& add_hash_partitions(const std::vector<std::string>& columns,
                                         int32_t num_buckets, int32_t seed);

   // Sets the columns on which the table will be range-partitioned.
   //
   // Every column must be a part of the table's primary key. If not set, the
   // table will be created with the primary-key columns as the range-partition
   // columns. If called with an empty vector, the table will be created without
   // range partitioning.
   //
   // Optional.
   KuduTableCreator& set_range_partition_columns(const std::vector<std::string>& columns);

   // Sets the rows on which to pre-split the table.
   // The table creator takes ownership of the rows.
   //
   // If any provided row is missing a value for any of the range partition
   // columns, the logical minimum value for that column type will be used by
   // default.
   //
   // If not provided, no range-based pre-splitting is performed.
   //
   // Optional.
   KuduTableCreator& split_rows(const std::vector<const KuduPartialRow*>& split_rows);

   // Sets the number of replicas for each tablet in the table.
   // This should be an odd number. Optional.
   //
   // If not provided (or if <= 0), falls back to the server-side default.
   KuduTableCreator& num_replicas(int n_replicas);

   // Set the timeout for the operation. This includes any waiting
   // after the create has been submitted (i.e if the create is slow
   // to be performed for a large table, it may time out and then
   // later be successful).
   KuduTableCreator& timeout(const MonoDelta& timeout);

   // Wait for the table to be fully created before returning.
   // Optional.
   //
   // If not provided, defaults to true.
   KuduTableCreator& wait(bool wait);

   // Creates the table.
   //
   // The return value may indicate an error in the create table operation,
   // or a misuse of the builder; in the latter case, only the last error is
   // returned.
   Status Create();
  private:
   class KUDU_NO_EXPORT Data;

   friend class KuduClient;

   explicit KuduTableCreator(KuduClient* client);

   // Owned.
   Data* data_;

   DISALLOW_COPY_AND_ASSIGN(KuduTableCreator);
 };

 // A KuduTable represents a table on a particular cluster. It holds the current
 // schema of the table. Any given KuduTable instance belongs to a specific KuduClient
 // instance.
 //
 // Upon construction, the table is looked up in the catalog (or catalog cache),
 // and the schema fetched for introspection.
 //
 // This class is thread-safe.
 class KUDU_EXPORT KuduTable : public sp::enable_shared_from_this<KuduTable> {
  public:
   ~KuduTable();

   const std::string& name() const;

   // Return the table's ID. This is an internal identifier which uniquely
   // identifies a table. If the table is deleted and recreated with the same
   // name, the ID will distinguish the old table from the new.
   const std::string& id() const;

   const KuduSchema& schema() const;

   // Create a new write operation for this table. It is the caller's
   // responsibility to free it, unless it is passed to KuduSession::Apply().
   KuduInsert* NewInsert();
   KuduUpdate* NewUpdate();
   KuduDelete* NewDelete();

   // Create a new comparison predicate which can be used for scanners
   // on this table.
   //
   // The type of 'value' must correspond to the type of the column to which
   // the predicate is to be applied. For example, if the given column is
   // any type of integer, the KuduValue should also be an integer, with its
   // value in the valid range for the column type. No attempt is made to cast
   // between floating point and integer values, or numeric and string values.
   //
   // The caller owns the result until it is passed into KuduScanner::AddConjunctPredicate().
   // The returned predicate takes ownership of 'value'.
   //
   // In the case of an error (e.g. an invalid column name), a non-NULL value
   // is still returned. The error will be returned when attempting to add this
   // predicate to a KuduScanner.
   KuduPredicate* NewComparisonPredicate(const Slice& col_name,
                                         KuduPredicate::ComparisonOp op,
                                         KuduValue* value);

   KuduClient* client() const;

   const PartitionSchema& partition_schema() const;

  private:
   class KUDU_NO_EXPORT Data;

   friend class KuduClient;

   KuduTable(const sp::shared_ptr<KuduClient>& client,
             const std::string& name,
             const std::string& table_id,
             const KuduSchema& schema,
             const PartitionSchema& partition_schema);

   // Owned.
   Data* data_;

   DISALLOW_COPY_AND_ASSIGN(KuduTable);
 };

 // Alters an existing table based on the provided steps.
 //
 // Sample usage:
 //   KuduTableAlterer* alterer = client->NewTableAlterer("table-name");
 //   alterer->AddColumn("foo")->Type(KuduColumnSchema::INT32)->NotNull();
 //   alterer->AlterColumn("bar")->Compression(KuduColumnStorageAttributes::LZ4);
 //   Status s = alterer->Alter();
 //   delete alterer;
 class KUDU_EXPORT KuduTableAlterer {
  public:
   ~KuduTableAlterer();

   // Renames the table.
   KuduTableAlterer* RenameTo(const std::string& new_name);

   // Adds a new column to the table.
   //
   // When adding a column, you must specify the default value of the new
   // column using KuduColumnSpec::DefaultValue(...).
   KuduColumnSpec* AddColumn(const std::string& name);

   // Alter an existing column.
   KuduColumnSpec* AlterColumn(const std::string& name);

   // Drops an existing column from the table.
   KuduTableAlterer* DropColumn(const std::string& name);

   // Set the timeout for the operation. This includes any waiting
   // after the alter has been submitted (i.e if the alter is slow
   // to be performed on a large table, it may time out and then
   // later be successful).
   KuduTableAlterer* timeout(const MonoDelta& timeout);

   // Wait for the table to be fully altered before returning.
   //
   // If not provided, defaults to true.
   KuduTableAlterer* wait(bool wait);

   // Alters the table.
   //
   // The return value may indicate an error in the alter operation, or a
   // misuse of the builder (e.g. add_column() with default_value=NULL); in
   // the latter case, only the last error is returned.
   Status Alter();

  private:
   class KUDU_NO_EXPORT Data;
   friend class KuduClient;

   KuduTableAlterer(KuduClient* client,
                    const std::string& name);

   // Owned.
   Data* data_;

   DISALLOW_COPY_AND_ASSIGN(KuduTableAlterer);
 };

 // An error which occurred in a given operation. This tracks the operation
 // which caused the error, along with whatever the actual error was.
 class KUDU_EXPORT KuduError {
  public:
   ~KuduError();

   // Return the actual error which occurred.
   const Status& status() const;

   // Return the operation which failed.
   const KuduWriteOperation& failed_op() const;

   // Release the operation that failed. The caller takes ownership. Must only
   // be called once.
   KuduWriteOperation* release_failed_op();

   // In some cases, it's possible that the server did receive and successfully
   // perform the requested operation, but the client can't tell whether or not
   // it was successful. For example, if the call times out, the server may still
   // succeed in processing at a later time.
   //
   // This function returns true if there is some chance that the server did
   // process the operation, and false if it can guarantee that the operation
   // did not succeed.
   bool was_possibly_successful() const;

  private:
   class KUDU_NO_EXPORT Data;

   friend class internal::Batcher;
   friend class KuduSession;

   KuduError(KuduWriteOperation* failed_op, const Status& error);

   // Owned.
   Data* data_;

   DISALLOW_COPY_AND_ASSIGN(KuduError);
 };


 // A KuduSession belongs to a specific KuduClient, and represents a context in
 // which all read/write data access should take place. Within a session,
 // multiple operations may be accumulated and batched together for better
 // efficiency. Settings like timeouts, priorities, and trace IDs are also set
 // per session.
 //
 // A KuduSession's main purpose is for grouping together multiple data-access
 // operations together into batches or transactions. It is important to note
 // the distinction between these two:
 //
 // * A batch is a set of operations which are grouped together in order to
 //   amortize fixed costs such as RPC call overhead and round trip times.
 //   A batch DOES NOT imply any ACID-like guarantees. Within a batch, some
 //   operations may succeed while others fail, and concurrent readers may see
 //   partial results. If the client crashes mid-batch, it is possible that some
 //   of the operations will be made durable while others were lost.
 //
 // * In contrast, a transaction is a set of operations which are treated as an
 //   indivisible semantic unit, per the usual definitions of database transactions
 //   and isolation levels.
 //
 // NOTE: Kudu does not currently support transactions! They are only mentioned
 // in the above documentation to clarify that batches are not transactional and
 // should only be used for efficiency.
 //
 // KuduSession is separate from KuduTable because a given batch or transaction
 // may span multiple tables. This is particularly important in the future when
 // we add ACID support, but even in the context of batching, we may be able to
 // coalesce writes to different tables hosted on the same server into the same
 // RPC.
 //
 // KuduSession is separate from KuduClient because, in a multi-threaded
 // application, different threads may need to concurrently execute
 // transactions. Similar to a JDBC "session", transaction boundaries will be
 // delineated on a per-session basis -- in between a "BeginTransaction" and
 // "Commit" call on a given session, all operations will be part of the same
 // transaction. Meanwhile another concurrent Session object can safely run
 // non-transactional work or other transactions without interfering.
 //
 // Additionally, there is a guarantee that writes from different sessions do not
 // get batched together into the same RPCs -- this means that latency-sensitive
 // clients can run through the same KuduClient object as throughput-oriented
 // clients, perhaps by setting the latency-sensitive session's timeouts low and
 // priorities high. Without the separation of batches, a latency-sensitive
 // single-row insert might get batched along with 10MB worth of inserts from the
 // batch writer, thus delaying the response significantly.
 //
 // Though we currently do not have transactional support, users will be forced
 // to use a KuduSession to instantiate reads as well as writes.  This will make
 // it more straight-forward to add RW transactions in the future without
 // significant modifications to the API.
 //
 // Users who are familiar with the Hibernate ORM framework should find this
 // concept of a Session familiar.
 //
 // This class is not thread-safe except where otherwise specified.
 class KUDU_EXPORT KuduSession : public sp::enable_shared_from_this<KuduSession> {
  public:
   ~KuduSession();

   enum FlushMode {
     // Every write will be sent to the server in-band with the Apply()
     // call. No batching will occur. This is the default flush mode. In this
     // mode, the Flush() call never has any effect, since each Apply() call
     // has already flushed the buffer. This is the default flush mode.
     AUTO_FLUSH_SYNC,

     // Apply() calls will return immediately, but the writes will be sent in
     // the background, potentially batched together with other writes from
     // the same session. If there is not sufficient buffer space, then Apply()
     // may block for buffer space to be available.
     //
     // Because writes are applied in the background, any errors will be stored
     // in a session-local buffer. Call CountPendingErrors() or GetPendingErrors()
     // to retrieve them.
     // TODO: provide an API for the user to specify a callback to do their own
     // error reporting.
     // TODO: specify which threads the background activity runs on (probably the
     // messenger IO threads?)
     //
     // NOTE: This is not implemented yet, see KUDU-456.
     //
     // The Flush() call can be used to block until the buffer is empty.
     AUTO_FLUSH_BACKGROUND,

     // Apply() calls will return immediately, and the writes will not be
     // sent until the user calls Flush(). If the buffer runs past the
     // configured space limit, then Apply() will return an error.
     MANUAL_FLUSH
   };

   // Set the flush mode.
   // REQUIRES: there should be no pending writes -- call Flush() first to ensure.
   Status SetFlushMode(FlushMode m) WARN_UNUSED_RESULT;

   // The possible external consistency modes on which Kudu operates.
   enum ExternalConsistencyMode {
     // The response to any write will contain a timestamp. Any further calls from the same
     // client to other servers will update those servers with that timestamp. Following
     // write operations from the same client will be assigned timestamps that are strictly
     // higher, enforcing external consistency without having to wait or incur any latency
     // penalties.
     //
     // In order to maintain external consistency for writes between two different clients
     // in this mode, the user must forward the timestamp from the first client to the
     // second by using KuduClient::GetLatestObservedTimestamp() and
     // KuduClient::SetLatestObservedTimestamp().
     //
     // WARNING: Failure to propagate timestamp information through back-channels between
     // two different clients will negate any external consistency guarantee under this
     // mode.
     //
     // This is the default mode.
     CLIENT_PROPAGATED,

     // The server will guarantee that write operations from the same or from other client
     // are externally consistent, without the need to propagate timestamps across clients.
     // This is done by making write operations wait until there is certainty that all
     // follow up write operations (operations that start after the previous one finishes)
     // will be assigned a timestamp that is strictly higher, enforcing external consistency.
     //
     // WARNING: Depending on the clock synchronization state of TabletServers this may
     // imply considerable latency. Moreover operations in COMMIT_WAIT external consistency
     // mode will outright fail if TabletServer clocks are either unsynchronized or
     // synchronized but with a maximum error which surpasses a pre-configured threshold.
     COMMIT_WAIT
   };

   // Set the new external consistency mode for this session.
   Status SetExternalConsistencyMode(ExternalConsistencyMode m) WARN_UNUSED_RESULT;

   // Set the amount of buffer space used by this session for outbound writes.
   // The effect of the buffer size varies based on the flush mode of the
   // session:
   //
   // AUTO_FLUSH_SYNC:
   //   since no buffering is done, this has no effect
   // AUTO_FLUSH_BACKGROUND:
   //   if the buffer space is exhausted, then write calls will block until there
   //   is space available in the buffer.
   // MANUAL_FLUSH:
   //   if the buffer space is exhausted, then write calls will return an error.
   Status SetMutationBufferSpace(size_t size) WARN_UNUSED_RESULT;

   // Set the timeout for writes made in this session.
   void SetTimeoutMillis(int millis);

   // TODO: add "doAs" ability here for proxy servers to be able to act on behalf of
   // other users, assuming access rights.

   // Apply the write operation. Transfers the write_op's ownership to the KuduSession.
   //
   // The behavior of this function depends on the current flush mode. Regardless
   // of flush mode, however, Apply may begin to perform processing in the background
   // for the call (e.g looking up the tablet, etc). Given that, an error may be
   // queued into the PendingErrors structure prior to flushing, even in MANUAL_FLUSH
   // mode.
   //
   // In case of any error, which may occur during flushing or because the write_op
   // is malformed, the write_op is stored in the session's error collector which
   // may be retrieved at any time.
   //
   // This is thread safe.
   Status Apply(KuduWriteOperation* write_op) WARN_UNUSED_RESULT;

   // Similar to the above, except never blocks. Even in the flush modes that
   // return immediately, 'cb' is triggered with the result. The callback may be
   // called by a reactor thread, or in some cases may be called inline by the
   // same thread which calls ApplyAsync(). 'cb' must remain valid until it called.
   //
   // TODO: not yet implemented.
   void ApplyAsync(KuduWriteOperation* write_op, KuduStatusCallback* cb);

   // Flush any pending writes.
   //
   // Returns a bad status if there are any pending errors after the rows have
   // been flushed. Callers should then use GetPendingErrors to determine which
   // specific operations failed.
   //
   // In AUTO_FLUSH_SYNC mode, this has no effect, since every Apply() call flushes
   // itself inline.
   //
   // In the case that the async version of this method is used, then the callback
   // will be called upon completion of the operations which were buffered since the
   // last flush. In other words, in the following sequence:
   //
   //    session->Insert(a);
   //    session->FlushAsync(callback_1);
   //    session->Insert(b);
   //    session->FlushAsync(callback_2);
   //
   // ... 'callback_2' will be triggered once 'b' has been inserted, regardless of whether
   // 'a' has completed or not.
   //
   // Note that this also means that, if FlushAsync is called twice in succession, with
   // no intervening operations, the second flush will return immediately. For example:
   //
   //    session->Insert(a);
   //    session->FlushAsync(callback_1); // called when 'a' is inserted
   //    session->FlushAsync(callback_2); // called immediately!
   //
   // Note that, as in all other async functions in Kudu, the callback may be called
   // either from an IO thread or the same thread which calls FlushAsync. The callback
   // should not block.
   //
   // For FlushAsync, 'cb' must remain valid until it is invoked.
   //
   // This function is thread-safe.
   Status Flush() WARN_UNUSED_RESULT;
   void FlushAsync(KuduStatusCallback* cb);

   // Close the session.
   // Returns an error if there are unflushed or in-flight operations.
   Status Close() WARN_UNUSED_RESULT;

   // Return true if there are operations which have not yet been delivered to the
   // cluster. This may include buffered operations (i.e those that have not yet been
   // flushed) as well as in-flight operations (i.e those that are in the process of
   // being sent to the servers).
   // TODO: maybe "incomplete" or "undelivered" is clearer?
   //
   // This function is thread-safe.
   bool HasPendingOperations() const;

   // Return the number of buffered operations. These are operations that have
   // not yet been flushed - i.e they are not en-route yet.
   //
   // Note that this is different than HasPendingOperations() above, which includes
   // operations which have been sent and not yet responded to.
   //
   // This is only relevant in MANUAL_FLUSH mode, where the result will not
   // decrease except for after a manual Flush, after which point it will be 0.
   // In the other flush modes, data is immediately put en-route to the destination,
   // so this will return 0.
   //
   // This function is thread-safe.
   int CountBufferedOperations() const;

   // Return the number of errors which are pending. Errors may accumulate when
   // using the AUTO_FLUSH_BACKGROUND mode.
   //
   // This function is thread-safe.
   int CountPendingErrors() const;

   // Return any errors from previous calls. If there were more errors
   // than could be held in the session's error storage, then sets *overflowed to true.
   //
   // Caller takes ownership of the returned errors.
   //
   // This function is thread-safe.
   void GetPendingErrors(std::vector<KuduError*>* errors, bool* overflowed);

   KuduClient* client() const;

  private:
   class KUDU_NO_EXPORT Data;

   friend class KuduClient;
   friend class internal::Batcher;
   explicit KuduSession(const sp::shared_ptr<KuduClient>& client);

   // Owned.
   Data* data_;

   DISALLOW_COPY_AND_ASSIGN(KuduSession);
 };


 // A single scanner. This class is not thread-safe, though different
 // scanners on different threads may share a single KuduTable object.
 class KUDU_EXPORT KuduScanner {
  public:
   // The possible read modes for scanners.
   enum ReadMode {
     // When READ_LATEST is specified the server will always return committed writes at
     // the time the request was received. This type of read does not return a snapshot
     // timestamp and is not repeatable.
     //
     // In ACID terms this corresponds to Isolation mode: "Read Committed"
     //
     // This is the default mode.
     READ_LATEST,

     // When READ_AT_SNAPSHOT is specified the server will attempt to perform a read
     // at the provided timestamp. If no timestamp is provided the server will take the
     // current time as the snapshot timestamp. In this mode reads are repeatable, i.e.
     // all future reads at the same timestamp will yield the same data. This is
     // performed at the expense of waiting for in-flight transactions whose timestamp
     // is lower than the snapshot's timestamp to complete, so it might incur a latency
     // penalty.
     //
     // In ACID terms this, by itself, corresponds to Isolation mode "Repeatable
     // Read". If all writes to the scanned tablet are made externally consistent,
     // then this corresponds to Isolation mode "Strict-Serializable".
     //
     // Note: there currently "holes", which happen in rare edge conditions, by which writes
     // are sometimes not externally consistent even when action was taken to make them so.
     // In these cases Isolation may degenerate to mode "Read Committed". See KUDU-430.
     READ_AT_SNAPSHOT
   };

   // Whether the rows should be returned in order. This affects the fault-tolerance properties
   // of a scanner.
   enum OrderMode {
     // Rows will be returned in an arbitrary order determined by the tablet server.
     // This is efficient, but unordered scans are not fault-tolerant and cannot be resumed
     // in the case of tablet server failure.
     //
     // This is the default mode.
     UNORDERED,
     // Rows will be returned ordered by primary key. Sorting the rows imposes additional overhead
     // on the tablet server, but means that scans are fault-tolerant and will be resumed at
     // another tablet server in the case of failure.
     ORDERED
   };

   // Default scanner timeout.
   // This is set to 3x the default RPC timeout (see KuduClientBuilder::default_rpc_timeout()).
   enum { kScanTimeoutMillis = 15000 };

   // Initialize the scanner. The given 'table' object must remain valid
   // for the lifetime of this scanner object.
   // TODO: should table be a const pointer?
   explicit KuduScanner(KuduTable* table);
   ~KuduScanner();

   // Set the projection used for this scanner by passing the column names to read.
   //
   // This overrides any previous call to SetProjectedColumns.
   Status SetProjectedColumnNames(const std::vector<std::string>& col_names) WARN_UNUSED_RESULT;

   // Set the projection used for this scanner by passing the column indexes to read.
   //
   // This overrides any previous call to SetProjectedColumns/SetProjectedColumnIndexes.
   Status SetProjectedColumnIndexes(const std::vector<int>& col_indexes) WARN_UNUSED_RESULT;

   // DEPRECATED: See SetProjectedColumnNames
   Status SetProjectedColumns(const std::vector<std::string>& col_names) WARN_UNUSED_RESULT;

   // Add a predicate to this scanner.
   //
   // The predicates act as conjunctions -- i.e, they all must pass for
   // a row to be returned.
   //
   // The Scanner takes ownership of 'pred', even if a bad Status is returned.
   Status AddConjunctPredicate(KuduPredicate* pred) WARN_UNUSED_RESULT;

   // Add a lower bound (inclusive) primary key for the scan.
   // If any bound is already added, this bound is intersected with that one.
   //
   // The scanner does not take ownership of 'key'; the caller may free it afterward.
   Status AddLowerBound(const KuduPartialRow& key);

   // Like AddLowerBound(), but the encoded primary key is an opaque slice of data
   // obtained elsewhere.
   //
   // DEPRECATED: use AddLowerBound
   Status AddLowerBoundRaw(const Slice& key);

   // Add an upper bound (exclusive) primary key for the scan.
   // If any bound is already added, this bound is intersected with that one.
   //
   // The scanner makes a copy of 'key'; the caller may free it afterward.
   Status AddExclusiveUpperBound(const KuduPartialRow& key);

   // Like AddExclusiveUpperBound(), but the encoded primary key is an opaque slice of data
   // obtained elsewhere.
   //
   // DEPRECATED: use AddExclusiveUpperBound
   Status AddExclusiveUpperBoundRaw(const Slice& key);

   // Add a lower bound (inclusive) partition key for the scan.
   //
   // The scanner makes a copy of 'partition_key'; the caller may free it afterward.
   //
   // This method is unstable, and for internal use only.
   Status AddLowerBoundPartitionKeyRaw(const Slice& partition_key);

   // Add an upper bound (exclusive) partition key for the scan.
   //
   // The scanner makes a copy of 'partition_key'; the caller may free it afterward.
   //
   // This method is unstable, and for internal use only.
   Status AddExclusiveUpperBoundPartitionKeyRaw(const Slice& partition_key);

   // Set the block caching policy for this scanner. If true, scanned data blocks will be cached
   // in memory and made available for future scans. Default is true.
   Status SetCacheBlocks(bool cache_blocks);

   // Begin scanning.
   Status Open();

   // Keeps the current remote scanner alive on the Tablet server for an additional
   // time-to-live (set by a configuration flag on the tablet server).
   // This is useful if the interval in between NextBatch() calls is big enough that the
   // remote scanner might be garbage collected (default ttl is set to 60 secs.).
   // This does not invalidate any previously fetched results.
   // This returns a non-OK status if the scanner was already garbage collected or if
   // the TabletServer was unreachable, for any reason.
   //
   // NOTE: A non-OK status returned by this method should not be taken as indication that
   // the scan has failed. Subsequent calls to NextBatch() might still be successful,
   // particularly if SetFaultTolerant() was called.
   Status KeepAlive();

   // Close the scanner.
   // This releases resources on the server.
   //
   // This call does not block, and will not ever fail, even if the server
   // cannot be contacted.
   //
   // NOTE: the scanner is reset to its initial state by this function.
   // You'll have to re-add any projection, predicates, etc if you want
   // to reuse this Scanner object.
   void Close();

   // Return true if there may be rows to be fetched from this scanner.
   //
   // Note: will be true provided there's at least one more tablet left to
   // scan, even if that tablet has no data (we'll only know once we scan it).
   // It will also be true after the initially opening the scanner before
   // NextBatch is called for the first time.
   bool HasMoreRows() const;

   // Clears 'rows' and populates it with the next batch of rows from the tablet server.
   // A call to NextBatch() invalidates all previously fetched results which might
   // now be pointing to garbage memory.
   //
   // DEPRECATED: Use NextBatch(KuduScanBatch*) instead.
   Status NextBatch(std::vector<KuduRowResult>* rows);

   // Fetches the next batch of results for this scanner.
   //
   // A single KuduScanBatch instance may be reused. Each subsequent call replaces the data
   // from the previous call, and invalidates any KuduScanBatch::RowPtr objects previously
   // obtained from the batch.
   Status NextBatch(KuduScanBatch* batch);

   // Get the KuduTabletServer that is currently handling the scan.
   // More concretely, this is the server that handled the most recent Open or NextBatch
   // RPC made by the server.
   Status GetCurrentServer(KuduTabletServer** server);

   // Set the hint for the size of the next batch in bytes.
   // If setting to 0 before calling Open(), it means that the first call
   // to the tablet server won't return data.
   Status SetBatchSizeBytes(uint32_t batch_size);

   // Sets the replica selection policy while scanning.
   //
   // TODO: kill this in favor of a consistency-level-based API
   Status SetSelection(KuduClient::ReplicaSelection selection) WARN_UNUSED_RESULT;

   // Sets the ReadMode. Default is READ_LATEST.
   Status SetReadMode(ReadMode read_mode) WARN_UNUSED_RESULT;

   // DEPRECATED: use SetFaultTolerant.
   Status SetOrderMode(OrderMode order_mode) WARN_UNUSED_RESULT;

   // Scans are by default non fault-tolerant, and scans will fail if scanning an
   // individual tablet fails (for example, if a tablet server crashes in the
   // middle of a tablet scan).
   //
   // If this method is called, the scan will be resumed at another tablet server
   // in the case of failure.
   //
   // Fault tolerant scans typically have lower throughput than non
   // fault-tolerant scans. Fault tolerant scans use READ_AT_SNAPSHOT mode,
   // if no snapshot timestamp is provided, the server will pick one.
   Status SetFaultTolerant() WARN_UNUSED_RESULT;

   // Sets the snapshot timestamp, in microseconds since the epoch, for scans in
   // READ_AT_SNAPSHOT mode.
   Status SetSnapshotMicros(uint64_t snapshot_timestamp_micros) WARN_UNUSED_RESULT;

   // Sets the snapshot timestamp in raw encoded form (i.e. as returned by a
   // previous call to a server), for scans in READ_AT_SNAPSHOT mode.
   Status SetSnapshotRaw(uint64_t snapshot_timestamp) WARN_UNUSED_RESULT;

   // Sets the maximum time that Open() and NextBatch() are allowed to take.
   Status SetTimeoutMillis(int millis);

   // Returns the schema of the projection being scanned.
   KuduSchema GetProjectionSchema() const;

   // Returns a string representation of this scan.
   std::string ToString() const;
  private:
   class KUDU_NO_EXPORT Data;

   FRIEND_TEST(ClientTest, TestScanCloseProxy);
   FRIEND_TEST(ClientTest, TestScanFaultTolerance);
   FRIEND_TEST(ClientTest, TestScanNoBlockCaching);
   FRIEND_TEST(ClientTest, TestScanTimeout);

   // Owned.
   Data* data_;

   DISALLOW_COPY_AND_ASSIGN(KuduScanner);
 };

 // In-memory representation of a remote tablet server.
 class KUDU_EXPORT KuduTabletServer {
  public:
   ~KuduTabletServer();

   // Returns the UUID of this tablet server. Is globally unique and
   // guaranteed not to change for the lifetime of the tablet server.
   const std::string& uuid() const;

   // Returns the hostname of the first RPC address that this tablet server
   // is listening on.
   const std::string& hostname() const;

  private:
   class KUDU_NO_EXPORT Data;

   friend class KuduClient;
   friend class KuduScanner;

   KuduTabletServer();

   // Owned.
   Data* data_;

   DISALLOW_COPY_AND_ASSIGN(KuduTabletServer);
 };

 } // namespace client
 } // namespace kudu
 #endif