// blob: 7e3f2a9a880a2ea2b495a1e2169f910a91029acf [file] [log] [blame]
//
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#ifndef KUDU_CLIENT_SCHEMA_H
#define KUDU_CLIENT_SCHEMA_H
// NOTE: using stdint.h instead of cstdint because this file is supposed
// to be processed by a compiler lacking C++11 support.
#include <stdint.h>
#include <cstddef>
#include <string>
#include <vector>
#ifdef KUDU_HEADERS_NO_STUBS
#include <gtest/gtest_prod.h>
#include "kudu/gutil/port.h"
#else
#include "kudu/client/stubs.h"
#endif
#include "kudu/util/kudu_export.h"
#include "kudu/util/status.h"
namespace kudu {
// Forward declarations of types that this header refers to only in function
// signatures, pointer members, or friend declarations; their definitions
// are not part of this header.
class ColumnSchema;
class KuduPartialRow;
class Schema;
class Slice;
struct ColumnSchemaDelta;
namespace tools {
// Named in friend declarations of client::KuduSchema.
class RemoteKsckCluster;
class ReplicaDumper;
}
namespace client {
namespace internal {
// Named in friend declarations of KuduSchema.
class GetTableSchemaRpc;
class LookupRpc;
class MetaCache;
class MetaCacheEntry;
class WriteRpc;
} // namespace internal
// Declared ahead of their use by the classes below: KuduSchema is named in
// friend declarations and in KuduSchemaBuilder::Build(), KuduValue in
// KuduColumnSpec::Default().
class KuduSchema;
class KuduValue;
/// @brief Representation of column type attributes.
///
/// Holds the precision and scale of a decimal column and the maximum length
/// of a VARCHAR column, as exposed by the accessors below.
class KUDU_EXPORT KuduColumnTypeAttributes {
public:
/// Create a KuduColumnTypeAttributes object without explicitly specifying
/// precision, scale, or length.
KuduColumnTypeAttributes();
/// Create a KuduColumnTypeAttributes object as a copy of the other one.
///
/// @param [in] other
/// The other KuduColumnTypeAttributes object to use as a reference.
KuduColumnTypeAttributes(const KuduColumnTypeAttributes& other);
/// Create a KuduColumnTypeAttributes object with the specified precision
/// and scale.
///
/// @param [in] precision
/// The precision of a decimal column.
/// @param [in] scale
/// The scale of a decimal column.
KuduColumnTypeAttributes(int8_t precision, int8_t scale);
/// Create a KuduColumnTypeAttributes object with the specified length.
///
/// @param [in] length
/// The maximum length of a VARCHAR column in characters.
explicit KuduColumnTypeAttributes(uint16_t length);
~KuduColumnTypeAttributes();
/// @name Assign/copy KuduColumnTypeAttributes.
///@{
/// The assignment operator.
///
/// @param [in] other
/// The source KuduColumnTypeAttributes object to use as a reference.
/// @return Copy of 'other'
KuduColumnTypeAttributes& operator=(const KuduColumnTypeAttributes& other);
/// Make this object a copy of the other one.
///
/// @param [in] other
/// The source KuduColumnTypeAttributes object to use as a reference.
void CopyFrom(const KuduColumnTypeAttributes& other);
///@}
/// @return Precision for the column type.
int8_t precision() const;
/// @return Scale for the column type.
int8_t scale() const;
/// @return Length for the column type.
uint16_t length() const;
private:
friend class KuduColumnSchema;
friend class KuduColumnSpec;
friend class KuduSchema;
// Create a KuduColumnTypeAttributes object specifying precision, scale, and
// length at once. Being private, it is reachable only from this class and
// the friends listed above.
KuduColumnTypeAttributes(int8_t precision, int8_t scale, uint16_t length);
// Implementation class; only declared here, its definition is not part of
// this header.
class KUDU_NO_EXPORT Data;
// Owned.
Data* data_;
};
/// @brief Representation of column storage attributes.
class KUDU_EXPORT KuduColumnStorageAttributes {
public:
/// @brief Column encoding types.
enum EncodingType {
AUTO_ENCODING = 0,
PLAIN_ENCODING = 1,
PREFIX_ENCODING = 2,
RLE = 4,
DICT_ENCODING = 5,
BIT_SHUFFLE = 6,
/// @deprecated GROUP_VARINT is not supported for valid types, and
/// will fall back to another encoding on the server side.
GROUP_VARINT = 3
};
/// @brief Column compression types.
enum CompressionType {
DEFAULT_COMPRESSION = 0,
NO_COMPRESSION = 1,
SNAPPY = 2,
LZ4 = 3,
ZLIB = 4,
};
/// @deprecated This constructor is deprecated for external use, and will
/// be made private in a future release.
///
/// @todo Make this constructor private.
///
/// @param [in] encoding
/// Encoding type for the column storage.
/// @param [in] compression
/// Compression type for the column storage.
/// @param [in] block_size
/// Block size (in bytes, uncompressed data) for the column storage.
explicit KuduColumnStorageAttributes(
EncodingType encoding = AUTO_ENCODING,
CompressionType compression = DEFAULT_COMPRESSION,
int32_t block_size = 0)
ATTRIBUTE_DEPRECATED("this constructor will be private in a future release")
: encoding_(encoding),
compression_(compression),
block_size_(block_size) {
}
/// @return Encoding type for the column storage.
const EncodingType encoding() const {
return encoding_;
}
/// @return Compression type for the column storage.
const CompressionType compression() const {
return compression_;
}
/// @return String representation of the storage attributes.
std::string ToString() const;
/// @param [in] encoding
/// String representation of the column encoding type
/// @param [out] type
/// Enum representation of the column encoding type,
/// Converted from string format.
/// @return Operation result status.
static Status StringToEncodingType(const std::string& encoding,
EncodingType* type);
/// @param [in] compression
/// String representation of the column compression type
/// @param [out] type
/// Enum representation of the column compression type,
/// Converted from string format.
/// @return Operation result status.
static Status StringToCompressionType(const std::string& compression,
CompressionType* type);
private:
EncodingType encoding_;
CompressionType compression_;
int32_t block_size_;
};
/// @brief Representation of the column schema.
///
/// Instances are not directly constructible by client code (other than by
/// copying): the non-copy constructors are private.
class KUDU_EXPORT KuduColumnSchema {
public:
/// @brief Supported data types for columns.
enum DataType {
INT8 = 0,
INT16 = 1,
INT32 = 2,
INT64 = 3,
STRING = 4,
BOOL = 5,
FLOAT = 6,
DOUBLE = 7,
BINARY = 8,
UNIXTIME_MICROS = 9,
DECIMAL = 10,
VARCHAR = 11,
TIMESTAMP = UNIXTIME_MICROS, //!< deprecated, use UNIXTIME_MICROS
DATE = 12
};
/// @param [in] type
/// Column data type.
/// @return String representation of the column data type.
static std::string DataTypeToString(DataType type);
/// @param [in] type_str
/// String representation of the column data type
/// @param [out] type
/// Enum representation of the column data type, Converted from string format.
/// @return Operation result status.
static Status StringToDataType(const std::string& type_str, DataType* type);
/// Construct KuduColumnSchema object as a copy of another object.
///
/// @param [in] other
/// The reference object to copy from.
KuduColumnSchema(const KuduColumnSchema& other);
~KuduColumnSchema();
/// The assignment operator.
///
/// @param [in] other
/// The reference object to assign from.
/// @return The updated object.
KuduColumnSchema& operator=(const KuduColumnSchema& other);
/// Make this object an identical copy of the other one.
///
/// @param [in] other
/// The reference object to copy from.
void CopyFrom(const KuduColumnSchema& other);
/// Check whether the object is identical to the other one.
///
/// @param [in] other
/// The reference object to compare with.
/// @return @c true iff the object is identical to the specified one.
bool Equals(const KuduColumnSchema& other) const;
/// @name Getters to expose column schema information.
///
/// @todo Expose default column value and attributes?
///
///@{
/// @return Name of the column schema.
const std::string& name() const;
/// @return Type of the column schema.
DataType type() const;
/// @return @c true iff the column schema has the nullable attribute set.
bool is_nullable() const;
///@}
/// @return Type attributes of the column schema.
KuduColumnTypeAttributes type_attributes() const;
/// @return comment of the column schema.
///
/// @note An empty string will be returned if there is no comment.
const std::string& comment() const;
private:
friend class KuduColumnSpec;
friend class KuduSchema;
friend class KuduSchemaBuilder;
// KuduTableAlterer::Data needs to be a friend. Friending the parent class
// is transitive to nested classes. See https://s.apache.org/inner-class-friends
friend class KuduTableAlterer;
// FRIEND_TEST is only available when <gtest/gtest_prod.h> is included, which
// this header does only if KUDU_HEADERS_NO_STUBS is defined.
#ifdef KUDU_HEADERS_NO_STUBS
FRIEND_TEST(KuduColumnSchemaTest, TestEquals);
#endif
// Default constructor; private, so available only to this class and the
// friends listed above.
KuduColumnSchema();
// The default value of the 'storage_attributes' parameter below invokes the
// KuduColumnStorageAttributes constructor, which is marked with
// ATTRIBUTE_DEPRECATED. The pragmas silence -Wdeprecated-declarations around
// this declaration; they are emitted only for clang and for GCC 4.6 or newer.
#if defined(__clang__) || \
(defined(__GNUC__) && (__GNUC__ * 10000 + __GNUC_MINOR__ * 100) >= 40600)
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
#endif
/// This constructor is private because clients should use the Builder API.
///
/// @param [in] name
/// Name of the column.
/// @param [in] type
/// Data type of the column.
/// @param [in] is_nullable
/// Whether the column has the nullable attribute set.
/// @param [in] default_value
/// Default value for the column, or NULL for none.
/// NOTE(review): the expected in-memory representation and ownership of
/// the pointed-to value are not visible in this header -- confirm against
/// the implementation.
/// @param [in] storage_attributes
/// Storage attributes for the column.
/// @param [in] type_attributes
/// Type attributes for the column.
/// @param [in] comment
/// Comment for the column.
KuduColumnSchema(
const std::string &name,
DataType type,
bool is_nullable = false,
const void* default_value = NULL, //NOLINT(modernize-use-nullptr)
const KuduColumnStorageAttributes& storage_attributes = KuduColumnStorageAttributes(),
const KuduColumnTypeAttributes& type_attributes = KuduColumnTypeAttributes(),
const std::string& comment = "");
#if defined(__clang__) || \
(defined(__GNUC__) && (__GNUC__ * 10000 + __GNUC_MINOR__ * 100) >= 40600)
#pragma GCC diagnostic pop
#endif
// Owned.
ColumnSchema* col_;
};
/// @brief Builder API for specifying or altering a column
/// within a table schema.
///
/// An object of this type cannot be constructed directly, but rather
/// is returned from KuduSchemaBuilder::AddColumn() to specify a column
/// within a Schema.
///
/// Every setter returns a pointer to the modified object, so calls can be
/// chained.
///
/// @todo KUDU-861: this API will also be used for an improved AlterTable API.
class KUDU_EXPORT KuduColumnSpec {
public:
/// Set the default value for the column.
///
/// When adding a new column to a table, this default value will be used to
/// fill the new column in all existing rows. The default value
/// will also be used when inserting a new row with no value for the column.
///
/// @param [in] value
/// The value to use as the default. The KuduColumnSpec takes ownership
/// over the passed parameter.
///
/// @return Pointer to the modified object.
KuduColumnSpec* Default(KuduValue* value);
/// Set the preferred compression type for the column.
///
/// @param [in] compression
/// The compression type to use.
/// @return Pointer to the modified object.
KuduColumnSpec* Compression(KuduColumnStorageAttributes::CompressionType compression);
/// Set the preferred encoding for the column.
///
/// @note Not all encodings are supported for all column types.
///
/// @param [in] encoding
/// The encoding to use.
/// @return Pointer to the modified object.
KuduColumnSpec* Encoding(KuduColumnStorageAttributes::EncodingType encoding);
/// Set the target block size for the column.
///
/// This is the number of bytes of user data packed per block on disk, and
/// represents the unit of IO when reading the column. Larger values
/// may improve scan performance, particularly on spinning media. Smaller
/// values may improve random access performance, particularly for workloads
/// that have high cache hit rates or operate on fast storage such as SSD.
///
/// @note The block size specified here corresponds to uncompressed data.
/// The actual size of the unit read from disk may be smaller if
/// compression is enabled.
///
/// @note It's recommended that this not be set any lower than 4096 (4KB)
/// or higher than 1048576 (1MB).
/// @todo KUDU-1107: move above info to docs
///
/// @param [in] block_size
/// Block size (in bytes) to use.
/// @return Pointer to the modified object.
KuduColumnSpec* BlockSize(int32_t block_size);
/// @name Operations only relevant for decimal columns.
///@{
/// Set the precision for the column.
///
/// Clients must specify a precision for decimal columns.
/// Precision is the total number of digits that can be
/// represented by the column, regardless of the location of the decimal point.
/// For example, representing integer values up to 9999, and fractional
/// values up to 99.99, both require a precision of 4. You can also represent
/// corresponding negative values, without any change in the precision.
/// For example, the range -9999 to 9999 still only requires a precision of 4.
///
/// The precision must be between 1 and 38.
///
/// @param [in] precision
/// Desired precision to set.
/// @return Pointer to the modified object.
KuduColumnSpec* Precision(int8_t precision);
/// Set the scale for the column.
///
/// Clients can specify a scale for decimal columns.
/// Scale represents the number of fractional digits. This value must be less
/// than or equal to precision. A scale of 0 produces integral values,
/// with no fractional part. If precision and scale are equal, all the digits
/// come after the decimal point; for example, with a precision and scale
/// of 4 all the values are between -0.9999 and 0.9999.
///
/// The scale must be between 0 and the column's precision, inclusive.
/// If no scale is provided a default scale of 0 is used.
///
/// @param [in] scale
/// Desired scale to set.
/// @return Pointer to the modified object.
KuduColumnSpec* Scale(int8_t scale);
///@}
/// @name Operation only relevant for VARCHAR columns.
///@{
/// Set the length for a column.
///
/// Clients can specify a length for VARCHAR columns.
/// Length represents the maximum length of a VARCHAR column in
/// characters.
///
/// The length must be greater than 0 and less than 65536.
/// If no length is provided a default length of 65535 is used.
///
/// @param [in] length
/// Desired length to set.
/// @return Pointer to the modified object.
KuduColumnSpec* Length(uint16_t length);
///@}
/// @name Operations only relevant for Create Table
///@{
/// Set the column to be the primary key of the table.
///
/// This may only be used to set non-composite primary keys. If a composite
/// key is desired, use KuduSchemaBuilder::SetPrimaryKey(). This may not be
/// used in conjunction with KuduSchemaBuilder::SetPrimaryKey().
///
/// @note Primary keys may not be changed after a table is created.
///
/// @return Pointer to the modified object.
KuduColumnSpec* PrimaryKey();
/// Set the column to be not nullable.
///
/// @note Column nullability may not be changed once a table is created.
///
/// @return Pointer to the modified object.
KuduColumnSpec* NotNull();
/// Set the column to be nullable (the default).
///
/// @note Column nullability may not be changed once a table is created.
///
/// @return Pointer to the modified object.
KuduColumnSpec* Nullable();
/// Set the data type of the column.
///
/// @note Column data types may not be changed once a table is created.
///
/// @param [in] type
/// The data type to set.
/// @return Pointer to the modified object.
KuduColumnSpec* Type(KuduColumnSchema::DataType type);
///@}
/// @name Operations only relevant for Alter Table
///@{
/// Remove the default value for the column.
///
/// Without a default, clients must always specify a value for the column
/// when inserting data.
///
/// @return Pointer to the modified object.
KuduColumnSpec* RemoveDefault();
/// Rename the column.
///
/// @param [in] new_name
/// The new name for the column.
/// @return Pointer to the modified object.
KuduColumnSpec* RenameTo(const std::string& new_name);
///@}
/// Set the comment of the column.
///
/// @param [in] comment
/// The comment for the column.
/// @return Pointer to the modified object.
KuduColumnSpec* Comment(const std::string& comment);
private:
// Implementation class; only declared here, its definition is not part of
// this header.
class KUDU_NO_EXPORT Data;
friend class KuduSchemaBuilder;
friend class KuduTableAlterer;
// This class should always be owned and deleted by one of its friends,
// not the user.
~KuduColumnSpec();
// Private, like the destructor: only the friends listed above can create
// a specification for the column named 'col_name'.
explicit KuduColumnSpec(const std::string& col_name);
// NOTE(review): judging by the signatures, the two methods below convert
// this specification into a full column schema ('col') and into a column
// delta ('col_delta') respectively, reporting failures via the returned
// Status -- confirm against the implementation.
Status ToColumnSchema(KuduColumnSchema* col) const;
Status ToColumnSchemaDelta(ColumnSchemaDelta* col_delta) const;
// NOTE(review): presumably a view of the value set via Default(); the
// lifetime of the data behind the returned Slice is not visible here.
Slice DefaultValueAsSlice() const;
// Owned.
Data* data_;
};
/// @brief Builder API for constructing a KuduSchema object.
///
/// The API here is a "fluent" style of programming, such that the resulting
/// code looks somewhat like a SQL "CREATE TABLE" statement. For example:
///
/// SQL:
/// @code
/// CREATE TABLE t (
/// my_key int not null primary key,
/// a float default 1.5
/// );
/// @endcode
///
/// is represented as:
/// @code
/// KuduSchemaBuilder t;
/// t.AddColumn("my_key")->Type(KuduColumnSchema::INT32)->NotNull()->PrimaryKey();
/// t.AddColumn("a")->Type(KuduColumnSchema::FLOAT)->Default(KuduValue::FromFloat(1.5));
/// KuduSchema schema;
/// t.Build(&schema);
/// @endcode
class KUDU_EXPORT KuduSchemaBuilder {
public:
/// Create a builder with no columns added and no primary key set yet;
/// populate it via AddColumn() and SetPrimaryKey().
KuduSchemaBuilder();
~KuduSchemaBuilder();
/// Add a column with the specified name to the schema.
///
/// @param [in] name
/// Name of the column to add.
/// @return A KuduColumnSpec object for a new column within the Schema.
/// The returned object is owned by the KuduSchemaBuilder.
KuduColumnSpec* AddColumn(const std::string& name);
/// Set the primary key of the new Schema based on the given column names.
///
/// This may be used to specify a compound primary key.
///
/// @param [in] key_col_names
/// Names of the columns to include into the compound primary key.
/// @return Pointer to the modified object.
KuduSchemaBuilder* SetPrimaryKey(const std::vector<std::string>& key_col_names);
/// Build the schema based on current configuration of the builder object.
///
/// @param [out] schema
/// The placeholder for the result schema. Upon successful completion,
/// the parameter is reset to the result of this builder: literally,
/// calling KuduSchema::Reset() on the parameter.
/// @return Operation result status. If the resulting would-be-schema
/// is invalid for any reason (e.g. missing types, duplicate column names,
/// etc.) a bad Status is returned.
Status Build(KuduSchema* schema);
private:
// Implementation class; only declared here, its definition is not part of
// this header.
class KUDU_NO_EXPORT Data;
// Owned.
Data* data_;
};
/// @brief A representation of a table's schema.
class KUDU_EXPORT KuduSchema {
public:
/// Create a KuduSchema object without specifying any columns. It can be
/// filled in afterwards, e.g. by passing it to KuduSchemaBuilder::Build()
/// or by calling CopyFrom().
KuduSchema();
/// Create a KuduSchema object as a copy of the other one.
///
/// @param [in] other
/// The other KuduSchema object to use as a reference.
KuduSchema(const KuduSchema& other);
~KuduSchema();
/// @name Assign/copy the schema
///@{
/// The assignment operator.
///
/// @param [in] other
/// The source KuduSchema object to use as a reference.
/// @return Copy of 'other'
KuduSchema& operator=(const KuduSchema& other);
/// Make this object a copy of the other one.
///
/// @param [in] other
/// The source KuduSchema object to use as a reference.
void CopyFrom(const KuduSchema& other);
///@}
/// @deprecated This method will be removed soon.
///
/// @todo Remove KuduSchema::Reset().
///
/// @param [in] columns
/// Per-column schema information.
/// @param [in] key_columns
/// Number of key columns in the schema.
/// @return Operation result status.
Status Reset(const std::vector<KuduColumnSchema>& columns, int key_columns)
ATTRIBUTE_DEPRECATED("this method will be removed in a future release")
WARN_UNUSED_RESULT;
/// Check whether the schema is identical to the other one.
///
/// @param [in] other
/// The other KuduSchema object to compare with.
/// @return @c true iff this KuduSchema object is identical
/// to the specified one.
bool Equals(const KuduSchema& other) const;
/// @param [in] idx
/// Column index.
/// @return Schema for the specified column.
KuduColumnSchema Column(size_t idx) const;
/// @param [in] col_name
/// Column name.
/// @param [out] col_schema
/// Schema for the specified column.
/// @return @c true iff the specified column exists.
bool HasColumn(const std::string& col_name, KuduColumnSchema* col_schema) const;
/// @return The number of columns in the schema.
size_t num_columns() const;
/// Get the indexes of the primary key columns within this Schema.
///
/// @attention In current versions of Kudu, these will always be contiguous
/// column indexes starting with 0. However, in future versions this
/// assumption may not hold, so callers should not assume it is the case.
///
/// @param [out] indexes
/// The placeholder for the result.
void GetPrimaryKeyColumnIndexes(std::vector<int>* indexes) const;
/// Create a new row corresponding to this schema.
///
/// @note The new row refers to this KuduSchema object, so it must be
/// destroyed before the KuduSchema object to avoid dangling pointers.
///
/// @return A pointer to the newly created row. The caller takes ownership
/// of the created row.
KuduPartialRow* NewRow() const;
/// Stringify this KuduSchema.
///
/// @return A string describing this schema.
std::string ToString() const;
/// @cond PRIVATE_API
/// Convert a Schema to a KuduSchema.
///
/// Private API.
///
/// @param[in] schema
/// The Schema to convert
/// @return The converted KuduSchema
static KuduSchema FromSchema(const Schema& schema) KUDU_NO_EXPORT;
/// Convert a KuduSchema to a Schema.
///
/// Private API.
///
/// @param[in] kudu_schema
/// The KuduSchema to convert
/// @return The converted Schema
static Schema ToSchema(const KuduSchema& kudu_schema) KUDU_NO_EXPORT;
/// @endcond
private:
// Classes granted access to the private members below (the Schema-based
// constructors, num_key_columns(), and schema_).
friend class ClientTest;
friend class KuduClient;
friend class KuduScanner;
friend class KuduScanToken;
friend class KuduScanTokenBuilder;
friend class KuduSchemaBuilder;
friend class KuduTable;
friend class KuduTableCreator;
friend class KuduWriteOperation;
friend class ScanConfiguration;
friend class internal::GetTableSchemaRpc;
friend class internal::LookupRpc;
friend class internal::MetaCache;
friend class internal::MetaCacheEntry;
friend class internal::WriteRpc;
friend class tools::RemoteKsckCluster;
friend class tools::ReplicaDumper;
// For use by KuduSchema::FromSchema.
explicit KuduSchema(const Schema& schema);
// Overload taking an rvalue reference. Rvalue references require C++11, so
// it is declared only when compiling as C++11 or newer; this header is
// meant to remain usable by compilers lacking C++11 support.
#if __cplusplus >= 201103
explicit KuduSchema(Schema&& schema);
#endif
// Private since we don't want users to rely on the first N columns
// being the keys.
size_t num_key_columns() const;
// Owned.
Schema* schema_;
};
} // namespace client
} // namespace kudu
#endif // KUDU_CLIENT_SCHEMA_H