blob: 4dc833632ffcb82e5aaf8bb40750192735e1278f [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
namespace cpp impala
namespace java org.apache.impala.thrift
include "CatalogObjects.thrift"
include "JniCatalog.thrift"
include "Types.thrift"
include "Status.thrift"
include "Results.thrift"
include "hive_metastore.thrift"
// CatalogServer service API and related structs.
enum CatalogServiceVersion {
V1
}
// Prefix used on statestore topic entry keys to indicate that the entry
// should be sent to "v1" impalads that receive all of their metadata
// via the topic itself.
const string CATALOG_TOPIC_V1_PREFIX = "1:";
// Prefix used on statestore topic entry keys to indicate that the entry
// should be sent to "v2" impalads that fetch metadata on demand.
const string CATALOG_TOPIC_V2_PREFIX = "2:";
// Common header included in all CatalogService requests.
// TODO: The CatalogServiceVersion/protocol version should be part of the header.
// This would require changes in BDR and break their compatibility story. We should
// coordinate a joint change somewhere down the line.
struct TCatalogServiceRequestHeader {
// The effective user who submitted this request. When kerberos is enabled, this
// contains the fully qualified user principal.
1: optional string requesting_user
// The redacted SQL statement to be logged.
2: optional string redacted_sql_stmt
// The client IP address.
3: optional string client_ip
}
// Returns details on the result of an operation that updates the catalog. Information
// returned includes the Status of the operations, the catalog version that will contain
// the update, and the catalog service ID. If SYNC_DDL was set in the query options, it
// also returns the version of the catalog update that this operation must wait for
// before returning the response to the client.
struct TCatalogUpdateResult {
// The CatalogService service ID this result came from.
1: required Types.TUniqueId catalog_service_id
// The Catalog version that will contain this update.
2: required i64 version
// The status of the operation, OK if the operation was successful.
3: required Status.TStatus status
// True if this is a result of an INVALIDATE METADATA operation.
4: required bool is_invalidate
// The resulting TCatalogObjects that were added or modified, if applicable.
5: optional list<CatalogObjects.TCatalogObject> updated_catalog_objects
// The resulting TCatalogObjects that were removed, if applicable.
6: optional list<CatalogObjects.TCatalogObject> removed_catalog_objects
}
// Request for executing a DDL operation (CREATE, ALTER, DROP).
struct TDdlExecRequest {
1: required CatalogServiceVersion protocol_version = CatalogServiceVersion.V1
// Common header included in all CatalogService requests.
2: optional TCatalogServiceRequestHeader header
3: required JniCatalog.TDdlType ddl_type
// Parameters for ALTER TABLE
4: optional JniCatalog.TAlterTableParams alter_table_params
// Parameters for ALTER VIEW
5: optional JniCatalog.TCreateOrAlterViewParams alter_view_params
// Parameters for CREATE DATABASE
6: optional JniCatalog.TCreateDbParams create_db_params
// Parameters for CREATE TABLE
7: optional JniCatalog.TCreateTableParams create_table_params
// Parameters for CREATE TABLE LIKE
8: optional JniCatalog.TCreateTableLikeParams create_table_like_params
// Parameters for CREATE VIEW
9: optional JniCatalog.TCreateOrAlterViewParams create_view_params
// Parameters for CREATE FUNCTION
10: optional JniCatalog.TCreateFunctionParams create_fn_params
// Parameters for DROP DATABASE
11: optional JniCatalog.TDropDbParams drop_db_params
// Parameters for DROP TABLE/VIEW
12: optional JniCatalog.TDropTableOrViewParams drop_table_or_view_params
// Parameters for TRUNCATE TABLE
13: optional JniCatalog.TTruncateParams truncate_params
// Parameters for DROP FUNCTION
14: optional JniCatalog.TDropFunctionParams drop_fn_params
// Parameters for COMPUTE STATS
15: optional JniCatalog.TComputeStatsParams compute_stats_params
// Parameters for CREATE DATA SOURCE
16: optional JniCatalog.TCreateDataSourceParams create_data_source_params
// Parameters for DROP DATA SOURCE
17: optional JniCatalog.TDropDataSourceParams drop_data_source_params
// Parameters for DROP STATS
18: optional JniCatalog.TDropStatsParams drop_stats_params
// Parameters for CREATE/DROP ROLE
19: optional JniCatalog.TCreateDropRoleParams create_drop_role_params
// Parameters for GRANT/REVOKE ROLE
20: optional JniCatalog.TGrantRevokeRoleParams grant_revoke_role_params
// Parameters for GRANT/REVOKE privilege
21: optional JniCatalog.TGrantRevokePrivParams grant_revoke_priv_params
// True if SYNC_DDL is set in query options
22: required bool sync_ddl
// Parameters for COMMENT ON
23: optional JniCatalog.TCommentOnParams comment_on_params
// Parameters for ALTER DATABASE
24: optional JniCatalog.TAlterDbParams alter_db_params
// Parameters for replaying an exported testcase.
25: optional JniCatalog.TCopyTestCaseReq copy_test_case_params
}
// Response from executing a TDdlExecRequest
struct TDdlExecResponse {
1: required TCatalogUpdateResult result
// Set only for CREATE TABLE AS SELECT statements. Will be true iff the statement
// resulted in a new table being created in the Metastore. This is used to
// determine if a CREATE TABLE IF NOT EXISTS AS SELECT ... actually creates a new
// table or whether creation was skipped because the table already existed, in which
// case this flag would be false
2: optional bool new_table_created;
// Result of DDL operation to be returned to the client. Currently only set
// by COMPUTE STATS and ALTER TABLE.
3: optional Results.TResultSet result_set
// The table/view name in HMS. Set only for CREATE TABLE, CREATE TABLE AS SELECT,
// CREATE TABLE LIKE, and CREATE VIEW statements.
4: optional string table_name
// The table/view create time stored in HMS. Set only for CREATE TABLE,
// CREATE TABLE AS SELECT, CREATE TABLE LIKE, and CREATE VIEW statements.
5: optional i64 table_create_time
// Set only for CREATE EXTERNAL TABLE. This is the table location from the newly
// created table. This is useful for establishing lineage between table and it's
// location for external tables.
6: optional string table_location
}
// Updates the metastore with new partition information and returns a response
// with details on the result of the operation. Used to add partitions after executing
// DML operations, and could potentially be used in the future to update column stats
// after DML operations.
// TODO: Rename this struct to something more descriptive.
struct TUpdateCatalogRequest {
1: required CatalogServiceVersion protocol_version = CatalogServiceVersion.V1
// True if SYNC_DDL is set in query options.
2: required bool sync_ddl
// Common header included in all CatalogService requests.
3: optional TCatalogServiceRequestHeader header
// Unqualified name of the table to change
4: required string target_table;
// Database that the table belongs to
5: required string db_name;
// List of partitions that are new and need to be created. May
// include the root partition (represented by the empty string).
6: required set<string> created_partitions;
// True if the update corresponds to an "insert overwrite" operation
7: required bool is_overwrite;
// ACID transaction ID for transactional inserts.
8: optional i64 transaction_id;
}
// Response from a TUpdateCatalogRequest
struct TUpdateCatalogResponse {
1: required TCatalogUpdateResult result
}
// Parameters of REFRESH/INVALIDATE METADATA commands
struct TResetMetadataRequest {
1: required CatalogServiceVersion protocol_version = CatalogServiceVersion.V1
// Common header included in all CatalogService requests.
2: optional TCatalogServiceRequestHeader header
// If true, refresh. Otherwise, invalidate metadata
3: required bool is_refresh
// Fully qualified name of the table to refresh or invalidate; not set if invalidating
// the entire catalog
4: optional CatalogObjects.TTableName table_name
// If set, refreshes the specified partition, otherwise
// refreshes the whole table
5: optional list<CatalogObjects.TPartitionKeyValue> partition_spec
// If set, refreshes functions in the specified database.
6: optional string db_name
// True if SYNC_DDL is set in query options
7: required bool sync_ddl
// If set, refreshes authorization metadata.
8: optional bool authorization
}
// Response from TResetMetadataRequest
struct TResetMetadataResponse {
1: required TCatalogUpdateResult result
}
// Request to GetFunctions()
struct TGetFunctionsRequest {
1: required CatalogServiceVersion protocol_version = CatalogServiceVersion.V1
// Common header included in all CatalogService requests.
3: optional TCatalogServiceRequestHeader header
// The parent database name.
2: optional string db_name;
}
// Response a call to GetFunctions()
struct TGetFunctionsResponse {
// The status of the operation, OK if the operation was successful.
1: optional Status.TStatus status
// List of functions returned to the caller. Functions are not returned in a
// defined order.
2: optional list<Types.TFunction> functions;
}
// Selector for partial information about Catalog-scoped objects
// (i.e. those that are not within a particular database or table).
struct TCatalogInfoSelector {
1: bool want_db_names
// TODO(todd): add objects like DataSources, etc.
}
// Returned info from a catalog request which selected items in
// TCatalogInfoSelector.
struct TPartialCatalogInfo {
1: list<string> db_names
}
// Selector for partial information about a Table.
struct TTableInfoSelector {
// The response should include the HMS table struct.
1: bool want_hms_table
// If set, the response should include information about the given list of
// partitions. If this is unset, information about all partitions will be
// returned, so long as at least one of the following 'want_partition_*'
// flags is specified.
//
// If a partition ID is passed, but that partition does not exist in the
// table, then an exception will be thrown. It is assumed that the partition
// IDs passed here are a result of a prior successful call to fetch the partition
// list of this table.
//
// NOTE: "unset" and "set to empty" are different -- "set to empty" causes
// no partitions to be returned, whereas "unset" causes all partitions to be
// returned, so long as one of the following 'want_partition_*' is set.
2: optional list<i64> partition_ids
// ... each such partition should include its name.
3: bool want_partition_names
// ... each such partition should include metadata (location, etc).
4: bool want_partition_metadata
// ... each such partition should include its file info
5: bool want_partition_files
// List of columns to fetch stats for.
6: optional list<string> want_stats_for_column_names
// ... each partition should include the partition stats serialized as a byte[]
// and that is deflate-compressed.
7: bool want_partition_stats
}
// Returned information about a particular partition.
struct TPartialPartitionInfo {
1: required i64 id
// Set if 'want_partition_names' was set in TTableInfoSelector.
2: optional string name
// Set if 'want_partition_metadata' was set in TTableInfoSelector.
3: optional hive_metastore.Partition hms_partition
// Set if 'want_partition_files' was set in TTableInfoSelector.
4: optional list<CatalogObjects.THdfsFileDesc> file_descriptors
// Deflate-compressed byte[] representation of TPartitionStats for this partition.
// Set if 'want_partition_stats' was set in TTableInfoSelector. Not set if the
// partition does not have stats.
5: optional binary partition_stats
// Set to true if the partition contains intermediate column stats computed via
// incremental statistics. Set when 'want_partition_metadata' is true in
// TTableInfoSelector. Incremental stats data can be fetched by setting
// 'want_partition_stats' in TTableInfoSelector.
6: optional bool has_incremental_stats
}
// Returned information about a Table, as selected by TTableInfoSelector.
struct TPartialTableInfo {
1: optional hive_metastore.Table hms_table
// The partition metadata for the requested partitions.
//
// If explicit partitions were passed, then it is guaranteed that this list
// is the same size and the same order as the requested list of IDs.
//
// See TPartialPartitionInfo for details on which fields will be set based
// on the caller-provided selector.
2: optional list<TPartialPartitionInfo> partitions
3: optional list<hive_metastore.ColumnStatisticsObj> column_stats
// Set if this table needs storage access during metadata load.
// Time used for storage loading in nanoseconds.
4: optional i64 storage_metadata_load_time_ns
// Each TNetworkAddress is a datanode which contains blocks of a file in the table.
// Used so that each THdfsFileBlock can just reference an index in this list rather
// than duplicate the list of network address, which helps reduce memory usage.
// Only used when partition files are fetched.
7: optional list<Types.TNetworkAddress> network_addresses
}
// Selector for partial information about a Database.
struct TDbInfoSelector {
// The response should include the HMS Database object.
1: bool want_hms_database
// The response should include the list of table names in the DB.
2: bool want_table_names
// The response should include the list of function names in the DB.
3: bool want_function_names
}
// Returned information about a Database, as selected by TDbInfoSelector.
struct TPartialDbInfo {
1: optional hive_metastore.Database hms_database
2: optional list<string> table_names
3: optional list<string> function_names
}
// RPC request for GetPartialCatalogObject.
struct TGetPartialCatalogObjectRequest {
1: required CatalogServiceVersion protocol_version = CatalogServiceVersion.V1
// A catalog object descriptor: a TCatalogObject with the object name and type fields
// set. This may be a TABLE, DB, CATALOG, or FUNCTION. The selectors below can
// further restrict what information should be returned.
2: required CatalogObjects.TCatalogObject object_desc
3: optional TTableInfoSelector table_info_selector
4: optional TDbInfoSelector db_info_selector
5: optional TCatalogInfoSelector catalog_info_selector
}
enum CatalogLookupStatus {
OK,
DB_NOT_FOUND,
TABLE_NOT_FOUND,
TABLE_NOT_LOADED,
FUNCTION_NOT_FOUND,
// Partial fetch RPCs currently look up partitions by IDs instead of names. These IDs
// change over the lifetime of a table with queries like invalidate metadata. In such
// cases this lookup status is set and the caller can retry the fetch.
// TODO: Fix partition lookup logic to not do it with IDs.
PARTITION_NOT_FOUND
}
// RPC response for GetPartialCatalogObject.
struct TGetPartialCatalogObjectResponse {
// The status of the operation, OK if the operation was successful.
// Unset indicates "OK".
1: optional Status.TStatus status
// Catalog-specific error codes (eg if the object no longer exists).
2: optional CatalogLookupStatus lookup_status = CatalogLookupStatus.OK
3: optional i64 object_version_number
4: optional TPartialTableInfo table_info
5: optional TPartialDbInfo db_info
6: optional TPartialCatalogInfo catalog_info
// Functions are small enough that we return them wholesale.
7: optional list<Types.TFunction> functions
}
// Request the complete metadata for a given catalog object. May trigger a metadata load
// if the object is not already in the catalog cache.
struct TGetCatalogObjectRequest {
1: required CatalogServiceVersion protocol_version = CatalogServiceVersion.V1
// Common header included in all CatalogService requests.
3: optional TCatalogServiceRequestHeader header
// A catalog object descriptor: a TCatalogObject with the object name and type fields
// set.
2: required CatalogObjects.TCatalogObject object_desc
}
// Response from TGetCatalogObjectRequest
struct TGetCatalogObjectResponse {
1: required CatalogObjects.TCatalogObject catalog_object
}
// Request the partition statistics for the specified table.
struct TGetPartitionStatsRequest {
1: required CatalogServiceVersion protocol_version = CatalogServiceVersion.V1
2: required CatalogObjects.TTableName table_name
}
// Response for requesting partition statistics. All partition statistics
// are returned. If a partition does not have statistics, it is not returned.
// Partitions are identified by name, consisting of partition column name/value pairs.
// The returned statistics are deflate-compressed bytes that represent
// CatalogObject.TPartitionStats when decompressed.
// An OK or null status means that the call succeeded.
// If there was an error, an error status is returned and partition_stats
// is left unset.
struct TGetPartitionStatsResponse {
1: optional Status.TStatus status
2: optional map<string, binary> partition_stats
}
// Instructs the Catalog Server to prioritizing loading of metadata for the specified
// catalog objects. Currently only used for controlling the priority of loading
// tables/views since Db/Function metadata is loaded on startup.
struct TPrioritizeLoadRequest {
1: required CatalogServiceVersion protocol_version = CatalogServiceVersion.V1
// Common header included in all CatalogService requests.
2: optional TCatalogServiceRequestHeader header
// A list of catalog objects descriptors for which to prioritize loading. A catalog
// object descriptor is a TCatalogObject with only the object name and type fields set.
3: required list<CatalogObjects.TCatalogObject> object_descs
}
struct TPrioritizeLoadResponse {
// The status of the operation, OK if the operation was successful.
1: required Status.TStatus status
}
// Request to perform a privilege check with the Sentry Service to determine
// if the requesting user is a Sentry Service admin.
struct TSentryAdminCheckRequest {
1: required CatalogServiceVersion protocol_version = CatalogServiceVersion.V1
// Common header included in all CatalogService requests.
2: optional TCatalogServiceRequestHeader header
}
struct TSentryAdminCheckResponse {
// Returns OK if the operation was successful.
1: optional Status.TStatus status
// Returns true if the user is a Sentry admin user.
2: required bool is_admin
}
struct TTableUsage {
1: required CatalogObjects.TTableName table_name
// count of usages since the last report
2: required i32 num_usages
}
struct TUpdateTableUsageRequest {
1: required list<TTableUsage> usages
}
struct TUpdateTableUsageResponse {
// The operation may fail if the catalogd is in a bad state or if there is a bug.
1: optional Status.TStatus status
}
// The CatalogService API
service CatalogService {
// Executes a DDL request and returns details on the result of the operation.
TDdlExecResponse ExecDdl(1: TDdlExecRequest req);
// Gets the catalog object corresponding to the given request.
TGetCatalogObjectResponse GetCatalogObject(1: TGetCatalogObjectRequest req);
// Gets the statistics that are associated with table partitions.
TGetPartitionStatsResponse GetPartitionStats(1: TGetPartitionStatsRequest req);
// Resets the Catalog metadata. Used to explicitly trigger reloading of the Hive
// Metastore metadata and/or HDFS block location metadata.
TResetMetadataResponse ResetMetadata(1: TResetMetadataRequest req);
// Updates the metastore with new partition information and returns a response
// with details on the result of the operation.
TUpdateCatalogResponse UpdateCatalog(1: TUpdateCatalogRequest req);
// Gets all user defined functions (aggregate and scalar) in the catalog matching
// the parameters of TGetFunctionsRequest.
TGetFunctionsResponse GetFunctions(1: TGetFunctionsRequest req);
// Prioritize the loading of metadata for the CatalogObjects specified in the
// TPrioritizeLoadRequest.
TPrioritizeLoadResponse PrioritizeLoad(1: TPrioritizeLoadRequest req);
// Performs a check with the Sentry Service to determine if the requesting user
// is configured as an admin on the Sentry Service. This API may be removed in
// the future and external clients should not rely on using it.
// TODO: When Sentry Service has a better mechanism to perform these changes this API
// should be deprecated.
TSentryAdminCheckResponse SentryAdminCheck(1: TSentryAdminCheckRequest req);
// Fetch partial information about some object in the catalog.
TGetPartialCatalogObjectResponse GetPartialCatalogObject(
1: TGetPartialCatalogObjectRequest req);
// Update recently used tables and their usage counts in an impalad since the last
// report.
TUpdateTableUsageResponse UpdateTableUsage(1: TUpdateTableUsageRequest req);
}