// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements.  See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership.  The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License.  You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied.  See the License for the
// specific language governing permissions and limitations
// under the License.

namespace cpp impala
namespace java org.apache.impala.thrift

include "CatalogObjects.thrift"
include "JniCatalog.thrift"
include "Types.thrift"
include "Status.thrift"
include "Results.thrift"
include "hive_metastore.thrift"
include "SqlConstraints.thrift"

// CatalogServer service API and related structs.

// Protocol version of the CatalogService API. Request structs in this file carry
// it in their 'protocol_version' field, defaulting to V1.
enum CatalogServiceVersion {
  // The value is spelled out explicitly; it is the same value Thrift assigns
  // implicitly to the first enum member.
  V1 = 0
}

// Prefix used on statestore topic entry keys to indicate that the entry
// should be sent to "v1" impalads that receive all of their metadata
// via the topic itself. See CATALOG_TOPIC_V2_PREFIX for the on-demand
// ("v2") counterpart.
const string CATALOG_TOPIC_V1_PREFIX = "1:";

// Prefix used on statestore topic entry keys to indicate that the entry
// should be sent to "v2" impalads that fetch metadata on demand, as opposed
// to "v1" impalads (see CATALOG_TOPIC_V1_PREFIX).
const string CATALOG_TOPIC_V2_PREFIX = "2:";

// Common header included in all CatalogService requests. Every field is optional.
// TODO: The CatalogServiceVersion/protocol version should be part of the header.
// This would require changes in BDR and break their compatibility story. We should
// coordinate a joint change somewhere down the line.
struct TCatalogServiceRequestHeader {
  // The effective user who submitted this request. When kerberos is enabled, this
  // contains the fully qualified user principal.
  1: optional string requesting_user

  // The redacted SQL statement to be logged.
  2: optional string redacted_sql_stmt

  // The client IP address.
  // NOTE(review): presumably the address of the client that issued the statement
  // rather than of the calling impalad -- confirm against the caller.
  3: optional string client_ip
}

// Returns details on the result of an operation that updates the catalog. Information
// returned includes the Status of the operation, the catalog version that will contain
// the update, and the catalog service ID. If SYNC_DDL was set in the query options, it
// also returns the version of the catalog update that this operation must wait for
// before returning the response to the client.
// This struct is embedded as the 'result' field of TDdlExecResponse,
// TUpdateCatalogResponse and TResetMetadataResponse.
struct TCatalogUpdateResult {
  // The CatalogService service ID this result came from.
  1: required Types.TUniqueId catalog_service_id

  // The Catalog version that will contain this update.
  2: required i64 version

  // The status of the operation, OK if the operation was successful.
  3: required Status.TStatus status

  // True if this is a result of an INVALIDATE METADATA operation.
  4: required bool is_invalidate

  // The resulting TCatalogObjects that were added or modified, if applicable.
  5: optional list<CatalogObjects.TCatalogObject> updated_catalog_objects

  // The resulting TCatalogObjects that were removed, if applicable.
  6: optional list<CatalogObjects.TCatalogObject> removed_catalog_objects
}

// Request for executing a DDL operation (e.g. CREATE, ALTER, DROP, TRUNCATE,
// COMPUTE STATS, COMMENT ON, GRANT/REVOKE). The operation-specific parameters are
// carried in the optional '*_params' fields below.
// NOTE(review): presumably only the params field matching 'ddl_type' is set for a
// given request -- confirm against the request builder.
struct TDdlExecRequest {
  // Protocol version of this request. Defaults to V1.
  1: required CatalogServiceVersion protocol_version = CatalogServiceVersion.V1

  // Common header included in all CatalogService requests.
  2: optional TCatalogServiceRequestHeader header

  // The kind of DDL operation to execute.
  3: required JniCatalog.TDdlType ddl_type

  // Parameters for ALTER TABLE
  4: optional JniCatalog.TAlterTableParams alter_table_params

  // Parameters for ALTER VIEW
  5: optional JniCatalog.TCreateOrAlterViewParams alter_view_params

  // Parameters for CREATE DATABASE
  6: optional JniCatalog.TCreateDbParams create_db_params

  // Parameters for CREATE TABLE
  7: optional JniCatalog.TCreateTableParams create_table_params

  // Parameters for CREATE TABLE LIKE
  8: optional JniCatalog.TCreateTableLikeParams create_table_like_params

  // Parameters for CREATE VIEW
  9: optional JniCatalog.TCreateOrAlterViewParams create_view_params

  // Parameters for CREATE FUNCTION
  10: optional JniCatalog.TCreateFunctionParams create_fn_params

  // Parameters for DROP DATABASE
  11: optional JniCatalog.TDropDbParams drop_db_params

  // Parameters for DROP TABLE/VIEW
  12: optional JniCatalog.TDropTableOrViewParams drop_table_or_view_params

  // Parameters for TRUNCATE TABLE
  13: optional JniCatalog.TTruncateParams truncate_params

  // Parameters for DROP FUNCTION
  14: optional JniCatalog.TDropFunctionParams drop_fn_params

  // Parameters for COMPUTE STATS
  15: optional JniCatalog.TComputeStatsParams compute_stats_params

  // Parameters for CREATE DATA SOURCE
  16: optional JniCatalog.TCreateDataSourceParams create_data_source_params

  // Parameters for DROP DATA SOURCE
  17: optional JniCatalog.TDropDataSourceParams drop_data_source_params

  // Parameters for DROP STATS
  18: optional JniCatalog.TDropStatsParams drop_stats_params

  // Parameters for CREATE/DROP ROLE
  19: optional JniCatalog.TCreateDropRoleParams create_drop_role_params

  // Parameters for GRANT/REVOKE ROLE
  20: optional JniCatalog.TGrantRevokeRoleParams grant_revoke_role_params

  // Parameters for GRANT/REVOKE privilege
  21: optional JniCatalog.TGrantRevokePrivParams grant_revoke_priv_params

  // True if SYNC_DDL is set in query options
  22: required bool sync_ddl

  // Parameters for COMMENT ON
  23: optional JniCatalog.TCommentOnParams comment_on_params

  // Parameters for ALTER DATABASE
  24: optional JniCatalog.TAlterDbParams alter_db_params

  // Parameters for replaying an exported testcase.
  25: optional JniCatalog.TCopyTestCaseReq copy_test_case_params
}

// Response from executing a TDdlExecRequest.
struct TDdlExecResponse {
  // Details on the outcome of the catalog update.
  1: required TCatalogUpdateResult result

  // Only set for CREATE TABLE AS SELECT statements. True iff the statement caused
  // a new table to be created in the Metastore. Lets the caller tell whether a
  // CREATE TABLE IF NOT EXISTS AS SELECT ... really created a table, or whether
  // creation was skipped because the table already existed (flag is false then).
  2: optional bool new_table_created

  // Result of the DDL operation to hand back to the client. Currently only set
  // by COMPUTE STATS and ALTER TABLE.
  3: optional Results.TResultSet result_set

  // Name of the table/view in HMS. Only set for CREATE TABLE,
  // CREATE TABLE AS SELECT, CREATE TABLE LIKE, and CREATE VIEW statements.
  4: optional string table_name

  // Create time of the table/view as stored in HMS. Only set for CREATE TABLE,
  // CREATE TABLE AS SELECT, CREATE TABLE LIKE, and CREATE VIEW statements.
  5: optional i64 table_create_time

  // Only set for CREATE EXTERNAL TABLE. Holds the location of the newly created
  // table, which is useful for establishing lineage between an external table and
  // its location.
  6: optional string table_location
}

// Request to update the metastore with new partition information; the response
// carries details on the result of the operation. Used to add partitions after
// executing DML operations, and could potentially be used in the future to update
// column stats after DML operations.
// TODO: Rename this struct to something more descriptive.
struct TUpdateCatalogRequest {
  // Protocol version of this request. Defaults to V1.
  1: required CatalogServiceVersion protocol_version = CatalogServiceVersion.V1

  // True if SYNC_DDL is set in query options.
  2: required bool sync_ddl

  // Common header included in all CatalogService requests.
  3: optional TCatalogServiceRequestHeader header

  // Unqualified name of the table to change.
  4: required string target_table

  // Database that the table belongs to.
  5: required string db_name

  // Partitions that are new and need to be created. May include the root
  // partition (represented by the empty string).
  6: required set<string> created_partitions

  // True if the update corresponds to an "insert overwrite" operation.
  7: required bool is_overwrite

  // ACID transaction ID for transactional inserts.
  8: optional i64 transaction_id

  // ACID write ID for transactional inserts.
  9: optional i64 write_id
}

// Response from a TUpdateCatalogRequest
struct TUpdateCatalogResponse {
  // Details on the result of the catalog update (status, catalog version, etc.).
  1: required TCatalogUpdateResult result
}

// Parameters of REFRESH/INVALIDATE METADATA commands
struct TResetMetadataRequest {
  // Protocol version of this request. Defaults to V1.
  1: required CatalogServiceVersion protocol_version = CatalogServiceVersion.V1

  // Common header included in all CatalogService requests.
  2: optional TCatalogServiceRequestHeader header

  // If true, refresh. Otherwise, invalidate metadata
  3: required bool is_refresh

  // Fully qualified name of the table to refresh or invalidate; not set if invalidating
  // the entire catalog
  4: optional CatalogObjects.TTableName table_name

  // If set, refreshes the specified partition, otherwise
  // refreshes the whole table
  5: optional list<CatalogObjects.TPartitionKeyValue> partition_spec

  // If set, refreshes functions in the specified database.
  6: optional string db_name

  // True if SYNC_DDL is set in query options
  7: required bool sync_ddl

  // If set, refreshes authorization metadata.
  8: optional bool authorization

  // If set, refreshes partition objects which are modified externally.
  // Applicable only when refreshing the table.
  9: optional bool refresh_updated_hms_partitions
}

// Response from TResetMetadataRequest
struct TResetMetadataResponse {
  // Details on the result of the catalog update (status, catalog version, etc.).
  1: required TCatalogUpdateResult result
}

// Request to GetFunctions().
// Note: the fields are intentionally kept in their existing declaration order even
// though the field IDs are not ascending (3 is declared before 2).
struct TGetFunctionsRequest {
  // Protocol version of this request. Defaults to V1.
  1: required CatalogServiceVersion protocol_version = CatalogServiceVersion.V1

  // Common header included in all CatalogService requests.
  3: optional TCatalogServiceRequestHeader header

  // Name of the parent database.
  2: optional string db_name
}

// Response to a call to GetFunctions()
struct TGetFunctionsResponse {
  // Status of the operation; OK if the operation was successful.
  1: optional Status.TStatus status

  // Functions returned to the caller. They are not returned in any defined order.
  2: optional list<Types.TFunction> functions
}

// Selector for partial information about Catalog-scoped objects
// (i.e. those that are not within a particular database or table).
// The selected information is returned in TPartialCatalogInfo.
struct TCatalogInfoSelector {
  // The response should include the list of database names.
  1: bool want_db_names
  // TODO(todd): add objects like DataSources, etc.
}

// Returned info from a catalog request which selected items in
// TCatalogInfoSelector.
struct TPartialCatalogInfo {
  // Names of databases.
  // NOTE(review): presumably populated when 'want_db_names' was set in
  // TCatalogInfoSelector -- confirm against the server implementation.
  1: list<string> db_names
}

// Selector for partial information about a Table. The selected information is
// returned in TPartialTableInfo.
struct TTableInfoSelector {
  // The response should include the HMS table struct.
  1: bool want_hms_table

  // If set, the response should include information about the given list of
  // partitions. If this is unset, information about all partitions will be
  // returned, so long as at least one of the following 'want_partition_*'
  // flags is specified.
  //
  // If a partition ID is passed, but that partition does not exist in the
  // table, then an exception will be thrown. It is assumed that the partition
  // IDs passed here are a result of a prior successful call to fetch the partition
  // list of this table.
  //
  // NOTE: "unset" and "set to empty" are different -- "set to empty" causes
  // no partitions to be returned, whereas "unset" causes all partitions to be
  // returned, so long as one of the following 'want_partition_*' is set.
  2: optional list<i64> partition_ids

  // ... each such partition should include its name.
  3: bool want_partition_names

  // ... each such partition should include metadata (location, etc).
  4: bool want_partition_metadata

  // ... each such partition should include its file info
  5: bool want_partition_files

  // List of columns to fetch stats for.
  6: optional list<string> want_stats_for_column_names

  // ... each partition should include the partition stats serialized as a byte[]
  // and that is deflate-compressed.
  7: bool want_partition_stats

  // The response should contain table constraints like primary keys
  // and foreign keys
  8: bool want_table_constraints

  // If this is for an ACID table and this is set, the table info returned
  // will be consistent with the provided valid_write_ids
  9: optional CatalogObjects.TValidWriteIdList valid_write_ids

  // If the table id is provided the catalog service compares this table id
  // with the HMS table which it has and triggers a reload in case it doesn't match.
  // This field is only used when valid_write_ids is set, otherwise it is ignored.
  // Defaults to -1.
  10: optional i64 table_id = -1
}

// Returned information about a particular partition. Which optional fields are set
// depends on the 'want_partition_*' flags of TTableInfoSelector.
// Note that the fields are not declared in ascending field-ID order (8 and 9
// appear before 5).
struct TPartialPartitionInfo {
  // ID of the partition.
  // NOTE(review): presumably the same ID space as TTableInfoSelector.partition_ids
  // -- confirm.
  1: required i64 id

  // Set if 'want_partition_names' was set in TTableInfoSelector.
  2: optional string name

  // Set if 'want_partition_metadata' was set in TTableInfoSelector.
  3: optional hive_metastore.Partition hms_partition

  // Set if 'want_partition_files' was set in TTableInfoSelector.
  4: optional list<CatalogObjects.THdfsFileDesc> file_descriptors

  // Set if 'want_partition_files' was set in TTableInfoSelector.
  // NOTE(review): how these differ from 'file_descriptors' is not visible here;
  // presumably files holding inserted rows of ACID tables -- confirm.
  8: optional list<CatalogObjects.THdfsFileDesc> insert_file_descriptors

  // Set if 'want_partition_files' was set in TTableInfoSelector.
  // NOTE(review): presumably files holding deleted rows of ACID tables -- confirm.
  9: optional list<CatalogObjects.THdfsFileDesc> delete_file_descriptors

  // Deflate-compressed byte[] representation of TPartitionStats for this partition.
  // Set if 'want_partition_stats' was set in TTableInfoSelector. Not set if the
  // partition does not have stats.
  5: optional binary partition_stats

  // Set to true if the partition contains intermediate column stats computed via
  // incremental statistics. Set when 'want_partition_metadata' is true in
  // TTableInfoSelector. Incremental stats data can be fetched by setting
  // 'want_partition_stats' in TTableInfoSelector.
  6: optional bool has_incremental_stats

  // Set to true if the partition is marked as cached by hdfs caching. Does not
  // necessarily mean the data is cached. Set when 'want_partition_metadata' is true in
  // TTableInfoSelector.
  7: optional bool is_marked_cached
}

// Returned information about a Table, as selected by TTableInfoSelector.
// Field IDs 5 and 6 are not used by this struct.
// NOTE(review): presumably they belonged to fields that were removed; avoid reusing
// them without checking the file history.
struct TPartialTableInfo {
  // The HMS table struct, requested via 'want_hms_table' in TTableInfoSelector.
  1: optional hive_metastore.Table hms_table

  // Partition metadata for the requested partitions.
  //
  // When explicit partition IDs were passed, this list is guaranteed to have the
  // same size and the same order as the requested list of IDs.
  //
  // Which fields of each entry are set depends on the caller-provided selector;
  // see TPartialPartitionInfo for details.
  2: optional list<TPartialPartitionInfo> partitions

  // Column statistics.
  // NOTE(review): presumably for the columns listed in
  // 'want_stats_for_column_names' of TTableInfoSelector -- confirm.
  3: optional list<hive_metastore.ColumnStatisticsObj> column_stats

  // Time spent on storage loading, in nanoseconds. Set if this table needs
  // storage access during metadata load.
  4: optional i64 storage_metadata_load_time_ns

  // Each TNetworkAddress is a datanode holding blocks of a file in the table.
  // Each THdfsFileBlock can then reference an index into this list instead of
  // duplicating the network addresses, which helps reduce memory usage.
  // Only used when partition files are fetched.
  7: optional list<Types.TNetworkAddress> network_addresses

  // SqlConstraints of the table; small enough to be returned wholesale.
  8: optional SqlConstraints.TSqlConstraints sql_constraints

  // Valid write id list of an ACID table.
  9: optional CatalogObjects.TValidWriteIdList valid_write_ids

  // Set if this table is marked as cached by hdfs caching. Does not necessarily
  // mean the data is cached or that all/any partitions are cached. Only used in
  // analyzing DDLs.
  10: optional bool is_marked_cached
}

// Brief metadata of a single table. Returned as elements of
// TPartialDbInfo.brief_meta_of_tables. Only the name is always present; the other
// fields are unset if the table is unloaded.
struct TBriefTableMeta {
  // Name of the table
  1: required string name

  // HMS table type of the table: EXTERNAL_TABLE, MANAGED_TABLE, VIRTUAL_VIEW, etc.
  // Unset if the table is unloaded.
  2: optional string msType

  // Comment(remark) of the table. Unset if the table is unloaded.
  3: optional string comment
}

// Selector for partial information about a Database. The selected information is
// returned in TPartialDbInfo.
struct TDbInfoSelector {
  // The response should include the HMS Database object.
  1: bool want_hms_database

  // The response should include TBriefTableMeta of tables in the DB.
  2: bool want_brief_meta_of_tables

  // The response should include the list of function names in the DB.
  3: bool want_function_names
}

// Returned information about a Database, as selected by TDbInfoSelector.
struct TPartialDbInfo {
  // The HMS Database object; corresponds to 'want_hms_database' in TDbInfoSelector.
  1: optional hive_metastore.Database hms_database
  // Brief metadata of the tables in the DB; corresponds to
  // 'want_brief_meta_of_tables' in TDbInfoSelector.
  2: optional list<TBriefTableMeta> brief_meta_of_tables
  // Names of the functions in the DB; corresponds to 'want_function_names' in
  // TDbInfoSelector.
  3: optional list<string> function_names
}

// RPC request for GetPartialCatalogObject.
struct TGetPartialCatalogObjectRequest {
  // Protocol version of this request. Defaults to V1.
  1: required CatalogServiceVersion protocol_version = CatalogServiceVersion.V1

  // A catalog object descriptor: a TCatalogObject with the object name and type fields
  // set. This may be a TABLE, DB, CATALOG, or FUNCTION. The selectors below can
  // further restrict what information should be returned.
  2: required CatalogObjects.TCatalogObject object_desc

  // Selectors for the information to return.
  // NOTE(review): presumably only the selector matching the type of 'object_desc'
  // (table, database or catalog) is consulted -- confirm against the server.
  3: optional TTableInfoSelector table_info_selector
  4: optional TDbInfoSelector db_info_selector
  5: optional TCatalogInfoSelector catalog_info_selector
}

// Catalog-specific outcome of a lookup, reported in
// TGetPartialCatalogObjectResponse.lookup_status. The numeric values are spelled
// out explicitly; they are the same values Thrift assigns implicitly (0, 1, 2, ...
// in declaration order).
enum CatalogLookupStatus {
  OK = 0,
  DB_NOT_FOUND = 1,
  TABLE_NOT_FOUND = 2,
  TABLE_NOT_LOADED = 3,
  FUNCTION_NOT_FOUND = 4,
  // Partial fetch RPCs currently look partitions up by ID rather than by name.
  // Those IDs change over the lifetime of a table with queries like invalidate
  // metadata. When that happens this lookup status is set and the caller can
  // retry the fetch.
  // TODO: Fix partition lookup logic to not do it with IDs.
  PARTITION_NOT_FOUND = 5
}

// RPC response for GetPartialCatalogObject.
struct TGetPartialCatalogObjectResponse {
  // The status of the operation, OK if the operation was successful.
  // Unset indicates "OK".
  1: optional Status.TStatus status

  // Catalog-specific error codes (eg if the object no longer exists).
  // Defaults to OK.
  2: optional CatalogLookupStatus lookup_status = CatalogLookupStatus.OK

  // NOTE(review): presumably the catalog version of the requested object -- confirm.
  3: optional i64 object_version_number
  // Partial table information, as selected by TTableInfoSelector.
  4: optional TPartialTableInfo table_info
  // Partial database information, as selected by TDbInfoSelector.
  5: optional TPartialDbInfo db_info
  // Partial catalog-scoped information, as selected by TCatalogInfoSelector.
  6: optional TPartialCatalogInfo catalog_info

  // Functions are small enough that we return them wholesale.
  7: optional list<Types.TFunction> functions
}


// Request the complete metadata for a given catalog object. May trigger a metadata load
// if the object is not already in the catalog cache.
// Note that the fields are not declared in ascending field-ID order (3 appears
// before 2).
struct TGetCatalogObjectRequest {
  // Protocol version of this request. Defaults to V1.
  1: required CatalogServiceVersion protocol_version = CatalogServiceVersion.V1

  // Common header included in all CatalogService requests.
  3: optional TCatalogServiceRequestHeader header

  // A catalog object descriptor: a TCatalogObject with the object name and type fields
  // set.
  2: required CatalogObjects.TCatalogObject object_desc
}

// Response from TGetCatalogObjectRequest
struct TGetCatalogObjectResponse {
  // The catalog object that was requested via TGetCatalogObjectRequest.object_desc.
  1: required CatalogObjects.TCatalogObject catalog_object
}

// Request the partition statistics for the specified table.
struct TGetPartitionStatsRequest {
  // Protocol version of this request. Defaults to V1.
  1: required CatalogServiceVersion protocol_version = CatalogServiceVersion.V1
  // Name of the table whose partition statistics are requested.
  2: required CatalogObjects.TTableName table_name
  // If the table is transactional then this field represents the client's view
  // of the table snapshot in terms of ValidWriteIdList.
  3: optional CatalogObjects.TValidWriteIdList valid_write_ids
  // If the table id is provided the catalog service compares this table id
  // with the HMS table which it has and triggers a reload in case it doesn't match.
  // This field is only used when valid_write_ids is set, otherwise it is ignored.
  // Defaults to -1.
  4: optional i64 table_id = -1
}

// Response for requesting partition statistics. All partition statistics
// are returned. If a partition does not have statistics, it is not returned.
// Partitions are identified by name, consisting of partition column name/value pairs.
// The returned statistics are deflate-compressed bytes that represent
// CatalogObject.TPartitionStats when decompressed.
// An OK or null status means that the call succeeded.
// If there was an error, an error status is returned and partition_stats
// is left unset.
struct TGetPartitionStatsResponse {
  // Status of the call; OK or unset on success.
  1: optional Status.TStatus status
  // Map from partition name to the deflate-compressed statistics of that partition.
  // Left unset if there was an error.
  2: optional map<string, binary> partition_stats
}

// Instructs the Catalog Server to prioritize loading of metadata for the specified
// catalog objects. Currently only used for controlling the priority of loading
// tables/views since Db/Function metadata is loaded on startup.
struct TPrioritizeLoadRequest {
  // Protocol version of this request. Defaults to V1.
  1: required CatalogServiceVersion protocol_version = CatalogServiceVersion.V1

  // Common header included in all CatalogService requests.
  2: optional TCatalogServiceRequestHeader header

  // A list of catalog objects descriptors for which to prioritize loading. A catalog
  // object descriptor is a TCatalogObject with only the object name and type fields set.
  3: required list<CatalogObjects.TCatalogObject> object_descs
}

// Response from TPrioritizeLoadRequest
struct TPrioritizeLoadResponse {
  // The status of the operation, OK if the operation was successful.
  1: required Status.TStatus status
}

// Usage count of a single table. Sent as elements of
// TUpdateTableUsageRequest.usages.
struct TTableUsage {
  // Name of the table the usage count applies to.
  1: required CatalogObjects.TTableName table_name
  // count of usages since the last report
  2: required i32 num_usages
}

// Request for UpdateTableUsage(). Unlike the other requests in this file, it
// carries neither a protocol_version nor a TCatalogServiceRequestHeader.
struct TUpdateTableUsageRequest {
  // Per-table usage counts since the last report.
  1: required list<TTableUsage> usages
}

// Response from TUpdateTableUsageRequest
struct TUpdateTableUsageResponse {
  // The operation may fail if the catalogd is in a bad state or if there is a bug.
  1: optional Status.TStatus status
}

// The CatalogService API. Every RPC takes a single request struct and returns the
// matching response struct; the outcome of an operation is reported inside the
// response (a status field or an embedded TCatalogUpdateResult) where one is defined.
service CatalogService {
  // Executes a DDL request and returns details on the result of the operation.
  TDdlExecResponse ExecDdl(1: TDdlExecRequest req);

  // Gets the catalog object corresponding to the given request. May trigger a
  // metadata load if the object is not already in the catalog cache.
  TGetCatalogObjectResponse GetCatalogObject(1: TGetCatalogObjectRequest req);

  // Gets the statistics that are associated with table partitions.
  TGetPartitionStatsResponse GetPartitionStats(1: TGetPartitionStatsRequest req);

  // Resets the Catalog metadata. Used to explicitly trigger reloading of the Hive
  // Metastore metadata and/or HDFS block location metadata.
  TResetMetadataResponse ResetMetadata(1: TResetMetadataRequest req);

  // Updates the metastore with new partition information and returns a response
  // with details on the result of the operation.
  TUpdateCatalogResponse UpdateCatalog(1: TUpdateCatalogRequest req);

  // Gets all user defined functions (aggregate and scalar) in the catalog matching
  // the parameters of TGetFunctionsRequest.
  TGetFunctionsResponse GetFunctions(1: TGetFunctionsRequest req);

  // Prioritize the loading of metadata for the CatalogObjects specified in the
  // TPrioritizeLoadRequest.
  TPrioritizeLoadResponse PrioritizeLoad(1: TPrioritizeLoadRequest req);

  // Fetch partial information about some object in the catalog. The selectors in the
  // request restrict what information is returned.
  TGetPartialCatalogObjectResponse GetPartialCatalogObject(
      1: TGetPartialCatalogObjectRequest req);

  // Update recently used tables and their usage counts in an impalad since the last
  // report.
  TUpdateTableUsageResponse UpdateTableUsage(1: TUpdateTableUsageRequest req);
}
