blob: 032bfb5c820f9274ef1655adb2fccccc0d3b9155 [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#ifndef IMPALA_SERVICE_QUERY_OPTIONS_H
#define IMPALA_SERVICE_QUERY_OPTIONS_H
#include <string>
#include <map>
#include <unordered_map>
#include <bitset>
#include "common/status.h"
/// Utility methods to process per-query options
namespace impala {
class TQueryOptions;
// Maps query option names to option levels used for displaying the query
// options via SET and SET ALL
typedef std::unordered_map<string, beeswax::TQueryOptionLevel::type>
QueryOptionLevels;
// Macro to help generate functions that use or manipulate query options.
// If the DCHECK is hit then handle the missing query option below and update
// the DCHECK.
// Specifically, the DCHECK will make sure that the number of elements in
// the map _TImpalaQueryOptions_VALUES_TO_NAMES automatically generated in
// ImpalaService_types.cpp is equal to the largest integer associated with an
// option in the enum TImpalaQueryOptions (defined in ImpalaService.thrift)
// plus one. Thus, the second argument to the DCHECK has to be updated every
// time we add or remove a query option to/from the enum TImpalaQueryOptions.
#define QUERY_OPTS_TABLE\
DCHECK_EQ(_TImpalaQueryOptions_VALUES_TO_NAMES.size(),\
TImpalaQueryOptions::BROADCAST_BYTES_LIMIT + 1);\
REMOVED_QUERY_OPT_FN(abort_on_default_limit_exceeded, ABORT_ON_DEFAULT_LIMIT_EXCEEDED)\
QUERY_OPT_FN(abort_on_error, ABORT_ON_ERROR, TQueryOptionLevel::REGULAR)\
REMOVED_QUERY_OPT_FN(allow_unsupported_formats, ALLOW_UNSUPPORTED_FORMATS)\
QUERY_OPT_FN(batch_size, BATCH_SIZE, TQueryOptionLevel::DEVELOPMENT)\
QUERY_OPT_FN(debug_action, DEBUG_ACTION, TQueryOptionLevel::DEVELOPMENT)\
REMOVED_QUERY_OPT_FN(default_order_by_limit, DEFAULT_ORDER_BY_LIMIT)\
REMOVED_QUERY_OPT_FN(disable_cached_reads, DISABLE_CACHED_READS)\
QUERY_OPT_FN(disable_outermost_topn, DISABLE_OUTERMOST_TOPN,\
TQueryOptionLevel::DEVELOPMENT)\
QUERY_OPT_FN(disable_codegen, DISABLE_CODEGEN, TQueryOptionLevel::REGULAR)\
QUERY_OPT_FN(explain_level, EXPLAIN_LEVEL, TQueryOptionLevel::REGULAR)\
QUERY_OPT_FN(hbase_cache_blocks, HBASE_CACHE_BLOCKS, TQueryOptionLevel::ADVANCED)\
QUERY_OPT_FN(hbase_caching, HBASE_CACHING, TQueryOptionLevel::ADVANCED)\
QUERY_OPT_FN(max_errors, MAX_ERRORS, TQueryOptionLevel::ADVANCED)\
REMOVED_QUERY_OPT_FN(max_io_buffers, MAX_IO_BUFFERS)\
QUERY_OPT_FN(max_scan_range_length, MAX_SCAN_RANGE_LENGTH,\
TQueryOptionLevel::DEVELOPMENT)\
QUERY_OPT_FN(mem_limit, MEM_LIMIT, TQueryOptionLevel::REGULAR)\
QUERY_OPT_FN(num_nodes, NUM_NODES, TQueryOptionLevel::DEVELOPMENT)\
QUERY_OPT_FN(num_scanner_threads, NUM_SCANNER_THREADS, TQueryOptionLevel::REGULAR)\
QUERY_OPT_FN(compression_codec, COMPRESSION_CODEC, TQueryOptionLevel::REGULAR)\
QUERY_OPT_FN(parquet_file_size, PARQUET_FILE_SIZE, TQueryOptionLevel::ADVANCED)\
QUERY_OPT_FN(request_pool, REQUEST_POOL, TQueryOptionLevel::REGULAR)\
REMOVED_QUERY_OPT_FN(reservation_request_timeout, RESERVATION_REQUEST_TIMEOUT)\
QUERY_OPT_FN(sync_ddl, SYNC_DDL, TQueryOptionLevel::REGULAR)\
REMOVED_QUERY_OPT_FN(v_cpu_cores, V_CPU_CORES)\
REMOVED_QUERY_OPT_FN(rm_initial_mem, RM_INITIAL_MEM)\
QUERY_OPT_FN(query_timeout_s, QUERY_TIMEOUT_S, TQueryOptionLevel::REGULAR)\
QUERY_OPT_FN(buffer_pool_limit, BUFFER_POOL_LIMIT, TQueryOptionLevel::ADVANCED)\
QUERY_OPT_FN(appx_count_distinct, APPX_COUNT_DISTINCT, TQueryOptionLevel::ADVANCED)\
QUERY_OPT_FN(disable_unsafe_spills, DISABLE_UNSAFE_SPILLS, TQueryOptionLevel::REGULAR)\
REMOVED_QUERY_OPT_FN(seq_compression_mode, SEQ_COMPRESSION_MODE)\
QUERY_OPT_FN(exec_single_node_rows_threshold, EXEC_SINGLE_NODE_ROWS_THRESHOLD,\
TQueryOptionLevel::ADVANCED)\
QUERY_OPT_FN(optimize_partition_key_scans, OPTIMIZE_PARTITION_KEY_SCANS,\
TQueryOptionLevel::REGULAR)\
QUERY_OPT_FN(replica_preference, REPLICA_PREFERENCE, TQueryOptionLevel::ADVANCED)\
QUERY_OPT_FN(schedule_random_replica, SCHEDULE_RANDOM_REPLICA,\
TQueryOptionLevel::ADVANCED)\
REMOVED_QUERY_OPT_FN(scan_node_codegen_threshold, SCAN_NODE_CODEGEN_THRESHOLD)\
QUERY_OPT_FN(disable_streaming_preaggregations, DISABLE_STREAMING_PREAGGREGATIONS,\
TQueryOptionLevel::REGULAR)\
QUERY_OPT_FN(runtime_filter_mode, RUNTIME_FILTER_MODE, TQueryOptionLevel::REGULAR)\
QUERY_OPT_FN(runtime_bloom_filter_size, RUNTIME_BLOOM_FILTER_SIZE,\
TQueryOptionLevel::ADVANCED)\
QUERY_OPT_FN(runtime_filter_wait_time_ms, RUNTIME_FILTER_WAIT_TIME_MS,\
TQueryOptionLevel::REGULAR)\
QUERY_OPT_FN(disable_row_runtime_filtering, DISABLE_ROW_RUNTIME_FILTERING,\
TQueryOptionLevel::REGULAR)\
QUERY_OPT_FN(max_num_runtime_filters, MAX_NUM_RUNTIME_FILTERS,\
TQueryOptionLevel::ADVANCED)\
QUERY_OPT_FN(parquet_annotate_strings_utf8, PARQUET_ANNOTATE_STRINGS_UTF8,\
TQueryOptionLevel::ADVANCED)\
QUERY_OPT_FN(parquet_fallback_schema_resolution, PARQUET_FALLBACK_SCHEMA_RESOLUTION,\
TQueryOptionLevel::REGULAR)\
QUERY_OPT_FN(mt_dop, MT_DOP, TQueryOptionLevel::REGULAR)\
QUERY_OPT_FN(s3_skip_insert_staging, S3_SKIP_INSERT_STAGING,\
TQueryOptionLevel::REGULAR)\
QUERY_OPT_FN(runtime_filter_min_size, RUNTIME_FILTER_MIN_SIZE,\
TQueryOptionLevel::ADVANCED)\
QUERY_OPT_FN(runtime_filter_max_size, RUNTIME_FILTER_MAX_SIZE,\
TQueryOptionLevel::ADVANCED)\
QUERY_OPT_FN(prefetch_mode, PREFETCH_MODE, TQueryOptionLevel::ADVANCED)\
QUERY_OPT_FN(strict_mode, STRICT_MODE, TQueryOptionLevel::DEVELOPMENT)\
QUERY_OPT_FN(scratch_limit, SCRATCH_LIMIT, TQueryOptionLevel::REGULAR)\
QUERY_OPT_FN(enable_expr_rewrites, ENABLE_EXPR_REWRITES, TQueryOptionLevel::ADVANCED)\
QUERY_OPT_FN(decimal_v2, DECIMAL_V2, TQueryOptionLevel::DEVELOPMENT)\
QUERY_OPT_FN(parquet_dictionary_filtering, PARQUET_DICTIONARY_FILTERING,\
TQueryOptionLevel::ADVANCED)\
QUERY_OPT_FN(parquet_array_resolution, PARQUET_ARRAY_RESOLUTION,\
TQueryOptionLevel::REGULAR)\
QUERY_OPT_FN(parquet_read_statistics, PARQUET_READ_STATISTICS,\
TQueryOptionLevel::ADVANCED)\
QUERY_OPT_FN(default_join_distribution_mode, DEFAULT_JOIN_DISTRIBUTION_MODE,\
TQueryOptionLevel::ADVANCED)\
QUERY_OPT_FN(disable_codegen_rows_threshold, DISABLE_CODEGEN_ROWS_THRESHOLD,\
TQueryOptionLevel::ADVANCED)\
QUERY_OPT_FN(default_spillable_buffer_size, DEFAULT_SPILLABLE_BUFFER_SIZE,\
TQueryOptionLevel::ADVANCED)\
QUERY_OPT_FN(min_spillable_buffer_size, MIN_SPILLABLE_BUFFER_SIZE,\
TQueryOptionLevel::ADVANCED)\
QUERY_OPT_FN(max_row_size, MAX_ROW_SIZE, TQueryOptionLevel::REGULAR)\
QUERY_OPT_FN(idle_session_timeout, IDLE_SESSION_TIMEOUT, TQueryOptionLevel::REGULAR)\
QUERY_OPT_FN(compute_stats_min_sample_size, COMPUTE_STATS_MIN_SAMPLE_SIZE,\
TQueryOptionLevel::ADVANCED)\
QUERY_OPT_FN(exec_time_limit_s, EXEC_TIME_LIMIT_S, TQueryOptionLevel::REGULAR)\
QUERY_OPT_FN(shuffle_distinct_exprs, SHUFFLE_DISTINCT_EXPRS,\
TQueryOptionLevel::ADVANCED)\
QUERY_OPT_FN(max_mem_estimate_for_admission, MAX_MEM_ESTIMATE_FOR_ADMISSION,\
TQueryOptionLevel::ADVANCED)\
QUERY_OPT_FN(thread_reservation_limit, THREAD_RESERVATION_LIMIT,\
TQueryOptionLevel::REGULAR)\
QUERY_OPT_FN(thread_reservation_aggregate_limit, THREAD_RESERVATION_AGGREGATE_LIMIT,\
TQueryOptionLevel::REGULAR)\
QUERY_OPT_FN(kudu_read_mode, KUDU_READ_MODE, TQueryOptionLevel::ADVANCED)\
QUERY_OPT_FN(allow_erasure_coded_files, ALLOW_ERASURE_CODED_FILES,\
TQueryOptionLevel::DEVELOPMENT)\
QUERY_OPT_FN(timezone, TIMEZONE, TQueryOptionLevel::REGULAR)\
QUERY_OPT_FN(scan_bytes_limit, SCAN_BYTES_LIMIT,\
TQueryOptionLevel::ADVANCED)\
QUERY_OPT_FN(cpu_limit_s, CPU_LIMIT_S, TQueryOptionLevel::DEVELOPMENT)\
QUERY_OPT_FN(topn_bytes_limit, TOPN_BYTES_LIMIT, TQueryOptionLevel::ADVANCED)\
QUERY_OPT_FN(client_identifier, CLIENT_IDENTIFIER, TQueryOptionLevel::ADVANCED)\
QUERY_OPT_FN(resource_trace_ratio, RESOURCE_TRACE_RATIO, TQueryOptionLevel::ADVANCED)\
QUERY_OPT_FN(num_remote_executor_candidates, NUM_REMOTE_EXECUTOR_CANDIDATES,\
TQueryOptionLevel::ADVANCED)\
QUERY_OPT_FN(num_rows_produced_limit, NUM_ROWS_PRODUCED_LIMIT,\
TQueryOptionLevel::ADVANCED)\
QUERY_OPT_FN(\
planner_testcase_mode, PLANNER_TESTCASE_MODE, TQueryOptionLevel::DEVELOPMENT)\
QUERY_OPT_FN(default_file_format, DEFAULT_FILE_FORMAT, TQueryOptionLevel::REGULAR)\
QUERY_OPT_FN(parquet_timestamp_type, PARQUET_TIMESTAMP_TYPE,\
TQueryOptionLevel::DEVELOPMENT)\
QUERY_OPT_FN(parquet_read_page_index, PARQUET_READ_PAGE_INDEX,\
TQueryOptionLevel::ADVANCED)\
QUERY_OPT_FN(parquet_write_page_index, PARQUET_WRITE_PAGE_INDEX,\
TQueryOptionLevel::ADVANCED)\
QUERY_OPT_FN(parquet_page_row_count_limit, PARQUET_PAGE_ROW_COUNT_LIMIT,\
TQueryOptionLevel::ADVANCED)\
QUERY_OPT_FN(disable_hdfs_num_rows_estimate, DISABLE_HDFS_NUM_ROWS_ESTIMATE,\
TQueryOptionLevel::REGULAR)\
QUERY_OPT_FN(default_hints_insert_statement, DEFAULT_HINTS_INSERT_STATEMENT,\
TQueryOptionLevel::REGULAR)\
QUERY_OPT_FN(spool_query_results, SPOOL_QUERY_RESULTS,\
TQueryOptionLevel::DEVELOPMENT)\
QUERY_OPT_FN(default_transactional_type, DEFAULT_TRANSACTIONAL_TYPE,\
TQueryOptionLevel::ADVANCED)\
QUERY_OPT_FN(statement_expression_limit, STATEMENT_EXPRESSION_LIMIT,\
TQueryOptionLevel::REGULAR)\
QUERY_OPT_FN(max_statement_length_bytes, MAX_STATEMENT_LENGTH_BYTES,\
TQueryOptionLevel::REGULAR)\
QUERY_OPT_FN(disable_data_cache, DISABLE_DATA_CACHE,\
TQueryOptionLevel::ADVANCED)\
QUERY_OPT_FN(max_result_spooling_mem, MAX_RESULT_SPOOLING_MEM,\
TQueryOptionLevel::DEVELOPMENT)\
QUERY_OPT_FN(max_spilled_result_spooling_mem, MAX_SPILLED_RESULT_SPOOLING_MEM,\
TQueryOptionLevel::DEVELOPMENT)\
QUERY_OPT_FN(disable_hbase_num_rows_estimate, DISABLE_HBASE_NUM_ROWS_ESTIMATE,\
TQueryOptionLevel::ADVANCED)\
QUERY_OPT_FN(fetch_rows_timeout_ms, FETCH_ROWS_TIMEOUT_MS,\
TQueryOptionLevel::ADVANCED)\
QUERY_OPT_FN(now_string, NOW_STRING, TQueryOptionLevel::DEVELOPMENT)\
QUERY_OPT_FN(parquet_object_store_split_size, PARQUET_OBJECT_STORE_SPLIT_SIZE,\
TQueryOptionLevel::ADVANCED)\
QUERY_OPT_FN(mem_limit_executors, MEM_LIMIT_EXECUTORS, TQueryOptionLevel::DEVELOPMENT)\
QUERY_OPT_FN(broadcast_bytes_limit, BROADCAST_BYTES_LIMIT, TQueryOptionLevel::ADVANCED)
;
/// Enforce practical limits on some query options to avoid undesired query state.
static const int64_t SPILLABLE_BUFFER_LIMIT = 1LL << 40; // 1 TB
static const int64_t ROW_SIZE_LIMIT = 1LL << 40; // 1 TB
/// Limits on the query size are intended to be large. Prevent them from being set
/// to small values (which can prevent clients from executing anything).
static const int32_t MIN_STATEMENT_EXPRESSION_LIMIT = 1 << 10; // 1024
static const int32_t MIN_MAX_STATEMENT_LENGTH_BYTES = 1 << 10; // 1 KB
/// Converts a TQueryOptions struct into a map of key, value pairs. Options that
/// aren't set and lack defaults in common/thrift/ImpalaInternalService.thrift are
/// mapped to the empty string.
void TQueryOptionsToMap(const TQueryOptions& query_options,
std::map<std::string, std::string>* configuration);
/// Returns a comma-delimted string of the contents of query_options. The output does not
/// contain key-value pairs where the value matches the default value specified in the
/// TQueryOptions definition (regardless of whether or not it was explicitly or
/// implicitly set to the default value).
std::string DebugQueryOptions(const TQueryOptions& query_options);
/// Bitmask for the values of TQueryOptions.
/// TODO: Find a way to set the size based on the number of fields.
typedef std::bitset<128> QueryOptionsMask;
/// Updates the query options in dst from those in src where the query option is set
/// (i.e. src->__isset.PROPERTY is true) and the corresponding bit in mask is set. If
/// mask has no set bits, no options are set. If all bits are set, then all options
/// that were set on src are copied to dst.
void OverlayQueryOptions(const TQueryOptions& src, const QueryOptionsMask& mask,
TQueryOptions* dst);
/// Set the key/value pair in TQueryOptions. It will override existing setting in
/// query_options. The bit corresponding to query option 'key' in set_query_options_mask
/// is set. An empty string value will reset the key to its default value.
Status SetQueryOption(const std::string& key, const std::string& value,
TQueryOptions* query_options, QueryOptionsMask* set_query_options_mask);
/// Validates the query options after they have all been set. Returns a Status indicating
/// the results of running the validation rules. The majority of the query options
/// validation is done in SetQueryOption. However, more complex validations rules (e.g.
/// validating that one config is greater than another config) are run here.
Status ValidateQueryOptions(TQueryOptions* query_options);
/// Parse a "," separated key=value pair of query options and set it in 'query_options'.
/// If the same query option is specified more than once, the last one wins. The
/// set_query_options_mask bitmask is updated to reflect the query options which were
/// set. Returns an error status containing an error detail for any invalid options (e.g.
/// bad format or invalid query option), but all valid query options are still handled.
Status ParseQueryOptions(const std::string& options, TQueryOptions* query_options,
QueryOptionsMask* set_query_options_mask);
/// Based on the query option levels provided to QUERY_OPT_FN macro this function
/// populates the received QueryOptionLevels map with (option name -> option level)
/// entries.
void PopulateQueryOptionLevels(QueryOptionLevels* query_option_levels);
}
#endif