blob: 40841b569d87379bc2b968b6f2ed7e8d05a7ade4 [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
// This file contains global flags, i.e., flags which don't belong to a particular
// component (and would therefore need to be DEFINE'd in every source file containing
// a main()), or flags that are referenced from multiple places and having them here
// calms the linker errors that would otherwise ensue.
#include <string>
#include "common/constant-strings.h"
#include "common/logging.h"
#include "gutil/strings/substitute.h"
#include "common/names.h"
// Falls back to the host name reported by the OS when left empty. The same name is
// used in the principal generated for Kerberos authorization.
DEFINE_string(hostname, "",
    "Hostname to use for this daemon, also used as part of the Kerberos principal, "
    "if enabled. If not set, the system default will be used");
// Listening ports for the thrift based and the KRPC based ImpalaInternalService.
DEFINE_int32(be_port, 22000, "port on which thrift based "
    "ImpalaInternalService is exported");
DEFINE_int32(krpc_port, 27000, "port on which KRPC based "
    "ImpalaInternalService is exported");
// Setting --principal is what switches Kerberos on; leaving it empty switches it off.
DEFINE_string(principal, "",
    "Kerberos principal. If set, both client and backend network connections will "
    "use Kerberos encryption and authentication. Kerberos will not be used for "
    "internal or external connections if this is not set.");
// Optional backend-only override of --principal. Adjacent string literals are joined
// by the compiler with nothing in between, so every piece must carry its own
// separating space.
DEFINE_string(be_principal, "",
    "Kerberos principal for backend network connections only, overriding --principal "
    "if set. Must not be set if --principal is not set.");
// File locations used by the Kerberos setup.
DEFINE_string(keytab_file, "",
    "Absolute path to Kerberos keytab file");
DEFINE_string(krb5_ccname, "/tmp/krb5cc_impala_internal",
    "Absolute path to the file based credentials cache that we pass to the "
    "KRB5CCNAME environment variable.");
DEFINE_string(krb5_conf, "",
    "Absolute path to Kerberos krb5.conf if in a non-standard location. Does not "
    "normally need to be set.");
DEFINE_string(krb5_debug_file, "",
    "Turn on Kerberos debugging and output to this file");
// Help text for --mem_limit. It is a runtime string because the units description is
// produced by Substitute() from MEM_UNITS_HELP_MSG.
static const string mem_limit_help_msg =
    "Limit on process memory consumption. Includes the JVM's memory consumption only "
    "if --mem_limit_includes_jvm is true. "
    + Substitute(MEM_UNITS_HELP_MSG, "the physical memory");
DEFINE_string(mem_limit, "80%", mem_limit_help_msg.c_str());
DEFINE_bool(mem_limit_includes_jvm, false,
    "If true, --mem_limit will include the JVM's max heap size and committed memory "
    "in the process memory limit.");
// Help text for --buffer_pool_limit; the units description is produced by Substitute()
// from MEM_UNITS_HELP_MSG.
static const string buffer_pool_limit_help_msg =
    "(Advanced) Limit on buffer pool size. "
    + Substitute(MEM_UNITS_HELP_MSG,
          "the process memory limit (minus the JVM heap if --mem_limit_includes_jvm is "
          "true)")
    + " The default value and behaviour of this flag may change between releases.";
DEFINE_string(buffer_pool_limit, "85%", buffer_pool_limit_help_msg.c_str());
// Help text for --buffer_pool_clean_pages_limit; the units description is produced by
// Substitute() from MEM_UNITS_HELP_MSG.
static const string buffer_pool_clean_pages_limit_help_msg = "(Advanced) Limit on bytes "
    "of clean pages that will be accumulated in the buffer pool. "
    + Substitute(MEM_UNITS_HELP_MSG, "the buffer pool limit") + ".";
// The flag takes the help text built directly above, in the same way --mem_limit and
// --buffer_pool_limit take theirs.
DEFINE_string(buffer_pool_clean_pages_limit, "10%",
    buffer_pool_clean_pages_limit_help_msg.c_str());
// Smallest buffer the buffer pool hands out (8 * 1024 bytes by default).
DEFINE_int64(min_buffer_size, 8 * 1024,
    "(Advanced) The minimum buffer size to use in the buffer pool");

// Heap profiling and TCMalloc thread-cache tuning.
DEFINE_bool(enable_process_lifetime_heap_profiling, false,
    "(Advanced) Enables heap profiling for the lifetime of the process. Profile output "
    "will be stored in the directory specified by -heap_profile_dir. Enabling this "
    "option will disable the on-demand/remote server profile handlers.");
DEFINE_string(heap_profile_dir, "",
    "Output directory to store heap profiles. If not set profiles are stored in the "
    "current working directory.");
DEFINE_int64(tcmalloc_max_total_thread_cache_bytes, 0,
    "(Advanced) Bound on the total amount of bytes allocated to TCMalloc thread "
    "caches. If left at 0 (default), use the default value in TCMalloc library.");
// Startup-time configuration and hardware checking.
DEFINE_bool(abort_on_config_error, true,
    "Abort Impala startup if there are improper configs or running on unsupported "
    "hardware.");
// Per the help text, this must be set the same way on the catalog service and on
// every Impala daemon.
DEFINE_bool(compact_catalog_topic, true,
    "If true, catalog updates sent via the statestore are compacted before "
    "transmission. This saves network bandwidth at the cost of a small quantity of "
    "CPU time. Enable this option in clusters with large catalogs. It must be enabled "
    "on both the catalog service, and all Impala daemons.");
// Redaction of sensitive data in logs and displayed query text.
DEFINE_string(redaction_rules_file, "",
    "Absolute path to sensitive data redaction rules. The rules will be applied to "
    "all log messages and query text shown in the Web UI and audit records. Query "
    "results will not be affected. Refer to the documentation for the rule file "
    "format.");
// Minidump settings: on/off switch, output location, retention and size hint.
DEFINE_bool(enable_minidumps, true,
    "Whether to enable minidump generation upon process crash or SIGUSR1.");
DEFINE_string(minidump_path, "minidumps",
    "Directory to write minidump files to. This can be either an absolute path or a "
    "path relative to log_dir. Each daemon will create an additional sub-directory to "
    "prevent naming conflicts and to make it easier to identify a crashing daemon. "
    "Minidump files contain crash-related information in a compressed format and "
    "will be written when a daemon exits unexpectedly, for example on an unhandled "
    "exception or signal. It is also possible to create minidumps on demand without "
    "exiting the process by sending SIGUSR1. Set to empty to disable writing minidump "
    "files.");
DEFINE_int32(max_minidumps, 9,
    "Maximum number of minidump files to keep per daemon. Older files are removed "
    "first. Set to 0 to keep all minidump files.");
DEFINE_int32(minidump_size_limit_hint_kb, 20480,
    "Size limit hint for minidump files in KB. If a minidump exceeds this value, then "
    "breakpad will reduce the stack memory it collects for each thread from 8KB to "
    "2KB. However it will always include the full stack memory for the first 20 "
    "threads, including the thread that crashed.");
// Controls whether the auth_to_local rules from core-site.xml are used to map Kerberos
// principals to local user names. The help text names the Hadoop property that holds
// those rules.
DEFINE_bool(load_auth_to_local_rules, false,
    "If true, load auth_to_local configuration from hdfs' core-site.xml. When enabled, "
    "impalad reads the rules from the property hadoop.security.auth_to_local and "
    "applies them to translate the Kerberos principal to its corresponding local user "
    "name for authorization.");
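// Stress options that are only enabled in debug builds for testing.
// NOTE(review): the #endif matching the #ifndef below is not visible in this part of
// the file - confirm where the debug-only section is meant to end.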
#ifndef NDEBUG
// Failure and delay injection for function-context allocations and data stream
// receiver registration.
DEFINE_int32(stress_fn_ctx_alloc, 0,
    "A stress option which causes memory allocations in function contexts to fail "
    "once every n allocations where n is the value of this flag. Effective in debug "
    "builds only.");
DEFINE_int32(stress_datastream_recvr_delay_ms, 0,
    "A stress option that causes data stream receiver registration to be delayed. "
    "Effective in debug builds only.");
// Testing aid. Adjacent string literals are joined with nothing in between, so the
// break between pieces must include the separating space.
DEFINE_bool(skip_file_runtime_filtering, false,
    "Skips file-based runtime filtering for testing purposes. Effective in debug "
    "builds only.");
// Selects which exception is injected on the caller side of an RPC.
DEFINE_int32(fault_injection_rpc_exception_type, 0,
    "A fault injection option that specifies the exception to be thrown in the caller "
    "side of an RPC call. Effective in debug builds only");
// Simulates slow scratch-file writes by delaying them.
DEFINE_int32(stress_scratch_write_delay_ms, 0,
    "A stress option which causes writes to scratch files to be delayed to simulate "
    "slow writes.");
// Random Thread::Create() failure injection. Each literal piece ends with exactly one
// space so the joined help text has no doubled spaces.
DEFINE_bool(thread_creation_fault_injection, false,
    "A fault injection option that causes calls to Thread::Create() to fail randomly "
    "1% of the time on eligible codepaths. Effective in debug builds only.");
// Delay injection for local catalog initialization and disk reads, plus a generic
// debug-action hook for code paths that have no query options.
DEFINE_int32(stress_catalog_init_delay_ms, 0,
    "A stress option that injects extra delay in milliseconds when initializing an "
    "impalad's local catalog replica. Delay <= 0 inject no delay.");
DEFINE_int32(stress_disk_read_delay_ms, 0,
    "A stress option that injects extra delay in milliseconds when the I/O manager is "
    "reading from disk.");
DEFINE_string(debug_actions, "",
    "For testing only. Uses the same format as the debug action query options, but "
    "allows for injection of debug actions in code paths where query options are not "
    "available.");
// Lets tests exercise the path in which the Kudu client is stubbed.
DEFINE_bool(disable_kudu, false,
    "If true, Kudu features will be disabled.");
// Timeout (ms) used in the FE for admin and metadata operations (set on the KuduClient),
// and in the BE for scans and writes (set on the KuduScanner and KuduSession
// accordingly).
DEFINE_int32(kudu_operation_timeout_ms, 3 * 60 * 1000,
    "Timeout (milliseconds) set for all Kudu operations. This must be a positive "
    "value, and there is no way to disable timeouts.");
// Default value for --kudu_client_rpc_timeout_ms.
// NOTE(review): the constant is defined twice with different values (60000 and 0);
// presumably each definition belongs to a separate branch of a preprocessor
// conditional whose directives are not visible here - confirm which build gets which.
static const int32 default_kudu_client_rpc_timeout_ms = 60000;
static const int32 default_kudu_client_rpc_timeout_ms = 0;
// Per-RPC timeout (ms) that the BE sets on the KuduClient.
DEFINE_int32(kudu_client_rpc_timeout_ms, default_kudu_client_rpc_timeout_ms,
    "(Advanced) Timeout (milliseconds) set for individual Kudu client rpcs. An "
    "operation may consist of several rpcs, so this is expected to be less than "
    "kudu_operation_timeout_ms. This must be a positive value or it will be ignored "
    "and Kudu's default of 10s will be used. There is no way to disable timeouts.");
// Table statistics: serialization size cap and row-count extrapolation.
DEFINE_int64(inc_stats_size_limit_bytes, 200 * (1LL<<20),
    "Maximum size of incremental stats the catalog is allowed to serialize per table. "
    "This limit is set as a safety check, to prevent the JVM from hitting a maximum "
    "array limit of 1GB (or OOM) while building the thrift objects to send to "
    "impalads. By default, it's set to 200MB");
DEFINE_bool(enable_stats_extrapolation, false,
    "If true, uses table statistics computed with COMPUTE STATS to extrapolate the "
    "row counts of partitions.");
// Log file naming and stdout/stderr redirection.
DEFINE_string(log_filename, "", "Prefix of log filename - full path is "
    "<log_dir>/<log_filename>.[INFO|WARN|ERROR|FATAL]");
DEFINE_bool(redirect_stdout_stderr, true, "If true, redirects stdout/stderr to "
    "INFO/ERROR log.");
// Log retention: number of files kept per severity level; 0 keeps everything.
DEFINE_int32(max_log_files, 10,
    "Maximum number of log files to retain per severity level. The most recent log "
    "files are retained. If set to 0, all log files are retained.");
// The read size is the preferred size of the reads issued to HDFS or the local FS.
// There is a trade off of latency and throughput, trying to keep disks busy but
// not introduce seeks. The literature seems to agree that with 8 MB reads, random
// io and sequential io perform similarly.
DEFINE_int32(read_size, 8 * 1024 * 1024,
    "(Advanced) The preferred I/O request size in bytes to issue to HDFS or the local "
    "filesystem. Increasing the read size will increase memory requirements. "
    "Decreasing the read size may decrease I/O throughput.");
// Selects which release's reserved-word list the SQL parser uses.
DEFINE_string(reserved_words_version, "3.0.0",
    "Reserved words compatibility version. Reserved words cannot be used as "
    "identifiers in SQL. This flag determines the impala version from which the "
    "reserved word list is taken. The value must be one of [\"2.11.0\", \"3.0.0\"].");
// Hidden testing/debugging switch. Each literal piece ends with a space so that the
// sentences do not run together once the compiler joins them.
DEFINE_bool_hidden(disable_catalog_data_ops_debug_only, false,
    "Disable catalog operations that require access to file-system data blocks. "
    "Examples are when catalog reads data blocks to load avro schemas and copy jars. "
    "Use only for testing/debugging, not in deployed clusters.");
// TODO: this flag and others, since they require multiple daemons to be set the
// same way, are error prone. One fix for this flag is to set it only on
// catalogd, propagate the setting as a property of the Catalog object, and let
// impalads act on this setting.
DEFINE_int32(invalidate_tables_timeout_s, 0, "If a table has not been referenced in a "
    "SQL statement for more than the configured amount of time, the catalog server will "
    "automatically evict its cached metadata about this table. This has the same effect "
    "as a user-initiated \"INVALIDATE METADATA\" statement on the table. Configuring "
    "this to 0 disables time-based automatic invalidation of tables. This is independent "
    "from memory-based invalidation configured by invalidate_tables_on_memory_pressure. "
    "To enable this feature, a non-zero flag must be applied to both catalogd and "
    "impalad.");
// Memory-pressure based table invalidation. The help text refers to the time-based
// flag by its actual name, invalidate_tables_timeout_s.
DEFINE_bool(invalidate_tables_on_memory_pressure, false,
    "Configure catalogd to invalidate recently unused tables when the old GC "
    "generation is almost full. This is independent from time-based invalidation "
    "configured by invalidate_tables_timeout_s. To enable this feature, a true flag "
    "must be applied to both catalogd and impalad.");
// Metastore-event based metadata invalidation; 0 leaves the feature off.
DEFINE_int32(hms_event_polling_interval_s, 0,
    "Configure catalogd to invalidate cached table metadata based on metastore "
    "events. These metastore events could be generated by external systems like "
    "Apache Hive or a different Impala cluster using the same Hive metastore server "
    "as this one. A non-zero value of this flag sets the polling interval of catalogd "
    "in seconds to fetch new metastore events. A value of zero disables this feature. "
    "When enabled, this flag has the same effect as \"INVALIDATE METADATA\" statement "
    "on the table for certain metastore event types. Additionally, in case of events "
    "which detect creation or removal of objects from metastore, catalogd adds or "
    "removes such objects from its cached metadata. This feature is independent of "
    "time and memory based automatic invalidation of tables. Note that this is still "
    "an experimental feature and not recommended to be deployed on production systems "
    "until it is made generally available.");
// Databases that are skipped when loading metadata.
DEFINE_string(blacklisted_dbs, "sys,information_schema",
    "Comma separated list for blacklisted databases. Configure which databases to be "
    "skipped for loading (in startup and global INVALIDATE METADATA). Users can't "
    "access, create, or drop databases which are blacklisted.");
// Tables (given as <db>.<table>) that are skipped when loading metadata.
DEFINE_string(blacklisted_tables, "",
    "Comma separated full names (in format: <db>.<table>) of blacklisted tables. "
    "Configure which tables to be skipped for loading (in startup and resetting "
    "metadata of the table). Users can't access, create, or drop tables which are "
    "blacklisted");
// Hidden tuning knobs for CatalogdTableInvalidator's memory-pressure behaviour.
DEFINE_double_hidden(invalidate_tables_gc_old_gen_full_threshold, 0.6,
    "The threshold above which CatalogdTableInvalidator would consider the old "
    "generation to be almost full and trigger an invalidation on recently unused "
    "tables");
DEFINE_double_hidden(invalidate_tables_fraction_on_memory_pressure, 0.1,
    "The fraction of tables to invalidate when CatalogdTableInvalidator considers "
    "the old GC generation to be almost full.");
// Switches for mt_dop, recursive partition listing and ZORDER sorting.
DEFINE_bool_hidden(unlock_mt_dop, false, "(Experimental) If true, allow specifying "
    "mt_dop for all queries.");
DEFINE_bool_hidden(mt_dop_auto_fallback, false, "(Experimental) If true, fall back to "
    "non-mt_dop if mt_dop query option is set and a query does not support it. Has no "
    "effect if --unlock_mt_dop is true.");
DEFINE_bool_hidden(recursively_list_partitions, true, "If true, recursively list the "
    "content of partition directories.");
DEFINE_bool(unlock_zorder_sort, false, "(Experimental) If true, enables using ZORDER "
    "option for SORT BY.");
// Privileges that make a database or table visible in SHOW DATABASES/TABLES results;
// the default "any" means any privilege on the object is sufficient.
// NOTE(review): in this view the help text stops mid-sentence ("... when using ") and
// the end of the DEFINE_string call is not visible - confirm the tail of the message
// and the closing ");" against the complete file.
DEFINE_string(min_privilege_set_for_show_stmts, "any",
"Comma separated list of privileges. Any one of them is required to show a database "
"or table. Defaults to \"any\" which means if the user has any privilege (CREATE, "
"SELECT, INSERT, etc) on a database or table, the database/table is visible in the "
"results of SHOW DATABASES/TABLES. If set to \"select\", only dbs/tables on which "
"the user has SELECT privilege will be shown. If set to \"select,insert\", only "
"dbs/tables on which the user has SELECT or INSERT privilege will be shown. In "
"practice, this flag can be set to \"select\" or \"select,insert\" to improve "
"performance of SHOW DATABASES/TABLES and GET_SCHEMAS/GET_TABLES, especially when "
"using Sentry and having thousands of candidate dbs/tables to be checked with a "
"user with large scale of privileges. No significant performance gain when using "
// Set the slow RPC threshold to 2 minutes to avoid false positives (since TransmitData
// RPCs can take some time to process).
DEFINE_int64(impala_slow_rpc_threshold_ms, 2 * 60 * 1000,
    "(Advanced) Threshold for considering Impala internal RPCs to be unusually slow. "
    "Slow RPCs trigger additional logging and other diagnostics. Lowering this value "
    "may result in false positives. "
    "This overrides KRPC's --rpc_duration_too_long_ms setting.");
// Thread count for authorization checks done while executing show tables/databases;
// the help text gives the accepted range as 1 to 128.
DEFINE_int32(num_check_authorization_threads, 1,
    "The number of threads used to check authorization for the user when executing show "
    "tables/databases. This configuration is applicable only when authorization is "
    "enabled. A value of 1 disables multi-threaded execution for checking authorization. "
    "However, a small value larger than 1 may limit the parallelism of FE requests when "
    "checking authorization with a high concurrency. The value must be in the range of "
    "1 to 128.");
// ++========================++
// || Startup flag graveyard ||
// ++========================++
// -----------
// -----------/ R I P ╲
// / R I P ╲ -----------|-----------
// |-----------| |/ R I P ╲
// | | LLAMA ||-----------|
// | Old Aggs | || |
// | | -- || Old Joins |
// | -- | || |
// | | || -- |
// | |~.~~.~~.~~~~| |
// ~~.~~.~~.~~~~ | |
// ~~.~~.~~.~~~~
// The flags have no effect but we don't want to prevent Impala from starting when they
// are provided on the command line after an upgrade. We issue a warning if the flag is
// set from the command line.
#define REMOVED_FLAG(flagname) \
DEFINE_string_hidden(flagname, "__UNSET__", "Removed"); \
DEFINE_validator(flagname, [](const char* name, const string& val) { \
if (val != "__UNSET__") LOG(WARNING) << "Ignoring removed flag " << name; \
return true; \