/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hive.metastore.conf;
import java.io.File;
import java.io.IOException;
import java.net.URI;
import java.net.URL;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.common.ZooKeeperHiveHelper;
import org.apache.hadoop.hive.metastore.utils.StringUtils;
import org.apache.hadoop.security.alias.CredentialProviderFactory;
import org.apache.hive.common.util.SuppressFBWarnings;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.google.common.annotations.VisibleForTesting;
/**
* A set of definitions of config values used by the Metastore. One of the key aims of this
* class is to provide backwards compatibility with existing Hive configuration keys while
* allowing the metastore to have its own, Hive independent keys. For this reason access to the
* underlying Configuration object should always be done via the static methods provided here
* rather than directly via {@link Configuration#get(String)} and
* {@link Configuration#set(String, String)}. All the methods of this class will handle checking
* both the MetastoreConf key and the Hive key. The algorithm is, on reads, to check first the
* MetastoreConf key, then the Hive key, and return the default if neither is set. On writes,
* only the Metastore key is set.
*
* This class does not extend Configuration. Rather it provides static methods for operating on
* a Configuration object. This allows it to work on HiveConf objects, which otherwise would not
* be the case.
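*
* A minimal usage sketch (the {@code conf} variable and the chosen ConfVars entry are purely
* illustrative):
* <pre>{@code
* Configuration conf = MetastoreConf.newMetastoreConf();
* // Reads check the metastore key first, then the Hive key, then fall back to the default.
* String uris = MetastoreConf.getVar(conf, ConfVars.THRIFT_URIS);
* // Writes set only the metastore key.
* MetastoreConf.setVar(conf, ConfVars.THRIFT_URIS, "thrift://localhost:9083");
* }</pre>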
*/
public class MetastoreConf {
private static final Logger LOG = LoggerFactory.getLogger(MetastoreConf.class);
private static final Pattern TIME_UNIT_SUFFIX = Pattern.compile("([0-9]+)([a-zA-Z]+)");
@VisibleForTesting
static final String DEFAULT_STORAGE_SCHEMA_READER_CLASS =
"org.apache.hadoop.hive.metastore.DefaultStorageSchemaReader";
@VisibleForTesting
static final String HIVE_ALTER_HANDLE_CLASS =
"org.apache.hadoop.hive.metastore.HiveAlterHandler";
@VisibleForTesting
static final String MATERIALZIATIONS_REBUILD_LOCK_CLEANER_TASK_CLASS =
"org.apache.hadoop.hive.metastore.MaterializationsRebuildLockCleanerTask";
@VisibleForTesting
static final String METASTORE_TASK_THREAD_CLASS =
"org.apache.hadoop.hive.metastore.MetastoreTaskThread";
@VisibleForTesting
static final String RUNTIME_STATS_CLEANER_TASK_CLASS =
"org.apache.hadoop.hive.metastore.RuntimeStatsCleanerTask";
static final String PARTITION_MANAGEMENT_TASK_CLASS =
"org.apache.hadoop.hive.metastore.PartitionManagementTask";
@VisibleForTesting
static final String EVENT_CLEANER_TASK_CLASS =
"org.apache.hadoop.hive.metastore.events.EventCleanerTask";
static final String ACID_METRICS_TASK_CLASS =
"org.apache.hadoop.hive.metastore.metrics.AcidMetricService";
@VisibleForTesting
static final String METASTORE_DELEGATION_MANAGER_CLASS =
"org.apache.hadoop.hive.metastore.security.MetastoreDelegationTokenManager";
@VisibleForTesting
static final String ACID_HOUSE_KEEPER_SERVICE_CLASS =
"org.apache.hadoop.hive.metastore.txn.AcidHouseKeeperService";
@VisibleForTesting
static final String ACID_TXN_CLEANER_SERVICE_CLASS =
"org.apache.hadoop.hive.metastore.txn.AcidTxnCleanerService";
@VisibleForTesting
static final String ACID_OPEN_TXNS_COUNTER_SERVICE_CLASS =
"org.apache.hadoop.hive.metastore.txn.AcidOpenTxnsCounterService";
public static final String METASTORE_AUTHENTICATION_LDAP_USERMEMBERSHIPKEY_NAME =
"metastore.authentication.ldap.userMembershipKey";
private static final Map<String, ConfVars> metaConfs = new HashMap<>();
private static volatile URL hiveSiteURL = null;
private static URL hiveDefaultURL = null;
private static URL hiveMetastoreSiteURL = null;
private static URL metastoreSiteURL = null;
private static AtomicBoolean beenDumped = new AtomicBoolean();
private static Map<String, ConfVars> keyToVars;
static {
keyToVars = new HashMap<>(ConfVars.values().length * 2);
for (ConfVars var : ConfVars.values()) {
keyToVars.put(var.varname, var);
keyToVars.put(var.hiveName, var);
}
}
@VisibleForTesting
static final String TEST_ENV_WORKAROUND = "metastore.testing.env.workaround.dont.ever.set.this.";
public static enum StatsUpdateMode {
NONE, EXISTING, ALL
}
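/**
* Internal holder pairing a default time value with its unit. {@link #toString()} renders the
* value with the same unit suffixes that are accepted when a time property is read back. A
* minimal sketch of reading a time value in a different unit (the {@code conf} variable and the
* "20s" setting are illustrative only):
* <pre>{@code
* Configuration conf = MetastoreConf.newMetastoreConf();
* conf.set(ConfVars.CLIENT_SOCKET_TIMEOUT.getVarname(), "20s");
* // Expected to yield 20000 under these assumptions.
* long timeoutMs = MetastoreConf.getTimeVar(conf, ConfVars.CLIENT_SOCKET_TIMEOUT, TimeUnit.MILLISECONDS);
* }</pre>
*/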
private static class TimeValue {
final long val;
final TimeUnit unit;
private TimeValue(long val, TimeUnit unit) {
this.val = val;
this.unit = unit;
}
@Override
public String toString() {
switch (unit) {
case NANOSECONDS: return Long.toString(val) + "ns";
case MICROSECONDS: return Long.toString(val) + "us";
case MILLISECONDS: return Long.toString(val) + "ms";
case SECONDS: return Long.toString(val) + "s";
case MINUTES: return Long.toString(val) + "m";
case HOURS: return Long.toString(val) + "h";
case DAYS: return Long.toString(val) + "d";
}
throw new RuntimeException("Unknown time unit " + unit);
}
}
/**
* Metastore related options that the db is initialized against. When a conf
* var in this list is changed, the metastore instance for the CLI will
* be recreated so that the change will take effect.
* TODO - I suspect the vast majority of these don't need to be here. But it requires testing
* before just pulling them out.
*/
@SuppressFBWarnings(value = "MS_MUTABLE_ARRAY")
public static final MetastoreConf.ConfVars[] metaVars = {
ConfVars.WAREHOUSE,
ConfVars.REPLDIR,
ConfVars.THRIFT_URIS,
ConfVars.SERVER_PORT,
ConfVars.THRIFT_BIND_HOST,
ConfVars.THRIFT_ZOOKEEPER_CLIENT_PORT,
ConfVars.THRIFT_ZOOKEEPER_NAMESPACE,
ConfVars.THRIFT_CONNECTION_RETRIES,
ConfVars.THRIFT_FAILURE_RETRIES,
ConfVars.CLIENT_CONNECT_RETRY_DELAY,
ConfVars.CLIENT_SOCKET_TIMEOUT,
ConfVars.CLIENT_SOCKET_LIFETIME,
ConfVars.PWD,
ConfVars.CONNECT_URL_HOOK,
ConfVars.CONNECT_URL_KEY,
ConfVars.SERVER_MIN_THREADS,
ConfVars.SERVER_MAX_THREADS,
ConfVars.TCP_KEEP_ALIVE,
ConfVars.KERBEROS_KEYTAB_FILE,
ConfVars.KERBEROS_PRINCIPAL,
ConfVars.USE_THRIFT_SASL,
ConfVars.METASTORE_CLIENT_AUTH_MODE,
ConfVars.METASTORE_CLIENT_PLAIN_USERNAME,
ConfVars.TOKEN_SIGNATURE,
ConfVars.CACHE_PINOBJTYPES,
ConfVars.CONNECTION_POOLING_TYPE,
ConfVars.VALIDATE_TABLES,
ConfVars.DATANUCLEUS_INIT_COL_INFO,
ConfVars.VALIDATE_COLUMNS,
ConfVars.VALIDATE_CONSTRAINTS,
ConfVars.STORE_MANAGER_TYPE,
ConfVars.AUTO_CREATE_ALL,
ConfVars.DATANUCLEUS_TRANSACTION_ISOLATION,
ConfVars.DATANUCLEUS_CACHE_LEVEL2,
ConfVars.DATANUCLEUS_CACHE_LEVEL2_TYPE,
ConfVars.IDENTIFIER_FACTORY,
ConfVars.DATANUCLEUS_PLUGIN_REGISTRY_BUNDLE_CHECK,
ConfVars.AUTHORIZATION_STORAGE_AUTH_CHECKS,
ConfVars.BATCH_RETRIEVE_MAX,
ConfVars.EVENT_LISTENERS,
ConfVars.TRANSACTIONAL_EVENT_LISTENERS,
ConfVars.EVENT_CLEAN_FREQ,
ConfVars.EVENT_EXPIRY_DURATION,
ConfVars.EVENT_MESSAGE_FACTORY,
ConfVars.FILTER_HOOK,
ConfVars.RAW_STORE_IMPL,
ConfVars.END_FUNCTION_LISTENERS,
ConfVars.PART_INHERIT_TBL_PROPS,
ConfVars.BATCH_RETRIEVE_OBJECTS_MAX,
ConfVars.INIT_HOOKS,
ConfVars.PRE_EVENT_LISTENERS,
ConfVars.HMS_HANDLER_ATTEMPTS,
ConfVars.HMS_HANDLER_INTERVAL,
ConfVars.HMS_HANDLER_FORCE_RELOAD_CONF,
ConfVars.PARTITION_NAME_WHITELIST_PATTERN,
ConfVars.ORM_RETRIEVE_MAPNULLS_AS_EMPTY_STRINGS,
ConfVars.USERS_IN_ADMIN_ROLE,
ConfVars.HIVE_TXN_MANAGER,
ConfVars.TXN_TIMEOUT,
ConfVars.TXN_MAX_OPEN_BATCH,
ConfVars.TXN_RETRYABLE_SQLEX_REGEX,
ConfVars.STATS_NDV_TUNER,
ConfVars.STATS_NDV_DENSITY_FUNCTION,
ConfVars.AGGREGATE_STATS_CACHE_ENABLED,
ConfVars.AGGREGATE_STATS_CACHE_SIZE,
ConfVars.AGGREGATE_STATS_CACHE_MAX_PARTITIONS,
ConfVars.AGGREGATE_STATS_CACHE_FPP,
ConfVars.AGGREGATE_STATS_CACHE_MAX_VARIANCE,
ConfVars.AGGREGATE_STATS_CACHE_TTL,
ConfVars.AGGREGATE_STATS_CACHE_MAX_WRITER_WAIT,
ConfVars.AGGREGATE_STATS_CACHE_MAX_READER_WAIT,
ConfVars.AGGREGATE_STATS_CACHE_MAX_FULL,
ConfVars.AGGREGATE_STATS_CACHE_CLEAN_UNTIL,
ConfVars.DISALLOW_INCOMPATIBLE_COL_TYPE_CHANGES,
ConfVars.FILE_METADATA_THREADS,
ConfVars.METASTORE_CLIENT_FILTER_ENABLED,
ConfVars.METASTORE_SERVER_FILTER_ENABLED
};
/**
* User configurable Metastore vars
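* These are the keys that a client session may override at runtime (for example via
* {@code IMetaStoreClient#setMetaConf}). A minimal sketch (the {@code client} variable is
* illustrative only):
* <pre>{@code
* client.setMetaConf(ConfVars.TRY_DIRECT_SQL.getVarname(), "false");
* }</pre>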
*/
private static final MetastoreConf.ConfVars[] metaConfVars = {
ConfVars.TRY_DIRECT_SQL,
ConfVars.TRY_DIRECT_SQL_DDL,
ConfVars.CLIENT_SOCKET_TIMEOUT,
ConfVars.PARTITION_NAME_WHITELIST_PATTERN,
ConfVars.CAPABILITY_CHECK,
ConfVars.DISALLOW_INCOMPATIBLE_COL_TYPE_CHANGES,
ConfVars.EXPRESSION_PROXY_CLASS
};
static {
for (ConfVars confVar : metaConfVars) {
metaConfs.put(confVar.varname, confVar);
metaConfs.put(confVar.hiveName, confVar);
}
}
/**
* Variables that we should never print the value of for security reasons.
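* When such a value is needed in code, prefer {@code MetastoreConf.getPassword(conf, var)} over a
* plain get, since it can resolve the value from a Hadoop credential provider instead of the
* config file. A minimal sketch (the {@code conf} variable is illustrative; the call can throw
* {@link IOException}):
* <pre>{@code
* Configuration conf = MetastoreConf.newMetastoreConf();
* String dbPassword = MetastoreConf.getPassword(conf, ConfVars.PWD);
* }</pre>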
*/
private static final Set<String> unprintables = StringUtils.asSet(
ConfVars.PWD.varname,
ConfVars.PWD.hiveName,
ConfVars.SSL_KEYSTORE_PASSWORD.varname,
ConfVars.SSL_KEYSTORE_PASSWORD.hiveName,
ConfVars.SSL_TRUSTSTORE_PASSWORD.varname,
ConfVars.SSL_TRUSTSTORE_PASSWORD.hiveName,
ConfVars.DBACCESS_SSL_TRUSTSTORE_PASSWORD.varname,
ConfVars.DBACCESS_SSL_TRUSTSTORE_PASSWORD.hiveName,
ConfVars.THRIFT_ZOOKEEPER_SSL_KEYSTORE_PASSWORD.varname,
ConfVars.THRIFT_ZOOKEEPER_SSL_KEYSTORE_PASSWORD.hiveName,
ConfVars.THRIFT_ZOOKEEPER_SSL_TRUSTSTORE_PASSWORD.varname,
ConfVars.THRIFT_ZOOKEEPER_SSL_TRUSTSTORE_PASSWORD.hiveName
);
public static ConfVars getMetaConf(String name) {
return metaConfs.get(name);
}
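/**
* Enumeration of all metastore configuration keys. Each entry carries the metastore key, the
* legacy Hive key, a default value (optionally with a {@link TimeUnit} or a validator), and a
* description. A hedged sketch of the typical shape of an entry (the key names and values below
* are illustrative only, not real configuration keys):
* <pre>{@code
* EXAMPLE_TIMEOUT("metastore.example.timeout", "hive.metastore.example.timeout",
*     30, TimeUnit.SECONDS,
*     "Illustrative entry: how long to wait before giving up on the example operation."),
* }</pre>
*/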
public enum ConfVars {
// alpha order, PLEASE!
ACID_HOUSEKEEPER_SERVICE_INTERVAL("metastore.acid.housekeeper.interval",
"hive.metastore.acid.housekeeper.interval", 60, TimeUnit.SECONDS,
"Time interval describing how often the acid housekeeper runs."),
ACID_HOUSEKEEPER_SERVICE_START("metastore.acid.housekeeper.start",
"hive.metastore.acid.housekeeper.start", 60, TimeUnit.SECONDS,
"Time delay of 1st acid housekeeper run after metastore has started."),
ACID_TXN_CLEANER_INTERVAL("metastore.acid.txn.cleaner.interval",
"hive.metastore.acid.txn.cleaner.interval", 10, TimeUnit.SECONDS,
"Time interval describing how often aborted and committed txns are cleaned."),
ADDED_JARS("metastore.added.jars.path", "hive.added.jars.path", "",
"This an internal parameter."),
AGGREGATE_STATS_CACHE_CLEAN_UNTIL("metastore.aggregate.stats.cache.clean.until",
"hive.metastore.aggregate.stats.cache.clean.until", 0.8,
"The cleaner thread cleans until cache reaches this % full size."),
AGGREGATE_STATS_CACHE_ENABLED("metastore.aggregate.stats.cache.enabled",
"hive.metastore.aggregate.stats.cache.enabled", false,
"Whether aggregate stats caching is enabled or not."),
AGGREGATE_STATS_CACHE_FPP("metastore.aggregate.stats.cache.fpp",
"hive.metastore.aggregate.stats.cache.fpp", 0.01,
"Maximum false positive probability for the Bloom Filter used in each aggregate stats cache node (default 1%)."),
AGGREGATE_STATS_CACHE_MAX_FULL("metastore.aggregate.stats.cache.max.full",
"hive.metastore.aggregate.stats.cache.max.full", 0.9,
"Maximum cache full % after which the cache cleaner thread kicks in."),
AGGREGATE_STATS_CACHE_MAX_PARTITIONS("metastore.aggregate.stats.cache.max.partitions",
"hive.metastore.aggregate.stats.cache.max.partitions", 10000,
"Maximum number of partitions that are aggregated per cache node."),
AGGREGATE_STATS_CACHE_MAX_READER_WAIT("metastore.aggregate.stats.cache.max.reader.wait",
"hive.metastore.aggregate.stats.cache.max.reader.wait", 1000, TimeUnit.MILLISECONDS,
"Number of milliseconds a reader will wait to acquire the readlock before giving up."),
AGGREGATE_STATS_CACHE_MAX_VARIANCE("metastore.aggregate.stats.cache.max.variance",
"hive.metastore.aggregate.stats.cache.max.variance", 0.01,
"Maximum tolerable variance in number of partitions between a cached node and our request (default 1%)."),
AGGREGATE_STATS_CACHE_MAX_WRITER_WAIT("metastore.aggregate.stats.cache.max.writer.wait",
"hive.metastore.aggregate.stats.cache.max.writer.wait", 5000, TimeUnit.MILLISECONDS,
"Number of milliseconds a writer will wait to acquire the writelock before giving up."),
AGGREGATE_STATS_CACHE_SIZE("metastore.aggregate.stats.cache.size",
"hive.metastore.aggregate.stats.cache.size", 10000,
"Maximum number of aggregate stats nodes that we will place in the metastore aggregate stats cache."),
AGGREGATE_STATS_CACHE_TTL("metastore.aggregate.stats.cache.ttl",
"hive.metastore.aggregate.stats.cache.ttl", 600, TimeUnit.SECONDS,
"Number of seconds for a cached node to be active in the cache before they become stale."),
ALLOW_TENANT_BASED_STORAGE("metastore.warehouse.tenant.colocation", "hive.metastore.warehouse.tenant.colocation", false,
"Allows managed and external tables for a tenant to have a common parent directory\n" +
"For example: /user/warehouse/user1/managed and /user/warehouse/user1/external\n" +
"This allows users to be able to set quotas on user1 directory. These locations have to be defined on the\n" +
"database object explicitly when creating the DB or via alter database."),
ALTER_HANDLER("metastore.alter.handler", "hive.metastore.alter.impl",
HIVE_ALTER_HANDLE_CLASS,
"Alter handler. For now defaults to the Hive one. Really need a better default option"),
ASYNC_LOG_ENABLED("metastore.async.log.enabled", "hive.async.log.enabled", true,
"Whether to enable Log4j2's asynchronous logging. Asynchronous logging can give\n" +
" significant performance improvement as logging will be handled in separate thread\n" +
" that uses LMAX disruptor queue for buffering log messages.\n" +
" Refer https://logging.apache.org/log4j/2.x/manual/async.html for benefits and\n" +
" drawbacks."),
AUTHORIZATION_STORAGE_AUTH_CHECKS("metastore.authorization.storage.checks",
"hive.metastore.authorization.storage.checks", false,
"Should the metastore do authorization checks against the underlying storage (usually hdfs) \n" +
"for operations like drop-partition (disallow the drop-partition if the user in\n" +
"question doesn't have permissions to delete the corresponding directory\n" +
"on the storage)."),
AUTO_CREATE_ALL("datanucleus.schema.autoCreateAll", "datanucleus.schema.autoCreateAll", false,
"Auto creates necessary schema on a startup if one doesn't exist. Set this to false, after creating it once."
+ "To enable auto create also set hive.metastore.schema.verification=false. Auto creation is not "
+ "recommended for production use cases, run schematool command instead." ),
BATCH_RETRIEVE_MAX("metastore.batch.retrieve.max", "hive.metastore.batch.retrieve.max", 300,
"Maximum number of objects (tables/partitions) can be retrieved from metastore in one batch. \n" +
"The higher the number, the less the number of round trips is needed to the Hive metastore server, \n" +
"but it may also cause higher memory requirement at the client side."),
BATCH_RETRIEVE_OBJECTS_MAX("metastore.batch.retrieve.table.partition.max",
"hive.metastore.batch.retrieve.table.partition.max", 1000,
"Maximum number of objects that metastore internally retrieves in one batch."),
CACHE_PINOBJTYPES("metastore.cache.pinobjtypes", "hive.metastore.cache.pinobjtypes",
"Table,StorageDescriptor,SerDeInfo,Partition,Database,Type,FieldSchema,Order",
"List of comma separated metastore object types that should be pinned in the cache"),
CACHED_RAW_STORE_IMPL("metastore.cached.rawstore.impl",
"hive.metastore.cached.rawstore.impl", "org.apache.hadoop.hive.metastore.ObjectStore",
"Name of the wrapped RawStore class"),
CACHED_RAW_STORE_CACHE_UPDATE_FREQUENCY("metastore.cached.rawstore.cache.update.frequency",
"hive.metastore.cached.rawstore.cache.update.frequency", 60, TimeUnit.SECONDS,
"The time after which metastore cache is updated from metastore DB."),
CACHED_RAW_STORE_CACHED_OBJECTS_WHITELIST("metastore.cached.rawstore.cached.object.whitelist",
"hive.metastore.cached.rawstore.cached.object.whitelist", ".*", "Comma separated list of regular expressions \n " +
"to select the tables (and its partitions, stats etc) that will be cached by CachedStore. \n" +
"This can be used in conjunction with hive.metastore.cached.rawstore.cached.object.blacklist. \n" +
"Example: .*, db1.*, db2\\.tbl.*. The last item can potentially override patterns specified before."),
CACHED_RAW_STORE_CACHED_OBJECTS_BLACKLIST("metastore.cached.rawstore.cached.object.blacklist",
"hive.metastore.cached.rawstore.cached.object.blacklist", "", "Comma separated list of regular expressions \n " +
"to filter out the tables (and its partitions, stats etc) that will be cached by CachedStore. \n" +
"This can be used in conjunction with hive.metastore.cached.rawstore.cached.object.whitelist. \n" +
"Example: db2.*, db3\\.tbl1, db3\\..*. The last item can potentially override patterns specified before. \n" +
"The blacklist also overrides the whitelist."),
CACHED_RAW_STORE_MAX_CACHE_MEMORY("metastore.cached.rawstore.max.cache.memory",
"hive.metastore.cached.rawstore.max.cache.memory", "1Gb", new SizeValidator(),
"The maximum memory in bytes that the cached objects can use. "
+ "Memory used is calculated based on estimated size of tables and partitions in the cache. "
+ "Setting it to a negative value disables memory estimation."),
CAPABILITY_CHECK("metastore.client.capability.check",
"hive.metastore.client.capability.check", true,
"Whether to check client capabilities for potentially breaking API usage."),
CATALOG_DEFAULT("metastore.catalog.default", "metastore.catalog.default", "hive",
"The default catalog to use when a catalog is not specified. Default is 'hive' (the " +
"default catalog)."),
CATALOGS_TO_CACHE("metastore.cached.rawstore.catalogs", "metastore.cached.rawstore.catalogs",
"hive", "Comma separated list of catalogs to cache in the CachedStore. Default is 'hive' " +
"(the default catalog). Empty string means all catalogs will be cached."),
CLIENT_CONNECT_RETRY_DELAY("metastore.client.connect.retry.delay",
"hive.metastore.client.connect.retry.delay", 1, TimeUnit.SECONDS,
"Number of seconds for the client to wait between consecutive connection attempts"),
CLIENT_KERBEROS_PRINCIPAL("metastore.client.kerberos.principal",
"hive.metastore.client.kerberos.principal",
"", // E.g. "hive-metastore/_HOST@EXAMPLE.COM".
"The Kerberos principal associated with the HA cluster of hcat_servers."),
CLIENT_SOCKET_LIFETIME("metastore.client.socket.lifetime",
"hive.metastore.client.socket.lifetime", 0, TimeUnit.SECONDS,
"MetaStore Client socket lifetime in seconds. After this time is exceeded, client\n" +
"reconnects on the next MetaStore operation. A value of 0s means the connection\n" +
"has an infinite lifetime."),
CLIENT_SOCKET_TIMEOUT("metastore.client.socket.timeout", "hive.metastore.client.socket.timeout", 600,
TimeUnit.SECONDS, "MetaStore Client socket timeout in seconds"),
COMPACTOR_HISTORY_RETENTION_DID_NOT_INITIATE("metastore.compactor.history.retention.did.not.initiate",
"hive.compactor.history.retention.did.not.initiate", 2,
new RangeValidator(0, 100), "Determines how many compaction records in state " +
"'did not initiate' will be retained in compaction history for a given table/partition.",
// deprecated keys:
"metastore.compactor.history.retention.attempted", "hive.compactor.history.retention.attempted"),
COMPACTOR_HISTORY_RETENTION_FAILED("metastore.compactor.history.retention.failed",
"hive.compactor.history.retention.failed", 3,
new RangeValidator(0, 100), "Determines how many failed compaction records will be " +
"retained in compaction history for a given table/partition."),
COMPACTOR_HISTORY_RETENTION_SUCCEEDED("metastore.compactor.history.retention.succeeded",
"hive.compactor.history.retention.succeeded", 3,
new RangeValidator(0, 100), "Determines how many successful compaction records will be " +
"retained in compaction history for a given table/partition."),
COMPACTOR_HISTORY_RETENTION_TIMEOUT("metastore.compactor.history.retention.timeout",
"hive.compactor.history.retention.timeout", 7, TimeUnit.DAYS,
"Determines how long failed and not initiated compaction records will be " +
"retained in compaction history if there is a more recent succeeded compaction on the table/partition."),
COMPACTOR_INITIATOR_FAILED_THRESHOLD("metastore.compactor.initiator.failed.compacts.threshold",
"hive.compactor.initiator.failed.compacts.threshold", 2,
new RangeValidator(1, 20), "Number of consecutive compaction failures (per table/partition) " +
"after which automatic compactions will not be scheduled any more. Note that this must be less " +
"than hive.compactor.history.retention.failed."),
COMPACTOR_INITIATOR_FAILED_RETRY_TIME("metastore.compactor.initiator.failed.retry.time",
"hive.compactor.initiator.failed.retry.time", 7, TimeUnit.DAYS,
"Time after Initiator will ignore metastore.compactor.initiator.failed.compacts.threshold "
+ "and retry with compaction again. This will try to auto heal tables with previous failed compaction "
+ "without manual intervention. Setting it to 0 or negative value will disable this feature."),
COMPACTOR_RUN_AS_USER("metastore.compactor.run.as.user", "hive.compactor.run.as.user", "",
"Specify the user to run compactor Initiator and Worker as. If empty string, defaults to table/partition " +
"directory owner."),
METASTORE_HOUSEKEEPING_LEADER_HOSTNAME("metastore.housekeeping.leader.hostname",
"hive.metastore.housekeeping.leader.hostname", "",
"If there are multiple Thrift metastore services running, the hostname of Thrift metastore " +
"service to run housekeeping tasks at. By default this values is empty, which " +
"means that the current metastore will run the housekeeping tasks. If configuration" +
"metastore.thrift.bind.host is set on the intended leader metastore, this value should " +
"match that configuration. Otherwise it should be same as the hostname returned by " +
"InetAddress#getLocalHost#getHostName(). Given the uncertainty in the later " +
"it is desirable to configure metastore.thrift.bind.host on the intended leader HMS."),
METASTORE_HOUSEKEEPING_THREADS_ON("metastore.housekeeping.threads.on",
"hive.metastore.housekeeping.threads.on", false,
"Whether to run the tasks under metastore.task.threads.remote on this metastore instance or not.\n" +
"Set this to true on one instance of the Thrift metastore service as part of turning\n" +
"on Hive transactions. For a complete list of parameters required for turning on\n" +
"transactions, see hive.txn.manager."),
METASTORE_ACIDMETRICS_THREAD_ON("metastore.acidmetrics.thread.on",
"hive.metastore.acidmetrics.thread.on", true,
"Whether to run acid related metrics collection on this metastore instance."),
METASTORE_ACIDMETRICS_CHECK_INTERVAL("metastore.acidmetrics.check.interval",
"hive.metastore.acidmetrics.check.interval", 300,
TimeUnit.SECONDS,
"Time in seconds between acid related metric collection runs."),
COMPACTOR_INITIATOR_ON("metastore.compactor.initiator.on", "hive.compactor.initiator.on", false,
"Whether to run the initiator and cleaner threads on this metastore instance or not.\n" +
"Set this to true on one instance of the Thrift metastore service as part of turning\n" +
"on Hive transactions. For a complete list of parameters required for turning on\n" +
"transactions, see hive.txn.manager."),
COMPACTOR_WORKER_THREADS("metastore.compactor.worker.threads",
"hive.compactor.worker.threads", 0,
"How many compactor worker threads to run on this metastore instance. Set this to a\n" +
"positive number on one or more instances of the Thrift metastore service as part of\n" +
"turning on Hive transactions. For a complete list of parameters required for turning\n" +
"on transactions, see hive.txn.manager.\n" +
"Worker threads spawn MapReduce jobs to do compactions. They do not do the compactions\n" +
"themselves. Increasing the number of worker threads will decrease the time it takes\n" +
"tables or partitions to be compacted once they are determined to need compaction.\n" +
"It will also increase the background load on the Hadoop cluster as more MapReduce jobs\n" +
"will be running in the background."),
COMPACTOR_MINOR_STATS_COMPRESSION(
"metastore.compactor.enable.stats.compression",
"metastore.compactor.enable.stats.compression", true,
"Can be used to disable compression and ORC indexes for files produced by minor compaction."),
CONNECTION_DRIVER("javax.jdo.option.ConnectionDriverName",
"javax.jdo.option.ConnectionDriverName", "org.apache.derby.jdbc.EmbeddedDriver",
"Driver class name for a JDBC metastore"),
CONNECTION_POOLING_MAX_CONNECTIONS("datanucleus.connectionPool.maxPoolSize",
"datanucleus.connectionPool.maxPoolSize", 10,
"Specify the maximum number of connections in the connection pool. Note: The configured size will be used by\n" +
"2 connection pools (TxnHandler and ObjectStore). When configuring the max connection pool size, it is\n" +
"recommended to take into account the number of metastore instances and the number of HiveServer2 instances\n" +
"configured with embedded metastore. To get optimal performance, set config to meet the following condition\n"+
"(2 * pool_size * metastore_instances + 2 * pool_size * HS2_instances_with_embedded_metastore) = \n" +
"(2 * physical_core_count + hard_disk_count)."),
CONNECT_URL_HOOK("metastore.ds.connection.url.hook",
"hive.metastore.ds.connection.url.hook", "",
"Name of the hook to use for retrieving the JDO connection URL. If empty, the value in javax.jdo.option.ConnectionURL is used"),
CONNECT_URL_KEY("javax.jdo.option.ConnectionURL",
"javax.jdo.option.ConnectionURL",
"jdbc:derby:;databaseName=metastore_db;create=true",
"JDBC connect string for a JDBC metastore.\n" +
"To use SSL to encrypt/authenticate the connection, provide database-specific SSL flag in the connection URL.\n" +
"For example, jdbc:postgresql://myhost/db?ssl=true for postgres database."),
CONNECTION_POOLING_TYPE("datanucleus.connectionPoolingType",
"datanucleus.connectionPoolingType", "HikariCP", new StringSetValidator("DBCP",
"HikariCP", "NONE"),
"Specify connection pool library for datanucleus"),
CONNECTION_USER_NAME("javax.jdo.option.ConnectionUserName",
"javax.jdo.option.ConnectionUserName", "APP",
"Username to use against metastore database"),
CREATE_TABLES_AS_ACID("metastore.create.as.acid", "hive.create.as.acid", false,
"Whether the eligible tables should be created as full ACID by default. Does \n" +
"not apply to external tables, the ones using storage handlers, etc."),
COUNT_OPEN_TXNS_INTERVAL("metastore.count.open.txns.interval", "hive.count.open.txns.interval",
1, TimeUnit.SECONDS, "Time in seconds between checks to count open transactions."),
DATANUCLEUS_AUTOSTART("datanucleus.autoStartMechanismMode",
"datanucleus.autoStartMechanismMode", "ignored", new StringSetValidator("ignored"),
"Autostart mechanism for datanucleus. Currently ignored is the only option supported."),
DATANUCLEUS_CACHE_LEVEL2("datanucleus.cache.level2", "datanucleus.cache.level2", false,
"Use a level 2 cache. Turn this off if metadata is changed independently of Hive metastore server"),
DATANUCLEUS_CACHE_LEVEL2_TYPE("datanucleus.cache.level2.type",
"datanucleus.cache.level2.type", "none", ""),
DATANUCLEUS_INIT_COL_INFO("datanucleus.rdbms.initializeColumnInfo",
"datanucleus.rdbms.initializeColumnInfo", "NONE",
"initializeColumnInfo setting for DataNucleus; set to NONE at least on Postgres."),
DATANUCLEUS_PLUGIN_REGISTRY_BUNDLE_CHECK("datanucleus.plugin.pluginRegistryBundleCheck",
"datanucleus.plugin.pluginRegistryBundleCheck", "LOG",
"Defines what happens when plugin bundles are found and are duplicated [EXCEPTION|LOG|NONE]"),
DATANUCLEUS_TRANSACTION_ISOLATION("datanucleus.transactionIsolation",
"datanucleus.transactionIsolation", "read-committed",
"Default transaction isolation level for identity generation."),
DATANUCLEUS_USE_LEGACY_VALUE_STRATEGY("datanucleus.rdbms.useLegacyNativeValueStrategy",
"datanucleus.rdbms.useLegacyNativeValueStrategy", true, ""),
// Parameters for configuring SSL encryption to the database store
// If DBACCESS_USE_SSL is false, then all other DBACCESS_SSL_* properties will be ignored
DBACCESS_SSL_TRUSTSTORE_PASSWORD("metastore.dbaccess.ssl.truststore.password", "hive.metastore.dbaccess.ssl.truststore.password", "",
"Password for the Java truststore file that is used when encrypting the connection to the database store. \n"
+ "metastore.dbaccess.ssl.use.SSL must be set to true for this property to take effect. \n"
+ "This directly maps to the javax.net.ssl.trustStorePassword Java system property. Defaults to jssecacerts, if it exists, otherwise uses cacerts. \n"
+ "It is recommended to specify the password using a credential provider so as to not expose it to discovery by other users. \n"
+ "One way to do this is by using the Hadoop CredentialProvider API and provisioning credentials for this property. Refer to the Hadoop CredentialProvider API Guide for more details."),
DBACCESS_SSL_TRUSTSTORE_PATH("metastore.dbaccess.ssl.truststore.path", "hive.metastore.dbaccess.ssl.truststore.path", "",
"Location on disk of the Java truststore file to use when encrypting the connection to the database store. \n"
+ "This file consists of a collection of certificates trusted by the metastore server. \n"
+ "metastore.dbaccess.ssl.use.SSL must be set to true for this property to take effect. \n"
+ "This directly maps to the javax.net.ssl.trustStore Java system property. Defaults to the default Java truststore file. \n"),
DBACCESS_SSL_TRUSTSTORE_TYPE("metastore.dbaccess.ssl.truststore.type", "hive.metastore.dbaccess.ssl.truststore.type", "jks",
new StringSetValidator("jceks", "jks", "dks", "pkcs11", "pkcs12", "bcfks"),
"File type for the Java truststore file that is used when encrypting the connection to the database store. \n"
+ "metastore.dbaccess.ssl.use.SSL must be set to true for this property to take effect. \n"
+ "This directly maps to the javax.net.ssl.trustStoreType Java system property. \n"
+ "Types jceks, jks, dks, pkcs11, and pkcs12 can be read from Java 8 and beyond. Defaults to jks."),
DBACCESS_USE_SSL("metastore.dbaccess.ssl.use.SSL", "hive.metastore.dbaccess.ssl.use.SSL", false,
"Set this to true to use SSL encryption to the database store."),
DEFAULTPARTITIONNAME("metastore.default.partition.name",
"hive.exec.default.partition.name", "__HIVE_DEFAULT_PARTITION__",
"The default partition name in case the dynamic partition column value is null/empty string or any other values that cannot be escaped. \n" +
"This value must not contain any special character used in HDFS URI (e.g., ':', '%', '/' etc). \n" +
"The user has to be aware that the dynamic partition value should not contain this value to avoid confusions."),
DELEGATION_KEY_UPDATE_INTERVAL("metastore.cluster.delegation.key.update-interval",
"hive.cluster.delegation.key.update-interval", 1, TimeUnit.DAYS, ""),
DELEGATION_TOKEN_GC_INTERVAL("metastore.cluster.delegation.token.gc-interval",
"hive.cluster.delegation.token.gc-interval", 15, TimeUnit.MINUTES, ""),
DELEGATION_TOKEN_MAX_LIFETIME("metastore.cluster.delegation.token.max-lifetime",
"hive.cluster.delegation.token.max-lifetime", 7, TimeUnit.DAYS, ""),
DELEGATION_TOKEN_RENEW_INTERVAL("metastore.cluster.delegation.token.renew-interval",
"hive.cluster.delegation.token.renew-interval", 1, TimeUnit.DAYS, ""),
DELEGATION_TOKEN_STORE_CLS("metastore.cluster.delegation.token.store.class",
"hive.cluster.delegation.token.store.class", METASTORE_DELEGATION_MANAGER_CLASS,
"Class to store delegation tokens"),
DETACH_ALL_ON_COMMIT("javax.jdo.option.DetachAllOnCommit",
"javax.jdo.option.DetachAllOnCommit", true,
"Detaches all objects from session so that they can be used after transaction is committed"),
DIRECT_SQL_MAX_ELEMENTS_IN_CLAUSE("metastore.direct.sql.max.elements.in.clause",
"hive.direct.sql.max.elements.in.clause", 1000,
"The maximum number of values in a IN clause. Once exceeded, it will be broken into\n" +
" multiple OR separated IN clauses."),
DIRECT_SQL_MAX_ELEMENTS_VALUES_CLAUSE("metastore.direct.sql.max.elements.values.clause",
"hive.direct.sql.max.elements.values.clause",
1000, "The maximum number of values in a VALUES clause for INSERT statement."),
DIRECT_SQL_MAX_QUERY_LENGTH("metastore.direct.sql.max.query.length",
"hive.direct.sql.max.query.length", 100, "The maximum\n" +
" size of a query string (in KB)."),
DIRECT_SQL_PARTITION_BATCH_SIZE("metastore.direct.sql.batch.size",
"hive.metastore.direct.sql.batch.size", 0,
"Batch size for partition and other object retrieval from the underlying DB in direct\n" +
"SQL. For some DBs like Oracle and MSSQL, there are hardcoded or perf-based limitations\n" +
"that necessitate this. For DBs that can handle the queries, this isn't necessary and\n" +
"may impede performance. -1 means no batching, 0 means automatic batching."),
DISALLOW_INCOMPATIBLE_COL_TYPE_CHANGES("metastore.disallow.incompatible.col.type.changes",
"hive.metastore.disallow.incompatible.col.type.changes", true,
"If true, ALTER TABLE operations which change the type of a\n" +
"column (say STRING) to an incompatible type (say MAP) are disallowed.\n" +
"RCFile default SerDe (ColumnarSerDe) serializes the values in such a way that the\n" +
"datatypes can be converted from string to any type. The map is also serialized as\n" +
"a string, which can be read as a string as well. However, with any binary\n" +
"serialization, this is not true. Blocking the ALTER TABLE prevents ClassCastExceptions\n" +
"when subsequently trying to access old partitions.\n" +
"\n" +
"Primitive types like INT, STRING, BIGINT, etc., are compatible with each other and are\n" +
"not blocked.\n" +
"\n" +
"See HIVE-4409 for more details."),
DUMP_CONFIG_ON_CREATION("metastore.dump.config.on.creation", "metastore.dump.config.on.creation", true,
"If true, a printout of the config file (minus sensitive values) will be dumped to the " +
"log whenever newMetastoreConf() is called. Can produce a lot of logs"),
END_FUNCTION_LISTENERS("metastore.end.function.listeners",
"hive.metastore.end.function.listeners", "",
"List of comma separated listeners for the end of metastore functions."),
EVENT_CLEAN_FREQ("metastore.event.clean.freq", "hive.metastore.event.clean.freq", 0,
TimeUnit.SECONDS, "Frequency at which timer task runs to purge expired events in metastore."),
EVENT_EXPIRY_DURATION("metastore.event.expiry.duration", "hive.metastore.event.expiry.duration",
0, TimeUnit.SECONDS, "Duration after which events expire from events table"),
EVENT_LISTENERS("metastore.event.listeners", "hive.metastore.event.listeners", "",
"A comma separated list of Java classes that implement the org.apache.riven.MetaStoreEventListener" +
" interface. The metastore event and corresponding listener method will be invoked in separate JDO transactions. " +
"Alternatively, configure hive.metastore.transactional.event.listeners to ensure both are invoked in same JDO transaction."),
EVENT_MESSAGE_FACTORY("metastore.event.message.factory",
"hive.metastore.event.message.factory",
"org.apache.hadoop.hive.metastore.messaging.json.gzip.GzipJSONMessageEncoder",
"Factory class for making encoding and decoding messages in the events generated."),
EVENT_NOTIFICATION_PARAMETERS_EXCLUDE_PATTERNS("metastore.notification.parameters.exclude.patterns",
"hive.metastore.notification.parameters.exclude.patterns", "",
"List of comma-separated regexes that are used to reduced the size of HMS Notification messages."
+ " The regexes are matched against each key of parameters map in Table or Partition object"
+ "present in HMS Notification. Any key-value pair whose key is matched with any regex will"
+" be removed from Parameters map during Serialization of Table/Partition object."),
EVENT_DB_LISTENER_TTL("metastore.event.db.listener.timetolive",
"hive.metastore.event.db.listener.timetolive", 1, TimeUnit.DAYS,
"time after which events will be removed from the database listener queue when repl.cm.enabled \n" +
"is set to false. When set to true, the conf repl.event.db.listener.timetolive is used instead."),
EVENT_CLEAN_MAX_EVENTS("metastore.event.db.clean.maxevents",
"hive.metastore.event.db.clean.maxevents", 10000,
"Limit on number events to be cleaned at a time in metastore cleanNotificationEvents " +
"call, to avoid OOM. The configuration is not effective when set to zero or " +
"a negative value."),
EVENT_DB_LISTENER_CLEAN_INTERVAL("metastore.event.db.listener.clean.interval",
"hive.metastore.event.db.listener.clean.interval", 7200, TimeUnit.SECONDS,
"sleep interval between each run for cleanup of events from the database listener queue"),
EVENT_DB_NOTIFICATION_API_AUTH("metastore.metastore.event.db.notification.api.auth",
"hive.metastore.event.db.notification.api.auth", true,
"Should metastore do authorization against database notification related APIs such as get_next_notification.\n" +
"If set to true, then only the superusers in proxy settings have the permission"),
EXECUTE_SET_UGI("metastore.execute.setugi", "hive.metastore.execute.setugi", true,
"In unsecure mode, setting this property to true will cause the metastore to execute DFS operations using \n" +
"the client's reported user and group permissions. Note that this property must be set on \n" +
"both the client and server sides. Further note that its best effort. \n" +
"If client sets its to true and server sets it to false, client setting will be ignored."),
EXPRESSION_PROXY_CLASS("metastore.expression.proxy", "hive.metastore.expression.proxy",
"org.apache.hadoop.hive.ql.optimizer.ppr.PartitionExpressionForMetastore",
"Class to use to process expressions in partition pruning."),
DECODE_FILTER_EXPRESSION_TO_STRING("metastore.decode.filter.expression.tostring",
"hive.metastore.decode.filter.expression.tostring", false,
"If set to true convertExprToFilter method of PartitionExpressionForMetastore will decode \n" +
"byte array into string rather than ExprNode. This is specially required for \n" +
"msck command when used with filter conditions"),
FILE_METADATA_THREADS("metastore.file.metadata.threads",
"hive.metastore.hbase.file.metadata.threads", 1,
"Number of threads to use to read file metadata in background to cache it."),
FILTER_HOOK("metastore.filter.hook", "hive.metastore.filter.hook",
org.apache.hadoop.hive.metastore.DefaultMetaStoreFilterHookImpl.class.getName(),
"Metastore hook class for filtering the metadata read results. If hive.security.authorization.manager"
+ "is set to instance of HiveAuthorizerFactory, then this value is ignored."),
FS_HANDLER_CLS("metastore.fs.handler.class", "hive.metastore.fs.handler.class",
"org.apache.hadoop.hive.metastore.HiveMetaStoreFsImpl", ""),
FS_HANDLER_THREADS_COUNT("metastore.fshandler.threads", "hive.metastore.fshandler.threads", 15,
"Number of threads to be allocated for metastore handler for fs operations."),
HMS_HANDLER_ATTEMPTS("metastore.hmshandler.retry.attempts", "hive.hmshandler.retry.attempts", 10,
"The number of times to retry a HMSHandler call if there were a connection error."),
HMS_HANDLER_FORCE_RELOAD_CONF("metastore.hmshandler.force.reload.conf",
"hive.hmshandler.force.reload.conf", false,
"Whether to force reloading of the HMSHandler configuration (including\n" +
"the connection URL, before the next metastore query that accesses the\n" +
"datastore. Once reloaded, this value is reset to false. Used for\n" +
"testing only."),
HMS_HANDLER_INTERVAL("metastore.hmshandler.retry.interval", "hive.hmshandler.retry.interval",
2000, TimeUnit.MILLISECONDS, "The time between HMSHandler retry attempts on failure."),
IDENTIFIER_FACTORY("datanucleus.identifierFactory",
"datanucleus.identifierFactory", "datanucleus1",
"Name of the identifier factory to use when generating table/column names etc. \n" +
"'datanucleus1' is used for backward compatibility with DataNucleus v1"),
INIT_HOOKS("metastore.init.hooks", "hive.metastore.init.hooks", "",
"A comma separated list of hooks to be invoked at the beginning of HMSHandler initialization. \n" +
"An init hook is specified as the name of Java class which extends org.apache.riven.MetaStoreInitListener."),
INIT_METADATA_COUNT_ENABLED("metastore.initial.metadata.count.enabled",
"hive.metastore.initial.metadata.count.enabled", true,
"Enable a metadata count at metastore startup for metrics."),
INTEGER_JDO_PUSHDOWN("metastore.integral.jdo.pushdown",
"hive.metastore.integral.jdo.pushdown", false,
"Allow JDO query pushdown for integral partition columns in metastore. Off by default. This\n" +
"improves metastore perf for integral columns, especially if there's a large number of partitions.\n" +
"However, it doesn't work correctly with integral values that are not normalized (e.g. have\n" +
"leading zeroes, like 0012). If metastore direct SQL is enabled and works, this optimization\n" +
"is also irrelevant."),
// Once exceeded, the queries should be broken into separate batches.
// Note: This value is not passed into the JDBC driver, therefore this batch size limit is not automatically enforced.
// Batch construction/splits should be done manually in code using this config value.
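// A minimal sketch of that manual splitting, assuming Guava's Lists.partition and a list of ids
// to process (the names below are illustrative, not helpers defined in this class):
//   int maxBatch = MetastoreConf.getIntVar(conf, ConfVars.JDBC_MAX_BATCH_SIZE);
//   for (List<Long> chunk : Lists.partition(ids, maxBatch)) {
//     // add this chunk's statements to a single JDBC batch and execute it
//   }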
JDBC_MAX_BATCH_SIZE("metastore.jdbc.max.batch.size", "hive.metastore.jdbc.max.batch.size",
1000, new RangeValidator(1, null),
"Maximum number of update/delete/insert queries in a single JDBC batch statement (including Statement/PreparedStatement)."),
KERBEROS_KEYTAB_FILE("metastore.kerberos.keytab.file",
"hive.metastore.kerberos.keytab.file", "",
"The path to the Kerberos Keytab file containing the metastore Thrift server's service principal."),
KERBEROS_PRINCIPAL("metastore.kerberos.principal", "hive.metastore.kerberos.principal",
"hive-metastore/_HOST@EXAMPLE.COM",
"The service principal for the metastore Thrift server. \n" +
"The special string _HOST will be replaced automatically with the correct host name."),
THRIFT_METASTORE_AUTHENTICATION("metastore.authentication", "hive.metastore.authentication",
"NOSASL",
new StringSetValidator("NOSASL", "NONE", "LDAP", "KERBEROS", "CUSTOM"),
"Client authentication types.\n" +
" NONE: no authentication check\n" +
" LDAP: LDAP/AD based authentication\n" +
" KERBEROS: Kerberos/GSSAPI authentication\n" +
" CUSTOM: Custom authentication provider\n" +
" (Use with property metastore.custom.authentication.class)\n" +
" CONFIG: username and password is specified in the config" +
" NOSASL: Raw transport"),
METASTORE_CUSTOM_AUTHENTICATION_CLASS("metastore.custom.authentication.class",
"hive.metastore.custom.authentication.class",
"",
"Custom authentication class. Used when property\n" +
"'metastore.authentication' is set to 'CUSTOM'. Provided class\n" +
"must be a proper implementation of the interface\n" +
"org.apache.hadoop.hive.metastore.MetaStorePasswdAuthenticationProvider. MetaStore\n" +
"will call its Authenticate(user, passed) method to authenticate requests.\n" +
"The implementation may optionally implement Hadoop's\n" +
"org.apache.hadoop.conf.Configurable class to grab MetaStore's Configuration object."),
METASTORE_PLAIN_LDAP_URL("metastore.authentication.ldap.url",
"hive.metastore.authentication.ldap.url", "",
"LDAP connection URL(s),\n" +
"this value could contain URLs to multiple LDAP servers instances for HA,\n" +
"each LDAP URL is separated by a SPACE character. URLs are used in the \n" +
" order specified until a connection is successful."),
METASTORE_PLAIN_LDAP_BASEDN("metastore.authentication.ldap.baseDN",
"hive.metastore.authentication.ldap.baseDN", "", "LDAP base DN"),
METASTORE_PLAIN_LDAP_DOMAIN("metastore.authentication.ldap.Domain",
"hive.metastore.authentication.ldap.Domain", "", ""),
METASTORE_PLAIN_LDAP_GROUPDNPATTERN("metastore.authentication.ldap.groupDNPattern",
"hive.metastore.authentication.ldap.groupDNPattern", "",
"COLON-separated list of patterns to use to find DNs for group entities in this directory.\n" +
"Use %s where the actual group name is to be substituted for.\n" +
"For example: CN=%s,CN=Groups,DC=subdomain,DC=domain,DC=com."),
METASTORE_PLAIN_LDAP_GROUPFILTER("metastore.authentication.ldap.groupFilter",
"hive.metastore.authentication.ldap.groupFilter", "",
"COMMA-separated list of LDAP Group names (short name not full DNs).\n" +
"For example: HiveAdmins,HadoopAdmins,Administrators"),
METASTORE_PLAIN_LDAP_USERDNPATTERN("metastore.authentication.ldap.userDNPattern",
"hive.metastore.authentication.ldap.userDNPattern", "",
"COLON-separated list of patterns to use to find DNs for users in this directory.\n" +
"Use %s where the actual group name is to be substituted for.\n" +
"For example: CN=%s,CN=Users,DC=subdomain,DC=domain,DC=com."),
METASTORE_PLAIN_LDAP_USERFILTER("metastore.authentication.ldap.userFilter",
"hive.metastore.authentication.ldap.userFilter", "",
"COMMA-separated list of LDAP usernames (just short names, not full DNs).\n" +
"For example: hiveuser,impalauser,hiveadmin,hadoopadmin"),
METASTORE_PLAIN_LDAP_GUIDKEY("metastore.authentication.ldap.guidKey",
"hive.metastore.authentication.ldap.guidKey", "uid",
"LDAP attribute name whose values are unique in this LDAP server.\n" +
"For example: uid or CN."),
METASTORE_PLAIN_LDAP_GROUPMEMBERSHIP_KEY("metastore.authentication.ldap.groupMembershipKey",
"hive.metastore.authentication.ldap.groupMembershipKey",
"member",
"LDAP attribute name on the group object that contains the list of distinguished names\n" +
"for the user, group, and contact objects that are members of the group.\n" +
"For example: member, uniqueMember or memberUid"),
METASTORE_PLAIN_LDAP_USERMEMBERSHIP_KEY(METASTORE_AUTHENTICATION_LDAP_USERMEMBERSHIPKEY_NAME,
"hive." + METASTORE_AUTHENTICATION_LDAP_USERMEMBERSHIPKEY_NAME,
"",
"LDAP attribute name on the user object that contains groups of which the user is\n" +
"a direct member, except for the primary group, which is represented by the\n" +
"primaryGroupId.\n" +
"For example: memberOf"),
METASTORE_PLAIN_LDAP_GROUPCLASS_KEY("metastore.authentication.ldap.groupClassKey",
"hive.metastore.authentication.ldap.groupClassKey",
"groupOfNames",
"LDAP attribute name on the group entry that is to be used in LDAP group searches.\n" +
"For example: group, groupOfNames or groupOfUniqueNames."),
METASTORE_PLAIN_LDAP_CUSTOMLDAPQUERY("metastore.authentication.ldap.customLDAPQuery",
"hive.metastore.authentication.ldap.customLDAPQuery", "",
"A full LDAP query that LDAP Atn provider uses to execute against LDAP Server.\n" +
"If this query returns a null resultset, the LDAP Provider fails the Authentication\n" +
"request, succeeds if the user is part of the resultset." +
"For example: (&(objectClass=group)(objectClass=top)(instanceType=4)(cn=Domain*)) \n" +
"(&(objectClass=person)(|(sAMAccountName=admin)(|(memberOf=CN=Domain Admins,CN=Users,DC=domain,DC=com)" +
"(memberOf=CN=Administrators,CN=Builtin,DC=domain,DC=com))))"),
METASTORE_PLAIN_LDAP_BIND_USER("metastore.authentication.ldap.binddn",
"hive.metastore.authentication.ldap.binddn", "",
"The user with which to bind to the LDAP server, and search for the full domain name " +
"of the user being authenticated.\n" +
"This should be the full domain name of the user, and should have search access across all " +
"users in the LDAP tree.\n" +
"If not specified, then the user being authenticated will be used as the bind user.\n" +
"For example: CN=bindUser,CN=Users,DC=subdomain,DC=domain,DC=com"),
METASTORE_PLAIN_LDAP_BIND_PASSWORD("metastore.authentication.ldap.bindpw",
"hive.metastore.authentication.ldap.bindpw", "",
"The password for the bind user, to be used to search for the full name of the user being authenticated.\n" +
"If the username is specified, this parameter must also be specified."),
LIMIT_PARTITION_REQUEST("metastore.limit.partition.request",
"hive.metastore.limit.partition.request", -1,
"This limits the number of partitions (whole partition objects) that can be requested " +
"from the metastore for a give table. MetaStore API methods using this are: \n" +
"get_partitions, \n" +
"get_partitions_with_auth, \n" +
"get_partitions_by_filter, \n" +
"get_partitions_spec_by_filter, \n" +
"get_partitions_by_expr.\n" +
"The default value \"-1\" means no limit."),
MSC_CACHE_ENABLED("metastore.client.cache.v2.enabled",
"hive.metastore.client.cache.v2.enabled", true,
"This property enables a Caffeine Cache for Metastore client"),
MSC_CACHE_MAX_SIZE("metastore.client.cache.v2.maxSize",
"hive.metastore.client.cache.v2.maxSize", "1Gb", new SizeValidator(),
"Set the maximum size (number of bytes) of the metastore client cache (DEFAULT: 1GB). " +
"Only in effect when the cache is enabled"),
MSC_CACHE_RECORD_STATS("metastore.client.cache.v2.recordStats",
"hive.metastore.client.cache.v2.recordStats", false,
"This property enables recording metastore client cache stats in DEBUG logs"),
LOG4J_FILE("metastore.log4j.file", "hive.log4j.file", "",
"Hive log4j configuration file.\n" +
"If the property is not set, then logging will be initialized using metastore-log4j2.properties found on the classpath.\n" +
"If the property is set, the value must be a valid URI (java.net.URI, e.g. \"file:///tmp/my-logging.xml\"), \n" +
"which you can then extract a URL from and pass to PropertyConfigurator.configure(URL)."),
MANAGER_FACTORY_CLASS("javax.jdo.PersistenceManagerFactoryClass",
"javax.jdo.PersistenceManagerFactoryClass",
"org.datanucleus.api.jdo.JDOPersistenceManagerFactory",
"class implementing the jdo persistence"),
MATERIALIZATIONS_INVALIDATION_CACHE_IMPL("metastore.materializations.invalidation.impl",
"hive.metastore.materializations.invalidation.impl", "DEFAULT",
new StringSetValidator("DEFAULT", "DISABLE"),
"The implementation that we should use for the materializations invalidation cache. \n" +
" DEFAULT: Default implementation for invalidation cache\n" +
" DISABLE: Disable invalidation cache (debugging purposes)"),
MATERIALIZATIONS_INVALIDATION_CACHE_CLEAN_FREQUENCY("metastore.materializations.invalidation.clean.frequency",
"hive.metastore.materializations.invalidation.clean.frequency",
3600, TimeUnit.SECONDS, "Frequency at which timer task runs to remove unnecessary transaction entries from " +
"materializations invalidation cache."),
MATERIALIZATIONS_INVALIDATION_CACHE_EXPIRY_DURATION("metastore.materializations.invalidation.max.duration",
"hive.metastore.materializations.invalidation.max.duration",
86400, TimeUnit.SECONDS, "Maximum duration for a query producing a materialization. After this time, transaction " +
"entries that are not relevant for materializations can be removed from invalidation cache."),
RUNTIME_STATS_CLEAN_FREQUENCY("metastore.runtime.stats.clean.frequency", "hive.metastore.runtime.stats.clean.frequency", 3600,
TimeUnit.SECONDS, "Frequency at which timer task runs to remove outdated runtime stat entries."),
RUNTIME_STATS_MAX_AGE("metastore.runtime.stats.max.age", "hive.metastore.runtime.stats.max.age", 86400 * 3, TimeUnit.SECONDS,
"Stat entries which are older than this are removed."),
SCHEDULED_QUERIES_ENABLED("metastore.scheduled.queries.enabled", "hive.metastore.scheduled.queries.enabled", true,
"Wheter scheduled query metastore requests be processed"),
SCHEDULED_QUERIES_EXECUTION_PROGRESS_TIMEOUT("metastore.scheduled.queries.execution.timeout",
"hive.metastore.scheduled.queries.progress.timeout", 120, TimeUnit.SECONDS,
"If a scheduled query is not making progress for this amount of time it will be considered TIMED_OUT"),
SCHEDULED_QUERIES_EXECUTION_MAINT_TASK_FREQUENCY("metastore.scheduled.queries.execution.maint.task.frequency",
"hive.metastore.scheduled.queries.execution.clean.frequency", 60, TimeUnit.SECONDS,
"Interval of scheduled query maintenance task. Which removes executions above max age;"
+ "and marks executions as timed out if the condition is met"),
SCHEDULED_QUERIES_EXECUTION_MAX_AGE("metastore.scheduled.queries.execution.max.age",
"hive.metastore.scheduled.queries.execution.max.age", 30 * 86400, TimeUnit.SECONDS,
"Maximal age of a scheduled query execution entry before it is removed."),
// Parameters for exporting metadata on table drop (requires the use of the
// org.apache.hadoop.hive.ql.parse.MetaDataExportListener pre-event listener)
METADATA_EXPORT_LOCATION("metastore.metadata.export.location", "hive.metadata.export.location",
"",
"When used in conjunction with the org.apache.hadoop.hive.ql.parse.MetaDataExportListener pre event listener, \n" +
"it is the location to which the metadata will be exported. The default is an empty string, which results in the \n" +
"metadata being exported to the current user's home directory on HDFS."),
METASTORE_MAX_EVENT_RESPONSE("metastore.max.event.response", "hive.metastore.max.event.response", 1000000,
"The parameter will decide the maximum number of events that HMS will respond."),
METASTORE_CLIENT_FILTER_ENABLED("metastore.client.filter.enabled", "hive.metastore.client.filter.enabled", true,
"Enable filtering the metadata read results at HMS client. Default is true."),
METASTORE_SERVER_FILTER_ENABLED("metastore.server.filter.enabled", "hive.metastore.server.filter.enabled", false,
"Enable filtering the metadata read results at HMS server. Default is false."),
MOVE_EXPORTED_METADATA_TO_TRASH("metastore.metadata.move.exported.metadata.to.trash",
"hive.metadata.move.exported.metadata.to.trash", true,
"When used in conjunction with the org.apache.hadoop.hive.ql.parse.MetaDataExportListener pre event listener, \n" +
"this setting determines if the metadata that is exported will subsequently be moved to the user's trash directory \n" +
"alongside the dropped table data. This ensures that the metadata will be cleaned up along with the dropped table data."),
METRICS_ENABLED("metastore.metrics.enabled", "hive.metastore.metrics.enabled", false,
"Enable metrics on the metastore."),
METRICS_HADOOP2_COMPONENT_NAME("metastore.metrics.hadoop2.component", "hive.service.metrics.hadoop2.component", "hivemetastore",
"Component name to provide to Hadoop2 Metrics system."),
METRICS_JSON_FILE_INTERVAL("metastore.metrics.file.frequency",
"hive.service.metrics.file.frequency", 60000, TimeUnit.MILLISECONDS,
"For json metric reporter, the frequency of updating JSON metrics file."),
METRICS_JSON_FILE_LOCATION("metastore.metrics.file.location",
"hive.service.metrics.file.location", "/tmp/report.json",
"For metric class json metric reporter, the location of local JSON metrics file. " +
"This file will get overwritten at every interval."),
METRICS_SLF4J_LOG_FREQUENCY_MINS("metastore.metrics.slf4j.frequency",
"hive.service.metrics.slf4j.frequency", 5, TimeUnit.MINUTES,
"For SLF4J metric reporter, the frequency of logging metrics events. The default value is 5 mins."),
METRICS_SLF4J_LOG_LEVEL("metastore.metrics.slf4j.logging.level",
"hive.service.metrics.slf4j.logging.level", "INFO",
new StringSetValidator("TRACE", "DEBUG", "INFO", "WARN", "ERROR"),
"For SLF4J metric reporter, the logging level to be used for metrics event logs. The default level is INFO."),
METRICS_REPORTERS("metastore.metrics.reporters", "metastore.metrics.reporters", "json,jmx",
new StringSetValidator("json", "jmx", "console", "hadoop", "slf4j"),
"A comma separated list of metrics reporters to start"),
MSCK_PATH_VALIDATION("metastore.msck.path.validation", "hive.msck.path.validation", "throw",
new StringSetValidator("throw", "skip", "ignore"), "The approach msck should take with HDFS " +
"directories that are partition-like but contain unsupported characters. 'throw' (an " +
"exception) is the default; 'skip' will skip the invalid directories and still repair the" +
" others; 'ignore' will skip the validation (legacy behavior, causes bugs in many cases)"),
MSCK_REPAIR_BATCH_SIZE("metastore.msck.repair.batch.size",
"hive.msck.repair.batch.size", 3000,
"Batch size for the msck repair command. If the value is greater than zero,\n "
+ "it will execute batch wise with the configured batch size. In case of errors while\n"
+ "adding unknown partitions the batch size is automatically reduced by half in the subsequent\n"
+ "retry attempt. The default value is 3000 which means it will execute in the batches of 3000."),
MSCK_REPAIR_BATCH_MAX_RETRIES("metastore.msck.repair.batch.max.retries", "hive.msck.repair.batch.max.retries", 4,
"Maximum number of retries for the msck repair command when adding unknown partitions.\n "
+ "If the value is greater than zero it will retry adding unknown partitions until the maximum\n"
+ "number of attempts is reached or batch size is reduced to 0, whichever is earlier.\n"
+ "In each retry attempt it will reduce the batch size by a factor of 2 until it reaches zero.\n"
+ "If the value is set to zero it will retry until the batch size becomes zero as described above."),
MSCK_REPAIR_ENABLE_PARTITION_RETENTION("metastore.msck.repair.enable.partition.retention",
"metastore.msck.repair.enable.partition.retention", false,
"If 'partition.retention.period' table property is set, this flag determines whether MSCK REPAIR\n" +
"command should handle partition retention. If enabled, and if a specific partition's age exceeded\n" +
"retention period the partition will be dropped along with data"),
// Partition management task params
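// A minimal tuning sketch for the settings defined below (illustrative only; the database pattern value is hypothetical):
//   Configuration conf = MetastoreConf.newMetastoreConf();
//   conf.set(ConfVars.PARTITION_MANAGEMENT_DATABASE_PATTERN.getVarname(), "sales_*");
//   MetastoreConf.setTimeVar(conf, ConfVars.PARTITION_MANAGEMENT_TASK_FREQUENCY, 1, TimeUnit.HOURS);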
PARTITION_MANAGEMENT_TASK_FREQUENCY("metastore.partition.management.task.frequency",
"metastore.partition.management.task.frequency",
300, TimeUnit.SECONDS, "Frequency at which the timer task runs to do automatic partition management for tables\n" +
"with the table property 'discover.partitions'='true'. Partition management includes two pieces. One is partition\n" +
"discovery and the other is partition retention. When 'discover.partitions'='true' is set, partition\n" +
"management will look for partitions in the table location and add partition objects for them in the metastore.\n" +
"Similarly, if a partition object exists in the metastore but the partition location does not exist, the partition\n" +
"object will be dropped. The second piece of partition management is the retention period. When 'discover.partitions'\n" +
"is set to true and the 'partition.retention.period' table property is defined, partitions that are older\n" +
"than the specified retention period will be automatically dropped from the metastore along with the data."),
PARTITION_MANAGEMENT_TABLE_TYPES("metastore.partition.management.table.types",
"metastore.partition.management.table.types", "MANAGED_TABLE,EXTERNAL_TABLE",
"Comma separated list of table types to use for partition management"),
PARTITION_MANAGEMENT_TASK_THREAD_POOL_SIZE("metastore.partition.management.task.thread.pool.size",
"metastore.partition.management.task.thread.pool.size", 3,
"Partition management uses a thread pool to which tasks are submitted for discovering and retaining\n" +
"partitions. This determines the size of the thread pool. Note: Increasing the thread pool size will cause\n" +
"threadPoolSize * maxConnectionPoolSize connections to the backend DB"),
PARTITION_MANAGEMENT_CATALOG_NAME("metastore.partition.management.catalog.name",
"metastore.partition.management.catalog.name", "hive",
"Automatic partition management will look for tables under the specified catalog name"),
PARTITION_MANAGEMENT_DATABASE_PATTERN("metastore.partition.management.database.pattern",
"metastore.partition.management.database.pattern", "*",
"Automatic partition management will look for tables using the specified database pattern"),
PARTITION_MANAGEMENT_TABLE_PATTERN("metastore.partition.management.table.pattern",
"metastore.partition.management.table.pattern", "*",
"Automatic partition management will look for tables using the specified table pattern"),
METASTORE_METADATA_TRANSFORMER_CLASS("metastore.metadata.transformer.class", "metastore.metadata.transformer.class",
"org.apache.hadoop.hive.metastore.MetastoreDefaultTransformer",
"Fully qualified class name for the metastore metadata transformer class, \n"
+ "which is used by the HMS server to fetch the extended tables/partitions information \n"
+ "based on the data processor capabilities. \n"
+ "This class should implement the IMetaStoreMetadataTransformer interface"),
MULTITHREADED("javax.jdo.option.Multithreaded", "javax.jdo.option.Multithreaded", true,
"Set this to true if multiple threads access metastore through JDO concurrently."),
MAX_OPEN_TXNS("metastore.max.open.txns", "hive.max.open.txns", 100000,
"Maximum number of open transactions. If \n" +
"current open transactions reach this limit, future open transaction requests will be \n" +
"rejected, until this number goes below the limit."),
NON_TRANSACTIONAL_READ("javax.jdo.option.NonTransactionalRead",
"javax.jdo.option.NonTransactionalRead", true,
"Reads outside of transactions"),
NOTIFICATION_SEQUENCE_LOCK_MAX_RETRIES("metastore.notification.sequence.lock.max.retries",
"hive.notification.sequence.lock.max.retries", 10,
"Number of retries required to acquire a lock when getting the next notification sequential ID for entries "
+ "in the NOTIFICATION_LOG table."),
NOTIFICATION_SEQUENCE_LOCK_RETRY_SLEEP_INTERVAL(
"metastore.notification.sequence.lock.retry.sleep.interval",
"hive.notification.sequence.lock.retry.sleep.interval", 10, TimeUnit.SECONDS,
"Sleep interval between retries to acquire a notification lock as described part of property "
+ NOTIFICATION_SEQUENCE_LOCK_MAX_RETRIES.name()),
ORM_RETRIEVE_MAPNULLS_AS_EMPTY_STRINGS("metastore.orm.retrieveMapNullsAsEmptyStrings",
"hive.metastore.orm.retrieveMapNullsAsEmptyStrings", false,
"Thrift does not support nulls in maps, so any nulls present in maps retrieved from ORM must " +
"either be pruned or converted to empty strings. Some backing dbs such as Oracle persist empty strings " +
"as nulls, so we should set this parameter if we wish to reverse that behaviour. For others, " +
"pruning is the correct behaviour"),
PARTITION_NAME_WHITELIST_PATTERN("metastore.partition.name.whitelist.pattern",
"hive.metastore.partition.name.whitelist.pattern", "",
"Partition names will be checked against this regex pattern and rejected if not matched."),
PART_INHERIT_TBL_PROPS("metastore.partition.inherit.table.properties",
"hive.metastore.partition.inherit.table.properties", "",
"List of comma separated keys occurring in table properties which will get inherited to newly created partitions. \n" +
"* implies all the keys will get inherited."),
PRE_EVENT_LISTENERS("metastore.pre.event.listeners", "hive.metastore.pre.event.listeners", "",
"List of comma separated listeners for metastore events."),
PWD("javax.jdo.option.ConnectionPassword", "javax.jdo.option.ConnectionPassword", "mine",
"password to use against metastore database"),
RAW_STORE_IMPL("metastore.rawstore.impl", "hive.metastore.rawstore.impl",
"org.apache.hadoop.hive.metastore.ObjectStore",
"Name of the class that implements the org.apache.hadoop.hive.metastore.RawStore interface. \n" +
"This class is used to store and retrieve raw metadata objects such as tables and databases"),
REPLCMDIR("metastore.repl.cmrootdir", "hive.repl.cmrootdir", "/user/${system:user.name}/cmroot/",
"Root dir for ChangeManager, used for deleted files."),
REPLCMENCRYPTEDDIR("metastore.repl.cm.encryptionzone.rootdir", "hive.repl.cm.encryptionzone.rootdir", ".cmroot",
"Root dir for ChangeManager if encryption zones are enabled, used for deleted files."),
REPLCMFALLBACKNONENCRYPTEDDIR("metastore.repl.cm.nonencryptionzone.rootdir",
"hive.repl.cm.nonencryptionzone.rootdir", "",
"Root dir for ChangeManager for non encrypted paths if hive.repl.cmrootdir is encrypted."),
REPLCMRETIAN("metastore.repl.cm.retain", "hive.repl.cm.retain", 24 * 10, TimeUnit.HOURS,
"Time to retain removed files in cmrootdir."),
REPLCMINTERVAL("metastore.repl.cm.interval", "hive.repl.cm.interval", 3600, TimeUnit.SECONDS,
"Interval for the cmroot cleanup thread."),
REPLCMENABLED("metastore.repl.cm.enabled", "hive.repl.cm.enabled", false,
"Turn on ChangeManager, so delete files will go to cmrootdir."),
REPLDIR("metastore.repl.rootdir", "hive.repl.rootdir", "/user/${system:user.name}/repl/",
"HDFS root dir for all replication dumps."),
REPL_COPYFILE_MAXNUMFILES("metastore.repl.copyfile.maxnumfiles",
"hive.exec.copyfile.maxnumfiles", 1L,
"Maximum number of files Hive uses to do sequential HDFS copies between directories. " +
"Distributed copies (distcp) will be used instead for larger numbers of files so that copies can be done faster."),
REPL_COPYFILE_MAXSIZE("metastore.repl.copyfile.maxsize",
"hive.exec.copyfile.maxsize", 32L * 1024 * 1024 /*32M*/,
"Maximum file size (in bytes) that Hive uses to do single HDFS copies between directories. " +
"Distributed copies (distcp) will be used instead for bigger files so that copies can be done faster."),
REPL_EVENT_DB_LISTENER_TTL("metastore.repl.event.db.listener.timetolive",
"hive.repl.event.db.listener.timetolive", 10, TimeUnit.DAYS,
"time after which events will be removed from the database listener queue when repl.cm.enabled \n" +
"is set to true. When set to false, the conf event.db.listener.timetolive is used instead."),
REPL_METRICS_CACHE_MAXSIZE("metastore.repl.metrics.cache.maxsize",
"hive.repl.metrics.cache.maxsize", 10000 /*10000 rows */,
"Maximum in-memory cache size for collecting replication metrics. The metrics will be pushed to persistent"
+ " storage at a frequency defined by the config hive.repl.metrics.update.frequency. Until the metrics are persisted"
+ " to the db, they are stored in this cache. So set this property based on the number of concurrent policies running"
+ " and the frequency of persisting the metrics to persistent storage."
),
REPL_METRICS_UPDATE_FREQUENCY("metastore.repl.metrics.update.frequency",
"hive.repl.metrics.update.frequency", 1L, TimeUnit.MINUTES /*1 minute */,
"Frequency at which replication Metrics will be stored in persistent storage. "
),
REPL_METRICS_CLEANUP_FREQUENCY("metastore.repl.metrics.cleanup.frequency",
"hive.metastore.repl.metrics.cleanup.frequency", 1, TimeUnit.DAYS,
"Interval of the scheduled metrics cleanup task, which removes metrics older than the max age; the max age is"
+ " defined by the config metastore.repl.metrics.max.age and should be greater than this frequency"),
REPL_METRICS_MAX_AGE("metastore.repl.metrics.max.age",
"hive.metastore.repl.metrics.max.age", 7, TimeUnit.DAYS,
"Maximal age of a replication metrics entry before it is removed."),
REPL_TXN_TIMEOUT("metastore.repl.txn.timeout", "hive.repl.txn.timeout", 11, TimeUnit.DAYS,
"Time after which replication transactions are declared aborted if the client has not sent a " +
"heartbeat. If this is a target cluster, the value must be greater than " +
"hive.repl.event.db.listener.timetolive on the source cluster (!), ideally by 1 day."),
SCHEMA_INFO_CLASS("metastore.schema.info.class", "hive.metastore.schema.info.class",
"org.apache.hadoop.hive.metastore.MetaStoreSchemaInfo",
"Fully qualified class name for the metastore schema information class \n"
+ "which is used by schematool to fetch the schema information.\n"
+ " This class should implement the IMetaStoreSchemaInfo interface"),
SCHEMA_VERIFICATION("metastore.schema.verification", "hive.metastore.schema.verification", true,
"Enforce metastore schema version consistency.\n" +
"True: Verify that the version information stored in the metastore is compatible with the version from the Hive jars. Also disable automatic\n" +
" schema migration attempts. Users are required to manually migrate the schema after a Hive upgrade, which ensures\n" +
" proper metastore schema migration. (Default)\n" +
"False: Warn if the version information stored in the metastore doesn't match the version from the Hive jars."),
SCHEMA_VERIFICATION_RECORD_VERSION("metastore.schema.verification.record.version",
"hive.metastore.schema.verification.record.version", false,
"When true the current MS version is recorded in the VERSION table. If this is disabled and verification is\n" +
" enabled the MS will be unusable."),
SERDES_USING_METASTORE_FOR_SCHEMA("metastore.serdes.using.metastore.for.schema",
"hive.serdes.using.metastore.for.schema",
"org.apache.hadoop.hive.ql.io.orc.OrcSerde," +
"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe," +
"org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe," +
"org.apache.hadoop.hive.serde2.MetadataTypedColumnsetSerDe," +
"org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe," +
"org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe," +
"org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe," +
"org.apache.hadoop.hive.serde2.OpenCSVSerde",
"SerDes retrieving schema from metastore. This is an internal parameter."),
SERVER_MAX_MESSAGE_SIZE("metastore.server.max.message.size",
"hive.metastore.server.max.message.size", 100*1024*1024L,
"Maximum message size in bytes the HMS will accept."),
SERVER_MAX_THREADS("metastore.server.max.threads",
"hive.metastore.server.max.threads", 1000,
"Maximum number of worker threads in the Thrift server's pool."),
SERVER_MIN_THREADS("metastore.server.min.threads", "hive.metastore.server.min.threads", 200,
"Minimum number of worker threads in the Thrift server's pool."),
SERVER_PORT("metastore.thrift.port", "hive.metastore.port", 9083,
"Hive metastore listener port"),
SSL_KEYSTORE_PASSWORD("metastore.keystore.password", "hive.metastore.keystore.password", "",
"Metastore SSL certificate keystore password."),
SSL_KEYSTORE_PATH("metastore.keystore.path", "hive.metastore.keystore.path", "",
"Metastore SSL certificate keystore location."),
SSL_KEYSTORE_TYPE("metastore.keystore.type", "hive.metastore.keystore.type", "",
"Metastore SSL certificate keystore type."),
SSL_KEYMANAGERFACTORY_ALGORITHM("metastore.keymanagerfactory.algorithm", "hive.metastore.keymanagerfactory.algorithm", "",
"Metastore SSL certificate keystore algorithm."),
SSL_PROTOCOL_BLACKLIST("metastore.ssl.protocol.blacklist", "hive.ssl.protocol.blacklist",
"SSLv2,SSLv3", "SSL Versions to disable for all Hive Servers"),
SSL_TRUSTSTORE_PATH("metastore.truststore.path", "hive.metastore.truststore.path", "",
"Metastore SSL certificate truststore location."),
SSL_TRUSTSTORE_PASSWORD("metastore.truststore.password", "hive.metastore.truststore.password", "",
"Metastore SSL certificate truststore password."),
SSL_TRUSTSTORE_TYPE("metastore.truststore.type", "hive.metastore.truststore.type", "",
"Metastore SSL certificate truststore type."),
SSL_TRUSTMANAGERFACTORY_ALGORITHM("metastore.trustmanagerfactory.algorithm", "hive.metastore.trustmanagerfactory.algorithm", "",
"Metastore SSL certificate truststore algorithm."),
STATS_AUTO_GATHER("metastore.stats.autogather", "hive.stats.autogather", true,
"A flag to gather statistics (only basic) automatically during the INSERT OVERWRITE command."),
STATS_FETCH_BITVECTOR("metastore.stats.fetch.bitvector", "hive.stats.fetch.bitvector", false,
"Whether to fetch the bitvector when computing NDV. Users can turn it off if they want to use the old schema"),
STATS_NDV_TUNER("metastore.stats.ndv.tuner", "hive.metastore.stats.ndv.tuner", 0.0,
"Provides a tunable parameter between the lower bound and the higher bound of ndv for aggregate ndv across all the partitions. \n" +
"The lower bound is equal to the maximum of ndv of all the partitions. The higher bound is equal to the sum of ndv of all the partitions.\n" +
"Its value should be between 0.0 (i.e., choose lower bound) and 1.0 (i.e., choose higher bound)"),
STATS_NDV_DENSITY_FUNCTION("metastore.stats.ndv.densityfunction",
"hive.metastore.stats.ndv.densityfunction", false,
"Whether to use density function to estimate the NDV for the whole table based on the NDV of partitions"),
STATS_DEFAULT_AGGREGATOR("metastore.stats.default.aggregator", "hive.stats.default.aggregator",
"",
"The Java class (implementing the StatsAggregator interface) that is used by default if hive.stats.dbclass is custom type."),
STATS_DEFAULT_PUBLISHER("metastore.stats.default.publisher", "hive.stats.default.publisher", "",
"The Java class (implementing the StatsPublisher interface) that is used by default if hive.stats.dbclass is custom type."),
STATS_AUTO_UPDATE("metastore.stats.auto.analyze", "hive.metastore.stats.auto.analyze", "none",
new EnumValidator(StatsUpdateMode.values()),
"Whether to update stats in the background; none - no, all - for all tables, existing - only existing, out-of-date stats."),
STATS_AUTO_UPDATE_NOOP_WAIT("metastore.stats.auto.analyze.noop.wait",
"hive.metastore.stats.auto.analyze.noop.wait", 5L, TimeUnit.MINUTES,
new TimeValidator(TimeUnit.MINUTES),
"How long to sleep if there were no stats needing update during an update iteration.\n" +
"This is a setting to throttle table/partition checks when nothing is being changed; not\n" +
"the analyze queries themselves."),
STATS_AUTO_UPDATE_WORKER_COUNT("metastore.stats.auto.analyze.worker.count",
"hive.metastore.stats.auto.analyze.worker.count", 1,
"Number of parallel analyze commands to run for background stats update."),
STORAGE_SCHEMA_READER_IMPL("metastore.storage.schema.reader.impl", "metastore.storage.schema.reader.impl",
DEFAULT_STORAGE_SCHEMA_READER_CLASS,
"The class to use to read schemas from storage. It must implement " +
"org.apache.hadoop.hive.metastore.StorageSchemaReader"),
STORE_MANAGER_TYPE("datanucleus.storeManagerType", "datanucleus.storeManagerType", "rdbms", "metadata store type"),
STRICT_MANAGED_TABLES("metastore.strict.managed.tables", "hive.strict.managed.tables", false,
"Whether strict managed tables mode is enabled. With this mode enabled, " +
"only transactional tables (both full and insert-only) are allowed to be created as managed tables"),
SUPPORT_SPECICAL_CHARACTERS_IN_TABLE_NAMES("metastore.support.special.characters.tablename",
"hive.support.special.characters.tablename", true,
"This flag should be set to true to enable support for special characters in table names.\n"
+ "When it is set to false, only [a-zA-Z_0-9]+ are supported.\n"
+ "The supported special characters are %&'()*+,-./:;<=>?[]_|{}$^!~#@ and space. This flag applies only to"
+ " quoted table names.\nThe default value is true."),
TASK_THREADS_ALWAYS("metastore.task.threads.always", "metastore.task.threads.always",
EVENT_CLEANER_TASK_CLASS + "," + RUNTIME_STATS_CLEANER_TASK_CLASS + "," +
ACID_METRICS_TASK_CLASS + "," +
"org.apache.hadoop.hive.metastore.HiveProtoEventsCleanerTask" + ","
+ "org.apache.hadoop.hive.metastore.ScheduledQueryExecutionsMaintTask" + ","
+ "org.apache.hadoop.hive.metastore.ReplicationMetricsMaintTask",
"Comma separated list of tasks that will be started in separate threads. These will " +
"always be started, regardless of whether the metastore is running in embedded mode " +
"or in server mode. They must implement " + METASTORE_TASK_THREAD_CLASS),
TASK_THREADS_REMOTE_ONLY("metastore.task.threads.remote", "metastore.task.threads.remote",
ACID_HOUSE_KEEPER_SERVICE_CLASS + "," +
ACID_TXN_CLEANER_SERVICE_CLASS + "," +
ACID_OPEN_TXNS_COUNTER_SERVICE_CLASS + "," +
MATERIALZIATIONS_REBUILD_LOCK_CLEANER_TASK_CLASS + "," +
PARTITION_MANAGEMENT_TASK_CLASS,
"Comma-separated list of tasks that will be started in separate threads. These will be" +
" started only when the metastore is running as a separate service. They must " +
"implement " + METASTORE_TASK_THREAD_CLASS),
TCP_KEEP_ALIVE("metastore.server.tcp.keepalive",
"hive.metastore.server.tcp.keepalive", true,
"Whether to enable TCP keepalive for the metastore server. Keepalive will prevent accumulation of half-open connections."),
THREAD_POOL_SIZE("metastore.thread.pool.size", "no.such", 10,
"Number of threads in the thread pool. These will be used to execute all background " +
"processes."),
THRIFT_CONNECTION_RETRIES("metastore.connect.retries", "hive.metastore.connect.retries", 3,
"Number of retries while opening a connection to metastore"),
THRIFT_FAILURE_RETRIES("metastore.failure.retries", "hive.metastore.failure.retries", 1,
"Number of retries upon failure of Thrift metastore calls"),
THRIFT_BIND_HOST("metastore.thrift.bind.host", "hive.metastore.thrift.bind.host", "",
"Bind host on which to run the metastore thrift service."),
THRIFT_URIS("metastore.thrift.uris", "hive.metastore.uris", "",
"URIs used by the metastore client to connect to the remote metastore.\n" +
"If a dynamic service discovery mode is set, the URIs are used to connect to the" +
" corresponding service discovery servers, e.g. a ZooKeeper ensemble. Otherwise they are " +
"used as URIs for the remote metastore."),
THRIFT_SERVICE_DISCOVERY_MODE("metastore.service.discovery.mode",
"hive.metastore.service.discovery.mode",
"",
"Specifies which dynamic service discovery method to use. Currently we support only " +
"\"zookeeper\" to specify ZooKeeper based service discovery."),
THRIFT_ZOOKEEPER_USE_KERBEROS("metastore.zookeeper.kerberos.enabled",
"hive.zookeeper.kerberos.enabled", true,
"Whether ZooKeeper is configured for Kerberos authentication. Setting this to false could be useful when the cluster\n" +
"is kerberized but ZooKeeper is not."),
THRIFT_ZOOKEEPER_CLIENT_PORT("metastore.zookeeper.client.port",
"hive.zookeeper.client.port", "2181",
"The port of the ZooKeeper servers to talk to.\n" +
"If the list of ZooKeeper servers specified in metastore.thrift.uris" +
" does not contain port numbers, this value is used."),
THRIFT_ZOOKEEPER_SESSION_TIMEOUT("metastore.zookeeper.session.timeout",
"hive.zookeeper.session.timeout", 120000L, TimeUnit.MILLISECONDS,
new TimeValidator(TimeUnit.MILLISECONDS),
"ZooKeeper client's session timeout (in milliseconds). The client is disconnected\n" +
"if a heartbeat is not sent in the timeout."),
THRIFT_ZOOKEEPER_CONNECTION_TIMEOUT("metastore.zookeeper.connection.timeout",
"hive.zookeeper.connection.timeout", 15L, TimeUnit.SECONDS,
new TimeValidator(TimeUnit.SECONDS),
"ZooKeeper client's connection timeout in seconds. " +
"Connection timeout * metastore.zookeeper.connection.max.retries,\n" +
"with exponential backoff, is the point at which the Curator client deems the connection to ZooKeeper lost."),
THRIFT_ZOOKEEPER_NAMESPACE("metastore.zookeeper.namespace",
"hive.zookeeper.namespace", "hive_metastore",
"The parent node under which all ZooKeeper nodes for metastores are created."),
THRIFT_ZOOKEEPER_CONNECTION_MAX_RETRIES("metastore.zookeeper.connection.max.retries",
"hive.zookeeper.connection.max.retries", 3,
"Max number of times to retry when connecting to the ZooKeeper server."),
THRIFT_ZOOKEEPER_CONNECTION_BASESLEEPTIME("metastore.zookeeper.connection.basesleeptime",
"hive.zookeeper.connection.basesleeptime", 1000L, TimeUnit.MILLISECONDS,
new TimeValidator(TimeUnit.MILLISECONDS),
"Initial amount of time (in milliseconds) to wait between retries\n" +
"when connecting to the ZooKeeper server when using ExponentialBackoffRetry policy."),
THRIFT_ZOOKEEPER_SSL_ENABLE("metastore.zookeeper.ssl.client.enable",
"hive.zookeeper.ssl.client.enable", false,
"Set client to use TLS when connecting to ZooKeeper. An explicit value overrides any value set via the " +
"zookeeper.client.secure system property (note the different name). Defaults to false if neither is set."),
THRIFT_ZOOKEEPER_SSL_KEYSTORE_LOCATION("metastore.zookeeper.ssl.keystore.location",
"hive.zookeeper.ssl.keystore.location", "",
"Keystore location when using a client-side certificate with TLS connectivity to ZooKeeper. " +
"Overrides any explicit value set via the zookeeper.ssl.keyStore.location " +
"system property (note the camelCase)."),
THRIFT_ZOOKEEPER_SSL_KEYSTORE_PASSWORD("metastore.zookeeper.ssl.keystore.password",
"hive.zookeeper.ssl.keystore.password", "",
"Keystore password when using a client-side certificate with TLS connectivity to ZooKeeper. " +
"Overrides any explicit value set via the zookeeper.ssl.keyStore.password " +
"system property (note the camelCase)."),
THRIFT_ZOOKEEPER_SSL_TRUSTSTORE_LOCATION("metastore.zookeeper.ssl.truststore.location",
"hive.zookeeper.ssl.truststore.location", "",
"Truststore location when using a client-side certificate with TLS connectivity to ZooKeeper. " +
"Overrides any explicit value set via the zookeeper.ssl.trustStore.location " +
"system property (note the camelCase)."),
THRIFT_ZOOKEEPER_SSL_TRUSTSTORE_PASSWORD("metastore.zookeeper.ssl.truststore.password",
"hive.zookeeper.ssl.truststore.password", "",
"Truststore password when using a client-side certificate with TLS connectivity to ZooKeeper. " +
"Overrides any explicit value set via the zookeeper.ssl.trustStore.password " +
"system property (note the camelCase)."),
THRIFT_URI_SELECTION("metastore.thrift.uri.selection", "hive.metastore.uri.selection", "RANDOM",
new StringSetValidator("RANDOM", "SEQUENTIAL"),
"Determines the selection mechanism used by metastore client to connect to remote " +
"metastore. SEQUENTIAL implies that the first valid metastore from the URIs specified " +
"through hive.metastore.uris will be picked. RANDOM implies that the metastore " +
"will be picked randomly"),
TOKEN_SIGNATURE("metastore.token.signature", "hive.metastore.token.signature", "",
"The delegation token service name to match when selecting a token from the current user's tokens."),
METASTORE_CACHE_CAN_USE_EVENT("metastore.cache.can.use.event", "hive.metastore.cache.can.use.event", false,
"Whether notification events from the notification log table can be used to update the metastore cache."),
TRANSACTIONAL_EVENT_LISTENERS("metastore.transactional.event.listeners",
"hive.metastore.transactional.event.listeners", "",
"A comma separated list of Java classes that implement the org.apache.hadoop.hive.metastore.MetaStoreEventListener" +
" interface. Both the metastore event and the corresponding listener method will be invoked in the same JDO transaction." +
" If org.apache.hive.hcatalog.listener.DbNotificationListener is configured along with other transactional event" +
" listener implementation classes, make sure org.apache.hive.hcatalog.listener.DbNotificationListener is placed at" +
" the end of the list."),
TRUNCATE_ACID_USE_BASE("metastore.acid.truncate.usebase", "hive.metastore.acid.truncate.usebase", true,
"If enabled, truncate for transactional tables will not delete the data directories,\n" +
"but rather create a new base directory with no data files."),
TRY_DIRECT_SQL("metastore.try.direct.sql", "hive.metastore.try.direct.sql", true,
"Whether the metastore should try to use direct SQL queries instead of the\n" +
"DataNucleus for certain read paths. This can improve metastore performance when\n" +
"fetching many partitions or column statistics by orders of magnitude; however, it\n" +
"is not guaranteed to work on all RDBMS-es and all versions. In case of SQL failures,\n" +
"the metastore will fall back to the DataNucleus, so it's safe even if SQL doesn't\n" +
"work for all queries on your datastore. If all SQL queries fail (for example, your\n" +
"metastore is backed by MongoDB), you might want to disable this to save the\n" +
"try-and-fall-back cost."),
TRY_DIRECT_SQL_DDL("metastore.try.direct.sql.ddl", "hive.metastore.try.direct.sql.ddl", true,
"Same as hive.metastore.try.direct.sql, for read statements within a transaction that\n" +
"modifies metastore data. Due to non-standard behavior in Postgres, if a direct SQL\n" +
"select query has incorrect syntax or something similar inside a transaction, the\n" +
"entire transaction will fail and fall-back to DataNucleus will not be possible. You\n" +
"should disable the usage of direct SQL inside transactions if that happens in your case."),
TXN_MAX_OPEN_BATCH("metastore.txn.max.open.batch", "hive.txn.max.open.batch", 1000,
"Maximum number of transactions that can be fetched in one call to open_txns().\n" +
"This controls how many transactions streaming agents such as Flume or Storm open\n" +
"simultaneously. The streaming agent then writes that number of entries into a single\n" +
"file (per Flume agent or Storm bolt). Thus increasing this value decreases the number\n" +
"of delta files created by streaming agents. But it also increases the number of open\n" +
"transactions that Hive has to track at any given time, which may negatively affect\n" +
"read performance."),
TXN_RETRYABLE_SQLEX_REGEX("metastore.txn.retryable.sqlex.regex",
"hive.txn.retryable.sqlex.regex", "", "Comma separated list\n" +
"of regular expression patterns for SQL state, error code, and error message of\n" +
"retryable SQLExceptions, that's suitable for the metastore DB.\n" +
"For example: Can't serialize.*,40001$,^Deadlock,.*ORA-08176.*\n" +
"The string that the regex will be matched against is of the following form, where ex is a SQLException:\n" +
"ex.getMessage() + \" (SQLState=\" + ex.getSQLState() + \", ErrorCode=\" + ex.getErrorCode() + \")\""),
TXN_STORE_IMPL("metastore.txn.store.impl", "hive.metastore.txn.store.impl",
"org.apache.hadoop.hive.metastore.txn.CompactionTxnHandler",
"Name of the class that implements the org.apache.hadoop.hive.metastore.txn.TxnStore interface. This " +
"class is used to store and retrieve transactions and locks"),
TXN_TIMEOUT("metastore.txn.timeout", "hive.txn.timeout", 300, TimeUnit.SECONDS,
"Time after which transactions are declared aborted if the client has not sent a heartbeat."),
TXN_OPENTXN_TIMEOUT("metastore.txn.opentxn.timeout", "hive.txn.opentxn.timeout", 1000, TimeUnit.MILLISECONDS,
"Time within which an open transaction operation must persist; otherwise it is considered invalid and rolled back"),
TXN_USE_MIN_HISTORY_LEVEL("metastore.txn.use.minhistorylevel", "hive.txn.use.minhistorylevel", true,
"Set this to false so that the TxnHandler and Cleaner do not use the MinHistoryLevel table and instead take advantage of the openTxn optimisation.\n"
+ "If the table is dropped, HMS will switch this flag to false."),
URI_RESOLVER("metastore.uri.resolver", "hive.metastore.uri.resolver", "",
"If set, the fully qualified class name of the resolver for hive metastore URIs"),
USERS_IN_ADMIN_ROLE("metastore.users.in.admin.role", "hive.users.in.admin.role", "", false,
"Comma separated list of users who are in admin role for bootstrapping.\n" +
"More users can be added in ADMIN role later."),
USE_SSL("metastore.use.SSL", "hive.metastore.use.SSL", false,
"Set this to true for using SSL encryption in HMS server."),
// We should somehow unify next two options.
USE_THRIFT_SASL("metastore.sasl.enabled", "hive.metastore.sasl.enabled", false,
"If true, the metastore Thrift interface will be secured with SASL. Clients must authenticate with Kerberos."),
METASTORE_CLIENT_AUTH_MODE("metastore.client.auth.mode",
"hive.metastore.client.auth.mode", "NOSASL",
new StringSetValidator("NOSASL", "PLAIN", "KERBEROS"),
"If PLAIN, clients will authenticate using plain authentication, by providing username" +
" and password. Any other value is ignored right now but may be used later."),
METASTORE_CLIENT_PLAIN_USERNAME("metastore.client.plain.username",
"hive.metastore.client.plain.username", "",
"The username used by the metastore client when " +
METASTORE_CLIENT_AUTH_MODE + " is set to PLAIN. The password is obtained from " +
CredentialProviderFactory.CREDENTIAL_PROVIDER_PATH + " using the username as the " +
"alias."),
THRIFT_AUTH_CONFIG_USERNAME("metastore.authentication.config.username",
"hive.metastore.authentication.config.username", "",
"If " + THRIFT_METASTORE_AUTHENTICATION + " is set to CONFIG, the username provided by the " +
"client is matched against this value."),
THRIFT_AUTH_CONFIG_PASSWORD("metastore.authentication.config.password",
"hive.metastore.authentication.config.password", "",
"If " + THRIFT_METASTORE_AUTHENTICATION + " is set to CONFIG, password provided by " +
"the client is matched against this value."),
USE_THRIFT_FRAMED_TRANSPORT("metastore.thrift.framed.transport.enabled",
"hive.metastore.thrift.framed.transport.enabled", false,
"If true, the metastore Thrift interface will use TFramedTransport. When false (default) a standard TTransport is used."),
USE_THRIFT_COMPACT_PROTOCOL("metastore.thrift.compact.protocol.enabled",
"hive.metastore.thrift.compact.protocol.enabled", false,
"If true, the metastore Thrift interface will use TCompactProtocol. When false (default) TBinaryProtocol will be used.\n" +
"Setting it to true will break compatibility with older clients running TBinaryProtocol."),
VALIDATE_COLUMNS("datanucleus.schema.validateColumns", "datanucleus.schema.validateColumns", false,
"Validates the existing schema against the code. Turn this on if you want to verify the existing schema"),
VALIDATE_CONSTRAINTS("datanucleus.schema.validateConstraints",
"datanucleus.schema.validateConstraints", false,
"Validates the existing schema against the code. Turn this on if you want to verify the existing schema"),
VALIDATE_TABLES("datanucleus.schema.validateTables",
"datanucleus.schema.validateTables", false,
"Validates the existing schema against the code. Turn this on if you want to verify the existing schema"),
WAREHOUSE("metastore.warehouse.dir", "hive.metastore.warehouse.dir", "/user/hive/warehouse",
"Location of the default database for the warehouse"),
WAREHOUSE_EXTERNAL("metastore.warehouse.external.dir",
"hive.metastore.warehouse.external.dir", "",
"Default location for external tables created in the warehouse. " +
"If not set or null, then the normal warehouse location will be used as the default location."),
WM_DEFAULT_POOL_SIZE("metastore.wm.default.pool.size",
"hive.metastore.wm.default.pool.size", 4,
"The size of a default pool to create when creating an empty resource plan;\n" +
"If not positive, no default pool will be created."),
RAWSTORE_PARTITION_BATCH_SIZE("metastore.rawstore.batch.size",
"metastore.rawstore.batch.size", -1,
"Batch size for partition and other object retrieval from the underlying DB in JDO.\n" +
"The JDO implementation such as DataNucleus may run into issues when the generated queries are\n" +
"too large. Use this parameter to break the query into multiple batches. -1 means no batching."),
HIVE_METASTORE_RUNWORKER_IN("hive.metastore.runworker.in",
"hive.metastore.runworker.in", "metastore", new StringSetValidator("metastore", "hs2"),
"Chooses where the compactor worker threads should run. The only possible values"
+ " are \"metastore\" and \"hs2\""),
// Hive values we have copied and use as is
// These two are used to indicate that we are running tests
HIVE_IN_TEST("hive.in.test", "hive.in.test", false, "internal usage only, true in test mode"),
HIVE_IN_TEZ_TEST("hive.in.tez.test", "hive.in.tez.test", false,
"internal use only, true when in testing tez"),
HIVE_IN_TEST_ICEBERG("hive.in.iceberg.test", "hive.in.iceberg.test", false,
"internal usage only, true when testing iceberg"),
// We need to track this as some listeners pass it through our config and we need to honor
// the system properties.
HIVE_AUTHORIZATION_MANAGER("hive.security.authorization.manager",
"hive.security.authorization.manager",
"org.apache.hadoop.hive.ql.security.authorization.plugin.sqlstd.SQLStdHiveAuthorizerFactory",
"The Hive client authorization manager class name. The user defined authorization class should implement \n" +
"interface org.apache.hadoop.hive.ql.security.authorization.HiveAuthorizationProvider."),
HIVE_METASTORE_AUTHENTICATOR_MANAGER("hive.security.metastore.authenticator.manager",
"hive.security.metastore.authenticator.manager",
"org.apache.hadoop.hive.ql.security.HadoopDefaultMetastoreAuthenticator",
"authenticator manager class name to be used in the metastore for authentication. \n" +
"The user defined authenticator should implement interface org.apache.hadoop.hive.ql.security.HiveAuthenticationProvider."),
HIVE_METASTORE_AUTHORIZATION_AUTH_READS("hive.security.metastore.authorization.auth.reads",
"hive.security.metastore.authorization.auth.reads", true,
"If this is true, the metastore authorizer authorizes read actions on databases and tables"),
// The metastore shouldn't care what txn manager Hive is running, but in various tests it
// needs to set these values. We should do the work to detangle this.
HIVE_TXN_MANAGER("hive.txn.manager", "hive.txn.manager",
"org.apache.hadoop.hive.ql.lockmgr.DummyTxnManager",
"Set to org.apache.hadoop.hive.ql.lockmgr.DbTxnManager as part of turning on Hive\n" +
"transactions, which also requires appropriate settings for hive.compactor.initiator.on,\n" +
"hive.compactor.worker.threads, hive.support.concurrency (true),\n" +
"and hive.exec.dynamic.partition.mode (nonstrict).\n" +
"The default DummyTxnManager replicates pre-Hive-0.13 behavior and provides\n" +
"no transactions."),
// Metastore always support concurrency, but certain ACID tests depend on this being set. We
// need to do the work to detangle this
HIVE_SUPPORT_CONCURRENCY("hive.support.concurrency", "hive.support.concurrency", false,
"Whether Hive supports concurrency control or not. \n" +
"A ZooKeeper instance must be up and running when using the ZooKeeper Hive lock manager"),
HIVE_TXN_STATS_ENABLED("hive.txn.stats.enabled", "hive.txn.stats.enabled", true,
"Whether Hive supports transactional stats (accurate stats for transactional tables)"),
// External RDBMS support
USE_CUSTOM_RDBMS("metastore.use.custom.database.product",
"hive.metastore.use.custom.database.product", false,
"Use an external RDBMS which is not in the list of natively supported databases (Derby,\n"
+ "Mysql, Oracle, Postgres, MSSQL), as defined by hive.metastore.db.type. If this configuration\n"
+ "is true, the metastore.custom.database.product.classname must be set to a valid class name"),
CUSTOM_RDBMS_CLASSNAME("metastore.custom.database.product.classname",
"hive.metastore.custom.database.product.classname", "none",
"Hook for external RDBMS. This class will be instantiated only when " +
"metastore.use.custom.database.product is set to true."),
HIVE_BLOBSTORE_SUPPORTED_SCHEMES("hive.blobstore.supported.schemes", "hive.blobstore.supported.schemes", "s3,s3a,s3n",
"Comma-separated list of supported blobstore schemes."),
// Deprecated Hive values that we are keeping for backwards compatibility.
@Deprecated
HIVE_CODAHALE_METRICS_REPORTER_CLASSES("hive.service.metrics.codahale.reporter.classes",
"hive.service.metrics.codahale.reporter.classes", "",
"Use METRICS_REPORTERS instead. Comma separated list of reporter implementation classes " +
"for metric class org.apache.hadoop.hive.common.metrics.metrics2.CodahaleMetrics. Overrides "
+ "HIVE_METRICS_REPORTER conf if present. This will be overridden by " +
"METRICS_REPORTERS if it is present"),
@Deprecated
HIVE_METRICS_REPORTER("hive.service.metrics.reporter", "hive.service.metrics.reporter", "",
"Reporter implementations for metric class "
+ "org.apache.hadoop.hive.common.metrics.metrics2.CodahaleMetrics; " +
"Deprecated, use METRICS_REPORTERS instead. This configuration will be"
+ " overridden by HIVE_CODAHALE_METRICS_REPORTER_CLASSES and METRICS_REPORTERS if " +
"present. Comma separated list of JMX, CONSOLE, JSON_FILE, HADOOP2"),
// Planned to be removed in HIVE-21024
@Deprecated
DBACCESS_SSL_PROPS("metastore.dbaccess.ssl.properties", "hive.metastore.dbaccess.ssl.properties", "",
"Deprecated. Use the metastore.dbaccess.ssl.* properties instead. Comma-separated SSL properties for " +
"metastore to access database when JDO connection URL enables SSL access. \n"
+ "e.g. javax.net.ssl.trustStore=/tmp/truststore,javax.net.ssl.trustStorePassword=pwd.\n " +
"If both this and the metastore.dbaccess.ssl.* properties are set, then the latter properties \n" +
"will overwrite what was set in the deprecated property."),
METASTORE_NUM_STRIPED_TABLE_LOCKS("metastore.num.striped.table.locks", "hive.metastore.num.striped.table.locks", 32,
"Number of striped locks available to provide exclusive operation support for critical table operations like add_partitions."),
COLSTATS_RETAIN_ON_COLUMN_REMOVAL("metastore.colstats.retain.on.column.removal",
"hive.metastore.colstats.retain.on.column.removal", true,
"Whether to retain column statistics during column removals in partitioned tables - disabling this "
+ "purges all column statistics data "
+ "for all partitions in order to keep the statistics consistent"),
// These are all values that we put here just for testing
STR_TEST_ENTRY("test.str", "hive.test.str", "defaultval", "comment"),
STR_SET_ENTRY("test.str.set", "hive.test.str.set", "a", new StringSetValidator("a", "b", "c"), ""),
STR_LIST_ENTRY("test.str.list", "hive.test.str.list", "a,b,c",
"no comment"),
LONG_TEST_ENTRY("test.long", "hive.test.long", 42, "comment"),
DOUBLE_TEST_ENTRY("test.double", "hive.test.double", Math.PI, "comment"),
TIME_TEST_ENTRY("test.time", "hive.test.time", 1, TimeUnit.SECONDS, "comment"),
DEPRECATED_TEST_ENTRY("test.deprecated", "hive.test.deprecated", 0, new RangeValidator(0, 3), "comment",
"this.is.the.metastore.deprecated.name", "this.is.the.hive.deprecated.name"),
TIME_VALIDATOR_ENTRY_INCLUSIVE("test.time.validator.inclusive", "hive.test.time.validator.inclusive", 1,
TimeUnit.SECONDS,
new TimeValidator(TimeUnit.MILLISECONDS, 500L, true, 1500L, true), "comment"),
TIME_VALIDATOR_ENTRY_EXCLUSIVE("test.time.validator.exclusive", "hive.test.time.validator.exclusive", 1,
TimeUnit.SECONDS,
new TimeValidator(TimeUnit.MILLISECONDS, 500L, false, 1500L, false), "comment"),
BOOLEAN_TEST_ENTRY("test.bool", "hive.test.bool", true, "comment"),
CLASS_TEST_ENTRY("test.class", "hive.test.class", "", "comment");
private final String varname;
private final String hiveName;
private final Object defaultVal;
private final Validator validator;
private final boolean caseSensitive;
private final String description;
private String deprecatedName = null;
private String hiveDeprecatedName = null;
ConfVars(String varname, String hiveName, String defaultVal, String description) {
this.varname = varname;
this.hiveName = hiveName;
this.defaultVal = defaultVal;
validator = null;
caseSensitive = false;
this.description = description;
}
ConfVars(String varname, String hiveName, String defaultVal, Validator validator,
String description) {
this.varname = varname;
this.hiveName = hiveName;
this.defaultVal = defaultVal;
this.validator = validator;
caseSensitive = false;
this.description = description;
}
ConfVars(String varname, String hiveName, String defaultVal, boolean caseSensitive,
String description) {
this.varname = varname;
this.hiveName = hiveName;
this.defaultVal = defaultVal;
validator = null;
this.caseSensitive = caseSensitive;
this.description = description;
}
ConfVars(String varname, String hiveName, long defaultVal, String description) {
this.varname = varname;
this.hiveName = hiveName;
this.defaultVal = defaultVal;
validator = null;
caseSensitive = false;
this.description = description;
}
ConfVars(String varname, String hiveName, long defaultVal, Validator validator,
String description) {
this.varname = varname;
this.hiveName = hiveName;
this.defaultVal = defaultVal;
this.validator = validator;
caseSensitive = false;
this.description = description;
}
ConfVars(String varname, String hiveName, long defaultVal, Validator validator,
String description, String deprecatedName, String hiveDeprecatedName) {
this.varname = varname;
this.hiveName = hiveName;
this.defaultVal = defaultVal;
this.validator = validator;
caseSensitive = false;
this.description = description;
this.deprecatedName = deprecatedName;
this.hiveDeprecatedName = hiveDeprecatedName;
}
ConfVars(String varname, String hiveName, boolean defaultVal, String description) {
this.varname = varname;
this.hiveName = hiveName;
this.defaultVal = defaultVal;
validator = null;
caseSensitive = false;
this.description = description;
}
ConfVars(String varname, String hiveName, double defaultVal, String description) {
this.varname = varname;
this.hiveName = hiveName;
this.defaultVal = defaultVal;
validator = null;
caseSensitive = false;
this.description = description;
}
ConfVars(String varname, String hiveName, long defaultVal, TimeUnit unit, String description) {
this.varname = varname;
this.hiveName = hiveName;
this.defaultVal = new TimeValue(defaultVal, unit);
validator = new TimeValidator(unit);
caseSensitive = false;
this.description = description;
}
ConfVars(String varname, String hiveName, long defaultVal, TimeUnit unit,
Validator validator, String description) {
this.varname = varname;
this.hiveName = hiveName;
this.defaultVal = new TimeValue(defaultVal, unit);
this.validator = validator;
caseSensitive = false;
this.description = description;
}
public void validate(String value) throws IllegalArgumentException {
if (validator != null) {
validator.validate(value);
}
}
public boolean isCaseSensitive() {
return caseSensitive;
}
/**
* If you are calling this, you're probably doing it wrong. You shouldn't need to use the
* underlying variable name. Use one of the getVar methods instead. Only use this if you
* are 100% sure you know what you're doing. The reason for this is that MetastoreConf goes to a
* lot of trouble to make sure it checks both Hive and Metastore values for config keys. If
* you call {@link Configuration#get(String)} you are undermining that.
* @return variable name
*/
public String getVarname() {
return varname;
}
/**
* Use this method if you need to set a system property and are going to instantiate the
* configuration file via HiveConf. This is because HiveConf only looks for values it knows,
* so it will miss all of the metastore.* ones. Do not use this to explicitly set or get the
* underlying config value unless you are 100% sure you know what you're doing.
* The reason for this is that MetastoreConf goes to a
* lot of trouble to make sure it checks both Hive and Metastore values for config keys. If
* you call {@link Configuration#get(String)} you are undermining that.
* @return hive.* configuration key
*/
public String getHiveName() {
return hiveName;
}
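// Usage sketch (illustrative; the URI value is hypothetical): when a value must be visible to a
// configuration created via HiveConf, set it through the hive.* key, e.g.
//   System.setProperty(ConfVars.THRIFT_URIS.getHiveName(), "thrift://localhost:9083");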
public Object getDefaultVal() {
return defaultVal;
}
public String getDescription() {
return description;
}
/**
* This is useful if you need the variable name for a LOG message or
* {@link System#setProperty(String, String)}, beware however that you should only use this
* with setProperty if you're going to create a configuration via
* {@link MetastoreConf#newMetastoreConf()}. If you are going to create it with HiveConf,
* then use {@link #getHiveName()}.
* @return metastore.* configuration key
*/
@Override
public String toString() {
return varname;
}
}
public static final ConfVars[] dataNucleusAndJdoConfs = {
ConfVars.AUTO_CREATE_ALL,
ConfVars.CONNECTION_DRIVER,
ConfVars.CONNECTION_POOLING_MAX_CONNECTIONS,
ConfVars.CONNECTION_POOLING_TYPE,
ConfVars.CONNECT_URL_KEY,
ConfVars.CONNECTION_USER_NAME,
ConfVars.DATANUCLEUS_AUTOSTART,
ConfVars.DATANUCLEUS_CACHE_LEVEL2,
ConfVars.DATANUCLEUS_CACHE_LEVEL2_TYPE,
ConfVars.DATANUCLEUS_INIT_COL_INFO,
ConfVars.DATANUCLEUS_PLUGIN_REGISTRY_BUNDLE_CHECK,
ConfVars.DATANUCLEUS_TRANSACTION_ISOLATION,
ConfVars.DATANUCLEUS_USE_LEGACY_VALUE_STRATEGY,
ConfVars.DETACH_ALL_ON_COMMIT,
ConfVars.IDENTIFIER_FACTORY,
ConfVars.MANAGER_FACTORY_CLASS,
ConfVars.MULTITHREADED,
ConfVars.NON_TRANSACTIONAL_READ,
ConfVars.PWD,
ConfVars.STORE_MANAGER_TYPE,
ConfVars.VALIDATE_COLUMNS,
ConfVars.VALIDATE_CONSTRAINTS,
ConfVars.VALIDATE_TABLES
};
// Make sure no one calls this
private MetastoreConf() {
throw new RuntimeException("You should never be creating one of these!");
}
public static void setHiveSiteLocation(URL location) {
hiveSiteURL = location;
}
public static Configuration newMetastoreConf() {
return newMetastoreConf(new Configuration());
}
public static Configuration newMetastoreConf(Configuration conf) {
ClassLoader classLoader = Thread.currentThread().getContextClassLoader();
if (classLoader == null) {
classLoader = MetastoreConf.class.getClassLoader();
}
// We don't add this to the resources because we don't want to read config values from it.
// But we do find it because we want to remember where it is for later in case anyone calls
// getHiveDefaultLocation().
hiveDefaultURL = classLoader.getResource("hive-default.xml");
// Add in hive-site.xml. We add this first so that it gets overridden by the new metastore
// specific files if they exist.
if(hiveSiteURL == null) {
/*
* this 'if' is pretty lame - QTestUtil.QTestUtil() uses hiveSiteURL to load a specific
* hive-site.xml from data/conf/<subdir> so this makes it follow the same logic - otherwise
* HiveConf and MetastoreConf may load different hive-site.xml ( For example,
* HiveConf uses data/conf/spark/hive-site.xml and MetastoreConf data/conf/hive-site.xml)
*/
hiveSiteURL = findConfigFile(classLoader, "hive-site.xml");
}
if (hiveSiteURL != null) {
conf.addResource(hiveSiteURL);
}
// Now add hivemetastore-site.xml. Again we add this before our own config files so that the
// newer overrides the older.
hiveMetastoreSiteURL = findConfigFile(classLoader, "hivemetastore-site.xml");
if (hiveMetastoreSiteURL != null) {
conf.addResource(hiveMetastoreSiteURL);
}
// Add in our conf file
metastoreSiteURL = findConfigFile(classLoader, "metastore-site.xml");
if (metastoreSiteURL != null) {
conf.addResource(metastoreSiteURL);
}
// If a system property that matches one of our conf value names is set then use the value
// it's set to to set our own conf value.
for (ConfVars var : ConfVars.values()) {
if (System.getProperty(var.varname) != null) {
LOG.debug("Setting conf value " + var.varname + " using value " +
System.getProperty(var.varname));
conf.set(var.varname, System.getProperty(var.varname));
}
}
// Pick up any system properties that start with "hive." and set them in our config. This
// way we can properly pull any Hive values from the environment without needing to know all
// of the Hive config values.
System.getProperties().stringPropertyNames().stream()
.filter(s -> s.startsWith("hive."))
.forEach(s -> {
String v = System.getProperty(s);
LOG.debug("Picking up system property " + s + " with value " + v);
conf.set(s, v);
});
// If we are going to validate the schema, make sure we don't create it
if (getBoolVar(conf, ConfVars.SCHEMA_VERIFICATION)) {
setBoolVar(conf, ConfVars.AUTO_CREATE_ALL, false);
}
if (!beenDumped.getAndSet(true) && getBoolVar(conf, ConfVars.DUMP_CONFIG_ON_CREATION) &&
LOG.isDebugEnabled()) {
LOG.debug(dumpConfig(conf));
}
/*
Add deprecated config names to configuration.
The parameters for Configuration.addDeprecation are (oldKey, newKey) and it is assumed that the config is set via
newKey and the value is retrieved via oldKey.
However in this case we assume the value is set with the deprecated key (oldKey) in some config file and we
retrieve it in the code via the new key. So the parameter order we use here is: (newKey, deprecatedKey).
We do this with the HiveConf configs as well.
*/
for (ConfVars var : ConfVars.values()) {
if (var.deprecatedName != null) {
Configuration.addDeprecation(var.getVarname(), var.deprecatedName);
}
if (var.hiveDeprecatedName != null) {
Configuration.addDeprecation(var.getHiveName(), var.hiveDeprecatedName);
}
}
return conf;
}
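// Minimal usage sketch (illustrative; the URI values are hypothetical):
//   Configuration conf = MetastoreConf.newMetastoreConf();
//   MetastoreConf.setVar(conf, ConfVars.THRIFT_URIS, "thrift://ms1.example.com:9083,thrift://ms2.example.com:9083");
//   String warehouse = MetastoreConf.getVar(conf, ConfVars.WAREHOUSE); // "/user/hive/warehouse" unless overridden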
private static URL findConfigFile(ClassLoader classLoader, String name) {
// First, look in the classpath
URL result = classLoader.getResource(name);
if (result == null) {
// Nope, so look to see if our conf dir has been explicitly set
result = seeIfConfAtThisLocation("METASTORE_CONF_DIR", name, false);
}
if (result == null) {
// Nope, so look to see if our home dir has been explicitly set
result = seeIfConfAtThisLocation("METASTORE_HOME", name, true);
}
if (result == null) {
// Nope, so look to see if Hive's conf dir has been explicitly set
result = seeIfConfAtThisLocation("HIVE_CONF_DIR", name, false);
}
if (result == null) {
// Nope, so look to see if Hive's home dir has been explicitly set
result = seeIfConfAtThisLocation("HIVE_HOME", name, true);
}
if (result == null) {
// Nope, so look to see if we can find a conf file by finding our jar, going up one
// directory, and looking for a conf directory.
URI jarUri = null;
try {
jarUri = MetastoreConf.class.getProtectionDomain().getCodeSource().getLocation().toURI();
} catch (Throwable e) {
LOG.warn("Cannot get jar URI", e);
}
if (jarUri != null) {
result = seeIfConfAtThisLocation(new File(jarUri).getParent(), name, true);
}
}
if (result == null) {
LOG.info("Unable to find config file: " + name);
} else {
LOG.info("Found configuration file: " + result);
}
return result;
}
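// Resolution sketch (illustrative; the directory is hypothetical): exporting METASTORE_CONF_DIR=/etc/metastore/conf
// makes findConfigFile return /etc/metastore/conf/metastore-site.xml when the file is not on the classpath,
// before falling back to METASTORE_HOME/conf, HIVE_CONF_DIR, HIVE_HOME/conf and finally a conf directory next to the jar.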
private static URL seeIfConfAtThisLocation(String envVar, String name, boolean inConfDir) {
String path = System.getenv(envVar);
if (path == null) {
// Workaround for testing since tests can't set the env vars.
path = System.getProperty(TEST_ENV_WORKAROUND + envVar);
}
if (path != null) {
String suffix = inConfDir ? "conf" + File.separatorChar + name : name;
return checkConfigFile(new File(path, suffix));
}
return null;
}
private static URL checkConfigFile(File f) {
try {
return (f.exists() && f.isFile()) ? f.toURI().toURL() : null;
} catch (Throwable e) {
LOG.warn("Error looking for config " + f, e);
return null;
}
}
// In all of the getters, we try the metastore value name first. If it is not set we try the
// Hive value name.
/**
* Get the variable as a string
* @param conf configuration to retrieve it from
* @param var variable to retrieve
* @return value, or default value if value not in config file
*/
public static String getVar(Configuration conf, ConfVars var) {
assert var.defaultVal.getClass() == String.class;
String val = conf.get(var.varname);
return val == null ? conf.get(var.hiveName, (String)var.defaultVal) : val;
}
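// Lookup-order sketch (illustrative; the paths are hypothetical): the metastore key wins over the Hive key,
// which wins over the default.
//   conf.set("hive.metastore.warehouse.dir", "/hive/wh");
//   MetastoreConf.getVar(conf, ConfVars.WAREHOUSE);   // -> "/hive/wh"
//   conf.set("metastore.warehouse.dir", "/ms/wh");
//   MetastoreConf.getVar(conf, ConfVars.WAREHOUSE);   // -> "/ms/wh"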
/**
* Get the variable as a string
* @param conf configuration to retrieve it from
* @param var variable to retrieve
* @param defaultVal default to return if the variable is unset
* @return value, or default value passed in if the value is not in the config file
*/
public static String getVar(Configuration conf, ConfVars var, String defaultVal) {
assert var.defaultVal.getClass() == String.class;
String val = conf.get(var.varname);
return val == null ? conf.get(var.hiveName, defaultVal) : val;
}
/**
* Treat a configuration value as a comma separated list.
* @param conf configuration to retrieve it from
* @param var variable to retrieve
* @return collection of strings. If the value is unset it will return an empty collection.
*/
public static Collection<String> getStringCollection(Configuration conf, ConfVars var) {
assert var.defaultVal.getClass() == String.class;
String val = conf.get(var.varname);
if (val == null) {
val = conf.get(var.hiveName, (String)var.defaultVal);
}
if (val == null) {
return Collections.emptySet();
}
return StringUtils.asSet(val.split(","));
}
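// Sketch (illustrative; the listener class names are hypothetical):
//   MetastoreConf.setVar(conf, ConfVars.PRE_EVENT_LISTENERS, "com.example.ListenerA,com.example.ListenerB");
//   Collection<String> listeners = MetastoreConf.getStringCollection(conf, ConfVars.PRE_EVENT_LISTENERS);
//   // listeners contains the two class names; an unset value yields an empty collection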
/**
* Set the variable as a string
* @param conf configuration file to set it in
* @param var variable to set
* @param val value to set it to
*/
public static void setVar(Configuration conf, ConfVars var, String val) {
assert var.defaultVal.getClass() == String.class;
conf.set(var.varname, val);
}
/**
* Get the variable as an int. Note that all integer valued variables are stored as longs, thus
* this downcasts from a long to an int.
* @param conf configuration to retrieve it from
* @param var variable to retrieve
* @return value, or default value if value not in config file
*/
public static int getIntVar(Configuration conf, ConfVars var) {
long val = getLongVar(conf, var);
assert val <= Integer.MAX_VALUE;
return (int)val;
}
/**
* Get the variable as a long
* @param conf configuration to retrieve it from
* @param var variable to retrieve
* @return value, or default value if value not in config file
*/
public static long getLongVar(Configuration conf, ConfVars var) {
assert var.defaultVal.getClass() == Long.class;
String val = conf.get(var.varname);
return val == null ? conf.getLong(var.hiveName, (Long)var.defaultVal) : Long.parseLong(val);
}
/**
* Set the variable as a long
* @param conf configuration file to set it in
* @param var variable to set
* @param val value to set it to
*/
public static void setLongVar(Configuration conf, ConfVars var, long val) {
assert var.defaultVal.getClass() == Long.class;
conf.setLong(var.varname, val);
}
/**
* Get the variable as a boolean
* @param conf configuration to retrieve it from
* @param var variable to retrieve
* @return value, or default value if value not in config file
*/
public static boolean getBoolVar(Configuration conf, ConfVars var) {
assert var.defaultVal.getClass() == Boolean.class;
String val = conf.get(var.varname);
return val == null ? conf.getBoolean(var.hiveName, (Boolean)var.defaultVal) : Boolean.valueOf(val);
}
/**
* Get values from comma-separated config, to an array after extracting individual values.
* @param conf Configuration to retrieve it from
* @param var variable to retrieve
* @return Array of String, containing each value from the comma-separated config,
* or default value if value not in config file
*/
public static String[] getTrimmedStringsVar(Configuration conf, ConfVars var) {
assert var.defaultVal.getClass() == String.class;
String[] result = conf.getTrimmedStrings(var.varname, (String[]) null);
if (result != null) {
return result;
}
if (var.hiveName != null) {
result = conf.getTrimmedStrings(var.hiveName, (String[]) null);
if (result != null) {
return result;
}
}
return org.apache.hadoop.util.StringUtils.getTrimmedStrings((String) var.getDefaultVal());
}
/**
* Set the variable as a boolean
* @param conf configuration file to set it in
* @param var variable to set
* @param val value to set it to
*/
public static void setBoolVar(Configuration conf, ConfVars var, boolean val) {
assert var.defaultVal.getClass() == Boolean.class;
conf.setBoolean(var.varname, val);
}
/**
* Get the variable as a double
* @param conf configuration to retrieve it from
* @param var variable to retrieve
* @return value, or default value if value not in config file
*/
public static double getDoubleVar(Configuration conf, ConfVars var) {
assert var.defaultVal.getClass() == Double.class;
String val = conf.get(var.varname);
return val == null ? conf.getDouble(var.hiveName, (Double)var.defaultVal) : Double.valueOf(val);
}
/**
* Set the variable as a double
* @param conf configuration file to set it in
* @param var variable to set
* @param val value to set it to
*/
public static void setDoubleVar(Configuration conf, ConfVars var, double val) {
assert var.defaultVal.getClass() == Double.class;
conf.setDouble(var.varname, val);
}
public static long getSizeVar(Configuration conf, ConfVars var) {
return SizeValidator.toSizeBytes(getVar(conf, var));
}
/**
* Get a class based on a configuration value
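* <p>Sketch with hypothetical names: for a variable whose value names an implementation of some
* {@code FooReader} interface, {@code getClass(conf, var, DefaultFooReader.class, FooReader.class)}
* returns the configured class, or {@code DefaultFooReader.class} when neither the metastore nor
* the hive key is set.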
* @param conf configuration file to retrieve it from
* @param var variable to retrieve
* @param defaultValue default class to return if the value isn't set
* @param xface interface that class must implement
* @param <I> interface that class implements
* @return the configured class, or the default class if the value is not set
*/
public static <I> Class<? extends I> getClass(Configuration conf, ConfVars var,
Class<? extends I> defaultValue,
Class<I> xface) {
assert var.defaultVal.getClass() == String.class;
String val = conf.get(var.varname);
return val == null ? conf.getClass(var.hiveName, defaultValue, xface) :
conf.getClass(var.varname, defaultValue, xface);
}
/**
* Set the class name in the configuration file
* @param conf configuration file to set it in
* @param var variable to set
* @param theClass the class to set it to
* @param xface interface that the class implements. I don't know why this is required, but
* the underlying {@link Configuration#setClass(String, Class, Class)} requires it.
* @param <I> interface the class implements.
*/
public static <I> void setClass(Configuration conf, ConfVars var, Class<? extends I> theClass,
Class<I> xface) {
assert var.defaultVal.getClass() == String.class;
conf.setClass(var.varname, theClass, xface);
}
/**
* Get the variable as a long indicating a period of time
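* <p>A minimal sketch (THRIFT_ZOOKEEPER_CONNECTION_TIMEOUT is used elsewhere in this class as a
* time-valued key); a stored value of "15s" would be returned as 15000 when requested in
* milliseconds:
* <pre>{@code
* long timeoutMs = MetastoreConf.getTimeVar(conf,
*     ConfVars.THRIFT_ZOOKEEPER_CONNECTION_TIMEOUT, TimeUnit.MILLISECONDS);
* }</pre>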
* @param conf configuration to retrieve it from
* @param var variable to retrieve
* @param outUnit TimeUnit to return the value in
* @return value, or default value if value not in config file
*/
public static long getTimeVar(Configuration conf, ConfVars var, TimeUnit outUnit) {
assert var.defaultVal.getClass() == TimeValue.class;
String val = conf.get(var.varname);
if (val == null) {
// Look for it under the old Hive name
val = conf.get(var.hiveName);
}
if (val != null) {
return convertTimeStr(val, ((TimeValue)var.defaultVal).unit, outUnit);
} else {
return outUnit.convert(((TimeValue)var.defaultVal).val, ((TimeValue)var.defaultVal).unit);
}
}
/**
* Set the variable as a time duration
* @param conf configuration file to set it in
* @param var variable to set
* @param duration value to set it to
* @param unit time unit that duration is expressed in
*/
public static void setTimeVar(Configuration conf, ConfVars var, long duration, TimeUnit unit) {
assert var.defaultVal.getClass() == TimeValue.class;
conf.setTimeDuration(var.varname, duration, unit);
}
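/**
* Convert a time string into the requested unit. The string may carry a unit suffix (e.g.
* "600s", "500ms", "2h"); a bare number is interpreted in {@code defaultUnit}.
* <p>For example, {@code convertTimeStr("600s", TimeUnit.SECONDS, TimeUnit.MILLISECONDS)} and
* {@code convertTimeStr("600", TimeUnit.SECONDS, TimeUnit.MILLISECONDS)} both return 600000.
* @param val time value as read from the configuration
* @param defaultUnit unit to assume when no suffix is present
* @param outUnit unit to convert the result into
* @return the duration expressed in {@code outUnit}
*/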
public static long convertTimeStr(String val, TimeUnit defaultUnit, TimeUnit outUnit) {
if (val.charAt(val.length() - 1) >= 'A') {
// It ends in a letter, which means a time unit suffix was appended (e.g. 600s)
Matcher m = TIME_UNIT_SUFFIX.matcher(val);
if (m.matches()) {
long duration = Long.parseLong(m.group(1));
String unit = m.group(2).toLowerCase();
// If/else chain arranged in likely order of frequency for performance
if (unit.equals("s") || unit.startsWith("sec")) {
return outUnit.convert(duration, TimeUnit.SECONDS);
} else if (unit.equals("ms") || unit.startsWith("msec")) {
return outUnit.convert(duration, TimeUnit.MILLISECONDS);
} else if (unit.equals("m") || unit.startsWith("min")) {
return outUnit.convert(duration, TimeUnit.MINUTES);
} else if (unit.equals("us") || unit.startsWith("usec")) {
return outUnit.convert(duration, TimeUnit.MICROSECONDS);
} else if (unit.equals("ns") || unit.startsWith("nsec")) {
return outUnit.convert(duration, TimeUnit.NANOSECONDS);
} else if (unit.equals("h") || unit.startsWith("hour")) {
return outUnit.convert(duration, TimeUnit.HOURS);
} else if (unit.equals("d") || unit.startsWith("day")) {
return outUnit.convert(duration, TimeUnit.DAYS);
} else {
throw new IllegalArgumentException("Invalid time unit " + unit);
}
} else {
throw new IllegalArgumentException("Invalid time unit " + val);
}
}
// If they gave a value but not a time unit, assume the default time unit.
return outUnit.convert(Long.parseLong(val), defaultUnit);
}
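/**
* Return the abbreviation used when rendering a {@link TimeUnit} as part of a time string,
* e.g. "s" for {@link TimeUnit#SECONDS}; the counterpart of the suffixes accepted by
* {@link #convertTimeStr(String, TimeUnit, TimeUnit)}.
*/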
static String timeAbbreviationFor(TimeUnit timeunit) {
switch (timeunit) {
case DAYS: return "d";
case HOURS: return "h";
case MINUTES: return "m";
case SECONDS: return "s";
case MILLISECONDS: return "ms";
case MICROSECONDS: return "us";
case NANOSECONDS: return "ns";
}
throw new IllegalArgumentException("Invalid timeunit " + timeunit);
}
/**
* Get a password from the configuration file. This uses Hadoop's
* {@link Configuration#getPassword(String)} to handle getting secure passwords.
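* <p>A minimal sketch (ConfVars.PWD is assumed here to be the JDBC connection password key):
* <pre>{@code
* // resolved from a Hadoop credential provider when one is configured, otherwise from the
* // plain config value under the metastore key, then the hive key, then the default
* String jdbcPassword = MetastoreConf.getPassword(conf, ConfVars.PWD);
* }</pre>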
* @param conf configuration file to read from
* @param var configuration value to read
* @return the password as a string, or the default value.
* @throws IOException if thrown by Configuration.getPassword
*/
public static String getPassword(Configuration conf, ConfVars var) throws IOException {
assert var.defaultVal.getClass() == String.class;
char[] pw = conf.getPassword(var.varname);
if (pw == null) {
// Might be under the hive name
pw = conf.getPassword(var.hiveName);
}
return pw == null ? var.defaultVal.toString() : new String(pw);
}
/**
* Get the configuration value based on a string rather than a ConfVar. This will do the
* mapping between metastore keys and Hive keys. That is, if there's a ConfVar with a
* metastore key of "metastore.a" and a hive key of "hive.a", the value for that variable will
* be returned if either of those keys is present in the config. If neither is present then
* the default value will be returned.
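* <p>For example, with the hypothetical pair above, {@code get(conf, "metastore.a")} and
* {@code get(conf, "hive.a")} resolve to the same value regardless of which of the two keys
* was actually set.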
* @param conf configuration to read.
* @param key metastore or hive key to read.
* @return the value, or the default value if neither the metastore nor the hive key is set
*/
public static String get(Configuration conf, String key) {
ConfVars var = keyToVars.get(key);
if (var == null) {
// Ok, this isn't one we track. Just return whatever matches the string
return conf.get(key);
}
// Check if the metastore key is set first
String val = conf.get(var.varname);
return val == null ? conf.get(var.hiveName, var.defaultVal.toString()) : val;
}
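/**
* Whether a key's value can safely be included in logs and dumps, i.e. it is not one of the
* keys registered as unprintable (typically passwords and similar secrets).
* @param key metastore configuration key
* @return true if the value may be printed
*/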
public static boolean isPrintable(String key) {
return !unprintables.contains(key);
}
/**
* Return the configuration value as a String. Time based values are returned in their
* default time unit, with the appropriate abbreviation appended (e.g. "s" for seconds).
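* <p>For example, a time-valued variable that resolves to 60 seconds is returned as
* {@code "60s"}.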
* @param conf configuration to read
* @param var variable to read
* @return value as a String
*/
public static String getAsString(Configuration conf, ConfVars var) {
if (var.defaultVal.getClass() == String.class) {
return getVar(conf, var);
} else if (var.defaultVal.getClass() == Boolean.class) {
return Boolean.toString(getBoolVar(conf, var));
} else if (var.defaultVal.getClass() == Long.class) {
return Long.toString(getLongVar(conf, var));
} else if (var.defaultVal.getClass() == Double.class) {
return Double.toString(getDoubleVar(conf, var));
} else if (var.defaultVal.getClass() == TimeValue.class) {
TimeUnit timeUnit = ((TimeValue)var.defaultVal).unit;
return getTimeVar(conf, var, timeUnit) + timeAbbreviationFor(timeUnit);
} else {
throw new RuntimeException("Unknown type for getObject " + var.defaultVal.getClass().getName());
}
}
public static URL getHiveDefaultLocation() {
return hiveDefaultURL;
}
public static URL getHiveSiteLocation() {
return hiveSiteURL;
}
public static URL getHiveMetastoreSiteURL() {
return hiveMetastoreSiteURL;
}
public static URL getMetastoreSiteURL() {
return metastoreSiteURL;
}
public List<URL> getResourceFileLocations() {
return Arrays.asList(hiveSiteURL, hiveMetastoreSiteURL, metastoreSiteURL);
}
/**
* Check if metastore is being used in embedded mode.
* This utility function exists so that the logic for determining the mode is the same
* in HiveConf and HiveMetaStoreClient
* @param msUri - metastore server uri
* @return true if the metastore is embedded
*/
public static boolean isEmbeddedMetaStore(String msUri) {
return (msUri == null) || msUri.trim().isEmpty();
}
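/**
* Build a {@link ZooKeeperHiveHelper} from the THRIFT_ZOOKEEPER_* settings. The keystore and
* truststore passwords are only read (via {@link #getPassword(Configuration, ConfVars)}) when
* ZooKeeper SSL is enabled.
* @param conf configuration to read the ZooKeeper related settings from
* @return helper configured for the ZooKeeper quorum used for metastore service discovery
*/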
public static ZooKeeperHiveHelper getZKConfig(Configuration conf) {
String keyStorePassword = "";
String trustStorePassword = "";
if (MetastoreConf.getBoolVar(conf, ConfVars.THRIFT_ZOOKEEPER_SSL_ENABLE)) {
try {
keyStorePassword = MetastoreConf.getPassword(conf, ConfVars.THRIFT_ZOOKEEPER_SSL_KEYSTORE_PASSWORD);
trustStorePassword = MetastoreConf.getPassword(conf, ConfVars.THRIFT_ZOOKEEPER_SSL_TRUSTSTORE_PASSWORD);
} catch (IOException e) {
throw new RuntimeException("Failed to read zookeeper configuration passwords", e);
}
}
return ZooKeeperHiveHelper.builder()
.quorum(MetastoreConf.getVar(conf, ConfVars.THRIFT_URIS))
.clientPort(MetastoreConf.getVar(conf, ConfVars.THRIFT_ZOOKEEPER_CLIENT_PORT))
.serverRegistryNameSpace(MetastoreConf.getVar(conf, ConfVars.THRIFT_ZOOKEEPER_NAMESPACE))
.connectionTimeout((int) getTimeVar(conf, ConfVars.THRIFT_ZOOKEEPER_CONNECTION_TIMEOUT,
TimeUnit.MILLISECONDS))
.sessionTimeout((int) MetastoreConf.getTimeVar(conf, ConfVars.THRIFT_ZOOKEEPER_SESSION_TIMEOUT,
TimeUnit.MILLISECONDS))
.baseSleepTime((int) MetastoreConf.getTimeVar(conf, ConfVars.THRIFT_ZOOKEEPER_CONNECTION_BASESLEEPTIME,
TimeUnit.MILLISECONDS))
.maxRetries(MetastoreConf.getIntVar(conf, ConfVars.THRIFT_ZOOKEEPER_CONNECTION_MAX_RETRIES))
.sslEnabled(MetastoreConf.getBoolVar(conf, ConfVars.THRIFT_ZOOKEEPER_SSL_ENABLE))
.keyStoreLocation(MetastoreConf.getVar(conf, ConfVars.THRIFT_ZOOKEEPER_SSL_KEYSTORE_LOCATION))
.keyStorePassword(keyStorePassword)
.trustStoreLocation(MetastoreConf.getVar(conf, ConfVars.THRIFT_ZOOKEEPER_SSL_TRUSTSTORE_LOCATION))
.trustStorePassword(trustStorePassword).build();
}
/**
* Dump the configuration to a String suitable for logging at INFO level. This can
* potentially produce a lot of output, so be careful about when and where you log it.
* It takes care not to dump hidden keys.
* @param conf Configuration to dump
* @return String containing the dumped configuration.
*/
static String dumpConfig(Configuration conf) {
StringBuilder buf = new StringBuilder("MetastoreConf object:\n");
if (hiveSiteURL != null) {
buf.append("Used hive-site file: ")
.append(hiveSiteURL)
.append('\n');
}
if (hiveMetastoreSiteURL != null) {
buf.append("Used hivemetastore-site file: ")
.append(hiveMetastoreSiteURL)
.append('\n');
}
if (metastoreSiteURL != null) {
buf.append("Used metastore-site file: ")
.append(metastoreSiteURL)
.append('\n');
}
for (ConfVars var : ConfVars.values()) {
if (!unprintables.contains(var.varname)) {
buf.append("Key: <")
.append(var.varname)
.append("> old hive key: <")
.append(var.hiveName)
.append("> value: <")
.append(getAsString(conf, var))
.append(">\n");
}
}
buf.append("Finished MetastoreConf object.\n");
return buf.toString();
}
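/**
* Read a secret directly from a Hadoop credential provider. A fresh {@link Configuration} is
* created with the credential provider path pointed at the given keystore, so the lookup is
* not affected by any providers configured elsewhere.
* <p>A minimal usage sketch (the jceks URI and alias below are illustrative):
* <pre>{@code
* char[] pw = MetastoreConf.getValueFromKeystore(
*     "jceks://file/etc/hive/conf/metastore.jceks", "javax.jdo.option.ConnectionPassword");
* }</pre>
* @param keystorePath credential provider path (for example a jceks URI); may be null
* @param key alias to look up in the provider; may be null
* @return the value as a char array, or null if either argument is null or the alias is not found
* @throws IOException if the credential provider cannot be read
*/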
public static char[] getValueFromKeystore(String keystorePath, String key) throws IOException {
char[] valueCharArray = null;
if (keystorePath != null && key != null) {
Configuration conf = new Configuration();
conf.set(CredentialProviderFactory.CREDENTIAL_PROVIDER_PATH, keystorePath);
valueCharArray = conf.getPassword(key);
}
return valueCharArray;
}
}