<?xml version="1.0"?>
<!-- ! Copyright 2009-2013 by The Regents of the University of California
! Licensed under the Apache License, Version 2.0 (the "License"); ! you may
not use this file except in compliance with the License. ! you may obtain
a copy of the License from ! ! http://www.apache.org/licenses/LICENSE-2.0
! ! Unless required by applicable law or agreed to in writing, software !
distributed under the License is distributed on an "AS IS" BASIS, ! WITHOUT
WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ! See the
License for the specific language governing permissions and ! limitations
under the License. ! -->
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
<configuration>
<!-- Hivesterix Execution Parameters -->
<property>
<name>hive.hyracks.connectorpolicy</name>
<value>PIPELINING</value>
</property>
<property>
<name>hive.hyracks.parrallelism</name>
<value>4</value>
</property>
<property>
<name>hive.algebricks.groupby.external</name>
<value>true</value>
</property>
<property>
<name>hive.algebricks.groupby.external.memory</name>
<value>33554432</value>
</property>
<property>
<name>hive.algebricks.sort.memory</name>
<value>33554432</value>
</property>
<property>
<name>hive.algebricks.framesize</name>
<value>32768</value>
</property>
<!-- Hive Execution Parameters -->
<property>
<name>mapred.reduce.tasks</name>
<value>-1</value>
<description>The default number of reduce tasks per job. Typically
set
to a prime close to the number of available hosts. Ignored when
mapred.job.tracker is "local". Hadoop sets this to 1 by default,
whereas hive uses -1 as its default value.
By setting this property
to -1, Hive will automatically figure out
what should be the number
of reducers.
</description>
</property>
<property>
<name>hive.exec.reducers.bytes.per.reducer</name>
<value>1000000000</value>
<description>size per reducer. The default is 1G, i.e. if the input
size is 10G, it will use 10 reducers.
</description>
</property>
<property>
<name>hive.exec.reducers.max</name>
<value>999</value>
<description>max number of reducers that will be used. If the one
specified in the configuration parameter mapred.reduce.tasks is
negative, hive will use this one as the max number of reducers when
automatically determining the number of reducers.
</description>
</property>
<property>
<name>hive.cli.print.header</name>
<value>false</value>
<description>Whether to print the names of the columns in query
output.
</description>
</property>
<property>
<name>hive.cli.print.current.db</name>
<value>false</value>
<description>Whether to include the current database in the hive
prompt.
</description>
</property>
<property>
<name>hive.cli.prompt</name>
<value>hive</value>
<description>Command line prompt configuration value. Other hiveconf
can be used in
this configuration value. Variable substitution will
only be invoked at
the hive
cli startup.
</description>
</property>
<property>
<name>hive.cli.pretty.output.num.cols</name>
<value>-1</value>
<description>The number of columns to use when formatting output
generated
by the DESCRIBE PRETTY table_name command. If the value of
this
property
is -1, then hive will use the auto-detected terminal
width.
</description>
</property>
<property>
<name>hive.exec.scratchdir</name>
<value>/tmp/hive-${user.name}</value>
<description>Scratch space for Hive jobs</description>
</property>
<property>
<name>hive.exec.local.scratchdir</name>
<value>/tmp/${user.name}</value>
<description>Local scratch space for Hive jobs</description>
</property>
<property>
<name>hive.test.mode</name>
<value>false</value>
<description>whether hive is running in test mode. If yes, it turns
on sampling and prefixes the output tablename
</description>
</property>
<property>
<name>hive.test.mode.prefix</name>
<value>test_</value>
<description>if hive is running in test mode, prefixes the output
table by this string
</description>
</property>
<!-- If the input table is not bucketed, the denominator of the tablesample
is determined by the parameter below -->
<!-- For example, the following query: -->
<!-- INSERT OVERWRITE TABLE dest -->
<!-- SELECT col1 from src -->
<!-- would be converted to -->
<!-- INSERT OVERWRITE TABLE test_dest -->
<!-- SELECT col1 from src TABLESAMPLE (BUCKET 1 out of 32 on rand(1)) -->
<property>
<name>hive.test.mode.samplefreq</name>
<value>32</value>
<description>if hive is running in test mode and table is not
bucketed, sampling frequency
</description>
</property>
<property>
<name>hive.test.mode.nosamplelist</name>
<value></value>
<description>if hive is running in test mode, don't sample the above
comma separated list of tables
</description>
</property>
<property>
<name>hive.metastore.uris</name>
<value></value>
<description>Thrift uri for the remote metastore. Used by metastore
client to connect to remote metastore.
</description>
</property>
<property>
<name>javax.jdo.option.ConnectionURL</name>
<value>jdbc:derby:;databaseName=metastore_db;create=true</value>
<description>JDBC connect string for a JDBC metastore</description>
</property>
<property>
<name>javax.jdo.option.ConnectionDriverName</name>
<value>org.apache.derby.jdbc.EmbeddedDriver</value>
<description>Driver class name for a JDBC metastore</description>
</property>
<property>
<name>javax.jdo.PersistenceManagerFactoryClass</name>
<value>org.datanucleus.jdo.JDOPersistenceManagerFactory</value>
<description>class implementing the jdo persistence</description>
</property>
<property>
<name>javax.jdo.option.DetachAllOnCommit</name>
<value>true</value>
<description>detaches all objects from session so that they can be
used after transaction is committed
</description>
</property>
<property>
<name>javax.jdo.option.NonTransactionalRead</name>
<value>true</value>
<description>reads outside of transactions</description>
</property>
<property>
<name>javax.jdo.option.ConnectionUserName</name>
<value>APP</value>
<description>username to use against metastore database</description>
</property>
<property>
<name>javax.jdo.option.ConnectionPassword</name>
<value>mine</value>
<description>password to use against metastore database</description>
</property>
<property>
<name>javax.jdo.option.Multithreaded</name>
<value>true</value>
<description>Set this to true if multiple threads access metastore
through JDO concurrently.
</description>
</property>
<property>
<name>datanucleus.connectionPoolingType</name>
<value>DBCP</value>
<description>Uses a DBCP connection pool for JDBC metastore
</description>
</property>
<property>
<name>datanucleus.validateTables</name>
<value>false</value>
<description>validates existing schema against code. turn this on if
you want to verify existing schema
</description>
</property>
<property>
<name>datanucleus.validateColumns</name>
<value>false</value>
<description>validates existing schema against code. turn this on if
you want to verify existing schema
</description>
</property>
<property>
<name>datanucleus.validateConstraints</name>
<value>false</value>
<description>validates existing schema against code. turn this on if
you want to verify existing schema
</description>
</property>
<property>
<name>datanucleus.storeManagerType</name>
<value>rdbms</value>
<description>metadata store type</description>
</property>
<property>
<name>datanucleus.autoCreateSchema</name>
<value>true</value>
<description>creates necessary schema on a startup if one doesn't
exist. set this to false, after creating it once
</description>
</property>
<property>
<name>datanucleus.autoStartMechanismMode</name>
<value>checked</value>
<description>throw exception if metadata tables are incorrect
</description>
</property>
<property>
<name>datanucleus.transactionIsolation</name>
<value>read-committed</value>
<description>Default transaction isolation level for identity
generation.
</description>
</property>
<property>
<name>datanucleus.cache.level2</name>
<value>false</value>
<description>Use a level 2 cache. Turn this off if metadata is
changed independently of hive metastore server
</description>
</property>
<property>
<name>datanucleus.cache.level2.type</name>
<value>SOFT</value>
<description>SOFT=soft reference based cache, WEAK=weak reference
based cache.
</description>
</property>
<property>
<name>datanucleus.identifierFactory</name>
<value>datanucleus</value>
<description>Name of the identifier factory to use when generating
table/column names etc. 'datanucleus' is used for backward
compatibility
</description>
</property>
<property>
<name>datanucleus.plugin.pluginRegistryBundleCheck</name>
<value>LOG</value>
<description>Defines what happens when plugin bundles are found and
are duplicated [EXCEPTION|LOG|NONE]
</description>
</property>
<property>
<name>hive.metastore.warehouse.dir</name>
<value>/user/hive/warehouse</value>
<description>location of default database for the warehouse
</description>
</property>
<property>
<name>hive.metastore.execute.setugi</name>
<value>false</value>
<description>In unsecure mode, setting this property to true will
cause the metastore to execute DFS operations using the client's
reported user and group permissions. Note that this property must be
set on both the client and server sides. Further note that it's best
effort. If client sets it to true and server sets it to false,
client setting will be ignored.
</description>
</property>
<property>
<name>hive.metastore.event.listeners</name>
<value></value>
<description>list of comma separated listeners for metastore events.
</description>
</property>
<property>
<name>hive.metastore.partition.inherit.table.properties</name>
<value></value>
<description>list of comma separated keys occurring in table
properties which will get inherited to newly created partitions. *
implies all the keys will get inherited.
</description>
</property>
<property>
<name>hive.metadata.export.location</name>
<value></value>
<description>When used in conjunction with the
org.apache.hadoop.hive.ql.parse.MetaDataExportListener pre event
listener, it is the location to which the metadata will be exported.
The default is an empty string, which results in the metadata being
exported to the current user's home directory on HDFS.
</description>
</property>
<property>
<name>hive.metadata.move.exported.metadata.to.trash</name>
<value></value>
<description>When used in conjunction with the
org.apache.hadoop.hive.ql.parse.MetaDataExportListener pre event
listener, this setting determines if the metadata that is exported
will subsequently be moved to the user's trash directory alongside
the dropped table data. This ensures that the metadata will be
cleaned up along with the dropped table data.
</description>
</property>
<property>
<name>hive.metastore.partition.name.whitelist.pattern</name>
<value></value>
<description>Partition names will be checked against this regex
pattern and rejected if not matched.
</description>
</property>
<property>
<name>hive.metastore.end.function.listeners</name>
<value></value>
<description>list of comma separated listeners for the end of
metastore functions.
</description>
</property>
<property>
<name>hive.metastore.event.expiry.duration</name>
<value>0</value>
<description>Duration after which events expire from events table (in
seconds)
</description>
</property>
<property>
<name>hive.metastore.event.clean.freq</name>
<value>0</value>
<description>Frequency at which timer task runs to purge expired
events in metastore(in seconds).
</description>
</property>
<property>
<name>hive.metastore.connect.retries</name>
<value>5</value>
<description>Number of retries while opening a connection to
metastore
</description>
</property>
<property>
<name>hive.metastore.failure.retries</name>
<value>3</value>
<description>Number of retries upon failure of Thrift metastore calls
</description>
</property>
<property>
<name>hive.metastore.client.connect.retry.delay</name>
<value>1</value>
<description>Number of seconds for the client to wait between
consecutive connection attempts
</description>
</property>
<property>
<name>hive.metastore.client.socket.timeout</name>
<value>20</value>
<description>MetaStore Client socket timeout in seconds</description>
</property>
<property>
<name>hive.metastore.rawstore.impl</name>
<value>org.apache.hadoop.hive.metastore.ObjectStore</value>
<description>Name of the class that implements
org.apache.hadoop.hive.metastore.rawstore interface. This class is
used for storage and retrieval of raw metadata objects such as table,
database
</description>
</property>
<property>
<name>hive.metastore.batch.retrieve.max</name>
<value>300</value>
<description>Maximum number of objects (tables/partitions) that can be
retrieved from metastore in one batch. The higher the number, the
fewer round trips are needed to the Hive metastore
server, but it may also cause higher memory requirement at the
client side.
</description>
</property>
<property>
<name>hive.metastore.batch.retrieve.table.partition.max</name>
<value>1000</value>
<description>Maximum number of table partitions that metastore
internally retrieves in one batch.
</description>
</property>
<property>
<name>hive.default.fileformat</name>
<value>TextFile</value>
<description>Default file format for CREATE TABLE statement. Options
are TextFile and SequenceFile. Users can explicitly say CREATE TABLE
... STORED AS &lt;TEXTFILE|SEQUENCEFILE&gt; to override
</description>
</property>
<property>
<name>hive.fileformat.check</name>
<value>true</value>
<description>Whether to check file format or not when loading data
files
</description>
</property>
<property>
<name>hive.map.aggr</name>
<value>true</value>
<description>Whether to use map-side aggregation in Hive Group By
queries
</description>
</property>
<property>
<name>hive.groupby.skewindata</name>
<value>false</value>
<description>Whether there is skew in data to optimize group by
queries
</description>
</property>
<property>
<name>hive.optimize.multigroupby.common.distincts</name>
<value>true</value>
<description>Whether to optimize a multi-groupby query with the same
distinct.
Consider a query like:
from src
insert overwrite table dest1
select col1, count(distinct colx) group by
col1
insert overwrite table
dest2 select col2, count(distinct colx) group by
col2;
With this
parameter set to true, first we spray by the distinct value
(colx),
and then
perform the 2 groups bys. This makes sense if map-side
aggregation is
turned off. However,
with map-side aggregation, it
might be useful in some cases to treat
the 2 inserts independently,
thereby performing the query above in 2MR jobs instead of 3 (due to
spraying by distinct key first).
If this parameter is turned off, we
don't consider the fact that the
distinct key is the same across
different MR jobs.
</description>
</property>
<property>
<name>hive.groupby.mapaggr.checkinterval</name>
<value>100000</value>
<description>Number of rows after which size of the grouping
keys/aggregation classes is performed
</description>
</property>
<property>
<name>hive.mapred.local.mem</name>
<value>0</value>
<description>For local mode, memory of the mappers/reducers
</description>
</property>
<property>
<name>hive.mapjoin.followby.map.aggr.hash.percentmemory</name>
<value>0.3</value>
<description>Portion of total memory to be used by map-side group
aggregation hash table, when this group by is followed by map join
</description>
</property>
<property>
<name>hive.map.aggr.hash.force.flush.memory.threshold</name>
<value>0.9</value>
<description>The max memory to be used by map-side group aggregation
hash table, if the memory usage is higher than this number, force to
flush data
</description>
</property>
<property>
<name>hive.map.aggr.hash.percentmemory</name>
<value>0.5</value>
<description>Portion of total memory to be used by map-side group
aggregation hash table
</description>
</property>
<property>
<name>hive.map.aggr.hash.min.reduction</name>
<value>0.5</value>
<description>Hash aggregation will be turned off if the ratio between
hash
table size and input rows is bigger than this number. Set to 1
to make
sure
hash aggregation is never turned off.
</description>
</property>
<property>
<name>hive.optimize.cp</name>
<value>true</value>
<description>Whether to enable column pruner</description>
</property>
<property>
<name>hive.optimize.index.filter</name>
<value>false</value>
<description>Whether to enable automatic use of indexes</description>
</property>
<property>
<name>hive.optimize.index.groupby</name>
<value>false</value>
<description>Whether to enable optimization of group-by queries using
Aggregate indexes.
</description>
</property>
<property>
<name>hive.optimize.ppd</name>
<value>true</value>
<description>Whether to enable predicate pushdown</description>
</property>
<property>
<name>hive.optimize.ppd.storage</name>
<value>true</value>
<description>Whether to push predicates down into storage handlers.
Ignored when hive.optimize.ppd is false.
</description>
</property>
<property>
<name>hive.ppd.recognizetransivity</name>
<value>true</value>
<description>Whether to transitively replicate predicate filters over
equijoin conditions.
</description>
</property>
<property>
<name>hive.optimize.groupby</name>
<value>true</value>
<description>Whether to enable the bucketed group by from bucketed
partitions/tables.
</description>
</property>
<property>
<name>hive.optimize.skewjoin.compiletime</name>
<value>false</value>
<description>Whether to create a separate plan for skewed keys for
the tables in the join.
This is based on the skewed keys stored in
the metadata. At compile
time, the plan is broken
into different
joins: one for the skewed keys, and the other for the
remaining keys.
And then,
a union is performed for the 2 joins generated above. So
unless the
same skewed key is present
in both the joined tables, the
join for the skewed key will be
performed as a map-side join.
The main
difference between this parameter and hive.optimize.skewjoin
is that
this parameter
uses the skew information stored in the metastore to
optimize the plan
at compile time itself.
If there is no skew
information in the metadata, this parameter will
not have any effect.
Both hive.optimize.skewjoin.compiletime and hive.optimize.skewjoin
should be set to true.
Ideally, hive.optimize.skewjoin should be
renamed as
hive.optimize.skewjoin.runtime, but not doing
so for
backward compatibility.
If the skew information is correctly stored
in the metadata,
hive.optimize.skewjoin.compiletime
would change the
query plan to take care of it, and
hive.optimize.skewjoin will be a
no-op.
</description>
</property>
<property>
<name>hive.optimize.union.remove</name>
<value>false</value>
<description>
Whether to remove the union and push the operators
between union and the
filesink above
union. This avoids an extra scan
of the output by union. This is
independently useful for union
queries, and especially useful when
hive.optimize.skewjoin.compiletime is set
to true, since an
extra
union is inserted.
The merge is triggered if either of
hive.merge.mapfiles or
hive.merge.mapredfiles is set to true.
If the
user has set hive.merge.mapfiles to true and
hive.merge.mapredfiles
to false, the idea was that the
number of reducers is small, so the number
of files is small anyway.
However, with this optimization,
we are
increasing the number of files possibly by a big margin. So, we
merge aggressively.
</description>
</property>
<property>
<name>hive.mapred.supports.subdirectories</name>
<value>false</value>
<description>Whether the version of hadoop which is running supports
sub-directories for tables/partitions.
Many hive optimizations can be
applied if the hadoop version supports
sub-directories for
tables/partitions. It was added by MAPREDUCE-1501
</description>
</property>
<property>
<name>hive.multigroupby.singlemr</name>
<value>true</value>
<description>Whether to optimize multi group by query to generate
single M/R
job plan. If the multi group by query has common group by
keys, it will
be
optimized to generate single M/R job.
</description>
</property>
<property>
<name>hive.map.groupby.sorted</name>
<value>false</value>
<description>If the bucketing/sorting properties of the table exactly
match the grouping key, whether to
perform the group by in the mapper
by using BucketizedHiveInputFormat. The
only downside to this
is that
it limits the number of mappers to the number of files.
</description>
</property>
<property>
<name>hive.map.groupby.sorted.testmode</name>
<value>false</value>
<description>If the bucketing/sorting properties of the table exactly
match the grouping key, whether to
perform the group by in the mapper
by using BucketizedHiveInputFormat. If
the test mode is set, the plan
is not converted, but a query property is set to denote the same.
</description>
</property>
<property>
<name>hive.new.job.grouping.set.cardinality</name>
<value>30</value>
<description>
Whether a new map-reduce job should be launched for
grouping
sets/rollups/cubes.
For a query like: select a, b, c,
count(1) from T group by a, b, c with
rollup;
4 rows are created per
row: (a, b, c), (a, b, null), (a, null, null),
(null, null, null).
This can lead to explosion across map-reduce boundary if the
cardinality
of T is very high,
and map-side aggregation does not do a
very good job.
This parameter decides if hive should add an
additional map-reduce job.
If the grouping set
cardinality (4 in the
example above), is more than this value, a new MR job is
added under
the
assumption that the original group by will reduce the data size.
</description>
</property>
<property>
<name>hive.join.emit.interval</name>
<value>1000</value>
<description>How many rows in the right-most join operand Hive should
buffer before emitting the join result.
</description>
</property>
<property>
<name>hive.join.cache.size</name>
<value>25000</value>
<description>How many rows in the joining tables (except the
streaming table) should be cached in memory.
</description>
</property>
<property>
<name>hive.mapjoin.bucket.cache.size</name>
<value>100</value>
<description>How many values in each keys in the map-joined table
should be cached in memory.
</description>
</property>
<property>
<name>hive.mapjoin.cache.numrows</name>
<value>25000</value>
<description>How many rows should be cached by jdbm for map join.
</description>
</property>
<property>
<name>hive.optimize.skewjoin</name>
<value>false</value>
<description>Whether to enable skew join optimization.
The algorithm
is as follows: At runtime, detect the keys with a large
skew. Instead
of
processing those keys, store them temporarily in a hdfs directory.
In a
follow-up map-reduce
job, process those skewed keys. The same key
need not be skewed for all
the tables, and so,
the follow-up
map-reduce job (for the skewed keys) would be much
faster, since it
would be a
map-join.
</description>
</property>
<property>
<name>hive.skewjoin.key</name>
<value>100000</value>
<description>Determine if we get a skew key in join. If we see more
than the specified number of rows with the same key in join
operator,
we treat the key as a skew join key.
</description>
</property>
<property>
<name>hive.skewjoin.mapjoin.map.tasks</name>
<value>10000</value>
<description> Determine the number of map tasks used in the follow up
map join job
for a skew join. It should be used together with
hive.skewjoin.mapjoin.min.split
to perform a fine grained control.
</description>
</property>
<property>
<name>hive.skewjoin.mapjoin.min.split</name>
<value>33554432</value>
<description> Determine the maximum number of map tasks used in the
follow up map join job
for a skew join by specifying the minimum
split size. It should be used
together with
hive.skewjoin.mapjoin.map.tasks to perform a fine grained control.
</description>
</property>
<property>
<name>hive.mapred.mode</name>
<value>nonstrict</value>
<description>The mode in which the hive operations are being
performed.
In strict mode, some risky queries are not allowed to run.
They
include:
Cartesian Product.
No partition being picked up for a
query.
Comparing bigints and strings.
Comparing bigints and doubles.
Orderby without limit.
</description>
</property>
<property>
<name>hive.enforce.bucketmapjoin</name>
<value>false</value>
<description>If the user asked for bucketed map-side join, and it
cannot be performed,
should the query fail or not? For example, if the
buckets in the tables being
joined are
not a multiple of each other,
bucketed map-side join cannot be
performed, and the
query will fail if
hive.enforce.bucketmapjoin is set to true.
</description>
</property>
<property>
<name>hive.exec.script.maxerrsize</name>
<value>100000</value>
<description>Maximum number of bytes a script is allowed to emit to
standard error (per map-reduce task). This prevents runaway scripts
from filling logs partitions to capacity
</description>
</property>
<property>
<name>hive.exec.script.allow.partial.consumption</name>
<value>false</value>
<description> When enabled, this option allows a user script to exit
successfully without consuming all the data from the standard input.
</description>
</property>
<property>
<name>hive.script.operator.id.env.var</name>
<value>HIVE_SCRIPT_OPERATOR_ID</value>
<description> Name of the environment variable that holds the unique
script operator ID in the user's transform function (the custom
mapper/reducer that the user has specified in the query)
</description>
</property>
<property>
<name>hive.script.operator.truncate.env</name>
<value>false</value>
<description>Truncate each environment variable for external script
in scripts operator to 20KB (to fit system limits)
</description>
</property>
<property>
<name>hive.exec.compress.output</name>
<value>false</value>
<description> This controls whether the final outputs of a query (to
a local/hdfs file or a hive table) is compressed. The compression
codec and other options are determined from hadoop config variables
mapred.output.compress*
</description>
</property>
<property>
<name>hive.exec.compress.intermediate</name>
<value>false</value>
<description> This controls whether intermediate files produced by
hive between multiple map-reduce jobs are compressed. The
compression codec and other options are determined from hadoop
config variables mapred.output.compress*
</description>
</property>
<property>
<name>hive.exec.parallel</name>
<value>false</value>
<description>Whether to execute jobs in parallel</description>
</property>
<property>
<name>hive.exec.parallel.thread.number</name>
<value>8</value>
<description>How many jobs at most can be executed in parallel
</description>
</property>
<property>
<name>hive.exec.rowoffset</name>
<value>false</value>
<description>Whether to provide the row offset virtual column
</description>
</property>
<property>
<name>hive.task.progress</name>
<value>false</value>
<description>Whether Hive should periodically update task progress
counters during execution. Enabling this allows task progress to be
monitored more closely in the job tracker, but may impose a
performance penalty. This flag is automatically set to true for jobs
with hive.exec.dynamic.partition set to true.
</description>
</property>
<property>
<name>hive.hwi.war.file</name>
<value>lib/hive-hwi-@VERSION@.war</value>
<description>This sets the path to the HWI war file, relative to
${HIVE_HOME}.
</description>
</property>
<property>
<name>hive.hwi.listen.host</name>
<value>0.0.0.0</value>
<description>This is the host address the Hive Web Interface will
listen on
</description>
</property>
<property>
<name>hive.hwi.listen.port</name>
<value>9999</value>
<description>This is the port the Hive Web Interface will listen on
</description>
</property>
<property>
<name>hive.exec.pre.hooks</name>
<value></value>
<description>Comma-separated list of pre-execution hooks to be
invoked for each statement. A pre-execution hook is specified as the
name of a Java class which implements the
org.apache.hadoop.hive.ql.hooks.ExecuteWithHookContext interface.
</description>
</property>
<property>
<name>hive.exec.post.hooks</name>
<value></value>
<description>Comma-separated list of post-execution hooks to be
invoked for each statement. A post-execution hook is specified as
the name of a Java class which implements the
org.apache.hadoop.hive.ql.hooks.ExecuteWithHookContext interface.
</description>
</property>
<property>
<name>hive.exec.failure.hooks</name>
<value></value>
<description>Comma-separated list of on-failure hooks to be invoked
for each statement. An on-failure hook is specified as the name of
Java class which implements the
org.apache.hadoop.hive.ql.hooks.ExecuteWithHookContext interface.
</description>
</property>
<property>
<name>hive.metastore.init.hooks</name>
<value></value>
<description>A comma separated list of hooks to be invoked at the
beginning of HMSHandler initialization. Aninit hook is specified as
the name of Java class which extends
org.apache.hadoop.hive.metastore.MetaStoreInitListener.
</description>
</property>
<property>
<name>hive.client.stats.publishers</name>
<value></value>
<description>Comma-separated list of statistics publishers to be
invoked on counters on each job. A client stats publisher is
specified as the name of a Java class which implements the
org.apache.hadoop.hive.ql.stats.ClientStatsPublisher interface.
</description>
</property>
<property>
<name>hive.client.stats.counters</name>
<value></value>
<description>Subset of counters that should be of interest for
hive.client.stats.publishers (when one wants to limit their
publishing). Non-display names should be used
</description>
</property>
<property>
<name>hive.merge.mapfiles</name>
<value>true</value>
<description>Merge small files at the end of a map-only job
</description>
</property>
<property>
<name>hive.merge.mapredfiles</name>
<value>false</value>
<description>Merge small files at the end of a map-reduce job
</description>
</property>
<property>
<name>hive.heartbeat.interval</name>
<value>1000</value>
<description>Send a heartbeat after this interval - used by mapjoin
and filter operators
</description>
</property>
<property>
<name>hive.merge.size.per.task</name>
<value>256000000</value>
<description>Size of merged files at the end of the job</description>
</property>
<property>
<name>hive.merge.smallfiles.avgsize</name>
<value>16000000</value>
<description>When the average output file size of a job is less than
this number, Hive will start an additional map-reduce job to merge
the output files into bigger files. This is only done for map-only
jobs if hive.merge.mapfiles is true, and for map-reduce jobs if
hive.merge.mapredfiles is true.
</description>
</property>
<property>
<name>hive.mapjoin.smalltable.filesize</name>
<value>25000000</value>
<description>The threshold for the input file size of the small
tables; if the file size is smaller than this threshold, it will try
to convert the common join into map join
</description>
</property>
<property>
<name>hive.ignore.mapjoin.hint</name>
<value>true</value>
<description>Ignore the mapjoin hint</description>
</property>
<property>
<name>hive.mapjoin.localtask.max.memory.usage</name>
<value>0.90</value>
<description>This number means how much memory the local task can
take to hold the key/value into in-memory hash table; If the local
task's memory usage is more than this number, the local task will be
abort by themself. It means the data of small table is too large to
be hold in the memory.
</description>
</property>
<property>
<name>hive.mapjoin.followby.gby.localtask.max.memory.usage</name>
<value>0.55</value>
<description>This number means how much memory the local task can
take to hold the key/value into in-memory hash table when this map
join followed by a group by; If the local task's memory usage is
more than this number, the local task will be abort by themself. It
means the data of small table is too large to be hold in the memory.
</description>
</property>
<property>
<name>hive.mapjoin.check.memory.rows</name>
<value>100000</value>
<description>The number means after how many rows processed it needs
to check the memory usage
</description>
</property>
<property>
<name>hive.auto.convert.join</name>
<value>false</value>
<description>Whether Hive enable the optimization about converting
common join into mapjoin based on the input file size
</description>
</property>
<property>
<name>hive.auto.convert.join.noconditionaltask</name>
<value>true</value>
<description>Whether Hive enable the optimization about converting
common join into mapjoin based on the input file
size. If this
paramater is on, and the sum of size for n-1 of the
tables/partitions for a n-way join is smaller than the
specified
size, the join is directly converted to a mapjoin (there is no
conditional task).
</description>
</property>
<property>
<name>hive.auto.convert.join.noconditionaltask.size</name>
<value>10000000</value>
<description>If hive.auto.convert.join.noconditionaltask is off, this
parameter does not take affect. However, if it
is on, and the sum of
size for n-1 of the tables/partitions for a
n-way join is smaller
than this size, the join is directly
converted to a mapjoin(there is
no conditional task). The default is 10MB
</description>
</property>
<property>
<name>hive.optimize.mapjoin.mapreduce</name>
<value>false</value>
<description>If hive.auto.convert.join is off, this parameter does
not take
affect. If it is on, and if there are map-join jobs followed
by a
map-reduce
job (for e.g a group by), each map-only job is merged
with the
following
map-reduce job.
</description>
</property>
<property>
<name>hive.script.auto.progress</name>
<value>false</value>
<description>Whether Hive Tranform/Map/Reduce Clause should
automatically send progress information to TaskTracker to avoid the
task getting killed because of inactivity. Hive sends progress
information when the script is outputting to stderr. This option
removes the need of periodically producing stderr messages, but
users should be cautious because this may prevent infinite loops in
the scripts to be killed by TaskTracker.
</description>
</property>
<property>
<name>hive.script.serde</name>
<value>org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe</value>
<description>The default serde for trasmitting input data to and
reading output data from the user scripts.
</description>
</property>
<property>
<name>hive.binary.record.max.length</name>
<value>1000</value>
<description>Read from a binary stream and treat each
hive.binary.record.max.length bytes as a record.
The last record
before the end of stream can have less than
hive.binary.record.max.length bytes
</description>
</property>
<property>
<name>hive.script.recordreader</name>
<value>org.apache.hadoop.hive.ql.exec.TextRecordReader</value>
<description>The default record reader for reading data from the user
scripts.
</description>
</property>
<property>
<name>hive.script.recordwriter</name>
<value>org.apache.hadoop.hive.ql.exec.TextRecordWriter</value>
<description>The default record writer for writing data to the user
scripts.
</description>
</property>
<property>
<name>hive.input.format</name>
<value>org.apache.hadoop.hive.ql.io.CombineHiveInputFormat</value>
<description>The default input format. Set this to HiveInputFormat if
you encounter problems with CombineHiveInputFormat.
</description>
</property>
<property>
<name>hive.udtf.auto.progress</name>
<value>false</value>
<description>Whether Hive should automatically send progress
information to TaskTracker when using UDTF's to prevent the task
getting killed because of inactivity. Users should be cautious
because this may prevent TaskTracker from killing tasks with infinte
loops.
</description>
</property>
<property>
<name>hive.mapred.reduce.tasks.speculative.execution</name>
<value>true</value>
<description>Whether speculative execution for reducers should be
turned on.
</description>
</property>
<property>
<name>hive.exec.counters.pull.interval</name>
<value>1000</value>
<description>The interval with which to poll the JobTracker for the
counters the running job. The smaller it is the more load there will
be on the jobtracker, the higher it is the less granular the caught
will be.
</description>
</property>
<property>
<name>hive.querylog.location</name>
<value>/tmp/${user.name}</value>
<description>
Location of Hive run time structured log file
</description>
</property>
<property>
<name>hive.querylog.enable.plan.progress</name>
<value>true</value>
<description>
Whether to log the plan's progress every time a job's
progress is checked.
These logs are written to the location specified
by
hive.querylog.location
</description>
</property>
<property>
<name>hive.querylog.plan.progress.interval</name>
<value>60000</value>
<description>
The interval to wait between logging the plan's progress
in
milliseconds.
If there is a whole number percentage change in the
progress of the
mappers or the reducers,
the progress is logged
regardless of this value.
The actual interval will be the ceiling of
(this value divided by the
value of
hive.exec.counters.pull.interval)
multiplied by the value of hive.exec.counters.pull.interval
I.e. if
it is not divide evenly by the value of
hive.exec.counters.pull.interval it will be
logged less frequently
than specified.
This only has an effect if
hive.querylog.enable.plan.progress is set to
true.
</description>
</property>
<property>
<name>hive.enforce.bucketing</name>
<value>false</value>
<description>Whether bucketing is enforced. If true, while inserting
into the table, bucketing is enforced.
</description>
</property>
<property>
<name>hive.enforce.sorting</name>
<value>false</value>
<description>Whether sorting is enforced. If true, while inserting
into the table, sorting is enforced.
</description>
</property>
<property>
<name>hive.optimize.bucketingsorting</name>
<value>true</value>
<description>If hive.enforce.bucketing or hive.enforce.sorting is
true, dont create a reducer for enforcing
bucketing/sorting for
queries of the form:
insert overwrite table T2 select * from T1;
where T1 and T2 are bucketed/sorted by the same keys into the same
number
of buckets.
</description>
</property>
<property>
<name>hive.enforce.sortmergebucketmapjoin</name>
<value>false</value>
<description>If the user asked for sort-merge bucketed map-side join,
and it cannot be performed,
should the query fail or not ?
</description>
</property>
<property>
<name>hive.auto.convert.sortmerge.join</name>
<value>false</value>
<description>Will the join be automatically converted to a sort-merge
join, if the joined tables pass
the criteria for sort-merge join.
</description>
</property>
<property>
<!-- The name and value elements are each kept on a single line.
Hadoop's Configuration.get() returns the value text verbatim (only
getTrimmed() strips whitespace), so a line break before the closing tag
would become part of the configured class name. -->
<name>hive.auto.convert.sortmerge.join.bigtable.selection.policy</name>
<value>org.apache.hadoop.hive.ql.optimizer.AvgPartitionSizeBasedBigTableSelectorForAutoSMJ</value>
<description>The policy to choose the big table for automatic
conversion to sort-merge join.
By default, the table with the largest partitions is assigned the big
table. All policies are:
. based on position of the table - the leftmost table is selected
org.apache.hadoop.hive.ql.optimizer.LeftmostBigTableSMJ.
. based on total size (all the partitions selected in the query) of
the table
org.apache.hadoop.hive.ql.optimizer.TableSizeBasedBigTableSelectorForAutoSMJ.
. based on average size (all the partitions selected in the query)
of the table
org.apache.hadoop.hive.ql.optimizer.AvgPartitionSizeBasedBigTableSelectorForAutoSMJ.
New policies can be added in future.
</description>
</property>
<property>
<name>hive.metastore.ds.connection.url.hook</name>
<value></value>
<description>Name of the hook to use for retriving the JDO connection
URL. If empty, the value in javax.jdo.option.ConnectionURL is used
</description>
</property>
<property>
<name>hive.metastore.ds.retry.attempts</name>
<value>1</value>
<description>The number of times to retry a metastore call if there
were a connection error
</description>
</property>
<property>
<name>hive.metastore.ds.retry.interval</name>
<value>1000</value>
<description>The number of miliseconds between metastore retry
attempts
</description>
</property>
<property>
<name>hive.metastore.server.min.threads</name>
<value>200</value>
<description>Minimum number of worker threads in the Thrift server's
pool.
</description>
</property>
<property>
<name>hive.metastore.server.max.threads</name>
<value>100000</value>
<description>Maximum number of worker threads in the Thrift server's
pool.
</description>
</property>
<property>
<name>hive.metastore.server.tcp.keepalive</name>
<value>true</value>
<description>Whether to enable TCP keepalive for the metastore
server. Keepalive will prevent accumulation of half-open
connections.
</description>
</property>
<property>
<name>hive.metastore.sasl.enabled</name>
<value>false</value>
<description>If true, the metastore thrift interface will be secured
with SASL. Clients must authenticate with Kerberos.
</description>
</property>
<property>
<name>hive.metastore.thrift.framed.transport.enabled</name>
<value>false</value>
<description>If true, the metastore thrift interface will use
TFramedTransport. When false (default) a standard TTransport is
used.
</description>
</property>
<property>
<name>hive.metastore.kerberos.keytab.file</name>
<value></value>
<description>The path to the Kerberos Keytab file containing the
metastore thrift server's service principal.
</description>
</property>
<property>
<name>hive.metastore.kerberos.principal</name>
<value>hive-metastore/_HOST@EXAMPLE.COM</value>
<description>The service principal for the metastore thrift server.
The special string _HOST will be replaced automatically with the
correct host name.
</description>
</property>
<property>
<name>hive.cluster.delegation.token.store.class</name>
<value>org.apache.hadoop.hive.thrift.MemoryTokenStore</value>
<description>The delegation token store implementation. Set to
org.apache.hadoop.hive.thrift.ZooKeeperTokenStore for load-balanced
cluster.
</description>
</property>
<property>
<!-- The property name is kept on one line, like every other property
in this file, so the key does not depend on the loader trimming
surrounding whitespace. -->
<name>hive.cluster.delegation.token.store.zookeeper.connectString</name>
<value>localhost:2181</value>
<description>The ZooKeeper token store connect string.</description>
</property>
<property>
<name>hive.cluster.delegation.token.store.zookeeper.znode</name>
<value>/hive/cluster/delegation</value>
<description>The root path for token store data.</description>
</property>
<property>
<name>hive.cluster.delegation.token.store.zookeeper.acl</name>
<!-- The value is kept on one line: whitespace inside the value element
is returned verbatim by Configuration.get(), so a line break before
the closing tag would be appended to the last ACL entry. -->
<value>sasl:hive/host1@EXAMPLE.COM:cdrwa,sasl:hive/host2@EXAMPLE.COM:cdrwa</value>
<description>ACL for token store entries. List comma separated all
server principals for the cluster.
</description>
</property>
<property>
<name>hive.metastore.cache.pinobjtypes</name>
<!-- The value is kept on one line: whitespace inside the value element
is returned verbatim by Configuration.get(), so a line break before
the closing tag would be appended to the last list item. -->
<value>Table,StorageDescriptor,SerDeInfo,Partition,Database,Type,FieldSchema,Order</value>
<description>List of comma separated metastore object types that
should be pinned in the cache
</description>
</property>
<property>
<name>hive.optimize.reducededuplication</name>
<value>true</value>
<description>Remove extra map-reduce jobs if the data is already
clustered by the same key which needs to be used again. This should
always be set to true. Since it is a new feature, it has been made
configurable.
</description>
</property>
<property>
<name>hive.optimize.reducededuplication.min.reducer</name>
<value>4</value>
<description>Reduce deduplication merges two RSs by moving
key/parts/reducer-num of the child RS to parent RS.
That means if
reducer-num of the child RS is fixed (order by or forced
bucketing)
and small, it can make very slow, single MR.
The optimization will be
disabled if number of reducers is less than
specified value.
</description>
</property>
<property>
<name>hive.exec.dynamic.partition</name>
<value>true</value>
<description>Whether or not to allow dynamic partitions in DML/DDL.
</description>
</property>
<property>
<name>hive.exec.dynamic.partition.mode</name>
<value>strict</value>
<description>In strict mode, the user must specify at least one
static partition in case the user accidentally overwrites all
partitions.
</description>
</property>
<property>
<name>hive.exec.max.dynamic.partitions</name>
<value>1000</value>
<description>Maximum number of dynamic partitions allowed to be
created in total.
</description>
</property>
<property>
<name>hive.exec.max.dynamic.partitions.pernode</name>
<value>100</value>
<description>Maximum number of dynamic partitions allowed to be
created in each mapper/reducer node.
</description>
</property>
<property>
<name>hive.exec.max.created.files</name>
<value>100000</value>
<description>Maximum number of HDFS files created by all
mappers/reducers in a MapReduce job.
</description>
</property>
<property>
<name>hive.exec.default.partition.name</name>
<value>__HIVE_DEFAULT_PARTITION__</value>
<description>The default partition name in case the dynamic partition
column value is null/empty string or anyother values that cannot be
escaped. This value must not contain any special character used in
HDFS URI (e.g., ':', '%', '/' etc). The user has to be aware that
the dynamic partition value should not contain this value to avoid
confusions.
</description>
</property>
<property>
<name>hive.stats.dbclass</name>
<value>jdbc:derby</value>
<description>The default database that stores temporary hive
statistics.
</description>
</property>
<property>
<name>hive.stats.autogather</name>
<value>true</value>
<description>A flag to gather statistics automatically during the
INSERT OVERWRITE command.
</description>
</property>
<property>
<name>hive.stats.jdbcdriver</name>
<value>org.apache.derby.jdbc.EmbeddedDriver</value>
<description>The JDBC driver for the database that stores temporary
hive statistics.
</description>
</property>
<property>
<name>hive.stats.dbconnectionstring</name>
<value>jdbc:derby:;databaseName=TempStatsStore;create=true</value>
<description>The default connection string for the database that
stores temporary hive statistics.
</description>
</property>
<property>
<name>hive.stats.default.publisher</name>
<value></value>
<description>The Java class (implementing the StatsPublisher
interface) that is used by default if hive.stats.dbclass is not JDBC
or HBase.
</description>
</property>
<property>
<name>hive.stats.default.aggregator</name>
<value></value>
<description>The Java class (implementing the StatsAggregator
interface) that is used by default if hive.stats.dbclass is not JDBC
or HBase.
</description>
</property>
<property>
<name>hive.stats.jdbc.timeout</name>
<value>30</value>
<description>Timeout value (number of seconds) used by JDBC
connection and statements.
</description>
</property>
<property>
<name>hive.stats.retries.max</name>
<value>0</value>
<description>Maximum number of retries when stats
publisher/aggregator got an exception updating intermediate
database. Default is no tries on failures.
</description>
</property>
<property>
<name>hive.stats.retries.wait</name>
<value>3000</value>
<description>The base waiting window (in milliseconds) before the
next retry. The actual wait time is calculated by baseWindow *
failues baseWindow * (failure 1) * (random number between
[0.0,1.0]).
</description>
</property>
<property>
<name>hive.stats.reliable</name>
<value>false</value>
<description>Whether queries will fail because stats cannot be
collected completely accurately.
If this is set to true,
reading/writing from/into a partition may fail
becuase the stats
could not be computed accurately.
</description>
</property>
<property>
<name>hive.stats.collect.tablekeys</name>
<value>false</value>
<description>Whether join and group by keys on tables are derived and
maintained in the QueryPlan.
This is useful to identify how tables
are accessed and to determine if
they should be bucketed.
</description>
</property>
<property>
<name>hive.stats.collect.scancols</name>
<value>false</value>
<description>Whether column accesses are tracked in the QueryPlan.
This is useful to identify how tables are accessed and to determine
if there are wasted columns that can be trimmed.
</description>
</property>
<property>
<name>hive.stats.ndv.error</name>
<value>20.0</value>
<description>Standard error expressed in percentage. Provides a
tradeoff between accuracy and compute cost.A lower value for error
indicates higher accuracy and a higher compute cost.
</description>
</property>
<property>
<name>hive.stats.key.prefix.max.length</name>
<value>200</value>
<description>
Determines if when the prefix of the key used for
intermediate stats
collection
exceeds a certain length, a hash of the
key is used instead. If the value
&lt; 0 then hashing
is never used,
if the value >= 0 then hashing is used only when the
key prefixes
length
exceeds that value. The key prefix is defined as everything
preceding the
task ID in the key.
</description>
</property>
<property>
<name>hive.support.concurrency</name>
<value>false</value>
<description>Whether hive supports concurrency or not. A zookeeper
instance must be up and running for the default hive lock manager to
support read-write locks.
</description>
</property>
<property>
<name>hive.lock.numretries</name>
<value>100</value>
<description>The number of times you want to try to get all the locks
</description>
</property>
<property>
<name>hive.unlock.numretries</name>
<value>10</value>
<description>The number of times you want to retry to do one unlock
</description>
</property>
<property>
<name>hive.lock.sleep.between.retries</name>
<value>60</value>
<description>The sleep time (in seconds) between various retries
</description>
</property>
<property>
<name>hive.zookeeper.quorum</name>
<value></value>
<description>The list of zookeeper servers to talk to. This is only
needed for read/write locks.
</description>
</property>
<property>
<name>hive.zookeeper.client.port</name>
<value>2181</value>
<description>The port of zookeeper servers to talk to. This is only
needed for read/write locks.
</description>
</property>
<property>
<name>hive.zookeeper.session.timeout</name>
<value>600000</value>
<description>Zookeeper client's session timeout. The client is
disconnected, and as a result, all locks released, if a heartbeat is
not sent in the timeout.
</description>
</property>
<property>
<name>hive.zookeeper.namespace</name>
<value>hive_zookeeper_namespace</value>
<description>The parent node under which all zookeeper nodes are
created.
</description>
</property>
<property>
<name>hive.zookeeper.clean.extra.nodes</name>
<value>false</value>
<description>Clean extra nodes at the end of the session.
</description>
</property>
<property>
<name>fs.har.impl</name>
<value>org.apache.hadoop.hive.shims.HiveHarFileSystem</value>
<description>The implementation for accessing Hadoop Archives. Note
that this won't be applicable to Hadoop vers less than 0.20
</description>
</property>
<property>
<name>hive.archive.enabled</name>
<value>false</value>
<description>Whether archiving operations are permitted</description>
</property>
<property>
<name>hive.fetch.output.serde</name>
<value>org.apache.hadoop.hive.serde2.DelimitedJSONSerDe</value>
<description>The serde used by FetchTask to serialize the fetch
output.
</description>
</property>
<property>
<name>hive.exec.mode.local.auto</name>
<value>false</value>
<description> Let hive determine whether to run in local mode
automatically
</description>
</property>
<property>
<name>hive.exec.drop.ignorenonexistent</name>
<value>true</value>
<description>
Do not report an error if DROP TABLE/VIEW specifies a
non-existent
table/view
</description>
</property>
<property>
<name>hive.exec.show.job.failure.debug.info</name>
<value>true</value>
<description>
If a job fails, whether to provide a link in the CLI to
the task with
the
most failures, along with debugging hints if
applicable.
</description>
</property>
<property>
<name>hive.auto.progress.timeout</name>
<value>0</value>
<description>
How long to run autoprogressor for the script/UDTF
operators (in
seconds).
Set to 0 for forever.
</description>
</property>
<!-- HBase Storage Handler Parameters -->
<property>
<name>hive.hbase.wal.enabled</name>
<value>true</value>
<description>Whether writes to HBase should be forced to the
write-ahead log. Disabling this improves HBase write performance at
the risk of lost writes in case of a crash.
</description>
</property>
<property>
<name>hive.table.parameters.default</name>
<value></value>
<description>Default property values for newly created tables
</description>
</property>
<property>
<name>hive.entity.separator</name>
<value>@</value>
<description>Separator used to construct names of tables and
partitions. For example, dbname@tablename@partitionname
</description>
</property>
<property>
<name>hive.ddl.createtablelike.properties.whitelist</name>
<value></value>
<description>Table Properties to copy over when executing a Create
Table Like.
</description>
</property>
<property>
<name>hive.variable.substitute</name>
<value>true</value>
<description>This enables substitution using syntax like ${var}
${system:var} and ${env:var}.
</description>
</property>
<property>
<name>hive.variable.substitute.depth</name>
<value>40</value>
<description>The maximum replacements the substitution engine will
do.
</description>
</property>
<property>
<name>hive.conf.validation</name>
<value>true</value>
<description>Eables type checking for registered hive configurations
</description>
</property>
<property>
<name>hive.security.authorization.enabled</name>
<value>false</value>
<description>enable or disable the hive client authorization
</description>
</property>
<property>
<name>hive.security.authorization.createtable.user.grants</name>
<value></value>
<description>the privileges automatically granted to some users
whenever a table gets created.
An example like
"userX,userY:select;userZ:create" will grant select
privilege to
userX and userY,
and grant create privilege to userZ whenever a new
table created.
</description>
</property>
<property>
<name>hive.security.authorization.createtable.group.grants</name>
<value></value>
<description>the privileges automatically granted to some groups
whenever a table gets created.
An example like
"groupX,groupY:select;groupZ:create" will grant select
privilege to
groupX and groupY,
and grant create privilege to groupZ whenever a
new table created.
</description>
</property>
<property>
<name>hive.security.authorization.createtable.role.grants</name>
<value></value>
<description>the privileges automatically granted to some roles
whenever a table gets created.
An example like
"roleX,roleY:select;roleZ:create" will grant select
privilege to
roleX and roleY,
and grant create privilege to roleZ whenever a new
table created.
</description>
</property>
<property>
<name>hive.security.authorization.createtable.owner.grants</name>
<value></value>
<description>the privileges automatically granted to the owner
whenever a table gets created.
An example like "select,drop" will
grant select and drop privilege to
the owner of the table
</description>
</property>
<property>
<name>hive.metastore.authorization.storage.checks</name>
<value>false</value>
<description>Should the metastore do authorization checks against the
underlying storage
for operations like drop-partition (disallow the
drop-partition if the
user in
question doesn't have permissions to
delete the corresponding directory
on the storage).
</description>
</property>
<property>
<name>hive.error.on.empty.partition</name>
<value>false</value>
<description>Whether to throw an excpetion if dynamic partition
insert generates empty results.
</description>
</property>
<property>
<name>hive.index.compact.file.ignore.hdfs</name>
<value>false</value>
<description>True the hdfs location stored in the index file will be
igbored at runtime.
If the data got moved or the name of the cluster
got changed, the
index data should still be usable.
</description>
</property>
<property>
<name>hive.optimize.index.filter.compact.minsize</name>
<value>5368709120</value>
<description>Minimum size (in bytes) of the inputs on which a compact
index is automatically used.
</description>
</property>
<property>
<name>hive.optimize.index.filter.compact.maxsize</name>
<value>-1</value>
<description>Maximum size (in bytes) of the inputs on which a compact
index is automatically used.
A negative number is equivalent to
infinity.
</description>
</property>
<property>
<name>hive.index.compact.query.max.size</name>
<value>10737418240</value>
<description>The maximum number of bytes that a query using the
compact index can read. Negative value is equivalent to infinity.
</description>
</property>
<property>
<name>hive.index.compact.query.max.entries</name>
<value>10000000</value>
<description>The maximum number of index entries to read during a
query that uses the compact index. Negative value is equivalent to
infinity.
</description>
</property>
<property>
<name>hive.index.compact.binary.search</name>
<value>true</value>
<description>Whether or not to use a binary search to find the
entries in an index table that match the filter, where possible
</description>
</property>
<property>
<name>hive.exim.uri.scheme.whitelist</name>
<value>hdfs,pfile</value>
<description>A comma separated list of acceptable URI schemes for
import and export.
</description>
</property>
<property>
<name>hive.lock.mapred.only.operation</name>
<value>false</value>
<description>This param controls whether locks are taken only on
queries
that need to execute at least one mapred job.
</description>
</property>
<property>
<name>hive.limit.row.max.size</name>
<value>100000</value>
<description>When trying a smaller subset of data for simple LIMIT,
the minimum size we need to guarantee
each row to have.
</description>
</property>
<property>
<name>hive.limit.optimize.limit.file</name>
<value>10</value>
<description>When trying a smaller subset of data for simple LIMIT,
maximum number of files we can
sample.
</description>
</property>
<property>
<name>hive.limit.optimize.enable</name>
<value>false</value>
<description>Whether to enable the optimization of trying a smaller
subset of data for simple LIMIT first.
</description>
</property>
<property>
<name>hive.limit.optimize.fetch.max</name>
<value>50000</value>
<description>Maximum number of rows allowed for a smaller subset of
data for simple LIMIT, if it is a fetch query.
Insert queries are not
restricted by this limit.
</description>
</property>
<property>
<name>hive.rework.mapredwork</name>
<value>false</value>
<description>should rework the mapred work or not.
This is first
introduced by SymlinkTextInputFormat to replace symlink
files with
real paths at compile time.
</description>
</property>
<property>
<name>hive.exec.concatenate.check.index</name>
<value>true</value>
<description>If this is set to true, hive will throw an error when doing
'alter table tbl_name [partSpec] concatenate' on a table/partition
that has indexes on it. The reason a user would want to set this to true
is that it can help the user avoid handling all index drop,
recreation,
rebuild work. This is very helpful for tables with
thousands of partitions.
</description>
</property>
<property>
<name>hive.sample.seednumber</name>
<value>0</value>
<description>A number used for percentage sampling. By changing this
number, user will change the subsets
of data sampled.
</description>
</property>
<property>
<name>hive.io.exception.handlers</name>
<value></value>
<description>A list of io exception handler class names. This is used
to construct a list of exception handlers to handle exceptions thrown
by record readers
</description>
</property>
<property>
<name>hive.autogen.columnalias.prefix.label</name>
<value>_c</value>
<description>String used as a prefix when auto generating column
alias.
By default the prefix label will be appended with a column
position
number to form the column alias. Auto generation would
happen if an
aggregate function is used in a select clause without an
explicit
alias.
</description>
</property>
<property>
<name>hive.autogen.columnalias.prefix.includefuncname</name>
<value>false</value>
<description>Whether to include function name in the column alias
auto generated by hive.
</description>
</property>
<property>
<name>hive.exec.perf.logger</name>
<value>org.apache.hadoop.hive.ql.log.PerfLogger</value>
<description>The class responsible for logging client side performance
metrics. Must be a subclass of
org.apache.hadoop.hive.ql.log.PerfLogger
</description>
</property>
<property>
<name>hive.start.cleanup.scratchdir</name>
<value>false</value>
<description>To cleanup the hive scratchdir while starting the hive
server
</description>
</property>
<property>
<name>hive.output.file.extension</name>
<value></value>
<description>String used as a file extension for output files. If not
set, defaults to the codec extension for text files (e.g. ".gz"), or
no extension otherwise.
</description>
</property>
<property>
<name>hive.insert.into.multilevel.dirs</name>
<value>false</value>
<description>Whether to insert into multilevel directories like
"insert
directory '/HIVEFT25686/chinna/' from table"
</description>
</property>
<property>
<name>hive.warehouse.subdir.inherit.perms</name>
<value>false</value>
<description>Set this to true if the table directories should
inherit the
permission of the warehouse or database directory instead
of being created
with the permissions derived from dfs umask
</description>
</property>
<property>
<name>hive.exec.job.debug.capture.stacktraces</name>
<value>true</value>
<description>Whether or not stack traces parsed from the task logs of
a sampled failed task for
each failed job should be stored in the
SessionState
</description>
</property>
<property>
<name>hive.exec.driver.run.hooks</name>
<value></value>
<description>A comma separated list of hooks which implement
HiveDriverRunHook and will be run at the
beginning and end of
Driver.run, these will be run in the order specified
</description>
</property>
<property>
<name>hive.ddl.output.format</name>
<value>text</value>
<description>
The data format to use for DDL output. One of "text"
(for human
readable text) or "json" (for a json object).
</description>
</property>
<property>
<name>hive.transform.escape.input</name>
<value>false</value>
<description>
This adds an option to escape special chars (newlines,
carriage returns
and
tabs) when they are passed to the user script.
This is useful if the hive
tables
can contain data that contains
special characters.
</description>
</property>
<property>
<name>hive.exec.rcfile.use.explicit.header</name>
<value>true</value>
<description>
If this is set the header for RC Files will simply be
RCF. If this is
not
set the header will be that borrowed from sequence
files, e.g. SEQ-
followed
by the input and output RC File formats.
</description>
</property>
<property>
<name>hive.multi.insert.move.tasks.share.dependencies</name>
<value>false</value>
<description>
If this is set all move tasks for tables/partitions (not
directories)
at the end of a
multi-insert query will only begin once
the dependencies for all these move
tasks have been
met.
Advantages: If
concurrency is enabled, the locks will only be released once the
query has
finished, so with this config enabled, the time when the
table/partition is
generated will be much closer to when the lock on
it is released.
Disadvantages: If concurrency is not enabled, with
this disabled,
the tables/partitions which
are produced by this query
and finish earlier will be available for
querying
much earlier. Since
the locks are only released once the query finishes,
this
does not
apply if concurrency is enabled.
</description>
</property>
<property>
<name>hive.fetch.task.conversion</name>
<value>minimal</value>
<description>
Some select queries can be converted to single FETCH
task minimizing
latency.
Currently the query should be single sourced
not having any subquery and
should not have
any aggregations or
distincts (which incurs RS), lateral views and
joins.
1. minimal :
SELECT STAR, FILTER on partition columns, LIMIT only
2. more :
SELECT, FILTER, LIMIT only (TABLESAMPLE, virtual columns)
</description>
</property>
<property>
<name>hive.hmshandler.retry.attempts</name>
<value>1</value>
<description>The number of times to retry a HMSHandler call if there
were a connection error
</description>
</property>
<property>
<name>hive.hmshandler.retry.interval</name>
<value>1000</value>
<description>The number of milliseconds between HMSHandler retry
attempts
</description>
</property>
<property>
<name>hive.server.read.socket.timeout</name>
<value>10</value>
<description>Timeout for the HiveServer to close the connection if no
response from the client in N seconds, defaults to 10 seconds.
</description>
</property>
<property>
<name>hive.server.tcp.keepalive</name>
<value>true</value>
<description>Whether to enable TCP keepalive for the Hive server.
Keepalive will prevent accumulation of half-open connections.
</description>
</property>
<property>
<name>hive.decode.partition.name</name>
<value>false</value>
<description>Whether to show the unquoted partition names in query
results.
</description>
</property>
<property>
<name>hive.log4j.file</name>
<value></value>
<description>Hive log4j configuration file.
If the property is not
set, then logging will be initialized using
hive-log4j.properties
found on the classpath.
If the property is set, the value must be a
valid URI (java.net.URI,
e.g. "file:///tmp/my-logging.properties"),
which you can then
extract a URL from and pass to
PropertyConfigurator.configure(URL).
</description>
</property>
<property>
<name>hive.exec.log4j.file</name>
<value></value>
<description>Hive log4j configuration file for execution mode(sub
command).
If the property is not set, then logging will be
initialized using
hive-exec-log4j.properties found on the classpath.
If the property is set, the value must be a valid URI (java.net.URI,
e.g. "file:///tmp/my-logging.properties"), which you can then
extract a URL from and pass to PropertyConfigurator.configure(URL).
</description>
</property>
<property>
<name>hive.exec.infer.bucket.sort</name>
<value>false</value>
<description>
If this is set, when writing partitions, the metadata
will include the
bucketing/sorting
properties with which the data was
written if any (this will not overwrite the
metadata
inherited from
the table if the table is bucketed/sorted)
</description>
</property>
<property>
<name>hive.exec.infer.bucket.sort.num.buckets.power.two</name>
<value>false</value>
<description>
If this is set, when setting the number of reducers for
the map reduce
task which writes the
final output files, it will
choose a number which is a power of two,
unless the user specifies
the number of reducers to use using mapred.reduce.tasks. The number
of
reducers
may be set to a power of two, only to be followed by a
merge task
meaning preventing
anything from being inferred.
With
hive.exec.infer.bucket.sort set to true:
Advantages: If this is not
set, the number of buckets for partitions will seem
arbitrary,
which
means that the number of mappers used for optimized joins, for
example, will
be very low. With this set, since the number of buckets
used for any
partition is
a power of two, the number of mappers used
for optimized joins will
be the least
number of buckets used by any
partition being joined.
Disadvantages: This may mean a much larger or
much smaller number of reducers
being used in the
final map reduce
job, e.g. if a job was originally going to take 257
reducers,
it will
now take 512 reducers, similarly if the max number of reducers
is
511,
and a job was going to use this many, it will now use 256
reducers.
</description>
</property>
<property>
<name>hive.groupby.orderby.position.alias</name>
<value>false</value>
<description>Whether to enable using Column Position Alias in Group
By or Order By
</description>
</property>
<property>
<name>hive.server2.thrift.min.worker.threads</name>
<value>5</value>
<description>Minimum number of Thrift worker threads</description>
</property>
<property>
<name>hive.server2.thrift.max.worker.threads</name>
<value>100</value>
<description>Maximum number of Thrift worker threads</description>
</property>
<property>
<name>hive.server2.thrift.port</name>
<value>10000</value>
<description>Port number of HiveServer2 Thrift interface.
Can be
overridden by setting $HIVE_SERVER2_THRIFT_PORT
</description>
</property>
<property>
<name>hive.server2.thrift.bind.host</name>
<value>localhost</value>
<description>Bind host on which to run the HiveServer2 Thrift
interface.
Can be overridden by setting
$HIVE_SERVER2_THRIFT_BIND_HOST
</description>
</property>
<property>
<name>hive.server2.authentication</name>
<value>NONE</value>
<description>
Client authentication types.
NONE: no authentication
check
LDAP: LDAP/AD based authentication
KERBEROS: Kerberos/GSSAPI
authentication
CUSTOM: Custom authentication provider
(Use with
property hive.server2.custom.authentication.class)
</description>
</property>
<property>
<name>hive.server2.custom.authentication.class</name>
<value></value>
<description>
Custom authentication class. Used when property
'hive.server2.authentication' is set to 'CUSTOM'. Provided class
must be a proper implementation of the interface
org.apache.hive.service.auth.PasswdAuthenticationProvider.
HiveServer2
will call its Authenticate(user, passed) method to
authenticate
requests.
The implementation may optionally extend the
Hadoop's
org.apache.hadoop.conf.Configured class to grab Hive's
Configuration
object.
</description>
</property>
<property>
<name>hive.server2.authentication.kerberos.principal</name>
<value></value>
<description>
Kerberos server principal
</description>
</property>
<property>
<name>hive.server2.authentication.kerberos.keytab</name>
<value></value>
<description>
Kerberos keytab file for server principal
</description>
</property>
<property>
<name>hive.server2.authentication.ldap.url</name>
<value></value>
<description>
LDAP connection URL
</description>
</property>
<property>
<name>hive.server2.authentication.ldap.baseDN</name>
<value></value>
<description>
LDAP base DN
</description>
</property>
<property>
<name>hive.server2.enable.doAs</name>
<value>true</value>
<description>
Setting this property to true will have hive server2
execute
hive operations as the user making the calls to it.
</description>
</property>
</configuration>
<!-- Hive Execution Parameters -->
<property>
<name>mapred.reduce.tasks</name>
<value>-1</value>
<description>The default number of reduce tasks per job. Typically set
to a prime close to the number of available hosts. Ignored when
mapred.job.tracker is "local". Hadoop set this to 1 by default,
whereas hive uses -1 as its default value.
By setting this property to
-1, Hive will automatically figure out what
should be the number of
reducers.
</description>
</property>
<property>
<name>hive.exec.reducers.bytes.per.reducer</name>
<value>1000000000</value>
<description>size per reducer. The default is 1G, i.e. if the input size
is 10G, it will use 10 reducers.
</description>
</property>
<property>
<name>hive.exec.reducers.max</name>
<value>999</value>
<description>The maximum number of reducers that will be used. If the one
specified
in the configuration parameter mapred.reduce.tasks is
negative, hive
will use this one as the max number of reducers when
automatically
determining the number of reducers.
</description>
</property>
<property>
<name>hive.cli.print.header</name>
<value>false</value>
<description>Whether to print the names of the columns in query
output.
</description>
</property>
<property>
<name>hive.cli.print.current.db</name>
<value>false</value>
<description>Whether to include the current database in the hive
prompt.
</description>
</property>
<property>
<name>hive.cli.prompt</name>
<value>hive</value>
<description>Command line prompt configuration value. Other hiveconf
can be used in
this configuration value. Variable substitution will
only be invoked at
the hive
cli startup.
</description>
</property>
<property>
<name>hive.cli.pretty.output.num.cols</name>
<value>-1</value>
<description>The number of columns to use when formatting output
generated
by the DESCRIBE PRETTY table_name command. If the value of
this
property
is -1, then hive will use the auto-detected terminal
width.
</description>
</property>
<property>
<name>hive.exec.scratchdir</name>
<value>/tmp/hive-${user.name}</value>
<description>Scratch space for Hive jobs</description>
</property>
<property>
<name>hive.exec.local.scratchdir</name>
<value>/tmp/${user.name}</value>
<description>Local scratch space for Hive jobs</description>
</property>
<property>
<name>hive.test.mode</name>
<value>false</value>
<description>whether hive is running in test mode. If yes, it turns on
sampling and prefixes the output tablename
</description>
</property>
<property>
<name>hive.test.mode.prefix</name>
<value>test_</value>
<description>if hive is running in test mode, prefixes the output
table by this string
</description>
</property>
<!-- If the input table is not bucketed, the denominator of the tablesample
is determined by the parameter below -->
<!-- For example, the following query: -->
<!-- INSERT OVERWRITE TABLE dest -->
<!-- SELECT col1 from src -->
<!-- would be converted to -->
<!-- INSERT OVERWRITE TABLE test_dest -->
<!-- SELECT col1 from src TABLESAMPLE (BUCKET 1 out of 32 on rand(1)) -->
<property>
<name>hive.test.mode.samplefreq</name>
<value>32</value>
<description>if hive is running in test mode and table is not
bucketed, sampling frequency
</description>
</property>
<property>
<name>hive.test.mode.nosamplelist</name>
<value></value>
<description>if hive is running in test mode, don't sample the above
comma separated list of tables
</description>
</property>
<property>
<name>hive.metastore.uris</name>
<value></value>
<description>Thrift uri for the remote metastore. Used by metastore
client to connect to remote metastore.
</description>
</property>
<property>
<name>javax.jdo.option.ConnectionURL</name>
<value>jdbc:derby:;databaseName=metastore_db;create=true</value>
<description>JDBC connect string for a JDBC metastore</description>
</property>
<property>
<name>javax.jdo.option.ConnectionDriverName</name>
<value>org.apache.derby.jdbc.EmbeddedDriver</value>
<description>Driver class name for a JDBC metastore</description>
</property>
<property>
<name>javax.jdo.PersistenceManagerFactoryClass</name>
<value>org.datanucleus.jdo.JDOPersistenceManagerFactory</value>
<description>class implementing the jdo persistence</description>
</property>
<property>
<name>javax.jdo.option.DetachAllOnCommit</name>
<value>true</value>
<description>detaches all objects from session so that they can be
used after transaction is committed
</description>
</property>
<property>
<name>javax.jdo.option.NonTransactionalRead</name>
<value>true</value>
<description>reads outside of transactions</description>
</property>
<property>
<name>javax.jdo.option.ConnectionUserName</name>
<value>APP</value>
<description>username to use against metastore database</description>
</property>
<property>
<name>javax.jdo.option.ConnectionPassword</name>
<value>mine</value>
<description>password to use against metastore database</description>
</property>
<property>
<name>javax.jdo.option.Multithreaded</name>
<value>true</value>
<description>Set this to true if multiple threads access metastore
through JDO concurrently.
</description>
</property>
<property>
<name>datanucleus.connectionPoolingType</name>
<value>DBCP</value>
<description>Uses a DBCP connection pool for JDBC metastore
</description>
</property>
<property>
<name>datanucleus.validateTables</name>
<value>false</value>
<description>validates existing schema against code. turn this on if
you want to verify existing schema
</description>
</property>
<property>
<name>datanucleus.validateColumns</name>
<value>false</value>
<description>validates existing schema against code. turn this on if
you want to verify existing schema
</description>
</property>
<property>
<name>datanucleus.validateConstraints</name>
<value>false</value>
<description>validates existing schema against code. turn this on if
you want to verify existing schema
</description>
</property>
<property>
<name>datanucleus.storeManagerType</name>
<value>rdbms</value>
<description>metadata store type</description>
</property>
<property>
<name>datanucleus.autoCreateSchema</name>
<value>true</value>
<description>creates necessary schema on a startup if one doesn't
exist. set this to false, after creating it once
</description>
</property>
<property>
<name>datanucleus.autoStartMechanismMode</name>
<value>checked</value>
<description>throw exception if metadata tables are incorrect
</description>
</property>
<property>
<name>datanucleus.transactionIsolation</name>
<value>read-committed</value>
<description>Default transaction isolation level for identity
generation.
</description>
</property>
<property>
<name>datanucleus.cache.level2</name>
<value>false</value>
<description>Use a level 2 cache. Turn this off if metadata is changed
independently of hive metastore server
</description>
</property>
<property>
<name>datanucleus.cache.level2.type</name>
<value>SOFT</value>
<description>SOFT=soft reference based cache, WEAK=weak reference
based cache.
</description>
</property>
<property>
<name>datanucleus.identifierFactory</name>
<value>datanucleus</value>
<description>Name of the identifier factory to use when generating
table/column names etc. 'datanucleus' is used for backward
compatibility
</description>
</property>
<property>
<name>datanucleus.plugin.pluginRegistryBundleCheck</name>
<value>LOG</value>
<description>Defines what happens when plugin bundles are found and
are duplicated [EXCEPTION|LOG|NONE]
</description>
</property>
<property>
<name>hive.metastore.warehouse.dir</name>
<value>/user/hive/warehouse</value>
<description>location of default database for the warehouse
</description>
</property>
<property>
<name>hive.metastore.execute.setugi</name>
<value>false</value>
<description>In unsecure mode, setting this property to true will
cause the metastore to execute DFS operations using the client's
reported user and group permissions. Note that this property must be
set on both the client and server sides. Further note that it's best
effort. If client sets it to true and server sets it to false,
client setting will be ignored.
</description>
</property>
<property>
<name>hive.metastore.event.listeners</name>
<value></value>
<description>list of comma separated listeners for metastore events.
</description>
</property>
<property>
<name>hive.metastore.partition.inherit.table.properties</name>
<value></value>
<description>list of comma separated keys occurring in table
properties which will get inherited to newly created partitions. *
implies all the keys will get inherited.
</description>
</property>
<property>
<name>hive.metadata.export.location</name>
<value></value>
<description>When used in conjunction with the
org.apache.hadoop.hive.ql.parse.MetaDataExportListener pre event
listener, it is the location to which the metadata will be exported.
The default is an empty string, which results in the metadata being
exported to the current user's home directory on HDFS.
</description>
</property>
<property>
<name>hive.metadata.move.exported.metadata.to.trash</name>
<value></value>
<description>When used in conjunction with the
org.apache.hadoop.hive.ql.parse.MetaDataExportListener pre event
listener, this setting determines if the metadata that is exported
will subsequently be moved to the user's trash directory alongside
the dropped table data. This ensures that the metadata will be
cleaned up along with the dropped table data.
</description>
</property>
<property>
<name>hive.metastore.partition.name.whitelist.pattern</name>
<value></value>
<description>Partition names will be checked against this regex
pattern and rejected if not matched.
</description>
</property>
<property>
<name>hive.metastore.end.function.listeners</name>
<value></value>
<description>list of comma separated listeners for the end of
metastore functions.
</description>
</property>
<property>
<name>hive.metastore.event.expiry.duration</name>
<value>0</value>
<description>Duration after which events expire from events table (in
seconds)
</description>
</property>
<property>
<name>hive.metastore.event.clean.freq</name>
<value>0</value>
<description>Frequency at which timer task runs to purge expired
events in metastore(in seconds).
</description>
</property>
<property>
<name>hive.metastore.connect.retries</name>
<value>5</value>
<description>Number of retries while opening a connection to metastore
</description>
</property>
<property>
<name>hive.metastore.failure.retries</name>
<value>3</value>
<description>Number of retries upon failure of Thrift metastore calls
</description>
</property>
<property>
<name>hive.metastore.client.connect.retry.delay</name>
<value>1</value>
<description>Number of seconds for the client to wait between
consecutive connection attempts
</description>
</property>
<property>
<name>hive.metastore.client.socket.timeout</name>
<value>20</value>
<description>MetaStore Client socket timeout in seconds</description>
</property>
<property>
<name>hive.metastore.rawstore.impl</name>
<value>org.apache.hadoop.hive.metastore.ObjectStore</value>
<description>Name of the class that implements
org.apache.hadoop.hive.metastore.rawstore interface. This class is
used for storage and retrieval of raw metadata objects such as table,
database
</description>
</property>
<property>
<name>hive.metastore.batch.retrieve.max</name>
<value>300</value>
<description>Maximum number of objects (tables/partitions) that can be
retrieved from metastore in one batch. The higher the number, the
fewer round trips are needed to the Hive metastore
server, but it may also cause higher memory requirement at the client
side.
</description>
</property>
<property>
<name>hive.metastore.batch.retrieve.table.partition.max</name>
<value>1000</value>
<description>Maximum number of table partitions that metastore
internally retrieves in one batch.
</description>
</property>
<property>
<name>hive.default.fileformat</name>
<value>TextFile</value>
<description>Default file format for CREATE TABLE statement. Options
are TextFile and SequenceFile. Users can explicitly say CREATE TABLE
... STORED AS &lt;TEXTFILE|SEQUENCEFILE&gt; to override</description>
</property>
<property>
<name>hive.fileformat.check</name>
<value>true</value>
<description>Whether to check file format or not when loading data
files
</description>
</property>
<property>
<name>hive.map.aggr</name>
<value>true</value>
<description>Whether to use map-side aggregation in Hive Group By
queries
</description>
</property>
<property>
<name>hive.groupby.skewindata</name>
<value>false</value>
<description>Whether there is skew in data to optimize group by
queries
</description>
</property>
<property>
<name>hive.optimize.multigroupby.common.distincts</name>
<value>true</value>
<description>Whether to optimize a multi-groupby query with the same
distinct.
Consider a query like:
from src
insert overwrite table dest1
select col1, count(distinct colx) group by
col1
insert overwrite table
dest2 select col2, count(distinct colx) group by
col2;
With this
parameter set to true, first we spray by the distinct value
(colx),
and then
perform the 2 groups bys. This makes sense if map-side
aggregation is turned
off. However,
with map-side aggregation, it
might be useful in some cases to treat the
2 inserts independently,
thereby performing the query above in 2MR jobs instead of 3 (due to
spraying
by distinct key first).
If this parameter is turned off, we
don't consider the fact that the
distinct key is the same across
different MR jobs.
</description>
</property>
<property>
<name>hive.groupby.mapaggr.checkinterval</name>
<value>100000</value>
<description>Number of rows after which the size check of the grouping
keys/aggregation classes is performed
</description>
</property>
<property>
<name>hive.mapred.local.mem</name>
<value>0</value>
<description>For local mode, memory of the mappers/reducers
</description>
</property>
<property>
<name>hive.mapjoin.followby.map.aggr.hash.percentmemory</name>
<value>0.3</value>
<description>Portion of total memory to be used by map-side group
aggregation hash table, when this group by is followed by map join
</description>
</property>
<property>
<name>hive.map.aggr.hash.force.flush.memory.threshold</name>
<value>0.9</value>
<description>The max memory to be used by map-side group aggregation
hash table, if the memory usage is higher than this number, force to
flush data
</description>
</property>
<property>
<name>hive.map.aggr.hash.percentmemory</name>
<value>0.5</value>
<description>Portion of total memory to be used by map-side group
aggregation hash table
</description>
</property>
<property>
<name>hive.map.aggr.hash.min.reduction</name>
<value>0.5</value>
<description>Hash aggregation will be turned off if the ratio between
hash
table size and input rows is bigger than this number. Set to 1 to
make
sure
hash aggregation is never turned off.
</description>
</property>
<property>
<name>hive.optimize.cp</name>
<value>true</value>
<description>Whether to enable column pruner</description>
</property>
<property>
<name>hive.optimize.index.filter</name>
<value>false</value>
<description>Whether to enable automatic use of indexes</description>
</property>
<property>
<name>hive.optimize.index.groupby</name>
<value>false</value>
<description>Whether to enable optimization of group-by queries using
Aggregate indexes.
</description>
</property>
<property>
<name>hive.optimize.ppd</name>
<value>true</value>
<description>Whether to enable predicate pushdown</description>
</property>
<property>
<name>hive.optimize.ppd.storage</name>
<value>true</value>
<description>Whether to push predicates down into storage handlers.
Ignored when hive.optimize.ppd is false.
</description>
</property>
<property>
<name>hive.ppd.recognizetransivity</name>
<value>true</value>
<description>Whether to transitively replicate predicate filters over
equijoin conditions.
</description>
</property>
<property>
<name>hive.optimize.groupby</name>
<value>true</value>
<description>Whether to enable the bucketed group by from bucketed
partitions/tables.
</description>
</property>
<property>
<name>hive.optimize.skewjoin.compiletime</name>
<value>false</value>
<description>Whether to create a separate plan for skewed keys for the
tables in the join.
This is based on the skewed keys stored in the
metadata. At compile time,
the plan is broken
into different joins: one
for the skewed keys, and the other for the
remaining keys. And then,
a
union is performed for the 2 joins generated above. So unless the
same skewed key is present
in both the joined tables, the join for the
skewed key will be
performed as a map-side join.
The main difference
between this parameter and hive.optimize.skewjoin is
that this
parameter
uses the skew information stored in the metastore to
optimize the plan at
compile time itself.
If there is no skew
information in the metadata, this parameter will
not have any effect.
Both hive.optimize.skewjoin.compiletime and hive.optimize.skewjoin
should
be set to true.
Ideally, hive.optimize.skewjoin should be
renamed as
hive.optimize.skewjoin.runtime, but not doing
so for
backward compatibility.
If the skew information is correctly stored in
the metadata,
hive.optimize.skewjoin.compiletime
would change the query
plan to take care of it, and hive.optimize.skewjoin
will be a no-op.
</description>
</property>
<property>
<name>hive.optimize.union.remove</name>
<value>false</value>
<description>
Whether to remove the union and push the operators
between union and the
filesink above
union. This avoids an extra scan
of the output by union. This is
independently useful for union
queries, and specially useful when hive.optimize.skewjoin.compiletime
is set
to true, since an
extra union is inserted.
The merge is triggered
if either of hive.merge.mapfiles or
hive.merge.mapredfiles is set to
true.
If the user has set hive.merge.mapfiles to true and
hive.merge.mapredfiles to false, the idea was the
number of reducers
are few, so the number of files anyway are small.
However, with this
optimization,
we are increasing the number of files possibly by a big
margin. So, we
merge aggressively.
</description>
</property>
<property>
<name>hive.mapred.supports.subdirectories</name>
<value>false</value>
<description>Whether the version of hadoop which is running supports
sub-directories for tables/partitions.
Many hive optimizations can be
applied if the hadoop version supports
sub-directories for
tables/partitions. It was added by MAPREDUCE-1501
</description>
</property>
<property>
<name>hive.multigroupby.singlemr</name>
<value>false</value>
<description>Whether to optimize multi group by query to generate
single M/R
job plan. If the multi group by query has common group by
keys, it will
be
optimized to generate single M/R job.
</description>
</property>
<property>
<name>hive.map.groupby.sorted</name>
<value>false</value>
<description>If the bucketing/sorting properties of the table exactly
match the grouping key, whether to
perform the group by in the mapper
by using BucketizedHiveInputFormat. The
only downside to this
is that
it limits the number of mappers to the number of files.
</description>
</property>
<property>
<name>hive.map.groupby.sorted.testmode</name>
<value>false</value>
<description>If the bucketing/sorting properties of the table exactly
match the grouping key, whether to
perform the group by in the mapper
by using BucketizedHiveInputFormat. If
the test mode is set, the plan
is not converted, but a query property is set to denote the same.
</description>
</property>
<property>
<name>hive.new.job.grouping.set.cardinality</name>
<value>30</value>
<description>
Whether a new map-reduce job should be launched for
grouping
sets/rollups/cubes.
For a query like: select a, b, c, count(1)
from T group by a, b, c with
rollup;
4 rows are created per row: (a, b,
c), (a, b, null), (a, null, null),
(null, null, null).
This can lead to
explosion across map-reduce boundary if the cardinality
of T is very
high,
and map-side aggregation does not do a very good job.
This
parameter decides if hive should add an additional map-reduce job.
If
the grouping set
cardinality (4 in the example above), is more than
this value, a new MR job is
added under the
assumption that the original
group by will reduce the data size.
</description>
</property>
<property>
<name>hive.join.emit.interval</name>
<value>1000</value>
<description>How many rows in the right-most join operand Hive should
buffer before emitting the join result.
</description>
</property>
<property>
<name>hive.join.cache.size</name>
<value>25000</value>
<description>How many rows in the joining tables (except the streaming
table) should be cached in memory.
</description>
</property>
<property>
<name>hive.mapjoin.bucket.cache.size</name>
<value>100</value>
<description>How many values for each key in the map-joined table
should be cached in memory.
</description>
</property>
<property>
<name>hive.mapjoin.cache.numrows</name>
<value>25000</value>
<description>How many rows should be cached by jdbm for map join.
</description>
</property>
<property>
<name>hive.optimize.skewjoin</name>
<value>false</value>
<description>Whether to enable skew join optimization.
The algorithm is
as follows: At runtime, detect the keys with a large
skew. Instead of
processing those keys, store them temporarily in a hdfs directory. In
a
follow-up map-reduce
job, process those skewed keys. The same key
need not be skewed for all
the tables, and so,
the follow-up map-reduce
job (for the skewed keys) would be much faster,
since it would be a
map-join.
</description>
</property>
<property>
<name>hive.skewjoin.key</name>
<value>100000</value>
<description>Determine if we get a skew key in join. If we see more
than the specified number of rows with the same key in join operator,
we treat the key as a skew join key.
</description>
</property>
<property>
<name>hive.skewjoin.mapjoin.map.tasks</name>
<value>10000</value>
<description> Determine the number of map tasks used in the follow up
map join job
for a skew join. It should be used together with
hive.skewjoin.mapjoin.min.split
to perform a fine grained control.
</description>
</property>
<property>
<name>hive.skewjoin.mapjoin.min.split</name>
<value>33554432</value>
<description> Determine the maximum number of map tasks used in the
follow up map join job
for a skew join by specifying the minimum split
size. It should be used
together with
hive.skewjoin.mapjoin.map.tasks
to perform a fine grained control.
</description>
</property>
<property>
<name>hive.mapred.mode</name>
<value>nonstrict</value>
<description>The mode in which the hive operations are being
performed.
In strict mode, some risky queries are not allowed to run.
They
include:
Cartesian Product.
No partition being picked up for a
query.
Comparing bigints and strings.
Comparing bigints and doubles.
Orderby without limit.
</description>
</property>
<property>
<name>hive.enforce.bucketmapjoin</name>
<value>false</value>
<description>If the user asked for bucketed map-side join, and it
cannot be performed,
should the query fail or not? For example, if the
buckets in the tables being
joined are
not a multiple of each other,
bucketed map-side join cannot be
performed, and the
query will fail if
hive.enforce.bucketmapjoin is set to true.
</description>
</property>
<property>
<name>hive.exec.script.maxerrsize</name>
<value>100000</value>
<description>Maximum number of bytes a script is allowed to emit to
standard error (per map-reduce task). This prevents runaway scripts
from filling log partitions to capacity.
</description>
</property>
<property>
<name>hive.exec.script.allow.partial.consumption</name>
<value>false</value>
<description> When enabled, this option allows a user script to exit
successfully without consuming all the data from the standard input.
</description>
</property>
<property>
<name>hive.script.operator.id.env.var</name>
<value>HIVE_SCRIPT_OPERATOR_ID</value>
<description> Name of the environment variable that holds the unique
script operator ID in the user's transform function (the custom
mapper/reducer that the user has specified in the query)
</description>
</property>
<property>
<name>hive.script.operator.truncate.env</name>
<value>false</value>
<description>Truncate each environment variable for external script in
scripts operator to 20KB (to fit system limits)
</description>
</property>
<property>
<name>hive.exec.compress.output</name>
<value>false</value>
<description> This controls whether the final outputs of a query (to a
local/hdfs file or a hive table) is compressed. The compression codec
and other options are determined from hadoop config variables
mapred.output.compress*
</description>
</property>
<property>
<name>hive.exec.compress.intermediate</name>
<value>false</value>
<description> This controls whether intermediate files produced by
hive between multiple map-reduce jobs are compressed. The compression
codec and other options are determined from hadoop config variables
mapred.output.compress*
</description>
</property>
<property>
<name>hive.exec.parallel</name>
<value>false</value>
<description>Whether to execute jobs in parallel</description>
</property>
<property>
<name>hive.exec.parallel.thread.number</name>
<value>8</value>
<description>How many jobs at most can be executed in parallel
</description>
</property>
<property>
<name>hive.exec.rowoffset</name>
<value>false</value>
<description>Whether to provide the row offset virtual column
</description>
</property>
<property>
<name>hive.task.progress</name>
<value>false</value>
<description>Whether Hive should periodically update task progress
counters during execution. Enabling this allows task progress to be
monitored more closely in the job tracker, but may impose a
performance penalty. This flag is automatically set to true for jobs
with hive.exec.dynamic.partition set to true.
</description>
</property>
<property>
<name>hive.hwi.war.file</name>
<value>lib/hive-hwi-@VERSION@.war</value>
<description>This sets the path to the HWI war file, relative to
${HIVE_HOME}.
</description>
</property>
<property>
<name>hive.hwi.listen.host</name>
<value>0.0.0.0</value>
<description>This is the host address the Hive Web Interface will
listen on
</description>
</property>
<property>
<name>hive.hwi.listen.port</name>
<value>9999</value>
<description>This is the port the Hive Web Interface will listen on
</description>
</property>
<property>
<name>hive.exec.pre.hooks</name>
<value></value>
<description>Comma-separated list of pre-execution hooks to be invoked
for each statement. A pre-execution hook is specified as the name of
a Java class which implements the
org.apache.hadoop.hive.ql.hooks.ExecuteWithHookContext interface.
</description>
</property>
<property>
<name>hive.exec.post.hooks</name>
<value></value>
<description>Comma-separated list of post-execution hooks to be
invoked for each statement. A post-execution hook is specified as the
name of a Java class which implements the
org.apache.hadoop.hive.ql.hooks.ExecuteWithHookContext interface.
</description>
</property>
<property>
<name>hive.exec.failure.hooks</name>
<value></value>
<description>Comma-separated list of on-failure hooks to be invoked
for each statement. An on-failure hook is specified as the name of
a Java class which implements the
org.apache.hadoop.hive.ql.hooks.ExecuteWithHookContext interface.
</description>
</property>
<property>
<name>hive.metastore.init.hooks</name>
<value></value>
<description>A comma separated list of hooks to be invoked at the
beginning of HMSHandler initialization. An init hook is specified as
the name of a Java class which extends
org.apache.hadoop.hive.metastore.MetaStoreInitListener.
</description>
</property>
<property>
<name>hive.client.stats.publishers</name>
<value></value>
<description>Comma-separated list of statistics publishers to be
invoked on counters on each job. A client stats publisher is
specified as the name of a Java class which implements the
org.apache.hadoop.hive.ql.stats.ClientStatsPublisher interface.
</description>
</property>
<property>
<name>hive.client.stats.counters</name>
<value></value>
<description>Subset of counters that should be of interest for
hive.client.stats.publishers (when one wants to limit their
publishing). Non-display names should be used
</description>
</property>
<property>
<name>hive.merge.mapfiles</name>
<value>true</value>
<description>Merge small files at the end of a map-only job
</description>
</property>
<property>
<name>hive.merge.mapredfiles</name>
<value>false</value>
<description>Merge small files at the end of a map-reduce job
</description>
</property>
<property>
<name>hive.heartbeat.interval</name>
<value>1000</value>
<description>Send a heartbeat after this interval - used by mapjoin
and filter operators
</description>
</property>
<property>
<name>hive.merge.size.per.task</name>
<value>256000000</value>
<description>Size of merged files at the end of the job</description>
</property>
<property>
<name>hive.merge.smallfiles.avgsize</name>
<value>16000000</value>
<description>When the average output file size of a job is less than
this number, Hive will start an additional map-reduce job to merge
the output files into bigger files. This is only done for map-only
jobs if hive.merge.mapfiles is true, and for map-reduce jobs if
hive.merge.mapredfiles is true.
</description>
</property>
<property>
<name>hive.mapjoin.smalltable.filesize</name>
<value>25000000</value>
<description>The threshold for the input file size of the small
tables; if the file size is smaller than this threshold, it will try
to convert the common join into map join
</description>
</property>
<property>
<name>hive.ignore.mapjoin.hint</name>
<value>true</value>
<description>Ignore the mapjoin hint</description>
</property>
<property>
<name>hive.mapjoin.localtask.max.memory.usage</name>
<value>0.90</value>
<description>This number means how much memory the local task can take
to hold the key/value into in-memory hash table; If the local task's
memory usage is more than this number, the local task will abort
itself. It means the data of the small table is too large to be held
in memory.
</description>
</property>
<property>
<name>hive.mapjoin.followby.gby.localtask.max.memory.usage</name>
<value>0.55</value>
<description>This number means how much memory the local task can take
to hold the key/value into in-memory hash table when this map join
followed by a group by; If the local task's memory usage is more than
this number, the local task will abort itself. It means the
data of the small table is too large to be held in memory.
</description>
</property>
<property>
<name>hive.mapjoin.check.memory.rows</name>
<value>100000</value>
<description>The number means after how many rows processed it needs
to check the memory usage
</description>
</property>
<property>
<name>hive.auto.convert.join</name>
<value>false</value>
<description>Whether Hive enables the optimization about converting
common join into mapjoin based on the input file size
</description>
</property>
<property>
<name>hive.auto.convert.join.noconditionaltask</name>
<value>true</value>
<description>Whether Hive enables the optimization about converting
common join into mapjoin based on the input file
size. If this
parameter is on, and the sum of size for n-1 of the
tables/partitions
for a n-way join is smaller than the
specified size, the join is
directly converted to a mapjoin (there is no
conditional task).
</description>
</property>
<property>
<name>hive.auto.convert.join.noconditionaltask.size</name>
<value>10000000</value>
<description>If hive.auto.convert.join.noconditionaltask is off, this
parameter does not take effect. However, if it
is on, and the sum of
size for n-1 of the tables/partitions for a n-way
join is smaller than
this size, the join is directly
converted to a mapjoin(there is no
conditional task). The default is 10MB
</description>
</property>
<property>
<name>hive.optimize.mapjoin.mapreduce</name>
<value>false</value>
<description>If hive.auto.convert.join is off, this parameter does not
take
effect. If it is on, and if there are map-join jobs followed by a
map-reduce
job (e.g. a group by), each map-only job is merged with
the
following
map-reduce job.
</description>
</property>
<property>
<name>hive.script.auto.progress</name>
<value>false</value>
<description>Whether Hive Transform/Map/Reduce Clause should
automatically send progress information to TaskTracker to avoid the
task getting killed because of inactivity. Hive sends progress
information when the script is outputting to stderr. This option
removes the need of periodically producing stderr messages, but users
should be cautious because this may prevent infinite loops in the
scripts from being killed by TaskTracker.
</description>
</property>
<property>
<name>hive.script.serde</name>
<value>org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe</value>
<description>The default serde for transmitting input data to and
reading output data from the user scripts.
</description>
</property>
<property>
<name>hive.binary.record.max.length</name>
<value>1000</value>
<description>Read from a binary stream and treat each
hive.binary.record.max.length bytes as a record.
The last record
before the end of stream can have less than
hive.binary.record.max.length bytes
</description>
</property>
<property>
<name>hive.script.recordreader</name>
<value>org.apache.hadoop.hive.ql.exec.TextRecordReader</value>
<description>The default record reader for reading data from the user
scripts.
</description>
</property>
<property>
<name>hive.script.recordwriter</name>
<value>org.apache.hadoop.hive.ql.exec.TextRecordWriter</value>
<description>The default record writer for writing data to the user
scripts.
</description>
</property>
<property>
<name>hive.input.format</name>
<value>org.apache.hadoop.hive.ql.io.CombineHiveInputFormat</value>
<description>The default input format. Set this to HiveInputFormat if
you encounter problems with CombineHiveInputFormat.
</description>
</property>
<property>
<name>hive.udtf.auto.progress</name>
<value>false</value>
<description>Whether Hive should automatically send progress
information to TaskTracker when using UDTF's to prevent the task
getting killed because of inactivity. Users should be cautious
because this may prevent TaskTracker from killing tasks with infinite
loops.
</description>
</property>
<property>
<name>hive.mapred.reduce.tasks.speculative.execution</name>
<value>true</value>
<description>Whether speculative execution for reducers should be
turned on.
</description>
</property>
<property>
<name>hive.exec.counters.pull.interval</name>
<value>1000</value>
<description>The interval with which to poll the JobTracker for the
counters of the running job. The smaller it is the more load there will
be on the jobtracker, the higher it is the less granular the captured
counters will be.
</description>
</property>
<property>
<name>hive.querylog.location</name>
<value>/tmp/${user.name}</value>
<description>
Location of Hive run time structured log file
</description>
</property>
<property>
<name>hive.querylog.enable.plan.progress</name>
<value>true</value>
<description>
Whether to log the plan's progress every time a job's
progress is checked.
These logs are written to the location specified
by
hive.querylog.location
</description>
</property>
<property>
<name>hive.querylog.plan.progress.interval</name>
<value>60000</value>
<description>
The interval to wait between logging the plan's progress
in
milliseconds.
If there is a whole number percentage change in the
progress of the
mappers or the reducers,
the progress is logged
regardless of this value.
The actual interval will be the ceiling of
(this value divided by the
value of
hive.exec.counters.pull.interval)
multiplied by the value of hive.exec.counters.pull.interval.
I.e. if it
is not evenly divisible by the value of
hive.exec.counters.pull.interval
it will be
logged less frequently than specified.
This only has an
effect if hive.querylog.enable.plan.progress is set to
true.
</description>
</property>
<property>
<name>hive.enforce.bucketing</name>
<value>false</value>
<description>Whether bucketing is enforced. If true, while inserting
into the table, bucketing is enforced.
</description>
</property>
<property>
<name>hive.enforce.sorting</name>
<value>false</value>
<description>Whether sorting is enforced. If true, while inserting
into the table, sorting is enforced.
</description>
</property>
<property>
<name>hive.optimize.bucketingsorting</name>
<value>true</value>
<description>If hive.enforce.bucketing or hive.enforce.sorting is
true, don't create a reducer for enforcing
bucketing/sorting for
queries of the form:
insert overwrite table T2 select * from T1;
where
T1 and T2 are bucketed/sorted by the same keys into the same number
of buckets.
</description>
</property>
<property>
<name>hive.enforce.sortmergebucketmapjoin</name>
<value>false</value>
<description>If the user asked for sort-merge bucketed map-side join,
and it cannot be performed,
should the query fail or not ?
</description>
</property>
<property>
<name>hive.auto.convert.sortmerge.join</name>
<value>false</value>
<description>Will the join be automatically converted to a sort-merge
join, if the joined tables pass
the criteria for sort-merge join.
</description>
</property>
<property>
<name>hive.auto.convert.sortmerge.join.bigtable.selection.policy</name>
<value>org.apache.hadoop.hive.ql.optimizer.AvgPartitionSizeBasedBigTableSelectorForAutoSMJ</value>
<description>The policy to choose the big table for automatic
conversion to sort-merge join.
By default, the table with the largest
partitions is assigned the big
table. All policies are:
. based on
position of the table - the leftmost table is selected
org.apache.hadoop.hive.ql.optimizer.LeftmostBigTableSMJ.
. based on
total size (all the partitions selected in the query) of
the table
org.apache.hadoop.hive.ql.optimizer.TableSizeBasedBigTableSelectorForAutoSMJ.
. based on average size (all the partitions selected in the query) of
the table
org.apache.hadoop.hive.ql.optimizer.AvgPartitionSizeBasedBigTableSelectorForAutoSMJ.
New policies can be added in future.
</description>
</property>
<property>
<name>hive.metastore.ds.connection.url.hook</name>
<value></value>
<description>Name of the hook to use for retrieving the JDO connection
URL. If empty, the value in javax.jdo.option.ConnectionURL is used
</description>
</property>
<property>
<name>hive.metastore.ds.retry.attempts</name>
<value>1</value>
<description>The number of times to retry a metastore call if there
is a connection error
</description>
</property>
<property>
<name>hive.metastore.ds.retry.interval</name>
<value>1000</value>
<description>The number of milliseconds between metastore retry
attempts
</description>
</property>
<property>
<name>hive.metastore.server.min.threads</name>
<value>200</value>
<description>Minimum number of worker threads in the Thrift server's
pool.
</description>
</property>
<property>
<name>hive.metastore.server.max.threads</name>
<value>100000</value>
<description>Maximum number of worker threads in the Thrift server's
pool.
</description>
</property>
<property>
<name>hive.metastore.server.tcp.keepalive</name>
<value>true</value>
<description>Whether to enable TCP keepalive for the metastore server.
Keepalive will prevent accumulation of half-open connections.
</description>
</property>
<property>
<name>hive.metastore.sasl.enabled</name>
<value>false</value>
<description>If true, the metastore thrift interface will be secured
with SASL. Clients must authenticate with Kerberos.
</description>
</property>
<property>
<name>hive.metastore.thrift.framed.transport.enabled</name>
<value>false</value>
<description>If true, the metastore thrift interface will use
TFramedTransport. When false (default) a standard TTransport is used.
</description>
</property>
<property>
<name>hive.metastore.kerberos.keytab.file</name>
<value></value>
<description>The path to the Kerberos Keytab file containing the
metastore thrift server's service principal.
</description>
</property>
<property>
<name>hive.metastore.kerberos.principal</name>
<value>hive-metastore/_HOST@EXAMPLE.COM</value>
<description>The service principal for the metastore thrift server.
The special string _HOST will be replaced automatically with the
correct host name.
</description>
</property>
<property>
<name>hive.cluster.delegation.token.store.class</name>
<value>org.apache.hadoop.hive.thrift.MemoryTokenStore</value>
<description>The delegation token store implementation. Set to
org.apache.hadoop.hive.thrift.ZooKeeperTokenStore for load-balanced
cluster.
</description>
</property>
<property>
<name>hive.cluster.delegation.token.store.zookeeper.connectString</name>
<value>localhost:2181</value>
<description>The ZooKeeper token store connect string.</description>
</property>
<property>
<name>hive.cluster.delegation.token.store.zookeeper.znode</name>
<value>/hive/cluster/delegation</value>
<description>The root path for token store data.</description>
</property>
<property>
<name>hive.cluster.delegation.token.store.zookeeper.acl</name>
<value>sasl:hive/host1@EXAMPLE.COM:cdrwa,sasl:hive/host2@EXAMPLE.COM:cdrwa</value>
<description>ACL for token store entries. List comma separated all
server principals for the cluster.
</description>
</property>
<property>
<name>hive.metastore.cache.pinobjtypes</name>
<value>Table,StorageDescriptor,SerDeInfo,Partition,Database,Type,FieldSchema,Order</value>
<description>List of comma separated metastore object types that
should be pinned in the cache
</description>
</property>
<property>
<name>hive.optimize.reducededuplication</name>
<value>true</value>
<description>Remove extra map-reduce jobs if the data is already
clustered by the same key which needs to be used again. This should
always be set to true. Since it is a new feature, it has been made
configurable.
</description>
</property>
<property>
<name>hive.optimize.reducededuplication.min.reducer</name>
<value>4</value>
<description>Reduce deduplication merges two RSs by moving
key/parts/reducer-num of the child RS to parent RS.
That means if
reducer-num of the child RS is fixed (order by or forced
bucketing)
and small, it can result in a very slow, single MR job.
The optimization will be
disabled if number of reducers is less than
specified value.
</description>
</property>
<property>
<name>hive.exec.dynamic.partition</name>
<value>true</value>
<description>Whether or not to allow dynamic partitions in DML/DDL.
</description>
</property>
<property>
<name>hive.exec.dynamic.partition.mode</name>
<value>strict</value>
<description>In strict mode, the user must specify at least one static
partition in case the user accidentally overwrites all partitions.
</description>
</property>
<property>
<name>hive.exec.max.dynamic.partitions</name>
<value>1000</value>
<description>Maximum number of dynamic partitions allowed to be
created in total.
</description>
</property>
<property>
<name>hive.exec.max.dynamic.partitions.pernode</name>
<value>100</value>
<description>Maximum number of dynamic partitions allowed to be
created in each mapper/reducer node.
</description>
</property>
<property>
<name>hive.exec.max.created.files</name>
<value>100000</value>
<description>Maximum number of HDFS files created by all
mappers/reducers in a MapReduce job.
</description>
</property>
<property>
<name>hive.exec.default.partition.name</name>
<value>__HIVE_DEFAULT_PARTITION__</value>
<description>The default partition name in case the dynamic partition
column value is null/empty string or any other values that cannot be
escaped. This value must not contain any special character used in
HDFS URI (e.g., ':', '%', '/' etc). The user has to be aware that the
dynamic partition value should not contain this value to avoid
confusions.
</description>
</property>
<property>
<name>hive.stats.dbclass</name>
<value>jdbc:derby</value>
<description>The default database that stores temporary hive
statistics.
</description>
</property>
<property>
<name>hive.stats.autogather</name>
<value>true</value>
<description>A flag to gather statistics automatically during the
INSERT OVERWRITE command.
</description>
</property>
<property>
<name>hive.stats.jdbcdriver</name>
<value>org.apache.derby.jdbc.EmbeddedDriver</value>
<description>The JDBC driver for the database that stores temporary
hive statistics.
</description>
</property>
<property>
<name>hive.stats.dbconnectionstring</name>
<value>jdbc:derby:;databaseName=TempStatsStore;create=true</value>
<description>The default connection string for the database that
stores temporary hive statistics.
</description>
</property>
<property>
<name>hive.stats.default.publisher</name>
<value></value>
<description>The Java class (implementing the StatsPublisher
interface) that is used by default if hive.stats.dbclass is not JDBC
or HBase.
</description>
</property>
<property>
<name>hive.stats.default.aggregator</name>
<value></value>
<description>The Java class (implementing the StatsAggregator
interface) that is used by default if hive.stats.dbclass is not JDBC
or HBase.
</description>
</property>
<property>
<name>hive.stats.jdbc.timeout</name>
<value>30</value>
<description>Timeout value (number of seconds) used by JDBC connection
and statements.
</description>
</property>
<property>
<name>hive.stats.retries.max</name>
<value>0</value>
<description>Maximum number of retries when stats publisher/aggregator
got an exception updating intermediate database. Default is no retries
on failures.
</description>
</property>
<property>
<name>hive.stats.retries.wait</name>
<value>3000</value>
<description>The base waiting window (in milliseconds) before the next
retry. The actual wait time is calculated by baseWindow * failures +
baseWindow * (failures + 1) * (random number between [0.0,1.0]).
</description>
</property>
<property>
<name>hive.stats.reliable</name>
<value>false</value>
<description>Whether queries will fail because stats cannot be
collected completely accurately.
If this is set to true,
reading/writing from/into a partition may fail
because the stats
could
not be computed accurately.
</description>
</property>
<property>
<name>hive.stats.collect.tablekeys</name>
<value>false</value>
<description>Whether join and group by keys on tables are derived and
maintained in the QueryPlan.
This is useful to identify how tables are
accessed and to determine if
they should be bucketed.
</description>
</property>
<property>
<name>hive.stats.collect.scancols</name>
<value>false</value>
<description>Whether column accesses are tracked in the QueryPlan.
This is useful to identify how tables are accessed and to determine
if there are wasted columns that can be trimmed.
</description>
</property>
<property>
<name>hive.stats.ndv.error</name>
<value>20.0</value>
<description>Standard error expressed in percentage. Provides a
tradeoff between accuracy and compute cost. A lower value for error
indicates higher accuracy and a higher compute cost.
</description>
</property>
<property>
<name>hive.stats.key.prefix.max.length</name>
<value>200</value>
<description>
Determines whether, when the prefix of the key used for
intermediate stats collection
exceeds a certain length, a hash of the
key is used instead. If the
value &lt; 0 then hashing
is never used, if
the value >= 0 then hashing is used only when the key
prefix's length
exceeds that value. The key prefix is defined as everything preceding
the
task ID in the key.
</description>
</property>
<property>
<name>hive.support.concurrency</name>
<value>false</value>
<description>Whether hive supports concurrency or not. A zookeeper
instance must be up and running for the default hive lock manager to
support read-write locks.
</description>
</property>
<property>
<name>hive.lock.numretries</name>
<value>100</value>
<description>The number of times you want to try to get all the locks
</description>
</property>
<property>
<name>hive.unlock.numretries</name>
<value>10</value>
<description>The number of times you want to retry to do one unlock
</description>
</property>
<property>
<name>hive.lock.sleep.between.retries</name>
<value>60</value>
<description>The sleep time (in seconds) between various retries
</description>
</property>
<property>
<name>hive.zookeeper.quorum</name>
<value></value>
<description>The list of zookeeper servers to talk to. This is only
needed for read/write locks.
</description>
</property>
<property>
<name>hive.zookeeper.client.port</name>
<value>2181</value>
<description>The port of zookeeper servers to talk to. This is only
needed for read/write locks.
</description>
</property>
<property>
<name>hive.zookeeper.session.timeout</name>
<value>600000</value>
<description>Zookeeper client's session timeout. The client is
disconnected, and as a result, all locks released, if a heartbeat is
not sent in the timeout.
</description>
</property>
<property>
<name>hive.zookeeper.namespace</name>
<value>hive_zookeeper_namespace</value>
<description>The parent node under which all zookeeper nodes are
created.
</description>
</property>
<property>
<name>hive.zookeeper.clean.extra.nodes</name>
<value>false</value>
<description>Clean extra nodes at the end of the session.
</description>
</property>
<property>
<name>fs.har.impl</name>
<value>org.apache.hadoop.hive.shims.HiveHarFileSystem</value>
<description>The implementation for accessing Hadoop Archives. Note
that this won't be applicable to Hadoop versions earlier than 0.20
</description>
</property>
<property>
<name>hive.archive.enabled</name>
<value>false</value>
<description>Whether archiving operations are permitted</description>
</property>
<property>
<name>hive.fetch.output.serde</name>
<value>org.apache.hadoop.hive.serde2.DelimitedJSONSerDe</value>
<description>The serde used by FetchTask to serialize the fetch
output.
</description>
</property>
<property>
<name>hive.exec.mode.local.auto</name>
<value>false</value>
<description> Let hive determine whether to run in local mode
automatically
</description>
</property>
<property>
<name>hive.exec.drop.ignorenonexistent</name>
<value>true</value>
<description>
Do not report an error if DROP TABLE/VIEW specifies a
non-existent
table/view
</description>
</property>
<property>
<name>hive.exec.show.job.failure.debug.info</name>
<value>true</value>
<description>
If a job fails, whether to provide a link in the CLI to
the task with
the
most failures, along with debugging hints if
applicable.
</description>
</property>
<property>
<name>hive.auto.progress.timeout</name>
<value>0</value>
<description>
How long to run autoprogressor for the script/UDTF
operators (in
seconds).
Set to 0 for forever.
</description>
</property>
<!-- HBase Storage Handler Parameters -->
<property>
<name>hive.hbase.wal.enabled</name>
<value>true</value>
<description>Whether writes to HBase should be forced to the
write-ahead log. Disabling this improves HBase write performance at
the risk of lost writes in case of a crash.
</description>
</property>
<property>
<name>hive.table.parameters.default</name>
<value></value>
<description>Default property values for newly created tables
</description>
</property>
<property>
<name>hive.entity.separator</name>
<value>@</value>
<description>Separator used to construct names of tables and
partitions. For example, dbname@tablename@partitionname
</description>
</property>
<property>
<name>hive.ddl.createtablelike.properties.whitelist</name>
<value></value>
<description>Table Properties to copy over when executing a Create
Table Like.
</description>
</property>
<property>
<name>hive.variable.substitute</name>
<value>true</value>
<description>This enables substitution using syntax like ${var}
${system:var} and ${env:var}.
</description>
</property>
<property>
<name>hive.variable.substitute.depth</name>
<value>40</value>
<description>The maximum replacements the substitution engine will do.
</description>
</property>
<property>
<name>hive.conf.validation</name>
<value>true</value>
<description>Enables type checking for registered hive configurations
</description>
</property>
<property>
<name>hive.security.authorization.enabled</name>
<value>false</value>
<description>enable or disable the hive client authorization
</description>
</property>
<property>
<name>hive.security.authorization.createtable.user.grants</name>
<value></value>
<description>the privileges automatically granted to some users
whenever a table gets created.
An example like
"userX,userY:select;userZ:create" will grant select
privilege to userX
and userY,
and grant create privilege to userZ whenever a new table
is created.
</description>
</property>
<property>
<name>hive.security.authorization.createtable.group.grants</name>
<value></value>
<description>the privileges automatically granted to some groups
whenever a table gets created.
An example like
"groupX,groupY:select;groupZ:create" will grant select
privilege to
groupX and groupY,
and grant create privilege to groupZ whenever a new
table is created.
</description>
</property>
<property>
<name>hive.security.authorization.createtable.role.grants</name>
<value></value>
<description>the privileges automatically granted to some roles
whenever a table gets created.
An example like
"roleX,roleY:select;roleZ:create" will grant select
privilege to roleX
and roleY,
and grant create privilege to roleZ whenever a new table
is created.
</description>
</property>
<property>
<name>hive.security.authorization.createtable.owner.grants</name>
<value></value>
<description>the privileges automatically granted to the owner
whenever a table gets created.
An example like "select,drop" will
grant select and drop privilege to
the owner of the table
</description>
</property>
<property>
<name>hive.metastore.authorization.storage.checks</name>
<value>false</value>
<description>Should the metastore do authorization checks against the
underlying storage
for operations like drop-partition (disallow the
drop-partition if the
user in
question doesn't have permissions to
delete the corresponding directory
on the storage).
</description>
</property>
<property>
<name>hive.error.on.empty.partition</name>
<value>false</value>
<description>Whether to throw an exception if dynamic partition insert
generates empty results.
</description>
</property>
<property>
<name>hive.index.compact.file.ignore.hdfs</name>
<value>false</value>
<description>If true, the hdfs location stored in the index file will be
ignored at runtime.
If the data got moved or the name of the cluster
got changed, the index
data should still be usable.
</description>
</property>
<property>
<name>hive.optimize.index.filter.compact.minsize</name>
<value>5368709120</value>
<description>Minimum size (in bytes) of the inputs on which a compact
index is automatically used.
</description>
</property>
<property>
<name>hive.optimize.index.filter.compact.maxsize</name>
<value>-1</value>
<description>Maximum size (in bytes) of the inputs on which a compact
index is automatically used.
A negative number is equivalent to
infinity.
</description>
</property>
<property>
<name>hive.index.compact.query.max.size</name>
<value>10737418240</value>
<description>The maximum number of bytes that a query using the
compact index can read. Negative value is equivalent to infinity.
</description>
</property>
<property>
<name>hive.index.compact.query.max.entries</name>
<value>10000000</value>
<description>The maximum number of index entries to read during a
query that uses the compact index. Negative value is equivalent to
infinity.
</description>
</property>
<property>
<name>hive.index.compact.binary.search</name>
<value>true</value>
<description>Whether or not to use a binary search to find the entries
in an index table that match the filter, where possible
</description>
</property>
<property>
<name>hive.exim.uri.scheme.whitelist</name>
<value>hdfs,pfile</value>
<description>A comma separated list of acceptable URI schemes for
import and export.
</description>
</property>
<property>
<name>hive.lock.mapred.only.operation</name>
<value>false</value>
<description>This param controls whether locks are acquired only for
queries that need to execute at least one mapred job.
</description>
</property>
<property>
<name>hive.limit.row.max.size</name>
<value>100000</value>
<description>When trying a smaller subset of data for simple LIMIT,
how much size we need to guarantee
each row to have at least.
</description>
</property>
<property>
<name>hive.limit.optimize.limit.file</name>
<value>10</value>
<description>When trying a smaller subset of data for simple LIMIT,
maximum number of files we can
sample.
</description>
</property>
<property>
<name>hive.limit.optimize.enable</name>
<value>false</value>
<description>Whether to enable the optimization of trying a smaller
subset of data for simple LIMIT first.
</description>
</property>
<property>
<name>hive.limit.optimize.fetch.max</name>
<value>50000</value>
<description>Maximum number of rows allowed for a smaller subset of
data for simple LIMIT, if it is a fetch query.
Insert queries are not
restricted by this limit.
</description>
</property>
<property>
<name>hive.rework.mapredwork</name>
<value>false</value>
<description>should rework the mapred work or not.
This is first
introduced by SymlinkTextInputFormat to replace symlink
files with
real paths at compile time.
</description>
</property>
<property>
<name>hive.exec.concatenate.check.index</name>
<value>true</value>
<description>If this is set to true, hive will throw an error when doing
'alter table tbl_name [partSpec] concatenate' on a table/partition
that has indexes on it. The reason a user would want to set this to
true is that it helps the user avoid handling all the index drop,
recreation and rebuild work. This is very helpful for tables with
thousands of partitions.
</description>
</property>
<property>
<name>hive.sample.seednumber</name>
<value>0</value>
<description>A number used for percentage sampling. By changing this
number, user will change the subsets
of data sampled.
</description>
</property>
<property>
<name>hive.io.exception.handlers</name>
<value></value>
<description>A list of io exception handler class names. This is used
to construct a list of exception handlers to handle exceptions thrown
by
record readers
</description>
</property>
<property>
<name>hive.autogen.columnalias.prefix.label</name>
<value>_c</value>
<description>String used as a prefix when auto generating column
alias.
By default the prefix label will be appended with a column
position
number to form the column alias. Auto generation would happen
if an
aggregate function is used in a select clause without an
explicit
alias.
</description>
</property>
<property>
<name>hive.autogen.columnalias.prefix.includefuncname</name>
<value>false</value>
<description>Whether to include function name in the column alias auto
generated by hive.
</description>
</property>
<property>
<name>hive.exec.perf.logger</name>
<value>org.apache.hadoop.hive.ql.log.PerfLogger</value>
<description>The class responsible for logging client side performance
metrics. Must be a subclass of
org.apache.hadoop.hive.ql.log.PerfLogger
</description>
</property>
<property>
<name>hive.start.cleanup.scratchdir</name>
<value>false</value>
<description>To cleanup the hive scratchdir while starting the hive
server
</description>
</property>
<property>
<name>hive.output.file.extension</name>
<value></value>
<description>String used as a file extension for output files. If not
set, defaults to the codec extension for text files (e.g. ".gz"), or
no extension otherwise.
</description>
</property>
<property>
<name>hive.insert.into.multilevel.dirs</name>
<value>false</value>
<description>Whether to insert into multilevel directories like
"insert
directory '/HIVEFT25686/chinna/' from table"
</description>
</property>
<property>
<name>hive.warehouse.subdir.inherit.perms</name>
<value>false</value>
<description>Set this to true if the table directories should
inherit the
permission of the warehouse or database directory instead
of being created
with the permissions derived from dfs umask
</description>
</property>
<property>
<name>hive.exec.job.debug.capture.stacktraces</name>
<value>true</value>
<description>Whether or not stack traces parsed from the task logs of
a sampled failed task for
each failed job should be stored in the
SessionState
</description>
</property>
<property>
<name>hive.exec.driver.run.hooks</name>
<value></value>
<description>A comma separated list of hooks which implement
HiveDriverRunHook and will be run at the
beginning and end of
Driver.run, these will be run in the order specified
</description>
</property>
<property>
<name>hive.ddl.output.format</name>
<value>text</value>
<description>
The data format to use for DDL output. One of "text" (for
human
readable text) or "json" (for a json object).
</description>
</property>
<property>
<name>hive.transform.escape.input</name>
<value>false</value>
<description>
This adds an option to escape special chars (newlines,
carriage returns
and
tabs) when they are passed to the user script.
This is useful if the hive
tables
can contain data that contains
special characters.
</description>
</property>
<property>
<name>hive.exec.rcfile.use.explicit.header</name>
<value>true</value>
<description>
If this is set the header for RC Files will simply be
RCF. If this is
not
set the header will be that borrowed from sequence
files, e.g. SEQ-
followed
by the input and output RC File formats.
</description>
</property>
<property>
<name>hive.multi.insert.move.tasks.share.dependencies</name>
<value>false</value>
<description>
If this is set all move tasks for tables/partitions (not
directories)
at the end of a
multi-insert query will only begin once
the dependencies for all these move tasks
have been
met.
Advantages: If
concurrency is enabled, the locks will only be released once the
query has
finished, so with this config enabled, the time when the
table/partition is
generated will be much closer to when the lock on
it is released.
Disadvantages: If concurrency is not enabled, with
this disabled, the
tables/partitions which
are produced by this query
and finish earlier will be available for
querying
much earlier. Since
the locks are only released once the query finishes,
this
does not
apply if concurrency is enabled.
</description>
</property>
<property>
<name>hive.fetch.task.conversion</name>
<value>minimal</value>
<description>
Some select queries can be converted to single FETCH task
minimizing
latency.
Currently the query should be single sourced not
having any subquery and
should not have
any aggregations or distincts
(which incurs RS), lateral views and
joins.
1. minimal : SELECT STAR,
FILTER on partition columns, LIMIT only
2. more : SELECT, FILTER,
LIMIT only (TABLESAMPLE, virtual columns)
</description>
</property>
<property>
<name>hive.hmshandler.retry.attempts</name>
<value>1</value>
<description>The number of times to retry a HMSHandler call if there
were a connection error
</description>
</property>
<property>
<name>hive.hmshandler.retry.interval</name>
<value>1000</value>
<description>The number of milliseconds between HMSHandler retry
attempts
</description>
</property>
<property>
<name>hive.server.read.socket.timeout</name>
<value>10</value>
<description>Timeout for the HiveServer to close the connection if no
response from the client in N seconds, defaults to 10 seconds.
</description>
</property>
<property>
<name>hive.server.tcp.keepalive</name>
<value>true</value>
<description>Whether to enable TCP keepalive for the Hive server.
Keepalive will prevent accumulation of half-open connections.
</description>
</property>
<property>
<name>hive.decode.partition.name</name>
<value>false</value>
<description>Whether to show the unquoted partition names in query
results.
</description>
</property>
<property>
<name>hive.log4j.file</name>
<value></value>
<description>Hive log4j configuration file.
If the property is not set,
then logging will be initialized using
hive-log4j.properties found on
the classpath.
If the property is set, the value must be a valid URI
(java.net.URI,
e.g. "file:///tmp/my-logging.properties"), which you
can then extract
a URL from and pass to
PropertyConfigurator.configure(URL).
</description>
</property>
<property>
<name>hive.exec.log4j.file</name>
<value></value>
<description>Hive log4j configuration file for execution mode(sub
command).
If the property is not set, then logging will be initialized
using
hive-exec-log4j.properties found on the classpath.
If the
property is set, the value must be a valid URI (java.net.URI,
e.g.
"file:///tmp/my-logging.properties"), which you can then extract
a URL
from and pass to PropertyConfigurator.configure(URL).
</description>
</property>
<property>
<name>hive.exec.infer.bucket.sort</name>
<value>false</value>
<description>
If this is set, when writing partitions, the metadata
will include the
bucketing/sorting
properties with which the data was
written if any (this will not overwrite the
metadata
inherited from the
table if the table is bucketed/sorted)
</description>
</property>
<property>
<name>hive.exec.infer.bucket.sort.num.buckets.power.two</name>
<value>false</value>
<description>
If this is set, when setting the number of reducers for
the map reduce
task which writes the
final output files, it will choose
a number which is a power of two,
unless the user specifies
the number
of reducers to use using mapred.reduce.tasks. The number of
reducers
may be set to a power of two, only to be followed by a merge task,
preventing anything from being inferred.
With
hive.exec.infer.bucket.sort set to true:
Advantages: If this is not
set, the number of buckets for partitions will seem
arbitrary,
which
means that the number of mappers used for optimized joins, for
example, will
be very low. With this set, since the number of buckets
used for any
partition is
a power of two, the number of mappers used
for optimized joins will be
the least
number of buckets used by any
partition being joined.
Disadvantages: This may mean a much larger or
much smaller number of reducers being
used in the
final map reduce job,
e.g. if a job was originally going to take 257
reducers,
it will now
take 512 reducers, similarly if the max number of reducers
is 511,
and
a job was going to use this many, it will now use 256 reducers.
</description>
</property>
<property>
<name>hive.groupby.orderby.position.alias</name>
<value>false</value>
<description>Whether to enable using Column Position Alias in Group By
or Order By
</description>
</property>
<property>
<name>hive.server2.thrift.min.worker.threads</name>
<value>5</value>
<description>Minimum number of Thrift worker threads</description>
</property>
<property>
<name>hive.server2.thrift.max.worker.threads</name>
<value>100</value>
<description>Maximum number of Thrift worker threads</description>
</property>
<property>
<name>hive.server2.thrift.port</name>
<value>10000</value>
<description>Port number of HiveServer2 Thrift interface.
Can be
overridden by setting $HIVE_SERVER2_THRIFT_PORT
</description>
</property>
<property>
<name>hive.server2.thrift.bind.host</name>
<value>localhost</value>
<description>Bind host on which to run the HiveServer2 Thrift
interface.
Can be overridden by setting $HIVE_SERVER2_THRIFT_BIND_HOST
</description>
</property>
<property>
<name>hive.server2.authentication</name>
<value>NONE</value>
<description>
Client authentication types.
NONE: no authentication check
LDAP: LDAP/AD based authentication
KERBEROS: Kerberos/GSSAPI
authentication
CUSTOM: Custom authentication provider
(Use with
property hive.server2.custom.authentication.class)
</description>
</property>
<property>
<name>hive.server2.custom.authentication.class</name>
<value></value>
<description>
Custom authentication class. Used when property
'hive.server2.authentication' is set to 'CUSTOM'. Provided class
must
be a proper implementation of the interface
org.apache.hive.service.auth.PasswdAuthenticationProvider.
HiveServer2
will call its Authenticate(user, passed) method to
authenticate requests.
The implementation may optionally extend the
Hadoop's
org.apache.hadoop.conf.Configured class to grab Hive's
Configuration
object.
</description>
</property>
<property>
<name>hive.server2.authentication.kerberos.principal</name>
<value></value>
<description>
Kerberos server principal
</description>
</property>
<property>
<name>hive.server2.authentication.kerberos.keytab</name>
<value></value>
<description>
Kerberos keytab file for server principal
</description>
</property>
<property>
<name>hive.server2.authentication.ldap.url</name>
<value></value>
<description>
LDAP connection URL
</description>
</property>
<property>
<name>hive.server2.authentication.ldap.baseDN</name>
<value></value>
<description>
LDAP base DN
</description>
</property>
<property>
<name>hive.server2.enable.doAs</name>
<value>true</value>
<description>
Setting this property to true will have hive server2
execute
hive operations as the user making the calls to it.
</description>
</property>
</configuration>