data/conf/iceberg/llap/hive-site.xml - hive - Git at Google

 <?xml version="1.0"?>
 <?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
 <!--
    Licensed to the Apache Software Foundation (ASF) under one or more
    contributor license agreements.  See the NOTICE file distributed with
    this work for additional information regarding copyright ownership.
    The ASF licenses this file to You under the Apache License, Version 2.0
    (the "License"); you may not use this file except in compliance with
    the License.  You may obtain a copy of the License at

        http://www.apache.org/licenses/LICENSE-2.0

    Unless required by applicable law or agreed to in writing, software
    distributed under the License is distributed on an "AS IS" BASIS,
    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    See the License for the specific language governing permissions and
    limitations under the License.
 -->

 <configuration>

     <property>
         <name>hive.in.test</name>
         <value>true</value>
         <description>Internal marker for test. Used for masking env-dependent values</description>
     </property>

     <!-- Hive Configuration can either be stored in this file or in the hadoop configuration files  -->
     <!-- that are implied by Hadoop setup variables.                                                -->
     <!-- Aside from Hadoop setup variables - this file is provided as a convenience so that Hive    -->
     <!-- users do not have to edit hadoop configuration files (that may be managed as a centralized -->
     <!-- resource).                                                                                 -->

     <!-- Hive Execution Parameters -->
     <property>
         <name>hadoop.tmp.dir</name>
         <value>${test.tmp.dir}/hadoop-tmp</value>
         <description>A base for other temporary directories.</description>
     </property>

     <!-- In LLAP mode hive.tez.container.size isn't used to actually determine the container size; however, -->
     <!-- while calculating available slots (in split generation) it is used through -->
     <!-- the Vertex resource, so this has to be defined in order to get consistent test results. -->
     <property>
         <!-- NOTE(review): the unit is presumably megabytes; confirm against the Hive configuration reference. -->
         <name>hive.tez.container.size</name>
         <value>128</value>
     </property>

     <property>
         <name>hive.merge.tezfiles</name>
         <value>false</value>
         <description>Merge small files at the end of a Tez DAG</description>
     </property>

     <property>
         <name>hive.tez.input.format</name>
         <value>org.apache.hadoop.hive.ql.io.HiveInputFormat</value>
         <description>The default input format for tez. Tez groups splits in the AM.</description>
     </property>

     <property>
         <name>hive.exec.scratchdir</name>
         <value>${test.tmp.dir}/scratchdir</value>
         <description>Scratch space for Hive jobs</description>
     </property>

     <property>
         <name>datanucleus.schema.autoCreateAll</name>
         <value>true</value>
     </property>

     <property>
         <name>datanucleus.connectionPool.maxPoolSize</name>
         <value>4</value>
     </property>

     <property>
         <name>hive.exec.local.scratchdir</name>
         <value>${test.tmp.dir}/localscratchdir/</value>
         <description>Local scratch space for Hive jobs</description>
     </property>

     <property>
         <name>javax.jdo.option.ConnectionURL</name>
         <value>jdbc:derby:memory:${test.tmp.dir}/junit_metastore_db;create=true</value>
     </property>

     <property>
         <name>hive.metastore.schema.verification</name>
         <value>false</value>
     </property>

     <property>
         <name>javax.jdo.option.ConnectionDriverName</name>
         <value>org.apache.derby.jdbc.EmbeddedDriver</value>
     </property>

     <property>
         <name>javax.jdo.option.ConnectionUserName</name>
         <value>APP</value>
     </property>

     <property>
         <name>javax.jdo.option.ConnectionPassword</name>
         <value>mine</value>
     </property>

     <property>
         <!--  this should eventually be deprecated since the metastore should supply this -->
         <name>hive.metastore.warehouse.dir</name>
         <value>${test.warehouse.dir}</value>
         <description></description>
     </property>

     <property>
         <name>test.log.dir</name>
         <value>${test.tmp.dir}/log/</value>
         <description></description>
     </property>

     <property>
         <name>test.data.files</name>
         <value>${hive.root}/data/files</value>
         <description></description>
     </property>

     <property>
         <name>test.data.scripts</name>
         <value>${hive.root}/data/scripts</value>
         <description></description>
     </property>

     <property>
         <name>hive.jar.path</name>
         <value>${maven.local.repository}/org/apache/hive/hive-exec/${hive.version}/hive-exec-${hive.version}.jar</value>
         <description></description>
     </property>

     <property>
         <name>hive.metastore.rawstore.impl</name>
         <value>org.apache.hadoop.hive.metastore.ObjectStore</value>
         <description>Name of the class that implements the org.apache.hadoop.hive.metastore.rawstore interface. This class is used for the storage and retrieval of raw metadata objects such as tables and databases.</description>
     </property>

     <property>
         <name>hive.querylog.location</name>
         <value>${test.tmp.dir}/tmp</value>
         <description>Location of the structured hive logs</description>
     </property>

     <property>
         <name>hive.exec.pre.hooks</name>
         <value>org.apache.hadoop.hive.ql.hooks.PreExecutePrinter, org.apache.hadoop.hive.ql.hooks.EnforceReadOnlyTables</value>
         <description>Pre Execute Hook for Tests</description>
     </property>

     <property>
         <name>hive.exec.post.hooks</name>
         <value>org.apache.hadoop.hive.ql.hooks.PostExecutePrinter</value>
         <description>Post Execute Hook for Tests</description>
     </property>

     <property>
         <name>hive.support.concurrency</name>
         <value>false</value>
         <description>Whether hive supports concurrency or not. A zookeeper instance must be up and running for the default hive lock manager to support read-write locks.</description>
     </property>

     <property>
         <name>fs.pfile.impl</name>
         <value>org.apache.hadoop.fs.ProxyLocalFileSystem</value>
         <description>A proxy for local file system used for cross file system testing</description>
     </property>

     <property>
         <name>hive.exec.mode.local.auto</name>
         <value>false</value>
         <description>
             Let hive determine whether to run in local mode automatically
             Disabling this for tests so that minimr is not affected
         </description>
     </property>

     <property>
         <name>hive.auto.convert.join</name>
         <value>false</value>
         <description>Whether Hive enables the optimization that converts a common join into a mapjoin based on the input file size</description>
     </property>

     <property>
         <name>hive.ignore.mapjoin.hint</name>
         <value>true</value>
         <description>Whether Hive ignores the mapjoin hint</description>
     </property>

     <property>
         <name>io.sort.mb</name>
         <value>10</value>
     </property>

     <property>
         <name>hive.input.format</name>
         <value>org.apache.hadoop.hive.ql.io.CombineHiveInputFormat</value>
         <description>The default input format, if it is not specified, the system assigns it. It is set to HiveInputFormat for hadoop versions 17, 18 and 19, whereas it is set to CombineHiveInputFormat for hadoop 20. The user can always overwrite it - if there is a bug in CombineHiveInputFormat, it can always be manually set to HiveInputFormat. </description>
     </property>

     <property>
         <name>hive.default.rcfile.serde</name>
         <value>org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe</value>
         <description>The default SerDe hive will use for the rcfile format</description>
     </property>

     <property>
         <name>hive.stats.dbclass</name>
         <value>fs</value>
         <description>The default storage that stores temporary hive statistics. Currently, only the fs type is supported</description>
     </property>

     <property>
         <name>hive.stats.fetch.column.stats</name>
         <value>true</value>
         <description>Use column stats to annotate stats for physical optimization phase</description>
     </property>

     <property>
         <name>hive.execution.engine</name>
         <value>tez</value>
         <description>Whether to use MR or Tez</description>
     </property>

     <property>
         <name>tez.am.node-blacklisting.enabled</name>
         <value>false</value>
     </property>

     <property>
         <name>hive.prewarm.enabled</name>
         <value>true</value>
         <description>
             Enables container prewarm for tez (hadoop 2 only)
         </description>
     </property>

     <property>
         <name>hive.prewarm.numcontainers</name>
         <value>3</value>
         <description>
             Controls the number of containers to prewarm for tez (hadoop 2 only)
         </description>
     </property>

     <property>
         <name>hive.in.tez.test</name>
         <value>true</value>
         <description>
             Indicates that we are in tez testing mode.
         </description>
     </property>

     <property>
         <name>hive.execution.mode</name>
         <value>llap</value>
     </property>

     <!--property>
          This is determined by the cluster type, and is set in code - HadoopShims, etc.
       <name>hive.llap.execution.mode</name>
       <value>all</value>
     </property-->

     <!-- Java -D options supplied through hive.tez.java.opts: they select the log4j2 configuration -->
     <!-- file tez-container-log4j2.properties, the INFO log level and the CLA root logger. -->
     <!-- The value below begins and ends with a space; keep it exactly as written. -->
     <property>
         <name>hive.tez.java.opts</name>
         <value> -Dlog4j.configurationFile=tez-container-log4j2.properties -Dtez.container.log.level=INFO -Dtez.container.root.logger=CLA </value>
     </property>

     <!-- tez.am.launch.cmd-opts is given the identical option string as hive.tez.java.opts above. -->
     <property>
         <name>tez.am.launch.cmd-opts</name>
         <value> -Dlog4j.configurationFile=tez-container-log4j2.properties -Dtez.container.log.level=INFO -Dtez.container.root.logger=CLA </value>
     </property>

     <property>
         <name>hive.tez.exec.print.summary</name>
         <value>true</value>
     </property>

     <!-- LLAP cache and I/O allocator settings. hive.llap.cache.allow.synthetic.fileid is -->
     <!-- declared exactly once, here; it must not be repeated after the LLAP daemon settings. -->
     <property>
         <name>hive.llap.cache.allow.synthetic.fileid</name>
         <value>true</value>
     </property>

     <property>
         <name>hive.llap.io.allocator.direct</name>
         <value>false</value>
     </property>

     <property>
         <name>hive.explain.user</name>
         <value>false</value>
     </property>

     <property>
         <name>hive.orc.splits.ms.footer.cache.enabled</name>
         <value>true</value>
     </property>

     <property>
         <name>hive.join.inner.residual</name>
         <value>true</value>
     </property>

     <!-- LLAP daemon settings: host localhost, one executor, an IO thread pool of size 1 -->
     <!-- and a scheduler wait queue of size 4. -->
     <property>
         <name>hive.llap.daemon.service.hosts</name>
         <value>localhost</value>
     </property>

     <!-- NOTE(review): port 0 presumably requests a dynamically assigned port; confirm. -->
     <property>
         <name>hive.llap.daemon.service.port</name>
         <value>0</value>
     </property>

     <property>
         <name>hive.llap.daemon.num.executors</name>
         <value>1</value>
     </property>

     <property>
         <name>hive.llap.io.threadpool.size</name>
         <value>1</value>
     </property>

     <property>
         <name>hive.llap.daemon.task.scheduler.wait.queue.size</name>
         <value>4</value>
     </property>

     <!-- hadoop IPC options -->
     <property>
         <name>ipc.client.low-latency</name>
         <value>true</value>
         <description> </description>
     </property>
     <property>
         <name>ipc.client.tcpnodelay</name>
         <value>true</value>
         <description> </description>
     </property>
     <property>
         <name>ipc.clients-per-factory</name>
         <value>4</value>
         <description> </description>
     </property>

     <property>
         <name>hive.stats.fetch.bitvector</name>
         <value>true</value>
     </property>

     <property>
         <name>hive.tez.cartesian-product.enabled</name>
         <value>true</value>
     </property>

     <property>
         <name>yarn.nodemanager.disk-health-checker.max-disk-utilization-per-disk-percentage</name>
         <value>99</value>
     </property>

     <property>
         <name>hive.query.results.cache.enabled</name>
         <value>false</value>
     </property>

     <property>
         <name>hive.query.reexecution.stats.persist.scope</name>
         <value>query</value>
     </property>

     <property>
         <name>hive.cbo.fallback.strategy</name>
         <value>TEST</value>
     </property>

     <property>
         <name>iceberg.hive.keep.stats</name>
         <value>true</value>
         <description>
             We want to keep the stats in Hive sessions.
         </description>
     </property>

     <property>
         <name>hive.txn.xlock.ctas</name>
         <value>false</value>
     </property>

 </configuration>
	<?xml version="1.0"?>
	<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
	<!--
	Licensed to the Apache Software Foundation (ASF) under one or more
	contributor license agreements. See the NOTICE file distributed with
	this work for additional information regarding copyright ownership.
	The ASF licenses this file to You under the Apache License, Version 2.0
	(the "License"); you may not use this file except in compliance with
	the License. You may obtain a copy of the License at

	http://www.apache.org/licenses/LICENSE-2.0

	Unless required by applicable law or agreed to in writing, software
	distributed under the License is distributed on an "AS IS" BASIS,
	WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	See the License for the specific language governing permissions and
	limitations under the License.
	-->

	<configuration>

	<property>
	<name>hive.in.test</name>
	<value>true</value>
	<description>Internal marker for test. Used for masking env-dependent values</description>
	</property>

	<!-- Hive Configuration can either be stored in this file or in the hadoop configuration files -->
	<!-- that are implied by Hadoop setup variables. -->
	<!-- Aside from Hadoop setup variables - this file is provided as a convenience so that Hive -->
	<!-- users do not have to edit hadoop configuration files (that may be managed as a centralized -->
	<!-- resource). -->

	<!-- Hive Execution Parameters -->
	<property>
	<name>hadoop.tmp.dir</name>
	<value>${test.tmp.dir}/hadoop-tmp</value>
	<description>A base for other temporary directories.</description>
	</property>

	<!-- In LLAP mode hive.tez.container.size isn't used to actually determine the container size; however, -->
	<!-- while calculating available slots (in split generation) it is used through -->
	<!-- the Vertex resource, so this has to be defined in order to get consistent test results. -->
	<property>
	<name>hive.tez.container.size</name>
	<value>128</value>
	</property>

	<property>
	<name>hive.merge.tezfiles</name>
	<value>false</value>
	<description>Merge small files at the end of a Tez DAG</description>
	</property>

	<property>
	<name>hive.tez.input.format</name>
	<value>org.apache.hadoop.hive.ql.io.HiveInputFormat</value>
	<description>The default input format for tez. Tez groups splits in the AM.</description>
	</property>

	<property>
	<name>hive.exec.scratchdir</name>
	<value>${test.tmp.dir}/scratchdir</value>
	<description>Scratch space for Hive jobs</description>
	</property>

	<property>
	<name>datanucleus.schema.autoCreateAll</name>
	<value>true</value>
	</property>

	<property>
	<name>datanucleus.connectionPool.maxPoolSize</name>
	<value>4</value>
	</property>

	<property>
	<name>hive.exec.local.scratchdir</name>
	<value>${test.tmp.dir}/localscratchdir/</value>
	<description>Local scratch space for Hive jobs</description>
	</property>

	<property>
	<name>javax.jdo.option.ConnectionURL</name>
	<value>jdbc:derby:memory:${test.tmp.dir}/junit_metastore_db;create=true</value>
	</property>

	<property>
	<name>hive.metastore.schema.verification</name>
	<value>false</value>
	</property>

	<property>
	<name>javax.jdo.option.ConnectionDriverName</name>
	<value>org.apache.derby.jdbc.EmbeddedDriver</value>
	</property>

	<property>
	<name>javax.jdo.option.ConnectionUserName</name>
	<value>APP</value>
	</property>

	<property>
	<name>javax.jdo.option.ConnectionPassword</name>
	<value>mine</value>
	</property>

	<property>
	<!-- this should eventually be deprecated since the metastore should supply this -->
	<name>hive.metastore.warehouse.dir</name>
	<value>${test.warehouse.dir}</value>
	<description></description>
	</property>

	<property>
	<name>test.log.dir</name>
	<value>${test.tmp.dir}/log/</value>
	<description></description>
	</property>

	<property>
	<name>test.data.files</name>
	<value>${hive.root}/data/files</value>
	<description></description>
	</property>

	<property>
	<name>test.data.scripts</name>
	<value>${hive.root}/data/scripts</value>
	<description></description>
	</property>

	<property>
	<name>hive.jar.path</name>
	<value>${maven.local.repository}/org/apache/hive/hive-exec/${hive.version}/hive-exec-${hive.version}.jar</value>
	<description></description>
	</property>

	<property>
	<name>hive.metastore.rawstore.impl</name>
	<value>org.apache.hadoop.hive.metastore.ObjectStore</value>
	<description>Name of the class that implements the org.apache.hadoop.hive.metastore.rawstore interface. This class is used for the storage and retrieval of raw metadata objects such as tables and databases.</description>
	</property>

	<property>
	<name>hive.querylog.location</name>
	<value>${test.tmp.dir}/tmp</value>
	<description>Location of the structured hive logs</description>
	</property>

	<property>
	<name>hive.exec.pre.hooks</name>
	<value>org.apache.hadoop.hive.ql.hooks.PreExecutePrinter, org.apache.hadoop.hive.ql.hooks.EnforceReadOnlyTables</value>
	<description>Pre Execute Hook for Tests</description>
	</property>

	<property>
	<name>hive.exec.post.hooks</name>
	<value>org.apache.hadoop.hive.ql.hooks.PostExecutePrinter</value>
	<description>Post Execute Hook for Tests</description>
	</property>

	<property>
	<name>hive.support.concurrency</name>
	<value>false</value>
	<description>Whether hive supports concurrency or not. A zookeeper instance must be up and running for the default hive lock manager to support read-write locks.</description>
	</property>

	<property>
	<name>fs.pfile.impl</name>
	<value>org.apache.hadoop.fs.ProxyLocalFileSystem</value>
	<description>A proxy for local file system used for cross file system testing</description>
	</property>

	<property>
	<name>hive.exec.mode.local.auto</name>
	<value>false</value>
	<description>
	Let hive determine whether to run in local mode automatically
	Disabling this for tests so that minimr is not affected
	</description>
	</property>

	<property>
	<name>hive.auto.convert.join</name>
	<value>false</value>
	<description>Whether Hive enables the optimization that converts a common join into a mapjoin based on the input file size</description>
	</property>

	<property>
	<name>hive.ignore.mapjoin.hint</name>
	<value>true</value>
	<description>Whether Hive ignores the mapjoin hint</description>
	</property>

	<property>
	<name>io.sort.mb</name>
	<value>10</value>
	</property>

	<property>
	<name>hive.input.format</name>
	<value>org.apache.hadoop.hive.ql.io.CombineHiveInputFormat</value>
	<description>The default input format, if it is not specified, the system assigns it. It is set to HiveInputFormat for hadoop versions 17, 18 and 19, whereas it is set to CombineHiveInputFormat for hadoop 20. The user can always overwrite it - if there is a bug in CombineHiveInputFormat, it can always be manually set to HiveInputFormat. </description>
	</property>

	<property>
	<name>hive.default.rcfile.serde</name>
	<value>org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe</value>
	<description>The default SerDe hive will use for the rcfile format</description>
	</property>

	<property>
	<name>hive.stats.dbclass</name>
	<value>fs</value>
	<description>The default storage that stores temporary hive statistics. Currently, only the fs type is supported</description>
	</property>

	<property>
	<name>hive.stats.fetch.column.stats</name>
	<value>true</value>
	<description>Use column stats to annotate stats for physical optimization phase</description>
	</property>

	<property>
	<name>hive.execution.engine</name>
	<value>tez</value>
	<description>Whether to use MR or Tez</description>
	</property>

	<property>
	<name>tez.am.node-blacklisting.enabled</name>
	<value>false</value>
	</property>

	<property>
	<name>hive.prewarm.enabled</name>
	<value>true</value>
	<description>
	Enables container prewarm for tez (hadoop 2 only)
	</description>
	</property>

	<property>
	<name>hive.prewarm.numcontainers</name>
	<value>3</value>
	<description>
	Controls the number of containers to prewarm for tez (hadoop 2 only)
	</description>
	</property>

	<property>
	<name>hive.in.tez.test</name>
	<value>true</value>
	<description>
	Indicates that we are in tez testing mode.
	</description>
	</property>

	<property>
	<name>hive.execution.mode</name>
	<value>llap</value>
	</property>

	<!--property>
	This is determined by the cluster type, and is set in code - HadoopShims, etc.
	<name>hive.llap.execution.mode</name>
	<value>all</value>
	</property-->

	<!-- Java -D options supplied through hive.tez.java.opts: they select the log4j2 configuration -->
	<!-- file tez-container-log4j2.properties, the INFO log level and the CLA root logger. -->
	<!-- The value below begins and ends with a space; keep it exactly as written. -->
	<property>
	<name>hive.tez.java.opts</name>
	<value> -Dlog4j.configurationFile=tez-container-log4j2.properties -Dtez.container.log.level=INFO -Dtez.container.root.logger=CLA </value>
	</property>

	<!-- tez.am.launch.cmd-opts is given the identical option string as hive.tez.java.opts above. -->
	<property>
	<name>tez.am.launch.cmd-opts</name>
	<value> -Dlog4j.configurationFile=tez-container-log4j2.properties -Dtez.container.log.level=INFO -Dtez.container.root.logger=CLA </value>
	</property>

	<property>
	<name>hive.tez.exec.print.summary</name>
	<value>true</value>
	</property>

	<!-- LLAP cache and I/O allocator settings. hive.llap.cache.allow.synthetic.fileid is -->
	<!-- declared exactly once, here; it must not be repeated after the LLAP daemon settings. -->
	<property>
	<name>hive.llap.cache.allow.synthetic.fileid</name>
	<value>true</value>
	</property>

	<property>
	<name>hive.llap.io.allocator.direct</name>
	<value>false</value>
	</property>

	<property>
	<name>hive.explain.user</name>
	<value>false</value>
	</property>

	<property>
	<name>hive.orc.splits.ms.footer.cache.enabled</name>
	<value>true</value>
	</property>

	<property>
	<name>hive.join.inner.residual</name>
	<value>true</value>
	</property>

	<!-- LLAP daemon settings: host localhost, one executor, an IO thread pool of size 1 -->
	<!-- and a scheduler wait queue of size 4. -->
	<property>
	<name>hive.llap.daemon.service.hosts</name>
	<value>localhost</value>
	</property>

	<!-- NOTE(review): port 0 presumably requests a dynamically assigned port; confirm. -->
	<property>
	<name>hive.llap.daemon.service.port</name>
	<value>0</value>
	</property>

	<property>
	<name>hive.llap.daemon.num.executors</name>
	<value>1</value>
	</property>

	<property>
	<name>hive.llap.io.threadpool.size</name>
	<value>1</value>
	</property>

	<property>
	<name>hive.llap.daemon.task.scheduler.wait.queue.size</name>
	<value>4</value>
	</property>

	<!-- hadoop IPC options -->
	<property>
	<name>ipc.client.low-latency</name>
	<value>true</value>
	<description> </description>
	</property>
	<property>
	<name>ipc.client.tcpnodelay</name>
	<value>true</value>
	<description> </description>
	</property>
	<property>
	<name>ipc.clients-per-factory</name>
	<value>4</value>
	<description> </description>
	</property>

	<property>
	<name>hive.stats.fetch.bitvector</name>
	<value>true</value>
	</property>

	<property>
	<name>hive.tez.cartesian-product.enabled</name>
	<value>true</value>
	</property>

	<property>
	<name>yarn.nodemanager.disk-health-checker.max-disk-utilization-per-disk-percentage</name>
	<value>99</value>
	</property>

	<property>
	<name>hive.query.results.cache.enabled</name>
	<value>false</value>
	</property>

	<property>
	<name>hive.query.reexecution.stats.persist.scope</name>
	<value>query</value>
	</property>

	<property>
	<name>hive.cbo.fallback.strategy</name>
	<value>TEST</value>
	</property>

	<property>
	<name>iceberg.hive.keep.stats</name>
	<value>true</value>
	<description>
	We want to keep the stats in Hive sessions.
	</description>
	</property>

	<property>
	<name>hive.txn.xlock.ctas</name>
	<value>false</value>
	</property>

	</configuration>