# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# Pig configuration file. All values can be overridden by command-line arguments.
# Use the "-h properties" command to see descriptions of the properties.
# log4jconf log4j configuration file
# log4jconf=./conf/log4j.properties
# a file that contains a Pig script
#file=
# jar files to load, colon separated
#jar=
#verbose: print all log messages to screen (by default only INFO and above are printed)
#verbose=true
#exectype local|mapreduce, mapreduce is default
#exectype=local
#the default timezone: if it is not set, the default timezone for this host is used.
#the correct timezone format is the UTC offset: e.g., +08:00.
#pig.datetime.default.tz=
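#Example (illustrative value, not a default): treat datetimes without an
#explicit offset as UTC+05:30
#pig.datetime.default.tz=+05:30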
#pig.logfile=
#Do not spill temp files smaller than this size (bytes)
#pig.spill.size.threshold=5000000
#EXPERIMENTAL: activate garbage collection when spilling a file bigger than this size (bytes)
#This should help reduce the number of files being spilled.
#pig.spill.gc.activation.size=40000000
#the following two parameters help estimate the number of reducers
#pig.exec.reducers.bytes.per.reducer=1000000000
#pig.exec.reducers.max=999
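#Worked example (illustrative): with bytes.per.reducer=1000000000 (~1 GB) and
#10 GB of input, the estimate is ceil(10 GB / 1 GB) = 10 reducers, capped at
#pig.exec.reducers.max (999 here).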
#Logging properties
#verbose=false
#brief=false
#debug=INFO
#aggregate.warning=true
#Performance tuning properties
#pig.cachedbag.memusage=0.2
#pig.skewedjoin.reduce.memusage=0.3
#pig.exec.nocombiner=false
#opt.multiquery=true
#pig.tmpfilecompression=false
#value can be lzo or gzip
#pig.tmpfilecompression.codec=gzip
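#Example (illustrative): to compress intermediate temp files with gzip, the
#two properties above would be uncommented together:
#pig.tmpfilecompression=true
#pig.tmpfilecompression.codec=gzip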
#pig.noSplitCombination=true
#pig.exec.mapPartAgg=false
#pig.exec.mapPartAgg.minReduction=10
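#Example (illustrative): with minReduction=10, in-map partial aggregation
#stays on only if it shrinks the record count by at least a factor of 10;
#otherwise Pig disables it for that map.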
#pig.additional.jars=<comma separated list of jars>
#udf.import.list=<comma separated list of imports>
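#Example (hypothetical paths and package):
#pig.additional.jars=/usr/lib/pig/piggybank.jar,/home/me/myudfs.jar
#udf.import.list=org.apache.pig.piggybank.evaluation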
#stop.on.failure=false
#Use this option only when your Pig job would otherwise die because it
#uses more counters than Hadoop's configured limit
#pig.disable.counter=true
# Use this option to turn on UDF timers. This will cause two
# counters to be tracked for every UDF and LoadFunc in your script:
# approx_microsecs measures approximate time spent inside a UDF
# approx_invocations reports the approximate number of times the UDF was invoked
# pig.udf.profile=false
#When enabled, 'describe' prints a multi-line formatted schema
#(similar to indented JSON) rather than a single line.
#pig.pretty.print.schema=true
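#Example (illustrative): for A = LOAD 'data' AS (name:chararray, age:int),
#'describe A' would then print something like:
#A: {
#    name: chararray,
#    age: int
#}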
#pig.sql.type=hcat
hcat.bin=/usr/local/hcat/bin/hcat
############################ SchemaTuple ############################
# Setting this value will turn on the SchemaTuple feature (PIG-2632).
# Pig will attempt to use code generation for more efficient Tuple handling
# within the Pig code. This can yield CPU, serialization, and memory
# benefits (currently, the potential memory benefits are the largest).
# This parameter will enable the optimization in all available cases.
#pig.schematuple=true
# Individual cases can be turned off by uncommenting the following settings.
# They are all off by default, but all of them are turned on when
# pig.schematuple is set to true.
# This will disable SchemaTuples in the case of UDFs. Currently,
# the input to UDFs will be SchemaTuples.
#pig.schematuple.udf=false
# This is currently not implemented. In the future, LoadFuncs with known
# schemas should output SchemaTuples.
#pig.schematuple.load=false
# This will use SchemaTuples in replicated joins. The potential memory saving
# here is significant. It will use SchemaTuples when it builds the HashMap of
# the join key to related values.
#pig.schematuple.fr_join=false
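# Example (illustrative Pig Latin, hypothetical input paths): in a replicated
# join the second relation is held in an in-memory HashMap, which is where
# SchemaTuples can save memory:
# big   = LOAD 'big_input'   AS (id:int, val:chararray);
# small = LOAD 'small_input' AS (id:int, name:chararray);
# J     = JOIN big BY id, small BY id USING 'replicated';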
# In the current implementation of merge join, all of the Tuples in the left relation
# that share a given key will be stored in a List in memory. This will use SchemaTuples
# instead in that List.
#pig.schematuple.merge_join=false
#####################################################################
##### Set up optional Pig Progress Notification Listener ############
# Note that only one PPNL can be set up. If you need several, write a PPNL that will chain them.
# pig.notification.listener = <fully qualified class name of a PPNL implementation>
# Optionally, you can supply a single String argument to pass to your PPNL.
# pig.notification.listener.arg = <somevalue>
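# Example (hypothetical class and argument):
# pig.notification.listener = com.mycompany.pig.ChainingPPNL
# pig.notification.listener.arg = /etc/pig/ppnl.conf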
#####################################################################
########## Override the default Reducer Estimator logic #############
# By default, the logic to estimate the number of reducers to use for a given job lives in:
# org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.InputSizeReducerEstimator
# This logic can be replaced by implementing the following interface:
# org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigReducerEstimator
# This class will be invoked to estimate the number of reducers to use.
# pig.exec.reducer.estimator = <fully qualified class name of a PigReducerEstimator implementation>
# Optionally, you can supply a single String argument to pass to your PigReducerEstimator.
# pig.exec.reducer.estimator.arg = <somevalue>
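# Example (hypothetical class and argument):
# pig.exec.reducer.estimator = com.mycompany.pig.FixedReducerEstimator
# pig.exec.reducer.estimator.arg = 42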
#####################################################################
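#Assumed semantics (not described in the original file): path to a file of
#default Pig statements (e.g. REGISTER, DEFINE, SET) to run before each script.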
#pig.load.default.statements=