| # Licensed to the Apache Software Foundation (ASF) under one |
| # or more contributor license agreements. See the NOTICE file |
| # distributed with this work for additional information |
| # regarding copyright ownership. The ASF licenses this file |
| # to you under the Apache License, Version 2.0 (the |
| # "License"); you may not use this file except in compliance |
| # with the License. You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, |
| # software distributed under the License is distributed on an |
| # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| # KIND, either express or implied. See the License for the |
| # specific language governing permissions and limitations |
| # under the License. |
| |
| # Pig configuration file. All values can be overwritten by command line arguments. |
| |
| # Use the "-h properties" command to see description of the properties |
| |
| # log4jconf log4j configuration file |
| # log4jconf=./conf/log4j.properties |
| |
| # a file that contains pig script |
| #file= |
| |
| # load jarfile, colon separated |
| #jar= |
| |
| #verbose print all log messages to screen (default to print only INFO and above to screen) |
| #verbose=true |
| |
| #exectype local|mapreduce, mapreduce is default |
| #exectype=local |
| |
| #the default timezone: if it is not set, the default timezone for this host is used. |
| #the correct timezone format is the UTC offset: e.g., +08:00. |
| #pig.datetime.default.tz= |
| |
| #pig.logfile= |
| |
| #Do not spill temp files smaller than this size (bytes) |
| #pig.spill.size.threshold=5000000 |
| |
| #EXPERIMENT: Activate garbage collection when spilling a file bigger than this size (bytes) |
| #This should help reduce the number of files being spilled. |
| #pig.spill.gc.activation.size=40000000 |
| |
| #the following two parameters are to help estimate the reducer number |
| #pig.exec.reducers.bytes.per.reducer=1000000000 |
| #pig.exec.reducers.max=999 |
| |
| #Logging properties |
| #verbose=false |
| #brief=false |
| #debug=INFO |
| #aggregate.warning=true |
| |
| #Performance tuning properties |
| #pig.cachedbag.memusage=0.2 |
| #pig.skewedjoin.reduce.memusage=0.3 |
| #pig.exec.nocombiner=false |
| #opt.multiquery=true |
| #pig.tmpfilecompression=false |
| |
| #value can be lzo or gzip |
| #pig.tmpfilecompression.codec=gzip |
| #pig.noSplitCombination=true |
| |
| #pig.exec.mapPartAgg=false |
| #pig.exec.mapPartAgg.minReduction=10 |
| |
| #exectype=mapreduce |
| #pig.additional.jars=<comma separated list of jars> |
| #udf.import.list=<comma separated list of imports> |
| #stop.on.failure=false |
| |
| #Use this option only when your Pig job will otherwise die because of |
| #using more counters than hadoop configured limit |
| #pig.disable.counter=true |
| |
| # Use this option to turn on UDF timers. This will cause two |
| # counters to be tracked for every UDF and LoadFunc in your script: |
| # approx_microsecs measures approximate time spent inside a UDF |
| # approx_invocations reports the approximate number of times the UDF was invoked |
| # pig.udf.profile=false |
| |
| #When enabled, 'describe' prints a multi-line formatted schema |
| #(similar to indented JSON) rather than on a single line. |
| #pig.pretty.print.schema=true |
| |
| #pig.sql.type=hcat |
| hcat.bin=/usr/local/hcat/bin/hcat |
| |
| ############################ SchemaTuple ############################ |
| |
| # Setting this value will turn on the SchemaTuple feature (PIG-2632). |
| # This will attempt to use code generation for greater efficiency within |
| # the Pig code. This can lead to CPU, serialization, and memory |
| # benefits (currently, the potential memory benefits are the largest). |
| |
| # This parameter will enable the optimization in all available cases |
| #pig.schematuple=true |
| |
| # Certain cases can be turned off by uncommenting the following. These will |
| # all be off by default, but will all be turned on if pig.schematuple is set |
| # to true. |
| |
| # This will disable SchemaTuples in the case of UDFs. Currently, |
| # the input to UDFs will be SchemaTuples. |
| |
| #pig.schematuple.udf=false |
| |
| # This is currently not implemented. In the future, LoadFuncs with known |
| # schemas should output SchemaTuples. |
| |
| #pig.schematuple.load=false |
| |
| # This will use SchemaTuples in replicated joins. The potential memory saving |
| # here is significant. It will use SchemaTuples when it builds the HashMap of |
| # the join key to related values. |
| |
| #pig.schematuple.fr_join=false |
| |
| # In the current implementation of merge join, all of the Tuples in the left relation |
| # that share a given key will be stored in a List in memory. This will use SchemaTuples |
| # instead in that List. |
| |
| #pig.schematuple.merge_join=false |
| |
| ##################################################################### |
| |
| ##### Set up optional Pig Progress Notification Listener ############ |
| |
| # Note that only one PPNL can be set up. If you need several, write a PPNL that will chain them. |
| # pig.notification.listener = <fully qualified class name of a PPNL implementation> |
| |
| # Optionally, you can supply a single String argument to pass to your PPNL. |
| # pig.notification.listener.arg = <somevalue> |
| |
| ##################################################################### |
| |
| ########## Override the default Reducer Estimator logic ############# |
| |
| # By default, the logic to estimate the number of reducers to use for a given job lives in: |
| # org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.InputSizeReducerEstimator |
| # This logic can be replaced by implementing the following interface: |
| # org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigReducerEstimator |
| |
| # This class will be invoked to estimate the number of reducers to use. |
| # pig.exec.reducer.estimator = <fully qualified class name of a PigReducerEstimator implementation> |
| |
| # Optionally, you can supply a single String argument to pass to your PigReducerEstimator. |
| # pig.exec.reducer.estimator.arg = <somevalue> |
| |
| ##################################################################### |
| |
| ###### Override the default Pig Stats Output Size Reader logic ###### |
| |
| # By default, the size of reducers output is computed as the total size of |
| # output files. But since not every storage is file-based, this logic is not |
| # always applicable. If that is the case, the logic can be replaced by |
| # implementing the following interface: |
| # org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigStatsOutputSizeReader |
| |
| # This class will be invoked to compute the size of reducers output. |
| # pig.stats.output.size.reader = <fully qualified class name of a PigStatsOutputSizeReader implementation> |
| |
| # If you need to register more than one reader, you can register them as a comma |
| # separated list. Every reader implements a boolean supports(POStore sto) method. |
| # When there is more than one reader, they are consulted in order, and the |
| # first one whose supports() method returns true will be used. |
| # |
| ##################################################################### |
| |
| #pig.load.default.statements= |
| |
| ##################################################################### |
| |
| ########### Override hadoop configs programmatically ################ |
| |
| # By default, Pig expects hadoop configs (hadoop-site.xml and core-site.xml) |
| # to be present on the classpath. There are cases when these configs |
| # need to be passed programmatically, such as while using the PigServer API. |
| # In such cases, you can override hadoop configs by setting the property |
| # "pig.use.overriden.hadoop.configs". |
| # |
| # When this property is set to true, Pig does not look for hadoop configs |
| # in the classpath and instead picks them up from the Properties/Configuration |
| # object passed to it. |
| |
| # pig.use.overriden.hadoop.configs=false |
| # |
| ###################################################################### |
| |
| # Check whether multiple stores in the script write to the same |
| # location. When set to true, stops the execution of the script |
| # right away. |
| pig.location.check.strict=false |
| |
| ###################################################################### |
| |
| # This key is used to define the default load func. Pig will fall back |
| # to PigStorage as the default in case this is undefined. |
| |
| # pig.default.load.func=<fully qualified class name of a LoadFunc implementation> |
| # For example, pig.default.load.func=org.apache.pig.custom.MyCustomStorage |
| |
| # This key is used to define the default store func. Pig will fall back |
| # to PigStorage as the default in case this is undefined. |
| |
| # pig.default.store.func=<fully qualified class name of a StoreFunc implementation> |
| # For example, pig.default.store.func=org.apache.pig.custom.MyCustomStorage |
| |
| # This option is used to define whether to support recovery to handle the |
| # application master getting restarted. |
| # pig.output.committer.recovery.support=true |
| |
| # Set this option to true if you need to use the old partition filter optimizer. |
| # Note: Old filter optimizer PColFilterOptimizer will be deprecated in the future. |
| # pig.exec.useOldPartitionFilterOptimizer=true |