# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# Pig configuration file. All values can be overwritten by command line arguments.

# Use the "-h properties" command to see description of the properties

# log4jconf log4j configuration file
# log4jconf=./conf/log4j.properties

# a file that contains pig script
#file=

# load jarfile, colon separated
#jar=

#verbose print all log messages to screen (default to print only INFO and above to screen)
#verbose=true

#exectype local|mapreduce, mapreduce is default
#exectype=local

#the default timezone: if it is not set, the default timezone for this host is used.
#the correct timezone format is the UTC offset: e.g., +08:00.
#pig.datetime.default.tz=

#pig.logfile=

#Do not spill temp files smaller than this size (bytes)
#pig.spill.size.threshold=5000000

#EXPERIMENT: Activate garbage collection when spilling a file bigger than this size (bytes)
#This should help reduce the number of files being spilled.
#pig.spill.gc.activation.size=40000000

#the following two parameters are to help estimate the reducer number
#pig.exec.reducers.bytes.per.reducer=1000000000
#pig.exec.reducers.max=999

#Logging properties
#verbose=false
#brief=false
#debug=INFO
#aggregate.warning=true

#Performance tuning properties
#pig.cachedbag.memusage=0.2
#pig.skewedjoin.reduce.memusage=0.3
#pig.exec.nocombiner=false
#opt.multiquery=true
#pig.tmpfilecompression=false

#value can be lzo or gzip
#pig.tmpfilecompression.codec=gzip
#pig.noSplitCombination=true

#pig.exec.mapPartAgg=false
#pig.exec.mapPartAgg.minReduction=10

#exectype=mapreduce
#pig.additional.jars=<comma separated list of jars>
#udf.import.list=<comma separated list of imports>
#stop.on.failure=false

#Use this option only when your Pig job will otherwise die because of
#using more counters than hadoop configured limit
#pig.disable.counter=true

# Use this option to turn on UDF timers. This will cause two
# counters to be tracked for every UDF and LoadFunc in your script:
# approx_microsecs measures approximate time spent inside a UDF
# approx_invocations reports the approximate number of times the UDF was invoked
# pig.udf.profile=false

#When enabled, 'describe' prints a multi-line formatted schema
#(similar to an indented json) rather than on a single line.
#pig.pretty.print.schema=true

#pig.sql.type=hcat
hcat.bin=/usr/local/hcat/bin/hcat

############################ SchemaTuple ############################

# Setting this value will turn on the SchemaTuple feature (PIG-2632)
# This will attempt to use code generation for more efficient processing
# within the pig code. This can lead to CPU, serialization, and memory
# benefits (currently, the potential memory benefits are the largest).

# This parameter will enable the optimization in all available cases
#pig.schematuple=true

# Certain cases can be turned off by uncommenting the following. These will
# all be off by default, but will all be turned on if pig.schematuple is set
# to true.

# This will disable SchemaTuples in the case of udfs. Currently,
# the input to UDF's will be SchemaTuples.

#pig.schematuple.udf=false

# This is currently not implemented. In the future, LoadFunc's with known
# schemas should output SchemaTuples

#pig.schematuple.load=false

# This will use SchemaTuples in replicated joins. The potential memory saving
# here is significant. It will use SchemaTuples when it builds the HashMap of
# the join key to related values.

#pig.schematuple.fr_join=false

# In the current implementation of merge join, all of the Tuples in the left relation
# that share a given key will be stored in a List in memory. This will use SchemaTuples
# instead in that List.

#pig.schematuple.merge_join=false

#####################################################################

##### Set up optional Pig Progress Notification Listener ############

# Note that only one PPNL can be set up. If you need several, write a PPNL that will chain them.
# pig.notification.listener = <fully qualified class name of a PPNL implementation>

# Optionally, you can supply a single String argument to pass to your PPNL.
# pig.notification.listener.arg = <somevalue>

#####################################################################

########## Override the default Reducer Estimator logic #############

# By default, the logic to estimate the number of reducers to use for a given job lives in:
# org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.InputSizeReducerEstimator
# This logic can be replaced by implementing the following interface:
# org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigReducerEstimator

# This class will be invoked to estimate the number of reducers to use.
# pig.exec.reducer.estimator = <fully qualified class name of a PigReducerEstimator implementation>

# Optionally, you can supply a single String argument to pass to your PigReducerEstimator.
# pig.exec.reducer.estimator.arg = <somevalue>

#####################################################################

#pig.load.default.statements=