| # Pig configuration file. All values can be overwritten by command line arguments. |
| |
| # Use the "-h properties" command to see description of the properties |
| |
| # log4jconf log4j configuration file |
| # log4jconf=./conf/log4j.properties |
| |
| # a file that contains pig script |
| #file= |
| |
| # load jarfile, colon separated |
| #jar= |
| |
| #verbose: print all log messages to screen (by default, only INFO and above are printed to screen) |
| #verbose=true |
| |
| #exectype local|mapreduce, mapreduce is default |
| #exectype=local |
| |
| #pig.logfile= |
| |
| #Do not spill temp files smaller than this size (bytes) |
| #pig.spill.size.threshold=5000000 |
| #EXPERIMENT: Activate garbage collection when spilling a file bigger than this size (bytes) |
| #This should help reduce the number of files being spilled. |
| #pig.spill.gc.activation.size=40000000 |
| |
| #the following two parameters are to help estimate the reducer number |
| #pig.exec.reducers.bytes.per.reducer=1000000000 |
| #pig.exec.reducers.max=999 |
| |
| #Logging properties |
| #verbose=false |
| #brief=false |
| #debug=INFO |
| #aggregate.warning=true |
| |
| #Performance tuning properties |
| #pig.cachedbag.memusage=0.2 |
| #pig.skewedjoin.reduce.memusage=0.3 |
| #pig.exec.nocombiner=false |
| #opt.multiquery=true |
| #pig.tmpfilecompression=false |
| |
| #value can be lzo or gzip |
| #pig.tmpfilecompression.codec=gzip |
| #pig.noSplitCombination=true |
| #pig.exec.mapPartAgg=false |
| #pig.exec.mapPartAgg.minReduction=10 |
| |
| |
| #exectype=mapreduce |
| #pig.additional.jars=<comma separated list of jars> |
| #udf.import.list=<comma separated list of imports> |
| #stop.on.failure=false |
| |
| #Use this option only when your Pig job would otherwise die because it |
| #uses more counters than the limit configured in Hadoop |
| #pig.disable.counter=true |