| # Licensed to the Apache Software Foundation (ASF) under one | 
 | # or more contributor license agreements.  See the NOTICE file | 
 | # distributed with this work for additional information | 
 | # regarding copyright ownership.  The ASF licenses this file | 
 | # to you under the Apache License, Version 2.0 (the | 
 | # "License"); you may not use this file except in compliance | 
 | # with the License.  You may obtain a copy of the License at | 
 | # | 
 | #  http://www.apache.org/licenses/LICENSE-2.0 | 
 | # | 
 | # Unless required by applicable law or agreed to in writing, | 
 | # software distributed under the License is distributed on an | 
 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | 
 | # KIND, either express or implied.  See the License for the | 
 | # specific language governing permissions and limitations | 
 | # under the License. | 
 |  | 
 | # Pig configuration file. All values can be overwritten by command line | 
 | # arguments; for a description of the properties, run | 
 | # | 
 | #     pig -h properties | 
 | # | 
 |  | 
 | ############################################################################ | 
 | # | 
 | # == Logging properties | 
 | # | 
 |  | 
 | # Location of pig log file. If blank, a file with a timestamped slug | 
 | # ('pig_1399336559369.log') will be generated in the current working directory. | 
 | # | 
 | # pig.logfile= | 
 | # pig.logfile=/tmp/pig-err.log | 
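 | # |
 | # For example (hypothetical path and script name), the log location can also be |
 | # set for a single run from the command line, as with any property in this file: |
 | # |
 | #     pig -Dpig.logfile=/var/log/pig/nightly_rollup.log nightly_rollup.pig |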
 |  | 
 | # Log4j configuration file. Set at runtime with the -4 parameter. The source | 
 | # distribution has a ./conf/log4j.properties.template file you can rename and | 
 | # customize. | 
 | # | 
 | # log4jconf=./conf/log4j.properties | 
 |  | 
 | # Verbose Output. | 
 | # * false (default): print only INFO and above to screen | 
 | # * true: print all log messages to screen |
 | # | 
 | # verbose=false | 
 |  | 
 | # Omit timestamps on log messages. (default: false) | 
 | # | 
 | # brief=false | 
 |  | 
 | # Logging level. debug=OFF|ERROR|WARN|INFO|DEBUG (default: INFO) | 
 | # | 
 | # debug=INFO | 
 |  | 
 | # Roll up warnings across tasks, so that when millions of mappers suddenly cry | 
 | # out in error they are partially silenced. (default, recommended: true) | 
 | # | 
 | # aggregate.warning=true | 
 |  | 
 | # Should DESCRIBE pretty-print its schema? | 
 | # * false (default): print on a single line, suitable for pasting back into your script |
 | # * true (recommended): prints on multiple lines with indentation, much more readable | 
 | # | 
 | # pig.pretty.print.schema=false | 
 |  | 
 | # === Profiling UDFs  === | 
 |  | 
 | # Turn on UDF timers? This will cause two counters to be tracked for every |
 | # UDF and LoadFunc in your script: approx_microsecs measures the approximate |
 | # time spent inside a UDF; approx_invocations reports the approximate number |
 | # of times the UDF was invoked. |
 | # | 
 | # * false (default): do not record timing information of UDFs. | 
 | # * true: report UDF performance. Uses more counters, but gives more insight | 
 | #   into script operation | 
 | # | 
 | # pig.udf.profile=false | 
 |  | 
 | # Specify frequency of profiling (default: every 100th). | 
 | # pig.udf.profile.frequency=100 | 
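 | # |
 | # For example (hypothetical script name), to profile every 50th invocation on a |
 | # single run without editing this file: |
 | # |
 | #     pig -Dpig.udf.profile=true -Dpig.udf.profile.frequency=50 udf_heavy_job.pig |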
 |  | 
 | ############################################################################ | 
 | # | 
 | # == Site-specific Properties | 
 | # | 
 |  | 
 | # Execution Mode. Local mode is much faster, but only suitable for small amounts | 
 | # of data. Local mode interprets paths on the local file system; MapReduce mode |
 | # interprets them on HDFS. Read more under 'Execution Modes' within the Getting Started |
 | # documentation. | 
 | # | 
 | # * mapreduce (default): use the Hadoop cluster defined in your Hadoop config files | 
 | # * local: use local mode | 
 | # * tez: use Tez on Hadoop cluster | 
 | # * tez_local: use Tez local mode | 
 | # | 
 | # exectype=mapreduce | 
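 | # |
 | # The execution type is more commonly chosen per run with the -x flag |
 | # (hypothetical script names): |
 | # |
 | #     pig -x local sanity_check.pig |
 | #     pig -x tez nightly_rollup.pig |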
 |  | 
 | # Bootstrap file with default statements to execute in every Pig job, similar to | 
 | # .bashrc.  If blank, uses the file '.pigbootup' from your home directory; if a |
 | # value is supplied, that file is NOT loaded.  This does not do tilde expansion | 
 | # -- you must supply the full path to the file. | 
 | # | 
 | # pig.load.default.statements= | 
 | # pig.load.default.statements=/home/bob/.pigrc | 
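 | # |
 | # A minimal sketch of what such a bootstrap file might contain (jar path is |
 | # hypothetical) -- ordinary Pig Latin statements that run before every script: |
 | # |
 | #     REGISTER /usr/local/share/pig/piggybank.jar; |
 | #     set default_parallel 20; |
 | #     set pig.pretty.print.schema true; |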
 |  | 
 | # Kill all waiting/running MR jobs upon an MR job failure? (default: false) If |
 | # false, jobs that can proceed independently will do so unless a parent stage | 
 | # fails. If true, the failure of any stage in the script kills all jobs. | 
 | # | 
 | # stop.on.failure=false | 
 |  | 
 | # File containing the pig script to run. Rarely set in the properties file. | 
 | # Commandline: -f | 
 | # | 
 | # file= | 
 |  | 
 | # Jarfile to load, colon separated. Rarely used. | 
 | # | 
 | # jar= | 
 |  | 
 | # Register additional .jar files to use with your Pig script. | 
 | # Most typically used as a command line option (see http://pig.apache.org/docs/r0.12.0/basic.html#register): | 
 | # | 
 | #     pig -Dpig.additional.jars=hdfs://nn.mydomain.com:9020/myjars/my.jar | 
 | # | 
 | # pig.additional.jars=<colon separated list of jars with optional wildcards> | 
 | # pig.additional.jars=/usr/local/share/pig/pig/contrib/piggybank/java/piggybank.jar:/usr/local/share/pig/datafu/datafu-pig/build/libs/datafu-pig-1.2.1.jar | 
 |  | 
 | # Specify potential packages to which a UDF or a group of UDFs belong, | 
 | # eliminating the need to qualify the UDF on every call. See | 
 | # http://pig.apache.org/docs/r0.12.0/udf.html#use-short-names | 
 | # | 
 | # Commandline use: | 
 | # | 
 | #     pig \ | 
 | #       -Dpig.additional.jars=$PIG_HOME/contrib/piggybank/java/piggybank.jar:$PIG_HOME/../datafu/datafu-pig/build/libs/datafu-pig-1.2.1.jar \ | 
 | #       -Dudf.import.list=org.apache.pig.piggybank.evaluation:datafu.pig.util \ | 
 | #       happy_job.pig | 
 | # | 
 | # udf.import.list=<colon separated list of imports> | 
 | # udf.import.list=org.apache.pig.piggybank.evaluation:datafu.pig.bags:datafu.pig.hash:datafu.pig.stats:datafu.pig.util | 
 |  | 
 | # | 
 | # Reuse jars across jobs run by the same user? (default: false) If enabled, jars | 
 | # are placed in ${pig.user.cache.location}/${user.name}/.pigcache. Since most | 
 | # jars change infrequently, this gives a minor speedup. | 
 | # | 
 | # pig.user.cache.enabled=false | 
 |  | 
 | # Base path for storing jars cached by the pig.user.cache.enabled feature. (default: /tmp) | 
 | # | 
 | # pig.user.cache.location=/tmp | 
 |  | 
 | # Replication factor for cached jars. If not specified mapred.submit.replication | 
 | # is used, whose default is 10. | 
 | # | 
 | # pig.user.cache.replication=10 | 
 |  | 
 | # Default UTC offset. (default: the host's current UTC offset) Supply a UTC | 
 | # offset in Java's timezone format: e.g., +08:00. | 
 | # | 
 | # pig.datetime.default.tz= | 
 |  | 
 | # Path to download the artifacts when registering ivy coordinates. This defaults | 
 | # to the directory grape uses for downloading libraries. | 
 | # (default: ~/.groovy/grapes) | 
 | # | 
 | # pig.artifacts.download.location= | 
 |  | 
 | ############################################################################ | 
 | # | 
 | # Memory impacting properties | 
 | # | 
 |  | 
 | # Amount of memory (as fraction of heap) allocated to bags before a spill is | 
 | # forced. Default is 0.2, meaning 20% of available memory. Note that this memory | 
 | # is shared across all large bags used by the application. See | 
 | # http://pig.apache.org/docs/r0.12.0/perf.html#memory-management | 
 | # | 
 | # pig.cachedbag.memusage=0.2 | 
 |  | 
 | # Don't spill bags smaller than this size (bytes). Default: 5000000, or about |
 | # 5MB. Usually, more spilling means longer runtime, so you may want to tune this |
 | # according to the heap size of each task and so forth. |
 | # | 
 | # pig.spill.size.threshold=5000000 | 
 |  | 
 | # EXPERIMENTAL: If a file bigger than this size (bytes) is spilled -- thus | 
 | # freeing a bunch of RAM -- tell the JVM to perform garbage collection.  This |
 | # should help reduce the number of files being spilled, but causes more-frequent | 
 | # garbage collection. Default: 40000000 (about 40 MB) | 
 | # | 
 | # pig.spill.gc.activation.size=40000000 | 
 |  | 
 | # A spill is triggered when the Old Generation heap usage exceeds the usage or |
 | # collection threshold. For bigger heap sizes, using a fixed size for the collection |
 | # and usage thresholds utilizes memory better than a percentage of the heap, so the |
 | # thresholds are calculated as |
 | #     usage threshold      = Max(HeapSize * pig.spill.memory.usage.threshold.fraction, HeapSize - pig.spill.unused.memory.threshold.size) |
 | #     collection threshold = Max(HeapSize * pig.spill.collection.threshold.fraction, HeapSize - pig.spill.unused.memory.threshold.size) |
 |  | 
 | # pig.spill.memory.usage.threshold.fraction=0.7 | 
 | # pig.spill.collection.threshold.fraction=0.7  | 
 | # pig.spill.unused.memory.threshold.size=367001600 | 
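 | # |
 | # A worked example, assuming a 4 GB task heap and the defaults above: the usage |
 | # threshold is Max(4 GB * 0.7, 4 GB - 350 MB) = about 3.66 GB, so for large heaps |
 | # the fixed unused-memory term dominates, as intended. |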
 |  | 
 | # Maximum amount of data to replicate using the distributed cache when doing | 
 | # fragment-replicated join. (default: 1000000000, about 1GB) Consider increasing | 
 | # this in a production environment, but carefully. | 
 | # | 
 | # pig.join.replicated.max.bytes=1000000000 | 
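 | # |
 | # Fragment-replicated joins are requested per join in Pig Latin (relation names |
 | # are hypothetical); the replicated relation must fit within this limit: |
 | # |
 | #     joined = JOIN big_events BY user_id, small_users BY user_id USING 'replicated'; |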
 |  | 
 | # Fraction of heap available for the reducer to perform a skewed join. A low | 
 | # fraction forces Pig to use more reducers, but increases the copying cost. See | 
 | # http://pig.apache.org/docs/r0.12.0/perf.html#skewed-joins | 
 | # | 
 | # pig.skewedjoin.reduce.memusage=0.3 | 
 |  | 
 | # | 
 | # === SchemaTuple === | 
 | # | 
 | # The SchemaTuple feature (PIG-2632) uses a tuple's schema (when known) to | 
 | # generate a custom Java class to hold records. Otherwise, tuples are loaded as | 
 | # a plain list that is unaware of its contents' schema -- and so each element | 
 | # has to be wrapped as a Java object on its own. This can provide more efficient | 
 | # CPU utilization, serialization, and most of all memory usage. | 
 | # | 
 | # This feature is considered experimental and is off by default. You can | 
 | # selectively enable it for specific operations using pig.schematuple.udf, | 
 | # pig.schematuple.load, pig.schematuple.fr_join and pig.schematuple.merge_join | 
 | # | 
 |  | 
 | # Enable the SchemaTuple optimization in all available cases? (default: false; recommended: true) | 
 | # | 
 | # pig.schematuple=false | 
 |  | 
 | # EXPERIMENTAL: Use SchemaTuples with UDFs (default: value of pig.schematuple). | 
 | # pig.schematuple.udf=false | 
 |  | 
 | # EXPERIMENTAL, CURRENTLY NOT IMPLEMENTED, but in the future, LoadFuncs with |
 | # known schemas should output SchemaTuples. (default: value of pig.schematuple) | 
 | # pig.schematuple.load=false | 
 |  | 
 | # EXPERIMENTAL: Use SchemaTuples in replicated joins. The potential memory | 
 | # saving here is significant. (default: value of pig.schematuple) | 
 | # pig.schematuple.fr_join=false | 
 |  | 
 | # EXPERIMENTAL: Use SchemaTuples in merge joins. (default: value of pig.schematuple). | 
 | # pig.schematuple.merge_join=false | 
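 | # |
 | # For example (hypothetical script name), to try the optimization everywhere on a |
 | # single run, or only for replicated joins via the selective flag above: |
 | # |
 | #     pig -Dpig.schematuple=true wide_schema_job.pig |
 | #     pig -Dpig.schematuple.fr_join=true wide_schema_job.pig |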
 |  | 
 | ############################################################################ | 
 | # | 
 | # Serialization options | 
 | # | 
 |  | 
 | # Omit empty part files from the output? (default: false) | 
 | # | 
 | # * false (default): each reducer generates an output file, even if its output is empty |
 | # * true (recommended): do not generate zero-byte part files | 
 | # | 
 | # The default behavior of MapReduce is to generate an empty file for no data, so | 
 | # Pig follows that. But many small files can cause annoying extra map tasks and | 
 | # put load on the HDFS, so consider setting this to 'true'. |
 | # | 
 | # pig.output.lazy=false | 
 |  | 
 | # | 
 | # === Tempfile Handling | 
 | # | 
 |  | 
 | # EXPERIMENTAL: Storage format for temporary files generated by intermediate |
 | # stages of Pig jobs. This can provide significant speed increases for certain |
 | # codecs, as reducing the amount of data transferred to and from disk can more |
 | # than make up for the cost of compression/decompression. We recommend that you |
 | # set up LZO compression in Hadoop and specify tfile storage. |
 | # | 
 | # Compress temporary files? | 
 | # * false (default): do not compress | 
 | # * true (recommended): compress temporary files. | 
 | # | 
 | # pig.tmpfilecompression=false | 
 | # pig.tmpfilecompression=true | 
 |  | 
 | # Tempfile storage container type. | 
 | # | 
 | # * tfile (default, recommended): more efficient, but only supports gz(gzip) and lzo compression. |
 | #   https://issues.apache.org/jira/secure/attachment/12396286/TFile%20Specification%2020081217.pdf | 
 | # * seqfile: only supports gz(gzip), lzo, snappy, and bzip2 compression | 
 | # | 
 | # pig.tmpfilecompression.storage=tfile | 
 |  | 
 | # Codec types for intermediate job files. tfile supports gz(gzip) and lzo; |
 | # seqfile supports gz(gzip), lzo, snappy, and bzip2. |
 | # | 
 | # * lzo (recommended with caveats): moderate compression, low cpu burden; | 
 | #   typically leads to a noticeable speedup. Best default choice, but you must | 
 | #   set up LZO independently due to license incompatibility | 
 | # * snappy: moderate compression, low cpu burden; typically leads to a noticeable speedup. |
 | # * gz (default): higher compression, high CPU burden. Typically leads to a noticeable slowdown. | 
 | # * bzip2: most compression, major CPU burden. Typically leads to a noticeable slowdown. | 
 | # | 
 | # pig.tmpfilecompression.codec=gzip | 
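 | # |
 | # A typical combination (a sketch, assuming LZO has been set up in Hadoop |
 | # separately): |
 | # |
 | # pig.tmpfilecompression=true |
 | # pig.tmpfilecompression.storage=tfile |
 | # pig.tmpfilecompression.codec=lzo |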
 |  | 
 | # | 
 | # === Split Combining | 
 | # | 
 |  | 
 | # | 
 | # Should pig try to combine small files for fewer map tasks? This improves the | 
 | # efficiency of jobs with many small input files, reduces the overhead on the | 
 | # jobtracker, and reduces the number of output files a map-only job | 
 | # produces. However, it only works with certain loaders and increases non-local | 
 | # map tasks. See http://pig.apache.org/docs/r0.12.0/perf.html#combine-files | 
 | # | 
 | # * false (default, recommended): _do_ combine files | 
 | # * true: do not combine files | 
 | # | 
 | # pig.noSplitCombination=false | 
 |  | 
 | # | 
 | # Size, in bytes, of data to be processed by a single map. Smaller files are | 
 | # combined until this size is reached. If unset, defaults to the file system's |
 | # default block size. | 
 | # | 
 | # pig.maxCombinedSplitSize= | 
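 | # |
 | # For example, to combine small files into roughly 256 MB splits |
 | # (256 * 1024 * 1024 bytes): |
 | # |
 | # pig.maxCombinedSplitSize=268435456 |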
 |  | 
 | # ########################################################################### | 
 | # | 
 | # Execution options | 
 | # | 
 |  | 
 | # Should pig omit combiners? (default, recommended: false -- meaning pig _will_ | 
 | # use combiners) | 
 | # | 
 | # When combiners work well, they eliminate a significant amount of | 
 | # data. However, if they do not eliminate much data -- say, a DISTINCT operation | 
 | # that only eliminates 5% of the records -- they add a noticeable overhead to | 
 | # the job. So the recommended default is false (use combiners), selectively | 
 | # disabling them per-job: | 
 | # | 
 | #     pig -Dpig.exec.nocombiner=true distinct_but_not_too_much.pig | 
 | # | 
 | # pig.exec.nocombiner=false | 
 |  | 
 | # Enable or disable use of combiners in the reducer shuffle-merge phase only |
 | # (pig.exec.nocombiner turns off combiners for both map and reduce phases). |
 | # Valid values are auto, true, and false. The default is auto, in which Pig turns |
 | # off the combiner on a per-combine-plan basis when bags are present in that plan. |
 | # A value of true or false applies to all combine plans in the script. |
 | # Currently this only applies to Tez, as MapReduce does not run combiners in the reducer (MAPREDUCE-5221). |
 |  | 
 | # pig.exec.nocombiner.reducer=auto | 
 |  | 
 | # EXPERIMENTAL: Aggregate records in map task before sending to the combiner? | 
 | # (default: false, 10; recommended: true, 10). In cases where there is a massive | 
 | # reduction of data in the aggregation step, pig can do a first pass of | 
 | # aggregation before the data even leaves the mapper, saving much serialization | 
 | # overhead. It's off by default but can give a major improvement to | 
 | # group-and-aggregate operations. Pig skips partial aggregation unless reduction | 
 | # is better than a factor of minReduction (default: 10). See | 
 | # http://pig.apache.org/docs/r0.12.0/perf.html#hash-based-aggregation | 
 | # | 
 | # pig.exec.mapPartAgg=false | 
 | # pig.exec.mapPartAgg.minReduction=10 | 
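 | # |
 | # For example (hypothetical script name), to try in-map aggregation on one |
 | # group-and-aggregate job with a lower required reduction factor: |
 | # |
 | #     pig -Dpig.exec.mapPartAgg=true -Dpig.exec.mapPartAgg.minReduction=5 daily_counts.pig |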
 |  | 
 | # | 
 | # === Control how many reducers are used. | 
 | # | 
 |  | 
 | # Estimate number of reducers naively using a fixed amount of data per | 
 | # reducer. Optimally, you have both fewer reducers than available reduce slots, | 
 | # and reducers that are neither getting too little data (less than a half-GB or | 
 | # so) nor too much data (more than 2-3 times the reducer child process max heap | 
 | # size). The default of 1000000000 (about 1GB) is probably low for a production | 
 | # cluster -- however it's much worse to set this too high (reducers spill many | 
 | # times over in group-sort) than too low (delay waiting for reduce slots). | 
 | # | 
 | # pig.exec.reducers.bytes.per.reducer=1000000000 | 
 |  | 
 | # | 
 | # Don't ever use more than this many reducers. (default: 999) | 
 | # | 
 | # pig.exec.reducers.max=999 | 
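 | # |
 | # A worked example: with the default 1000000000 bytes per reducer, a job reading |
 | # about 50 GB of input is estimated at roughly 50 reducers, capped by |
 | # pig.exec.reducers.max. An explicit PARALLEL clause or 'set default_parallel' |
 | # in the script overrides the estimate. |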
 |  | 
 | # | 
 | # === Local mode for small jobs | 
 | # | 
 |  | 
 | # EXPERIMENTAL: Use local mode for small jobs? If true, jobs with input data | 
 | # size smaller than pig.auto.local.input.maxbytes bytes and one or no reducers | 
 | # are run in local mode, which is much faster. Note that file paths are still | 
 | # interpreted as pig.exectype implies. | 
 | # | 
 | # * true (recommended): allow local mode for small jobs, which is much faster. | 
 | # * false (default): always use pig.exectype. | 
 | # | 
 | # pig.auto.local.enabled=false | 
 |  | 
 | # | 
 | # Definition of a small job for the pig.auto.local.enabled feature. Only jobs |
 | # with less than this many bytes of input are candidates to run locally (default: |
 | # 100000000 bytes, about 100MB) |
 | # | 
 | # pig.auto.local.input.maxbytes=100000000 | 
 |  | 
 |  | 
 | # | 
 | # Should Pig use Hadoop's BZip2Codec for bzip2 input? (for PigStorage and TextLoader) |
 | # Only available for Hadoop 2.x and later; ignored for others. (default: true) |
 | # | 
 | # pig.bzip.use.hadoop.inputformat=true | 
 |  | 
 |  | 
 | ############################################################################ | 
 | # | 
 | # Security Features | 
 | # | 
 |  | 
 | # Comma-delimited list of commands/operators that are disallowed. This security | 
 | # feature can be used by administrators to block use of certain commands by | 
 | # users. | 
 | # | 
 | # * <blank> (default): all commands and operators are allowed. | 
 | # * fs,set (for example): block all filesystem commands and config changes from pig scripts. | 
 | # | 
 | # pig.blacklist= | 
 | # pig.blacklist=fs,set | 
 |  | 
 | # Comma-delimited list of the only commands/operators that are allowed. This | 
 | # security feature can be used by administrators to block use of certain | 
 | # commands by users. | 
 | # | 
 | # * <blank> (default): all commands and operators not on the pig.blacklist are allowed. | 
 | # * load,store,filter,group: only LOAD, STORE, FILTER, and GROUP are allowed |
 | #   from pig scripts. All other commands and operators will fail. |
 | # | 
 | # pig.whitelist= | 
 | # pig.whitelist=load,store,filter,group | 
 |  | 
 | ##################################################################### | 
 | # | 
 | # Advanced Site-specific Customizations | 
 | # | 
 |  | 
 | # Remove intermediate output files? | 
 | # | 
 | # * true (default, recommended): remove the files | 
 | # * false: do NOT remove the files. You must clean them up yourself. | 
 | # | 
 | # Keeping them is useful for advanced debugging, but can be dangerous -- you | 
 | # must clean them up yourself.  Inspect the intermediate outputs with | 
 | # | 
 | #     LOAD '/path/to/tmp/file' USING org.apache.pig.impl.io.TFileStorage(); | 
 | # | 
 | # (Or ...SequenceFileInterStorage if pig.tmpfilecompression.storage is seqfile) | 
 | # | 
 | # pig.delete.temp.files=true | 
 |  | 
 | # EXPERIMENTAL: A Pig Progress Notification Listener (PPNL) lets you wire pig's | 
 | # progress into your visibility stack. To use a PPNL, supply the fully qualified | 
 | # class name of a PPNL implementation. Note that only one PPNL can be set up, so | 
 | # if you need several, write a PPNL that will chain them. | 
 | # | 
 | # See https://github.com/twitter/ambrose for a pretty awesome one of these | 
 | # | 
 | # pig.notification.listener=<fully qualified class name of a PPNL implementation> | 
 |  | 
 | # String argument to pass to your PPNL constructor (optional). Only a single | 
 | # string value is allowed. (default none) | 
 | # | 
 | # pig.notification.listener.arg=<somevalue> | 
 |  | 
 | # EXPERIMENTAL: Class invoked to estimate the number of reducers to use. | 
 | # (default: org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.InputSizeReducerEstimator) | 
 | # | 
 | # If you don't know how or why to write a PigReducerEstimator, you're unlikely | 
 | # to use this. By default, the naive mapReduceLayer.InputSizeReducerEstimator is | 
 | # used, but you can specify anything implementing the interface | 
 | # org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigReducerEstimator | 
 | # | 
 | # pig.exec.reducer.estimator=<fully qualified class name of a PigReducerEstimator implementation> | 
 |  | 
 | # Optional String argument to pass to your PigReducerEstimator. (default: none; | 
 | # a single String argument is allowed). | 
 | # | 
 | # pig.exec.reducer.estimator.arg=<somevalue> | 
 |  | 
 | # Class invoked to report the size of reducers output. By default, the reducers' | 
 | # output is computed as the total size of output files. But not every storage is | 
 | # file-based, and so this logic can be replaced by implementing the interface | 
 | # org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigStatsOutputSizeReader | 
 | # If you need to register more than one reader, you can register them as a |
 | # comma-separated list. Every reader implements a boolean supports(POStore sto) |
 | # method. When there is more than one reader, they are consulted in order, and |
 | # the first one whose supports() method returns true is used. |
 | # | 
 | # pig.stats.output.size.reader=<fully qualified class name of a PigStatsOutputSizeReader implementation> | 
 | # pig.stats.output.size.reader.unsupported=<comma separated list of StoreFuncs that are not supported by this reader> | 
 |  | 
 | # By default, Pig retrieves TaskReports for every launched task to compute | 
 | # various job statistics. But this can cause OOM if the number of tasks is | 
 | # large. In such cases, you can disable it by setting this property to true. |
 | # pig.stats.notaskreport=false | 
 |  | 
 | # | 
 | # Override hadoop configs programmatically |
 | # |
 | # By default, Pig expects hadoop configs (hadoop-site.xml and core-site.xml) |
 | # to be present on the classpath. There are cases when these configs need to be |
 | # passed programmatically, such as while using the PigServer API. In such cases, |
 | # you can override hadoop configs by setting the property |
 | # "pig.use.overriden.hadoop.configs". |
 | # |
 | # When this property is set to true, Pig does not look for hadoop configs on the |
 | # classpath and instead picks them up from the Properties/Configuration object |
 | # passed to it. |
 | # | 
 | # pig.use.overriden.hadoop.configs=false | 
 |  | 
 | # Implied LoadFunc for the LOAD operation when no USING clause is | 
 | # present. Supply the fully qualified class name of a LoadFunc | 
 | # implementation. Note: setting this means you will have to modify most code | 
 | # brought in from elsewhere on the web, as people generally omit the USING | 
 | # clause for TSV files. | 
 | # | 
 | # * org.apache.pig.builtin.PigStorage (default): the traditional tab-separated-values LoadFunc | 
 | # * my.custom.udfcollection.MyCustomLoadFunc (for example): use MyCustomLoadFunc instead | 
 | # | 
 | # pig.default.load.func=<fully qualified class name of a LoadFunc implementation> | 
 |  | 
 | # The implied StoreFunc for STORE operations with no USING clause. Supply the | 
 | # fully qualified class name of a StoreFunc implementation. | 
 | # | 
 | # * org.apache.pig.builtin.PigStorage (default): the traditional tab-separated-values StoreFunc. | 
 | # * my.custom.udfcollection.MyCustomStoreFunc (for example): use MyCustomStoreFunc instead | 
 | # | 
 | # pig.default.store.func=<fully qualified class name of a StoreFunc implementation> | 
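 | # |
 | # With the defaults, these two statements are equivalent (path and relation name |
 | # are hypothetical): |
 | # |
 | #     users = LOAD '/data/users.tsv'; |
 | #     users = LOAD '/data/users.tsv' USING org.apache.pig.builtin.PigStorage(); |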
 |  | 
 | # Recover jobs when the application master is restarted? (default: false). This | 
 | # is a Hadoop 2 specific property; enable it to take advantage of AM recovery. | 
 | # | 
 | # pig.output.committer.recovery.support=true | 
 |  | 
 | # Should scripts check to prevent multiple stores writing to the same location? | 
 | # (default: false) When set to true, execution of the script stops right away. |
 | # | 
 | pig.location.check.strict=false | 
 |  | 
 | # In addition to the fs-style commands (rm, ls, etc.) Pig can now execute |
 | # SQL-style DDL commands, e.g. "sql create table pig_test(name string, age int)". |
 | # The only implemented backend is hcat, and luckily that's also the default. | 
 | # | 
 | # pig.sql.type=hcat | 
 |  | 
 | # Path to the hcat executable, for use with pig.sql.type=hcat (default: null) | 
 | # | 
 | hcat.bin=/usr/local/hcat/bin/hcat | 
 |  | 
 | # Enable the ATS hook to log the Pig-specific ATS entry; disable only when the ATS server is not deployed. |
 | pig.ats.enabled=true | 
 |  | 
 | ########################################################################### | 
 | # | 
 | # Overrides for extreme environments | 
 | # | 
 | # (Most people won't have to adjust these parameters) | 
 | # | 
 |  | 
 |  | 
 | # Limit the pig script length placed in the jobconf xml. (default: 10240) |
 | # Extremely long queries can waste space in the JobConf; since its contents are | 
 | # only advisory, the default is fine unless you are retaining it for forensics. | 
 | # | 
 | # pig.script.max.size=10240 | 
 |  | 
 | # Disable use of counters by Pig. Note that the word 'counter' is singular here. | 
 | # | 
 | # * false (default, recommended): do NOT disable counters. | 
 | # * true: disable counters. Set this to true only when your Pig job would |
 | #   otherwise die because it uses more counters than the Hadoop-configured limit. |
 | # | 
 | # pig.disable.counter=true | 
 |  | 
 | # Sample size (per-mapper, in number of rows) the ORDER..BY operation's | 
 | # RandomSampleLoader uses to estimate how your data should be | 
 | # partitioned. (default, recommended: 100 rows per task) Increase this if you | 
 | # have exceptionally large input splits and are unhappy with the reducer skew. | 
 | # | 
 | # pig.random.sampler.sample.size=100 | 
 |  | 
 | # Process an entire script at once, reducing the amount of work and number of | 
 | # tasks? (default, recommended: true) See http://pig.apache.org/docs/r0.12.0/perf.html#multi-query-execution | 
 | # | 
 | # MultiQuery optimization is very useful, and so the recommended default is | 
 | # true. You may find that a script fails to compile under MultiQuery. If so, |
 | # disable it at runtime: | 
 | # | 
 | #     pig -no_multiquery script_that_makes_pig_sad.pig | 
 | # | 
 | # opt.multiquery=true | 
 |  | 
 | # For small queries, fetch data directly from the HDFS. (default, recommended: | 
 | # true). If you want to force Pig to launch a MR job, for example when you're | 
 | # testing a live cluster, disable with the -N option. See PIG-3642. | 
 | # | 
 | # opt.fetch=true | 
 |  | 
 | ######################################################################### | 
 | # | 
 | # Error Handling Properties | 
 | # | 
 | # By default, a Pig job fails immediately on encountering an error while writing |
 | # tuples for a store. If you want Pig to allow a certain number of errors before |
 | # failing, you can set this property. If the property is set to true and the |
 | # StoreFunc implements ErrorHandling, it will allow configurable errors based on |
 | # the OutputErrorHandler implementation. |
 | # pig.error-handling.enabled = false |
 | # |
 | # Controls the minimum number of error records allowed for a store |
 | # pig.error-handling.min.error.records = 0 |
 | # |
 | # Set the threshold for the percentage of errors |
 | # pig.error-handling.error.threshold = 0.0f |
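 | # |
 | # A sketch of a more lenient setup (assuming the StoreFunc in use implements |
 | # ErrorHandling; the values are illustrative only): |
 | # |
 | # pig.error-handling.enabled = true |
 | # pig.error-handling.min.error.records = 100 |
 | # pig.error-handling.error.threshold = 0.001f |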
 |  | 
 | ########################################################################### | 
 | # | 
 | # Streaming properties | 
 | # | 
 |  | 
 | # Define which properties will be set in the streaming environment. Set this |
 | # property to a comma-delimited list of property names, and those properties |
 | # will be set in the environment. |
 | # | 
 | # pig.streaming.environment=<comma-delimited list of properties> |
 |  | 
 | # Specify a comma-delimited list of local files to ship to distributed cache for | 
 | # streaming job. | 
 | # | 
 | # pig.streaming.ship.files=<comma-delimited list of local files> | 
 |  | 
 | # Specify a comma-delimited list of remote files to cache on distributed cache | 
 | # for streaming job. | 
 | # | 
 | # pig.streaming.cache.files=<comma-delimited list of remote files> | 
 |  | 
 | # Specify the python command to be used for python streaming udfs. By default, |
 | # python is used, but you can override it with a non-default version such as |
 | # python2.7. | 
 | # | 
 | # pig.streaming.udf.python.command=python | 
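 | # |
 | # A sketch of a streaming setup (file paths are hypothetical): ship a local |
 | # script to the distributed cache and run python UDFs with a specific python: |
 | # |
 | # pig.streaming.ship.files=/home/bob/scripts/cleanup.py |
 | # pig.streaming.udf.python.command=python2.7 |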
 |  | 
 | ########################################################################### | 
 | # | 
 | # Tez specific properties | 
 | # | 
 |  | 
 | # Enable auto/grace parallelism in Tez. The default is true, and these should be |
 | # left enabled unless you encounter a bug in automatic parallelism. |
 | # If pig.tez.auto.parallelism is set to false, 1 is used as the default parallelism. |
 |  | 
 | #pig.tez.auto.parallelism=true | 
 | #pig.tez.grace.parallelism=true | 
 |  | 
 | # Union optimization (pig.tez.opt.union=true) in tez uses vertex groups to store | 
 | # output from different vertices into one final output location. | 
 | # If a StoreFunc's OutputCommitter does not work with multiple vertices |
 | # writing to the same location, then you can disable union optimization just |
 | # for that StoreFunc. Refer to PIG-4649. You can also specify a whitelist of StoreFuncs |
 | # that are known to work with multiple vertices writing to the same location, instead of a blacklist. |
 |  | 
 | #pig.tez.opt.union.unsupported.storefuncs=org.apache.hcatalog.pig.HCatStorer,org.apache.hive.hcatalog.pig.HCatStorer | 
 | #pig.tez.opt.union.supported.storefuncs= | 
 |  | 
 |  | 
 | # For LoadFuncs specified here, Pig reads from the data source only once during a |
 | # sort, instead of loading once for sampling and again for partitioning. |
 | # Use this to avoid hitting external non-filesystem data sources like HBase and Accumulo twice. |
 |       | 
 | pig.sort.readonce.loadfuncs=org.apache.pig.backend.hadoop.hbase.HBaseStorage,org.apache.pig.backend.hadoop.accumulo.AccumuloStorage | 
 |  | 
 | # If set, Pig will override tez.am.launch.cmd-opts and tez.am.resource.memory.mb with |
 | # optimal values even if they are set to a different value. The default value is true. |
 | #pig.tez.configure.am.memory=false |