Merge latest trunk changes

git-svn-id: https://svn.apache.org/repos/asf/pig/branches/tez@1598376 13f79535-47bb-0310-9956-ffa450edef68
diff --git a/CHANGES.txt b/CHANGES.txt
index c8889b2..9d9571e 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -28,6 +28,8 @@
  
 BUG FIXES
 
+PIG-3968: OperatorPlan.serialVersionUID is not defined (daijy)
+
 Release 0.13.0 - Unreleased
  
 INCOMPATIBLE CHANGES
@@ -42,6 +44,14 @@
 
 IMPROVEMENTS
 
+PIG-3954: Document use of user level jar cache (aniket486)
+
+PIG-3752: Fix e2e Parallel test for Windows (daijy)
+
+PIG-3966: Document variable input arguments of UDFs (lbendig via aniket486)
+
+PIG-3963: Documentation for BagToString UDF (mrflip via daijy)
+
 PIG-3929: pig.temp.dir should allow to substitute vars as hadoop configuration does (aniket486)
 
 PIG-3913: Pig should use job's jobClient wherever possible (fixes local mode counters) (aniket486)
@@ -149,6 +159,30 @@
  
 BUG FIXES
 
+PIG-3915: MapReduce queries in Pigmix outputs different results than Pig's (keren3000 via daijy)
+
+PIG-3955: Remove url.openStream() file descriptor leak from JCC (aniket486)
+
+PIG-3958: TestMRJobStats is broken in 0.13 and trunk (aniket486)
+
+PIG-3949: HiveColumnarStorage compile failure with Hive 0.14.0 (daijy)
+
+PIG-3960: Compile fail against Hadoop 2.4.0 after PIG-3913 (daijy)
+
+PIG-3956: UDF profile is often misleading (cheolsoo)
+
+PIG-3950: Removing empty file PColFilterExtractor.java speeds up rebuilds (mrflip via cheolsoo)
+
+PIG-3940: NullPointerException writing .pig_header for field with null name in JsonMetadata.java (mrflip via cheolsoo)
+
+PIG-3944: PigNullableWritable toString method throws NPE on null value (mauzhang via cheolsoo)
+
+PIG-3936: DBStorage fails on storing nulls for non varchar columns (jeremykarn via cheolsoo) 
+
+PIG-3945: Ant not sending hadoopversion to piggybank sub-ant (mrflip via cheolsoo)
+
+PIG-3942: Util.buildPp() is incompatible with Non-MR execution engine (cheolsoo)
+
 PIG-3902: PigServer creates cycle (thedatachef via cheolsoo)
 
 PIG-3930: "java.io.IOException: Cannot initialize Cluster" in local mode with hadoopversion=23 dependencies (jira.shegalov via cheolsoo)
diff --git a/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/HiveColumnarStorage.java b/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/HiveColumnarStorage.java
index d6f12c9..22ccc29 100644
--- a/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/HiveColumnarStorage.java
+++ b/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/HiveColumnarStorage.java
@@ -103,8 +103,8 @@
         for (int i = 0; i < sz && i < numColumns; i++) {
 
             putField(byteStream, t.get(i));
-            colValRefs[i].set(byteStream.getData(), startPos, byteStream.getCount() - startPos);
-            startPos = byteStream.getCount();
+            colValRefs[i].set(byteStream.getData(), startPos, byteStream.getLength() - startPos);
+            startPos = byteStream.getLength();
         }
 
         try {
diff --git a/src/docs/src/documentation/content/xdocs/admin.xml b/src/docs/src/documentation/content/xdocs/admin.xml
index d8febd4..fdfdd32 100644
--- a/src/docs/src/documentation/content/xdocs/admin.xml
+++ b/src/docs/src/documentation/content/xdocs/admin.xml
@@ -31,5 +31,24 @@
             <p>Specifically this makes sense for file-based output locations (HDFS, Local FS, S3..) to avoid Pig script from failing when multiple MR jobs write to the same location. </p>   
             <p>To enforce strict checking of output location, set <strong>pig.location.check.strict=true</strong>. See also <a href="start.html#properties">Pig Properties</a> on how to set this property.</p>
     </section>
-  </body>
-</document>
\ No newline at end of file
+
+<!-- DISABLE PIG COMMANDS AND OPERATORS -->
+    <section>
+       <title>Disabling Pig commands and operators</title>
+           <p>This is an admin feature providing ability to blacklist or/and whitelist certain commands and operations. Pig exposes a few of these that could be not very safe in a multitenant environment. For example, "sh" invokes shell commands, "set" allows users to change non-final configs. While these are tremendously useful in general, having an ability to disable would make Pig a safer platform. The goal is to allow administrators to be able to have more control over user scripts. Default behaviour would still be the same - no filters applied on commands and operators.</p>
+           <p>There are two properties you can use to control what users are able to do</p>
+               <ul>
+                  <li>pig.blacklist</li>
+                  <li>pig.whitelist</li>
+               </ul>
+           <h4>Blacklisting</h4>
+             <p>Set "pig.blacklist" to a comma-delimited set of operators and commands. For example, <i>pig.blacklist=rm,kill,cross</i> would disable users from executing any of "rm", "kill" commands and "cross" operator.</p>
+
+           <h4>Whitelisting</h4>
+             <p>This is an even safer approach to disallowing functionality in Pig. Using this you will be able to disable all commands and operators that are not a part of the whitelist. For example, <i>pig.whitelist=load,filter,store</i> will disallow every command and operator other than "load", "filter" and "store". </p>
+             <h4>Note</h4>
+               <p>There should not be any conflicts between blacklist and whitelist. Make sure to have them entirely distinct or Pig will complain.</p>
+    </section>
+
+</body>
+</document>
diff --git a/src/docs/src/documentation/content/xdocs/func.xml b/src/docs/src/documentation/content/xdocs/func.xml
index cf79e80..3ce593b 100644
--- a/src/docs/src/documentation/content/xdocs/func.xml
+++ b/src/docs/src/documentation/content/xdocs/func.xml
@@ -194,18 +194,118 @@
    </table>
    <p>* Average values for datatypes bigdecimal and biginteger have precision setting <a href="http://docs.oracle.com/javase/7/docs/api/java/math/MathContext.html#DECIMAL128">java.math.MathContext.DECIMAL128</a>.</p>
    </section></section>
+
+<!-- ======================================================== -->
+
+<section id="bagtostring">
+  <title>BagToString</title>
+  <p>Concatenate the elements of a Bag into a chararray string, placing an optional delimiter between each value.</p>
+
+  <section>
+    <title>Syntax</title>
+    <table>
+      <tr>
+        <td>
+          <p>BagToString(vals:bag [, delimiter:chararray])</p>
+        </td>
+      </tr>
+  </table></section>
+
+  <section>
+    <title>Terms</title>
+    <table>
+      <tr>
+	<td><p>vals</p></td>
+        <td><p>A bag of arbitrary values. They will each be cast to chararray if they are not already.</p></td>
+      </tr>
+      <tr>
+	<td><p>delimiter</p></td>
+        <td><p>A chararray value to place between elements of the bag; defaults to underscore <code>'_'</code>.</p></td>
+      </tr>
+    </table>
+  </section>
+
+  <section>
+    <title>Usage</title>
+    <p>BagToString creates a single string from the elements of a bag, similar to SQL's <code>GROUP_CONCAT</code> function. Keep in mind the following:</p>
+    <ul>
+      <li>Bags can be of arbitrary size, while strings in Java cannot: you will either exhaust available memory or exceed the maximum number of characters (about 2 billion). One of the worst features a production job can have is thresholding behavior: everything will seem nearly fine until the data size of your largest bag grows from nearly-too-big to just-barely-too-big.</li>
+      <li>Bags are disordered unless you explicitly apply a nested <code>ORDER BY</code> operation as demonstrated below. A nested <code>FOREACH</code> will preserve ordering, letting you order by one combination of fields then project out just the values you'd like to concatenate.</li>
+      <li>The default string conversion is applied to each element. If the bag's contents are not atoms (tuple, map, etc.), this may not be what you want. Use a nested <code>FOREACH</code> to format values and then compose them with BagToString as shown below.</li>
+    </ul>
+    <p>Examples:</p>
+    <table>
+      <tr><th>vals</th> <th>delimiter</th> <th>BagToString(vals, delimiter)</th> <th>Notes</th> </tr>
+      <tr> <td><code>{('BOS'),('NYA'),('BAL')}</code></td> <td><code></code></td> <td><code>BOS_NYA_BAL</code></td> <td>If only one argument is given, the field is delimited with underscore characters</td></tr>
+      <tr> <td><code>{('BOS'),('NYA'),('BAL')}</code></td> <td><code>'|'</code></td> <td><code>BOS|NYA|BAL</code></td> <td>But you can supply your own delimiter</td></tr>
+      <tr> <td><code>{('BOS'),('NYA'),('BAL')}</code></td> <td><code>''</code></td> <td><code>BOSNYABAL</code></td> <td>Use an explicit empty string to just smush everything together</td></tr>
+      <tr> <td><code>{(1),(2),(3)}</code></td> <td><code>'|'</code></td> <td><code>1|2|3</code></td> <td>Elements are type-converted for you (but see examples below)</td></tr>
+    </table>
+  </section>
+  <section>
+    <title>Examples</title>
+    <p>Simple delimited strings are simple:</p>
+<source>
+team_parks = LOAD 'team_parks' AS (team_id:chararray, park_id:chararray, years:bag{(year_id:int)});
+
+-- BOS     BOS07   {(1995),(1997),(1996),(1998),(1999)}
+-- NYA     NYC16   {(1995),(1999),(1998),(1997),(1996)}
+-- NYA     NYC17   {(1998)}
+-- SDN     HON01   {(1997)}
+-- SDN     MNT01   {(1996),(1999)}
+-- SDN     SAN01   {(1999),(1997),(1998),(1995),(1996)}
+
+team_parkslist = FOREACH (GROUP team_parks BY team_id) GENERATE
+  group AS team_id, BagToString(team_parks.park_id, ';');
+
+-- BOS     BOS07
+-- NYA     NYC17;NYC16
+-- SDN     SAN01;MNT01;HON01
+</source>
+
+<p>The default handling of complex elements works, but probably isn't what you want.</p>
+<source>
+team_parkyearsugly = FOREACH (GROUP team_parks BY team_id) GENERATE
+  group AS team_id,
+  BagToString(team_parks.(park_id, years));
+
+-- BOS     BOS07_{(1995),(1997),(1996),(1998),(1999)}
+-- NYA     NYC17_{(1998)}_NYC16_{(1995),(1999),(1998),(1997),(1996)}
+-- SDN     SAN01_{(1999),(1997),(1998),(1995),(1996)}_MNT01_{(1996),(1999)}_HON01_{(1997)}
+</source>
+
+<p>Instead, assemble it in pieces. In step 2, we sort on one field but process another; it remains in the sorted order.</p>
+<source>
+team_park_yearslist = FOREACH team_parks {
+  years_o = ORDER years BY year_id;
+  GENERATE team_id, park_id, SIZE(years_o) AS n_years, BagToString(years_o, '/') AS yearslist;
+};
+team_parkyearslist = FOREACH (GROUP team_park_yearslist BY team_id) {
+  tpy_o = ORDER team_park_yearslist BY n_years DESC, park_id ASC;
+  tpy_f = FOREACH tpy_o GENERATE CONCAT(park_id, ':', yearslist);
+  GENERATE group AS team_id, BagToString(tpy_f, ';');
+  };
+
+-- BOS     BOS07:1995/1996/1997/1998/1999
+-- NYA     NYC16:1995/1996/1997/1998/1999;NYC17:1998
+-- SDN     SAN01:1995/1996/1997/1998/1999;MNT01:1996/1999;HON01:1997
+</source>
+
+  </section>
+</section>
+
    
    <!-- ++++++++++++++++++++++++++++++++++++++++++++++ --> 
    <section id="concat">
    <title>CONCAT</title>
-   <p>Concatenates two expressions of identical type.</p>
+   <p>Concatenates two or more expressions of identical type.</p>
    
    <section>
    <title>Syntax</title>
    <table>
        <tr>
             <td>
-               <p>CONCAT (expression, expression)</p>
+               <p>CONCAT (expression, expression, [...expression])</p>
             </td>
          </tr> 
    </table>
@@ -227,14 +327,14 @@
    
    <section>
    <title>Usage</title>
-   <p>Use the CONCAT function to concatenate two expressions. The result values of the two expressions must have identical types.</p>
-   <p>If either subexpression is null, the resulting expression is null.</p>
+   <p>Use the CONCAT function to concatenate two or more expressions. The result values of the expressions must have identical types.</p>
+   <p>If any subexpression is null, the resulting expression is null.</p>
    </section>
 
    
    <section>
    <title>Example</title>
-   <p>In this example fields f2 and f3 are concatenated.</p>
+   <p>In this example, fields f1, an underscore string literal, f2 and f3 are concatenated.</p>
 <source>
 A = LOAD 'data' as (f1:chararray, f2:chararray, f3:chararray);
 
@@ -243,12 +343,12 @@
 (hadoop,map,reduce)
 (pig,pig,latin)
 
-X = FOREACH A GENERATE CONCAT(f2,f3);
+X = FOREACH A GENERATE CONCAT(f1, '_', f2,f3);
 
 DUMP X;
-(opensource)
-(mapreduce)
-(piglatin)
+(apache_opensource)
+(hadoop_mapreduce)
+(pig_piglatin)
 </source>
 </section>
 </section>
diff --git a/src/docs/src/documentation/content/xdocs/perf.xml b/src/docs/src/documentation/content/xdocs/perf.xml
index a7c9b45..ce3f36f 100644
--- a/src/docs/src/documentation/content/xdocs/perf.xml
+++ b/src/docs/src/documentation/content/xdocs/perf.xml
@@ -1093,6 +1093,24 @@
 </p>
 
 </section>
+
+<!-- +++++++++++++++++++++++++++++++ -->
+<section id="user-jar-cache">
+<title>User Jar Cache</title>
+<p>Jars required for user defined functions (UDFs) are copied to distributed cache by pig to make them available on task nodes. To put these jars on distributed cache, pig clients copy these jars to HDFS under a temporary location. For scheduled jobs, these jars do not change frequently. Also, creating a lot of small jar files on HDFS is not HDFS friendly. To avoid copying these small jar files to HDFS again and again, pig allows users to configure a user level jar cache (readable only to the user for security reasons). If pig.user.cache.enabled flag is set to true, UDF jars are copied to jar cache location (configurable) under a directory named with the hash (SHA) of the jar. Hash of the jar is used to identify the existence of the jar in subsequent uses of the jar by the user. If a jar with same hash and filename is found in the cache, it is used avoiding copy of the jar to hdfs.</p>
+
+<p>You can set the values for these properties in order to configure the jar cache:</p>
+
+<ul>
+<li>pig.user.cache.enabled - Turn on/off user jar cache feature (false by default).</li>
+<li>pig.user.cache.location - Path on HDFS that will be used as a staging directory for the user jar cache (defaults to pig.temp.dir or /tmp).</li>
+</ul>
+<p></p>
+<p>
User jar cache feature is fail safe. If jars cannot be copied to the jar cache due to any permission/configuration problems, pig will default to the old behavior.
+</p>
+
+</section>
 </section>
   
 <!-- ==================================================================== -->
diff --git a/src/docs/src/documentation/content/xdocs/pig-index.xml b/src/docs/src/documentation/content/xdocs/pig-index.xml
index 3cae450..e8a803a 100644
--- a/src/docs/src/documentation/content/xdocs/pig-index.xml
+++ b/src/docs/src/documentation/content/xdocs/pig-index.xml
@@ -136,10 +136,13 @@
 <br></br>&nbsp;&nbsp;&nbsp; <a href="basic.html#bag-schema">and schemas</a>
 <br></br>&nbsp;&nbsp;&nbsp; <a href="func.html#tobag">and TOBAG function</a>
 <br></br>&nbsp;&nbsp;&nbsp; <a href="basic.html#type-construction">and type construction operators</a>
+<br></br>&nbsp;&nbsp;&nbsp; <a href="func.html#bagtostring">converting to string</a>
 <br></br>&nbsp;&nbsp;&nbsp; <a href="basic.html#schema-multi">schemas for multiple types</a>
 <br></br>&nbsp;&nbsp;&nbsp; <a href="basic.html#bag">syntax</a>
 </p>
 
+<p><a href="func.html#bagtostring">BagToString</a> function</p>
+
 <p><a href="start.html#batch-mode">batch mode</a>. <em>See also</em> memory management</p>
 
 <p><a href="basic.html#arithmetic">bincond operator</a> ( ?: )</p>
@@ -180,6 +183,8 @@
 
 <p><a href="func.html#ceil">CEIL</a> function</p>
 
+<p>chararray functions (see <a href="func.html#string-functions">String Functions</a>)</p>
+
 <p><a href="udf.html#checkschema">checkSchema</a> method</p>
 
 <p><a href="basic.html#cogroup">COGROUP</a> operator</p>
diff --git a/src/docs/src/documentation/content/xdocs/udf.xml b/src/docs/src/documentation/content/xdocs/udf.xml
index 0c84356..9f10561 100644
--- a/src/docs/src/documentation/content/xdocs/udf.xml
+++ b/src/docs/src/documentation/content/xdocs/udf.xml
@@ -807,7 +807,17 @@
 B = FOREACH A GENERATE myudfs.UPPER(name);
 DUMP B;
 </source>
-
+<p>Variable-length arguments: </p>
+<p>The last input schema field in <code>getArgToFuncMapping()</code> can be marked as vararg, which enables UDF writers to create UDFs that take variable length arguments. This is done by overriding
+the <code>getSchemaType()</code> method:
+</p>
+<source>
+@Override
+public SchemaType getSchemaType() {
+    return SchemaType.VARARG;
+}
+</source>
+<p>For an example see <a href="http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/builtin/CONCAT.java?view=markup">CONCAT</a>.</p>
 </section>
 
 <!-- +++++++++++++++++++++++++++++++++++++++++++++++++ -->
diff --git a/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/JobControlCompiler.java b/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/JobControlCompiler.java
index 98552ea..0439f8f 100644
--- a/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/JobControlCompiler.java
+++ b/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/JobControlCompiler.java
@@ -1612,39 +1612,44 @@
     private static Path getFromCache(PigContext pigContext,
             Configuration conf,
             URL url) throws IOException {
+        InputStream is1 = null;
+        InputStream is2 = null;
+        OutputStream os = null;
+
         try {
             Path stagingDir = getCacheStagingDir(conf);
             String filename = FilenameUtils.getName(url.getPath());
 
-            String checksum = DigestUtils.shaHex(url.openStream());
+            is1 = url.openStream();
+            String checksum = DigestUtils.shaHex(is1);
             FileSystem fs = FileSystem.get(conf);
             Path cacheDir = new Path(stagingDir, checksum);
             Path cacheFile = new Path(cacheDir, filename);
             if (fs.exists(cacheFile)) {
-               log.info("Found " + url + " in jar cache at "+ stagingDir);
-               long curTime = System.currentTimeMillis();
-               fs.setTimes(cacheFile, -1, curTime);
-               return cacheFile;
+                log.debug("Found " + url + " in jar cache at "+ cacheDir);
+                long curTime = System.currentTimeMillis();
+                fs.setTimes(cacheFile, -1, curTime);
+                return cacheFile;
             }
-            log.info("Url "+ url + " was not found in jarcache at "+ stagingDir);
+            log.info("Url "+ url + " was not found in jarcache at "+ cacheDir);
             // attempt to copy to cache else return null
             fs.mkdirs(cacheDir, FileLocalizer.OWNER_ONLY_PERMS);
-            OutputStream os = null;
-            InputStream is = null;
-            try {
-                os = FileSystem.create(fs, cacheFile, FileLocalizer.OWNER_ONLY_PERMS);
-                is = url.openStream();
-                IOUtils.copyBytes(is, os, 4096, true);
-            } finally {
-                org.apache.commons.io.IOUtils.closeQuietly(is);
-                // IOUtils should not close stream to HDFS quietly
-                os.close();
-            }
+            is2 = url.openStream();
+            os = FileSystem.create(fs, cacheFile, FileLocalizer.OWNER_ONLY_PERMS);
+            IOUtils.copyBytes(is2, os, 4096, true);
+
             return cacheFile;
 
         } catch (IOException ioe) {
             log.info("Unable to retrieve jar from jar cache ", ioe);
             return null;
+        } finally {
+            org.apache.commons.io.IOUtils.closeQuietly(is1);
+            org.apache.commons.io.IOUtils.closeQuietly(is2);
+            // IOUtils should not close stream to HDFS quietly
+            if (os != null) {
+                os.close();
+            }
         }
     }
 
diff --git a/src/org/apache/pig/builtin/CONCAT.java b/src/org/apache/pig/builtin/CONCAT.java
index 99822b7..6788a93 100644
--- a/src/org/apache/pig/builtin/CONCAT.java
+++ b/src/org/apache/pig/builtin/CONCAT.java
@@ -33,8 +33,8 @@
 
 
 /**
- * Generates the concatenation of the first two arguments.  It can be
- * used with two bytearrays or two chararrays (but not a mixture of the two).
+ * Generates the concatenation of two or more arguments.  It can be
+ * used with two or more bytearrays or two or more chararrays (but not a mixture of the two).
  */
 public class CONCAT extends EvalFunc<DataByteArray> {
 
diff --git a/src/org/apache/pig/builtin/StringConcat.java b/src/org/apache/pig/builtin/StringConcat.java
index 0767c9d..40a4bc9 100644
--- a/src/org/apache/pig/builtin/StringConcat.java
+++ b/src/org/apache/pig/builtin/StringConcat.java
@@ -18,9 +18,9 @@
 package org.apache.pig.builtin;
 
 import java.io.IOException;
+
 import org.apache.pig.EvalFunc;
 import org.apache.pig.PigException;
-import org.apache.pig.EvalFunc.SchemaType;
 import org.apache.pig.backend.executionengine.ExecException;
 import org.apache.pig.data.DataType;
 import org.apache.pig.data.Tuple;
diff --git a/src/org/apache/pig/impl/plan/OperatorPlan.java b/src/org/apache/pig/impl/plan/OperatorPlan.java
index a66e423..e88f54b 100644
--- a/src/org/apache/pig/impl/plan/OperatorPlan.java
+++ b/src/org/apache/pig/impl/plan/OperatorPlan.java
@@ -67,6 +67,7 @@
 // Suppress "unchecked" warnings for all logical plan related classes. Will revisit in logical plan rework
 @SuppressWarnings("unchecked")
 public abstract class OperatorPlan<E extends Operator> implements Iterable<E>, Serializable, Cloneable {
+    private static final long serialVersionUID = 1L;
     protected Map<E, OperatorKey> mOps;
     protected Map<OperatorKey, E> mKeys;
     protected MultiMap<E, E> mFromEdges;
diff --git a/src/org/apache/pig/tools/pigstats/mapreduce/MRJobStats.java b/src/org/apache/pig/tools/pigstats/mapreduce/MRJobStats.java
index b3ba802..e4b91be 100644
--- a/src/org/apache/pig/tools/pigstats/mapreduce/MRJobStats.java
+++ b/src/org/apache/pig/tools/pigstats/mapreduce/MRJobStats.java
@@ -331,6 +331,22 @@
         }
     }
 
+    private class TaskStat {
+        int size;
+        long max;
+        long min;
+        long avg;
+        long median;
+
+        public TaskStat(int size, long max, long min, long avg, long median) {
+            this.size = size;
+            this.max = max;
+            this.min = min;
+            this.avg = avg;
+            this.median = median;
+        }
+    }
+
     void addMapReduceStatistics(Job job) {
         TaskReport[] maps = null;
         try {
@@ -338,26 +354,42 @@
         } catch (IOException e) {
             LOG.warn("Failed to get map task report", e);
         }
+        TaskReport[] reduces = null;
+        try {
+            reduces = HadoopShims.getTaskReports(job, TaskType.REDUCE);
+        } catch (IOException e) {
+            LOG.warn("Failed to get reduce task report", e);
+        }
+        addMapReduceStatistics(maps, reduces);
+    }
+
+    private TaskStat getTaskStat(TaskReport[] tasks) {
+        int size = tasks.length;
+        long max = 0;
+        long min = Long.MAX_VALUE;
+        long median = 0;
+        long total = 0;
+        long durations[] = new long[size];
+
+        for (int i = 0; i < tasks.length; i++) {
+            TaskReport rpt = tasks[i];
+            long duration = rpt.getFinishTime() - rpt.getStartTime();
+            durations[i] = duration;
+            max = (duration > max) ? duration : max;
+            min = (duration < min) ? duration : min;
+            total += duration;
+        }
+        long avg = total / size;
+
+        median = calculateMedianValue(durations);
+
+        return new TaskStat(size, max, min, avg, median);
+    }
+
+    private void addMapReduceStatistics(TaskReport[] maps, TaskReport[] reduces) {
         if (maps != null && maps.length > 0) {
-            int size = maps.length;
-            long max = 0;
-            long min = Long.MAX_VALUE;
-            long median = 0;
-            long total = 0;
-            long durations[] = new long[size];
-
-            for (int i = 0; i < maps.length; i++) {
-                TaskReport rpt = maps[i];
-                long duration = rpt.getFinishTime() - rpt.getStartTime();
-                durations[i] = duration;
-                max = (duration > max) ? duration : max;
-                min = (duration < min) ? duration : min;
-                total += duration;
-            }
-            long avg = total / size;
-
-            median = calculateMedianValue(durations);
-            setMapStat(size, max, min, avg, median);
+            TaskStat st = getTaskStat(maps);
+            setMapStat(st.size, st.max, st.min, st.avg, st.median);
         } else {
             int m = conf.getInt("mapred.map.tasks", 1);
             if (m > 0) {
@@ -365,31 +397,9 @@
             }
         }
 
-        TaskReport[] reduces = null;
-        try {
-            reduces = HadoopShims.getTaskReports(job, TaskType.REDUCE);
-        } catch (IOException e) {
-            LOG.warn("Failed to get reduce task report", e);
-        }
         if (reduces != null && reduces.length > 0) {
-            int size = reduces.length;
-            long max = 0;
-            long min = Long.MAX_VALUE;
-            long median = 0;
-            long total = 0;
-            long durations[] = new long[size];
-
-            for (int i = 0; i < reduces.length; i++) {
-                TaskReport rpt = reduces[i];
-                long duration = rpt.getFinishTime() - rpt.getStartTime();
-                durations[i] = duration;
-                max = (duration > max) ? duration : max;
-                min = (duration < min) ? duration : min;
-                total += duration;
-            }
-            long avg = total / size;
-            median = calculateMedianValue(durations);
-            setReduceStat(size, max, min, avg, median);
+            TaskStat st = getTaskStat(reduces);
+            setReduceStat(st.size, st.max, st.min, st.avg, st.median);
         } else {
             int m = conf.getInt("mapred.reduce.tasks", 1);
             if (m > 0) {
diff --git a/test/e2e/pig/conf/default.conf b/test/e2e/pig/conf/default.conf
index 525c883..f048948 100644
--- a/test/e2e/pig/conf/default.conf
+++ b/test/e2e/pig/conf/default.conf
@@ -15,7 +15,7 @@
 #  limitations under the License.                                                      
                                                                                        
 my $me = `whoami`;
-chomp $me;
+$me =~ s/[^a-zA-Z0-9]*//g;
 
 # The contents of this file can be rewritten to fit your installation.
 # Also, you can define the following environment variables and set things up as in the test setup
@@ -43,7 +43,7 @@
     #TEST
     , 'benchmarkPath'    => "$ENV{PH_OUT}/benchmarks"
     , 'scriptPath'       => "$ENV{PH_ROOT}/libexec"
-    , 'tmpPath'          => '/tmp/pigtest'
+    , 'tmpPath'          => 'tmp/pigtest'
     , 'jythonjar'        => "$ENV{PH_JYTHON_JAR}"
     , 'jrubyjar'         => "$ENV{PH_JRUBY_JAR}"
 
diff --git a/test/e2e/pig/deployers/ExistingClusterDeployer.pm b/test/e2e/pig/deployers/ExistingClusterDeployer.pm
index 1dd5dcf..d34a864 100644
--- a/test/e2e/pig/deployers/ExistingClusterDeployer.pm
+++ b/test/e2e/pig/deployers/ExistingClusterDeployer.pm
@@ -22,6 +22,8 @@
 use strict;
 use English;
 
+use Util;
+
 our @ISA = "TestDeployer";
 
 ###########################################################################
@@ -354,11 +356,18 @@
 
     my @pigCmd = "";
 
+	my $pigbin = "";
     if ($cfg->{'usePython'} eq "true") {
-      @pigCmd = ("$cfg->{'pigpath'}/bin/pig.py");
+      $pigbin = "$cfg->{'pigpath'}/bin/pig.py";
+    } elsif (Util::isCygwin()) {
+      $pigbin = "$cfg->{'pigpath'}/bin/pig.cmd";
+      $pigbin =~ s/\\/\//g;
+      $pigbin = `cygpath -u $pigbin`;
+      chomp($pigbin);
     } else {
-      @pigCmd = ("$cfg->{'pigpath'}/bin/pig");
+      $pigbin = "$cfg->{'pigpath'}/bin/pig";
     }
+	@pigCmd = ($pigbin);
     push(@pigCmd, '-e');
     push(@pigCmd, split(' ', $c));
 
diff --git a/test/e2e/pig/drivers/TestDriverPig.pm b/test/e2e/pig/drivers/TestDriverPig.pm
index 60f1f4d..fdd4027 100644
--- a/test/e2e/pig/drivers/TestDriverPig.pm
+++ b/test/e2e/pig/drivers/TestDriverPig.pm
@@ -113,7 +113,7 @@
 
     # Setup the output path
     my $me = `whoami`;
-    chomp $me;
+    $me =~ s/[^a-zA-Z0-9]*//g;
     my $jobId = $globalHash->{'job-id'};
     my $timeId = time;
     $globalHash->{'runid'} = $me . "-" . $timeId . "-" . $jobId;
@@ -203,7 +203,7 @@
     if ( $testCmd->{'pig'} && $self->hasCommandLineVerifications( $testCmd, $log) ) {
        my $oldpig;
 
-       if ( Util::isWindows() && $testCmd->{'pig_win'}) {
+       if ((Util::isWindows() || Util::isCygwin()) && $testCmd->{'pig_win'}) {
            $oldpig = $testCmd->{'pig'};
            $testCmd->{'pig'} = $testCmd->{'pig_win'};
        }
@@ -223,7 +223,7 @@
     } elsif( $testCmd->{'pig'} ){
        my $oldpig;
 
-       if ( Util::isWindows() && $testCmd->{'pig_win'}) {
+       if ((Util::isWindows() || Util::isCygwin()) && $testCmd->{'pig_win'}) {
            $oldpig = $testCmd->{'pig'};
            $testCmd->{'pig'} = $testCmd->{'pig_win'};
        }
@@ -312,7 +312,9 @@
     $result{'rc'} = $? >> 8;
     $result{'output'} = $outfile;
     $result{'stdout'} = `cat $stdoutfile`;
+    $result{'stdout'} =~ s/\r\n/\n/g;
     $result{'stderr'} = `cat $stderrfile`;
+    $result{'stderr'} =~ s/\r\n/\n/g;
     $result{'stderr_file'} = $stderrfile;
 
     print $log "STD ERROR CONTAINS:\n$result{'stderr'}\n";
@@ -391,7 +393,7 @@
 
     # set the PIG_CLASSPATH environment variable
 	my $separator = ":";
-	if(Util::isWindows()) {
+	if(Util::isWindows()||Util::isCygwin()) {
 	    $separator = ";";
 	}
 	my $pcp .= $testCmd->{'jythonjar'} if (defined($testCmd->{'jythonjar'}));
@@ -407,12 +409,19 @@
         push(@pigCmd, "$testCmd->{'pigpath'}/bin/pig.py");
         # print ("Using pig too\n");
     } else {
+        my $pigbin = "";
         if(Util::isWindows()) {
-            @pigCmd = ("$testCmd->{'pigpath'}/bin/pig.cmd");
+            $pigbin = "$testCmd->{'pigpath'}/bin/pig.cmd";
         }
-        else {
-           @pigCmd = ("$testCmd->{'pigpath'}/bin/pig");
+        elsif (Util::isCygwin()) {
+            $pigbin = "$testCmd->{'pigpath'}/bin/pig.cmd";
+            $pigbin =~ s/\\/\//g;
+            $pigbin = `cygpath -u $pigbin`;
+            chomp($pigbin);
+        } else {
+            $pigbin = "$testCmd->{'pigpath'}/bin/pig";
         }
+        @pigCmd = ($pigbin);
     }
 
     if (defined($testCmd->{'additionaljars'})) {
@@ -653,7 +662,7 @@
 		$modifiedTestCmd{'pig'} = $testCmd->{'verify_pig_script'};
 	}
     else {
-        if ( Util::isWindows() && $testCmd->{'pig_win'}) {
+        if ((Util::isWindows()||Util::isCygwin()) && $testCmd->{'pig_win'}) {
            $modifiedTestCmd{'pig'} = $testCmd->{'pig_win'};
        }
 		# Change so we're looking at the old version of Pig
diff --git a/test/e2e/pig/drivers/Util.pm b/test/e2e/pig/drivers/Util.pm
index c3a5a0d..88c1b29 100644
--- a/test/e2e/pig/drivers/Util.pm
+++ b/test/e2e/pig/drivers/Util.pm
@@ -466,4 +466,15 @@
         return 0;
     }
 }
+
+sub isCygwin
+{
+    if($^O =~ /cygwin/i) {
+        return 1;
+    }
+    else {
+        return 0;
+    }
+}
+
 1;
diff --git a/test/org/apache/pig/test/TestMRJobStats.java b/test/org/apache/pig/test/TestMRJobStats.java
index fc79382..5616fbe 100644
--- a/test/org/apache/pig/test/TestMRJobStats.java
+++ b/test/org/apache/pig/test/TestMRJobStats.java
@@ -30,7 +30,6 @@
 
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.mapred.JobClient;
-import org.apache.hadoop.mapred.JobConf;
 import org.apache.hadoop.mapred.JobID;
 import org.apache.hadoop.mapred.TaskReport;
 import org.apache.hadoop.mapreduce.Job;
@@ -111,8 +110,6 @@
 
     @Test
     public void testMedianMapReduceTime() throws Exception {
-
-        JobConf jobConf = new JobConf();
         JobClient jobClient = Mockito.mock(JobClient.class);
 
         // mock methods to return the predefined map and reduce task reports
@@ -124,10 +121,10 @@
         getJobStatsMethod("setId", JobID.class).invoke(jobStats, jobID);
         jobStats.setSuccessful(true);
 
-        getJobStatsMethod("addMapReduceStatistics", JobClient.class, Configuration.class)
-            .invoke(jobStats, jobClient, jobConf);
-        String msg = (String)getJobStatsMethod("getDisplayString", boolean.class)
-            .invoke(jobStats, false);
+        getJobStatsMethod("addMapReduceStatistics", TaskReport[].class, TaskReport[].class)
+            .invoke(jobStats, mapTaskReports, reduceTaskReports);
+        String msg = (String)getJobStatsMethod("getDisplayString")
+            .invoke(jobStats);
 
         System.out.println(JobStats.SUCCESS_HEADER);
         System.out.println(msg);
@@ -149,21 +146,15 @@
         Mockito.when(reduceTaskReports[0].getStartTime()).thenReturn(500L * ONE_THOUSAND);
         Mockito.when(reduceTaskReports[0].getFinishTime()).thenReturn(700L * ONE_THOUSAND);
 
-        JobConf jobConf = new JobConf();
-        JobClient jobClient = Mockito.mock(JobClient.class);
-
-        Mockito.when(jobClient.getMapTaskReports(jobID)).thenReturn(mapTaskReports);
-        Mockito.when(jobClient.getReduceTaskReports(jobID)).thenReturn(reduceTaskReports);
-
         PigStats.JobGraph jobGraph = new PigStats.JobGraph();
         MRJobStats jobStats = createJobStats("JobStatsTest", jobGraph);
         getJobStatsMethod("setId", JobID.class).invoke(jobStats, jobID);
         jobStats.setSuccessful(true);
 
-        getJobStatsMethod("addMapReduceStatistics", JobClient.class, Configuration.class)
-            .invoke(jobStats, jobClient, jobConf);
-        String msg = (String)getJobStatsMethod("getDisplayString", boolean.class)
-            .invoke(jobStats, false);
+        getJobStatsMethod("addMapReduceStatistics", TaskReport[].class, TaskReport[].class)
+            .invoke(jobStats, mapTaskReports, reduceTaskReports);
+        String msg = (String)getJobStatsMethod("getDisplayString")
+            .invoke(jobStats);
         System.out.println(JobStats.SUCCESS_HEADER);
         System.out.println(msg);
 
diff --git a/test/org/apache/pig/test/data/GoldenFiles/MRC18.gld b/test/org/apache/pig/test/data/GoldenFiles/MRC18.gld
index 24c0f74..547aa32 100644
--- a/test/org/apache/pig/test/data/GoldenFiles/MRC18.gld
+++ b/test/org/apache/pig/test/data/GoldenFiles/MRC18.gld
@@ -18,4 +18,4 @@
     |   |   |
     |   |   Project[tuple][*] - scope-111
     |   |
-    |   |---b: Load(/tmp/input2:org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.MergeJoinIndexer('org.apache.pig.builtin.PigStorage','eNqtVTtvE0EQnlzixJiQhGeDKBCv7k6iQqKABIgwHNgiaXDF5G5zPti7XXb3wpkCiQYKKKFAAomCkt+AhCiooaRC9NSUMLu248MBUZgtLO/M7jy+/b65t9+hphXsv41b6Bcm5f6yUtgLU23Kh5+PvPiIr6ZhqgkzOr3PSgkAU/dm7C9dui5U4qPEqMt8mSb+BkZ3WB77XYyFkD4rWUQRRc7yJM3pSLen0wh5iD2mfMkx1357YGvTDvprygOvA3soUGtzmfNmLgsTQk3IDKWBYyElDfpJA0oapJnkgS08uFZwk15DebZUcGKsNHvKpfRbkik0QtmMT9/pl1/DD10P6iE0slUlsktxwvRdeADTlDO7ynrawGJo0RkkofghzGUhwy1GvqWKz4JGzpmsJV2IWgiz2Q0hjNvNhrCQrYlNM55m3lnXRdVWz6r7UhLaR//UknuxYeMDAD0PpmwVZHFVuNt7Rw98GXWXfLW5L+8/HLr1aRq8VWhwgfEqRgRME3aZrmK6K3hcynPnXcz5e3X6XbJ/S1dTY4fDMuL4P2EnRCvvfAW8NCagdSQkM7CvDyadT4I1o9I8OVsOu+qawTFKc3MS4hGLqTtN7mFNRMNWW4nbLDKj2mY7sJDqgZkeI4870BBbTFmkGG0OSiUiGyhPVjBpba4XkjPdgQMUnR5kjeycOSO5m1DXBpW5IDixJxK8yHJiz8EKe7Z1Z/m78b87vLTDNmoVKGE4ScKhineGnu9ADaOoyEjRqVXysjE2R9y0ON0tSEIsbqNCzhlPdbYCjT586z3JiBjIU3R8W6CNC2Dg8PgIiNFg4JAm9c26U0PF7eFULSZsXWHElIFT43cly4firRykOHuFSqlValFEaDsfhpwThalkaEhULDd2nBhoj4cfwBj0YQzGYAx+gzFwIzGojkSqY1rZMXNxosA3HKSWV0f/KdDR27khQno1cPKPQ9deCyq6Jq1aTBYBftI6LWk5kNxyH41GKe0sWpmEa/1eKjNkBeYVM4XK1wyaQhMD+gz6fZy0NqyOqURvOFBgu7j+F62xPSXtBb+ZG5Ywte/b6zc/Hj4+49mPYG0LecEIxqXRuetFtsHUo7fPj+x+9vWJFZNLUJbj4e328F+Hp/M6+ModoDlD/S83YUI3yPIXqjl9HQ==','','b_45-1','scope','true')) - scope-102
+    |   |---b: Load(/tmp/input2:org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.MergeJoinIndexer('org.apache.pig.builtin.PigStorage','eNqtVT1vE0EQnThxYkzIF4QGpYj46u4kKqQUkEAiDEdsJWlwxeRuc76wd7vZ20vOFEhpoKCFggKJgjJ/AlFQQ0mF6KkpYXZtxxcHRGG2sLwzu+/NvJ2ZO/oB5VTB+V3cRyfTEXeWlcK2F6U6P/yy8OYTvh2FkRqMpdFTlksAGDkYM790aV2o0EGJfos5MgqdbfSfsCRwWhgIIR2WM58QRcKSMEroSKudRj5yD9tMOZJjkjqNrq1BO+iskRKUmnCOgOo7y5zXEplpD8pCxig1XPaI1O2QukTqRrHkrgncfZhxHT1EuZQruDoQmjllKZ26ZAq1UCcZKx5U4zUl4tUgZOkePINR4owfsHaqYdoz6nRJCN+DidhjuM/IN1PwGdHIORbXpYUoezAebwih7W7cg6l4U+zoQZpJa90SRVslLu5zSWov/ikl+2K9xLvplEowYqIgi43C3p7tP/A9TFvkK098/fDx4uPPo1BagyoXGKyhT8LU4IxuKZa2BA9yeeu2xZw8qNDvjPmb25iqpxymIq78U3ZStPDO96EUBSR06gvJNMx1xKTzobupVZSES3kvq5buHiOaR8MUHlUxZZeSuxcTlWG9ocQu83U/tvEmTEVp10yPkQRNqIp9poxSjDbzUgnfACXhCob1na1McpY24QKh04Nskp0zayR3DSqpRqXvCE7V4wuexQlVz3yheo77ztTv9v/OcPWUrZ8qEKE3DGGvi09DTzahjL6fxdTRkenkZa0NR1AzOu1l1EIsaKBCzhmP0ngFqh35ttqSUWEgj9DW2xRtLICGS4MjIECNrlWaum/cnup13DlO0WLIthT6TGm4PnhXsqTXvIWDhDMrVESpUorCR5N5D3JCZLrAUJWoWKLNONHQGITvyuh2ZHQHZHRPyOjakegWRyLFMarMmLk7FPCGldTU1eI/G7T/dnaIUL9quPbHoWuuuYW+pl41mkwD/KJ1Q9KyItllPxrVXJpZtDJMrXVyKcyQFZhUTGcq2dSos5QqoFNBJ8dJfdv0MYVY6g0UOA6u80WrHk9Jc8GpJZqFTM19f/f+5+GLmyXzESzvI88YyTjTP7eexdtMPT96vXD21beXppksQZ4Pwpvtpb8OT+u18uWnRLOGyl9uwpBukPlvzlh4bg==','','b_45-1','scope','true')) - scope-102
diff --git a/test/perf/pigmix/src/java/org/apache/pig/test/pigmix/mapreduce/L1.java b/test/perf/pigmix/src/java/org/apache/pig/test/pigmix/mapreduce/L1.java
index 2e38f0c..211df4d 100644
--- a/test/perf/pigmix/src/java/org/apache/pig/test/pigmix/mapreduce/L1.java
+++ b/test/perf/pigmix/src/java/org/apache/pig/test/pigmix/mapreduce/L1.java
@@ -21,28 +21,24 @@
 import java.util.ArrayList;
 import java.util.Iterator;
 import java.util.List;
-import java.util.Properties;
 import java.util.Map;
+import java.util.Properties;
 
 import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.io.Text;
 import org.apache.hadoop.io.IntWritable;
 import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.Text;
 import org.apache.hadoop.mapred.FileInputFormat;
 import org.apache.hadoop.mapred.FileOutputFormat;
 import org.apache.hadoop.mapred.JobConf;
-import org.apache.hadoop.mapred.KeyValueTextInputFormat;
-import org.apache.hadoop.mapred.Mapper;
 import org.apache.hadoop.mapred.MapReduceBase;
+import org.apache.hadoop.mapred.Mapper;
 import org.apache.hadoop.mapred.OutputCollector;
 import org.apache.hadoop.mapred.Reducer;
 import org.apache.hadoop.mapred.Reporter;
 import org.apache.hadoop.mapred.TextInputFormat;
 import org.apache.hadoop.mapred.jobcontrol.Job;
 import org.apache.hadoop.mapred.jobcontrol.JobControl;
-import org.apache.hadoop.mapred.lib.IdentityMapper;
-
-import org.apache.pig.test.pigmix.mapreduce.Library;
 
 public class L1 {
 
diff --git a/test/perf/pigmix/src/java/org/apache/pig/test/pigmix/mapreduce/L10.java b/test/perf/pigmix/src/java/org/apache/pig/test/pigmix/mapreduce/L10.java
index f753f32..17bdfa2 100644
--- a/test/perf/pigmix/src/java/org/apache/pig/test/pigmix/mapreduce/L10.java
+++ b/test/perf/pigmix/src/java/org/apache/pig/test/pigmix/mapreduce/L10.java
@@ -45,7 +45,6 @@
 import org.apache.hadoop.mapred.jobcontrol.Job;
 import org.apache.hadoop.mapred.jobcontrol.JobControl;
 import org.apache.hadoop.mapred.lib.IdentityMapper;
-
 import org.apache.pig.test.pigmix.mapreduce.Library;
 
 public class L10 {
@@ -127,9 +126,8 @@
 
         public int getPartition(MyType key, Text value, int numPartitions) {
             int rc = 0;
-            if (key.query_term == null || key.query_term.length() < 2) return 39;
-            if (key.query_term.charAt(0) > ']') rc += 20;
-            rc += map.get(key.query_term.charAt(1));
+            if (key==null || key.query_term == null ||  key.query_term.length() < 1 ) return 0;
+            rc += map.get(key.query_term.charAt(0));
             return rc;
         }
 
@@ -137,65 +135,65 @@
             // Don't actually do any configuration, do the setup of the hash
             // because this call is guaranteed to be made each time we set up
             // MyPartitioner
-            map = new HashMap<Character, Integer>(57);
-            map.put('A', 0);
+            map = new HashMap<Character, Integer>(59);
+            map.put('A', 1);
             map.put('B', 1);
             map.put('C', 2);
-            map.put('D', 3);
-            map.put('E', 4);
-            map.put('F', 5);
-            map.put('G', 6);
-            map.put('I', 7);
-            map.put('H', 8);
-            map.put('J', 9);
-            map.put('K', 10);
-            map.put('L', 11);
-            map.put('M', 12);
-            map.put('N', 13);
-            map.put('O', 14);
-            map.put('P', 15);
-            map.put('Q', 16);
-            map.put('R', 17);
-            map.put('S', 18);
-            map.put('T', 19);
-            map.put('U', 0);
-            map.put('V', 1);
-            map.put('W', 2);
-            map.put('X', 3);
-            map.put('Y', 4);
-            map.put('Z', 5);
-            map.put('[', 6);
-            map.put('\\', 7);
-            map.put(']', 8);
-            map.put('^', 9);
-            map.put('_', 10);
-            map.put('`', 11);
-            map.put('a', 12);
-            map.put('b', 13);
-            map.put('c', 14);
-            map.put('d', 15);
-            map.put('e', 16);
-            map.put('f', 17);
-            map.put('g', 18);
-            map.put('h', 19);
-            map.put('i', 0);
-            map.put('j', 1);
-            map.put('k', 2);
-            map.put('l', 3);
-            map.put('m', 4);
-            map.put('n', 5);
-            map.put('o', 6);
-            map.put('p', 7);
-            map.put('q', 8);
-            map.put('r', 9);
-            map.put('s', 10);
-            map.put('t', 11);
-            map.put('u', 12);
-            map.put('v', 13);
-            map.put('w', 14);
-            map.put('x', 15);
-            map.put('y', 16);
-            map.put('z', 17);
+            map.put('D', 2);
+            map.put('E', 3);
+            map.put('F', 3);
+            map.put('G', 4);
+            map.put('H', 4);
+            map.put('I', 5);
+            map.put('J', 5);
+            map.put('K', 6);
+            map.put('L', 6);
+            map.put('M', 7);
+            map.put('N', 7);
+            map.put('O', 8);
+            map.put('P', 8);
+            map.put('Q', 9);
+            map.put('R', 9);
+            map.put('S', 10);
+            map.put('T', 10);
+            map.put('U', 11);
+            map.put('V', 11);
+            map.put('W', 12);
+            map.put('X', 12);
+            map.put('Y', 13);
+            map.put('Z', 13);
+            map.put('[', 14);
+            map.put('\\', 14);
+            map.put(']', 15);
+            map.put('^', 15);
+            map.put('_', 16);
+            map.put('`', 16);
+            map.put('a', 17);
+            map.put('b', 17);
+            map.put('c', 18);
+            map.put('d', 18);
+            map.put('e', 19);
+            map.put('f', 20);
+            map.put('g', 20);
+            map.put('h', 21);
+            map.put('i', 22);
+            map.put('j', 23);
+            map.put('k', 24);
+            map.put('l', 25);
+            map.put('m', 26);
+            map.put('n', 27);
+            map.put('o', 28);
+            map.put('p', 29);
+            map.put('q', 30);
+            map.put('r', 31);
+            map.put('s', 32);
+            map.put('t', 33);
+            map.put('u', 34);
+            map.put('v', 35);
+            map.put('w', 36);
+            map.put('x', 37);
+            map.put('y', 38);
+            map.put('z', 39);
         }
     }
 
diff --git a/test/perf/pigmix/src/java/org/apache/pig/test/pigmix/mapreduce/L12.java b/test/perf/pigmix/src/java/org/apache/pig/test/pigmix/mapreduce/L12.java
index 1d92981..67aa5af 100644
--- a/test/perf/pigmix/src/java/org/apache/pig/test/pigmix/mapreduce/L12.java
+++ b/test/perf/pigmix/src/java/org/apache/pig/test/pigmix/mapreduce/L12.java
@@ -58,8 +58,8 @@
 
             List<Text> fields = Library.splitLine(val, '');
 
-            // Filter out null users and query terms.
-            if (fields.get(0).getLength() == 0 &&
+            // Filter out null users or query terms.
+            if (fields.get(0).getLength() == 0 ||
                     fields.get(3).getLength() == 0) return;
             try {
                 oc.collect(fields.get(0),
@@ -74,13 +74,12 @@
                 Iterator<DoubleWritable> iter, 
                 OutputCollector<Text, DoubleWritable> oc,
                 Reporter reporter) throws IOException {
-            double max = Double.MIN_VALUE;
+            double max = Double.NEGATIVE_INFINITY;
 
             while (iter.hasNext()) {
                 double d = iter.next().get();
-                max = max > d ? max : d;
+            	if (max < d) max=d;
             }
-
             oc.collect(key, new DoubleWritable(max));
         }
     }
@@ -129,24 +128,22 @@
                 OutputCollector<Text, LongWritable> oc,
                 Reporter reporter) throws IOException {
             List<Text> fields = Library.splitLine(val, '');
-
+            
             // Filter out non-null users and non-null queries
-            if (fields.get(0).getLength() != 0 ||
-                    fields.get(3).getLength() != 0) return;
+            if (fields.get(0).getLength() == 0 || fields.get(3).getLength() != 0) return;
             oc.collect(fields.get(1), new LongWritable(1));
-
-        }
+       }
 
         public void reduce(
                 Text key,
                 Iterator<LongWritable> iter, 
                 OutputCollector<Text, LongWritable> oc,
                 Reporter reporter) throws IOException {
-            long cnt = 0;
-
+  
+        	long cnt = 0;
             while (iter.hasNext()) {
-                iter.next();
-                cnt++;
+                LongWritable l = iter.next();
+            	cnt += l.get();
             }
             oc.collect(key, new LongWritable(cnt));
         }
diff --git a/test/perf/pigmix/src/java/org/apache/pig/test/pigmix/mapreduce/L15.java b/test/perf/pigmix/src/java/org/apache/pig/test/pigmix/mapreduce/L15.java
index 8e067d7..7d34330 100644
--- a/test/perf/pigmix/src/java/org/apache/pig/test/pigmix/mapreduce/L15.java
+++ b/test/perf/pigmix/src/java/org/apache/pig/test/pigmix/mapreduce/L15.java
@@ -19,6 +19,7 @@
 
 import java.io.IOException;
 import java.util.ArrayList;
+import java.util.Arrays;
 import java.util.HashSet;
 import java.util.Iterator;
 import java.util.List;
@@ -41,7 +42,6 @@
 import org.apache.hadoop.mapred.jobcontrol.Job;
 import org.apache.hadoop.mapred.jobcontrol.JobControl;
 import org.apache.hadoop.mapred.lib.IdentityMapper;
-
 import org.apache.pig.test.pigmix.mapreduce.Library;
 
 public class L15 {
@@ -81,8 +81,10 @@
             HashSet<Text> hash1 = new HashSet<Text>();
             HashSet<Text> hash2 = new HashSet<Text>();
             HashSet<Text> hash3 = new HashSet<Text>();
+            int cnt_per_combiner = 0;
             while (iter.hasNext()) {
                 List<Text> vals = Library.splitLine(iter.next(), '');
+                cnt_per_combiner++;
                 try {
 					hash1.add(vals.get(0));
 					hash2.add(vals.get(1));
@@ -104,6 +106,8 @@
             sb.append("");
             sb.append(ts.toString());
             sb.append("");
+            sb.append(cnt_per_combiner);
+            sb.append("");
             oc.collect(key, new Text(sb.toString()));
             reporter.setStatus("OK");
         }
@@ -120,27 +124,33 @@
             HashSet<Text> hash2 = new HashSet<Text>();
             HashSet<Text> hash3 = new HashSet<Text>();
             while (iter.hasNext()) {
-                List<Text> vals = Library.splitLine(iter.next(), '');
-                try {
+				Text line = iter.next();
+				List<Text> vals = Library.splitLine(line, '');
+				try {
 					hash1.add(vals.get(0));
 					hash2.add(vals.get(1));
 					hash3.add(vals.get(2));
 				}catch(NumberFormatException nfe) {
 				}
 			}
-			Integer ts=0;
-			Double rev=new Double(0.0);
-            for (Text t : hash2) rev += Double.valueOf(t.toString());
-            for (Text t : hash3) ts += Integer.valueOf(t.toString());
-            StringBuffer sb = new StringBuffer();
+
+			Integer ts = 0;
+			Double rev = new Double(0.0);
+			Integer overall_cnt_per_group = new Integer(0);
+			for (Text t : hash2)
+				rev += Double.valueOf(t.toString());
+			for (Text t : hash3)
+				ts += Integer.valueOf(t.toString());
+			StringBuffer sb = new StringBuffer();
 			sb.append((new Integer(hash1.size())).toString());
-            sb.append("");
-            sb.append(rev.toString());
-            sb.append("");
-            sb.append(ts.toString());
-            oc.collect(key, new Text(sb.toString()));
-            reporter.setStatus("OK");
-        }
+			sb.append("");
+			sb.append(rev.toString());
+			sb.append("");
+			Double avg = (double) ((Integer.valueOf(ts.toString())) / hash3.size());
+			sb.append(avg);
+			oc.collect(key, new Text(sb.toString()));
+			reporter.setStatus("OK");
+		}
     }
 
     public static void main(String[] args) throws IOException {
diff --git a/test/perf/pigmix/src/java/org/apache/pig/test/pigmix/mapreduce/L2.java b/test/perf/pigmix/src/java/org/apache/pig/test/pigmix/mapreduce/L2.java
index 2de2bb0..94a766b 100644
--- a/test/perf/pigmix/src/java/org/apache/pig/test/pigmix/mapreduce/L2.java
+++ b/test/perf/pigmix/src/java/org/apache/pig/test/pigmix/mapreduce/L2.java
@@ -19,39 +19,31 @@
 
 import java.io.BufferedReader;
 import java.io.FileInputStream;
-import java.io.InputStreamReader;
 import java.io.IOException;
+import java.io.InputStreamReader;
 import java.net.URI;
 import java.net.URISyntaxException;
 import java.util.ArrayList;
 import java.util.HashSet;
-import java.util.Iterator;
 import java.util.List;
-import java.util.Set;
-import java.util.Properties;
 import java.util.Map;
+import java.util.Properties;
+import java.util.Set;
 
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hadoop.filecache.DistributedCache;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.io.LongWritable;
 import org.apache.hadoop.io.Text;
-import org.apache.hadoop.filecache.DistributedCache;
 import org.apache.hadoop.mapred.FileInputFormat;
 import org.apache.hadoop.mapred.FileOutputFormat;
 import org.apache.hadoop.mapred.JobConf;
-import org.apache.hadoop.mapred.KeyValueTextInputFormat;
-import org.apache.hadoop.mapred.Mapper;
 import org.apache.hadoop.mapred.MapReduceBase;
+import org.apache.hadoop.mapred.Mapper;
 import org.apache.hadoop.mapred.OutputCollector;
-import org.apache.hadoop.mapred.Reducer;
 import org.apache.hadoop.mapred.Reporter;
 import org.apache.hadoop.mapred.TextInputFormat;
 import org.apache.hadoop.mapred.jobcontrol.Job;
 import org.apache.hadoop.mapred.jobcontrol.JobControl;
-import org.apache.hadoop.mapred.lib.IdentityMapper;
-
-import org.apache.pig.test.pigmix.mapreduce.Library;
 
 public class L2 {
 
@@ -78,6 +70,7 @@
                     String[] fields = line.split("");
                     hash.add(fields[0]);
                 }
+                reader.close();
             } catch (IOException ioe) {
                 throw new RuntimeException(ioe);
             }
@@ -92,7 +85,6 @@
             List<Text> fields = Library.splitLine(val, '');
             String name = fields.get(0).toString();
 
-            String v;
             if (hash.contains(name)) {
                 StringBuffer sb = new StringBuffer();
                 sb.append(name);
@@ -112,7 +104,6 @@
         }
         String inputDir = args[0];
         String outputDir = args[1];
-        String parallel = args[2];
         JobConf lp = new JobConf(L2.class);
         lp.setJobName("L2 Load Page Views");
         lp.setInputFormat(TextInputFormat.class);
@@ -123,8 +114,7 @@
         for (Map.Entry<Object,Object> entry : props.entrySet()) {
             lp.set((String)entry.getKey(), (String)entry.getValue());
         }
-        DistributedCache.addCacheFile(
-            new URI(inputDir + "/power_users"), lp);
+        DistributedCache.addCacheFile(new URI(inputDir + "/power_users"), lp);
         FileInputFormat.addInputPath(lp, new Path(inputDir + "/page_views"));
         FileOutputFormat.setOutputPath(lp, new Path(outputDir + "/L2out"));
         lp.setNumReduceTasks(0);
diff --git a/test/perf/pigmix/src/java/org/apache/pig/test/pigmix/mapreduce/L3.java b/test/perf/pigmix/src/java/org/apache/pig/test/pigmix/mapreduce/L3.java
index 43d17c8..34a0cb5 100644
--- a/test/perf/pigmix/src/java/org/apache/pig/test/pigmix/mapreduce/L3.java
+++ b/test/perf/pigmix/src/java/org/apache/pig/test/pigmix/mapreduce/L3.java
@@ -127,7 +127,6 @@
         String inputDir = args[0];
         String outputDir = args[1];
         String parallel = args[2];
-        String user = System.getProperty("user.name");
         JobConf lp = new JobConf(L3.class);
         lp.setJobName("L3 Load Page Views");
         lp.setInputFormat(TextInputFormat.class);
diff --git a/test/perf/pigmix/src/java/org/apache/pig/test/pigmix/mapreduce/L5.java b/test/perf/pigmix/src/java/org/apache/pig/test/pigmix/mapreduce/L5.java
index 12d8a3a..1560077 100644
--- a/test/perf/pigmix/src/java/org/apache/pig/test/pigmix/mapreduce/L5.java
+++ b/test/perf/pigmix/src/java/org/apache/pig/test/pigmix/mapreduce/L5.java
@@ -92,12 +92,14 @@
             int cnt = 0;
 
             while (iter.hasNext()) {
-                if (iter.next().toString().charAt(0) == '2') cnt++;
-                reporter.setStatus("OK");
+		//As soon as a value for that name appears in the 'users' table, exclude the tuple by a return
+                if (iter.next().toString().charAt(0) == '2') return;
+		reporter.setStatus("OK");
             }
 
-            oc.collect(null, key);
-            reporter.setStatus("OK");
+	    oc.collect(null, key);
+	    reporter.setStatus("OK");
+	   
         }
     }
 
diff --git a/test/perf/pigmix/src/java/org/apache/pig/test/pigmix/mapreduce/L6.java b/test/perf/pigmix/src/java/org/apache/pig/test/pigmix/mapreduce/L6.java
index 4fac7fe..5e7fe92 100644
--- a/test/perf/pigmix/src/java/org/apache/pig/test/pigmix/mapreduce/L6.java
+++ b/test/perf/pigmix/src/java/org/apache/pig/test/pigmix/mapreduce/L6.java
@@ -68,10 +68,8 @@
             sb.append("");
             sb.append(fields.get(5).toString());
             Text key = new Text(sb.toString());
-
             try {
-                oc.collect(fields.get(0),
-                    new IntWritable(Integer.valueOf(fields.get(3).toString())));
+                oc.collect(key,new IntWritable(Integer.valueOf(fields.get(2).toString())));
             } catch (NumberFormatException nfe) {
             }
         }
@@ -91,6 +89,7 @@
             }
             oc.collect(key, new IntWritable(sum));
             reporter.setStatus("OK");
+           
         }
     }
 
diff --git a/test/perf/pigmix/src/java/org/apache/pig/test/pigmix/mapreduce/L7.java b/test/perf/pigmix/src/java/org/apache/pig/test/pigmix/mapreduce/L7.java
index 3778385..f668aa7 100644
--- a/test/perf/pigmix/src/java/org/apache/pig/test/pigmix/mapreduce/L7.java
+++ b/test/perf/pigmix/src/java/org/apache/pig/test/pigmix/mapreduce/L7.java
@@ -81,7 +81,7 @@
             }
             StringBuffer sb = new StringBuffer();
             sb.append((new Integer(morning)).toString());
-            sb.append("");
+            sb.append('');
             sb.append((new Integer(afternoon)).toString());
             oc.collect(key, new Text(sb.toString()));
             reporter.setStatus("OK");
@@ -98,11 +98,14 @@
                 Reporter reporter) throws IOException {
             int morning = 0, afternoon = 0;
             while (iter.hasNext()) {
-                List<Text> vals = Library.splitLine(iter.next(), '');
+            	List<Text> vals = Library.splitLine(iter.next(), '');
                 try {
                     morning += Integer.valueOf(vals.get(0).toString());
-                    if (vals.size() > 1) afternoon += Integer.valueOf(vals.get(1).toString());
+                    if (vals.size() > 1) {
+                    	afternoon += Integer.valueOf(vals.get(1).toString());
+                    }
                 } catch (NumberFormatException nfe) {
+                	System.out.println(nfe);
                 }
             }
             StringBuffer sb = new StringBuffer();
diff --git a/test/perf/pigmix/src/java/org/apache/pig/test/pigmix/mapreduce/L8.java b/test/perf/pigmix/src/java/org/apache/pig/test/pigmix/mapreduce/L8.java
index ea5d325..f681503 100644
--- a/test/perf/pigmix/src/java/org/apache/pig/test/pigmix/mapreduce/L8.java
+++ b/test/perf/pigmix/src/java/org/apache/pig/test/pigmix/mapreduce/L8.java
@@ -41,7 +41,6 @@
 import org.apache.hadoop.mapred.jobcontrol.Job;
 import org.apache.hadoop.mapred.jobcontrol.JobControl;
 import org.apache.hadoop.mapred.lib.IdentityMapper;
-
 import org.apache.pig.test.pigmix.mapreduce.Library;
 
 public class L8 {
@@ -78,7 +77,7 @@
             int tsSum = 0, erCnt = 0;
             double erSum = 0.0;
             while (iter.hasNext()) {
-                List<Text> vals = Library.splitLine(iter.next(), '');
+            	 List<Text> vals = Library.splitLine(iter.next(), '');
                 try {
                     tsSum += Integer.valueOf(vals.get(0).toString());
                     erSum += Double.valueOf(vals.get(1).toString());
@@ -109,9 +108,10 @@
             while (iter.hasNext()) {
                 List<Text> vals = Library.splitLine(iter.next(), '');
                 try {
-                    tsSum += Integer.valueOf(vals.get(0).toString());
-                    erSum += Double.valueOf(vals.get(1).toString());
-                    erCnt++;
+                        tsSum += Integer.valueOf(vals.get(0).toString());
+	                    erSum += Double.valueOf(vals.get(1).toString());
+	                    erCnt += Integer.valueOf(vals.get(2).toString());
+                	                    
                 } catch (NumberFormatException nfe) {
                 }
             }
diff --git a/test/perf/pigmix/src/java/org/apache/pig/test/pigmix/mapreduce/L9.java b/test/perf/pigmix/src/java/org/apache/pig/test/pigmix/mapreduce/L9.java
index d3c8c7f..d91d576 100644
--- a/test/perf/pigmix/src/java/org/apache/pig/test/pigmix/mapreduce/L9.java
+++ b/test/perf/pigmix/src/java/org/apache/pig/test/pigmix/mapreduce/L9.java
@@ -31,7 +31,6 @@
 import org.apache.hadoop.mapred.FileInputFormat;
 import org.apache.hadoop.mapred.FileOutputFormat;
 import org.apache.hadoop.mapred.JobConf;
-import org.apache.hadoop.mapred.KeyValueTextInputFormat;
 import org.apache.hadoop.mapred.Mapper;
 import org.apache.hadoop.mapred.MapReduceBase;
 import org.apache.hadoop.mapred.OutputCollector;
@@ -41,8 +40,6 @@
 import org.apache.hadoop.mapred.TextInputFormat;
 import org.apache.hadoop.mapred.jobcontrol.Job;
 import org.apache.hadoop.mapred.jobcontrol.JobControl;
-import org.apache.hadoop.mapred.lib.IdentityMapper;
-
 import org.apache.pig.test.pigmix.mapreduce.Library;
 
 public class L9 {
@@ -60,7 +57,7 @@
             List<Text> fields = Library.splitLine(val, '');
             if (fields.size() != 9) return;
 
-            oc.collect(fields.get(0), val);
+            oc.collect(fields.get(3), val);
         }
     }
 
@@ -71,9 +68,8 @@
         public int getPartition(Text key, Text value, int numPartitions) {
             int rc = 0;
             String s = key.toString();
-            if (s == null || s.length() < 2) return 39;
-            if (s.charAt(0) > ']') rc += 20;
-            rc += map.get(s.charAt(1));
+            if (s == null || s.length() < 1)  return 0;
+            rc += map.get(s.charAt(0));
             return rc;
         }
 
@@ -81,65 +77,65 @@
             // Don't actually do any configuration, do the setup of the hash
             // because this call is guaranteed to be made each time we set up
             // MyPartitioner
-            map = new HashMap<Character, Integer>(57);
-            map.put('A', 0);
+            map = new HashMap<Character, Integer>(59);
+            map.put('A', 1);
             map.put('B', 1);
             map.put('C', 2);
-            map.put('D', 3);
-            map.put('E', 4);
-            map.put('F', 5);
-            map.put('G', 6);
-            map.put('I', 7);
-            map.put('H', 8);
-            map.put('J', 9);
-            map.put('K', 10);
-            map.put('L', 11);
-            map.put('M', 12);
-            map.put('N', 13);
-            map.put('O', 14);
-            map.put('P', 15);
-            map.put('Q', 16);
-            map.put('R', 17);
-            map.put('S', 18);
-            map.put('T', 19);
-            map.put('U', 0);
-            map.put('V', 1);
-            map.put('W', 2);
-            map.put('X', 3);
-            map.put('Y', 4);
-            map.put('Z', 5);
-            map.put('[', 6);
-            map.put('\\', 7);
-            map.put(']', 8);
-            map.put('^', 9);
-            map.put('_', 10);
-            map.put('`', 11);
-            map.put('a', 12);
-            map.put('b', 13);
-            map.put('c', 14);
-            map.put('d', 15);
-            map.put('e', 16);
-            map.put('f', 17);
-            map.put('g', 18);
-            map.put('h', 19);
-            map.put('i', 0);
-            map.put('j', 1);
-            map.put('k', 2);
-            map.put('l', 3);
-            map.put('m', 4);
-            map.put('n', 5);
-            map.put('o', 6);
-            map.put('p', 7);
-            map.put('q', 8);
-            map.put('r', 9);
-            map.put('s', 10);
-            map.put('t', 11);
-            map.put('u', 12);
-            map.put('v', 13);
-            map.put('w', 14);
-            map.put('x', 15);
-            map.put('y', 16);
-            map.put('z', 17);
+            map.put('D', 2);
+            map.put('E', 3);
+            map.put('F', 3);
+            map.put('G', 4);
+            map.put('H', 4);
+            map.put('I', 5);
+            map.put('J', 5);
+            map.put('K', 6);
+            map.put('L', 6);
+            map.put('M', 7);
+            map.put('N', 7);
+            map.put('O', 8);
+            map.put('P', 8);
+            map.put('Q', 9);
+            map.put('R', 9);
+            map.put('S', 10);
+            map.put('T', 10);
+            map.put('U', 11);
+            map.put('V', 11);
+            map.put('W', 12);
+            map.put('X', 12);
+            map.put('Y', 13);
+            map.put('Z', 13);
+            map.put('[', 14);
+            map.put('\\', 14);
+            map.put(']', 15);
+            map.put('^', 15);
+            map.put('_', 16);
+            map.put('`', 16);
+            map.put('a', 17);
+            map.put('b', 17);
+            map.put('c', 18);
+            map.put('d', 18);
+            map.put('e', 19);
+            map.put('f', 20);
+            map.put('g', 20);
+            map.put('h', 21);
+            map.put('i', 22);
+            map.put('j', 23);
+            map.put('k', 24);
+            map.put('l', 25);
+            map.put('m', 26);
+            map.put('n', 27);
+            map.put('o', 28);
+            map.put('p', 29);
+            map.put('q', 30);
+            map.put('r', 31);
+            map.put('s', 32);
+            map.put('t', 33);
+            map.put('u', 34);
+            map.put('v', 35);
+            map.put('w', 36);
+            map.put('x', 37);
+            map.put('y', 38);
+            map.put('z', 39);
         }
     }
 
diff --git a/test/perf/pigmix/src/java/org/apache/pig/test/pigmix/mapreduce/Library.java b/test/perf/pigmix/src/java/org/apache/pig/test/pigmix/mapreduce/Library.java
index a45e674..ac0192e 100644
--- a/test/perf/pigmix/src/java/org/apache/pig/test/pigmix/mapreduce/Library.java
+++ b/test/perf/pigmix/src/java/org/apache/pig/test/pigmix/mapreduce/Library.java
@@ -39,7 +39,7 @@
             }
         }
         // Grab the last one.
-        if (start != s.length() - 1) cols.add(new Text(s.substring(start)));
+        if (start != s.length()) cols.add(new Text(s.substring(start)));
 
         return cols;
     }