PIG-3455: Pig 0.11.1 OutOfMemory error (rohini)

git-svn-id: https://svn.apache.org/repos/asf/pig/branches/branch-0.11@1524465 13f79535-47bb-0310-9956-ffa450edef68
diff --git a/CHANGES.txt b/CHANGES.txt
index 34c7bd9..70252d0 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -32,6 +32,8 @@
 
 BUG FIXES
 
+PIG-3455: Pig 0.11.1 OutOfMemory error (rohini)
+
 PIG-3435: Custom Partitioner not working with MultiQueryOptimizer (knoguchi via daijy)
 
 PIG-3385: DISTINCT no longer uses custom partitioner (knoguchi via daijy)
diff --git a/src/org/apache/pig/impl/util/HashOutputStream.java b/src/org/apache/pig/impl/util/HashOutputStream.java
new file mode 100644
index 0000000..53aad0d
--- /dev/null
+++ b/src/org/apache/pig/impl/util/HashOutputStream.java
@@ -0,0 +1,72 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.pig.impl.util;
+
+import java.io.IOException;
+import java.io.OutputStream;
+
+import com.google.common.hash.HashCode;
+import com.google.common.hash.HashFunction;
+import com.google.common.hash.Hasher;
+
+/**
+ * An {@link OutputStream} that forwards every byte written to it to a Guava
+ * {@link Hasher} rather than keeping the bytes in a buffer of its own. The
+ * hash of the written data is available through {@link #getHashCode()}.
+ */
+public class HashOutputStream extends OutputStream {
+
+    private final Hasher hasher;
+
+    /**
+     * @param hf hash function used to create the underlying hasher
+     */
+    public HashOutputStream(HashFunction hf) {
+        hasher = hf.newHasher();
+    }
+
+    /**
+     * Hashes a single byte. Per the {@link OutputStream#write(int)} contract
+     * only the eight low-order bits of {@code b} are used; the 24 high-order
+     * bits are ignored.
+     */
+    @Override
+    public void write(int b) throws IOException {
+        hasher.putByte((byte) b);
+    }
+
+    /**
+     * Hashes a range of bytes in one call instead of one byte at a time.
+     */
+    @Override
+    public void write(byte[] b, int off, int len) throws IOException {
+        hasher.putBytes(b, off, len);
+    }
+
+    /**
+     * Returns the hash of everything written so far. Guava documents the
+     * behaviour of a Hasher after hash() as undefined, so call this once,
+     * after all writing is finished.
+     *
+     * @return the resulting hash code
+     */
+    public HashCode getHashCode() {
+        return hasher.hash();
+    }
+
+}
diff --git a/src/org/apache/pig/newplan/logical/relational/LogicalPlan.java b/src/org/apache/pig/newplan/logical/relational/LogicalPlan.java
index 3c0feed..da4df3a 100644
--- a/src/org/apache/pig/newplan/logical/relational/LogicalPlan.java
+++ b/src/org/apache/pig/newplan/logical/relational/LogicalPlan.java
@@ -18,40 +18,43 @@
 
 package org.apache.pig.newplan.logical.relational;
 
-import java.io.ByteArrayOutputStream;
 import java.io.PrintStream;
 import java.util.ArrayList;
 import java.util.Iterator;
 import java.util.List;
 
 import org.apache.pig.impl.logicalLayer.FrontendException;
+import org.apache.pig.impl.util.HashOutputStream;
 import org.apache.pig.newplan.BaseOperatorPlan;
 import org.apache.pig.newplan.Operator;
 import org.apache.pig.newplan.OperatorPlan;
 import org.apache.pig.newplan.logical.DotLOPrinter;
 import org.apache.pig.newplan.logical.optimizer.LogicalPlanPrinter;
 
+import com.google.common.hash.HashFunction;
+import com.google.common.hash.Hashing;
+
 /**
- * LogicalPlan is the logical view of relational operations Pig will execute 
+ * LogicalPlan is the logical view of relational operations Pig will execute
  * for a given script.  Note that it contains only relational operations.
  * All expressions will be contained in LogicalExpressionPlans inside
  * each relational operator.
  */
 public class LogicalPlan extends BaseOperatorPlan {
-  
+
     public LogicalPlan(LogicalPlan other) {
         // shallow copy constructor
         super(other);
     }
-    
+
     public LogicalPlan() {
         super();
     }
-    
+
     /**
      * Equality is checked by calling equals on every leaf in the plan.  This
-     * assumes that plans are always connected graphs.  It is somewhat 
-     * inefficient since every leaf will test equality all the way to 
+     * assumes that plans are always connected graphs.  It is somewhat
+     * inefficient since every leaf will test equality all the way to
      * every root.  But it is only intended for use in testing, so that
      * should be ok.  Checking predecessors (as opposed to successors) was
      * chosen because splits (which have multiple successors) do not depend
@@ -60,19 +63,19 @@
      * graph has no correctness implications, whereas reversing the inputs
      * of join can.  This method of doing equals will detect predecessors
      * in different orders but not successors in different orders.
-     * It will return false if either plan has non deterministic EvalFunc. 
+     * It will return false if either plan has non deterministic EvalFunc.
      */
     @Override
     public boolean isEqual(OperatorPlan other) throws FrontendException {
         if (other == null || !(other instanceof LogicalPlan)) {
             return false;
         }
-        
-        return super.isEqual(other);   
+
+        return super.isEqual(other);
     }
-    
+
     @Override
-    public void explain(PrintStream ps, String format, boolean verbose) 
+    public void explain(PrintStream ps, String format, boolean verbose)
     throws FrontendException {
         ps.println("#-----------------------------------------------");
         ps.println("# New Logical Plan:");
@@ -98,7 +101,7 @@
     	        ops.add( op );
     	    }
     	}
-    	
+
     	if( ops.isEmpty() ) {
             return null;
     	} else {
@@ -109,18 +112,26 @@
     /**
      * Returns the signature of the LogicalPlan. The signature is a unique identifier for a given
      * plan generated by a Pig script. The same script run multiple times with the same version of
-     * Pig is guarenteed to produce the same signature, even if the input or output locations differ.
+     * Pig is guaranteed to produce the same signature, even if the input or output locations differ.
      *
      * @return a unique identifier for the logical plan
      * @throws FrontendException if signature can't be computed
      */
     public String getSignature() throws FrontendException {
-        ByteArrayOutputStream baos = new ByteArrayOutputStream();
-        PrintStream ps = new PrintStream(baos);
+
+        // Hash the printed plan as it is produced instead of buffering it.
+        // murmur3_32 with a fixed seed is used rather than goodFastHash(32):
+        // Guava leaves goodFastHash's algorithm and seed unspecified, so it
+        // cannot back a signature that must be identical on every run.
+        HashFunction hf = Hashing.murmur3_32(0);
+        HashOutputStream hos = new HashOutputStream(hf);
+        PrintStream ps = new PrintStream(hos);
 
         LogicalPlanPrinter printer = new LogicalPlanPrinter(this, ps);
         printer.visit();
+        // Make sure nothing printed is still pending before reading the hash.
+        ps.flush();
 
-        return Integer.toString(baos.toString().hashCode());
+        return Integer.toString(hos.getHashCode().asInt());
     }
 }