PIG-3455: Pig 0.11.1 OutOfMemory error (rohini)
git-svn-id: https://svn.apache.org/repos/asf/pig/branches/branch-0.11@1524465 13f79535-47bb-0310-9956-ffa450edef68
diff --git a/CHANGES.txt b/CHANGES.txt
index 34c7bd9..70252d0 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -32,6 +32,8 @@
BUG FIXES
+PIG-3455: Pig 0.11.1 OutOfMemory error (rohini)
+
PIG-3435: Custom Partitioner not working with MultiQueryOptimizer (knoguchi via daijy)
PIG-3385: DISTINCT no longer uses custom partitioner (knoguchi via daijy)
diff --git a/src/org/apache/pig/impl/util/HashOutputStream.java b/src/org/apache/pig/impl/util/HashOutputStream.java
new file mode 100644
index 0000000..53aad0d
--- /dev/null
+++ b/src/org/apache/pig/impl/util/HashOutputStream.java
@@ -0,0 +1,44 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.pig.impl.util;
+
+import java.io.IOException;
+import java.io.OutputStream;
+
+import com.google.common.hash.HashCode;
+import com.google.common.hash.HashFunction;
+import com.google.common.hash.Hasher;
+
+/** Output stream that hashes every written byte with a Guava Hasher instead of storing it. */
+public class HashOutputStream extends OutputStream {
+    private final Hasher hasher;
+    /** @param hf hash function whose new Hasher receives the written bytes */
+    public HashOutputStream(HashFunction hf) {
+        hasher = hf.newHasher();
+    }
+
+    @Override
+    public void write(int b) throws IOException {
+        hasher.putByte((byte) b); // low-order 8 bits only, per OutputStream.write(int)
+    }
+
+    /** @return hash code of all bytes written so far */
+    public HashCode getHashCode() {
+        return hasher.hash();
+    }
+}
diff --git a/src/org/apache/pig/newplan/logical/relational/LogicalPlan.java b/src/org/apache/pig/newplan/logical/relational/LogicalPlan.java
index 3c0feed..da4df3a 100644
--- a/src/org/apache/pig/newplan/logical/relational/LogicalPlan.java
+++ b/src/org/apache/pig/newplan/logical/relational/LogicalPlan.java
@@ -18,40 +18,43 @@
package org.apache.pig.newplan.logical.relational;
-import java.io.ByteArrayOutputStream;
import java.io.PrintStream;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import org.apache.pig.impl.logicalLayer.FrontendException;
+import org.apache.pig.impl.util.HashOutputStream;
import org.apache.pig.newplan.BaseOperatorPlan;
import org.apache.pig.newplan.Operator;
import org.apache.pig.newplan.OperatorPlan;
import org.apache.pig.newplan.logical.DotLOPrinter;
import org.apache.pig.newplan.logical.optimizer.LogicalPlanPrinter;
+import com.google.common.hash.HashFunction;
+import com.google.common.hash.Hashing;
+
/**
- * LogicalPlan is the logical view of relational operations Pig will execute
+ * LogicalPlan is the logical view of relational operations Pig will execute
* for a given script. Note that it contains only relational operations.
* All expressions will be contained in LogicalExpressionPlans inside
* each relational operator.
*/
public class LogicalPlan extends BaseOperatorPlan {
-
+
public LogicalPlan(LogicalPlan other) {
// shallow copy constructor
super(other);
}
-
+
public LogicalPlan() {
super();
}
-
+
/**
* Equality is checked by calling equals on every leaf in the plan. This
- * assumes that plans are always connected graphs. It is somewhat
- * inefficient since every leaf will test equality all the way to
+ * assumes that plans are always connected graphs. It is somewhat
+ * inefficient since every leaf will test equality all the way to
* every root. But it is only intended for use in testing, so that
* should be ok. Checking predecessors (as opposed to successors) was
* chosen because splits (which have multiple successors) do not depend
@@ -60,19 +63,19 @@
* graph has no correctness implications, whereas reversing the inputs
* of join can. This method of doing equals will detect predecessors
* in different orders but not successors in different orders.
- * It will return false if either plan has non deterministic EvalFunc.
+ * It will return false if either plan has non deterministic EvalFunc.
*/
@Override
public boolean isEqual(OperatorPlan other) throws FrontendException {
if (other == null || !(other instanceof LogicalPlan)) {
return false;
}
-
- return super.isEqual(other);
+
+ return super.isEqual(other);
}
-
+
@Override
- public void explain(PrintStream ps, String format, boolean verbose)
+ public void explain(PrintStream ps, String format, boolean verbose)
throws FrontendException {
ps.println("#-----------------------------------------------");
ps.println("# New Logical Plan:");
@@ -98,7 +101,7 @@
ops.add( op );
}
}
-
+
if( ops.isEmpty() ) {
return null;
} else {
@@ -109,18 +112,21 @@
/**
* Returns the signature of the LogicalPlan. The signature is a unique identifier for a given
* plan generated by a Pig script. The same script run multiple times with the same version of
- * Pig is guarenteed to produce the same signature, even if the input or output locations differ.
+ * Pig is guaranteed to produce the same signature, even if the input or output locations differ.
*
* @return a unique identifier for the logical plan
* @throws FrontendException if signature can't be computed
*/
public String getSignature() throws FrontendException {
- ByteArrayOutputStream baos = new ByteArrayOutputStream();
- PrintStream ps = new PrintStream(baos);
+ // Hash the printed plan as it streams instead of buffering it in memory.
+ // Fixed-seed murmur3_32: goodFastHash() may change between JVM runs.
+ HashFunction hf = Hashing.murmur3_32(0);
+ HashOutputStream hos = new HashOutputStream(hf);
+ PrintStream ps = new PrintStream(hos);
LogicalPlanPrinter printer = new LogicalPlanPrinter(this, ps);
printer.visit();
- return Integer.toString(baos.toString().hashCode());
+ return Integer.toString(hos.getHashCode().asInt());
}
}