PIG-5088: HashValuePartitioner has skew when there are only map fields (rohini)
git-svn-id: https://svn.apache.org/repos/asf/pig/trunk@1778363 13f79535-47bb-0310-9956-ffa450edef68
diff --git a/CHANGES.txt b/CHANGES.txt
index e245c6c..83c0cc7 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -175,6 +175,8 @@
BUG FIXES
+PIG-5088: HashValuePartitioner has skew when there are only map fields (rohini)
+
PIG-5043: Slowstart not applied in Tez with PARALLEL clause (rohini)
PIG-4930: Skewed Join Breaks On Empty Sampled Input When Key is From Map (nkollar via rohini)
diff --git a/src/org/apache/pig/backend/hadoop/executionengine/tez/runtime/HashValuePartitioner.java b/src/org/apache/pig/backend/hadoop/executionengine/tez/runtime/HashValuePartitioner.java
index ded3e86..2446836 100644
--- a/src/org/apache/pig/backend/hadoop/executionengine/tez/runtime/HashValuePartitioner.java
+++ b/src/org/apache/pig/backend/hadoop/executionengine/tez/runtime/HashValuePartitioner.java
@@ -17,8 +17,6 @@
*/
package org.apache.pig.backend.hadoop.executionengine.tez.runtime;
-import java.util.Map;
-
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapreduce.Partitioner;
import org.apache.pig.data.DataBag;
@@ -44,13 +42,10 @@
if (o != null) {
// Skip computing hashcode for bags.
// Order of elements in the map/bag may be different on each run
+ // Can't even include size as some DataBag implementations
+ // iterate through all elements in the bag to get the size.
if (o instanceof DataBag) {
hash = 31 * hash;
- } else if (o instanceof Map) {
- // Including size of map as it is easily available
- // Not doing for DataBag as some implementations actually
- // iterate through all elements in the bag to get the size.
- hash = 31 * hash + ((Map) o).size();
} else {
hash = 31 * hash + o.hashCode();
}