PIG-3417: Job fails when skewed join is done on tuple key (nkollar via rohini)
git-svn-id: https://svn.apache.org/repos/asf/pig/trunk@1775168 13f79535-47bb-0310-9956-ffa450edef68
diff --git a/CHANGES.txt b/CHANGES.txt
index b2be651..f75cf34 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -171,6 +171,8 @@
BUG FIXES
+PIG-3417: Job fails when skewed join is done on tuple key (nkollar via rohini)
+
PIG-5074: Build broken when hadoopversion=20 in branch 0.16 (szita via daijy)
PIG-5064: NPE in TestScriptUDF#testPythonBuiltinModuleImport1 when JAVA_HOME is not set (water via daijy)
diff --git a/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/MRCompiler.java b/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/MRCompiler.java
index 193905f..ba3ad52 100644
--- a/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/MRCompiler.java
+++ b/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/MRCompiler.java
@@ -2432,7 +2432,7 @@
}else{
for(int i=0; i<transformPlans.size(); i++) {
eps1.add(transformPlans.get(i));
- flat1.add(true);
+ flat1.add(i == transformPlans.size() - 1 ? true : false);
}
}
diff --git a/src/org/apache/pig/backend/hadoop/executionengine/tez/plan/TezCompiler.java b/src/org/apache/pig/backend/hadoop/executionengine/tez/plan/TezCompiler.java
index 9395749..6dc118f 100644
--- a/src/org/apache/pig/backend/hadoop/executionengine/tez/plan/TezCompiler.java
+++ b/src/org/apache/pig/backend/hadoop/executionengine/tez/plan/TezCompiler.java
@@ -1507,7 +1507,7 @@
for (int i=0; i<transformPlans.size(); i++) {
eps1.add(transformPlans.get(i));
- flat1.add(true);
+ flat1.add(i == transformPlans.size() - 1 ? true : false);
}
// This foreach will pick the sort key columns from the POPoissonSample output
diff --git a/test/e2e/pig/tests/nightly.conf b/test/e2e/pig/tests/nightly.conf
index c8017d8..4cc12bf 100644
--- a/test/e2e/pig/tests/nightly.conf
+++ b/test/e2e/pig/tests/nightly.conf
@@ -3186,6 +3186,23 @@
store e into ':OUTPATH:';\,
},
+ # skew join with tuple key
+ {
+ 'num' => 15,
+ 'java_params' => ['-Dpig.skewedjoin.reduce.maxtuple=100'],
+ 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name, age, gpa);
+b = load ':INPATH:/singlefile/votertab10k' as (name, age, registration, contributions);
+c = group a by (name, age);
+d = group b by (name, age);
+e = join c by $0, d by $0 using 'skewed' parallel 5;
+store e into ':OUTPATH:';\,
+ 'verify_pig_script' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name, age, gpa);
+b = load ':INPATH:/singlefile/votertab10k' as (name, age, registration, contributions);
+c = group a by (name, age);
+d = group b by (name, age);
+e = join c by $0, d by $0;
+store e into ':OUTPATH:';\
+ }
]
},
diff --git a/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-SelfJoin-2.gld b/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-SelfJoin-2.gld
index b269663..4886e9f 100644
--- a/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-SelfJoin-2.gld
+++ b/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-SelfJoin-2.gld
@@ -18,7 +18,7 @@
| | |
| | Constant(DummyVal) - scope-32
| |
-| |---New For Each(true,true)[tuple] - scope-37
+| |---New For Each(false,true)[tuple] - scope-37
| | |
| | Project[int][0] - scope-21
| | |
diff --git a/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-SkewJoin-1.gld b/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-SkewJoin-1.gld
index c35dab6..88ea95a 100644
--- a/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-SkewJoin-1.gld
+++ b/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-SkewJoin-1.gld
@@ -16,7 +16,7 @@
| |
| Constant(DummyVal) - scope-30
|
-|---New For Each(true,true)[tuple] - scope-41
+|---New For Each(false,true)[tuple] - scope-41
| |
| Project[int][0] - scope-16
| |
diff --git a/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-SkewJoin-2.gld b/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-SkewJoin-2.gld
index 642b6b9..1bcadb5 100644
--- a/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-SkewJoin-2.gld
+++ b/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-SkewJoin-2.gld
@@ -16,7 +16,7 @@
| |
| Constant(DummyVal) - scope-34
|
-|---New For Each(true,true)[tuple] - scope-39
+|---New For Each(false,true)[tuple] - scope-39
| |
| Project[int][0] - scope-20
| |
diff --git a/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Union-12-OPTOFF.gld b/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Union-12-OPTOFF.gld
index a2f964e..70d3782 100644
--- a/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Union-12-OPTOFF.gld
+++ b/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Union-12-OPTOFF.gld
@@ -118,7 +118,7 @@
| | |
| | Constant(DummyVal) - scope-112
| |
-| |---New For Each(true,true)[tuple] - scope-117
+| |---New For Each(false,true)[tuple] - scope-117
| | |
| | Project[int][0] - scope-73
| | |
diff --git a/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Union-12.gld b/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Union-12.gld
index e917568..a157a69 100644
--- a/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Union-12.gld
+++ b/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Union-12.gld
@@ -24,7 +24,7 @@
| | | | |
| | | | Constant(DummyVal) - scope-174
| | | |
-| | | |---New For Each(true,true)[tuple] - scope-180
+| | | |---New For Each(false,true)[tuple] - scope-180
| | | | |
| | | | Project[int][0] - scope-177
| | | | |
@@ -52,7 +52,7 @@
| | | | |
| | | | Constant(DummyVal) - scope-186
| | | |
-| | | |---New For Each(true,true)[tuple] - scope-192
+| | | |---New For Each(false,true)[tuple] - scope-192
| | | | |
| | | | Project[int][0] - scope-189
| | | | |
@@ -90,7 +90,7 @@
| | | | |
| | | | Constant(DummyVal) - scope-201
| | | |
-| | | |---New For Each(true,true)[tuple] - scope-207
+| | | |---New For Each(false,true)[tuple] - scope-207
| | | | |
| | | | Project[int][0] - scope-204
| | | | |
@@ -124,7 +124,7 @@
| | | | |
| | | | Constant(DummyVal) - scope-216
| | | |
-| | | |---New For Each(true,true)[tuple] - scope-222
+| | | |---New For Each(false,true)[tuple] - scope-222
| | | | |
| | | | Project[int][0] - scope-219
| | | | |
diff --git a/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Union-15-OPTOFF.gld b/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Union-15-OPTOFF.gld
index f1cfec4..f05fe3b 100644
--- a/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Union-15-OPTOFF.gld
+++ b/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Union-15-OPTOFF.gld
@@ -50,7 +50,7 @@
| |
| Constant(DummyVal) - scope-42
|
-|---New For Each(true,true)[tuple] - scope-47
+|---New For Each(false,true)[tuple] - scope-47
| |
| Project[int][0] - scope-26
| |
diff --git a/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Union-15.gld b/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Union-15.gld
index 654db21..1de2990 100644
--- a/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Union-15.gld
+++ b/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Union-15.gld
@@ -18,7 +18,7 @@
| | |
| | Constant(DummyVal) - scope-72
| |
-| |---New For Each(true,true)[tuple] - scope-78
+| |---New For Each(false,true)[tuple] - scope-78
| | |
| | Project[int][0] - scope-75
| | |
@@ -36,7 +36,7 @@
| | |
| | Constant(DummyVal) - scope-82
| |
-| |---New For Each(true,true)[tuple] - scope-88
+| |---New For Each(false,true)[tuple] - scope-88
| | |
| | Project[int][0] - scope-85
| | |
diff --git a/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Union-16-OPTOFF.gld b/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Union-16-OPTOFF.gld
index a4fe3c0..1eebe86 100644
--- a/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Union-16-OPTOFF.gld
+++ b/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Union-16-OPTOFF.gld
@@ -18,7 +18,7 @@
| |
| Constant(DummyVal) - scope-42
|
-|---New For Each(true,true)[tuple] - scope-53
+|---New For Each(false,true)[tuple] - scope-53
| |
| Project[int][0] - scope-26
| |
diff --git a/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Union-16.gld b/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Union-16.gld
index 9901615..7587457 100644
--- a/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Union-16.gld
+++ b/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Union-16.gld
@@ -16,7 +16,7 @@
| |
| Constant(DummyVal) - scope-42
|
-|---New For Each(true,true)[tuple] - scope-53
+|---New For Each(false,true)[tuple] - scope-53
| |
| Project[int][0] - scope-26
| |
diff --git a/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Union-6-OPTOFF.gld b/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Union-6-OPTOFF.gld
index 184737e..1918efb 100644
--- a/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Union-6-OPTOFF.gld
+++ b/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Union-6-OPTOFF.gld
@@ -48,7 +48,7 @@
| |
| Constant(DummyVal) - scope-122
|
-|---New For Each(true,true)[tuple] - scope-127
+|---New For Each(false,true)[tuple] - scope-127
| |
| Project[int][0] - scope-110
| |
diff --git a/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Union-6.gld b/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Union-6.gld
index a1b6c27..d6feb80 100644
--- a/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Union-6.gld
+++ b/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Union-6.gld
@@ -19,7 +19,7 @@
| |
| Constant(DummyVal) - scope-68
|
-|---New For Each(true,true)[tuple] - scope-74
+|---New For Each(false,true)[tuple] - scope-74
| |
| Project[int][0] - scope-71
| |
@@ -50,7 +50,7 @@
| |
| Constant(DummyVal) - scope-78
|
-|---New For Each(true,true)[tuple] - scope-84
+|---New For Each(false,true)[tuple] - scope-84
| |
| Project[int][0] - scope-81
| |