PIG-5237: Fix DOT file parsing to enable DOT-based physical plan testing (YaShock via szita)
git-svn-id: https://svn.apache.org/repos/asf/pig/trunk@1801161 13f79535-47bb-0310-9956-ffa450edef68
diff --git a/CHANGES.txt b/CHANGES.txt
index af909ae..5320fd7 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -24,6 +24,8 @@
IMPROVEMENTS
+PIG-5237: Fix DOT file parsing to enable DOT-based physical plan testing (YaShock via szita)
+
PIG-5269: MapReduceLauncher and MRJobStats imports org.python.google.common.collect.Lists instead of org.google.common.collect.Lists (nkollar via szita)
PIG-4700: Enable progress reporting for Tasks in Tez (satishsaley via rohini)
diff --git a/test/org/apache/pig/spark/TestSparkCompiler.java b/test/org/apache/pig/spark/TestSparkCompiler.java
index 15f77f6..89248a8 100644
--- a/test/org/apache/pig/spark/TestSparkCompiler.java
+++ b/test/org/apache/pig/spark/TestSparkCompiler.java
@@ -24,7 +24,6 @@
import java.io.PrintStream;
import java.util.Properties;
import java.util.Random;
-
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.TransformerException;
@@ -44,13 +43,15 @@
import org.apache.pig.impl.plan.VisitorException;
import org.apache.pig.test.Util;
import org.apache.pig.test.utils.TestHelper;
+import org.apache.pig.test.utils.dotGraph.DotGraph;
+import org.apache.pig.test.utils.dotGraph.DotGraphReader;
import org.junit.AfterClass;
import org.junit.Before;
import org.junit.BeforeClass;
import org.junit.Test;
-import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
/**
* Test cases to test the SparkCompiler. VERY IMPORTANT NOTE: The tests here
@@ -74,9 +75,8 @@
public void doPrint(PrintStream ps, SparkOperPlan plan) throws VisitorException, ParserConfigurationException, TransformerException {
switch (this) {
case DOT:
- throw new RuntimeException("Testing in DOT format not supported yet");
- //(new DotSparkPrinter(plan, ps)).dump();
- //break;
+ (new DotSparkPrinter(plan, ps)).dump();
+ break;
case XML:
XMLSparkPrinter printer = new XMLSparkPrinter(ps, plan);
printer.visit();
@@ -88,6 +88,19 @@
break;
}
}
+
+ public boolean compare(String goldenPlan, String compiledPlan) {
+ switch (this) {
+ case DOT:
+ DotGraph a = DotGraphReader.load(goldenPlan);
+ DotGraph b = DotGraphReader.load(compiledPlan);
+ return a.isomorphic(b);
+ case XML:
+ case TEXT:
+ default:
+ return TestHelper.sortUDFs(Util.removeSignature(goldenPlan)).equals(TestHelper.sortUDFs(Util.removeSignature(compiledPlan)));
+ }
+ }
}
// If for some reason, the golden files need to be regenerated, set this to
@@ -135,8 +148,7 @@
run(query, "test/org/apache/pig/test/data/GoldenFiles/spark/SPARKC-LoadStore-1-text.gld", PlanPrinter.TEXT);
run(query, "test/org/apache/pig/test/data/GoldenFiles/spark/SPARKC-LoadStore-1-xml.gld", PlanPrinter.XML);
- //TODO: enable this when DOT file comparison is supported
- //run(query, "test/org/apache/pig/test/data/GoldenFiles/spark/SPARKC-LoadStore-1-dot.gld", PlanPrinter.DOT);
+ run(query, "test/org/apache/pig/test/data/GoldenFiles/spark/SPARKC-LoadStore-1-dot.gld", PlanPrinter.DOT);
}
private void run(String query, String expectedFile, PlanPrinter planPrinter) throws Exception {
@@ -174,8 +186,8 @@
String goldenPlanClean = Util.standardizeNewline(goldenPlan).trim();
String compiledPlanClean = Util.standardizeNewline(compiledPlan).trim();
- assertEquals(TestHelper.sortUDFs(Util.removeSignature(goldenPlanClean)),
- TestHelper.sortUDFs(Util.removeSignature(compiledPlanClean)));
+
+ assertTrue(planPrinter.compare(goldenPlanClean, compiledPlanClean));
}
}
diff --git a/test/org/apache/pig/test/utils/dotGraph/DOTParser.jjt b/test/org/apache/pig/test/utils/dotGraph/DOTParser.jjt
index 49b6d23..3546092 100644
--- a/test/org/apache/pig/test/utils/dotGraph/DOTParser.jjt
+++ b/test/org/apache/pig/test/utils/dotGraph/DOTParser.jjt
@@ -25,23 +25,31 @@
PARSER_BEGIN(DOTParser)
-package org.apache.pig.test.utils.dotGraph.parser ;
+package org.apache.pig.test.utils.dotGraph.parser;
-import java.util.*;
-import java.io.*;
-import org.apache.pig.test.utils.dotGraph.* ;
+import java.util.Map;
+import java.util.HashMap;
+
+import org.apache.pig.test.utils.dotGraph.DotGraph;
+import org.apache.pig.test.utils.dotGraph.DotNode;
+import org.apache.pig.test.utils.dotGraph.DotEdge;
+import org.apache.pig.test.utils.dotGraph.DotGraphReader;
public class DOTParser {
+ public static Map<String, DotNode> nodeMap = new HashMap<String, DotNode>();
static String unquote(String s) {
return s.substring(1, s.length()-1);
}
-
- static class DotState {
- public Map<String,String> nodeAttributes = new HashMap<String,String>() ;
- public Map<String,String> edgeAttributes = new HashMap<String,String>() ;
- }
+ public static DotNode getNodeByName(String name) {
+ DotNode node = nodeMap.get(name);
+ if (node == null) {
+ node = new DotNode(name);
+ nodeMap.put(name, node);
+ }
+ return node;
+ }
}
PARSER_END(DOTParser)
@@ -96,95 +104,132 @@
| <NODE: "node">
| <GRAPH: "graph">
| <DIGRAPH : "digraph">
+ | <SUBGRAPH : "subgraph">
| <#LETTER : ["a"-"z", "A"-"Z"] >
| <#DIGIT : ["0"-"9"] >
| <#SPECIAL_CHAR : "_" | "$" >
| <NAME : <LETTER> ( <LETTER> | <DIGIT> | <SPECIAL_CHAR> )* >
+ | <NUMBER : ( <DIGIT> )+ >
| <QUOTEDSTRING : "\"" (~["\""])* "\"">
}
-
DotGraph Parse() :
{
DotGraph dotGraph = null ;
- DotState dotState = new DotState() ;
Token graphName ;
}
{
(
<DIGRAPH>
- graphName = <NAME> { dotGraph = new DotGraph(graphName.image) ; }
+ (graphName = <NAME> { dotGraph = new DotGraph(graphName.image); dotGraph.topLevel = true; })?
<LPAREN>
-
- ( LOOKAHEAD(2)
- EdgeStatement(dotGraph, dotState)
- | NodeStatement(dotGraph, dotState)
- | AttributeStatement(dotGraph, dotState)
- )+
-
+ StatementList(dotGraph)
<RPAREN>
)
{ return dotGraph ; }
}
-void AttributeStatement(DotGraph dotGraph, DotState dotState) :
+void StatementList(DotGraph dotGraph) :
+{
+ ;
+}
+{
+ (
+ Statement(dotGraph)
+ )+
+}
+
+void Statement(DotGraph dotGraph) :
+{
+ String[] attr;
+ DotGraph subGraph;
+}
+{
+ (
+ LOOKAHEAD(2) EdgeStatement(dotGraph)
+ | AttributeStatement(dotGraph)
+ | LOOKAHEAD(2) ( attr = Attribute() ) { dotGraph.attributes.put(attr[0], attr[1]); }
+ | LOOKAHEAD(2) NodeStatement(dotGraph)
+ | subGraph = SubGraph() { dotGraph.nodes.add(subGraph); }
+ )
+ ( <SEMICOLON> )?
+}
+
+DotGraph SubGraph() :
+{
+ DotGraph dotGraph = null ;
+ Token graphName ;
+}
+{
+ <SUBGRAPH>
+ graphName = <NAME> { dotGraph = new DotGraph(graphName.image); }
+ <LPAREN>
+ StatementList(dotGraph)
+ <RPAREN>
+ { return dotGraph; }
+}
+
+void AttributeStatement(DotGraph dotGraph) :
{
Map<String,String> attributes ;
}
{
(
- ( <EDGE> attributes = AttributeList() { dotState.edgeAttributes = attributes ; } )
- | ( <NODE> attributes = AttributeList() { dotState.nodeAttributes = attributes ; } )
+ ( <EDGE> attributes = AttributeList() { dotGraph.edgeAttributes = attributes; } )
+ | ( <NODE> attributes = AttributeList() { dotGraph.nodeAttributes = attributes; } )
| ( <GRAPH> attributes = AttributeList() { dotGraph.attributes = attributes ; } )
)
- <SEMICOLON>
}
-void NodeStatement(DotGraph dotGraph, DotState dotState) :
+void NodeStatement(DotGraph dotGraph) :
{
- Token nodeName ;
- DotNode node = new DotNode() ;
+ String nodeName ;
+ DotNode node ;
Map<String,String> attributes ;
}
{
- nodeName = <NAME> { node.name = nodeName.image ; }
+ ( nodeName = NodeName() ) { node = getNodeByName(nodeName); }
( attributes = AttributeList() {
node.attributes = new HashMap<String,String>() ;
- if (dotState != null) {
- node.attributes.putAll(dotState.nodeAttributes) ;
- }
node.attributes.putAll(attributes) ;
}
)?
- <SEMICOLON>
{ dotGraph.nodes.add(node) ; }
}
-void EdgeStatement(DotGraph dotGraph, DotState dotState) :
+void EdgeStatement(DotGraph dotGraph) :
{
- Token nodeName1 ;
- Token nodeName2 ;
- String startingNode ;
- DotNode node = new DotNode() ;
+ String nodeName1 ;
+ String nodeName2 ;
+ DotNode startingNode ;
Map<String,String> attributes ;
+ DotEdge edge = new DotEdge() ;
}
{
- nodeName1 = <NAME> { startingNode = nodeName1.image ; }
+ nodeName1 = NodeName() { startingNode = getNodeByName(nodeName1) ; }
(
<DIRECTED_EDGE>
- nodeName2 = <NAME>
+ nodeName2 = NodeName()
{
- DotEdge edge = new DotEdge() ;
edge.fromNode = startingNode ;
- edge.toNode = nodeName2.image ;
+ DotNode node2 = getNodeByName(nodeName2) ;
+ edge.toNode = node2 ;
+ dotGraph.nodes.add(startingNode);
+ dotGraph.nodes.add(node2);
dotGraph.edges.add(edge) ;
- startingNode = nodeName2.image ;
+ if (startingNode != node2)
+ startingNode.edgeTo.add(node2);
+
+ startingNode = node2 ;
}
)+
- <SEMICOLON>
-
+ ( attributes = AttributeList() {
+ edge.attributes = new HashMap<String,String>() ;
+ edge.attributes.putAll(attributes) ;
+ }
+ )?
}
Map<String,String> AttributeList() :
@@ -208,14 +253,39 @@
String[] Attribute() :
{
Token attName ;
- Token value ;
+ String value ;
String[] keyValuePair = new String[2] ;
}
{
(
attName = <NAME> { keyValuePair[0] = attName.image ; }
<EQUAL>
- value = <QUOTEDSTRING> { keyValuePair[1] = unquote(value.image) ; }
+ value = Value() { keyValuePair[1] = value; }
)
{ return keyValuePair ; }
}
+
+String Value() :
+{
+ Token value;
+}
+{
+ (
+ value = <QUOTEDSTRING>
+ | value = <NAME>
+ | value = <NUMBER>
+ )
+ { return value.image; }
+}
+
+String NodeName() :
+{
+ Token name ;
+}
+{
+ (
+ name = <NAME>
+ | name = <NUMBER>
+ )
+ { return name.image; }
+}
diff --git a/test/org/apache/pig/test/utils/dotGraph/DotEdge.java b/test/org/apache/pig/test/utils/dotGraph/DotEdge.java
index eca2f31..46a17e0 100644
--- a/test/org/apache/pig/test/utils/dotGraph/DotEdge.java
+++ b/test/org/apache/pig/test/utils/dotGraph/DotEdge.java
@@ -18,11 +18,46 @@
package org.apache.pig.test.utils.dotGraph;
+import java.util.HashMap;
+import java.util.Map;
+
/**
* This represents an edge in DOT format.
* An edge in DOT can have attributes but we're not interested
*/
public class DotEdge {
- public String fromNode ;
- public String toNode ;
-}
+ public DotNode fromNode;
+ public DotNode toNode;
+ public Map<String, String> attributes = new HashMap<String, String>();
+
+ @Override
+ public String toString() {
+ StringBuilder sb = new StringBuilder(fromNode.name + " -> " + toNode.name);
+ if (attributes.size() > 0) {
+ int index = 0;
+ sb.append(" [");
+ for (Map.Entry<String, String> attr : attributes.entrySet()) {
+ sb.append(attr.getKey() + "=" + attr.getValue());
+ if (index < attributes.size() - 1)
+ sb.append(", ");
+ index++;
+ }
+ sb.append("]");
+ }
+ return sb.toString();
+ }
+
+ @Override
+ public boolean equals(Object other) {
+ if (other instanceof DotEdge) {
+ DotEdge edge = (DotEdge) other;
+ return fromNode.equals(edge.fromNode) && toNode.equals(edge.toNode);
+ }
+ return false;
+ }
+
+ @Override
+ public int hashCode() {
+ return fromNode.hashCode() * toNode.hashCode();
+ }
+}
\ No newline at end of file
diff --git a/test/org/apache/pig/test/utils/dotGraph/DotGraph.java b/test/org/apache/pig/test/utils/dotGraph/DotGraph.java
index 2013562..8b8a2fb 100644
--- a/test/org/apache/pig/test/utils/dotGraph/DotGraph.java
+++ b/test/org/apache/pig/test/utils/dotGraph/DotGraph.java
@@ -18,24 +18,137 @@
package org.apache.pig.test.utils.dotGraph;
-import java.util.List;
import java.util.ArrayList;
-import java.util.Map;
+import java.util.Collections;
import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Map;
+import java.util.Set;
/***
* This represents graph structure in DOT format
*/
-public class DotGraph {
-
- public String name;
- public List<DotEdge> edges = new ArrayList<DotEdge>() ;
- public List<DotNode> nodes = new ArrayList<DotNode>() ;
- public Map<String, String> attributes = new HashMap<String,String>() ;
-
+public class DotGraph extends DotNode {
+ public boolean topLevel = false;
+ public Set<DotEdge> edges = new HashSet<DotEdge>();
+ public Set<DotNode> nodes = new HashSet<DotNode>();
+ public Map<String, String> edgeAttributes = new HashMap<String, String>();
+ public Map<String, String> nodeAttributes = new HashMap<String, String>();
public DotGraph(String name) {
- this.name = name ;
+ super(name);
}
-}
+ @Override
+ public String toString() {
+ String graphType = topLevel ? "digraph " : "subgraph ";
+ StringBuilder sb = new StringBuilder(graphType);
+ sb.append(name);
+ sb.append("{\n");
+
+ for (Map.Entry<String, String> attr : attributes.entrySet())
+ sb.append(attr.getKey() + "=" + attr.getValue() + ";\n");
+
+ if (nodeAttributes.size() > 0) {
+ int index = 0;
+ sb.append("node [");
+ for (Map.Entry<String, String> attr : nodeAttributes.entrySet()) {
+ sb.append(attr.getKey() + "=" + attr.getValue());
+ if (index < nodeAttributes.size() - 1)
+ sb.append(", ");
+ index++;
+ }
+ sb.append("];\n");
+ }
+
+ if (edgeAttributes.size() > 0) {
+ int index = 0;
+ sb.append("edge [");
+ for (Map.Entry<String, String> attr : edgeAttributes.entrySet()) {
+ sb.append(attr.getKey() + "=" + attr.getValue());
+ if (index < edgeAttributes.size() - 1)
+ sb.append(", ");
+ index++;
+ }
+ sb.append("];\n");
+ }
+
+ for (DotNode node : nodes)
+ sb.append(node.toString() + ";\n");
+
+ for (DotEdge edge : edges)
+ sb.append(edge.toString() + ";\n");
+
+ sb.append("}");
+ return sb.toString();
+ }
+
+ @Override
+ public boolean equals(Object other) {
+ if (other != null && other instanceof DotGraph) {
+ DotGraph graph = (DotGraph) other;
+ return graph.getLabel().equals(getLabel()) && edges.equals(graph.edges) && nodes.equals(graph.nodes);
+ }
+ return false;
+ }
+
+ @Override
+ public int hashCode() {
+ return getLabel().hashCode() * edgeAttributes.hashCode() * nodeAttributes.hashCode();
+ }
+
+ private Set<DotNode> getRootNodes() {
+ Set<DotNode> roots = new HashSet<DotNode>(nodes);
+
+ for (DotEdge edge : edges)
+ roots.remove(edge.toNode);
+
+ return roots;
+ }
+
+ public boolean isomorphic(DotNode other) {
+ if (other instanceof DotGraph) {
+ DotGraph graph = (DotGraph) other;
+ return graph.getLabel().equals(getLabel()) && graph.getCanonicalName().equals(getCanonicalName());
+ }
+ return false;
+ }
+
+ @Override
+ public String getCanonicalName() {
+ StringBuilder sb = new StringBuilder("");
+ ArrayList<String> children = new ArrayList<>();
+
+ Set<DotNode> roots = getRootNodes();
+
+ for (DotNode root : roots)
+ children.add(root.getCanonicalName());
+
+ Collections.sort(children);
+ for (String nodeName : children)
+ sb.append(nodeName);
+
+ sb.insert(0, '#');
+ sb.insert(sb.length(), '#');
+ return sb.toString();
+ }
+
+ @Override
+ public String getLabel() {
+ StringBuilder label = new StringBuilder(super.getLabel());
+ ArrayList<String> children = new ArrayList<>();
+
+ Set<DotNode> roots = getRootNodes();
+
+ for (DotNode root : roots)
+ children.add(root.getLabel());
+
+ Collections.sort(children);
+ for (String nodeLabel : children)
+ label.append(nodeLabel);
+
+ label.insert(0, '\"');
+ label.insert(label.length(), '\"');
+ return label.toString();
+ }
+}
\ No newline at end of file
diff --git a/test/org/apache/pig/test/utils/dotGraph/DotGraphReader.java b/test/org/apache/pig/test/utils/dotGraph/DotGraphReader.java
index c1cb6ae..cb9743f 100644
--- a/test/org/apache/pig/test/utils/dotGraph/DotGraphReader.java
+++ b/test/org/apache/pig/test/utils/dotGraph/DotGraphReader.java
@@ -18,11 +18,15 @@
package org.apache.pig.test.utils.dotGraph;
+import java.io.BufferedReader;
+import java.io.ByteArrayInputStream;
+import java.io.FileNotFoundException;
+import java.io.FileReader;
+import java.io.IOException;
+
import org.apache.pig.test.utils.dotGraph.parser.DOTParser;
import org.apache.pig.test.utils.dotGraph.parser.ParseException;
-import java.io.*;
-
/***
* This class is responsible for loading textual Dot graph
* into object representation.
@@ -36,19 +40,18 @@
* @return graph
*/
- public DotGraph load(String dotContent) {
+ static public DotGraph load(String dotContent) {
ByteArrayInputStream stream
- = new ByteArrayInputStream(dotContent.getBytes()) ;
- DOTParser dotParser = new DOTParser(stream) ;
- DotGraph graph = null ;
+ = new ByteArrayInputStream(dotContent.getBytes());
+ DOTParser dotParser = new DOTParser(stream);
+ DotGraph graph = null;
try {
- graph = dotParser.Parse() ;
+ graph = dotParser.Parse();
+ } catch (ParseException pe) {
+ System.out.println(pe.getMessage());
+ throw new RuntimeException("Bad Dot file");
}
- catch (ParseException pe) {
- System.out.println(pe.getMessage()) ;
- throw new RuntimeException("Bad Dot file") ;
- }
- return graph ;
+ return graph;
}
/***
@@ -57,24 +60,22 @@
* @return graph
*/
- public DotGraph loadFromFile(String file) {
- StringBuilder sb = new StringBuilder() ;
- BufferedReader br = null ;
+ static public DotGraph loadFromFile(String file) {
+ StringBuilder sb = new StringBuilder();
+ BufferedReader br = null;
try {
- br = new BufferedReader(new FileReader(file)) ;
- String str ;
- while((str=br.readLine())!=null) {
- sb.append(str) ;
- sb.append("\n") ;
+ br = new BufferedReader(new FileReader(file));
+ String str;
+ while ((str = br.readLine()) != null) {
+ sb.append(str);
+ sb.append("\n");
}
- }
- catch (FileNotFoundException fnfe) {
- throw new RuntimeException("file:" + file + " not found!") ;
- }
- catch (IOException ioe) {
- throw new RuntimeException("Error while reading from:" + file) ;
+ } catch (FileNotFoundException fnfe) {
+ throw new RuntimeException("file:" + file + " not found!");
+ } catch (IOException ioe) {
+ throw new RuntimeException("Error while reading from:" + file);
}
- return load(sb.toString()) ;
+ return load(sb.toString());
}
}
diff --git a/test/org/apache/pig/test/utils/dotGraph/DotNode.java b/test/org/apache/pig/test/utils/dotGraph/DotNode.java
index 2d3ace6..5008f68 100644
--- a/test/org/apache/pig/test/utils/dotGraph/DotNode.java
+++ b/test/org/apache/pig/test/utils/dotGraph/DotNode.java
@@ -18,15 +18,89 @@
package org.apache.pig.test.utils.dotGraph;
-import java.util.Map;
+import java.util.ArrayList;
+import java.util.Collections;
import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Map;
+import java.util.Set;
/***
* This represents a node in DOT format
*/
public class DotNode {
+ public String name;
+ public Map<String, String> attributes = new HashMap<String, String>();
+ public Set<DotNode> edgeTo = new HashSet<DotNode>();
- public String name ;
- public Map<String, String> attributes = new HashMap<String,String>() ;
+ public DotNode(String name) {
+ this.name = name;
+ }
-}
+ @Override
+ public String toString() {
+ StringBuilder sb = new StringBuilder(name);
+ if (attributes.size() > 0) {
+ int index = 0;
+ sb.append(" [");
+ for (Map.Entry<String, String> attr : attributes.entrySet()) {
+ sb.append(attr.getKey() + "=" + attr.getValue());
+ if (index < attributes.size() - 1)
+ sb.append(", ");
+ index++;
+ }
+ sb.append("]");
+ }
+ return sb.toString();
+ }
+
+ public String getLabel() {
+ String label = "";
+ for (Map.Entry<String, String> attr : attributes.entrySet()) {
+ if (attr.getKey().equals("label")) {
+ label = attr.getValue();
+ break;
+ }
+ }
+ return label;
+ }
+
+ public boolean isInvisStyle() {
+ for (Map.Entry<String, String> attr : attributes.entrySet())
+ if (attr.getKey().equals("style") && attr.getValue().equals("invis"))
+ return true;
+
+ return false;
+ }
+
+ @Override
+ public boolean equals(Object other) {
+ if (other instanceof DotNode && !(other instanceof DotGraph)) {
+ DotNode node = (DotNode) other;
+ return name.equals(node.name);
+ }
+ return false;
+ }
+
+ @Override
+ public int hashCode() {
+ return name.hashCode();
+ }
+
+ public String getCanonicalName() {
+ StringBuilder sb = new StringBuilder("");
+ ArrayList<String> canonicalNames = new ArrayList<>();
+
+ for (DotNode node : edgeTo)
+ if (!node.isInvisStyle())
+ canonicalNames.add(node.getCanonicalName());
+
+ Collections.sort(canonicalNames);
+ for (String nodeName : canonicalNames)
+ sb.append(nodeName);
+
+ sb.insert(0, '0');
+ sb.insert(sb.length(), '1');
+ return sb.toString();
+ }
+}
\ No newline at end of file