| #!/usr/bin/env perl |
| # |
| # Licensed to the Apache Software Foundation (ASF) under one |
| # or more contributor license agreements. See the NOTICE file |
| # distributed with this work for additional information |
| # regarding copyright ownership. The ASF licenses this file |
| # to you under the Apache License, Version 2.0 (the |
| # "License"); you may not use this file except in compliance |
| # with the License. You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, |
| # software distributed under the License is distributed on an |
| # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| # KIND, either express or implied. See the License for the |
| # specific language governing permissions and limitations |
| # under the License. |
| # |
| # |
| # Licensed to the Apache Software Foundation (ASF) under one |
| # or more contributor license agreements. See the NOTICE file |
| # distributed with this work for additional information |
| # regarding copyright ownership. The ASF licenses this file |
| # to you under the Apache License, Version 2.0 (the |
| # "License"); you may not use this file except in compliance |
| # with the License. You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, |
| # software distributed under the License is distributed on an |
| # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| # KIND, either express or implied. See the License for the |
| # specific language governing permissions and limitations |
| # under the License. |
| # |
| use Pod::Usage; |
| use Getopt::Long; |
| use Data::Dumper; |
| use strict; |
| use warnings; |
| |
| =head1 NAME |
| |
| B<explain.pl> - parse and reformat Postgres EXPLAIN output |
| |
| =head1 SYNOPSIS |
| |
| B<explain> [options] filename |
| |
| Options: |
| |
| -help brief help message |
| -man full documentation |
| -option formatting option: perl, yaml, dot, query, jpg, json |
| -querylist list of queries |
| -direction direction of query plan graph: LR, RL, TB or BT. |
| -colorscheme graph color scheme |
| -timeline rank nodes by start offset time (experimental) |
| -prune prune tree attributes |
| -output output filename (else output to STDOUT). |
| -statcolor statistics coloring (experimental) |
| -edge edge decorations |
| |
| =head1 OPTIONS |
| |
| =over 8 |
| |
| =item B<-help> |
| |
| Print a brief help message and exits. |
| |
| =item B<-man> |
| |
| Prints the manual page and exits. |
| |
| =item B<-option> |
| |
| Choose the output format option. Several formats are supported. |
| |
| =over 12 |
| |
| =item perl: output in perl L<Data::Dumper> format. |
| |
| =item yaml: output in L<yaml.org> machine and human-readable format |
| |
| =item dot: output in dot graphical language for L<graphiz.org> graphing tool. |
| |
| =item querytext: output the text of the query (only useful for TPC-H) |
| |
| =item jpg: pipe the dot output thru the dot formatter (if it is installed) to get jpg output directly. (May also support bmp, ps, pdf, png) |
| |
| =item json: output in L<json.org> machine and human-readable format |
| |
| =back |
| |
| |
| =item B<-querylist> |
| |
| A list of queries to process. The query numbering is 1-based. Some |
| valid forms are: |
| |
| -querylist 1 |
| -querylist=2 |
| --ql=3,4,5 |
| --ql=6-9 |
| |
| or some combination. By default, all queries are processed. |
| |
| |
| =item B<-direction> |
| |
| Direction of data flow in query plan graph. Valid entries are: |
| |
| =over 12 |
| |
| =item BT (default): bottom to top |
| |
| =item TB: top to bottom |
| |
| =item LR: left to right |
| |
| =item RL: right to left |
| |
| =back |
| |
| =item B<-colorscheme> |
| |
| One of the supported ColorBrewer(TM) color schemes. Use |
| -color ? |
| to get a list of the valid schemes, and |
| -color dump |
| to output a dot file displaying all the valid schemes. |
| |
| Colors from www.ColorBrewer.org by Cynthia A. Brewer, Geography, |
| Pennsylvania State University. |
| |
| =item B<-prune> |
| |
| Prune tree attributes. The only supported option is "stats" to |
| remove the to_end and to_first timing information. |
| |
| =item B<-output> |
| |
| Output file name. If multiple queries are processed, the filename |
| is used as a template to generate multiple files. If the filename |
| has an extension, it is preserved, else an extension is supplied |
| based upon the formatting option. The filename template inserts |
| the query number before the "dot" (.) if more than one query was |
| processed. |
| |
| =item B<-statcolor> |
| |
| For an EXPLAIN ANALYZE plan, color according to the time spent in |
| node. Red is greatest, and blue is least. For statcolor=ts (default), |
| the node edge is colored by time, and the node interior is filled |
| by slice color. For statcolor=st, the color scheme is reversed. |
| For statcolor=t (timing only), the entire node is colored according |
| to the time spent. |
| |
| =item B<-edge> |
| |
| Decorate graph edges with row count if available. Valid entries are: |
| |
| =over 12 |
| |
| =item long - print average rows and number of workers |
| |
| =item medium - print average rows and number of workers compactly |
| |
| =item short - print total row counts |
| |
| =back |
| |
| |
| =back |
| |
| =head1 DESCRIPTION |
| |
| explain.pl reads EXPLAIN output from a text file (or standard |
| input) and formats it in several ways. The text file must contain |
| output in one of the the following formats. The first is a regular |
| EXPLAIN format, starting the the QUERY PLAN header and ending with the |
| number of rows in parentheses. Indenting must be on: |
| |
| QUERY PLAN |
| ---------------------------------------------------------------------------------------------------------- |
| Gather Motion 64:1 (slice2) (cost=6007722.78..6007722.79 rows=6 width=51) |
| Merge Key: partial_aggregation.l_returnflag, partial_aggregation.l_linestatus |
| -> Sort (cost=6007722.78..6007722.79 rows=6 width=51) |
| Sort Key: partial_aggregation.l_returnflag, partial_aggregation.l_linestatus |
| -> HashAggregate (cost=6007722.52..6007722.70 rows=6 width=51) |
| Group By: lineitem.l_returnflag, lineitem.l_linestatus |
| -> Redistribute Motion 64:64 (slice1) (cost=6007721.92..6007722.31 rows=6 width=51) |
| Hash Key: lineitem.l_returnflag, lineitem.l_linestatus |
| -> HashAggregate (cost=6007721.92..6007722.19 rows=6 width=51) |
| Group By: lineitem.l_returnflag, lineitem.l_linestatus |
| -> Seq Scan on lineitem (cost=0.00..3693046.50 rows=92587017 width=51) |
| Filter: l_shipdate <= '1998-09-08 00:00:00'::timestamp without time zone |
| (12 rows) |
| |
| |
| The second acceptable format is the TPC-H EXPLAIN ANALYZE, listing |
| each query followed by the EXPLAIN output delineated by vertical bars |
| ('|', e.g. |QUERY PLAN| ): |
| |
| EXPLAIN ANALYZE |
| |
| |
| select |
| l_returnflag, |
| l_linestatus, |
| sum(l_quantity) as sum_qty, |
| sum(l_extendedprice) as sum_base_price, |
| sum(l_extendedprice * (1 - l_discount)) as sum_disc_price, |
| sum(l_extendedprice * (1 - l_discount) * (1 + l_tax)) as sum_charge, |
| avg(l_quantity) as avg_qty, |
| avg(l_extendedprice) as avg_price, |
| avg(l_discount) as avg_disc, |
| count(*) as count_order |
| from |
| lineitem |
| where |
| l_shipdate <= date '1998-12-01' - interval '106 day' |
| group by |
| l_returnflag, |
| l_linestatus |
| order by |
| l_returnflag, |
| l_linestatus; |
| |
| |
| Query 1 complete, 19 rows returned |
| |
| |
| |QUERY PLAN| |
| |Gather Motion 64:1 (slice2) (cost=5990545.19..5990545.21 rows=6 width=51)| |
| | recv: Total 4 rows with 1294937 ms to end.| |
| | Merge Key: partial_aggregation.junk_attr_1, partial_aggregation.junk_attr_2| |
| | -> Sort (cost=5990545.19..5990545.21 rows=6 width=51)| |
| | Avg 1.00 rows x 4 workers. Max 1 rows (seg49) with 1294938 ms to end.| |
| | Sort Key: partial_aggregation.junk_attr_1, partial_aggregation.junk_attr_2| |
| | -> HashAggregate (cost=5990544.94..5990545.12 rows=6 width=51)| |
| | Avg 1.00 rows x 4 workers. Max 1 rows (seg49) with 1294933 ms to end.| |
| | Group By: lineitem.l_returnflag, lineitem.l_linestatus| |
| | -> Redistribute Motion 64:64 (slice1) (cost=5990544.34..5990544.73 rows=6 width=51)| |
| | recv: Avg 64.00 rows x 4 workers. Max 64 rows (seg49) with 1277197 ms to first row, 1294424 ms to end.| |
| | Hash Key: lineitem.l_returnflag, lineitem.l_linestatus| |
| | -> HashAggregate (cost=5990544.34..5990544.61 rows=6 width=51)| |
| | Avg 4.00 rows x 64 workers. Max 4 rows (seg44) with 1292222 ms to end.| |
| | Group By: lineitem.l_returnflag, lineitem.l_linestatus| |
| | -> Seq Scan on lineitem (cost=0.00..3693046.50 rows=91899913 width=51)| |
| | Avg 91914578.95 rows x 64 workers. Max 91914598 rows (seg13) with 14.694 ms to first row, 258614 ms to end.| |
| | Filter: l_shipdate <= '1998-08-17 00:00:00'::timestamp without time zone| |
| |1295317.560 ms elapsed| |
| |
| Time was 1295.33 seconds. Query ended at Thu Oct 12 12:09:27 2006 |
| |
| =head1 CAVEATS/LIMITATIONS |
| |
| If explain.pl uses Graphviz to graph the query plan, it may flip the |
| left and right children of a join to obtain a more balanced pictorial |
| representation. Use the -edge option to label graph edges to |
| correctly identify the left and right children. |
| |
| =head1 AUTHORS |
| |
| Apache HAWQ |
| |
| Address bug reports and comments to: dev@hawq.apache.org |
| |
| =cut |
| |
| # IMPLEMENTATION NOTES: |
| # |
| # EXPLAIN ANALYZE final statistics in analyze_node: |
| # |
| # The final statistics look like this: |
| # |
| # Slice statistics: |
| # (slice0) Executor memory: 472K bytes. |
| # (slice1) Executor memory: 464K bytes avg x 2 workers, 464K bytes max (seg0). |
| # Settings: |
| # |
| # Total runtime: 52347.493 ms |
| # |
| # The "Settings" entry is optional (ie, it only exists if you change the |
| # settings in your session). If the "Settings" entry is missing explain.pl |
| # adds a dummy entry to the statistics. This technique is a bit easier |
| # than changing the parser to handle both cases. |
| # |
| # Parse_node: |
| # InitPlan entries in greenplum are in separate slices, so explain.pl |
| # prefixes them with an arrow (and adds a fake cost) to make them |
| # look like a top-level execution node. Again, this technique was |
| # easier than modifying the parser to special case InitPlan. |
| # |
| # Plan parsing in general: |
| # The original code only dealt with the TPCH formatted output: |
| # |QUERY PLAN| |
| # |Gather Motion 64:1 (slice2) (cost=5990545.19..5990545.21 rows=6 width=51)| |
| # | recv: Total 4 rows with 1294937 ms to end.| |
| # | Merge Key: partial_aggr.junk_attr_1, partial_aggr.junk_attr_2| |
| # | -> Sort (cost=5990545.19..5990545.21 rows=6 width=51)| |
| # | Avg 1.00 rows x 4 workers. Max 1 rows (seg49) with 1294 ms to end.| |
| # |
| # It was easier to modify the parser to wrap the input with missing bars |
| # than handle two cases (are you sensing a pattern here?). |
| # |
| # "Magic" Mode: |
| # This mode just adds an output filename option and constructs jpgs |
| # |
| # Output File Name: |
| # The guts of the formatting code always write to STDOUT, so this code |
| # resets STDOUT to the filename of choice. |
| # |
| # treemap: |
| # This routine applies a function over the entire parse tree |
| # |
| # OLAP fixups: |
| # OLAP queries have duplicate Shared Scan and Multi Slice Motion nodes. |
| # explain.pl only fixes them up for dot output, but not for yaml, perl, etc. |
| # The rationale is that dot handle digraphs nicely, but yaml and perl are |
| # more suitable for tree output. |
| # |
| |
| my $glob_id = ""; |
| |
| my $glob_optn; |
| my $glob_qlist; |
| my $glob_direction; |
| my $glob_timeline; |
| my $glob_prune; |
| my $glob_outi; |
| my $glob_statcolor; |
| my $glob_edge; |
| |
| my $GV_formats; # graphviz output formats |
| |
| my %glob_coltab; |
| |
| my %glob_divcol; |
| |
| my $glob_colorscheme; |
| BEGIN { |
| |
| $GV_formats = '^(jpg|bmp|ps|pdf|png)$'; |
| |
| # table of valid "qualitative" color schemes |
| |
| %glob_coltab = (set312 => [ |
| '#8DD3C7', |
| '#FFFFB3', |
| '#BEBADA', |
| '#FB8072', |
| '#80B1D3', |
| '#FDB462', |
| '#B3DE69', |
| '#FCCDE5', |
| '#D9D9D9', |
| '#BC80BD', |
| '#CCEBC5', |
| '#FFED6F' |
| ], |
| paired12 => [ |
| '#a6cee3', |
| '#1f78b4', |
| '#b2df8a', |
| '#33a02c', |
| '#fb9a99', |
| '#e31a1c', |
| '#fdbf6f', |
| '#ff7f00', |
| '#cab2d6', |
| '#6a3d9a', |
| '#ffff99', |
| '#b15928' |
| ], |
| pastel19 => [ |
| '#fbb4ae', |
| '#b3cde3', |
| '#ccebc5', |
| '#decbe4', |
| '#fed9a6', |
| '#ffffcc', |
| '#e5d8bd', |
| '#fddaec', |
| '#f2f2f2' |
| ], |
| pastel24 => [ |
| '#b3e2cd', |
| '#fdcdac', |
| '#cbd5e8', |
| '#f4cae4', |
| '#e6f5c9', |
| '#fff2ae', |
| '#f1e2cc', |
| '#cccccc' |
| ], |
| set19 => [ |
| '#e41a1c', |
| '#377eb8', |
| '#4daf4a', |
| '#984ea3', |
| '#ff7f00', |
| '#ffff33', |
| '#a65628', |
| '#f781bf', |
| '#999999' |
| ], |
| set28 => [ |
| '#66c2a5', |
| '#fc8d62', |
| '#8da0cb', |
| '#e78ac3', |
| '#a6d854', |
| '#ffd92f', |
| '#e5c494', |
| '#b3b3b3' |
| ], |
| original => [ |
| 'azure', |
| 'cornsilk', |
| 'lavender', |
| 'mintcream', |
| 'mistyrose', |
| 'lightgray', |
| 'salmon', |
| 'goldenrod', |
| 'cyan' |
| ], |
| ); |
| |
| # diverging color schemes |
| |
| %glob_divcol = ( |
| rdbu11 => [ |
| '#67001f', |
| '#b2182b', |
| '#d6604d', |
| '#f4a582', |
| '#fddac7', |
| '#f6f6f6', |
| '#d1e5f0', |
| '#92c5de', |
| '#4393c3', |
| '#2166ac', |
| '#053061' |
| ], |
| ); |
| |
| } |
| |
| sub qlist_fixup |
| { |
| my $qlist = shift; |
| |
| my @outi; |
| |
| for my $qnum (@{$qlist}) |
| { |
| if ($qnum =~ m/^\d+$/) |
| { |
| push @outi, $qnum; |
| } |
| else |
| { |
| if ($qnum =~ m/^\d+\-\d+$/) |
| { |
| my $expr = $qnum; |
| $expr =~ s/\-/\.\./; |
| |
| eval "for my \$val ($expr) { push \@outi, \$val; }"; |
| } |
| else |
| { |
| die("Invalid format for querylist: \'$qnum\'\n"); |
| exit(1); |
| } |
| |
| } |
| |
| } |
| |
| return \@outi; |
| |
| } |
| |
| |
| # dump a nice graph listing all of the color schemes (neato is preferred) |
| sub dodumpcolor |
| { |
| my $coltab = shift; |
| my $fh = shift; |
| |
| my @ggg = sort(keys(%{$coltab})); |
| |
| # centered, with lines (not arrows) |
| print $fh "digraph plan1 { graph [center=\"root\",root=\"root\"] ;\n edge [dir=none]\n"; |
| |
| # adjust the lengths to avoid overlap |
| for my $ii (0..(scalar(@ggg)-1)) |
| { |
| print $fh '"root" -> "' . $ii. '"' . " [len=2];\n"; |
| |
| my $jj = 0; |
| |
| for my $cc (@{$coltab->{$ggg[$ii]}}) |
| { |
| print $fh '"' . $ii. '" -> "' . $ii. "." . $jj. '"' |
| . " [len=1];\n"; |
| $jj++; |
| } |
| } |
| |
| print $fh '"root" [label="color schemes"]' . ";\n"; |
| |
| for my $ii (0..(scalar(@ggg)-1)) |
| { |
| print $fh '"' . $ii . '" [label="' . $ggg[$ii] . '"]' . ";\n"; |
| |
| my $jj = 0; |
| |
| for my $cc (@{$coltab->{$ggg[$ii]}}) |
| { |
| print $fh '"' . $ii . "." . $jj . |
| '" [label="", style=filled, ' . |
| 'fillcolor="' . $cc . '"]' . ";\n"; |
| $jj++; |
| } |
| } |
| print $fh "\n}\n"; |
| |
| } |
| |
| |
| BEGIN { |
| my $man = 0; |
| my $help = 0; |
| my $optn = "YAML"; |
| my $dir = "BT"; |
| my $DEFAULT_COLOR = "set28"; |
| my $colorscheme = $DEFAULT_COLOR; |
| my $timeline = ''; |
| my $prune; |
| my $outfile; |
| my $statcol; |
| my $edgescheme; |
| |
| my @qlst; |
| |
| GetOptions( |
| 'help|?' => \$help, man => \$man, |
| "querylist|ql|list:s" => \@qlst, |
| "option|operation=s" => \$optn, |
| "direction:s" => \$dir, |
| "colorscheme:s" => \$colorscheme, |
| "timeline" => \$timeline, |
| "prune:s" => \$prune, |
| "output:s" => \$outfile, |
| "statcolor:s" => \$statcol, |
| "edge:s" => \$edgescheme) |
| or pod2usage(2); |
| |
| |
| pod2usage(-msg => $glob_id, -exitstatus => 1) if $help; |
| pod2usage(-msg => $glob_id, -exitstatus => 0, -verbose => 2) if $man; |
| |
| $glob_optn = $optn; |
| $glob_optn = "jpg" if ($glob_optn =~ m/^jpeg/i); |
| |
| $glob_timeline = $timeline; |
| $glob_prune = $prune; |
| $glob_outi = $outfile; |
| $glob_statcolor = $statcol; |
| |
| $glob_edge = $edgescheme; |
| |
| if ($dir !~ m/^(TB|BT|LR|RL)$/i) |
| { |
| $glob_direction = "BT"; |
| } |
| else |
| { |
| $glob_direction = uc($dir); |
| } |
| |
| $colorscheme = lc($colorscheme); |
| |
| # print "color: $colorscheme\n"; |
| |
| if (exists($glob_coltab{$colorscheme})) |
| { |
| $glob_colorscheme = $colorscheme; |
| } |
| else |
| { |
| if ($colorscheme =~ m/list|dump/i) |
| { |
| use IO::File; |
| use POSIX qw(tmpnam); |
| |
| my ($tmpnam, $tmpfh); |
| |
| for (;;) { |
| $tmpnam = tmpnam(); |
| sysopen($tmpfh, $tmpnam, O_RDWR | O_CREAT | O_EXCL) && last; |
| } |
| |
| # write to a temporary file |
| dodumpcolor(\%glob_coltab, $tmpfh); |
| |
| close $tmpfh; |
| |
| my $catcmd = "cat $tmpnam"; |
| |
| # format with neato if jpg was specified |
| if ($glob_optn =~ m/$GV_formats/i) |
| { |
| my $dotapp = "/Applications/Graphviz.app/Contents/MacOS/neato"; |
| |
| if ($^O !~ m/darwin/) |
| { |
| $dotapp = `which neato`; |
| chomp($dotapp); |
| } |
| if (defined($dotapp) && length($dotapp) && (-e $dotapp)) |
| { |
| $catcmd .= " | $dotapp -T$glob_optn"; |
| } |
| } |
| |
| system($catcmd); |
| |
| unlink $tmpnam; |
| |
| exit(0); |
| } |
| else |
| { |
| my $colorschemelist = join("\n", sort(keys(%glob_coltab))) . "\n"; |
| |
| # identify the default color |
| $colorschemelist =~ |
| s/$DEFAULT_COLOR/$DEFAULT_COLOR \(default\)/gm; |
| |
| print "\nvalid color schemes are:\n"; |
| print $colorschemelist; |
| print "\nUse: \"explain.pl -color dump -opt jpg > graph.jpg\"\n"; |
| print "to construct a JPEG showing all the valid color schemes.\n"; |
| print "\nColors from www.ColorBrewer.org by Cynthia A. Brewer, Geography,\nPennsylvania State University.\n\n"; |
| |
| exit(0); |
| } |
| } |
| |
| @qlst = split(/,/,join(',', @qlst)); |
| |
| $glob_qlist = qlist_fixup(\@qlst); |
| |
| # print "loading...\n" ; |
| } |
| |
| sub analyze_node |
| { |
| my ($node, $parse_ctx) = @_; |
| |
| if (defined($node) && exists($node->{txt})) |
| { |
| |
| # gather analyze statistics if it exists in this node... |
| if ($node->{txt} =~ |
| m/Slice\s+statistics.*(Settings.*)*Total\s+runtime/s) |
| { |
| my $t1 = $node->{txt}; |
| |
| # NOTE: the final statistics look something like this: |
| |
| # Slice statistics: |
| # (slice0) Executor memory: 472K bytes. |
| # (slice1) Executor memory: 464K bytes avg x 2 workers, 464K bytes max (seg0). |
| # Settings: |
| # Total runtime: 52347.493 ms |
| |
| # (we've actually added some vertical bars so it might look |
| # like this): |
| # || Slice statistics: |
| # || (slice0) Executor memory: 472K bytes. |
| |
| # NB: the "Settings" entry is optional, so |
| # add Settings if they are missing |
| unless ($t1 =~ |
| m/Slice\s+statistics.*Settings.*Total\s+runtime/s) |
| { |
| $t1 =~ |
| s/\n.*\s+Total\s+runtime/\n Settings\: \n Total runtime/; |
| } |
| |
| my @foo = ($t1 =~ m/Slice\s+statistics\:\s+(.*)\s+Settings\:\s+(.*)\s+Total\s+runtime:\s+(.*)\s+ms/s); |
| |
| if (scalar(@foo) == 3) |
| { |
| my $mem = shift @foo; |
| my $sett = shift @foo; |
| my $runt = shift @foo; |
| |
| $mem =~ s/\|\|//gm; # remove '||'... |
| $sett =~ s/\|\|//gm; |
| |
| my $statstuff = {}; |
| |
| my @baz = split(/\n/, $mem); |
| my $sliceh = {}; |
| for my $elt (@baz) |
| { |
| my @ztesch = ($elt =~ m/(slice\d+)/); |
| next unless (scalar(@ztesch)); |
| $elt =~ s/\s*\(slice\d+\)\s*//; |
| my $val = shift @ztesch; |
| $sliceh->{$val} = $elt; |
| } |
| |
| $statstuff->{memory} = $sliceh; |
| $statstuff->{settings} = $sett; |
| $statstuff->{runtime} = $runt; |
| $parse_ctx->{explain_analyze_stats} = $statstuff; |
| $node->{statistics} = $statstuff; |
| } |
| } |
| |
| my @short = $node->{txt} =~ m/\-\>\s*(.*)\s*\(cost\=/; |
| $node->{short} = shift @short; |
| |
| unless(exists($node->{id})) |
| { |
| print Data::Dumper->Dump([$node]), "\n"; |
| } |
| |
| if ($node->{id} == 1) |
| { |
| @short = $node->{txt} =~ m/^\s*\|\s*(.*)\s*\(cost\=/; |
| $node->{short} = shift @short; |
| |
| # handle case where dashed line might have wrapped... |
| unless (defined($node->{short}) && length($node->{short})) |
| { |
| # might not be first line... |
| @short = $node->{txt} =~ m/\s*\|\s*(.*)\s*\(cost\=/; |
| $node->{short} = shift @short; |
| } |
| |
| |
| } |
| |
| # handle case of "cost-free" txt (including a double || |
| # and not first line, or screwed-up parse of short as a single bar |
| # |
| # example: weird initplan like: |
| # || -> InitPlan (slice49) |
| if (defined($node->{short}) && length($node->{short}) |
| && ($node->{short} =~ m/\s*\|\s*/)) |
| { |
| $node->{short} = ""; |
| } |
| |
| unless (defined($node->{short}) && length($node->{short})) |
| { |
| @short = $node->{txt} =~ m/\s*\|(\|)?\s*(\w*)\s*/; |
| $node->{short} = shift @short; |
| |
| if (defined($node->{short}) && length($node->{short}) |
| && ($node->{short} =~ m/\s*\|\s*/)) |
| { |
| $node->{short} = ""; |
| } |
| |
| # last try!! |
| unless (defined($node->{short}) && length($node->{short})) |
| { |
| my $foo = $node->{txt}; |
| $foo =~ s/\-\>//gm; |
| $foo =~ s/\|//gm; |
| $foo =~ s/^\s+//gm; |
| $foo =~ s/\s+$//gm; |
| $node->{short} = $foo; |
| } |
| |
| # print "long: $node->{txt}\n"; |
| # print "short: $node->{short}\n"; |
| } |
| |
| $node->{short} =~ s/\s*$//; |
| |
| # remove quotes which mess up dot file |
| $node->{short} =~ s/\"//gm; |
| |
| # print "long: $node->{txt}\n"; |
| # print "short: $node->{short}\n"; |
| |
| # XXX XXX XXX XXX: FINAL "short" fixups |
| while (defined($node->{short}) && length($node->{short}) |
| && ($node->{short} =~ m/(\n)|^\s+|\s+$|(\(cost\=)/m)) |
| { |
| # remove leading and trailing spaces... |
| $node->{short} =~ s/^\s*//; |
| $node->{short} =~ s/\s*$//; |
| |
| # remove newlines |
| $node->{short} =~ s/(\n).*//gm; |
| |
| # remove cost=... |
| $node->{short} =~ s/\(cost\=.*//gm; |
| |
| # print "short fixup: $node->{short}\n\n\n"; |
| } |
| |
| { |
| if ($node->{txt} =~ m/(\d+(\.\d*)?)(\s*ms\s*to\s*end)/i) |
| { |
| |
| my @ggg = |
| ($node->{txt} =~ m/(\d+(\.\d*)?)(\s*ms\s*to\s*end)/i); |
| |
| # print join('*', @ggg), "\n"; |
| |
| my $tt = $ggg[0]; |
| |
| $node->{to_end} = $tt; |
| |
| $parse_ctx->{alltimes}->{$tt} = 1; |
| |
| if (exists($parse_ctx->{h_to_end}->{$tt})) |
| { |
| push @{$parse_ctx->{h_to_end}->{$tt}}, '"'. $node->{id} .'"'; |
| } |
| else |
| { |
| $parse_ctx->{h_to_end}->{$tt} = ['"'. $node->{id} . '"']; |
| } |
| |
| |
| |
| } |
| if ($node->{txt} =~ m/(\d+(\.\d*)?)(\s*ms\s*to\s*first\s*row)/i) |
| { |
| |
| my @ggg = |
| ($node->{txt} =~ m/(\d+(\.\d*)?)(\s*ms\s*to\s*first\s*row)/i); |
| |
| # print join('*', @ggg), "\n"; |
| |
| my $tt = $ggg[0]; |
| |
| $node->{to_first} = $tt; |
| |
| $parse_ctx->{alltimes}->{$tt} = 1; |
| |
| if (exists($parse_ctx->{h_to_first}->{$tt})) |
| { |
| push @{$parse_ctx->{h_to_first}->{$tt}}, '"' . $node->{id} . '"' ; |
| } |
| else |
| { |
| $parse_ctx->{h_to_first}->{$tt} = [ '"' . $node->{id} . '"']; |
| } |
| |
| } |
| |
| if ($node->{txt} =~ m/start offset by (\d+(\.\d*)?)(\s*ms)/i) |
| { |
| |
| my @ggg = |
| ($node->{txt} =~ m/start offset by (\d+(\.\d*)?)(\s*ms)/i); |
| |
| # print join('*', @ggg), "\n"; |
| |
| my $tt = $ggg[0]; |
| |
| $node->{to_startoff} = $tt; |
| |
| $parse_ctx->{allstarttimes}->{$tt} = 1; |
| |
| if (exists($parse_ctx->{h_to_startoff}->{$tt})) |
| { |
| push @{$parse_ctx->{h_to_startoff}->{$tt}}, '"'. $node->{id} .'"'; |
| } |
| else |
| { |
| $parse_ctx->{h_to_startoff}->{$tt} = ['"'. $node->{id} . '"']; |
| } |
| } |
| |
| if (exists($node->{to_end})) |
| { |
| $node->{total_time} = |
| (exists($node->{to_first})) ? |
| ($node->{to_end} - $node->{to_first}) : |
| $node->{to_end}; |
| } |
| |
| |
| } |
| |
| if (1) |
| { |
| if (exists($node->{child})) |
| { |
| delete $node->{child} |
| unless (defined($node->{child}) |
| && scalar(@{$node->{child}})); |
| } |
| } |
| |
| |
| } |
| |
| } |
| |
| sub parse_node |
| { |
| |
| my ($ref_id, $parse_ctx, $depth, $plan_rows, $parent) = @_; |
| |
| # print "depth: $depth\n"; |
| # print "row: ",$plan_rows->[0],"\n" if (scalar(@{$plan_rows})); |
| |
| # print "first: $first\n" if defined ($first); |
| |
| my $spclen = undef; |
| my $node = undef; |
| |
| my $no_more_text = 0; |
| |
| while (scalar(@{$plan_rows})) |
| { |
| my $row = $plan_rows->[0]; |
| |
| unless (defined($node)) |
| { |
| $node = {}; |
| |
| $node->{child} = []; |
| |
| $node->{txt} = ""; |
| |
| $node->{parent} = $parent |
| if (defined($parent)); |
| |
| my $id = $$ref_id; |
| $id++; |
| $$ref_id= $id; |
| $node->{id} = $id; |
| } |
| |
| # XXX XXX XXX XXX XXX XXX XXX XXX XXX XXX XXX XXX XXX XXX XXX |
| # make initplan into a fake node so the graphs look nicer (eg |
| # tpch query 15). Prefix it with an arrow and add a fake cost. |
| if ($row =~ m/\|(\s)*InitPlan(.*)slice/) |
| { |
| $row =~ s/InitPlan/\-\> InitPlan/; |
| if ($row !~ m/\(cost=/) |
| { |
| $row =~ s/\|$/\(cost=\?\)|/; |
| } |
| } |
| |
| if ($row !~ m/\|(\s)*\-\>/) |
| { |
| # add text to existing node |
| |
| if ($no_more_text) |
| { |
| print "error: no more text for ". $node->{id}, "\n"; |
| } |
| |
| $node->{txt} .= "\n" . $row; |
| |
| # print "txt: $node->{txt}\n"; |
| |
| shift @{$plan_rows}; |
| next; |
| } |
| else |
| { |
| # new node |
| unless ($no_more_text) |
| { |
| unless (length($node->{txt})) |
| { |
| $node->{txt} .= $row; |
| shift @{$plan_rows}; |
| next; |
| } |
| } |
| |
| # match the leading spaces before the '->', eg: |
| # "| -> Sort (cost=5990545.19..599..." |
| |
| my @spc = ($row =~ m/\|(\s*)\-\>/); |
| |
| # print "space match:", Data::Dumper->Dump(\@spc), "\n"; |
| |
| $spclen = scalar(@spc) ? length($spc[0]) : 0; |
| |
| # print "space len: $spclen, depth: $depth\n"; |
| |
| if ($spclen > $depth) |
| { |
| # found a child |
| push @{$node->{child}}, parse_node($ref_id, $parse_ctx, |
| $spclen, $plan_rows, |
| $node->{id}); |
| } |
| |
| } |
| |
| if (defined($spclen)) |
| { |
| if ($spclen <= $depth) |
| { # found a sibling or parent |
| # need to put the row back on the head of the list |
| |
| if (defined($node) && exists($node->{txt})) |
| { |
| analyze_node($node, $parse_ctx); |
| |
| return $node; |
| } |
| } |
| } |
| else |
| { |
| die ("what the heck?"); |
| } |
| |
| $spclen = undef; |
| $no_more_text = 1; |
| |
| } # end while |
| |
| if (defined($node)) |
| { |
| analyze_node($node, $parse_ctx); |
| } |
| |
| return $node; |
| |
| } |
| |
| |
| if (1) |
| { |
| |
| my @bigarr; |
| |
| my $state = "INIT"; |
| |
| my $pair = undef; |
| |
| my ($query, $plan); |
| |
| my $tpch_format=0; |
| my $bigdash = '-' x 40; # make big dash smaller |
| |
| my $magic; |
| |
| for (<>) |
| { |
| my $ini = $_; |
| |
| if ($state =~ m/INIT/) |
| { |
| if ($ini !~ m/(^EXPLAIN ANALYZE)|(QUERY PLAN)/) |
| { |
| next; |
| } |
| |
| $query = ""; |
| $plan = ""; |
| $pair = {}; |
| |
| if ($ini =~ m/^EXPLAIN ANALYZE/) |
| { |
| $tpch_format = 1; |
| $state = "GETQUERY"; |
| next; |
| } |
| |
| if ($ini =~ m/QUERY PLAN/) |
| { |
| $tpch_format = 0; |
| $plan = ""; |
| $state = "GETPLAN"; |
| next; |
| } |
| |
| } |
| |
| if ($state !~ m/GETPLAN/) |
| { |
| # should be START or GETQUERY only... |
| if ($tpch_format) |
| { |
| if ($ini =~ m/^EXPLAIN ANALYZE/) |
| { |
| if (defined($pair)) |
| { |
| $pair->{plan} = $plan; |
| $pair->{query} = $query; |
| push @bigarr, $pair; |
| } |
| $pair = {}; |
| $query = ""; |
| $plan = ""; |
| $state = "GETQUERY"; |
| next; |
| } |
| } |
| else |
| { |
| # not tpch analyze |
| if ($ini =~ m/QUERY PLAN/) |
| { |
| if (defined($pair)) |
| { |
| $pair->{plan} = $plan; |
| $pair->{query} = $query; |
| push @bigarr, $pair; |
| } |
| $pair = {}; |
| $query = ""; |
| $plan = ""; |
| $state = "GETPLAN"; |
| next; |
| } |
| |
| } |
| if ($state =~ m/GETQUERY/) |
| { |
| if ($ini =~ m/QUERY PLAN/) |
| { |
| if (!($tpch_format)) |
| { |
| if (defined($pair)) |
| { |
| $pair->{plan} = $plan; |
| $pair->{query} = $query; |
| push @bigarr, $pair; |
| } |
| $pair = {}; |
| $query = ""; |
| } |
| |
| $plan = ""; |
| $state = "GETPLAN"; |
| next; |
| } |
| |
| $query .= $ini; |
| } |
| |
| } # end not getplan |
| |
| if ($state =~ m/GETPLAN/) |
| { |
| |
| if ($tpch_format) |
| { |
| if ($ini !~ m/\|(.*)\|/) |
| { |
| $state = "START"; |
| next; |
| } |
| } |
| else |
| { |
| if ($ini =~ m/(\(\d+\s+rows\))|(Time\s+was.*seconds\.\s+Query\s+ended)/) |
| { |
| $state = "START"; |
| next; |
| } |
| } |
| # a bit weird here -- just ignore the separator. But |
| # maybe we should invest some effort to determine that the |
| # separator is the next line after the header (and only |
| # ignore it once) ? |
| next |
| if ($ini =~ m/$bigdash/); |
| |
| # add the missing bars |
| if (!($tpch_format)) |
| { |
| if ($ini !~ m/\|(.*)\|/) |
| { |
| $ini = '|' . $ini . '|'; |
| } |
| } |
| |
| $plan .= $ini; |
| } |
| |
| } # end big for |
| if (defined($pair)) |
| { |
| $pair->{plan} = $plan; |
| $pair->{query} = $query; |
| push @bigarr, $pair; |
| } |
| |
| #print scalar(@bigarr), "\n"; |
| |
| |
| #print Data::Dumper->Dump(\@bigarr); |
| |
| #print $bigarr[0]->{plan}; |
| |
| unless(scalar(@{$glob_qlist})) |
| { |
| # build a 1-based list of queries |
| for (my $ii =1; $ii <= scalar(@bigarr); $ii++) |
| { |
| push @{$glob_qlist}, $ii; |
| } |
| } |
| |
| my $realSTDOUT; |
| |
| for my $qqq (@{$glob_qlist}) |
| { |
| my $qnum = $qqq - 1; # 0 based vs 1 based |
| |
| if ($qnum > scalar(@bigarr)) |
| { |
| warn("specified query $qqq is out-of-range -- skipping...\n"); |
| next; |
| } |
| |
| if ($glob_optn =~ m/query|text|txt/i) |
| { |
| doquery($bigarr[$qnum]->{query}); |
| next; |
| } |
| |
| my $plantxt = $bigarr[$qnum]->{plan}; |
| |
| unless (defined($plantxt) && length($plantxt)) |
| { |
| warn("invalid plan for query $qqq -- skipping...\n"); |
| next; |
| } |
| |
| #print $plantxt, "\n"; |
| |
| my @plan_r = split(/\n/, $plantxt); |
| |
| my $pr = \@plan_r; |
| |
| my $parse_ctx = {}; |
| |
| my $id = 0; |
| |
| $parse_ctx->{alltimes} = {}; |
| $parse_ctx->{h_to_end} = {}; |
| $parse_ctx->{h_to_first} = {}; |
| |
| $parse_ctx->{allstarttimes} = {}; |
| $parse_ctx->{h_to_startoff} = {}; |
| $parse_ctx->{explain_analyze_stats} = {}; |
| |
| my $plantree = parse_node(\$id, $parse_ctx, 0, $pr); |
| |
| # my @timelist = sort {$a <=> $b} keys (%{$parse_ctx->{alltimes}}); |
| my @timelist = sort {$a <=> $b} keys (%{$parse_ctx->{allstarttimes}}); |
| |
| |
| if (defined($glob_prune)) |
| { |
| if ($glob_prune =~ m/stat|heavy|heavily/i) |
| { |
| my $map_expr = 'delete $node->{to_end};'; |
| treeMap($plantree, undef, $map_expr); |
| $map_expr = 'delete $node->{to_first};'; |
| treeMap($plantree, undef, $map_expr); |
| |
| # additional statistics |
| $map_expr = 'delete $node->{to_startoff};'; |
| treeMap($plantree, undef, $map_expr); |
| $map_expr = 'delete $node->{total_time};'; |
| treeMap($plantree, undef, $map_expr); |
| $map_expr = 'delete $node->{statistics};'; |
| treeMap($plantree, undef, $map_expr); |
| } |
| if ($glob_prune =~ m/heavy|heavily/i) |
| { |
| treeMap($plantree, 'prune_heavily($node);'); |
| } |
| } |
| |
| |
| # magic mode : display everything magically |
| # |
| # NOTE: only set to magic on the first iteration, then reset |
| # to jpg, so performs correctly with multiple queries |
| if ($glob_optn =~ m/magic/i) |
| { |
| $glob_optn = "jpg"; |
| |
| use IO::File; |
| use POSIX qw(tmpnam); |
| |
| my $tmpnam; |
| |
| for (;;) { |
| my $tmpfh; |
| |
| $tmpnam = tmpnam(); |
| sysopen($tmpfh, $tmpnam, O_RDWR | O_CREAT | O_EXCL) && last; |
| } |
| |
| # create a temporary directory name -- just append ".dir" |
| # to the new tempfile name and mkdir |
| my $tmpdir = $tmpnam . ".dir"; |
| |
| mkdir($tmpdir) or die "magic failed" ; |
| |
| unlink $tmpnam; # we didn't need this tempfile anyhow |
| |
| # reset output file name to create files in the new |
| # temporary directory |
| $glob_outi = File::Spec->catfile($tmpdir, "query_"); |
| |
| $magic = $glob_outi; |
| } |
| |
| if ($glob_outi) |
| { |
| unless (defined($realSTDOUT)) |
| { |
| open $realSTDOUT, ">&STDOUT" or die "Can't dup STDOUT: $!"; |
| } |
| |
| my $outfilename = $glob_outi; |
| |
| # only need numbering if processed more than one query |
| my $neednum = (scalar(@bigarr) > 1); |
| |
| # check if name has an extension like ".foo" |
| if ($outfilename =~ m/\.(.){1,5}$/) |
| { |
| # qqq is query num (1 based) |
| |
| my $formatq = sprintf("%03d", $qqq); |
| |
| $outfilename =~ s/\.(.*)$/$formatq\.$1/ |
| if ($neednum); |
| } |
| else |
| { |
| # qqq is query num (1 based) |
| my $formatq = sprintf("%03d", $qqq); |
| |
| $outfilename .= $formatq |
| if ($neednum); |
| if ($glob_optn =~ m/yaml/i) |
| { |
| $outfilename .= ".yml"; |
| } |
| if ($glob_optn =~ m/json/i) |
| { |
| $outfilename .= ".json"; |
| } |
| if ($glob_optn =~ m/perl|dump/i) |
| { |
| $outfilename .= ".perl"; |
| } |
| if ($glob_optn =~ m/dot|graph/i) |
| { |
| $outfilename .= ".dot"; |
| } |
| if ($glob_optn =~ m/$GV_formats/i) |
| { |
| $outfilename .= ".$glob_optn"; |
| } |
| } |
| |
| close STDOUT; |
| |
| open (STDOUT, ">$outfilename" ) or die "can't open STDOUT: $!"; |
| |
| # print $outfilename, "\n"; |
| |
| } |
| |
| |
| if ($glob_optn =~ m/yaml/i) |
| { |
| doyaml($plantree); |
| } |
| if ($glob_optn =~ m/json/i) |
| { |
| doyaml($plantree, "json"); |
| } |
| if ($glob_optn =~ m/perl|dump/i) |
| { |
| doDataDump($plantree); |
| } |
| if ($glob_optn =~ m/dot|graph/i) |
| { |
| dodotfile($plantree, \@timelist, $qqq, $parse_ctx, |
| $glob_direction); |
| } |
| if ($glob_optn =~ m/operator/i) |
| { |
| doOperatorDump($plantree); |
| } |
| |
| if ($glob_optn =~ m/$GV_formats/i) |
| { |
| my $dotapp = "/Applications/Graphviz.app/Contents/MacOS/dot"; |
| |
| if ($^O !~ m/darwin/) |
| { |
| $dotapp = `which dot`; |
| chomp($dotapp); |
| } |
| die "could not find dot app: $dotapp" |
| unless (defined($dotapp) && length($dotapp) && (-e $dotapp)); |
| |
| # should have been able to redirect STDOUT thru a pipe |
| # directly to dotapp, but didn't work. Use a tmpfile |
| # instead. |
| |
| use IO::File; |
| use POSIX qw(tmpnam); |
| |
| my $tmpnam; |
| |
| for (;;) { |
| my $tmpfh; |
| |
| $tmpnam = tmpnam(); |
| sysopen($tmpfh, $tmpnam, O_RDWR | O_CREAT | O_EXCL) && last; |
| } |
| open my $oldout, ">&STDOUT" or die "Can't dup STDOUT: $!"; |
| |
| close STDOUT; |
| open (STDOUT, ">$tmpnam" ) or die "can't open STDOUT: $!"; |
| |
| select STDOUT; $| = 1; # make unbuffered |
| |
| dodotfile($plantree, \@timelist, $qqq, $parse_ctx, |
| $glob_direction); |
| |
| close STDOUT; |
| open STDOUT, ">&", $oldout or die "Can't dup \$oldout: $!"; |
| |
| system("cat $tmpnam | $dotapp -T$glob_optn"); |
| |
| unlink $tmpnam; |
| |
| |
| } |
| } #end for querynum |
| |
| if (defined($realSTDOUT)) |
| { |
| close STDOUT; |
| open STDOUT, ">&", $realSTDOUT or die "Can't dup \$oldout: $!"; |
| |
| } |
| |
| # magically display all files |
| if (defined($magic)) |
| { |
| # only need numbering if processed more than one query |
| my $neednum = (scalar(@{$glob_qlist}) > 1); |
| |
| if ($^O =~ m/darwin/) |
| { |
| # use ImageMagick montage |
| my $ggg = $magic . '*'; |
| my $montage = `which montage`; |
| chomp($montage); |
| |
| # only perform a montage if more than one query |
| if ($neednum && defined($montage) && ( -e $montage)) |
| { |
| my $dir = $magic; |
| # get the directory name (remove "query_" prefix) |
| $dir =~ s/query_$//; |
| system("cd $dir; montage -label \'%f\' $ggg -title \"$dir\n`date`\" -shadow INDEX.html; open INDEX.html"); |
| } |
| else |
| { |
| system("open $ggg"); |
| } |
| } |
| } |
| |
| } |
| #print "\nmax id: $id\n\n"; |
| |
| # |
| |
| sub treeMap |
| { |
| my ($node, $pre_map, $post_map, $ctx) = @_; |
| |
| eval "$pre_map" |
| if (defined($pre_map)); |
| |
| if (exists($node->{child})) |
| { |
| for my $kid (@{$node->{child}}) |
| { |
| treeMap($kid, $pre_map, $post_map, $ctx); |
| } |
| } |
| eval "$post_map" |
| if (defined($post_map)); |
| } |
| |
| sub doDataDump |
| { |
| my $plantree = shift; |
| |
| |
| local $Data::Dumper::Indent = 1; |
| local $Data::Dumper::Terse = 1; |
| local $Data::Dumper::Sortkeys = 1; |
| |
| my $map_expr = 'delete $node->{txt};'; |
| # my $map_expr = 'print "foo\n"'; |
| treeMap($plantree, undef, $map_expr); |
| |
| print Data::Dumper->Dump([$plantree]); |
| } |
| |
| sub doOperatorDump |
| { |
| my $plantree = shift; |
| |
| print $plantree->{short}, "\n" if (exists($plantree->{short})); |
| |
| return |
| unless (exists($plantree->{child})); |
| |
| for my $kid (@{$plantree->{child}}) |
| { |
| doOperatorDump($kid); |
| } |
| } |
| |
| # add slice info to node |
| # and gather explain analyze stats |
| sub addslice |
| { |
| my ($node, $ctx) = @_; |
| |
| # AUTO-6: find nodes with "(slice1)" info where the slice numbers aren't |
| # part of the "Slice statistics" |
| |
| my $txt1 = $node->{txt}; |
| $txt1 =~ s/Slice statistics.*//gs; |
| |
| if ($txt1 =~ /(slice(\d+))/) |
| { |
| my @ggg = ($txt1 =~ m/(slice(\d+))/) ; |
| $node->{slice} = shift @ggg; |
| |
| # check if we have explain analyze stats for the slice |
| if (exists($ctx->{explain_analyze_stats}) |
| && exists($ctx->{explain_analyze_stats}->{memory}) |
| && exists($ctx->{explain_analyze_stats}->{memory}->{$node->{slice}})) |
| { |
| $node->{memory} = |
| $ctx->{explain_analyze_stats}->{memory}->{$node->{slice}}; |
| } |
| |
| } |
| } |
| |
| |
| sub doquery |
| { |
| my $qtxt = shift; |
| |
| print $qtxt, "\n"; |
| |
| } |
| |
| sub doyaml |
| { |
| my ($plantree, $opti) = @_; |
| |
| $opti = "yaml" unless (defined($opti)); |
| |
| if ($opti =~ m/json/i) |
| { |
| # JSON might not be installed, so test for it. |
| |
| if (eval "require JSON") |
| { |
| my $map_expr = 'delete $node->{txt};'; |
| |
| treeMap($plantree, undef, $map_expr); |
| |
| # because JSON is REQUIREd, not USEd, the symbols are not |
| # imported into the environment. |
| print JSON::objToJson($plantree, {pretty => 1, indent => 2}); |
| } |
| else |
| { |
| die("Fatal Error: The required package JSON is not installed -- please download it from www.cpan.org\n"); |
| exit(1); |
| } |
| |
| } |
| else |
| { |
| # YAML might not be installed, so test for it. |
| |
| if (eval "require YAML") |
| { |
| my $map_expr = 'delete $node->{txt};'; |
| |
| treeMap($plantree, undef, $map_expr); |
| |
| # because YAML is REQUIREd, not USEd, the symbols are not |
| # imported into the environment. |
| print YAML::Dump($plantree); |
| } |
| else |
| { |
| die("Fatal Error: The required package YAML is not installed -- please download it from www.cpan.org\n"); |
| exit(1); |
| } |
| |
| } |
| |
| } |
| |
| # remove slice numbering information to construct even more generic plans |
| sub prune_heavily |
| { |
| my $node = shift; |
| |
| return |
| unless (exists($node->{short})); |
| |
| if ($node->{short} =~ m/Delete\s*\(slice.*segment.*\)\s*\(row.*width.*\)/) |
| { |
| # QA-1309: fix strange DELETE operator formatting |
| $node->{short} = "Delete"; |
| } |
| elsif ($node->{short} =~ m/Update\s*\(slice.*segment.*\)\s*\(row.*width.*\)/) |
| { |
| # QA-1309: fix strange UPDATE operator formatting |
| $node->{short} = "Update"; |
| } |
| elsif ($node->{short} =~ m/\d+\:\d+/) |
| { |
| |
| # example: Gather Motion 8:1 (slice4); |
| |
| # strip the number of nodes and slice information |
| $node->{short} =~ s/\s+\d+\:\d+.*//; |
| |
| # Note: don't worry about removing "(slice1)" info from the |
| # "short" because addslice processes node->{text} |
| } |
| } |
| |
| # identify the slice for each node |
| # and find Shared Scan "Primary" |
| # and find MultiSliceMotion |
| sub pre_slice |
| { |
| my ($node, $ctx) = @_; |
| |
| { |
| if (scalar(@{$ctx->{a1}})) |
| { |
| my $parent = $ctx->{a1}->[-1]; |
| |
| unless (exists($node->{slice})) |
| { |
| if (exists($parent->{slice})) |
| { |
| $node->{slice} = $parent->{slice}; |
| } |
| } |
| } |
| |
| # olap stuff |
| |
| if ($node->{short} =~ m/^Shared Scan/) |
| { |
| # if the Shared Scan has a child it is the "primary" |
| if (exists($node->{child})) |
| { |
| my $share_short_fixup = $node->{short}; |
| |
| # remove the slice number from the "short" |
| $share_short_fixup =~ s/(\d+)\:/\:/g; |
| |
| if (!exists($ctx->{share_input_h}->{$share_short_fixup})) |
| { |
| $ctx->{share_input_h}->{$share_short_fixup} = $node; |
| } |
| } |
| else # not the primary, mark as a duplicate node |
| { |
| $node->{SharedScanDuplicate} = 1; |
| } |
| } |
| if ($node->{short} =~ m/^Multi Slice Motion/) |
| { |
| # choose first Multi Slice Motion node as primary |
| if (!exists($ctx->{multi_slice_h}->{$node->{short}})) |
| { |
| $ctx->{multi_slice_h}->{$node->{short}} = $node; |
| } |
| else # not the primary, mark as a duplicate node |
| { |
| $node->{MultiSliceMotionDuplicate} = 1; |
| } |
| } |
| |
| if (exists($node->{total_time})) |
| { |
| my $tt = $node->{total_time}; |
| my $tt2 = $tt * $tt; |
| $ctx->{time_stats_h}->{cnt} += 1; |
| $ctx->{time_stats_h}->{sum} += $tt; |
| $ctx->{time_stats_h}->{sumsq} += $tt2; |
| |
| if (exists($ctx->{time_stats_h}->{tt_h}->{$tt})) |
| { |
| push @{$ctx->{time_stats_h}->{tt_h}->{$tt}}, $node; |
| } |
| else |
| { |
| $ctx->{time_stats_h}->{tt_h}->{$tt} = [$node]; |
| } |
| } |
| |
| } |
| |
| push @{$ctx->{a1}}, $node; |
| |
| } |
| |
| sub post_slice |
| { |
| my ($node, $ctx) = @_; |
| |
| pop @{$ctx->{a1}}; |
| |
| } |
| |
| # make all duplicate sharedscan nodes point back to primary |
| sub sharedscan_fixup |
| { |
| my ($node, $ctx) = @_; |
| |
| if (exists($node->{SharedScanDuplicate})) |
| { |
| my $share_short_fixup = $node->{short}; |
| |
| # remove the slice number from the "short" |
| $share_short_fixup =~ s/(\d+)\:/\:/g; |
| |
| $node->{SharedScanDuplicate} = |
| $ctx->{share_input_h}->{$share_short_fixup}; |
| # $node->{id} = |
| # $node->{SharedScanDuplicate}->{id}; |
| } |
| |
| if (exists($node->{MultiSliceMotionDuplicate})) |
| { |
| $node->{MultiSliceMotionDuplicate} = |
| $ctx->{multi_slice_h}->{$node->{short}}; |
| # XXX XXX: for this case the node is really the same |
| $node->{id} = |
| $node->{MultiSliceMotionDuplicate}->{id}; |
| } |
| |
| } |
| |
| sub human_num |
| { |
| my $esti = shift; |
| |
| my @suffix = qw(K M G T P E Z Y); |
| my $suff = ""; |
| |
| # try to shorten estimate specification |
| while (length(POSIX::ceil($esti)) > 3) |
| { |
| $suff = shift @suffix; |
| |
| $esti = $esti/1000; |
| } |
| |
| if (length($suff)) |
| { |
| $esti *= 100; |
| $esti = POSIX::floor($esti+0.5); |
| $esti = $esti/100; |
| |
| $esti .= $suff; |
| } |
| |
| return $esti; |
| } |
| |
| # label left and right for nest loops |
| sub nestedloop_fixup |
| { |
| my ($node, $ctx) = @_; |
| |
| return |
| unless (exists($node->{short}) && |
| ($node->{short} =~ m/Nested Loop/)); |
| |
| my @kidlist; |
| |
| if (exists($node->{child})) |
| { |
| for my $kid (@{$node->{child}}) |
| { |
| push @kidlist, $kid; |
| } |
| } |
| |
| return |
| unless (2 == scalar(@kidlist)); |
| |
| if ($kidlist[0]->{id} < $kidlist[1]->{id}) |
| { |
| $kidlist[0]->{nested_loop_position} = "left"; |
| $kidlist[1]->{nested_loop_position} = "right"; |
| } |
| else |
| { |
| $kidlist[1]->{nested_loop_position} = "left"; |
| $kidlist[0]->{nested_loop_position} = "right"; |
| } |
| |
| } |
| |
| # find rows out information |
| sub get_rows_out |
| { |
| my ($node, $ctx, $edge) = @_; |
| |
| return |
| unless ($node->{txt} =~ m/(Rows out\:)|(\(cost\=.*\s+rows=.*\s+width\=.*\))/); |
| |
| my $long = ($edge =~ m/long|med/i); |
| |
| if ($node->{txt} =~ m/Rows out\:\s+Avg.*\s+rows\s+x\s+.*\s+workers/) |
| { |
| if (!$long) |
| { |
| # short result |
| my @foo = |
| ($node->{txt} =~ |
| m/Rows out\:\s+Avg\s+(.*)\s+rows\s+x\s+(.*)\s+workers/); |
| |
| goto L_get_est unless (2 == scalar(@foo)); |
| |
| # calculate row count as avg x num workers |
| $node->{rows_out} = $foo[0] * $foo[1]; |
| } |
| else |
| { |
| my @foo = |
| ($node->{txt} =~ |
| m/Rows out\:\s+(Avg.*workers)/); |
| |
| goto L_get_est unless (1 == scalar(@foo)); |
| |
| # just print the string |
| $node->{rows_out} = $foo[0]; |
| |
| if ($edge =~ m/med/i) |
| { |
| $node->{rows_out} =~ s/Avg\s+//; |
| $node->{rows_out} =~ s/rows\s+//; |
| $node->{rows_out} =~ s/\s*workers\s*//; |
| } |
| |
| } |
| } |
| elsif ($node->{txt} =~ m/Rows out\:\s+.*\s+rows/) |
| { |
| my @foo = |
| ($node->{txt} =~ |
| m/Rows out\:\s+(.*)\s+rows/); |
| |
| goto L_get_est unless (1 == scalar(@foo)); |
| |
| $node->{rows_out} = $foo[0]; |
| } |
| |
| if ( |
| exists($node->{rows_out}) && |
| length($node->{rows_out}) |
| ) |
| { |
| if ( |
| ($node->{rows_out} !~ m/avg/i) && |
| ($node->{rows_out} =~ m/x/)) |
| { |
| my @foo = ($node->{rows_out} =~ m/(x.*)$/); |
| |
| my $tail = $foo[0]; |
| |
| @foo = ($node->{rows_out} =~ m/(.*)\s+x.*/); |
| |
| my $head = $foo[0]; |
| |
| if (defined($tail) && defined($head)) |
| { |
| $head = human_num($head); |
| |
| $node->{rows_out} = $head . " " . $tail; |
| } |
| |
| } |
| elsif ($node->{rows_out} =~ m/^\d+$/) |
| { |
| $node->{rows_out} = human_num($node->{rows_out}); |
| } |
| } |
| |
| L_get_est: |
| |
| # add row estimates |
| if ($long && |
| ($node->{txt} =~ m/\(cost\=.*\s+rows=.*\s+width\=.*\)/)) |
| { |
| my @foo = ($node->{txt} =~ m/cost\=.*\s+rows=(\d+)\s+width\=.*/); |
| |
| if (scalar(@foo)) |
| { |
| use POSIX; |
| |
| my $esti = $foo[0]; |
| |
| $esti = human_num($esti); |
| |
| unless (exists($node->{rows_out}) && |
| length($node->{rows_out})) |
| { |
| $node->{rows_out} = ""; |
| } |
| |
| $node->{rows_out} .= " (est $esti)"; |
| } |
| } |
| |
| } # end get_rows_out |
| |
| sub calc_color_rank |
| { |
| my $ctx = shift; |
| |
| return |
| unless (defined($glob_statcolor)); |
| |
| if ($ctx->{time_stats_h}->{cnt} > 1) |
| { |
| # population variance = |
| # (sum of the squares)/n - (square of the sums)/n*n |
| my $sum = $ctx->{time_stats_h}->{sum}; |
| my $sumsq = $ctx->{time_stats_h}->{sumsq}; |
| my $enn = $ctx->{time_stats_h}->{cnt}; |
| |
| my $pop_var = ($sumsq/$enn) - (($sum*$sum)/($enn*$enn)); |
| my $std_dev = sqrt($pop_var); |
| my $mean = $sum/$enn; |
| my $half = $std_dev/2; |
| |
| # calculate a stanine (9 buckets, each 1/2 of stddev). The |
| # middle bucket (5, which is 4 if we start at zero) is |
| # centered on the mean, so it starts on mean - (1/4 stddev), |
| # and ends at mean + (1/4 stddev). |
| my @bucket; |
| my $buckstart = ($mean-($half/2))-(3*$half); |
| |
| push @bucket, 0; |
| |
| for my $ii (1..7) |
| { |
| push @bucket, $buckstart; |
| $buckstart += $half; |
| } |
| push @bucket, 2**40; # "infinity" |
| |
| my @tlist = sort {$a <=> $b} (keys %{$ctx->{time_stats_h}->{tt_h}}); |
| |
| # must have at least two |
| my $firstt = shift @tlist; |
| my $lastt = pop @tlist; |
| # print "f,l: $firstt, $lastt\n"; |
| |
| for my $nod (@{$ctx->{time_stats_h}->{tt_h}->{$firstt}}) |
| { |
| # print "first ", $nod->{id}, ": ", $nod->{short}, " - ", 0, "\n"; |
| $nod->{color_rank} = 10; |
| } |
| for my $nod (@{$ctx->{time_stats_h}->{tt_h}->{$lastt}}) |
| { |
| # print "last ", $nod->{id}, ": ", $nod->{short}, " - ", 10, "\n"; |
| $nod->{color_rank} = 1; |
| } |
| |
| # print "bucket: ", Data::Dumper->Dump(\@bucket); |
| # print "tlist: ", Data::Dumper->Dump(\@tlist); |
| # print Data::Dumper->Dump([$ctx->{time_stats_h}]); |
| |
| my $bucknum = 1; |
| for my $tt (@tlist) |
| { |
| # print "tt: $tt\n"; |
| # print "bk: $bucket[$bucknum]\n"; |
| |
| while ($tt > $bucket[$bucknum]) |
| { |
| # print "$tt > $bucket[$bucknum]\n"; |
| # last if ($bucknum >= 11); |
| $bucknum++; |
| } |
| for my $nod (@{$ctx->{time_stats_h}->{tt_h}->{$tt}}) |
| { |
| # print "node ", $nod->{id}, ": ", $nod->{short}, " - ", $bucknum, "\n"; |
| # $nod->{color_rank} = ($bucknum-1); |
| $nod->{color_rank} = (10 - $bucknum); |
| } |
| } |
| |
| } |
| |
| } |
| |
| sub dodotfile |
| { |
| my ($plantree, $time_list, $plan_num, $parse_ctx, $direction) = @_; |
| |
| { |
| my $map_expr = 'addslice($node, $ctx); '; |
| |
| treeMap($plantree, $map_expr, undef, $parse_ctx); |
| } |
| |
| |
| # $map_expr = 'propslice($node, $ctx);'; |
| my $ctx = {level => 0, a1 => [], |
| share_input_h => {}, multi_slice_h => {}, |
| time_stats_h => { cnt=>0, sum=>0, sumsq=>0, tt_h => {} } }; |
| |
| # my $map_expr = 'print "foo\n"'; |
| treeMap($plantree, |
| 'pre_slice($node, $ctx); ', |
| 'post_slice($node, $ctx); ', |
| $ctx); |
| |
| calc_color_rank($ctx); |
| |
| treeMap($plantree, |
| 'sharedscan_fixup($node, $ctx); ', |
| undef, |
| $ctx); |
| |
| # always label the left/right sides of nested loop |
| treeMap($plantree, |
| 'nestedloop_fixup($node, $ctx); ', |
| undef, |
| $ctx); |
| |
| if (defined($glob_edge) && length($glob_edge)) |
| { |
| treeMap($plantree, |
| 'get_rows_out($node, $ctx, $glob_edge); ', |
| undef, |
| $ctx); |
| } |
| |
| my $dotimeline = $glob_timeline; |
| |
| makedotfile($plantree, $time_list, $dotimeline, $plan_num, $parse_ctx, |
| $direction); |
| } |
| |
| |
| sub dotkid |
| { |
| my $node = shift; |
| |
| # XXX XXX: olap fixup - don't label duplicate multi slice motion nodes |
| return |
| if (exists($node->{MultiSliceMotionDuplicate})); |
| |
| # XXX XXX: olap fixup - have children of primary sharedscan |
| # point to this node |
| if (exists($node->{SharedScanDuplicate})) |
| { |
| for my $kid (@{$node->{SharedScanDuplicate}->{child}}) |
| { |
| print '"' . $kid->{id} . '" -> "' . $node->{id} . '"' . ";\n"; |
| } |
| } |
| |
| my $docrunch = 2; |
| |
| if (exists($node->{child})) |
| { |
| if (($docrunch != 0 ) && (scalar(@{$node->{child}} > 10))) |
| { |
| my $maxi = scalar(@{$node->{child}}); |
| |
| $maxi -= 2; |
| |
| for my $ii (2..$maxi) |
| { |
| $node->{child}->[$ii]->{crunchme} = 1; |
| } |
| |
| if ($docrunch == 2) |
| { |
| splice(@{$node->{child}}, 3, ($maxi-2)); |
| |
| $node->{child}->[2]->{short} = "... removed " . ($maxi - 3) . " nodes ..."; |
| } |
| |
| |
| } |
| |
| for my $kid (@{$node->{child}}) |
| { |
| my $edge_label = ""; |
| |
| print '"' . $kid->{id} . '" -> "' . $node->{id} . '"'; |
| |
| if (exists($kid->{nested_loop_position})) |
| { |
| $edge_label .= $kid->{nested_loop_position}; |
| } |
| |
| if (exists($kid->{rows_out})) |
| { |
| $edge_label .= " "; |
| $edge_label .= " " |
| if (length($edge_label)); |
| $edge_label .= $kid->{rows_out}; |
| } |
| |
| if (length($edge_label)) |
| { |
| print ' [label="' . $edge_label . '" ] '; |
| } |
| |
| print ";\n"; |
| } |
| |
| for my $kid (@{$node->{child}}) |
| { |
| dotkid($kid); |
| } |
| |
| } |
| |
| } |
| |
| sub dotlabel_detail |
| { |
| my $node = shift; |
| |
| # return $node->{short} ; |
| |
| my $outi = $node->{short}; |
| |
| my ($frst, $last) = (" ", " "); |
| |
| if (exists($node->{to_end})) |
| { |
| $last = "end: " . $node->{to_end}; |
| } |
| if (exists($node->{to_first})) |
| { |
| $frst = "first row: " . $node->{to_first}; |
| } |
| |
| my $slice = $node->{slice}; |
| $slice = " " |
| unless (defined($slice)); |
| |
| |
| if ((length($frst) > 1) || (length($last) > 1)) |
| { |
| my $memstuff = ""; |
| |
| # add memory statistics if have them... |
| if (exists($node->{memory})) |
| { |
| $memstuff = " | { {" . $node->{memory} . "} } "; |
| # make multiline - split on comma and "Work_mem" |
| # (using the vertical bar formatting character) |
| $memstuff =~ s/\,/\,\| /gm; |
| $memstuff =~ s/Work\_mem/\| Work\_mem/gm; |
| } |
| |
| # $outi .= " | { " . join(" | " , $frst, $last) . " } "; |
| $outi .= " | { " . join(" | " , $slice, $frst, $last) . " } " . $memstuff; |
| |
| # wrapping with braces changes record organization to vertical |
| $outi = "{ " . $outi . " } "; |
| } |
| |
| |
| return $outi; |
| } |
| |
| |
| sub dotlabel |
| { |
| my $node = shift; |
| |
| # XXX XXX: olap fixup - don't label duplicate multi slice motion nodes |
| return |
| if (exists($node->{MultiSliceMotionDuplicate})); |
| |
| my $colortable = $glob_coltab{$glob_colorscheme}; |
| |
| my $color = scalar(@{$colortable}); |
| $color = $node->{slice} if (exists($node->{slice})); |
| $color =~ s/slice//; |
| |
| $color = ($color) % (scalar(@{$colortable})); |
| |
| # build list of node attributes |
| my @attrlist; |
| push @attrlist, "shape=record"; |
| # push @attrlist, "shape=polygon"; |
| # push @attrlist, "peripheries=2"; |
| |
| # push @attrlist, 'fontcolor=white'; |
| |
| push @attrlist, 'label="' . dotlabel_detail($node) .'"'; |
| push @attrlist, 'style=filled'; |
| # push @attrlist, 'style="filled,bold"'; |
| # push @attrlist, "color=" . $colortable->[$color]; |
| # push @attrlist, "fillcolor=" . $colortable->[$color]; |
| |
| if (exists($node->{color_rank})) # color by statistical ranking |
| { |
| my $edgecol = $glob_divcol{rdbu11}->[$node->{color_rank}]; |
| my $fillcol = $colortable->[$color]; |
| |
| if (defined($glob_statcolor)) |
| { |
| if ($glob_statcolor =~ m/^t$/i) |
| { |
| # show timing color only |
| $fillcol = $edgecol; |
| } |
| if ($glob_statcolor =~ m/^st/i) |
| { |
| # edge is slice color, fill is time stats |
| # invert the selection |
| ($edgecol, $fillcol) = ($fillcol, $edgecol); |
| } |
| } |
| |
| push @attrlist, 'style="filled,setlinewidth(6)"'; |
| push @attrlist, "color=\"" . $edgecol . '"'; |
| |
| push @attrlist, "fillcolor=\"" . $fillcol . '"'; |
| } |
| else |
| { |
| push @attrlist, "color=\"" . $colortable->[$color] . '"'; |
| push @attrlist, "fillcolor=\"" . $colortable->[$color] . '"'; |
| } |
| |
| if (exists($node->{crunchme})) |
| { |
| @attrlist = (); |
| |
| # push @attrlist, 'style=filled'; |
| push @attrlist, 'style=filled'; |
| push @attrlist, "color=\"" . $colortable->[$color] . '"'; |
| push @attrlist, "fillcolor=\"" . $colortable->[$color] . '"'; |
| # push @attrlist, "shape=circle"; |
| push @attrlist, "label=\"" . $node->{short} . '"'; |
| # push @attrlist, "fontsize=1"; |
| # push @attrlist, "height=0.01"; |
| # push @attrlist, "width=0.01"; |
| # push @attrlist, "height=0.12"; |
| # push @attrlist, "width=0.12"; |
| |
| print '"' . $node->{id} . '" [' . join(", ", @attrlist) . '];' . "\n" ; |
| } |
| else |
| { |
| print '"' . $node->{id} . '" [' . join(", ", @attrlist) . '];' . "\n" ; |
| } |
| |
| if (exists($node->{child})) |
| { |
| for my $kid (@{$node->{child}}) |
| { |
| dotlabel($kid); |
| } |
| |
| } |
| |
| } |
| |
| |
| sub makedotfile |
| { |
| my ($plantree, $time_list, $do_timeline, $plan_num, $parse_ctx, |
| $direction) = @_; |
| |
| # print "\n\ndigraph plan1 { ranksep=.75; size = \"7.5,7.5\";\n\n \n"; |
| print "\n\ndigraph plan$plan_num { \n"; |
| |
| # print "graph [bgcolor=black];\n edge [style=bold, color=white];\n"; |
| # print "graph [bgcolor=black];\n edge [style=dashed, color=white];\n"; |
| # print "graph [bgcolor=black];\n edge [style=dotted, color=white];\n"; |
| |
| if ($do_timeline && scalar(@{$time_list})) |
| { |
| print " ranksep=.75; size = \"7.5,7.5\";\n\n \n"; |
| print " {\n node [shape=plaintext, fontsize=16];\n"; |
| print "/* the time-line graph */\n"; |
| |
| print join(' -> ', @{$time_list} ), ";\n"; |
| print "}\n"; |
| |
| print "node [shape=box];\n"; |
| |
| while ( my ($kk, $vv) = each(%{$parse_ctx->{h_to_startoff}})) |
| { |
| print '{ rank = same; ' . $kk . '; ' . join("; ", @{$vv}) . "; }\n"; |
| } |
| |
| } |
| |
| print "rankdir=$direction;\n"; |
| |
| dotkid($plantree); |
| |
| dotlabel($plantree); |
| |
| print "\n}\n"; |
| |
| } |
| |
| |
| |
| exit(); |