src/test/regress/explain.pl - hawq - Git at Google

 #!/usr/bin/env perl
 #
 # Licensed to the Apache Software Foundation (ASF) under one
 # or more contributor license agreements.  See the NOTICE file
 # distributed with this work for additional information
 # regarding copyright ownership.  The ASF licenses this file
 # to you under the Apache License, Version 2.0 (the
 # "License"); you may not use this file except in compliance
 # with the License.  You may obtain a copy of the License at
 #
 #   http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing,
 # software distributed under the License is distributed on an
 # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 # KIND, either express or implied.  See the License for the
 # specific language governing permissions and limitations
 # under the License.
 #
 #
 # Licensed to the Apache Software Foundation (ASF) under one
 # or more contributor license agreements.  See the NOTICE file
 # distributed with this work for additional information
 # regarding copyright ownership.  The ASF licenses this file
 # to you under the Apache License, Version 2.0 (the
 # "License"); you may not use this file except in compliance
 # with the License.  You may obtain a copy of the License at
 #
 #   http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing,
 # software distributed under the License is distributed on an
 # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 # KIND, either express or implied.  See the License for the
 # specific language governing permissions and limitations
 # under the License.
 #
 use Pod::Usage;
 use Getopt::Long;
 use Data::Dumper;
 use strict;
 use warnings;

 =head1 NAME

 B<explain.pl> - parse and reformat Postgres EXPLAIN output

 =head1 SYNOPSIS

 B<explain> [options] filename

 Options:

     -help            brief help message
     -man             full documentation
     -option          formatting option: perl, yaml, dot, query, jpg, json
     -querylist       list of queries
     -direction       direction of query plan graph: LR, RL, TB or BT.
     -colorscheme     graph color scheme
     -timeline        rank nodes by start offset time (experimental)
     -prune           prune tree attributes
     -output          output filename (else output to STDOUT).
     -statcolor       statistics coloring (experimental)
     -edge            edge decorations

 =head1 OPTIONS

 =over 8

 =item B<-help>

     Print a brief help message and exits.

 =item B<-man>

     Prints the manual page and exits.

 =item B<-option>

     Choose the output format option.  Several formats are supported.

 =over 12

 =item perl:  output in perl L<Data::Dumper> format.

 =item yaml:  output in L<yaml.org> machine and human-readable format

 =item dot:   output in dot graphical language for L<graphiz.org> graphing tool.

 =item querytext: output the text of the query (only useful for TPC-H)

 =item jpg:   pipe the dot output thru the dot formatter (if it is installed) to get jpg output directly.  (May also support bmp, ps, pdf, png)

 =item json:  output in L<json.org> machine and human-readable format

 =back


 =item B<-querylist>

    A list of queries to process.  The query numbering is 1-based. Some
    valid forms are:

    -querylist 1
    -querylist=2
    --ql=3,4,5
    --ql=6-9

     or some combination. By default, all queries are processed.


 =item B<-direction>

    Direction of data flow in query plan graph.  Valid entries are:

 =over 12

 =item BT (default): bottom to top

 =item TB: top to bottom

 =item LR: left to right

 =item RL: right to left

 =back

 =item B<-colorscheme>

    One of the supported ColorBrewer(TM) color schemes.  Use
    -color ?
    to get a list of the valid schemes, and
    -color dump
    to output a dot file displaying all the valid schemes.

     Colors from www.ColorBrewer.org by Cynthia A. Brewer, Geography,
     Pennsylvania State University.

 =item B<-prune>

    Prune tree attributes.  The only supported option is "stats" to
    remove the to_end and to_first timing information.

 =item B<-output>

    Output file name.  If multiple queries are processed, the filename
    is used as a template to generate multiple files.  If the filename
    has an extension, it is preserved, else an extension is supplied
    based upon the formatting option.  The filename template inserts
    the query number before the "dot" (.) if more than one query was
    processed.

 =item B<-statcolor>

     For an EXPLAIN ANALYZE plan, color according to the time spent in
     node. Red is greatest, and blue is least.  For statcolor=ts (default),
     the node edge is colored by time, and the node interior is filled
     by slice color.  For statcolor=st, the color scheme is reversed.
     For statcolor=t (timing only), the entire node is colored according
     to the time spent.

 =item B<-edge>

     Decorate graph edges with row count if available. Valid entries are:

 =over 12

 =item long   - print average rows and number of workers

 =item medium - print average rows and number of workers compactly

 =item short  - print total row counts

 =back


 =back

 =head1 DESCRIPTION

 explain.pl reads EXPLAIN output from a text file (or standard
 input) and formats it in several ways.  The text file must contain
 output in one of the the following formats.  The first is a regular
 EXPLAIN format, starting the the QUERY PLAN header and ending with the
 number of rows in parentheses.  Indenting must be on:

                                                 QUERY PLAN
  ----------------------------------------------------------------------------------------------------------
   Gather Motion 64:1  (slice2)  (cost=6007722.78..6007722.79 rows=6 width=51)
     Merge Key: partial_aggregation.l_returnflag, partial_aggregation.l_linestatus
     ->  Sort  (cost=6007722.78..6007722.79 rows=6 width=51)
           Sort Key: partial_aggregation.l_returnflag, partial_aggregation.l_linestatus
           ->  HashAggregate  (cost=6007722.52..6007722.70 rows=6 width=51)
                 Group By: lineitem.l_returnflag, lineitem.l_linestatus
                 ->  Redistribute Motion 64:64  (slice1)  (cost=6007721.92..6007722.31 rows=6 width=51)
                       Hash Key: lineitem.l_returnflag, lineitem.l_linestatus
                       ->  HashAggregate  (cost=6007721.92..6007722.19 rows=6 width=51)
                             Group By: lineitem.l_returnflag, lineitem.l_linestatus
                             ->  Seq Scan on lineitem  (cost=0.00..3693046.50 rows=92587017 width=51)
                                   Filter: l_shipdate <= '1998-09-08 00:00:00'::timestamp without time zone
  (12 rows)


 The second acceptable format is the TPC-H EXPLAIN ANALYZE, listing
 each query followed by the EXPLAIN output delineated by vertical bars
 ('|', e.g. |QUERY PLAN| ):

  EXPLAIN ANALYZE


  select
      l_returnflag,
      l_linestatus,
      sum(l_quantity) as sum_qty,
      sum(l_extendedprice) as sum_base_price,
      sum(l_extendedprice * (1 - l_discount)) as sum_disc_price,
      sum(l_extendedprice * (1 - l_discount) * (1 + l_tax)) as sum_charge,
      avg(l_quantity) as avg_qty,
      avg(l_extendedprice) as avg_price,
      avg(l_discount) as avg_disc,
      count(*) as count_order
  from
      lineitem
  where
      l_shipdate <= date '1998-12-01' - interval '106 day'
  group by
      l_returnflag,
      l_linestatus
  order by
      l_returnflag,
      l_linestatus;


   Query 1 complete, 19 rows returned


  |QUERY PLAN|
  |Gather Motion 64:1  (slice2)  (cost=5990545.19..5990545.21 rows=6 width=51)|
  |  recv:  Total 4 rows with 1294937 ms to end.|
  |  Merge Key: partial_aggregation.junk_attr_1, partial_aggregation.junk_attr_2|
  |  ->  Sort  (cost=5990545.19..5990545.21 rows=6 width=51)|
  |        Avg 1.00 rows x 4 workers.  Max 1 rows (seg49) with 1294938 ms to end.|
  |        Sort Key: partial_aggregation.junk_attr_1, partial_aggregation.junk_attr_2|
  |        ->  HashAggregate  (cost=5990544.94..5990545.12 rows=6 width=51)|
  |              Avg 1.00 rows x 4 workers.  Max 1 rows (seg49) with 1294933 ms to end.|
  |              Group By: lineitem.l_returnflag, lineitem.l_linestatus|
  |              ->  Redistribute Motion 64:64  (slice1)  (cost=5990544.34..5990544.73 rows=6 width=51)|
  |                    recv:  Avg 64.00 rows x 4 workers.  Max 64 rows (seg49) with 1277197 ms to first row, 1294424 ms to end.|
  |                    Hash Key: lineitem.l_returnflag, lineitem.l_linestatus|
  |                    ->  HashAggregate  (cost=5990544.34..5990544.61 rows=6 width=51)|
  |                          Avg 4.00 rows x 64 workers.  Max 4 rows (seg44) with 1292222 ms to end.|
  |                          Group By: lineitem.l_returnflag, lineitem.l_linestatus|
  |                          ->  Seq Scan on lineitem  (cost=0.00..3693046.50 rows=91899913 width=51)|
  |                                Avg 91914578.95 rows x 64 workers.  Max 91914598 rows (seg13) with 14.694 ms to first row, 258614 ms to end.|
  |                                Filter: l_shipdate <= '1998-08-17 00:00:00'::timestamp without time zone|
  |1295317.560 ms elapsed|

  Time was 1295.33 seconds.  Query ended at Thu Oct 12 12:09:27 2006

 =head1 CAVEATS/LIMITATIONS

 If explain.pl uses Graphviz to graph the query plan, it may flip the
 left and right children of a join to obtain a more balanced pictorial
 representation.  Use the -edge option to label graph edges to
 correctly identify the left and right children.

 =head1 AUTHORS

 Apache HAWQ

 Address bug reports and comments to: dev@hawq.apache.org

 =cut

 # IMPLEMENTATION NOTES:
 #
 # EXPLAIN ANALYZE final statistics in analyze_node:
 #
 # The final statistics look like this:
 #
 #  Slice statistics:
 #    (slice0)    Executor memory: 472K bytes.
 #    (slice1)    Executor memory: 464K bytes avg x 2 workers, 464K bytes max (seg0).
 #  Settings:
 #
 #  Total runtime: 52347.493 ms
 #
 # The "Settings" entry is optional (ie, it only exists if you change the
 # settings in your session).  If the "Settings" entry is missing explain.pl
 # adds a dummy entry to the statistics.  This technique is a bit easier
 # than changing the parser to handle both cases.
 #
 # Parse_node:
 #   InitPlan entries in greenplum are in separate slices, so explain.pl
 #   prefixes them with an arrow (and adds a fake cost) to make them
 #   look like a top-level execution node.  Again, this technique was
 #   easier than modifying the parser to special case InitPlan.
 #
 #  Plan parsing in general:
 #  The original code only dealt with the TPCH formatted output:
 # |QUERY PLAN|
 # |Gather Motion 64:1  (slice2)  (cost=5990545.19..5990545.21 rows=6 width=51)|
 # |  recv:  Total 4 rows with 1294937 ms to end.|
 # |  Merge Key: partial_aggr.junk_attr_1, partial_aggr.junk_attr_2|
 # |  ->  Sort  (cost=5990545.19..5990545.21 rows=6 width=51)|
 # |        Avg 1.00 rows x 4 workers.  Max 1 rows (seg49) with 1294 ms to end.|
 #
 # It was easier to modify the parser to wrap the input with missing bars
 # than handle two cases (are you sensing a pattern here?).
 #
 # "Magic" Mode:
 #    This mode just adds an output filename option and constructs jpgs
 #
 # Output File Name:
 #  The guts of the formatting code always write to STDOUT, so this code
 #  resets STDOUT to the filename of choice.
 #
 # treemap:
 #  This routine applies a function over the entire parse tree
 #
 # OLAP fixups:
 #  OLAP queries have duplicate Shared Scan and Multi Slice Motion nodes.
 #  explain.pl only fixes them up for dot output, but not for yaml, perl, etc.
 #  The rationale is that dot handle digraphs nicely, but yaml and perl are
 #  more suitable for tree output.
 #

 my $glob_id = "";

 my $glob_optn;
 my $glob_qlist;
 my $glob_direction;
 my $glob_timeline;
 my $glob_prune;
 my $glob_outi;
 my $glob_statcolor;
 my $glob_edge;

 my $GV_formats; # graphviz output formats

 my %glob_coltab;

 my %glob_divcol;

 my $glob_colorscheme;
 BEGIN {

     $GV_formats = '^(jpg|bmp|ps|pdf|png)$';

     # table of valid "qualitative" color schemes

  %glob_coltab = (set312 => [
                               '#8DD3C7',
                               '#FFFFB3',
                               '#BEBADA',
                               '#FB8072',
                               '#80B1D3',
                               '#FDB462',
                               '#B3DE69',
                               '#FCCDE5',
                               '#D9D9D9',
                               '#BC80BD',
                               '#CCEBC5',
                               '#FFED6F'
                               ],
                    paired12 => [
                                 '#a6cee3',
                                 '#1f78b4',
                                 '#b2df8a',
                                 '#33a02c',
                                 '#fb9a99',
                                 '#e31a1c',
                                 '#fdbf6f',
                                 '#ff7f00',
                                 '#cab2d6',
                                 '#6a3d9a',
                                 '#ffff99',
                                 '#b15928'
                                 ],
                    pastel19 => [
                                 '#fbb4ae',
                                 '#b3cde3',
                                 '#ccebc5',
                                 '#decbe4',
                                 '#fed9a6',
                                 '#ffffcc',
                                 '#e5d8bd',
                                 '#fddaec',
                                 '#f2f2f2'
                                 ],
                    pastel24 => [
                                 '#b3e2cd',
                                 '#fdcdac',
                                 '#cbd5e8',
                                 '#f4cae4',
                                 '#e6f5c9',
                                 '#fff2ae',
                                 '#f1e2cc',
                                 '#cccccc'
                                 ],
                    set19 => [
                              '#e41a1c',
                              '#377eb8',
                              '#4daf4a',
                              '#984ea3',
                              '#ff7f00',
                              '#ffff33',
                              '#a65628',
                              '#f781bf',
                              '#999999'
                              ],
                    set28 => [
                              '#66c2a5',
                              '#fc8d62',
                              '#8da0cb',
                              '#e78ac3',
                              '#a6d854',
                              '#ffd92f',
                              '#e5c494',
                              '#b3b3b3'
                              ],
                    original => [
                                 'azure',
                                 'cornsilk',
                                 'lavender',
                                 'mintcream',
                                 'mistyrose',
                                 'lightgray',
                                 'salmon',
                                 'goldenrod',
                                 'cyan'
                                 ],
                    );

     # diverging color schemes

     %glob_divcol = (
                  rdbu11 => [
                             '#67001f',
                             '#b2182b',
                             '#d6604d',
                             '#f4a582',
                             '#fddac7',
                             '#f6f6f6',
                             '#d1e5f0',
                             '#92c5de',
                             '#4393c3',
                             '#2166ac',
                             '#053061'
                             ],
                     );

 }

 sub qlist_fixup
 {
     my $qlist = shift;

     my @outi;

     for my $qnum (@{$qlist})
     {
         if ($qnum =~ m/^\d+$/)
         {
             push @outi, $qnum;
         }
         else
         {
             if ($qnum =~ m/^\d+\-\d+$/)
             {
                 my $expr = $qnum;
                 $expr =~ s/\-/\.\./;

                 eval "for my \$val ($expr) { push \@outi, \$val; }";
             }
             else
             {
                 die("Invalid format for querylist: \'$qnum\'\n");
                 exit(1);
             }

         }

     }

     return \@outi;

 }


 # dump a nice graph listing all of the color schemes (neato is preferred)
 sub dodumpcolor
 {
     my $coltab = shift;
     my $fh = shift;

     my @ggg = sort(keys(%{$coltab}));

     # centered, with lines (not arrows)
     print $fh "digraph plan1 {  graph [center=\"root\",root=\"root\"] ;\n edge [dir=none]\n";

     # adjust the lengths to avoid overlap
     for my $ii (0..(scalar(@ggg)-1))
     {
         print $fh '"root" -> "' . $ii. '"' . " [len=2];\n";

         my $jj = 0;

         for my $cc (@{$coltab->{$ggg[$ii]}})
         {
             print $fh '"' . $ii. '" -> "' . $ii. "." . $jj. '"'
                 . " [len=1];\n";
             $jj++;
         }
     }

     print $fh '"root" [label="color schemes"]' . ";\n";

     for my $ii (0..(scalar(@ggg)-1))
     {
         print $fh '"' . $ii . '" [label="' . $ggg[$ii] . '"]' . ";\n";

         my $jj = 0;

         for my $cc (@{$coltab->{$ggg[$ii]}})
         {
             print $fh '"' . $ii . "." . $jj .
                 '" [label="", style=filled, ' .
                 'fillcolor="' . $cc . '"]' . ";\n";
             $jj++;
         }
     }
     print $fh "\n}\n";

 }


 BEGIN {
     my $man  = 0;
     my $help = 0;
     my $optn = "YAML";
     my $dir  = "BT";
     my $DEFAULT_COLOR = "set28";
     my $colorscheme = $DEFAULT_COLOR;
     my $timeline = '';
     my $prune;
     my $outfile;
     my $statcol;
     my $edgescheme;

     my @qlst;

     GetOptions(
                'help|?' => \$help, man => \$man,
                "querylist|ql|list:s" => \@qlst,
                "option|operation=s" => \$optn,
                "direction:s" => \$dir,
                "colorscheme:s" => \$colorscheme,
                "timeline" => \$timeline,
                "prune:s" => \$prune,
                "output:s" => \$outfile,
                "statcolor:s" => \$statcol,
                "edge:s" => \$edgescheme)
         or pod2usage(2);


     pod2usage(-msg => $glob_id, -exitstatus => 1) if $help;
     pod2usage(-msg => $glob_id, -exitstatus => 0, -verbose => 2) if $man;

     $glob_optn = $optn;
     $glob_optn = "jpg" if ($glob_optn =~ m/^jpeg/i);

     $glob_timeline  = $timeline;
     $glob_prune     = $prune;
     $glob_outi      = $outfile;
     $glob_statcolor = $statcol;

     $glob_edge  = $edgescheme;

     if ($dir !~ m/^(TB|BT|LR|RL)$/i)
     {
         $glob_direction = "BT";
     }
     else
     {
         $glob_direction = uc($dir);
     }

     $colorscheme = lc($colorscheme);

 #    print "color: $colorscheme\n";

     if (exists($glob_coltab{$colorscheme}))
     {
         $glob_colorscheme = $colorscheme;
     }
     else
     {
         if ($colorscheme =~ m/list|dump/i)
         {
             use IO::File;
             use POSIX qw(tmpnam);

             my ($tmpnam, $tmpfh);

             for (;;) {
                 $tmpnam = tmpnam();
                 sysopen($tmpfh, $tmpnam, O_RDWR | O_CREAT | O_EXCL) && last;
             }

             # write to a temporary file
             dodumpcolor(\%glob_coltab, $tmpfh);

             close $tmpfh;

             my $catcmd = "cat $tmpnam";

             # format with neato if jpg was specified
             if ($glob_optn =~ m/$GV_formats/i)
             {
                 my $dotapp = "/Applications/Graphviz.app/Contents/MacOS/neato";

                 if ($^O !~ m/darwin/)
                 {
                     $dotapp = `which neato`;
                     chomp($dotapp);
                 }
                 if (defined($dotapp) && length($dotapp) && (-e $dotapp))
                 {
                     $catcmd .= " | $dotapp -T$glob_optn";
                 }
             }

             system($catcmd);

             unlink $tmpnam;

             exit(0);
         }
         else
         {
             my $colorschemelist = join("\n", sort(keys(%glob_coltab))) . "\n";

             # identify the default color
             $colorschemelist =~
                 s/$DEFAULT_COLOR/$DEFAULT_COLOR  \(default\)/gm;

             print "\nvalid color schemes are:\n";
             print $colorschemelist;
             print "\nUse: \"explain.pl -color dump -opt jpg > graph.jpg\"\n";
             print "to construct a JPEG showing all the valid color schemes.\n";
             print "\nColors from www.ColorBrewer.org by Cynthia A. Brewer, Geography,\nPennsylvania State University.\n\n";

             exit(0);
         }
     }

     @qlst = split(/,/,join(',', @qlst));

     $glob_qlist = qlist_fixup(\@qlst);

 #    print "loading...\n" ;
 }

 sub analyze_node
 {
     my ($node, $parse_ctx) = @_;

     if (defined($node) && exists($node->{txt}))
     {

         # gather analyze statistics if it exists in this node...
         if ($node->{txt} =~
             m/Slice\s+statistics.*(Settings.*)*Total\s+runtime/s)
         {
             my $t1 = $node->{txt};

             # NOTE: the final statistics look something like this:

             # Slice statistics:
             #   (slice0)    Executor memory: 472K bytes.
             #   (slice1)    Executor memory: 464K bytes avg x 2 workers, 464K bytes max (seg0).
             # Settings:
             # Total runtime: 52347.493 ms

             # (we've actually added some vertical bars so it might look
             # like this):
             # || Slice statistics:
             # ||   (slice0)    Executor memory: 472K bytes.

             # NB: the "Settings" entry is optional, so
             # add Settings if they are missing
             unless ($t1 =~
                     m/Slice\s+statistics.*Settings.*Total\s+runtime/s)
             {
                 $t1 =~
                     s/\n.*\s+Total\s+runtime/\n Settings\:   \n Total runtime/;
             }

             my @foo = ($t1 =~ m/Slice\s+statistics\:\s+(.*)\s+Settings\:\s+(.*)\s+Total\s+runtime:\s+(.*)\s+ms/s);

             if (scalar(@foo) == 3)
             {
                 my $mem  = shift @foo;
                 my $sett = shift @foo;
                 my $runt = shift @foo;

                 $mem  =~ s/\|\|//gm; # remove '||'...
                 $sett =~ s/\|\|//gm;

                 my $statstuff = {};

                 my @baz = split(/\n/, $mem);
                 my $sliceh = {};
                 for my $elt (@baz)
                 {
                     my @ztesch = ($elt =~ m/(slice\d+)/);
                     next unless (scalar(@ztesch));
                     $elt =~ s/\s*\(slice\d+\)\s*//;
                     my $val = shift @ztesch;
                     $sliceh->{$val} = $elt;
                 }

                 $statstuff->{memory}   = $sliceh;
                 $statstuff->{settings} = $sett;
                 $statstuff->{runtime}  = $runt;
                 $parse_ctx->{explain_analyze_stats} = $statstuff;
                 $node->{statistics} = $statstuff;
             }
         }

         my @short = $node->{txt} =~ m/\-\>\s*(.*)\s*\(cost\=/;
         $node->{short} = shift @short;

         unless(exists($node->{id}))
         {
             print Data::Dumper->Dump([$node]), "\n";
         }

         if ($node->{id} == 1)
         {
             @short = $node->{txt} =~ m/^\s*\|\s*(.*)\s*\(cost\=/;
             $node->{short} = shift @short;

             # handle case where dashed line might have wrapped...
             unless (defined($node->{short}) && length($node->{short}))
             {
                 # might not be first line...
                 @short = $node->{txt} =~ m/\s*\|\s*(.*)\s*\(cost\=/;
                 $node->{short} = shift @short;
             }


         }

         # handle case of "cost-free" txt (including a double ||
         # and not first line, or screwed-up parse of short as a single bar
         #
         # example: weird initplan like:
         # ||                       ->  InitPlan  (slice49)
         if (defined($node->{short}) && length($node->{short})
             && ($node->{short} =~ m/\s*\|\s*/))
         {
             $node->{short} = "";
         }

         unless (defined($node->{short}) && length($node->{short}))
         {
             @short = $node->{txt} =~ m/\s*\|(\|)?\s*(\w*)\s*/;
             $node->{short} = shift @short;

             if (defined($node->{short}) && length($node->{short})
                 && ($node->{short} =~ m/\s*\|\s*/))
             {
                 $node->{short} = "";
             }

             # last try!!
             unless (defined($node->{short}) && length($node->{short}))
             {
                 my $foo = $node->{txt};
                 $foo =~ s/\-\>//gm;
                 $foo =~ s/\|//gm;
                 $foo =~ s/^\s+//gm;
                 $foo =~ s/\s+$//gm;
                 $node->{short} = $foo;
             }

 #            print "long: $node->{txt}\n";
 #            print "short: $node->{short}\n";
         }

         $node->{short} =~ s/\s*$//;

         # remove quotes which mess up dot file
         $node->{short} =~ s/\"//gm;

 #            print "long: $node->{txt}\n";
 #            print "short: $node->{short}\n";

         # XXX XXX XXX XXX: FINAL "short" fixups
         while (defined($node->{short}) && length($node->{short})
                && ($node->{short} =~ m/(\n)|^\s+|\s+$|(\(cost\=)/m))
         {
             # remove leading and trailing spaces...
             $node->{short} =~ s/^\s*//;
             $node->{short} =~ s/\s*$//;

             # remove newlines
             $node->{short} =~ s/(\n).*//gm;

             # remove cost=...
             $node->{short} =~ s/\(cost\=.*//gm;

 #            print "short fixup: $node->{short}\n\n\n";
         }

         {
             if ($node->{txt} =~ m/(\d+(\.\d*)?)(\s*ms\s*to\s*end)/i)
             {

                 my @ggg =
                     ($node->{txt} =~ m/(\d+(\.\d*)?)(\s*ms\s*to\s*end)/i);

 #                print join('*', @ggg), "\n";

                 my $tt = $ggg[0];

                 $node->{to_end} = $tt;

                 $parse_ctx->{alltimes}->{$tt} = 1;

                 if (exists($parse_ctx->{h_to_end}->{$tt}))
                 {
                     push @{$parse_ctx->{h_to_end}->{$tt}}, '"'. $node->{id} .'"';
                 }
                 else
                 {
                     $parse_ctx->{h_to_end}->{$tt} = ['"'. $node->{id} . '"'];
                 }


             }
             if ($node->{txt} =~ m/(\d+(\.\d*)?)(\s*ms\s*to\s*first\s*row)/i)
             {

                 my @ggg =
                     ($node->{txt} =~ m/(\d+(\.\d*)?)(\s*ms\s*to\s*first\s*row)/i);

 #                print join('*', @ggg), "\n";

                 my $tt = $ggg[0];

                 $node->{to_first} = $tt;

                 $parse_ctx->{alltimes}->{$tt} = 1;

                 if (exists($parse_ctx->{h_to_first}->{$tt}))
                 {
                     push @{$parse_ctx->{h_to_first}->{$tt}}, '"' . $node->{id} . '"' ;
                 }
                 else
                 {
                     $parse_ctx->{h_to_first}->{$tt} = [ '"' . $node->{id} . '"'];
                 }

             }

             if ($node->{txt} =~ m/start offset by (\d+(\.\d*)?)(\s*ms)/i)
             {

                 my @ggg =
                     ($node->{txt} =~ m/start offset by (\d+(\.\d*)?)(\s*ms)/i);

 #                print join('*', @ggg), "\n";

                 my $tt = $ggg[0];

                 $node->{to_startoff} = $tt;

                 $parse_ctx->{allstarttimes}->{$tt} = 1;

                 if (exists($parse_ctx->{h_to_startoff}->{$tt}))
                 {
                     push @{$parse_ctx->{h_to_startoff}->{$tt}}, '"'. $node->{id} .'"';
                 }
                 else
                 {
                     $parse_ctx->{h_to_startoff}->{$tt} = ['"'. $node->{id} . '"'];
                 }
             }

             if (exists($node->{to_end}))
             {
                 $node->{total_time} =
                     (exists($node->{to_first})) ?
                     ($node->{to_end} - $node->{to_first}) :
                     $node->{to_end};
             }


         }

         if (1)
         {
             if (exists($node->{child}))
             {
                 delete $node->{child}
                   unless (defined($node->{child})
                           && scalar(@{$node->{child}}));
             }
         }


     }

 }

 sub parse_node
 {

     my ($ref_id, $parse_ctx, $depth, $plan_rows, $parent) = @_;

 #    print "depth: $depth\n";
 #    print "row: ",$plan_rows->[0],"\n"      if (scalar(@{$plan_rows}));

 #    print "first: $first\n" if defined ($first);

     my $spclen = undef;
     my $node = undef;

     my $no_more_text = 0;

     while (scalar(@{$plan_rows}))
     {
         my $row = $plan_rows->[0];

         unless (defined($node))
         {
             $node = {};

             $node->{child} = [];

             $node->{txt} = "";

             $node->{parent} = $parent
                 if (defined($parent));

             my $id = $$ref_id;
             $id++;
             $$ref_id= $id;
             $node->{id} = $id;
         }

         # XXX XXX XXX XXX XXX XXX XXX XXX XXX XXX XXX XXX XXX XXX XXX
         # make initplan into a fake node so the graphs look nicer (eg
         # tpch query 15).  Prefix it with an arrow and add a fake cost.
         if ($row =~ m/\|(\s)*InitPlan(.*)slice/)
         {
             $row =~ s/InitPlan/\-\>  InitPlan/;
             if ($row !~ m/\(cost=/)
             {
                 $row =~ s/\|$/\(cost=\?\)|/;
             }
         }

         if ($row !~ m/\|(\s)*\-\>/)
         {
             # add text to existing node

             if ($no_more_text)
             {
                 print "error: no more text for ". $node->{id}, "\n";
             }

             $node->{txt} .= "\n" . $row;

 #            print "txt: $node->{txt}\n";

             shift @{$plan_rows};
             next;
         }
         else
         {
             # new node
             unless ($no_more_text)
             {
                 unless (length($node->{txt}))
                 {
                     $node->{txt} .= $row;
                     shift @{$plan_rows};
                     next;
                 }
             }

             # match the leading spaces before the '->', eg:
             # "|  ->  Sort  (cost=5990545.19..599..."

             my @spc = ($row =~ m/\|(\s*)\-\>/);

 #            print "space match:", Data::Dumper->Dump(\@spc), "\n";

             $spclen = scalar(@spc) ?  length($spc[0]) : 0;

 #            print "space len: $spclen, depth: $depth\n";

             if ($spclen > $depth)
             {
                 # found a child
                 push @{$node->{child}}, parse_node($ref_id, $parse_ctx,
                                                    $spclen, $plan_rows,
                                                    $node->{id});
             }

         }

         if (defined($spclen))
         {
             if ($spclen <= $depth)
             { # found a sibling or parent
                 # need to put the row back on the head of the list

                 if (defined($node) && exists($node->{txt}))
                 {
                     analyze_node($node, $parse_ctx);

                     return $node;
                 }
             }
         }
         else
         {
             die ("what the heck?");
         }

         $spclen = undef;
         $no_more_text = 1;

     } # end while

     if (defined($node))
     {
         analyze_node($node, $parse_ctx);
     }

     return $node;

 }


 if (1)
 {

     my @bigarr;

     my $state = "INIT";

     my $pair = undef;

     my ($query, $plan);

     my $tpch_format=0;
     my $bigdash = '-' x 40; # make big dash smaller

     my $magic;

     for (<>)
     {
         my $ini = $_;

         if ($state =~ m/INIT/)
         {
             if ($ini !~ m/(^EXPLAIN ANALYZE)|(QUERY PLAN)/)
             {
                 next;
             }

             $query = "";
             $plan  = "";
             $pair = {};

             if ($ini =~ m/^EXPLAIN ANALYZE/)
             {
                 $tpch_format = 1;
                 $state = "GETQUERY";
                 next;
             }

             if ($ini =~ m/QUERY PLAN/)
             {
                 $tpch_format = 0;
                 $plan  = "";
                 $state = "GETPLAN";
                 next;
             }

         }

         if ($state !~ m/GETPLAN/)
         {
             # should be START or GETQUERY only...
             if ($tpch_format)
             {
                 if ($ini =~ m/^EXPLAIN ANALYZE/)
                 {
                     if (defined($pair))
                     {
                         $pair->{plan} = $plan;
                         $pair->{query} = $query;
                         push @bigarr, $pair;
                     }
                     $pair = {};
                     $query = "";
                     $plan  = "";
                     $state = "GETQUERY";
                     next;
                 }
             }
             else
             {
                 # not tpch analyze
                 if ($ini =~ m/QUERY PLAN/)
                 {
                     if (defined($pair))
                     {
                         $pair->{plan} = $plan;
                         $pair->{query} = $query;
                         push @bigarr, $pair;
                     }
                     $pair = {};
                     $query = "";
                     $plan  = "";
                     $state = "GETPLAN";
                     next;
                 }

             }
             if ($state =~ m/GETQUERY/)
             {
                 if ($ini =~ m/QUERY PLAN/)
                 {
                     if (!($tpch_format))
                     {
                         if (defined($pair))
                         {
                             $pair->{plan} = $plan;
                             $pair->{query} = $query;
                             push @bigarr, $pair;
                         }
                         $pair = {};
                         $query = "";
                     }

                     $plan  = "";
                     $state = "GETPLAN";
                     next;
                 }

                 $query .= $ini;
             }

         } # end not getplan

         if ($state =~ m/GETPLAN/)
         {

             if ($tpch_format)
             {
                 if ($ini !~ m/\|(.*)\|/)
                 {
                     $state = "START";
                     next;
                 }
             }
             else
             {
                 if ($ini =~ m/(\(\d+\s+rows\))|(Time\s+was.*seconds\.\s+Query\s+ended)/)
                 {
                     $state = "START";
                     next;
                 }
             }
 			# a bit weird here -- just ignore the separator.  But
 			# maybe we should invest some effort to determine that the
 			# separator is the next line after the header (and only
 			# ignore it once) ?
             next
                 if ($ini =~ m/$bigdash/);

             # add the missing bars
             if (!($tpch_format))
             {
                 if ($ini !~ m/\|(.*)\|/)
                 {
                     $ini = '|' . $ini . '|';
                 }
             }

             $plan .= $ini;
         }

     } # end big for
     if (defined($pair))
     {
         $pair->{plan} = $plan;
         $pair->{query} = $query;
         push @bigarr, $pair;
     }

 #print scalar(@bigarr), "\n";


 #print Data::Dumper->Dump(\@bigarr);

 #print $bigarr[0]->{plan};

     unless(scalar(@{$glob_qlist}))
     {
         # build a 1-based list of queries
         for (my $ii =1; $ii <= scalar(@bigarr); $ii++)
         {
             push @{$glob_qlist}, $ii;
         }
     }

     my $realSTDOUT;

     for my $qqq (@{$glob_qlist})
     {
         my $qnum = $qqq - 1; # 0 based vs 1 based

         if ($qnum >  scalar(@bigarr))
         {
             warn("specified query $qqq is out-of-range -- skipping...\n");
             next;
         }

         if ($glob_optn =~ m/query|text|txt/i)
         {
             doquery($bigarr[$qnum]->{query});
             next;
         }

         my $plantxt = $bigarr[$qnum]->{plan};

         unless (defined($plantxt) && length($plantxt))
         {
             warn("invalid plan for query $qqq -- skipping...\n");
             next;
         }

 #print $plantxt, "\n";

         my @plan_r = split(/\n/, $plantxt);

         my $pr = \@plan_r;

         my $parse_ctx = {};

         my $id = 0;

         $parse_ctx->{alltimes} = {};
         $parse_ctx->{h_to_end} = {};
         $parse_ctx->{h_to_first} = {};

         $parse_ctx->{allstarttimes} = {};
         $parse_ctx->{h_to_startoff} = {};
         $parse_ctx->{explain_analyze_stats} = {};

         my $plantree = parse_node(\$id, $parse_ctx, 0, $pr);

 #        my @timelist = sort {$a <=> $b} keys (%{$parse_ctx->{alltimes}});
         my @timelist = sort {$a <=> $b} keys (%{$parse_ctx->{allstarttimes}});


         if (defined($glob_prune))
         {
             if ($glob_prune =~ m/stat|heavy|heavily/i)
             {
                 my $map_expr = 'delete $node->{to_end};';
                 treeMap($plantree, undef, $map_expr);
                 $map_expr = 'delete $node->{to_first};';
                 treeMap($plantree, undef, $map_expr);

                 # additional statistics
                 $map_expr = 'delete $node->{to_startoff};';
                 treeMap($plantree, undef, $map_expr);
                 $map_expr = 'delete $node->{total_time};';
                 treeMap($plantree, undef, $map_expr);
                 $map_expr = 'delete $node->{statistics};';
                 treeMap($plantree, undef, $map_expr);
             }
             if ($glob_prune =~ m/heavy|heavily/i)
             {
                 treeMap($plantree, 'prune_heavily($node);');
             }
         }


         # magic mode : display everything magically
         #
         # NOTE: only set to magic on the first iteration, then reset
         # to jpg, so performs correctly with multiple queries
         if ($glob_optn =~ m/magic/i)
         {
             $glob_optn = "jpg";

             use IO::File;
             use POSIX qw(tmpnam);

             my $tmpnam;

             for (;;) {
                 my $tmpfh;

                 $tmpnam = tmpnam();
                 sysopen($tmpfh, $tmpnam, O_RDWR | O_CREAT | O_EXCL) && last;
             }

             # create a temporary directory name -- just append ".dir"
             # to the new tempfile name and mkdir
             my $tmpdir = $tmpnam . ".dir";

             mkdir($tmpdir) or            die "magic failed" ;

             unlink $tmpnam;  # we didn't need this tempfile anyhow

             # reset output file name to create files in the new
             # temporary directory
             $glob_outi = File::Spec->catfile($tmpdir, "query_");

             $magic = $glob_outi;
         }

         if ($glob_outi)
         {
             unless (defined($realSTDOUT))
             {
                 open $realSTDOUT, ">&STDOUT"  or die "Can't dup STDOUT: $!";
             }

             my $outfilename = $glob_outi;

             # only need numbering if processed more than one query
             my $neednum = (scalar(@bigarr) > 1);

             # check if name has an extension like ".foo"
             if ($outfilename =~ m/\.(.){1,5}$/)
             {
                 # qqq is query num (1 based)

                 my $formatq = sprintf("%03d", $qqq);

                 $outfilename =~ s/\.(.*)$/$formatq\.$1/
                     if ($neednum);
             }
             else
             {
                 # qqq is query num (1 based)
                 my $formatq = sprintf("%03d", $qqq);

                 $outfilename .= $formatq
                     if ($neednum);
                 if ($glob_optn =~ m/yaml/i)
                 {
                     $outfilename .= ".yml";
                 }
                 if ($glob_optn =~ m/json/i)
                 {
                     $outfilename .= ".json";
                 }
                 if ($glob_optn =~ m/perl|dump/i)
                 {
                     $outfilename .= ".perl";
                 }
                 if ($glob_optn =~ m/dot|graph/i)
                 {
                     $outfilename .= ".dot";
                 }
                 if ($glob_optn =~ m/$GV_formats/i)
                 {
                     $outfilename .= ".$glob_optn";
                 }
             }

             close STDOUT;

             open (STDOUT, ">$outfilename" ) or die "can't open STDOUT: $!";

 #            print $outfilename, "\n";

         }


         if ($glob_optn =~ m/yaml/i)
         {
             doyaml($plantree);
         }
         if ($glob_optn =~ m/json/i)
         {
             doyaml($plantree, "json");
         }
         if ($glob_optn =~ m/perl|dump/i)
         {
             doDataDump($plantree);
         }
         if ($glob_optn =~ m/dot|graph/i)
         {
             dodotfile($plantree, \@timelist, $qqq, $parse_ctx,
                       $glob_direction);
         }
         if ($glob_optn =~ m/operator/i)
         {
             doOperatorDump($plantree);
         }

         if ($glob_optn =~ m/$GV_formats/i)
         {
             my $dotapp = "/Applications/Graphviz.app/Contents/MacOS/dot";

             if ($^O !~ m/darwin/)
             {
                 $dotapp = `which dot`;
                 chomp($dotapp);
             }
             die "could not find dot app: $dotapp"
                 unless (defined($dotapp) && length($dotapp) && (-e $dotapp));

             # should have been able to redirect STDOUT thru a pipe
             # directly to dotapp, but didn't work.  Use a tmpfile
             # instead.

             use IO::File;
             use POSIX qw(tmpnam);

             my $tmpnam;

             for (;;) {
                 my $tmpfh;

                 $tmpnam = tmpnam();
                 sysopen($tmpfh, $tmpnam, O_RDWR | O_CREAT | O_EXCL) && last;
             }
             open my $oldout, ">&STDOUT"     or die "Can't dup STDOUT: $!";

             close STDOUT;
             open (STDOUT, ">$tmpnam" ) or die "can't open STDOUT: $!";

             select STDOUT; $| = 1;      # make unbuffered

             dodotfile($plantree, \@timelist, $qqq, $parse_ctx,
                       $glob_direction);

             close STDOUT;
             open STDOUT, ">&", $oldout or die "Can't dup \$oldout: $!";

             system("cat $tmpnam | $dotapp -T$glob_optn");

             unlink $tmpnam;


         }
     } #end for querynum

     if (defined($realSTDOUT))
     {
         close STDOUT;
         open STDOUT, ">&", $realSTDOUT or die "Can't dup \$oldout: $!";

     }

     # magically display all files
     if (defined($magic))
     {
         # only need numbering if processed more than one query
         my $neednum = (scalar(@{$glob_qlist}) > 1);

         if ($^O =~ m/darwin/)
         {
             # use ImageMagick montage
             my $ggg = $magic . '*';
             my $montage = `which montage`;
             chomp($montage);

             # only perform a montage if more than one query
             if ($neednum && defined($montage) && ( -e $montage))
             {
                 my $dir = $magic;
                 # get the directory name (remove "query_" prefix)
                 $dir =~ s/query_$//;
                 system("cd $dir; montage -label \'%f\' $ggg -title \"$dir\n`date`\" -shadow INDEX.html; open INDEX.html");
             }
             else
             {
                 system("open $ggg");
             }
         }
     }

 }
 #print "\nmax id: $id\n\n";

 #

 sub treeMap
 {
     my ($node, $pre_map, $post_map, $ctx) = @_;

     eval "$pre_map"
         if (defined($pre_map));

     if (exists($node->{child}))
     {
         for my $kid (@{$node->{child}})
         {
             treeMap($kid, $pre_map, $post_map, $ctx);
         }
     }
     eval "$post_map"
         if (defined($post_map));
 }

 sub doDataDump
 {
     my $plantree = shift;


     local $Data::Dumper::Indent   = 1;
     local $Data::Dumper::Terse    = 1;
     local $Data::Dumper::Sortkeys = 1;

     my $map_expr = 'delete $node->{txt};';
 #    my $map_expr = 'print "foo\n"';
     treeMap($plantree, undef, $map_expr);

     print Data::Dumper->Dump([$plantree]);
 }

 sub doOperatorDump
 {
     my $plantree = shift;

 	print $plantree->{short}, "\n" if (exists($plantree->{short}));

 	return
 		unless (exists($plantree->{child}));

 	for my $kid (@{$plantree->{child}})
 	{
 		doOperatorDump($kid);
 	}
 }

 # add slice info to node
 # and gather explain analyze stats
 sub addslice
 {
     my ($node, $ctx) = @_;

     # AUTO-6: find nodes with "(slice1)" info where the slice numbers aren't
     # part of the "Slice statistics"

     my $txt1 = $node->{txt};
     $txt1 =~ s/Slice statistics.*//gs;

     if ($txt1 =~ /(slice(\d+))/)
     {
         my @ggg = ($txt1 =~ m/(slice(\d+))/) ;
         $node->{slice} = shift @ggg;

         # check if we have explain analyze stats for the slice
         if (exists($ctx->{explain_analyze_stats})
             && exists($ctx->{explain_analyze_stats}->{memory})
             && exists($ctx->{explain_analyze_stats}->{memory}->{$node->{slice}}))
         {
             $node->{memory} =
                 $ctx->{explain_analyze_stats}->{memory}->{$node->{slice}};
         }

     }
 }


 sub doquery
 {
     my $qtxt = shift;

     print $qtxt, "\n";

 }

 sub doyaml
 {
     my ($plantree, $opti) = @_;

     $opti = "yaml" unless (defined($opti));

     if ($opti =~ m/json/i)
     {
         # JSON might not be installed, so test for it.

         if (eval "require JSON")
         {
             my $map_expr = 'delete $node->{txt};';

             treeMap($plantree, undef, $map_expr);

             # because JSON is REQUIREd, not USEd, the symbols are not
             # imported into the environment.
             print JSON::objToJson($plantree, {pretty => 1, indent => 2});
         }
         else
         {
             die("Fatal Error: The required package JSON is not installed -- please download it from www.cpan.org\n");
             exit(1);
         }

     }
     else
     {
         # YAML might not be installed, so test for it.

         if (eval "require YAML")
         {
             my $map_expr = 'delete $node->{txt};';

             treeMap($plantree, undef, $map_expr);

             # because YAML is REQUIREd, not USEd, the symbols are not
             # imported into the environment.
             print YAML::Dump($plantree);
         }
         else
         {
             die("Fatal Error: The required package YAML is not installed -- please download it from www.cpan.org\n");
             exit(1);
         }

     }

 }

 # remove slice numbering information to construct even more generic plans
 sub prune_heavily
 {
     my $node = shift;

     return
         unless (exists($node->{short}));

 	if ($node->{short} =~ m/Delete\s*\(slice.*segment.*\)\s*\(row.*width.*\)/)
 	{
 		# QA-1309: fix strange DELETE operator formatting
 		$node->{short} = "Delete";
 	}
 	elsif ($node->{short} =~ m/Update\s*\(slice.*segment.*\)\s*\(row.*width.*\)/)
         {
                 # QA-1309: fix strange UPDATE operator formatting
                 $node->{short} = "Update";
         }
 	elsif ($node->{short} =~ m/\d+\:\d+/)
 	{

 		# example: Gather Motion 8:1 (slice4);

 		# strip the number of nodes and slice information
 		$node->{short} =~ s/\s+\d+\:\d+.*//;

 		# Note: don't worry about removing "(slice1)" info from the
 		# "short" because addslice processes node->{text}
 	}
 }

 # identify the slice for each node
 # and find Shared Scan "Primary"
 # and find MultiSliceMotion
 sub pre_slice
 {
     my ($node, $ctx) = @_;

     {
         if (scalar(@{$ctx->{a1}}))
         {
             my $parent = $ctx->{a1}->[-1];

             unless (exists($node->{slice}))
             {
                 if (exists($parent->{slice}))
                 {
                     $node->{slice} = $parent->{slice};
                 }
             }
         }

         # olap stuff

         if ($node->{short} =~ m/^Shared Scan/)
         {
             # if the Shared Scan has a child it is the "primary"
             if (exists($node->{child}))
             {
 				my $share_short_fixup = $node->{short};

 				# remove the slice number from the "short"
 				$share_short_fixup =~ s/(\d+)\:/\:/g;

                 if (!exists($ctx->{share_input_h}->{$share_short_fixup}))
                 {
                     $ctx->{share_input_h}->{$share_short_fixup} = $node;
                 }
             }
             else # not the primary, mark as a duplicate node
             {
                 $node->{SharedScanDuplicate} = 1;
             }
         }
         if ($node->{short} =~ m/^Multi Slice Motion/)
         {
             # choose first Multi Slice Motion node as primary
             if (!exists($ctx->{multi_slice_h}->{$node->{short}}))
             {
                 $ctx->{multi_slice_h}->{$node->{short}} = $node;
             }
             else # not the primary, mark as a duplicate node
             {
                 $node->{MultiSliceMotionDuplicate} = 1;
             }
         }

         if (exists($node->{total_time}))
         {
             my $tt = $node->{total_time};
             my $tt2 = $tt * $tt;
             $ctx->{time_stats_h}->{cnt} += 1;
             $ctx->{time_stats_h}->{sum} += $tt;
             $ctx->{time_stats_h}->{sumsq} += $tt2;

             if (exists($ctx->{time_stats_h}->{tt_h}->{$tt}))
             {
                 push @{$ctx->{time_stats_h}->{tt_h}->{$tt}}, $node;
             }
             else
             {
                 $ctx->{time_stats_h}->{tt_h}->{$tt} = [$node];
             }
         }

     }

     push @{$ctx->{a1}}, $node;

 }

 sub post_slice
 {
     my ($node, $ctx) = @_;

     pop @{$ctx->{a1}};

 }

 # make all duplicate sharedscan nodes point back to primary
 sub sharedscan_fixup
 {
     my ($node, $ctx) = @_;

     if (exists($node->{SharedScanDuplicate}))
     {
 		my $share_short_fixup = $node->{short};

 		# remove the slice number from the "short"
 		$share_short_fixup =~ s/(\d+)\:/\:/g;

         $node->{SharedScanDuplicate} =
             $ctx->{share_input_h}->{$share_short_fixup};
 #        $node->{id} =
 #            $node->{SharedScanDuplicate}->{id};
     }

     if (exists($node->{MultiSliceMotionDuplicate}))
     {
         $node->{MultiSliceMotionDuplicate} =
             $ctx->{multi_slice_h}->{$node->{short}};
         # XXX XXX: for this case the node is really the same
         $node->{id} =
             $node->{MultiSliceMotionDuplicate}->{id};
     }

 }

 sub human_num
 {
     my $esti = shift;

     my @suffix = qw(K M G T P E Z Y);
     my $suff = "";

     # try to shorten estimate specification
     while (length(POSIX::ceil($esti)) > 3)
     {
         $suff = shift @suffix;

         $esti = $esti/1000;
     }

     if (length($suff))
     {
         $esti *= 100;
         $esti = POSIX::floor($esti+0.5);
         $esti = $esti/100;

         $esti .= $suff;
     }

     return $esti;
 }

 # label left and right for nest loops
 sub nestedloop_fixup
 {
     my ($node, $ctx) = @_;

     return
         unless (exists($node->{short}) &&
                 ($node->{short} =~ m/Nested Loop/));

     my @kidlist;

     if (exists($node->{child}))
     {
         for my $kid (@{$node->{child}})
         {
             push @kidlist, $kid;
         }
     }

     return
         unless (2 == scalar(@kidlist));

     if ($kidlist[0]->{id} < $kidlist[1]->{id})
     {
         $kidlist[0]->{nested_loop_position} = "left";
         $kidlist[1]->{nested_loop_position} = "right";
     }
     else
     {
         $kidlist[1]->{nested_loop_position} = "left";
         $kidlist[0]->{nested_loop_position} = "right";
     }

 }

 # find rows out information
 sub get_rows_out
 {
     my ($node, $ctx, $edge) = @_;

     return
         unless ($node->{txt} =~ m/(Rows out\:)|(\(cost\=.*\s+rows=.*\s+width\=.*\))/);

     my $long = ($edge =~ m/long|med/i);

     if ($node->{txt} =~ m/Rows out\:\s+Avg.*\s+rows\s+x\s+.*\s+workers/)
     {
         if (!$long)
         {
             # short result
             my @foo =
                 ($node->{txt} =~
                  m/Rows out\:\s+Avg\s+(.*)\s+rows\s+x\s+(.*)\s+workers/);

             goto L_get_est unless (2 == scalar(@foo));

             # calculate row count as avg x num workers
             $node->{rows_out} = $foo[0] * $foo[1];
         }
         else
         {
             my @foo =
                 ($node->{txt} =~
                  m/Rows out\:\s+(Avg.*workers)/);

             goto L_get_est unless (1 == scalar(@foo));

             # just print the string
             $node->{rows_out} = $foo[0];

             if ($edge =~ m/med/i)
             {
                 $node->{rows_out} =~ s/Avg\s+//;
                 $node->{rows_out} =~ s/rows\s+//;
                 $node->{rows_out} =~ s/\s*workers\s*//;
             }

         }
     }
     elsif ($node->{txt} =~ m/Rows out\:\s+.*\s+rows/)
     {
         my @foo =
             ($node->{txt} =~
              m/Rows out\:\s+(.*)\s+rows/);

         goto L_get_est unless (1 == scalar(@foo));

         $node->{rows_out} = $foo[0];
     }

     if (
         exists($node->{rows_out}) &&
         length($node->{rows_out})
         )
     {
         if (
             ($node->{rows_out} !~ m/avg/i) &&
             ($node->{rows_out} =~ m/x/))
         {
             my @foo = ($node->{rows_out} =~ m/(x.*)$/);

             my $tail = $foo[0];

             @foo = ($node->{rows_out} =~ m/(.*)\s+x.*/);

             my $head = $foo[0];

             if (defined($tail) && defined($head))
             {
                 $head = human_num($head);

                 $node->{rows_out} = $head . " " . $tail;
             }

         }
         elsif ($node->{rows_out} =~ m/^\d+$/)
         {
             $node->{rows_out} = human_num($node->{rows_out});
         }
     }

 L_get_est:

     # add row estimates
     if ($long &&
         ($node->{txt} =~ m/\(cost\=.*\s+rows=.*\s+width\=.*\)/))
     {
         my @foo = ($node->{txt} =~ m/cost\=.*\s+rows=(\d+)\s+width\=.*/);

         if (scalar(@foo))
         {
             use POSIX;

             my $esti = $foo[0];

             $esti = human_num($esti);

             unless (exists($node->{rows_out}) &&
                     length($node->{rows_out}))
             {
                 $node->{rows_out} = "";
             }

             $node->{rows_out} .= " (est $esti)";
         }
     }

 } # end get_rows_out

 sub calc_color_rank
 {
     my $ctx = shift;

     return
         unless (defined($glob_statcolor));

     if ($ctx->{time_stats_h}->{cnt} > 1)
     {
         # population variance =
         # (sum of the squares)/n - (square of the sums)/n*n
         my $sum   = $ctx->{time_stats_h}->{sum};
         my $sumsq = $ctx->{time_stats_h}->{sumsq};
         my $enn   = $ctx->{time_stats_h}->{cnt};

         my $pop_var = ($sumsq/$enn) - (($sum*$sum)/($enn*$enn));
         my $std_dev = sqrt($pop_var);
         my $mean    = $sum/$enn;
         my $half    = $std_dev/2;

         # calculate a stanine (9 buckets, each 1/2 of stddev).  The
         # middle bucket (5, which is 4 if we start at zero) is
         # centered on the mean, so it starts on mean - (1/4 stddev),
         # and ends at mean + (1/4 stddev).
         my @bucket;
         my $buckstart = ($mean-($half/2))-(3*$half);

         push @bucket, 0;

         for my $ii (1..7)
         {
             push @bucket, $buckstart;
             $buckstart += $half;
         }
         push @bucket, 2**40; # "infinity"

         my @tlist = sort {$a <=> $b} (keys %{$ctx->{time_stats_h}->{tt_h}});

         # must have at least two
         my $firstt = shift @tlist;
         my $lastt  = pop @tlist;
 #        print "f,l: $firstt, $lastt\n";

         for my $nod (@{$ctx->{time_stats_h}->{tt_h}->{$firstt}})
         {
 #            print "first ", $nod->{id}, ": ", $nod->{short}, " - ", 0, "\n";
             $nod->{color_rank} = 10;
         }
         for my $nod (@{$ctx->{time_stats_h}->{tt_h}->{$lastt}})
         {
 #            print "last ", $nod->{id}, ": ", $nod->{short}, " - ", 10, "\n";
             $nod->{color_rank} = 1;
         }

 #        print "bucket: ", Data::Dumper->Dump(\@bucket);
 #        print "tlist: ", Data::Dumper->Dump(\@tlist);
 #        print Data::Dumper->Dump([$ctx->{time_stats_h}]);

         my $bucknum = 1;
         for my $tt (@tlist)
         {
 #            print "tt: $tt\n";
 #            print "bk: $bucket[$bucknum]\n";

             while ($tt > $bucket[$bucknum])
             {
 #                print "$tt > $bucket[$bucknum]\n";
 #                last if ($bucknum >= 11);
                 $bucknum++;
             }
             for my $nod (@{$ctx->{time_stats_h}->{tt_h}->{$tt}})
             {
 #                print "node ", $nod->{id}, ": ", $nod->{short}, " - ", $bucknum, "\n";
 #                $nod->{color_rank} = ($bucknum-1);
                 $nod->{color_rank} = (10 - $bucknum);
             }
         }

     }

 }

 sub dodotfile
 {
     my ($plantree, $time_list, $plan_num, $parse_ctx, $direction) = @_;

     {
         my $map_expr = 'addslice($node, $ctx); ';

         treeMap($plantree, $map_expr, undef, $parse_ctx);
     }


 #    $map_expr = 'propslice($node, $ctx);';
     my $ctx = {level => 0, a1 => [],
                share_input_h => {}, multi_slice_h => {},
                time_stats_h => { cnt=>0, sum=>0, sumsq=>0, tt_h => {} }  };

 #    my $map_expr = 'print "foo\n"';
     treeMap($plantree,
             'pre_slice($node, $ctx); ',
             'post_slice($node, $ctx); ',
             $ctx);

     calc_color_rank($ctx);

     treeMap($plantree,
             'sharedscan_fixup($node, $ctx); ',
             undef,
             $ctx);

     # always label the left/right sides of nested loop
     treeMap($plantree,
             'nestedloop_fixup($node, $ctx); ',
             undef,
             $ctx);

     if (defined($glob_edge) && length($glob_edge))
     {
         treeMap($plantree,
                 'get_rows_out($node, $ctx, $glob_edge); ',
                 undef,
                 $ctx);
     }

     my $dotimeline = $glob_timeline;

     makedotfile($plantree, $time_list, $dotimeline, $plan_num, $parse_ctx,
                 $direction);
 }


 sub dotkid
 {
     my $node = shift;

     # XXX XXX: olap fixup - don't label duplicate multi slice motion nodes
     return
         if (exists($node->{MultiSliceMotionDuplicate}));

     # XXX XXX: olap fixup - have children of primary sharedscan
     # point to this node
     if (exists($node->{SharedScanDuplicate}))
     {
         for my $kid (@{$node->{SharedScanDuplicate}->{child}})
         {
             print '"' . $kid->{id} . '" -> "' . $node->{id} . '"' . ";\n";
         }
     }

     my $docrunch = 2;

     if (exists($node->{child}))
     {
         if (($docrunch != 0 ) && (scalar(@{$node->{child}} > 10)))
         {
             my $maxi = scalar(@{$node->{child}});

             $maxi -= 2;

             for my $ii (2..$maxi)
             {
                 $node->{child}->[$ii]->{crunchme} = 1;
             }

             if ($docrunch == 2)
             {
                 splice(@{$node->{child}}, 3, ($maxi-2));

                 $node->{child}->[2]->{short} = "... removed " . ($maxi - 3) . " nodes ...";
             }


         }

         for my $kid (@{$node->{child}})
         {
             my $edge_label = "";

             print '"' . $kid->{id} . '" -> "' . $node->{id} . '"';

             if (exists($kid->{nested_loop_position}))
             {
                 $edge_label .= $kid->{nested_loop_position};
             }

             if (exists($kid->{rows_out}))
             {
                 $edge_label .= " ";
                 $edge_label .= " "
                     if (length($edge_label));
                 $edge_label .= $kid->{rows_out};
             }

             if (length($edge_label))
             {
                 print ' [label="' . $edge_label . '" ] ';
             }

             print ";\n";
         }

         for my $kid (@{$node->{child}})
         {
             dotkid($kid);
         }

     }

 }

 sub dotlabel_detail
 {
     my $node = shift;

 #    return     $node->{short} ;

     my $outi = $node->{short};

     my ($frst, $last) = (" ", " ");

     if (exists($node->{to_end}))
     {
         $last = "end: " . $node->{to_end};
     }
     if (exists($node->{to_first}))
     {
         $frst = "first row: " . $node->{to_first};
     }

     my $slice = $node->{slice};
     $slice = " "
         unless (defined($slice));


     if ((length($frst) > 1) || (length($last) > 1))
     {
         my $memstuff = "";

         # add memory statistics if have them...
         if (exists($node->{memory}))
         {
             $memstuff = " | { {" . $node->{memory} . "} } ";
             # make multiline - split on comma and "Work_mem"
             # (using the vertical bar formatting character)
             $memstuff =~ s/\,/\,\| /gm;
             $memstuff =~ s/Work\_mem/\| Work\_mem/gm;
         }

 #        $outi .= " | { " . join(" | " , $frst, $last) . " } ";
         $outi .= " | { " . join(" | " , $slice, $frst, $last) . " } " . $memstuff;

         # wrapping with braces changes record organization to vertical
         $outi = "{ " . $outi . " } ";
     }


     return $outi;
 }


 sub dotlabel
 {
     my $node = shift;

     # XXX XXX: olap fixup - don't label duplicate multi slice motion nodes
     return
         if (exists($node->{MultiSliceMotionDuplicate}));

     my $colortable = $glob_coltab{$glob_colorscheme};

     my $color = scalar(@{$colortable});
     $color = $node->{slice}  if (exists($node->{slice}));
     $color =~ s/slice//;

     $color = ($color) % (scalar(@{$colortable}));

     # build list of node attributes
     my @attrlist;
     push @attrlist, "shape=record";
 #    push @attrlist, "shape=polygon";
 #    push @attrlist, "peripheries=2";

 #    push @attrlist, 'fontcolor=white';

     push @attrlist, 'label="' . dotlabel_detail($node) .'"';
     push @attrlist, 'style=filled';
 #    push @attrlist, 'style="filled,bold"';
 #    push @attrlist, "color=" . $colortable->[$color];
 #    push @attrlist, "fillcolor=" . $colortable->[$color];

     if (exists($node->{color_rank})) # color by statistical ranking
     {
         my $edgecol = $glob_divcol{rdbu11}->[$node->{color_rank}];
         my $fillcol = $colortable->[$color];

         if (defined($glob_statcolor))
         {
             if ($glob_statcolor =~ m/^t$/i)
             {
                 # show timing color only
                 $fillcol = $edgecol;
             }
             if ($glob_statcolor =~ m/^st/i)
             {
                 # edge is slice color, fill is time stats
                 # invert the selection
                 ($edgecol, $fillcol) = ($fillcol, $edgecol);
             }
         }

         push @attrlist, 'style="filled,setlinewidth(6)"';
         push @attrlist, "color=\"" . $edgecol . '"';

         push @attrlist, "fillcolor=\"" . $fillcol . '"';
     }
     else
     {
         push @attrlist, "color=\"" . $colortable->[$color] . '"';
         push @attrlist, "fillcolor=\"" . $colortable->[$color] . '"';
     }

     if (exists($node->{crunchme}))
     {
         @attrlist = ();

 #    push @attrlist, 'style=filled';
     push @attrlist, 'style=filled';
     push @attrlist, "color=\"" . $colortable->[$color] . '"';
     push @attrlist, "fillcolor=\"" . $colortable->[$color] . '"';
 #    push @attrlist, "shape=circle";
         push @attrlist, "label=\"" . $node->{short} . '"';
 #    push @attrlist, "fontsize=1";
 #    push @attrlist, "height=0.01";
 #    push @attrlist, "width=0.01";
 #    push @attrlist, "height=0.12";
 #    push @attrlist, "width=0.12";

         print '"' . $node->{id} . '" [' . join(", ", @attrlist) . '];' . "\n" ;
     }
     else
     {
         print '"' . $node->{id} . '" [' . join(", ", @attrlist) . '];' . "\n" ;
     }

     if (exists($node->{child}))
     {
         for my $kid (@{$node->{child}})
         {
             dotlabel($kid);
         }

     }

 }


 sub makedotfile
 {
     my ($plantree, $time_list, $do_timeline, $plan_num, $parse_ctx,
         $direction) = @_;

 #    print "\n\ndigraph plan1 { ranksep=.75; size = \"7.5,7.5\";\n\n \n";
     print "\n\ndigraph plan$plan_num {  \n";

 #    print "graph [bgcolor=black];\n edge [style=bold, color=white];\n";
 #    print "graph [bgcolor=black];\n edge [style=dashed, color=white];\n";
 #    print "graph [bgcolor=black];\n edge [style=dotted, color=white];\n";

     if ($do_timeline && scalar(@{$time_list}))
     {
         print "   ranksep=.75; size = \"7.5,7.5\";\n\n \n";
         print "   {\n node [shape=plaintext, fontsize=16];\n";
         print "/* the time-line graph */\n";

         print join(' -> ', @{$time_list} ), ";\n";
         print "}\n";

         print "node [shape=box];\n";

         while ( my ($kk, $vv) = each(%{$parse_ctx->{h_to_startoff}}))
         {
             print '{ rank = same; ' . $kk . '; ' . join("; ", @{$vv}) . "; }\n";
         }

     }

     print "rankdir=$direction;\n";

     dotkid($plantree);

     dotlabel($plantree);

     print "\n}\n";

 }


 exit();