src/test/regress/explain.pm - cloudberry - Git at Google

 # This is the workhorse of explain.pl, extracted into a module so that it
 # can be called more efficiently from other perl programs.
 package explain;

 use Data::Dumper;
 use strict;
 use warnings;

 use File::Temp;
 use IO::File;

 # IMPLEMENTATION NOTES:
 #
 # EXPLAIN ANALYZE final statistics in analyze_node:
 #
 # The final statistics look like this:
 #
 #  Slice statistics:
 #    (slice0)    Executor memory: 472K bytes.
 #    (slice1)    Executor memory: 464K bytes avg x 2 workers, 464K bytes max (seg0).
 #  Settings:
 #
 #  Total runtime: 52347.493 ms
 #
 # The "Settings" entry is optional (ie, it only exists if you change the
 # settings in your session).  If the "Settings" entry is missing explain.pl
 # adds a dummy entry to the statistics.  This technique is a bit easier
 # than changing the parser to handle both cases.
 #
 # Parse_node:
 #   InitPlan entries in greenplum are in separate slices sometimes, so
 #   explain.pl prefixes them with an arrow (and adds a fake cost) to make them
 #   look like a top-level execution node.  Again, this technique was easier
 #   than modifying the parser to special case InitPlan.
 #
 #  Plan parsing in general:
 #  The original code only dealt with the TPCH formatted output:
 # |QUERY PLAN|
 # |Gather Motion 64:1  (slice2)  (cost=5990545.19..5990545.21 rows=6 width=51)|
 # |  recv:  Total 4 rows with 1294937 ms to end.|
 # |  Merge Key: partial_aggr.junk_attr_1, partial_aggr.junk_attr_2|
 # |  ->  Sort  (cost=5990545.19..5990545.21 rows=6 width=51)|
 # |        Avg 1.00 rows x 4 workers.  Max 1 rows (seg49) with 1294 ms to end.|
 #
 # It was easier to modify the parser to wrap the input with missing bars
 # than handle two cases (are you sensing a pattern here?).
 #
 # "Magic" Mode:
 #    This mode just adds an output filename option and constructs jpgs
 #
 # Output File Name:
 #  File to write the output to. Defaults to STDOUT.
 #
 # treemap:
 #  This routine applies a function over the entire parse tree
 #
 # OLAP fixups:
 #  OLAP queries have duplicate Shared Scan and Multi Slice Motion nodes.
 #  explain.pl only fixes them up for dot output, but not for yaml, perl, etc.
 #  The rationale is that dot handle digraphs nicely, but yaml and perl are
 #  more suitable for tree output.
 #

 my $outfh; # current output file handle
 my $outfh_need_close = 0;

 my $glob_optn;
 my $glob_qlist;
 my $glob_direction;
 my $glob_timeline;
 my $glob_prune;
 my $glob_outi;
 my $glob_outfh;
 my $glob_in_fh;
 my @glob_in_lines;
 my $glob_statcolor;
 my $glob_edge;

 my $GV_formats; # graphviz output formats

 my %glob_coltab;

 my %glob_divcol;

 my $glob_colorscheme;
 BEGIN {

     $GV_formats = '^(jpg|bmp|ps|pdf|png)$';

     # table of valid "qualitative" color schemes

  %glob_coltab = (set312 => [
                               '#8DD3C7',
                               '#FFFFB3',
                               '#BEBADA',
                               '#FB8072',
                               '#80B1D3',
                               '#FDB462',
                               '#B3DE69',
                               '#FCCDE5',
                               '#D9D9D9',
                               '#BC80BD',
                               '#CCEBC5',
                               '#FFED6F'
                               ],
                    paired12 => [
                                 '#a6cee3',
                                 '#1f78b4',
                                 '#b2df8a',
                                 '#33a02c',
                                 '#fb9a99',
                                 '#e31a1c',
                                 '#fdbf6f',
                                 '#ff7f00',
                                 '#cab2d6',
                                 '#6a3d9a',
                                 '#ffff99',
                                 '#b15928'
                                 ],
                    pastel19 => [
                                 '#fbb4ae',
                                 '#b3cde3',
                                 '#ccebc5',
                                 '#decbe4',
                                 '#fed9a6',
                                 '#ffffcc',
                                 '#e5d8bd',
                                 '#fddaec',
                                 '#f2f2f2'
                                 ],
                    pastel24 => [
                                 '#b3e2cd',
                                 '#fdcdac',
                                 '#cbd5e8',
                                 '#f4cae4',
                                 '#e6f5c9',
                                 '#fff2ae',
                                 '#f1e2cc',
                                 '#cccccc'
                                 ],
                    set19 => [
                              '#e41a1c',
                              '#377eb8',
                              '#4daf4a',
                              '#984ea3',
                              '#ff7f00',
                              '#ffff33',
                              '#a65628',
                              '#f781bf',
                              '#999999'
                              ],
                    set28 => [
                              '#66c2a5',
                              '#fc8d62',
                              '#8da0cb',
                              '#e78ac3',
                              '#a6d854',
                              '#ffd92f',
                              '#e5c494',
                              '#b3b3b3'
                              ],
                    original => [
                                 'azure',
                                 'cornsilk',
                                 'lavender',
                                 'mintcream',
                                 'mistyrose',
                                 'lightgray',
                                 'salmon',
                                 'goldenrod',
                                 'cyan'
                                 ],
                    );

     # diverging color schemes

     %glob_divcol = (
                  rdbu11 => [
                             '#67001f',
                             '#b2182b',
                             '#d6604d',
                             '#f4a582',
                             '#fddac7',
                             '#f6f6f6',
                             '#d1e5f0',
                             '#92c5de',
                             '#4393c3',
                             '#2166ac',
                             '#053061'
                             ],
                     );

 }

 sub qlist_fixup
 {
     my $qlist = shift;

     my @outi;

     for my $qnum (@{$qlist})
     {
         if ($qnum =~ m/^\d+$/)
         {
             push @outi, $qnum;
         }
         else
         {
             if ($qnum =~ m/^\d+\-\d+$/)
             {
                 my $expr = $qnum;
                 $expr =~ s/\-/\.\./;

                 eval "for my \$val ($expr) { push \@outi, \$val; }";
             }
             else
             {
                 die("Invalid format for querylist: \'$qnum\'\n");
                 exit(1);
             }

         }

     }

     return \@outi;

 }


 # dump a nice graph listing all of the color schemes (neato is preferred)
 sub dodumpcolor
 {
     my $coltab = shift;
     my $fh = shift;

     my @ggg = sort(keys(%{$coltab}));

     # centered, with lines (not arrows)
     print $fh "digraph plan1 {  graph [center=\"root\",root=\"root\"] ;\n edge [dir=none]\n";

     # adjust the lengths to avoid overlap
     for my $ii (0..(scalar(@ggg)-1))
     {
         print $fh '"root" -> "' . $ii. '"' . " [len=2];\n";

         my $jj = 0;

         for my $cc (@{$coltab->{$ggg[$ii]}})
         {
             print $fh '"' . $ii. '" -> "' . $ii. "." . $jj. '"'
                 . " [len=1];\n";
             $jj++;
         }
     }

     print $fh '"root" [label="color schemes"]' . ";\n";

     for my $ii (0..(scalar(@ggg)-1))
     {
         print $fh '"' . $ii . '" [label="' . $ggg[$ii] . '"]' . ";\n";

         my $jj = 0;

         for my $cc (@{$coltab->{$ggg[$ii]}})
         {
             print $fh '"' . $ii . "." . $jj .
                 '" [label="", style=filled, ' .
                 'fillcolor="' . $cc . '"]' . ";\n";
             $jj++;
         }
     }
     print $fh "\n}\n";

 }

 # Input can be passed in as an array of lines, or as a file handle.
 sub explain_init
 {
     my %args = (
         # defaults
         QUERY_LIST => [],
         OPERATION => 'YAML',
         INPUT_FH => undef,
         INPUT_LINES => undef,
         OUTPUT => undef,
         OUTPUT_FH => undef,
         DIRECTION => 'BT',
         COLOR_SCHEME => 'set28',
         TIMELINE => '',
         PRUNE => undef,
         STATCOLOR => undef,
         EDGE_SCHEME => undef,

         # override the defaults from argument list
         @_
     );

     my @qlst = @{$args{QUERY_LIST}};

     $glob_optn = $args{OPERATION};
     $glob_optn = "jpg" if ($glob_optn =~ m/^jpeg/i);

     $glob_outi      = $args{OUTPUT};
     $glob_outfh     = $args{OUTPUT_FH};

     if (defined($args{INPUT_FH}))
     {
     $glob_in_fh = $args{INPUT_FH};
     } elsif(defined($args{INPUT_LINES}))
     {
     @glob_in_lines = @{$args{INPUT_LINES}};
     }
     else
     {
     die "INPUT_FH or INPUT_LINES argument must be given";
     }

     my $DEFAULT_COLOR = "set28";
     my $colorscheme = $args{COLOR_SCHEME};

     $glob_timeline  = $args{TIMELINE};
     $glob_prune     = $args{PRUNE};
     $glob_statcolor = $args{STATCOLOR};

     $glob_edge  = $args{EDGE_SCHEME};

     my $dir  = $args{DIRECTION};
     if ($dir !~ m/^(TB|BT|LR|RL)$/i)
     {
         $glob_direction = "BT";
     }
     else
     {
         $glob_direction = uc($dir);
     }

     $colorscheme = lc($colorscheme);

 #    print "color: $colorscheme\n";

     if (exists($glob_coltab{$colorscheme}))
     {
         $glob_colorscheme = $colorscheme;
     }
     else
     {
         if ($colorscheme =~ m/list|dump/i)
         {
             my ($tmpfh, $tmpnam) = tempfile();

             # write to a temporary file
             dodumpcolor(\%glob_coltab, $tmpfh);

             close $tmpfh;

             my $catcmd = "cat $tmpnam";

             # format with neato if jpg was specified
             if ($glob_optn =~ m/$GV_formats/i)
             {
                 my $dotapp = "/Applications/Graphviz.app/Contents/MacOS/neato";

                 if ($^O !~ m/darwin/)
                 {
                     $dotapp = `which neato`;
                     chomp($dotapp);
                 }
                 if (defined($dotapp) && length($dotapp) && (-e $dotapp))
                 {
                     $catcmd .= " | $dotapp -T$glob_optn";
                 }
             }

             system($catcmd);

             unlink $tmpnam;

             exit(0);
         }
         else
         {
             my $colorschemelist = join("\n", sort(keys(%glob_coltab))) . "\n";

             # identify the default color
             $colorschemelist =~
                 s/$DEFAULT_COLOR/$DEFAULT_COLOR  \(default\)/gm;

             print "\nvalid color schemes are:\n";
             print $colorschemelist;
             print "\nUse: \"explain.pl -color dump -opt jpg > graph.jpg\"\n";
             print "to construct a JPEG showing all the valid color schemes.\n";
             print "\nColors from www.ColorBrewer.org by Cynthia A. Brewer, Geography,\nPennsylvania State University.\n\n";

             exit(0);
         }
     }

     @qlst = split(/,/,join(',', @qlst));

     $glob_qlist = qlist_fixup(\@qlst);

 #    print "loading...\n" ;
 }

 sub analyze_node
 {
     my ($node, $parse_ctx) = @_;

     if (defined($node) && exists($node->{txt}))
     {

         # gather analyze statistics if it exists in this node...
         if ($node->{txt} =~
             m/Slice\s+statistics.*(Settings.*)*Total\s+runtime/s)
         {
             my $t1 = $node->{txt};

             # NOTE: the final statistics look something like this:

             # Slice statistics:
             #   (slice0)    Executor memory: 472K bytes.
             #   (slice1)    Executor memory: 464K bytes avg x 2 workers, 464K bytes max (seg0).
             # Settings:
             # Total runtime: 52347.493 ms

             # (we've actually added some vertical bars so it might look
             # like this):
             # || Slice statistics:
             # ||   (slice0)    Executor memory: 472K bytes.

             # NB: the "Settings" entry is optional, so
             # add Settings if they are missing
             unless ($t1 =~
                     m/Slice\s+statistics.*Settings.*Total\s+runtime/s)
             {
                 $t1 =~
                     s/\n.*\s+Total\s+runtime/\n Settings\:   \n Total runtime/;
             }

             my @foo = ($t1 =~ m/Slice\s+statistics\:\s+(.*)\s+Settings\:\s+(.*)\s+Total\s+runtime:\s+(.*)\s+ms/s);

             if (scalar(@foo) == 3)
             {
                 my $mem  = shift @foo;
                 my $sett = shift @foo;
                 my $runt = shift @foo;

                 $mem  =~ s/\|\|//gm; # remove '||'...
                 $sett =~ s/\|\|//gm;

                 my $statstuff = {};

                 my @baz = split(/\n/, $mem);
                 my $sliceh = {};
                 for my $elt (@baz)
                 {
                     my @ztesch = ($elt =~ m/(slice\d+)/);
                     next unless (scalar(@ztesch));
                     $elt =~ s/\s*\(slice\d+\)\s*//;
                     my $val = shift @ztesch;
                     $sliceh->{$val} = $elt;
                 }

                 $statstuff->{memory}   = $sliceh;
                 $statstuff->{settings} = $sett;
                 $statstuff->{runtime}  = $runt;
                 $parse_ctx->{explain_analyze_stats} = $statstuff;
                 $node->{statistics} = $statstuff;
             }
         }

         my @short = $node->{txt} =~ m/\-\>\s*(.*)\s*\(cost\=/;
         $node->{short} = shift @short;

         unless(exists($node->{id}))
         {
             print $outfh Data::Dumper->Dump([$node]), "\n";
         }

         if ($node->{id} == 1)
         {
             @short = $node->{txt} =~ m/^\s*\|\s*(.*)\s*\(cost\=/;
             $node->{short} = shift @short;

             # handle case where dashed line might have wrapped...
             unless (defined($node->{short}) && length($node->{short}))
             {
                 # might not be first line...
                 @short = $node->{txt} =~ m/\s*\|\s*(.*)\s*\(cost\=/;
                 $node->{short} = shift @short;
             }


         }

         # handle case of "cost-free" txt (including a double ||
         # and not first line, or screwed-up parse of short as a single bar
         #
         # example: weird initplan like:
         # ||                       ->  InitPlan  (slice49)
         if (defined($node->{short}) && length($node->{short})
             && ($node->{short} =~ m/\s*\|\s*/))
         {
             $node->{short} = "";
         }

         unless (defined($node->{short}) && length($node->{short}))
         {
             @short = $node->{txt} =~ m/\s*\|(\|)?\s*(\w*)\s*/;
             $node->{short} = shift @short;

             if (defined($node->{short}) && length($node->{short})
                 && ($node->{short} =~ m/\s*\|\s*/))
             {
                 $node->{short} = "";
             }

             # last try!!
             unless (defined($node->{short}) && length($node->{short}))
             {
                 my $foo = $node->{txt};
                 $foo =~ s/\-\>//gm;
                 $foo =~ s/\|//gm;
                 $foo =~ s/^\s+//gm;
                 $foo =~ s/\s+$//gm;
                 $node->{short} = $foo;
             }

 #            print "long: $node->{txt}\n";
 #            print "short: $node->{short}\n";
         }

         $node->{short} =~ s/\s*$//;

         # remove quotes which mess up dot file
         $node->{short} =~ s/\"//gm;

 #            print "long: $node->{txt}\n";
 #            print "short: $node->{short}\n";

         # XXX XXX XXX XXX: FINAL "short" fixups
         while (defined($node->{short}) && length($node->{short})
                && ($node->{short} =~ m/(\n)|^\s+|\s+$|(\(cost\=)/m))
         {
             # remove leading and trailing spaces...
             $node->{short} =~ s/^\s*//;
             $node->{short} =~ s/\s*$//;

             # remove newlines
             $node->{short} =~ s/(\n).*//gm;

             # remove cost=...
             $node->{short} =~ s/\(cost\=.*//gm;

 #            print "short fixup: $node->{short}\n\n\n";
         }

         if (exists($node->{child}))
         {
             delete $node->{child}
             unless (defined($node->{child}) && scalar(@{$node->{child}}));
         }
     }
 }

 sub parse_node
 {

     my ($ref_id, $parse_ctx, $depth, $plan_rows, $parent) = @_;

 #    print "depth: $depth\n";
 #    print "row: ",$plan_rows->[0],"\n"      if (scalar(@{$plan_rows}));

 #    print "first: $first\n" if defined ($first);

     my $spclen = undef;
     my $node = undef;

     my $no_more_text = 0;

     while (scalar(@{$plan_rows}))
     {
         my $row = $plan_rows->[0];

         unless (defined($node))
         {
             $node = {};

             $node->{child} = [];

             $node->{txt} = "";

             $node->{parent} = $parent
                 if (defined($parent));

             my $id = $$ref_id;
             $id++;
             $$ref_id= $id;
             $node->{id} = $id;
         }

         # XXX XXX XXX XXX XXX XXX XXX XXX XXX XXX XXX XXX XXX XXX XXX
         # make initplan into a fake node so the graphs look nicer (eg
         # tpch query 15).  Prefix it with an arrow and add a fake cost.
         if ($row =~ m/\|(\s)*InitPlan/)
         {
             $row =~ s/InitPlan/\-\>  InitPlan/;
             if ($row !~ m/\(cost=/)
             {
                 $row =~ s/\|$/\(cost=\?\)|/;
             }
         }

         if ($row !~ m/\|(\s)*\-\>/)
         {
             # add text to existing node

             if ($no_more_text)
             {
                 print "error: no more text for ". $node->{id}, "\n";
             }

             $node->{txt} .= "\n" . $row;

 #            print "txt: $node->{txt}\n";

             shift @{$plan_rows};
             next;
         }
         else
         {
             # new node
             unless ($no_more_text)
             {
                 unless (length($node->{txt}))
                 {
                     $node->{txt} .= $row;
                     shift @{$plan_rows};
                     next;
                 }
             }

             # match the leading spaces before the '->', eg:
             # "|  ->  Sort  (cost=5990545.19..599..."

             my @spc = ($row =~ m/\|(\s*)\-\>/);

 #            print "space match:", Data::Dumper->Dump(\@spc), "\n";

             $spclen = scalar(@spc) ?  length($spc[0]) : 0;

 #            print "space len: $spclen, depth: $depth\n";

             if ($spclen > $depth)
             {
                 # found a child
                 push @{$node->{child}}, parse_node($ref_id, $parse_ctx,
                                                    $spclen, $plan_rows,
                                                    $node->{id});
             }

         }

         if (defined($spclen))
         {
             if ($spclen <= $depth)
             { # found a sibling or parent
                 # need to put the row back on the head of the list

                 if (defined($node) && exists($node->{txt}))
                 {
                     analyze_node($node, $parse_ctx);

                     return $node;
                 }
             }
         }
         else
         {
             die ("what the heck?");
         }

         $spclen = undef;
         $no_more_text = 1;

     } # end while

     if (defined($node))
     {
         analyze_node($node, $parse_ctx);
     }

     return $node;

 }


 sub run
 {
     my @bigarr;

     my $state = "INIT";

     my $pair = undef;

     my ($query, $plan);

     my $tpch_format=0;
     my $bigdash = '-' x 40; # make big dash smaller

     my $magic;

     my $lineno = 0;

     while(1)
     {
         my $ini;

         if ($glob_in_fh)
         {
             $ini = <$glob_in_fh>;
             last if (!defined($ini)); # EOF
         }
         else
         {
             last if (scalar(@glob_in_lines) == 0 || $lineno == $#glob_in_lines);
             $ini = $glob_in_lines[$lineno];
         }
         $lineno++;

         if ($state =~ m/INIT/)
         {
             if ($ini !~ m/(^EXPLAIN ANALYZE)|(QUERY PLAN)/)
             {
                 next;
             }

             $query = "";
             $plan  = "";
             $pair = {};

             if ($ini =~ m/^EXPLAIN ANALYZE/)
             {
                 $tpch_format = 1;
                 $state = "GETQUERY";
                 next;
             }

             if ($ini =~ m/QUERY PLAN/)
             {
                 $tpch_format = 0;
                 $plan  = "";
                 $state = "GETPLAN";
                 next;
             }

         }

         if ($state !~ m/GETPLAN/)
         {
             # should be START or GETQUERY only...
             if ($tpch_format)
             {
                 if ($ini =~ m/^EXPLAIN ANALYZE/)
                 {
                     if (defined($pair))
                     {
                         $pair->{plan} = $plan;
                         $pair->{query} = $query;
                         push @bigarr, $pair;
                     }
                     $pair = {};
                     $query = "";
                     $plan  = "";
                     $state = "GETQUERY";
                     next;
                 }
             }
             else
             {
                 # not tpch analyze
                 if ($ini =~ m/QUERY PLAN/)
                 {
                     if (defined($pair))
                     {
                         $pair->{plan} = $plan;
                         $pair->{query} = $query;
                         push @bigarr, $pair;
                     }
                     $pair = {};
                     $query = "";
                     $plan  = "";
                     $state = "GETPLAN";
                     next;
                 }

             }
             if ($state =~ m/GETQUERY/)
             {
                 if ($ini =~ m/QUERY PLAN/)
                 {
                     if (!($tpch_format))
                     {
                         if (defined($pair))
                         {
                             $pair->{plan} = $plan;
                             $pair->{query} = $query;
                             push @bigarr, $pair;
                         }
                         $pair = {};
                         $query = "";
                     }

                     $plan  = "";
                     $state = "GETPLAN";
                     next;
                 }

                 $query .= $ini;
             }

         } # end not getplan

         if ($state =~ m/GETPLAN/)
         {

             if ($tpch_format)
             {
                 if ($ini !~ m/\|(.*)\|/)
                 {
                     $state = "START";
                     next;
                 }
             }
             else
             {
                 if ($ini =~ m/(\(\d+\s+rows\))|(Time\s+was.*seconds\.\s+Query\s+ended)/)
                 {
                     $state = "START";
                     next;
                 }
             }
             # a bit weird here -- just ignore the separator.  But
             # maybe we should invest some effort to determine that the
             # separator is the next line after the header (and only
             # ignore it once) ?
             next
                 if ($ini =~ m/$bigdash/);

             # add the missing bars
             if (!($tpch_format))
             {
                 if ($ini !~ m/\|(.*)\|/)
                 {
                     $ini = '|' . $ini . '|';
                 }
             }

             $plan .= $ini;
         }

     } # end big for
     if (defined($pair))
     {
         $pair->{plan} = $plan;
         $pair->{query} = $query;
         push @bigarr, $pair;
     }

 #print scalar(@bigarr), "\n";


 #print Data::Dumper->Dump(\@bigarr);

 #print $bigarr[0]->{plan};

     unless(scalar(@{$glob_qlist}))
     {
         # build a 1-based list of queries
         for (my $ii =1; $ii <= scalar(@bigarr); $ii++)
         {
             push @{$glob_qlist}, $ii;
         }
     }

     for my $qqq (@{$glob_qlist})
     {
         my $qnum = $qqq - 1; # 0 based vs 1 based

         if ($qnum >  scalar(@bigarr))
         {
             warn("specified query $qqq is out-of-range -- skipping...\n");
             next;
         }

         if ($glob_optn =~ m/query|text|txt/i)
         {
             doquery($bigarr[$qnum]->{query});
             next;
         }

         my $plantxt = $bigarr[$qnum]->{plan};

         unless (defined($plantxt) && length($plantxt))
         {
             warn("invalid plan for query $qqq -- skipping...\n");
             next;
         }

 #print $plantxt, "\n";

         my @plan_r = split(/\n/, $plantxt);

         my $pr = \@plan_r;

         my $parse_ctx = {};

         my $id = 0;

         $parse_ctx->{alltimes} = {};

         $parse_ctx->{explain_analyze_stats} = {};

         my $plantree = parse_node(\$id, $parse_ctx, 0, $pr);

         if (defined($glob_prune))
         {
             if ($glob_prune =~ m/stat|heavy|heavily/i)
             {
                 my $map_expr = 'delete $node->{to_end};';
                 treeMap($plantree, undef, $map_expr);
                 $map_expr = 'delete $node->{to_first};';
                 treeMap($plantree, undef, $map_expr);

                 # additional statistics
                 $map_expr = 'delete $node->{total_time};';
                 treeMap($plantree, undef, $map_expr);
                 $map_expr = 'delete $node->{statistics};';
                 treeMap($plantree, undef, $map_expr);
             }
             if ($glob_prune =~ m/heavy|heavily/i)
             {
                 treeMap($plantree, 'prune_heavily($node);');
             }
         }


         # magic mode : display everything magically
         #
         # NOTE: only set to magic on the first iteration, then reset
         # to jpg, so performs correctly with multiple queries
         if ($glob_optn =~ m/magic/i)
         {
             $glob_optn = "jpg";

             my ($tmpfh, $tmpnam) = tempfile();

             # create a temporary directory name -- just append ".dir"
             # to the new tempfile name and mkdir
             my $tmpdir = $tmpnam . ".dir";

             mkdir($tmpdir) or            die "magic failed" ;

             unlink $tmpnam;  # we didn't need this tempfile anyhow

             # reset output file name to create files in the new
             # temporary directory
             $glob_outi = File::Spec->catfile($tmpdir, "query_");

             $magic = $glob_outi;
         }

         if ($glob_outi)
         {
             if ($outfh_need_close)
             {
                 close $outfh;
                 $outfh_need_close = 0;
             }

             my $outfilename = $glob_outi;

             # only need numbering if processed more than one query
             my $neednum = (scalar(@bigarr) > 1);

             # check if name has an extension like ".foo"
             if ($outfilename =~ m/\.(.){1,5}$/)
             {
                 # qqq is query num (1 based)

                 my $formatq = sprintf("%03d", $qqq);

                 $outfilename =~ s/\.(.*)$/$formatq\.$1/
                     if ($neednum);
             }
             else
             {
                 # qqq is query num (1 based)
                 my $formatq = sprintf("%03d", $qqq);

                 $outfilename .= $formatq
                     if ($neednum);
                 if ($glob_optn =~ m/yaml/i)
                 {
                     $outfilename .= ".yml";
                 }
                 if ($glob_optn =~ m/json/i)
                 {
                     $outfilename .= ".json";
                 }
                 if ($glob_optn =~ m/perl|dump/i)
                 {
                     $outfilename .= ".perl";
                 }
                 if ($glob_optn =~ m/dot|graph/i)
                 {
                     $outfilename .= ".dot";
                 }
                 if ($glob_optn =~ m/$GV_formats/i)
                 {
                     $outfilename .= ".$glob_optn";
                 }
             }

             open ($outfh, ">$outfilename" ) or die "can't open file $outfilename: $!";
             $outfh_need_close = 1;
 #            print $outfilename, "\n";
         }
         elsif ($glob_outfh)
         {
             $outfh = $glob_outfh;
         }
         else
         {
             $outfh = *STDOUT;
         }

         if ($glob_optn =~ m/yaml/i)
         {
             doyaml($plantree);
         }
         if ($glob_optn =~ m/json/i)
         {
             doyaml($plantree, "json");
         }
         if ($glob_optn =~ m/perl|dump/i)
         {
             doDataDump($plantree);
         }
         if ($glob_optn =~ m/operator/i)
         {
             doOperatorDump($plantree);
         }

         if ($glob_optn =~ m/$GV_formats/i)
         {
             my $dotapp = "/Applications/Graphviz.app/Contents/MacOS/dot";

             if ($^O !~ m/darwin/)
             {
                 $dotapp = `which dot`;
                 chomp($dotapp);
             }
             die "could not find dot app: $dotapp"
                 unless (defined($dotapp) && length($dotapp) && (-e $dotapp));

             # should have been able to redirect STDOUT thru a pipe
             # directly to dotapp, but didn't work.  Use a tmpfile
             # instead.

             my ($tmpfh, $tmpnam) = tempfile();
             open my $oldout, ">&STDOUT"     or die "Can't dup STDOUT: $!";

             close STDOUT;
             open (STDOUT, ">$tmpnam" ) or die "can't open STDOUT: $!";

             select STDOUT; $| = 1;      # make unbuffered

             close STDOUT;
             open STDOUT, ">&", $oldout or die "Can't dup \$oldout: $!";

             system("cat $tmpnam | $dotapp -T$glob_optn");

             unlink $tmpnam;


         }
     } #end for querynum

     if ($outfh_need_close)
     {
         close $outfh;
         $outfh_need_close = 0;
     }

     # magically display all files
     if (defined($magic))
     {
         # only need numbering if processed more than one query
         my $neednum = (scalar(@{$glob_qlist}) > 1);

         if ($^O =~ m/darwin/)
         {
             # use ImageMagick montage
             my $ggg = $magic . '*';
             my $montage = `which montage`;
             chomp($montage);

             # only perform a montage if more than one query
             if ($neednum && defined($montage) && ( -e $montage))
             {
                 my $dir = $magic;
                 # get the directory name (remove "query_" prefix)
                 $dir =~ s/query_$//;
                 system("cd $dir; montage -label \'%f\' $ggg -title \"$dir\n`date`\" -shadow INDEX.html; open INDEX.html");
             }
             else
             {
                 system("open $ggg");
             }
         }
     }

 }
 #print "\nmax id: $id\n\n";

 #

 sub treeMap
 {
     my ($node, $pre_map, $post_map, $ctx) = @_;

     eval "$pre_map"
         if (defined($pre_map));

     if (exists($node->{child}))
     {
         for my $kid (@{$node->{child}})
         {
             treeMap($kid, $pre_map, $post_map, $ctx);
         }
     }
     eval "$post_map"
         if (defined($post_map));
 }

 sub doDataDump
 {
     my $plantree = shift;


     local $Data::Dumper::Indent   = 1;
     local $Data::Dumper::Terse    = 1;
     local $Data::Dumper::Sortkeys = 1;

     my $map_expr = 'delete $node->{txt};';
 #    my $map_expr = 'print "foo\n"';
     treeMap($plantree, undef, $map_expr);

     print $outfh Data::Dumper->Dump([$plantree]);
 }

 sub doOperatorDump
 {
     my $plantree = shift;

     print $outfh $plantree->{short}, "\n" if (exists($plantree->{short}));

     return
         unless (exists($plantree->{child}));

     for my $kid (@{$plantree->{child}})
     {
         doOperatorDump($kid);
     }
 }

 # add slice info to node
 # and gather explain analyze stats
 sub addslice
 {
     my ($node, $ctx) = @_;

     # AUTO-6: find nodes with "(slice1)" info where the slice numbers aren't
     # part of the "Slice statistics"

     my $txt1 = $node->{txt};
     $txt1 =~ s/Slice statistics.*//gs;

     if ($txt1 =~ /(slice(\d+))/)
     {
         my @ggg = ($txt1 =~ m/(slice(\d+))/) ;
         $node->{slice} = shift @ggg;

         # check if we have explain analyze stats for the slice
         if (exists($ctx->{explain_analyze_stats})
             && exists($ctx->{explain_analyze_stats}->{memory})
             && exists($ctx->{explain_analyze_stats}->{memory}->{$node->{slice}}))
         {
             $node->{memory} =
                 $ctx->{explain_analyze_stats}->{memory}->{$node->{slice}};
         }

     }
 }


 sub doquery
 {
     my $qtxt = shift;

     print $outfh $qtxt, "\n";

 }

 sub doyaml
 {
     my ($plantree, $opti) = @_;

     $opti = "yaml" unless (defined($opti));

     if ($opti =~ m/json/i)
     {
         # JSON might not be installed, so test for it.

         if (eval "require JSON")
         {
             my $map_expr = 'delete $node->{txt};';

             treeMap($plantree, undef, $map_expr);

             # because JSON is REQUIREd, not USEd, the symbols are not
             # imported into the environment.
             print $outfh JSON::objToJson($plantree, {pretty => 1, indent => 2});
         }
         else
         {
             die("Fatal Error: The required package JSON is not installed -- please download it from www.cpan.org\n");
             exit(1);
         }

     }
     else
     {
         # YAML might not be installed, so test for it.

         if (eval "require YAML")
         {
             my $map_expr = 'delete $node->{txt};';

             treeMap($plantree, undef, $map_expr);

             # because YAML is REQUIREd, not USEd, the symbols are not
             # imported into the environment.
             print $outfh YAML::Dump($plantree);
         }
         else
         {
             die("Fatal Error: The required package YAML is not installed -- please download it from www.cpan.org\n");
             exit(1);
         }

     }

 }

 # remove slice numbering information to construct even more generic plans
 sub prune_heavily
 {
     my $node = shift;

     return
         unless (exists($node->{short}));

     # example: (slice1; gang3; segments: 3)
     if ($node->{short} =~ m/.*\(slice\d+; gang(\d+);.*\).*/)
     {
         $node->{gangid} = int($1);
     }

     # example: (slice1; segments: 3)
     if ($node->{short} =~ m/.*\(.*segment.*:\s+(\d+).*\).*/)
     {
         $node->{segments} = int($1);
     }

     if ($node->{short} =~ m/Delete\s*\(slice.*segment.*\)\s*\(row.*width.*\)/)
     {
         # QA-1309: fix strange DELETE operator formatting
         $node->{short} = "Delete";
     }
     elsif ($node->{short} =~ m/Update\s*\(slice.*segment.*\)\s*\(row.*width.*\)/)
         {
                 # QA-1309: fix strange UPDATE operator formatting
                 $node->{short} = "Update";
         }
     elsif ($node->{short} =~ m/(\d+)\:(\d+)/)
     {

         # example: Gather Motion 8:1 (slice4);

         $node->{sendsize} = int($1);
         $node->{recvsize} = int($2);

         # strip the number of nodes and slice information
         $node->{short} =~ s/\s+\d+\:\d+.*//;

         # Note: don't worry about removing "(slice1)" info from the
         # "short" because addslice processes node->{text}
     }
 }

 # identify the slice for each node
 # and find Shared Scan "Primary"
 # and find MultiSliceMotion
 sub pre_slice
 {
     my ($node, $ctx) = @_;

     {
         if (scalar(@{$ctx->{a1}}))
         {
             my $parent = $ctx->{a1}->[-1];

             unless (exists($node->{slice}))
             {
                 if (exists($parent->{slice}))
                 {
                     $node->{slice} = $parent->{slice};
                 }
             }
         }

         # olap stuff

         if ($node->{short} =~ m/^Shared Scan/)
         {
             # if the Shared Scan has a child it is the "primary"
             if (exists($node->{child}))
             {
                 my $share_short_fixup = $node->{short};

                 # remove the slice number from the "short"
                 $share_short_fixup =~ s/(\d+)\:/\:/g;

                 if (!exists($ctx->{share_input_h}->{$share_short_fixup}))
                 {
                     $ctx->{share_input_h}->{$share_short_fixup} = $node;
                 }
             }
             else # not the primary, mark as a duplicate node
             {
                 $node->{SharedScanDuplicate} = 1;
             }
         }
         if ($node->{short} =~ m/^Multi Slice Motion/)
         {
             # choose first Multi Slice Motion node as primary
             if (!exists($ctx->{multi_slice_h}->{$node->{short}}))
             {
                 $ctx->{multi_slice_h}->{$node->{short}} = $node;
             }
             else # not the primary, mark as a duplicate node
             {
                 $node->{MultiSliceMotionDuplicate} = 1;
             }
         }

         if (exists($node->{total_time}))
         {
             my $tt = $node->{total_time};
             my $tt2 = $tt * $tt;
             $ctx->{time_stats_h}->{cnt} += 1;
             $ctx->{time_stats_h}->{sum} += $tt;
             $ctx->{time_stats_h}->{sumsq} += $tt2;

             if (exists($ctx->{time_stats_h}->{tt_h}->{$tt}))
             {
                 push @{$ctx->{time_stats_h}->{tt_h}->{$tt}}, $node;
             }
             else
             {
                 $ctx->{time_stats_h}->{tt_h}->{$tt} = [$node];
             }
         }

     }

     push @{$ctx->{a1}}, $node;

 }

 sub post_slice
 {
     my ($node, $ctx) = @_;

     pop @{$ctx->{a1}};

 }

 # make all duplicate sharedscan nodes point back to primary
 sub sharedscan_fixup
 {
     my ($node, $ctx) = @_;

     if (exists($node->{SharedScanDuplicate}))
     {
         my $share_short_fixup = $node->{short};

         # remove the slice number from the "short"
         $share_short_fixup =~ s/(\d+)\:/\:/g;

         $node->{SharedScanDuplicate} =
             $ctx->{share_input_h}->{$share_short_fixup};
 #        $node->{id} =
 #            $node->{SharedScanDuplicate}->{id};
     }

     if (exists($node->{MultiSliceMotionDuplicate}))
     {
         $node->{MultiSliceMotionDuplicate} =
             $ctx->{multi_slice_h}->{$node->{short}};
         # XXX XXX: for this case the node is really the same
         $node->{id} =
             $node->{MultiSliceMotionDuplicate}->{id};
     }

 }

 sub human_num
 {
     my $esti = shift;

     my @suffix = qw(K M G T P E Z Y);
     my $suff = "";

     # try to shorten estimate specification
     while (length(POSIX::ceil($esti)) > 3)
     {
         $suff = shift @suffix;

         $esti = $esti/1000;
     }

     if (length($suff))
     {
         $esti *= 100;
         $esti = POSIX::floor($esti+0.5);
         $esti = $esti/100;

         $esti .= $suff;
     }

     return $esti;
 }

 # label left and right children
 sub label_fixup
 {
     my ($node, $ctx) = @_;

     return
         unless (exists($node->{short}));

     my @kidlist;

     if (exists($node->{child}))
     {
         for my $kid (@{$node->{child}})
         {
             # Ignore InitPlans when deciding inner/outer child
             if ($kid->{txt} !~ /InitPlan/)
             {
                 push @kidlist, $kid;
             }
         }
     }

     my $nkids;
     $nkids = scalar(@kidlist);

     return
         unless ($nkids >= 2);

     # sort kidlist by id for labeling
     my @sortedkidlist = sort { $a->{id} <=> $b->{id} } @kidlist;

     if ($nkids == 2 && $node->{txt} !~ /Append/)
     {
         $sortedkidlist[0]->{label} = "outer";
         $sortedkidlist[1]->{label} = "inner";
     }
     else
     {
         for my $i (0 .. $nkids)
         {
             $sortedkidlist[$i]->{label} = "child$i"
         }
     }
 }

 sub calc_color_rank
 {
     my $ctx = shift;

     return
         unless (defined($glob_statcolor));

     if ($ctx->{time_stats_h}->{cnt} > 1)
     {
         # population variance =
         # (sum of the squares)/n - (square of the sums)/n*n
         my $sum   = $ctx->{time_stats_h}->{sum};
         my $sumsq = $ctx->{time_stats_h}->{sumsq};
         my $enn   = $ctx->{time_stats_h}->{cnt};

         my $pop_var = ($sumsq/$enn) - (($sum*$sum)/($enn*$enn));
         my $std_dev = sqrt($pop_var);
         my $mean    = $sum/$enn;
         my $half    = $std_dev/2;

         # calculate a stanine (9 buckets, each 1/2 of stddev).  The
         # middle bucket (5, which is 4 if we start at zero) is
         # centered on the mean, so it starts on mean - (1/4 stddev),
         # and ends at mean + (1/4 stddev).
         my @bucket;
         my $buckstart = ($mean-($half/2))-(3*$half);

         push @bucket, 0;

         for my $ii (1..7)
         {
             push @bucket, $buckstart;
             $buckstart += $half;
         }
         push @bucket, 2**40; # "infinity"

         my @tlist = sort {$a <=> $b} (keys %{$ctx->{time_stats_h}->{tt_h}});

         # must have at least two
         my $firstt = shift @tlist;
         my $lastt  = pop @tlist;
 #        print "f,l: $firstt, $lastt\n";

         for my $nod (@{$ctx->{time_stats_h}->{tt_h}->{$firstt}})
         {
 #            print "first ", $nod->{id}, ": ", $nod->{short}, " - ", 0, "\n";
             $nod->{color_rank} = 10;
         }
         for my $nod (@{$ctx->{time_stats_h}->{tt_h}->{$lastt}})
         {
 #            print "last ", $nod->{id}, ": ", $nod->{short}, " - ", 10, "\n";
             $nod->{color_rank} = 1;
         }

 #        print "bucket: ", Data::Dumper->Dump(\@bucket);
 #        print "tlist: ", Data::Dumper->Dump(\@tlist);
 #        print Data::Dumper->Dump([$ctx->{time_stats_h}]);

         my $bucknum = 1;
         for my $tt (@tlist)
         {
 #            print "tt: $tt\n";
 #            print "bk: $bucket[$bucknum]\n";

             while ($tt > $bucket[$bucknum])
             {
 #                print "$tt > $bucket[$bucknum]\n";
 #                last if ($bucknum >= 11);
                 $bucknum++;
             }
             for my $nod (@{$ctx->{time_stats_h}->{tt_h}->{$tt}})
             {
 #                print "node ", $nod->{id}, ": ", $nod->{short}, " - ", $bucknum, "\n";
 #                $nod->{color_rank} = ($bucknum-1);
                 $nod->{color_rank} = (10 - $bucknum);
             }
         }

     }

 }

 sub dodotfile
 {
     my ($plantree, $time_list, $plan_num, $parse_ctx, $direction) = @_;

     {
         my $map_expr = 'addslice($node, $ctx); ';

         treeMap($plantree, $map_expr, undef, $parse_ctx);
     }


 #    $map_expr = 'propslice($node, $ctx);';
     my $ctx = {level => 0, a1 => [],
                share_input_h => {}, multi_slice_h => {},
                time_stats_h => { cnt=>0, sum=>0, sumsq=>0, tt_h => {} }  };

 #    my $map_expr = 'print "foo\n"';
     treeMap($plantree,
             'pre_slice($node, $ctx); ',
             'post_slice($node, $ctx); ',
             $ctx);

     calc_color_rank($ctx);

     treeMap($plantree,
             'sharedscan_fixup($node, $ctx); ',
             undef,
             $ctx);

     # always label the left/right sides
     treeMap($plantree,
             'label_fixup($node, $ctx); ',
             undef,
             $ctx);

     my $dotimeline = $glob_timeline;

     makedotfile($plantree, $time_list, $dotimeline, $plan_num, $parse_ctx,
                 $direction);
 }


 sub dotkid
 {
     my $node = shift;

     # XXX XXX: olap fixup - don't label duplicate multi slice motion nodes
     return
         if (exists($node->{MultiSliceMotionDuplicate}));

     # XXX XXX: olap fixup - have children of primary sharedscan
     # point to this node
     if (exists($node->{SharedScanDuplicate}))
     {
         for my $kid (@{$node->{SharedScanDuplicate}->{child}})
         {
             print $outfh '"' . $kid->{id} . '" -> "' . $node->{id} . '"' . ";\n";
         }
     }

     my $docrunch = 2;

     if (exists($node->{child}))
     {
         if (($docrunch != 0 ) && (scalar(@{$node->{child}} > 10)))
         {
             my $maxi = scalar(@{$node->{child}});

             $maxi -= 2;

             for my $ii (2..$maxi)
             {
                 $node->{child}->[$ii]->{crunchme} = 1;
             }

             if ($docrunch == 2)
             {
                 splice(@{$node->{child}}, 3, ($maxi-2));

                 $node->{child}->[2]->{short} = "... removed " . ($maxi - 3) . " nodes ...";
             }


         }

         for my $kid (@{$node->{child}})
         {
             my $edge_label = "";

             print $outfh '"' . $kid->{id} . '" -> "' . $node->{id} . '"';

             if (exists($kid->{label}))
             {
                 $edge_label .= $kid->{label};
             }

             if (exists($kid->{rows_out}))
             {
                 $edge_label .= " ";
                 $edge_label .= " "
                     if (length($edge_label));
                 $edge_label .= $kid->{rows_out};
             }

             if (length($edge_label))
             {
                 print $outfh ' [label="' . $edge_label . '" ] ';
             }

             print $outfh ";\n";
         }

         for my $kid (@{$node->{child}})
         {
             dotkid($kid);
         }

     }

 }

 sub dotlabel_detail
 {
     my $node = shift;

 #    return     $node->{short} ;

     my $outi = $node->{short};

     my ($frst, $last) = (" ", " ");

     if (exists($node->{to_end}))
     {
         $last = "end: " . $node->{to_end};
     }
     if (exists($node->{to_first}))
     {
         $frst = "first row: " . $node->{to_first};
     }

     my $slice = $node->{slice};
     $slice = " "
         unless (defined($slice));


     if ((length($frst) > 1) || (length($last) > 1))
     {
         my $memstuff = "";

         # add memory statistics if have them...
         if (exists($node->{memory}))
         {
             $memstuff = " | { {" . $node->{memory} . "} } ";
             # make multiline - split on comma and "Work_mem"
             # (using the vertical bar formatting character)
             $memstuff =~ s/\,/\,\| /gm;
             $memstuff =~ s/Work\_mem/\| Work\_mem/gm;
         }

 #        $outi .= " | { " . join(" | " , $frst, $last) . " } ";
         $outi .= " | { " . join(" | " , $slice, $frst, $last) . " } " . $memstuff;

         # wrapping with braces changes record organization to vertical
         $outi = "{ " . $outi . " } ";
     }


     return $outi;
 }


 sub dotlabel
 {
     my $node = shift;

     # XXX XXX: olap fixup - don't label duplicate multi slice motion nodes
     return
         if (exists($node->{MultiSliceMotionDuplicate}));

     my $colortable = $glob_coltab{$glob_colorscheme};

     my $color = scalar(@{$colortable});
     $color = $node->{slice}  if (exists($node->{slice}));
     $color =~ s/slice//;

     $color = ($color) % (scalar(@{$colortable}));

     # build list of node attributes
     my @attrlist;
     push @attrlist, "shape=record";
 #    push @attrlist, "shape=polygon";
 #    push @attrlist, "peripheries=2";

 #    push @attrlist, 'fontcolor=white';

     push @attrlist, 'label="' . dotlabel_detail($node) .'"';
     push @attrlist, 'style=filled';
 #    push @attrlist, 'style="filled,bold"';
 #    push @attrlist, "color=" . $colortable->[$color];
 #    push @attrlist, "fillcolor=" . $colortable->[$color];

     if (exists($node->{color_rank})) # color by statistical ranking
     {
         my $edgecol = $glob_divcol{rdbu11}->[$node->{color_rank}];
         my $fillcol = $colortable->[$color];

         if (defined($glob_statcolor))
         {
             if ($glob_statcolor =~ m/^t$/i)
             {
                 # show timing color only
                 $fillcol = $edgecol;
             }
             if ($glob_statcolor =~ m/^st/i)
             {
                 # edge is slice color, fill is time stats
                 # invert the selection
                 ($edgecol, $fillcol) = ($fillcol, $edgecol);
             }
         }

         push @attrlist, 'style="filled,setlinewidth(6)"';
         push @attrlist, "color=\"" . $edgecol . '"';

         push @attrlist, "fillcolor=\"" . $fillcol . '"';
     }
     else
     {
         push @attrlist, "color=\"" . $colortable->[$color] . '"';
         push @attrlist, "fillcolor=\"" . $colortable->[$color] . '"';
     }

     if (exists($node->{crunchme}))
     {
         @attrlist = ();

 #    push @attrlist, 'style=filled';
     push @attrlist, 'style=filled';
     push @attrlist, "color=\"" . $colortable->[$color] . '"';
     push @attrlist, "fillcolor=\"" . $colortable->[$color] . '"';
 #    push @attrlist, "shape=circle";
         push @attrlist, "label=\"" . $node->{short} . '"';
 #    push @attrlist, "fontsize=1";
 #    push @attrlist, "height=0.01";
 #    push @attrlist, "width=0.01";
 #    push @attrlist, "height=0.12";
 #    push @attrlist, "width=0.12";

         print $outfh '"' . $node->{id} . '" [' . join(", ", @attrlist) . '];' . "\n" ;
     }
     else
     {
         print $outfh '"' . $node->{id} . '" [' . join(", ", @attrlist) . '];' . "\n" ;
     }

     if (exists($node->{child}))
     {
         for my $kid (@{$node->{child}})
         {
             dotlabel($kid);
         }

     }

 }


 sub makedotfile
 {
     my ($plantree, $time_list, $do_timeline, $plan_num, $parse_ctx,
         $direction) = @_;

 #    print $outfh "\n\ndigraph plan1 { ranksep=.75; size = \"7.5,7.5\";\n\n \n";
     print $outfh "\n\ndigraph plan$plan_num {  \n";

 #    print $outfh "graph [bgcolor=black];\n edge [style=bold, color=white];\n";
 #    print $outfh "graph [bgcolor=black];\n edge [style=dashed, color=white];\n";
 #    print $outfh "graph [bgcolor=black];\n edge [style=dotted, color=white];\n";

     if ($do_timeline && scalar(@{$time_list}))
     {
         print $outfh "   ranksep=.75; size = \"7.5,7.5\";\n\n \n";
         print $outfh "   {\n node [shape=plaintext, fontsize=16];\n";
         print $outfh "/* the time-line graph */\n";

         print $outfh join(' -> ', @{$time_list} ), ";\n";
         print $outfh "}\n";

         print $outfh "node [shape=box];\n";
     }

     print $outfh "rankdir=$direction;\n";

     dotkid($plantree);

     dotlabel($plantree);

     print $outfh "\n}\n";

 }

 1;