blob: 99e0063f05c8197c96b2662cd38a9dd182e1d677 [file] [log] [blame]
#!/usr/bin/env perl
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
use Pod::Usage;
use strict;
use warnings;
use POSIX;
use File::Spec;
use File::Temp;
use Config;
=head1 NAME
B<gpdiff.pl> - Greenplum diff
=head1 SYNOPSIS
B<gpdiff.pl> [options] logfile [logfile...]
Options:
Normally, gpdiff takes the standard "diff" options and passes them
directly to the diff program. Try `diff --help' for more information
on the standard options. The following options are specific to gpdiff:
-help brief help message
-man full documentation
-version print gpdiff version and underlying diff version
-gpd_ignore_headers ignore header lines in query output
-gpd_ignore_plans ignore explain plan content in input files
-gpd_init <file> load initialization file
=head1 OPTIONS
=over 8
=item B<-help>
Prints a brief help message and exits.
=item B<-man>
Prints the manual page and exits.
=item B<-version>
Prints the gpdiff version and the underlying diff version.
=item B<-gpd_ignore_headers>
gpdiff/atmsort expect Postgresql "psql-style" output for SELECT
statements, with a two line header composed of the column names,
separated by vertical bars (|), and a "separator" line of dashes and
pluses beneath, followed by the row output. The psql utility performs
some formatting to adjust the column widths to match the size of the
row output. Setting this parameter causes gpdiff to ignore any
differences in the column naming and format widths globally.
=item B<-gpd_ignore_plans>
Specify this option to ignore any explain plan diffs between the
input files. This will completely ignore any plan content in
the input files thus masking differences in plans between the input files.
=item B<-gpd_init> <file>
Specify an initialization file containing a series of directives
(mainly for match_subs) that get applied to the input files. To
specify multiple initialization files, use multiple gpd_init arguments, eg:
-gpd_init file1 -gpd_init file2
=back
=head1 DESCRIPTION
gpdiff compares files using diff after processing them with atmsort.pl.
This comparison is designed to ignore certain Greenplum-specific
informational messages, as well as handle the cases where query output
order may differ for a multi-segment Greenplum database versus a
single Postgresql instance. Type "atmsort.pl --man" for more details.
gpdiff is invoked by pg_regress as part of "make install-check".
In this case the diff options are something like:
"-w -I NOTICE: -I HINT: -I CONTEXT: -I GP_IGNORE:".
Like diff, gpdiff can compare two files, a file and directory, a
directory and file, and two directories. However, when gpdiff compares
two directories, it only returns the exit status of the diff
comparison of the final two files.
=head1 BUGS
While the exit status is set correctly for most cases,
STDERR messages from diff are not displayed.
Also, atmsort cannot handle "unsorted" SELECT queries where the output
has strings with embedded newlines or pipe ("|") characters due to
limitations with the parser in the "tablelizer" function. Queries
with these characteristics must have an ORDER BY clause to avoid
potential erroneous comparison.
=head1 AUTHORS
Apache HAWQ
Address bug reports and comments to: dev@hawq.incubator.apache.org
=cut
# Shell command used to invoke atmsort.pl.  It is filled in at compile
# time by the BEGIN block below.
our $ATMSORT;

# Name of the diff program to run.
our $ATMDIFF = "diff";

# Settings collected from the gpdiff-specific command line options.
my $glob_ignore_headers;
my $glob_ignore_plans;
my $glob_init_file = [];

# Locate atmsort.pl: it is assumed to live in the same directory as
# this script, so take the absolute path of $0 and swap the file name.
BEGIN
{
    my @script_parts = File::Spec->splitpath(File::Spec->rel2abs($0));

    if (scalar(@script_parts))
    {
        # drop the script's own file name, keeping volume and directory
        pop @script_parts;

        $ATMSORT = File::Spec->catfile(@script_parts, "atmsort.pl");
        $glob_init_file = [];
    }
}
# gpdiff_files($f1, $f2, $d2d)
#
# Filter both input files through atmsort into scratch files, run the
# diff program on the scratch files with the options left in @ARGV, and
# print the diff output on STDOUT with the scratch file names replaced
# by the original file names.
#
#   $f1, $f2 - paths of the two files to compare
#   $d2d     - optional hash ref of flags.  When it is defined and diff
#              produced output, a "<diff> <f1> <f2>" banner line is
#              printed first.  When it has an "equiv" key, atmsort is run
#              with --do_equiv=compare on $f1 and --do_equiv=make on $f2.
#
# Returns the exit status of the diff program.  If $f1 contains
# "start_equiv" lines (and this is not already an equiv pass), $f1 and
# $f2 are each re-checked against themselves in equiv mode and a non-zero
# status from those passes replaces the returned status.
#
# Dies (via File::Temp) if a scratch file cannot be created.
#
# NOTE: file names and diff options are interpolated into shell command
# strings unquoted, so names containing whitespace or shell
# metacharacters are not supported.
sub gpdiff_files
{
    my ($f1, $f2, $d2d) = @_;
    my $need_equiv = 0;

    # need gnu diff on solaris
    if ($Config{'osname'} =~ m/solaris|sunos/i)
    {
        $ATMDIFF = "gdiff";
    }

    # Create the two scratch files.  tempfile() creates each file
    # exclusively and croaks on failure, so an unusable temp directory
    # produces an error instead of an endless retry loop.
    my @tmpfils;
    for my $ii (1..2)
    {
        my ($tmpfh, $tmpnam) = File::Temp::tempfile(UNLINK => 0);
        close($tmpfh);
        push @tmpfils, $tmpnam;
    }
    my ($newf1, $newf2) = @tmpfils;

    my $is_equiv = (defined($d2d) && exists($d2d->{equiv}));

    if ($is_equiv)
    {
        # assume f1 and f2 are the same...
        system "$ATMSORT --do_equiv=compare < $f1 > $newf1";
        system "$ATMSORT --do_equiv=make < $f2 > $newf2";
    }
    else
    {
        system "$ATMSORT < $f1 > $newf1";
        system "$ATMSORT < $f2 > $newf2";
    }

    my $args = join(" ", @ARGV, $newf1, $newf2);

    my $outi = `$ATMDIFF $args`;
    my $stat = $? >> 8; # diff status

    # backticks yield undef when the command could not be started
    $outi = "" unless (defined($outi));

    unless ($is_equiv)
    {
        # check for start_equiv unless already doing equiv check:
        # count the lines of f1 that mention it
        my $match_count = 0;
        if (open(my $in_fh, '<', $f1))
        {
            while (my $line = <$in_fh>)
            {
                $match_count++
                    if (index($line, "start_equiv") >= 0);
            }
            close($in_fh);
        }
        $need_equiv = $match_count;

        if ($need_equiv)
        {
            # force the banner line, as for a directory comparison
            $d2d = {} unless (defined($d2d));
            $d2d->{dir} = 1;
        }
    }

    # prefix the diff output with the files names for a "directory to
    # directory" diff
    if (defined($d2d) && length($outi))
    {
        if (exists($d2d->{equiv}))
        {
            $outi = "$ATMDIFF $f1 $f2" . ".equiv\n" . $outi;
        }
        else
        {
            $outi = "$ATMDIFF $f1 $f2\n" . $outi;
        }
    }

    # replace temp file name references with actual file names; the
    # names are quoted so they are matched literally, not as patterns
    $outi =~ s/\Q$newf1\E/$f1/gm;
    $outi =~ s/\Q$newf2\E/$f2/gm;

    print $outi;

    unlink $newf1;
    unlink $newf2;

    if ($need_equiv)
    {
        my $new_d2d = {};
        $new_d2d->{equiv} = 1;

        # call recursively if need to perform equiv comparison.
        my $stat1 = gpdiff_files($f1, $f1, $new_d2d);
        my $stat2 = gpdiff_files($f2, $f2, $new_d2d);

        $stat = $stat1 if ($stat1);
        $stat = $stat2 if ($stat2);
    }

    return ($stat);
}
# filefunc($f1, $f2, $d2d)
#
# Dispatch a comparison according to whether each operand is a file or a
# directory:
#
#   file / file - compare the two files with gpdiff_files
#   dir  / dir  - compare every entry of $f1 against $f2; the status of
#                 the last comparison performed is returned
#   file / dir  - compare $f1 with the same-named file in directory $f2
#   dir  / file - compare the same-named file in directory $f1 with $f2
#
#   $d2d - optional hash ref of flags handed through to gpdiff_files; its
#          "dir" key is set while iterating over a directory.
#
# Returns the comparison status, 0 when nothing was compared (including
# an empty directory), or 2 when directory $f1 cannot be opened.
sub filefunc
{
    my ($f1, $f2, $d2d) = @_;

    if ((-f $f1) && (-f $f2))
    {
        return (gpdiff_files($f1, $f2, $d2d));
    }

    # if f1 is a directory, do the filefunc of every file in that directory
    if ((-d $f1) && (-d $f2))
    {
        # start from a defined status so an empty directory yields 0
        my $stat = 0;
        my $dir_h;

        unless (opendir($dir_h, $f1))
        {
            warn "gpdiff: $f1: cannot open directory: $!\n";
            return 2;
        }

        while (my $fnam = readdir($dir_h))
        {
            # ignore ., .. (any name made up only of dots)
            next if ($fnam =~ m/^\.+$/);

            my $absname =
                File::Spec->rel2abs(File::Spec->catfile($f1, $fnam));

            # specify that is a directory comparison
            $d2d = {} unless (defined($d2d));
            $d2d->{dir} = 1;

            $stat = filefunc($absname, $f2, $d2d);
        }
        closedir $dir_h;

        return $stat;
    }

    # if f2 is a directory, find the corresponding file in that directory
    if ((-f $f1) && (-d $f2))
    {
        my $basenam = (File::Spec->splitpath($f1))[-1];
        my $fnam =
            File::Spec->rel2abs(File::Spec->catfile($f2, $basenam));

        return filefunc($f1, $fnam, $d2d);
    }

    # find f2 in dir f1
    if ((-f $f2) && (-d $f1))
    {
        my $basenam = (File::Spec->splitpath($f2))[-1];
        my $fnam =
            File::Spec->rel2abs(File::Spec->catfile($f1, $basenam));

        return filefunc($fnam, $f2, $d2d);
    }

    # operands are missing or of an unsupported type: nothing to compare
    return 0;
}
# Main program.
#
# Walks @ARGV, consuming the gpdiff-specific options (-gpd_ignore_headers,
# -gpd_ignore_plans, -gpd_init, -version, -man/-help) and collecting
# everything else in @arg2.  The last two entries of @arg2 are the
# operands to compare; whatever precedes them is handed to diff through
# @ARGV.  The atmsort command line in $ATMSORT is extended according to
# the gpdiff options, and the script exits with the status of filefunc().
#
# Exit status: 1 for -version and usage errors, 0 for -man, 2 when an
# operand does not exist, otherwise the comparison status.
if (1)
{
    my $man = 0;
    my $help = 0;
    my $verzion = 0;
    my $pmsg = "";
    my @arg2; # arg list for diff
    my %init_dup;

    # ENGINF-200: allow multiple init files.  Remember each init file
    # once; a repeated name only produces a warning.
    my $add_init_file = sub
    {
        my ($init_file) = @_;

        if (exists($init_dup{$init_file}))
        {
            warn "duplicate init file \'$init_file\', skipping...";
        }
        else
        {
            push @{$glob_init_file}, $init_file;
            $init_dup{$init_file} = 1;
        }
    };

    # check for man or help args
    if (scalar(@ARGV))
    {
        my $argc = -1;
        my $maxarg = scalar(@ARGV);

        while (($argc+1) < $maxarg)
        {
            $argc++;
            my $arg = $ARGV[$argc];

            unless ($arg =~ m/^\-/)
            {
                # even if no dash, might be a value for a dash arg...
                push @arg2, $arg;
                next;
            }

            # ENGINF-180: ignore header formatting
            if ($arg =~
                m/^\-(\-)*(gpd\_ignore\_headers|gp\_ignore\_headers)$/i)
            {
                $glob_ignore_headers = 1;
                next;
            }

            if ($arg =~
                m/^\-(\-)*(gpd\_ignore\_plans|gp\_ignore\_plans)$/i)
            {
                $glob_ignore_plans = 1;
                next;
            }

            if ($arg =~
                m/^\-(\-)*(gpd\_init|gp\_init)(\=)*(.*)$/i)
            {
                if ($arg =~ m/\=/) # check if "=filename"
                {
                    my @foo = split (/\=/, $arg, 2);
                    die "no init file"
                        unless (2 == scalar(@foo));
                    $add_init_file->(pop @foo);
                }
                else # next arg must be init file
                {
                    $argc++;
                    die "no init file"
                        unless (defined($ARGV[$argc]));
                    $add_init_file->($ARGV[$argc]);
                }
                next;
            }

            if ($arg =~ m/^\-(\-)*(v|version)$/)
            {
                $verzion = 1;
                next;
            }

            if ($arg =~ m/^\-(\-)*(man|help|\?)$/i)
            {
                if ($arg =~ m/man/i)
                {
                    $man = 1;
                }
                else
                {
                    $help = 1;
                }
                next;
            }

            # put all "dash" args on separate list for diff
            push @arg2, $arg;
        } # end while
    }
    else
    {
        $pmsg = "missing an operand after \`gpdiff\'";
        $help = 1;
    }

    if ((1 == scalar(@ARGV)) && (!($help || $man || $verzion)))
    {
        $pmsg = "unknown operand: $ARGV[0]";
        $help = 1;
    }

    if ($verzion)
    {
        my $VERSION = do { my @r = (q$Revision$ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r }; # must be all one line, for MakeMaker

        # need gnu diff on solaris
        if ($Config{'osname'} =~ m/solaris|sunos/i)
        {
            $ATMDIFF = "gdiff";
        }

        my $whichdiff = `which $ATMDIFF`;
        chomp $whichdiff;

        print "$0 version $VERSION\n";
        print "Type \'gpdiff.pl --help\' for more information on the standard options\n";
        print "$0 calls the \"", $whichdiff, "\" utility:\n\n";

        my $outi = `$ATMDIFF -v`;
        $outi =~ s/^/ /gm;
        print $outi, "\n";

        exit(1);
    }

    pod2usage(-msg => $pmsg, -exitstatus => 1) if $help;
    pod2usage(-msg => $pmsg, -exitstatus => 0, -verbose => 2) if $man;

    # The operands are the last two entries that survived option
    # stripping.  Taking them from @arg2 rather than the raw @ARGV keeps
    # a trailing gpdiff-specific option (or an init file name) from being
    # mistaken for a file to compare.  When fewer than two entries
    # survived, fall back to the raw argument list so the usual
    # "No such file or directory" diagnostics below still apply.
    my ($f1, $f2);
    if (scalar(@arg2) >= 2)
    {
        $f2 = pop @arg2;
        $f1 = pop @arg2;
    }
    else
    {
        $f2 = pop @ARGV;
        $f1 = pop @ARGV;
        @arg2 = ();
    }

    for my $fname ($f1, $f2)
    {
        unless (-e $fname)
        {
            print STDERR "gpdiff: $fname: No such file or directory\n";
        }
    }
    exit(2)
        unless ((-e $f1) && (-e $f2));

    # use the "stripped" arg list (file names already removed) for diff
    @ARGV = @arg2;

    # ENGINF-180: tell atmsort to ignore header formatting (globally)
    if ($glob_ignore_headers)
    {
        $ATMSORT .= " --ignore_headers ";
    }

    # Tell atmsort to ignore plan content if -gpd_ignore_plans is set
    if ($glob_ignore_plans)
    {
        $ATMSORT .= " --ignore_plans ";
    }

    # ENGINF-200: allow multiple init files
    if (defined($glob_init_file) && scalar(@{$glob_init_file}))
    {
        # MPP-12262: test here, because we don't get status for atmsort call
        for my $init_file (@{$glob_init_file})
        {
            die "no such file: $init_file"
                unless (-e $init_file);
        }

        $ATMSORT .= " --init=". join(" --init=", @{$glob_init_file}) . " ";
    }

    exit(filefunc($f1, $f2));
}