src/tools/check_keywords.pl - hawq - Git at Google

 #!/usr/bin/perl -w

 use strict;

 # Check that the keyword lists in gram.y and kwlist.h are sane. Run from
 # the top directory, or pass a path to a top directory as argument.
 #
 # src/tools/check_keywords.pl

 my $path;

 if (@ARGV) {
 	$path = $ARGV[0];
 	shift @ARGV;
 } else {
 	$path = ".";
 }

 $[ = 1;			# set array base to 1
 $, = ' ';		# set output field separator
 $\ = "\n";		# set output record separator

 my %keyword_categories;
 $keyword_categories{'unreserved_keyword'} = 'UNRESERVED_KEYWORD';
 $keyword_categories{'col_name_keyword'} = 'COL_NAME_KEYWORD';
 $keyword_categories{'type_func_name_keyword'} = 'TYPE_FUNC_NAME_KEYWORD';
 $keyword_categories{'reserved_keyword'} = 'RESERVED_KEYWORD';

 my $gram_filename = "$path/src/backend/parser/gram.y";
 open(GRAM, $gram_filename) || die("Could not open : $gram_filename");

 my ($S, $s, $k, $n, $kcat);
 my $comment;
 my @arr;
 my %keywords;

 line: while (<GRAM>) {
     chomp;	# strip record separator

     $S = $_;
     # Make sure any braces are split
     $s = '{', $S =~ s/$s/ { /g;
     $s = '}', $S =~ s/$s/ } /g;
     # Any comments are split
     $s = '[/][*]', $S =~ s#$s# /* #g;
     $s = '[*][/]', $S =~ s#$s# */ #g;

     if (!($kcat)) {
 	# Is this the beginning of a keyword list?
 	foreach $k (keys %keyword_categories) {
 	    if ($S =~ m/^($k):/) {
 		$kcat = $k;
 		next line;
 	    }
 	}
 	next line;
     }

     # Now split the line into individual fields
     $n = (@arr = split(' ', $S));

     # Ok, we're in a keyword list. Go through each field in turn
     for (my $fieldIndexer = 1; $fieldIndexer <= $n; $fieldIndexer++) {
 	if ($arr[$fieldIndexer] eq '*/' && $comment) {
 	    $comment = 0;
 	    next;
 	}
 	elsif ($comment) {
 	    next;
 	}
 	elsif ($arr[$fieldIndexer] eq '/*') {
 	    # start of a multiline comment
 	    $comment = 1;
 	    next;
 	}
 	elsif ($arr[$fieldIndexer] eq '//') {
 	    next line;
 	}

 	if ($arr[$fieldIndexer] eq ';') {
 	    # end of keyword list
 	    $kcat = '';
 	    next;
 	}

 	if ($arr[$fieldIndexer] eq '|') {
 	    next;
 	}

 	# Put this keyword into the right list
 	push @{$keywords{$kcat}}, $arr[$fieldIndexer];
     }
 }
 close GRAM;

 # Check that all keywords are in alphabetical order
 my ($prevkword, $kword, $bare_kword);
 foreach $kcat (keys %keyword_categories) {
     $prevkword = '';

     foreach $kword (@{$keywords{$kcat}}) {
 	# Some keyword have a _P suffix. Remove it for the comparison.
 	$bare_kword = $kword;
 	$bare_kword =~ s/_P$//;
 	if ($bare_kword le $prevkword) {
 	    print "'$bare_kword' after '$prevkword' in $kcat list is misplaced";
 	}
 	$prevkword = $bare_kword;
     }
 }

 # Transform the keyword lists into hashes.
 # kwhashes is a hash of hashes, keyed by keyword category id, e.g.
 # UNRESERVED_KEYWORD. Each inner hash is a keyed by keyword id, e.g. ABORT_P
 # with a dummy value.
 my %kwhashes;
 while ( my ($kcat, $kcat_id) = each(%keyword_categories) ) {
     @arr = @{$keywords{$kcat}};

     my $hash;
     foreach my $item (@arr) { $hash->{$item} = 1 }

     $kwhashes{$kcat_id} = $hash;
 }

 # Now read in kwlist.h

 my $kwlist_filename = "$path/src/include/parser/kwlist.h";
 open(KWLIST, $kwlist_filename) || die("Could not open : $kwlist_filename");

 my $prevkwstring = '';
 my $bare_kwname;
 my %kwhash;
 kwlist_line: while (<KWLIST>) {
     my($line) = $_;

     if ($line =~ /^PG_KEYWORD\(\"(.*)\", (.*), (.*)\)/)
     {
 	my($kwstring) = $1;
 	my($kwname) = $2;
 	my($kwcat_id) = $3;

 	# Check that the list is in alphabetical order
 	if ($kwstring le $prevkwstring) {
 	    print "'$kwstring' after '$prevkwstring' in kwlist.h is misplaced";
 	}
 	$prevkwstring = $kwstring;

 	# Check that the keyword string is valid: all lower-case ASCII chars
 	if ($kwstring !~ /^[a-z_]*$/) {
 	    print "'$kwstring' is not a valid keyword string, must be all lower-case ASCII chars";
 	}

 	# Check that the keyword name is valid: all upper-case ASCII chars
 	if ($kwname !~ /^[A-Z_]*$/) {
 	    print "'$kwname' is not a valid keyword name, must be all upper-case ASCII chars";
 	}

 	# Check that the keyword string matches keyword name
 	$bare_kwname = $kwname;
 	$bare_kwname =~ s/_P$//;
 	if ($bare_kwname ne uc($kwstring)) {
 	    print "keyword name '$kwname' doesn't match keyword string '$kwstring'";
 	}

 	# Check that the keyword is present in the grammar
 	%kwhash = %{$kwhashes{$kwcat_id}};

 	if (!(%kwhash))	{
 	    #print "Unknown kwcat_id: $kwcat_id";
 	} else {
 	    if (!($kwhash{$kwname})) {
 		print "'$kwname' not present in $kwcat_id section of gram.y";
 	    } else {
 		# Remove it from the hash, so that we can complain at the end
 		# if there's keywords left that were not found in kwlist.h
 		delete $kwhashes{$kwcat_id}->{$kwname};
 	    }
 	}
     }
 }
 close KWLIST;

 # Check that we've paired up all keywords from gram.y with lines in kwlist.h
 while ( my ($kwcat, $kwcat_id) = each(%keyword_categories) ) {
     %kwhash = %{$kwhashes{$kwcat_id}};

     for my $kw ( keys %kwhash ) {
 	print "'$kw' found in gram.y $kwcat category, but not in kwlist.h"
     }
 }
	#!/usr/bin/perl -w

	use strict;

	# Check that the keyword lists in gram.y and kwlist.h are sane. Run from
	# the top directory, or pass a path to a top directory as argument.
	#
	# src/tools/check_keywords.pl

	my $path;

	if (@ARGV) {
	$path = $ARGV[0];
	shift @ARGV;
	} else {
	$path = ".";
	}

	$[ = 1; # set array base to 1
	$, = ' '; # set output field separator
	$\ = "\n"; # set output record separator

	my %keyword_categories;
	$keyword_categories{'unreserved_keyword'} = 'UNRESERVED_KEYWORD';
	$keyword_categories{'col_name_keyword'} = 'COL_NAME_KEYWORD';
	$keyword_categories{'type_func_name_keyword'} = 'TYPE_FUNC_NAME_KEYWORD';
	$keyword_categories{'reserved_keyword'} = 'RESERVED_KEYWORD';

	my $gram_filename = "$path/src/backend/parser/gram.y";
	open(GRAM, $gram_filename) \|\| die("Could not open : $gram_filename");

	my ($S, $s, $k, $n, $kcat);
	my $comment;
	my @arr;
	my %keywords;

	line: while (<GRAM>) {
	chomp; # strip record separator

	$S = $_;
	# Make sure any braces are split
	$s = '{', $S =~ s/$s/ { /g;
	$s = '}', $S =~ s/$s/ } /g;
	# Any comments are split
	$s = '[/][]', $S =~ s#$s# / #g;
	$s = '[][/]', $S =~ s#$s# / #g;

	if (!($kcat)) {
	# Is this the beginning of a keyword list?
	foreach $k (keys %keyword_categories) {
	if ($S =~ m/^($k):/) {
	$kcat = $k;
	next line;
	}
	}
	next line;
	}

	# Now split the line into individual fields
	$n = (@arr = split(' ', $S));

	# Ok, we're in a keyword list. Go through each field in turn
	for (my $fieldIndexer = 1; $fieldIndexer <= $n; $fieldIndexer++) {
	if ($arr[$fieldIndexer] eq '*/' && $comment) {
	$comment = 0;
	next;
	}
	elsif ($comment) {
	next;
	}
	elsif ($arr[$fieldIndexer] eq '/*') {
	# start of a multiline comment
	$comment = 1;
	next;
	}
	elsif ($arr[$fieldIndexer] eq '//') {
	next line;
	}

	if ($arr[$fieldIndexer] eq ';') {
	# end of keyword list
	$kcat = '';
	next;
	}

	if ($arr[$fieldIndexer] eq '\|') {
	next;
	}

	# Put this keyword into the right list
	push @{$keywords{$kcat}}, $arr[$fieldIndexer];
	}
	}
	close GRAM;

	# Check that all keywords are in alphabetical order
	my ($prevkword, $kword, $bare_kword);
	foreach $kcat (keys %keyword_categories) {
	$prevkword = '';

	foreach $kword (@{$keywords{$kcat}}) {
	# Some keyword have a _P suffix. Remove it for the comparison.
	$bare_kword = $kword;
	$bare_kword =~ s/_P$//;
	if ($bare_kword le $prevkword) {
	print "'$bare_kword' after '$prevkword' in $kcat list is misplaced";
	}
	$prevkword = $bare_kword;
	}
	}

	# Transform the keyword lists into hashes.
	# kwhashes is a hash of hashes, keyed by keyword category id, e.g.
	# UNRESERVED_KEYWORD. Each inner hash is a keyed by keyword id, e.g. ABORT_P
	# with a dummy value.
	my %kwhashes;
	while ( my ($kcat, $kcat_id) = each(%keyword_categories) ) {
	@arr = @{$keywords{$kcat}};

	my $hash;
	foreach my $item (@arr) { $hash->{$item} = 1 }

	$kwhashes{$kcat_id} = $hash;
	}

	# Now read in kwlist.h

	my $kwlist_filename = "$path/src/include/parser/kwlist.h";
	open(KWLIST, $kwlist_filename) \|\| die("Could not open : $kwlist_filename");

	my $prevkwstring = '';
	my $bare_kwname;
	my %kwhash;
	kwlist_line: while (<KWLIST>) {
	my($line) = $_;

	if ($line =~ /^PG_KEYWORD\(\"(.)\", (.), (.*)\)/)
	{
	my($kwstring) = $1;
	my($kwname) = $2;
	my($kwcat_id) = $3;

	# Check that the list is in alphabetical order
	if ($kwstring le $prevkwstring) {
	print "'$kwstring' after '$prevkwstring' in kwlist.h is misplaced";
	}
	$prevkwstring = $kwstring;

	# Check that the keyword string is valid: all lower-case ASCII chars
	if ($kwstring !~ /^[a-z_]*$/) {
	print "'$kwstring' is not a valid keyword string, must be all lower-case ASCII chars";
	}

	# Check that the keyword name is valid: all upper-case ASCII chars
	if ($kwname !~ /^[A-Z_]*$/) {
	print "'$kwname' is not a valid keyword name, must be all upper-case ASCII chars";
	}

	# Check that the keyword string matches keyword name
	$bare_kwname = $kwname;
	$bare_kwname =~ s/_P$//;
	if ($bare_kwname ne uc($kwstring)) {
	print "keyword name '$kwname' doesn't match keyword string '$kwstring'";
	}

	# Check that the keyword is present in the grammar
	%kwhash = %{$kwhashes{$kwcat_id}};

	if (!(%kwhash)) {
	#print "Unknown kwcat_id: $kwcat_id";
	} else {
	if (!($kwhash{$kwname})) {
	print "'$kwname' not present in $kwcat_id section of gram.y";
	} else {
	# Remove it from the hash, so that we can complain at the end
	# if there's keywords left that were not found in kwlist.h
	delete $kwhashes{$kwcat_id}->{$kwname};
	}
	}
	}
	}
	close KWLIST;

	# Check that we've paired up all keywords from gram.y with lines in kwlist.h
	while ( my ($kwcat, $kwcat_id) = each(%keyword_categories) ) {
	%kwhash = %{$kwhashes{$kwcat_id}};

	for my $kw ( keys %kwhash ) {
	print "'$kw' found in gram.y $kwcat category, but not in kwlist.h"
	}
	}