| #!/usr/bin/perl |
| |
| # Check that the keyword lists in gram.y and kwlist.h are sane. |
| # Usage: check_keywords.pl gram.y kwlist.h |
| |
| # src/backend/parser/check_keywords.pl |
| # Copyright (c) 2009-2023, PostgreSQL Global Development Group |
| |
| use strict; |
| use warnings; |
| |
| my $gram_filename = $ARGV[0]; |
| my $kwlist_filename = $ARGV[1]; |
| |
| my $errors = 0; |
| |
| sub error |
| { |
| print STDERR @_; |
| $errors = 1; |
| return; |
| } |
| |
| # Check alphabetical order of a set of keyword symbols |
| # (note these are NOT the actual keyword strings) |
| sub check_alphabetical_order |
| { |
| my ($listname, $list) = @_; |
| my $prevkword = ''; |
| |
| foreach my $kword (@$list) |
| { |
| # Some symbols have a _P suffix. Remove it for the comparison. |
| my $bare_kword = $kword; |
| $bare_kword =~ s/_P$//; |
| if ($bare_kword le $prevkword) |
| { |
| error |
| "'$bare_kword' after '$prevkword' in $listname list is misplaced"; |
| } |
| $prevkword = $bare_kword; |
| } |
| return; |
| } |
| |
| $, = ' '; # set output field separator |
| $\ = "\n"; # set output record separator |
| |
| my %keyword_categories; |
| $keyword_categories{'unreserved_keyword'} = 'UNRESERVED_KEYWORD'; |
| $keyword_categories{'col_name_keyword'} = 'COL_NAME_KEYWORD'; |
| $keyword_categories{'type_func_name_keyword'} = 'TYPE_FUNC_NAME_KEYWORD'; |
| $keyword_categories{'reserved_keyword'} = 'RESERVED_KEYWORD'; |
| |
| open(my $gram, '<', $gram_filename) || die("Could not open : $gram_filename"); |
| |
| my $kcat; |
| my $in_bare_labels; |
| my $comment; |
| my @arr; |
| my %keywords; |
| my @bare_label_keywords; |
| |
| line: while (my $S = <$gram>) |
| { |
| chomp $S; # strip record separator |
| |
| my $s; |
| |
| # Make sure any braces are split |
| $s = '{', $S =~ s/$s/ { /g; |
| $s = '}', $S =~ s/$s/ } /g; |
| |
| # Any comments are split |
| $s = '[/][*]', $S =~ s#$s# /* #g; |
| $s = '[*][/]', $S =~ s#$s# */ #g; |
| |
| if (!($kcat) && !($in_bare_labels)) |
| { |
| |
| # Is this the beginning of a keyword list? |
| foreach my $k (keys %keyword_categories) |
| { |
| if ($S =~ m/^($k):/) |
| { |
| $kcat = $k; |
| next line; |
| } |
| } |
| |
| # Is this the beginning of the bare_label_keyword list? |
| $in_bare_labels = 1 if ($S =~ m/^bare_label_keyword:/); |
| |
| next line; |
| } |
| |
| # Now split the line into individual fields |
| my $n = (@arr = split(' ', $S)); |
| |
| # Ok, we're in a keyword list. Go through each field in turn |
| for (my $fieldIndexer = 0; $fieldIndexer < $n; $fieldIndexer++) |
| { |
| if ($arr[$fieldIndexer] eq '*/' && $comment) |
| { |
| $comment = 0; |
| next; |
| } |
| elsif ($comment) |
| { |
| next; |
| } |
| elsif ($arr[$fieldIndexer] eq '/*') |
| { |
| |
| # start of a multiline comment |
| $comment = 1; |
| next; |
| } |
| elsif ($arr[$fieldIndexer] eq '//') |
| { |
| next line; |
| } |
| |
| if ($arr[$fieldIndexer] eq ';') |
| { |
| |
| # end of keyword list |
| undef $kcat; |
| undef $in_bare_labels; |
| next; |
| } |
| |
| if ($arr[$fieldIndexer] eq '|') |
| { |
| next; |
| } |
| |
| # Put this keyword into the right list |
| if ($in_bare_labels) |
| { |
| push @bare_label_keywords, $arr[$fieldIndexer]; |
| } |
| else |
| { |
| push @{ $keywords{$kcat} }, $arr[$fieldIndexer]; |
| } |
| } |
| } |
| close $gram; |
| |
| # Check that each keyword list is in alphabetical order (just for neatnik-ism) |
| check_alphabetical_order($_, $keywords{$_}) for (keys %keyword_categories); |
| check_alphabetical_order('bare_label_keyword', \@bare_label_keywords); |
| |
| # Transform the keyword lists into hashes. |
| # kwhashes is a hash of hashes, keyed by keyword category id, |
| # e.g. UNRESERVED_KEYWORD. |
| # Each inner hash is keyed by keyword id, e.g. ABORT_P, with a dummy value. |
| my %kwhashes; |
| while (my ($kcat, $kcat_id) = each(%keyword_categories)) |
| { |
| @arr = @{ $keywords{$kcat} }; |
| |
| my $hash; |
| foreach my $item (@arr) { $hash->{$item} = 1; } |
| |
| $kwhashes{$kcat_id} = $hash; |
| } |
| my %bare_label_keywords = map { $_ => 1 } @bare_label_keywords; |
| |
| # Now read in kwlist.h |
| |
| open(my $kwlist, '<', $kwlist_filename) |
| || die("Could not open : $kwlist_filename"); |
| |
| my $prevkwstring = ''; |
| my $bare_kwname; |
| my %kwhash; |
| kwlist_line: while (<$kwlist>) |
| { |
| my ($line) = $_; |
| |
| if ($line =~ /^PG_KEYWORD\(\"(.*)\", (.*), (.*), (.*)\)/) |
| { |
| my ($kwstring) = $1; |
| my ($kwname) = $2; |
| my ($kwcat_id) = $3; |
| my ($collabel) = $4; |
| |
| # Check that the list is in alphabetical order (critical!) |
| if ($kwstring le $prevkwstring) |
| { |
| error |
| "'$kwstring' after '$prevkwstring' in kwlist.h is misplaced"; |
| } |
| $prevkwstring = $kwstring; |
| |
| # Check that the keyword string is valid: all lower-case ASCII chars |
| if ($kwstring !~ /^[a-z_]+$/) |
| { |
| error |
| "'$kwstring' is not a valid keyword string, must be all lower-case ASCII chars"; |
| } |
| |
| # Check that the keyword name is valid: all upper-case ASCII chars |
| if ($kwname !~ /^[A-Z_]+$/) |
| { |
| error |
| "'$kwname' is not a valid keyword name, must be all upper-case ASCII chars"; |
| } |
| |
| # Check that the keyword string matches keyword name |
| $bare_kwname = $kwname; |
| $bare_kwname =~ s/_P$//; |
| if ($bare_kwname ne uc($kwstring)) |
| { |
| error |
| "keyword name '$kwname' doesn't match keyword string '$kwstring'"; |
| } |
| |
| # Check that the keyword is present in the right category list |
| %kwhash = %{ $kwhashes{$kwcat_id} }; |
| |
| if (!(%kwhash)) |
| { |
| error "Unknown keyword category: $kwcat_id"; |
| } |
| else |
| { |
| if (!($kwhash{$kwname})) |
| { |
| error "'$kwname' not present in $kwcat_id section of gram.y"; |
| } |
| else |
| { |
| |
| # Remove it from the hash, so that we can |
| # complain at the end if there's keywords left |
| # that were not found in kwlist.h |
| delete $kwhashes{$kwcat_id}->{$kwname}; |
| } |
| } |
| |
| # Check that the keyword's collabel property matches gram.y |
| if ($collabel eq 'BARE_LABEL') |
| { |
| unless ($bare_label_keywords{$kwname}) |
| { |
| error |
| "'$kwname' is marked as BARE_LABEL in kwlist.h, but it is missing from gram.y's bare_label_keyword rule"; |
| } |
| } |
| elsif ($collabel eq 'AS_LABEL') |
| { |
| if ($bare_label_keywords{$kwname}) |
| { |
| error |
| "'$kwname' is marked as AS_LABEL in kwlist.h, but it is listed in gram.y's bare_label_keyword rule"; |
| } |
| } |
| else |
| { |
| error |
| "'$collabel' not recognized in kwlist.h. Expected either 'BARE_LABEL' or 'AS_LABEL'"; |
| } |
| } |
| } |
| close $kwlist; |
| |
| # Check that we've paired up all keywords from gram.y with lines in kwlist.h |
| while (my ($kwcat, $kwcat_id) = each(%keyword_categories)) |
| { |
| %kwhash = %{ $kwhashes{$kwcat_id} }; |
| |
| for my $kw (keys %kwhash) |
| { |
| error "'$kw' found in gram.y $kwcat category, but not in kwlist.h"; |
| } |
| } |
| |
| exit $errors; |