| # |
| # For PostgreSQL Database Management System: |
| # (formerly known as Postgres, then as Postgres95) |
| # |
| # Portions Copyright (c) 1996-2010, The PostgreSQL Global Development Group |
| # |
| # Portions Copyright (c) 1994, The Regents of the University of California |
| # |
| # Permission to use, copy, modify, and distribute this software and its documentation for any purpose, |
| # without fee, and without a written agreement is hereby granted, provided that the above copyright notice |
| # and this paragraph and the following two paragraphs appear in all copies. |
| # |
| # IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY FOR DIRECT, |
| # INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, |
| # ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF THE UNIVERSITY |
| # OF CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| # |
| # THE UNIVERSITY OF CALIFORNIA SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, |
| # BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. |
| # |
| # THE SOFTWARE PROVIDED HEREUNDER IS ON AN "AS IS" BASIS, AND THE UNIVERSITY OF CALIFORNIA |
| # HAS NO OBLIGATIONS TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. |
| # |
| #---------------------------------------------------------------------- |
| # |
| # gen_keywordlist.pl |
| # Perl script that transforms a list of keywords into a ScanKeywordList |
| # data structure that can be passed to ScanKeywordLookup(). |
| # |
| # The input is a C header file containing a series of macro calls |
| # PG_KEYWORD("keyword", ...) |
| # Lines not starting with PG_KEYWORD are ignored. The keywords are |
| # implicitly numbered 0..N-1 in order of appearance in the header file. |
| # Currently, the keywords are required to appear in ASCII order. |
| # |
| # The output is a C header file that defines a "const ScanKeywordList" |
| # variable named according to the -v switch ("ScanKeywords" by default). |
| # The variable is marked "static" unless the -e switch is given. |
| # |
| # ScanKeywordList uses hash-based lookup, so this script also selects |
| # a minimal perfect hash function for the keyword set, and emits a |
| # static hash function that is referenced in the ScanKeywordList struct. |
| # The hash function is case-insensitive unless --no-case-fold is specified. |
| # Note that case folding works correctly only for all-ASCII keywords! |
| # |
| # |
| # Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group |
| # Portions Copyright (c) 1994, Regents of the University of California |
| # |
| # src/tools/gen_keywordlist.pl |
| # |
| #---------------------------------------------------------------------- |
| |
| |
| use strict; |
| use warnings; |
| use Getopt::Long; |
| |
| use FindBin; |
| use lib $FindBin::RealBin; |
| |
| use PerfectHash; |
| |
# ----------------------------------------------------------------------
# Main program.
#
# Overall flow:
#   1. Parse the command line and derive the output file name.
#   2. Read the keyword list from the input header and validate it.
#   3. Compute the keyword offsets and the perfect hash function.
#   4. Only then create the output header and write everything to it.
#
# All validation and computation happens before the output file is
# created, so a failure in steps 1-3 does not leave a partial header
# behind.
# ----------------------------------------------------------------------

# Command-line option defaults.
my $output_path = '';
my $extern      = 0;
my $case_fold   = 1;
my $varname     = 'ScanKeywords';

# "output" and "varname" take an optional string; "case-fold" is
# negatable, i.e. --no-case-fold clears it.
GetOptions(
	'output:s'   => \$output_path,
	'extern'     => \$extern,
	'case-fold!' => \$case_fold,
	'varname:s'  => \$varname) || usage();

# The variable name is pasted into several C identifiers below, and the
# "varname:s" spec allows an empty value, so insist on a C identifier.
$varname =~ /\A[A-Za-z_][A-Za-z0-9_]*\z/
  || die qq|Variable name "$varname" is not a valid C identifier.\n|;

my $kw_input_file = shift @ARGV;
die "No input file.\n"
  if !defined $kw_input_file || $kw_input_file eq '';

# Make sure output_path ends in a slash if needed.
if ($output_path ne '' && substr($output_path, -1) ne '/')
{
	$output_path .= '/';
}

# The output name is the input's base name with "_d" inserted before ".h".
$kw_input_file =~ /(\w+)\.h$/
  || die "Input file must be named something.h.\n";
my $base_filename = $1 . '_d';
my $kw_def_file   = $output_path . $base_filename . '.h';

# Parse input file for keyword names.  Lines that do not start with
# PG_KEYWORD("...") are ignored.
open(my $kif, '<', $kw_input_file) || die "$kw_input_file: $!\n";

my @keywords;
while (my $line = <$kif>)
{
	push @keywords, $1 if $line =~ /^PG_KEYWORD\("(\w+)"/;
}

close($kif) || die "$kw_input_file: $!\n";

# An empty keyword list would produce an empty array initializer in the
# generated header, which is not valid standard C; treat it as an error.
die "No keywords found in $kw_input_file\n" if !@keywords;

# When being case-insensitive, insist that the input be all-lower-case.
if ($case_fold)
{
	foreach my $kw (@keywords)
	{
		die qq|The keyword "$kw" is not lower-case in $kw_input_file\n|
		  if ($kw ne lc $kw);
	}
}

# Error out if the keyword names are not in ASCII order.
#
# While this isn't really necessary with hash-based lookup, it's still
# helpful because it provides a cheap way to reject duplicate keywords.
# Also, insisting on sorted order ensures that code that scans the keyword
# table linearly will see the keywords in a canonical order.
for my $i (0 .. $#keywords - 1)
{
	die
	  qq|The keyword "$keywords[$i + 1]" is out of order in $kw_input_file\n|
	  if ($keywords[$i] cmp $keywords[ $i + 1 ]) >= 0;
}

# Compute the numerical offsets which will be used to index into the
# keyword string.  Also determine max keyword length.
my @offsets;
my $offset  = 0;
my $max_len = 0;
foreach my $name (@keywords)
{
	my $this_length = length($name);

	# The offsets are emitted as uint16, so each one must fit in 16 bits.
	die qq|Offset $offset of keyword "$name" does not fit in uint16\n|
	  if $offset > 0xFFFF;

	push @offsets, $offset;

	# Calculate the cumulative offset of the next keyword,
	# taking into account the null terminator.
	$offset += $this_length + 1;

	# Update max keyword length.
	$max_len = $this_length if $max_len < $this_length;
}

# Generate the definition of the hash function.
my $funcname = $varname . "_hash_func";

my $f = PerfectHash::generate_hash_function(\@keywords, $funcname,
	case_fold => $case_fold);

# Everything has been validated and computed; now create the output file.
open(my $kwdef, '>', $kw_def_file) || die "$kw_def_file: $!\n";

# Opening boilerplate for keyword definition header.
printf $kwdef <<EOM, $base_filename, uc $base_filename, uc $base_filename;
/*-------------------------------------------------------------------------
*
* %s.h
* List of keywords represented as a ScanKeywordList.
*
* Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* NOTES
* ******************************
* *** DO NOT EDIT THIS FILE! ***
* ******************************
*
* It has been GENERATED by src/tools/gen_keywordlist.pl
*
*-------------------------------------------------------------------------
*/

#ifndef %s_H
#define %s_H

#include "common/kwlookup.h"

EOM

# Emit the string containing all the keywords, separated by embedded
# null characters (the final one is supplied by the C string literal).
printf $kwdef qq|static const char %s_kw_string[] =\n\t"|, $varname;
print $kwdef join qq|\\0"\n\t"|, @keywords;
print $kwdef qq|";\n\n|;

# Emit the array of offsets into the keyword string.
printf $kwdef "static const uint16 %s_kw_offsets[] = {\n", $varname;
print $kwdef "\t$_,\n" foreach @offsets;
print $kwdef "};\n\n";

# Emit a macro defining the number of keywords.
# (In some places it's useful to have access to that as a constant.)
printf $kwdef "#define %s_NUM_KEYWORDS %d\n\n", uc $varname, scalar @keywords;

# Emit the definition of the hash function.
printf $kwdef qq|static %s\n|, $f;

# Emit the struct that wraps all this lookup info into one variable.
# The variable is "static" unless --extern was given.
print $kwdef "static " if !$extern;
printf $kwdef "const ScanKeywordList %s = {\n", $varname;
printf $kwdef qq|\t%s_kw_string,\n|,    $varname;
printf $kwdef qq|\t%s_kw_offsets,\n|,   $varname;
printf $kwdef qq|\t%s,\n|,              $funcname;
printf $kwdef qq|\t%s_NUM_KEYWORDS,\n|, uc $varname;
printf $kwdef qq|\t%d\n|,               $max_len;
print $kwdef "};\n\n";

printf $kwdef "#endif\t\t\t\t\t\t\t/* %s_H */\n", uc $base_filename;

# Buffered write errors (e.g. disk full) only surface at close time, so
# check it rather than silently leaving a truncated header behind.
close($kwdef) || die "$kw_def_file: $!\n";
| |
| |
# usage()
#
# Report the command-line synopsis and terminate the script.  The text is
# passed to die, so it goes to STDERR and the script exits with a failure
# status; because the text ends in a newline, no file/line suffix is added.
# This sub never returns.
sub usage
{
	# Non-interpolating heredoc: the help text contains nothing to expand.
	my $help_text = <<'EOM';
Usage: gen_keywordlist.pl [--output/-o <path>] [--varname/-v <varname>] [--extern/-e] [--[no-]case-fold] input_file
--output Output directory (default '.')
--varname Name for ScanKeywordList variable (default 'ScanKeywords')
--extern Allow the ScanKeywordList variable to be globally visible
--no-case-fold Keyword matching is to be case-sensitive

gen_keywordlist.pl transforms a list of keywords into a ScanKeywordList.
The output filename is derived from the input file by inserting _d,
for example kwlist_d.h is produced from kwlist.h.
EOM

	die $help_text;
}