| #!/usr/bin/perl |
| #---------------------------------------------------------------------- |
| # |
| # reformat_dat_file.pl |
| # Perl script that reads in catalog data file(s) and writes out |
| # functionally equivalent file(s) in a standard format. |
| # |
| # In each entry of a reformatted file, metadata fields (if present) |
| # come first, with normal attributes starting on the following line, |
| # in the same order as the columns of the corresponding catalog. |
| # Comments and blank lines are preserved. |
| # |
| # Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group |
| # Portions Copyright (c) 1994, Regents of the University of California |
| # |
| # src/include/catalog/reformat_dat_file.pl |
| # |
| #---------------------------------------------------------------------- |
| |
| use strict; |
| use warnings; |
| |
| use FindBin; |
| use Getopt::Long; |
| |
| # If you copy this script to somewhere other than src/include/catalog, |
| # you'll need to modify this "use lib" or provide a suitable -I switch. |
| use lib "$FindBin::RealBin/../../backend/catalog/"; |
| use Catalog; |
| |
# Metadata fields of a catalog entry, in the order they are written out.
# Note: oid is stored like any other column, but it matters more than the
# rest, so it leads the list of metadata fields.
# Note: line_number is a metadata field as well, but it is never written
# out, so it is deliberately absent here.
my @METADATA = qw(oid oid_symbol array_type_oid descr autogenerated);

# Command line switches, with their defaults.
my $output_path = '';
my $full_tuples = 0;

# Any unrecognized or malformed switch prints the usage text and exits.
usage()
  unless GetOptions(
    'output=s'    => \$output_path,
    'full-tuples' => \$full_tuples);

# At least one data file must be named on the command line.
die "No input files.\n" unless @ARGV;

# A non-empty output_path is used as a plain string prefix for the output
# file names, so make sure it ends in a slash.
$output_path .= '/'
  if $output_path ne '' && $output_path !~ m{/\z};
| |
# Read all the input files into internal data structures.
# Data file names are passed as arguments; for each one we look for the
# header with the same base name and parse the catalog schema from it.
my %catalogs;        # catalog name => result of Catalog::ParseHeader
my %catalog_data;    # catalog name => result of Catalog::ParseData
my @catnames;        # catalog names, in command-line order
foreach my $datfile (@ARGV)
{
    # Capture the base name straight from the match instead of relying on
    # $1 staying intact until it is used.
    my ($basename) = $datfile =~ /(.+)\.dat$/
      or die "Input files need to be data (.dat) files.\n";

    my $header = "$basename.h";
    die "There is no header file corresponding to $datfile"
      if !-e $header;

    my $catalog = Catalog::ParseHeader($header);
    my $catname = $catalog->{catname};
    my $schema  = $catalog->{columns};

    push @catnames, $catname;
    $catalogs{$catname} = $catalog;

    # The trailing 1 is presumably the "preserve_formatting" parameter
    # mentioned in the bulk-editing notes further down; with it the
    # result also contains non-hash entries (comments, blank lines,
    # brackets) that the writer below passes through.
    $catalog_data{$catname} = Catalog::ParseData($datfile, $schema, 1);
}
| |
| ######################################################################## |
| # At this point, we have read all the data. If you are modifying this |
| # script for bulk editing, this is a good place to build lookup tables, |
| # if you need to. In the following example, the "next if !ref $row" |
| # check below is a hack to filter out non-hash objects. This is because |
| # we build the lookup tables from data that we read using the |
| # "preserve_formatting" parameter. |
| # |
| ##Index access method lookup. |
| #my %amnames; |
| #foreach my $row (@{ $catalog_data{pg_am} }) |
| #{ |
| # next if !ref $row; |
| # $amnames{$row->{oid}} = $row->{amname}; |
| #} |
| ######################################################################## |
| |
# Write the data: one output file per catalog, in command-line order.
foreach my $catname (@catnames)
{
    my $catalog = $catalogs{$catname};
    my $schema  = $catalog->{columns};

    # We may have ordinary columns at the storage level that we still
    # want to format as a special value.  Exclude these from the column
    # list so they are not written twice.
    my %is_metadata = map { ($_ => 1) } @METADATA;
    my @attnames =
      grep { !$is_metadata{$_} } map { $_->{name} } @$schema;

    # Write output files to specified directory.
    my $datfile = "$output_path$catname.dat";
    open my $dat, '>', $datfile
      or die "can't open $datfile: $!";

    foreach my $data (@{ $catalog_data{$catname} })
    {

        # Hash ref representing a data entry.
        if (ref $data eq 'HASH')
        {
            # Work on a copy so the parsed data stays untouched.
            my %values = %$data;

            ############################################################
            # At this point we have the full tuple in memory as a hash
            # and can do any operations we want. As written, it only
            # removes default values, but this script can be adapted to
            # do one-off bulk-editing.
            ############################################################

            if (!$full_tuples)
            {
                # If it's an autogenerated entry, drop it completely.
                next if $values{autogenerated};

                # Else, just drop any default/computed fields.
                strip_default_values(\%values, $schema, $catname);
            }

            print $dat "{";

            # Separate out metadata fields for readability.
            my $metadata_str = format_hash(\%values, @METADATA);
            if ($metadata_str)
            {
                print $dat $metadata_str;

                # User attributes start on next line.
                print $dat ",\n ";
            }

            my $data_str = format_hash(\%values, @attnames);
            print $dat $data_str;
            print $dat " },\n";
        }

        # Preserve blank lines.
        elsif ($data =~ /^\s*$/)
        {
            print $dat "\n";
        }

        # Preserve comments or brackets that are on their own line,
        # stripped of surrounding whitespace.
        elsif ($data =~ /^\s*(\[|\]|#.*?)\s*$/)
        {
            print $dat "$1\n";
        }

        # Anything matching none of the above is not written out.
    }

    # Output is buffered, so a failed write (e.g. disk full) may only
    # surface here; don't let it pass silently.
    close $dat
      or die "can't close $datfile: $!";
}
| |
# Prune a row (hash ref, modified in place) down to the values that must
# be spelled out in the data file: anything equal to its column default,
# or derivable from other columns, is removed.
#
#   $row      hash ref of attribute name => value
#   $schema   array ref of column hashes (keys used here: name, default)
#   $catname  catalog name, used in error messages and for the
#             catalog-specific computed columns
#
# Dies if any column other than oid has no defined value in the row.
sub strip_default_values
{
    my ($row, $schema, $catname) = @_;

    # First pass: drop values that merely restate the column default.
    for my $col (@{$schema})
    {
        my $name    = $col->{name};
        my $default = $col->{default};

        # A missing oid is fine, since it will be assigned automatically
        # before bootstrap; any other missing value is an error.
        unless (defined $row->{$name} or $name eq 'oid')
        {
            die "strip_default_values: $catname.$name undefined\n";
        }

        next unless defined $default;

        delete $row->{$name} if $row->{$name} eq $default;
    }

    # Second pass: drop computed values.  See AddDefaultValues() in
    # Catalog.pm.  This has to come after the default-matching pass above.
    delete $row->{pronargs}
      if $catname eq 'pg_proc' and defined $row->{proargtypes};

    # A pg_type entry with an auto-generated array type has a computed
    # typarray field too (see GenerateArrayTypes).
    delete $row->{typarray}
      if $catname eq 'pg_type' and defined $row->{array_type_oid};

    return;
}
| |
# Format the individual elements of a Perl hash into a valid string
# representation.  We do this ourselves, rather than use native Perl
# facilities, so we can keep control over the exact formatting of the
# data files.
#
#   $data           hash ref of attribute name => value
#   remaining args  attribute names to emit, in output order
#
# Returns a string of the form " name => 'value', name => 'value'" (no
# surrounding braces, no trailing comma), wrapped so that lines stay within
# 80 characters where possible.  Attributes without a defined value are
# skipped; if none are left, the result is the empty string.
sub format_hash
{
    my $data          = shift;
    my @orig_attnames = @_;

    # Keep only the attributes that have a value, so we can determine the
    # last populated element.  We do this because we may have default
    # values or empty metadata fields.
    my @attnames = grep { defined $data->{$_} } @orig_attnames;

    # When calling this function, we either have an open-bracket or a
    # leading space already.
    my $char_count = 1;

    my $hash_str      = '';
    my $element_count = 0;

    foreach my $attname (@attnames)
    {
        $element_count++;

        # To limit the line to 80 chars, we need to account for the
        # trailing characters: the last element must leave room for
        # ' },', every other one just for a trailing comma.
        my $threshold = ($element_count == @attnames) ? 77 : 79;

        if ($element_count > 1)
        {
            $hash_str .= ',';
            $char_count++;
        }

        # Work on a copy; the caller's hash is left untouched.
        my $value = $data->{$attname};

        # Make the value safe inside a single-quoted Perl string.  Within
        # single quotes only \\ and \' are escape sequences, so a backslash
        # needs doubling only where it would otherwise be taken as one:
        # before another backslash, before a quote, or at the very end
        # (where it would swallow the closing quote).  All other
        # backslashes are left alone so existing formatting is unchanged.
        $value =~ s/\\(?=[\\']|\z)/\\\\/g;

        # Escape single quotes.
        $value =~ s/'/\\'/g;

        # Include a leading space in the key-value pair, since this will
        # always go after either a comma or an additional padding space on
        # the next line.
        my $element        = " $attname => '$value'";
        my $element_length = length($element);

        # If adding the element to the current line would expand the line
        # beyond 80 chars, put it on the next line.  We don't do this for
        # the first element, since that would create a blank line.
        if ($element_count > 1 and $char_count + $element_length > $threshold)
        {

            # Put on next line with an additional space preceding.  There
            # are now two spaces in front of the key-value pair, lining
            # it up with the line above it.
            $hash_str .= "\n $element";
            $char_count = $element_length + 1;
        }
        else
        {
            $hash_str .= $element;
            $char_count += $element_length;
        }
    }
    return $hash_str;
}
| |
# Print the command line synopsis and terminate the script.  The message
# ends in a newline, so die does not append the script name and line number.
sub usage
{
    my $message = <<'END_USAGE';
Usage: reformat_dat_file.pl [options] datafile...

Options:
--output PATH output directory (default '.')
--full-tuples write out full tuples, including default values

Non-option arguments are the names of input .dat files.
Updated files are written to the output directory,
possibly overwriting the input files.

END_USAGE

    die $message;
}