src/backend/catalog/Catalog.pm - cloudberry - Git at Google

 #----------------------------------------------------------------------
 #
 # Catalog.pm
 #    Perl module that extracts info from catalog files into Perl
 #    data structures
 #
 # Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
 # Portions Copyright (c) 1994, Regents of the University of California
 #
 # src/backend/catalog/Catalog.pm
 #
 #----------------------------------------------------------------------

 package Catalog;

 use strict;
 use warnings;

 use File::Compare;


 # Parses a catalog header file into a data structure describing the schema
 # of the catalog.
 sub ParseHeader
 {
 	my $input_file = shift;

 	# There are a few types which are given one name in the C source, but a
 	# different name at the SQL level.  These are enumerated here.
 	my %RENAME_ATTTYPE = (
 		'int16' => 'int2',
 		'int32' => 'int4',
 		'int64' => 'int8',
 		'Oid' => 'oid',
 		'NameData' => 'name',
 		'TransactionId' => 'xid',
 		'XLogRecPtr' => 'pg_lsn');

 	my %catalog;
 	my $declaring_attributes = 0;
 	my $is_varlen = 0;
 	my $is_client_code = 0;

 	$catalog{columns} = [];
 	$catalog{toasting} = [];
 	$catalog{indexing} = [];
 	$catalog{other_oids} = [];
 	$catalog{foreign_keys} = [];
 	$catalog{client_code} = [];

 	open(my $ifh, '<', $input_file) || die "$input_file: $!";

 	# Scan the input file.
 	while (<$ifh>)
 	{

 		# Set appropriate flag when we're in certain code sections.
 		if (/^#/)
 		{
 			$is_varlen = 1 if /^#ifdef\s+CATALOG_VARLEN/;
 			if (/^#ifdef\s+EXPOSE_TO_CLIENT_CODE/)
 			{
 				$is_client_code = 1;
 				next;
 			}
 			next if !$is_client_code;
 		}

 		if (!$is_client_code)
 		{
 			# Strip C-style comments.
 			s;/\*(.|\n)*\*/;;g;
 			if (m;/\*;)
 			{

 				# handle multi-line comments properly.
 				my $next_line = <$ifh>;
 				die "$input_file: ends within C-style comment\n"
 				  if !defined $next_line;
 				$_ .= $next_line;
 				redo;
 			}

 			# Strip useless whitespace and trailing semicolons.
 			chomp;
 			s/^\s+//;
 			s/;\s*$//;
 			s/\s+/ /g;
 		}

 		# Push the data into the appropriate data structure.
 		# Caution: when adding new recognized OID-defining macros,
 		# also update src/include/catalog/renumber_oids.pl.
 		if (/^DECLARE_TOAST\(\s*(\w+),\s*(\d+),\s*(\d+)\)/)
 		{
 			push @{ $catalog{toasting} },
 			  { parent_table => $1, toast_oid => $2, toast_index_oid => $3 };
 		}
 		elsif (
 			/^DECLARE_TOAST_WITH_MACRO\(\s*(\w+),\s*(\d+),\s*(\d+),\s*(\w+),\s*(\w+)\)/
 		  )
 		{
 			push @{ $catalog{toasting} },
 			  {
 				parent_table => $1,
 				toast_oid => $2,
 				toast_index_oid => $3,
 				toast_oid_macro => $4,
 				toast_index_oid_macro => $5
 			  };
 		}
 		elsif (
 			/^DECLARE_(UNIQUE_)?INDEX(_PKEY)?\(\s*(\w+),\s*(\d+),\s*(\w+),\s*(.+)\)/
 		  )
 		{
 			push @{ $catalog{indexing} },
 			  {
 				is_unique => $1 ? 1 : 0,
 				is_pkey => $2 ? 1 : 0,
 				index_name => $3,
 				index_oid => $4,
 				index_oid_macro => $5,
 				index_decl => $6
 			  };
 		}
 		elsif (/^DECLARE_OID_DEFINING_MACRO\(\s*(\w+),\s*(\d+)\)/)
 		{
 			push @{ $catalog{other_oids} },
 			  {
 				other_name => $1,
 				other_oid => $2
 			  };
 		}
 		elsif (
 			/^DECLARE_(ARRAY_)?FOREIGN_KEY(_OPT)?\(\s*\(([^)]+)\),\s*(\w+),\s*\(([^)]+)\)\)/
 		  )
 		{
 			push @{ $catalog{foreign_keys} },
 			  {
 				is_array => $1 ? 1 : 0,
 				is_opt => $2 ? 1 : 0,
 				fk_cols => $3,
 				pk_table => $4,
 				pk_cols => $5
 			  };
 		}
 		elsif (/^CATALOG\((\w+),(\d+),(\w+)\)/)
 		{
 			$catalog{catname} = $1;
 			$catalog{relation_oid} = $2;
 			$catalog{relation_oid_macro} = $3;

 			$catalog{bootstrap} = /BKI_BOOTSTRAP/ ? ' bootstrap' : '';
 			$catalog{shared_relation} =
 			  /BKI_SHARED_RELATION/ ? ' shared_relation' : '';
 			if (/BKI_ROWTYPE_OID\((\d+),(\w+)\)/)
 			{
 				$catalog{rowtype_oid} = $1;
 				$catalog{rowtype_oid_clause} = " rowtype_oid $1";
 				$catalog{rowtype_oid_macro} = $2;
 			}
 			else
 			{
 				$catalog{rowtype_oid} = '';
 				$catalog{rowtype_oid_clause} = '';
 				$catalog{rowtype_oid_macro} = '';
 			}
 			$catalog{schema_macro} = /BKI_SCHEMA_MACRO/ ? 1 : 0;
 			$declaring_attributes = 1;
 		}
 		elsif ($is_client_code)
 		{
 			if (/^#endif/)
 			{
 				$is_client_code = 0;
 			}
 			else
 			{
 				push @{ $catalog{client_code} }, $_;
 			}
 		}
 		elsif ($declaring_attributes)
 		{
 			next if (/^{|^$/);
 			if (/^}/)
 			{
 				$declaring_attributes = 0;
 			}
 			else
 			{
 				my %column;
 				my @attopts = split /\s+/, $_;
 				my $atttype = shift @attopts;
 				my $attname = shift @attopts;
 				die "parse error ($input_file)"
 				  unless ($attname and $atttype);

 				if (exists $RENAME_ATTTYPE{$atttype})
 				{
 					$atttype = $RENAME_ATTTYPE{$atttype};
 				}

 				# If the C name ends with '[]' or '[digits]', we have
 				# an array type, so we discard that from the name and
 				# prepend '_' to the type.
 				if ($attname =~ /(\w+)\[\d*\]/)
 				{
 					$attname = $1;
 					$atttype = '_' . $atttype;
 				}

 				$column{type} = $atttype;
 				$column{name} = $attname;
 				$column{is_varlen} = 1 if $is_varlen;

 				foreach my $attopt (@attopts)
 				{
 					if ($attopt eq 'BKI_FORCE_NULL')
 					{
 						$column{forcenull} = 1;
 					}
 					elsif ($attopt eq 'BKI_FORCE_NOT_NULL')
 					{
 						$column{forcenotnull} = 1;
 					}

 					# We use quotes for values like \0 and \054, to
 					# make sure all compilers and syntax highlighters
 					# can recognize them properly.
 					elsif ($attopt =~ /BKI_DEFAULT\(['"]?([^'"]+)['"]?\)/)
 					{
 						$column{default} = $1;
 					}
 					elsif (
 						$attopt =~ /BKI_ARRAY_DEFAULT\(['"]?([^'"]+)['"]?\)/)
 					{
 						$column{array_default} = $1;
 					}
 					elsif ($attopt =~ /BKI_LOOKUP(_OPT)?\((\w+)\)/)
 					{
 						$column{lookup} = $2;
 						$column{lookup_opt} = $1 ? 1 : 0;
 						# BKI_LOOKUP implicitly makes an FK reference
 						push @{ $catalog{foreign_keys} },
 						  {
 							is_array => (
 								$atttype eq 'oidvector' || $atttype eq '_oid')
 							? 1
 							: 0,
 							is_opt => $column{lookup_opt},
 							fk_cols => $attname,
 							pk_table => $column{lookup},
 							pk_cols => 'oid'
 						  };
 					}
 					else
 					{
 						die
 						  "unknown or misformatted column option $attopt on column $attname";
 					}

 					if ($column{forcenull} and $column{forcenotnull})
 					{
 						die "$attname is forced both null and not null";
 					}
 				}
 				push @{ $catalog{columns} }, \%column;
 			}
 		}
 	}
 	close $ifh;
 	return \%catalog;
 }

 # Parses a file containing Perl data structure literals, returning live data.
 #
 # The parameter $preserve_formatting needs to be set for callers that want
 # to work with non-data lines in the data files, such as comments and blank
 # lines. If a caller just wants to consume the data, leave it unset.
 sub ParseData
 {
 	my ($input_file, $schema, $preserve_formatting) = @_;

 	open(my $ifd, '<', $input_file) || die "$input_file: $!";
 	$input_file =~ /(\w+)\.dat$/
 	  or die "Input file $input_file needs to be a .dat file.\n";
 	my $catname = $1;
 	my $data    = [];

 	# Scan the input file.
 	while (<$ifd>)
 	{
 		my $hash_ref;

 		if (/{/)
 		{
 			# Capture the hash ref
 			# NB: Assumes that the next hash ref can't start on the
 			# same line where the present one ended.
 			# Not foolproof, but we shouldn't need a full parser,
 			# since we expect relatively well-behaved input.

 			# Quick hack to detect when we have a full hash ref to
 			# parse. We can't just use a regex because of values in
 			# pg_aggregate and pg_proc like '{0,0}'.  This will need
 			# work if we ever need to allow unbalanced braces within
 			# a field value.
 			my $lcnt = tr/{//;
 			my $rcnt = tr/}//;

 			if ($lcnt == $rcnt)
 			{
 				# We're treating the input line as a piece of Perl, so we
 				# need to use string eval here. Tell perlcritic we know what
 				# we're doing.
 				eval '$hash_ref = ' . $_;   ## no critic (ProhibitStringyEval)
 				if (!ref $hash_ref)
 				{
 					die "$input_file: error parsing line $.:\n$_\n";
 				}

 				# Annotate each hash with the source line number.
 				$hash_ref->{line_number} = $.;

 				# Expand tuples to their full representation.
 				AddDefaultValues($hash_ref, $schema, $catname);
 			}
 			else
 			{
 				my $next_line = <$ifd>;
 				die "$input_file: file ends within Perl hash\n"
 				  if !defined $next_line;
 				$_ .= $next_line;
 				redo;
 			}
 		}

 		# If we found a hash reference, keep it, unless it is marked as
 		# autogenerated; in that case it'd duplicate an entry we'll
 		# autogenerate below.  (This makes it safe for reformat_dat_file.pl
 		# with --full-tuples to print autogenerated entries, which seems like
 		# useful behavior for debugging.)
 		#
 		# Only keep non-data strings if we are told to preserve formatting.
 		if (defined $hash_ref)
 		{
 			push @$data, $hash_ref if !$hash_ref->{autogenerated};
 		}
 		elsif ($preserve_formatting)
 		{
 			push @$data, $_;
 		}
 	}
 	close $ifd;

 	# If this is pg_type, auto-generate array types too.
 	GenerateArrayTypes($schema, $data) if $catname eq 'pg_type';

 	#
 	# GPDB:
 	# Bitmap Index AM supports all the same operations as the B-tree index, it
 	# just stores the entries differently. Make copies of all the built-in B-tree
 	# operator classes and families for the bitmap AM, too.
 	#
 	GenerateBitmapOpFamilies($schema, $data) if $catname eq 'pg_opfamily';
 	GenerateBitmapOpClasses($schema, $data) if $catname eq 'pg_opclass';
 	GenerateBitmapAmOps($schema, $data) if $catname eq 'pg_amop';
 	GenerateBitmapAmProcs($schema, $data) if $catname eq 'pg_amproc';

 	return $data;
 }

 # Fill in default values of a record using the given schema.
 # It's the caller's responsibility to specify other values beforehand.
 sub AddDefaultValues
 {
 	my ($row, $schema, $catname) = @_;
 	my @missing_fields;

 	# Compute special-case column values.
 	# Note: If you add new cases here, you must also teach
 	# strip_default_values() in include/catalog/reformat_dat_file.pl
 	# to delete them.
 	if ($catname eq 'pg_proc')
 	{
 		# pg_proc.pronargs can be derived from proargtypes.
 		if (defined $row->{proargtypes})
 		{
 			my @proargtypes = split /\s+/, $row->{proargtypes};
 			$row->{pronargs} = scalar(@proargtypes);
 		}
 	}

 	# Now fill in defaults, and note any columns that remain undefined.
 	foreach my $column (@$schema)
 	{
 		my $attname = $column->{name};

 		# No work if field already has a value.
 		next if defined $row->{$attname};

 		# Ignore 'oid' columns, they're handled elsewhere.
 		next if $attname eq 'oid';

 		# If column has a default value, fill that in.
 		if (defined $column->{default})
 		{
 			$row->{$attname} = $column->{default};
 			next;
 		}

 		# Failed to find a value for this field.
 		push @missing_fields, $attname;
 	}

 	# Failure to provide all columns is a hard error.
 	if (@missing_fields)
 	{
 		die sprintf "missing values for field(s) %s in %s.dat line %s\n",
 		  join(', ', @missing_fields), $catname, $row->{line_number};
 	}
 }

 # If a pg_type entry has an array_type_oid metadata field,
 # auto-generate an entry for its array type.
 sub GenerateArrayTypes
 {
 	my $pgtype_schema = shift;
 	my $types = shift;
 	my @array_types;

 	foreach my $elem_type (@$types)
 	{
 		next if !(ref $elem_type eq 'HASH');
 		next if !defined($elem_type->{array_type_oid});

 		my %array_type;

 		# Set up metadata fields for array type.
 		$array_type{oid} = $elem_type->{array_type_oid};
 		$array_type{autogenerated} = 1;
 		$array_type{line_number} = $elem_type->{line_number};

 		# Set up column values derived from the element type.
 		$array_type{typname} = '_' . $elem_type->{typname};
 		$array_type{typelem} = $elem_type->{typname};

 		# Arrays require INT alignment, unless the element type requires
 		# DOUBLE alignment.
 		$array_type{typalign} = $elem_type->{typalign} eq 'd' ? 'd' : 'i';

 		# Fill in the rest of the array entry's fields.
 		foreach my $column (@$pgtype_schema)
 		{
 			my $attname = $column->{name};

 			# Skip if we already set it above.
 			next if defined $array_type{$attname};

 			# Apply the BKI_ARRAY_DEFAULT setting if there is one,
 			# otherwise copy the field from the element type.
 			if (defined $column->{array_default})
 			{
 				$array_type{$attname} = $column->{array_default};
 			}
 			else
 			{
 				$array_type{$attname} = $elem_type->{$attname};
 			}
 		}

 		# Lastly, cross-link the array to the element type.
 		$elem_type->{typarray} = $array_type{typname};

 		push @array_types, \%array_type;
 	}

 	push @$types, @array_types;

 	return;
 }

 sub GenerateBitmapOpFamilies
 {
 	my $pgopfamily_schema = shift;
 	my $opfamilies         = shift;
 	my @bitmap_opfamilies;

 	foreach my $opfamily (@$opfamilies)
 	{
 		next if !(ref $opfamily eq 'HASH');
 		next if !($opfamily->{opfmethod} eq 'btree');

 		my %bitmap_opfamily;

 		# Set up metadata fields for the bitmap opfamily.
 		%bitmap_opfamily = %{$opfamily};
 		$bitmap_opfamily{autogenerated} = 1;
 		$bitmap_opfamily{opfmethod} = 'bitmap';

 		# Clear out OIDs
 		delete $bitmap_opfamily{oid};
 		delete $bitmap_opfamily{oid_symbol};

 		push @bitmap_opfamilies, \%bitmap_opfamily;
 	}

 	push @$opfamilies, @bitmap_opfamilies;

 	return;
 }

 sub GenerateBitmapOpClasses
 {
 	my $pgopclass_schema = shift;
 	my $opclasses         = shift;
 	my @bitmap_opclasses;

 	foreach my $opclass (@$opclasses)
 	{
 		next if !(ref $opclass eq 'HASH');
 		next if !($opclass->{opcmethod} eq 'btree');

 		my %bitmap_opclass;

 		# Set up metadata fields for the bitmap opclass.
 		%bitmap_opclass = %{$opclass};
 		$bitmap_opclass{autogenerated} = 1;
 		$bitmap_opclass{opcmethod} = 'bitmap';
 		$bitmap_opclass{opcfamily} =~ s/btree/bitmap/;

 		# Undo the "ugly little hack" on name_ops opclass to use
 		# 'cstring' as the key type for 'name' datatype.
 		if ($opclass->{opcname} eq 'name_ops')
 		{
 			$bitmap_opclass{opckeytype} = 0;
 		}

 		# Clear out OIDs
 		delete $bitmap_opclass{oid};
 		delete $bitmap_opclass{oid_symbol};

 		push @bitmap_opclasses, \%bitmap_opclass;
 	}

 	push @$opclasses, @bitmap_opclasses;

 	return;
 }

 sub GenerateBitmapAmOps
 {
 	my $pgamop_schema = shift;
 	my $amops         = shift;
 	my @bitmap_amops;

 	foreach my $amop (@$amops)
 	{
 		next if !(ref $amop eq 'HASH');
 		next if !($amop->{amopmethod} eq 'btree');

 		my %bitmap_amop;

 		# Set up metadata fields for the bitmap opclass.
 		%bitmap_amop = %{$amop};
 		$bitmap_amop{autogenerated} = 1;
 		$bitmap_amop{amopmethod} = 'bitmap';
 		$bitmap_amop{amopfamily} =~ s/btree/bitmap/;

 		# Clear out OIDs
 		delete $bitmap_amop{oid};
 		delete $bitmap_amop{oid_symbol};

 		push @bitmap_amops, \%bitmap_amop;
 	}

 	push @$amops, @bitmap_amops;

 	return;
 }

 sub GenerateBitmapAmProcs
 {
 	my $pgamproc_schema = shift;
 	my $amprocs         = shift;
 	my @bitmap_amprocs;

 	foreach my $amproc (@$amprocs)
 	{
 		next if !(ref $amproc eq 'HASH');
 		next if !($amproc->{amprocfamily} =~ '^btree/');

 		my %bitmap_amproc;

 		# Set up metadata fields for the bitmap opclass.
 		%bitmap_amproc = %{$amproc};
 		$bitmap_amproc{autogenerated} = 1;
 		$bitmap_amproc{amprocfamily} =~ s/btree/bitmap/;

 		# Clear out OIDs
 		delete $bitmap_amproc{oid};
 		delete $bitmap_amproc{oid_symbol};

 		push @bitmap_amprocs, \%bitmap_amproc;
 	}

 	push @$amprocs, @bitmap_amprocs;

 	return;
 }

 # Rename temporary files to final names.
 # Call this function with the final file name and the .tmp extension.
 #
 # If the final file already exists and has identical contents, don't
 # overwrite it; this behavior avoids unnecessary recompiles due to
 # updating the mod date on unchanged header files.
 #
 # Note: recommended extension is ".tmp$$", so that parallel make steps
 # can't use the same temp files.
 sub RenameTempFile
 {
 	my $final_name = shift;
 	my $extension = shift;
 	my $temp_name = $final_name . $extension;

 	if (-f $final_name
 		&& compare($temp_name, $final_name) == 0)
 	{
 		unlink($temp_name) || die "unlink: $temp_name: $!";
 	}
 	else
 	{
 		rename($temp_name, $final_name) || die "rename: $temp_name: $!";
 	}
 	return;
 }

 # Find a symbol defined in a particular header file and extract the value.
 # include_path should be the path to src/include/.
 sub FindDefinedSymbol
 {
 	my ($catalog_header, $include_path, $symbol) = @_;
 	my $value;

 	# Make sure include path ends in a slash.
 	if (substr($include_path, -1) ne '/')
 	{
 		$include_path .= '/';
 	}
 	my $file = $include_path . $catalog_header;
 	open(my $find_defined_symbol, '<', $file) || die "$file: $!";
 	while (<$find_defined_symbol>)
 	{
 		if (/^#define\s+\Q$symbol\E\s+(\S+)/)
 		{
 			$value = $1;
 			last;
 		}
 	}
 	close $find_defined_symbol;
 	return $value if defined $value;
 	die "$file: no definition found for $symbol\n";
 }

 # Similar to FindDefinedSymbol, but looks in the bootstrap metadata.
 sub FindDefinedSymbolFromData
 {
 	my ($data, $symbol) = @_;
 	foreach my $row (@{$data})
 	{
 		if ($row->{oid_symbol} eq $symbol)
 		{
 			return $row->{oid};
 		}
 	}
 	die "no definition found for $symbol\n";
 }

 # Extract an array of all the OIDs assigned in the specified catalog headers
 # and their associated data files (if any).
 # Caution: genbki.pl contains equivalent logic; change it too if you need to
 # touch this.
 sub FindAllOidsFromHeaders
 {
 	my @input_files = @_;

 	my @oids = ();

 	foreach my $header (@input_files)
 	{
 		$header =~ /(.+)\.h$/
 		  or die "Input files need to be header files.\n";
 		my $datfile = "$1.dat";

 		my $catalog = Catalog::ParseHeader($header);

 		# We ignore the pg_class OID and rowtype OID of bootstrap catalogs,
 		# as those are expected to appear in the initial data for pg_class
 		# and pg_type.  For regular catalogs, include these OIDs.
 		if (!$catalog->{bootstrap})
 		{
 			push @oids, $catalog->{relation_oid}
 			  if ($catalog->{relation_oid});
 			push @oids, $catalog->{rowtype_oid} if ($catalog->{rowtype_oid});
 		}

 		# Not all catalogs have a data file.
 		if (-e $datfile)
 		{
 			my $catdata =
 			  Catalog::ParseData($datfile, $catalog->{columns}, 0);

 			foreach my $row (@$catdata)
 			{
 				push @oids, $row->{oid} if defined $row->{oid};
 			}
 		}

 		foreach my $toast (@{ $catalog->{toasting} })
 		{
 			push @oids, $toast->{toast_oid}, $toast->{toast_index_oid};
 		}
 		foreach my $index (@{ $catalog->{indexing} })
 		{
 			push @oids, $index->{index_oid};
 		}
 		foreach my $other (@{ $catalog->{other_oids} })
 		{
 			push @oids, $other->{other_oid};
 		}
 	}

 	return \@oids;
 }

 1;
	#----------------------------------------------------------------------
	#
	# Catalog.pm
	# Perl module that extracts info from catalog files into Perl
	# data structures
	#
	# Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
	# Portions Copyright (c) 1994, Regents of the University of California
	#
	# src/backend/catalog/Catalog.pm
	#
	#----------------------------------------------------------------------

	package Catalog;

	use strict;
	use warnings;

	use File::Compare;


	# Parses a catalog header file into a data structure describing the schema
	# of the catalog.
	sub ParseHeader
	{
	my $input_file = shift;

	# There are a few types which are given one name in the C source, but a
	# different name at the SQL level. These are enumerated here.
	my %RENAME_ATTTYPE = (
	'int16' => 'int2',
	'int32' => 'int4',
	'int64' => 'int8',
	'Oid' => 'oid',
	'NameData' => 'name',
	'TransactionId' => 'xid',
	'XLogRecPtr' => 'pg_lsn');

	my %catalog;
	my $declaring_attributes = 0;
	my $is_varlen = 0;
	my $is_client_code = 0;

	$catalog{columns} = [];
	$catalog{toasting} = [];
	$catalog{indexing} = [];
	$catalog{other_oids} = [];
	$catalog{foreign_keys} = [];
	$catalog{client_code} = [];

	open(my $ifh, '<', $input_file) \|\| die "$input_file: $!";

	# Scan the input file.
	while (<$ifh>)
	{

	# Set appropriate flag when we're in certain code sections.
	if (/^#/)
	{
	$is_varlen = 1 if /^#ifdef\s+CATALOG_VARLEN/;
	if (/^#ifdef\s+EXPOSE_TO_CLIENT_CODE/)
	{
	$is_client_code = 1;
	next;
	}
	next if !$is_client_code;
	}

	if (!$is_client_code)
	{
	# Strip C-style comments.
	s;/\(.\|\n)\*/;;g;
	if (m;/\*;)
	{

	# handle multi-line comments properly.
	my $next_line = <$ifh>;
	die "$input_file: ends within C-style comment\n"
	if !defined $next_line;
	$_ .= $next_line;
	redo;
	}

	# Strip useless whitespace and trailing semicolons.
	chomp;
	s/^\s+//;
	s/;\s*$//;
	s/\s+/ /g;
	}

	# Push the data into the appropriate data structure.
	# Caution: when adding new recognized OID-defining macros,
	# also update src/include/catalog/renumber_oids.pl.
	if (/^DECLARE_TOAST\(\s(\w+),\s(\d+),\s*(\d+)\)/)
	{
	push @{ $catalog{toasting} },
	{ parent_table => $1, toast_oid => $2, toast_index_oid => $3 };
	}
	elsif (
	/^DECLARE_TOAST_WITH_MACRO\(\s(\w+),\s(\d+),\s(\d+),\s(\w+),\s*(\w+)\)/
	)
	{
	push @{ $catalog{toasting} },
	{
	parent_table => $1,
	toast_oid => $2,
	toast_index_oid => $3,
	toast_oid_macro => $4,
	toast_index_oid_macro => $5
	};
	}
	elsif (
	/^DECLARE_(UNIQUE_)?INDEX(_PKEY)?\(\s(\w+),\s(\d+),\s(\w+),\s(.+)\)/
	)
	{
	push @{ $catalog{indexing} },
	{
	is_unique => $1 ? 1 : 0,
	is_pkey => $2 ? 1 : 0,
	index_name => $3,
	index_oid => $4,
	index_oid_macro => $5,
	index_decl => $6
	};
	}
	elsif (/^DECLARE_OID_DEFINING_MACRO\(\s(\w+),\s(\d+)\)/)
	{
	push @{ $catalog{other_oids} },
	{
	other_name => $1,
	other_oid => $2
	};
	}
	elsif (
	/^DECLARE_(ARRAY_)?FOREIGN_KEY(_OPT)?\(\s\(([^)]+)\),\s(\w+),\s*\(([^)]+)\)\)/
	)
	{
	push @{ $catalog{foreign_keys} },
	{
	is_array => $1 ? 1 : 0,
	is_opt => $2 ? 1 : 0,
	fk_cols => $3,
	pk_table => $4,
	pk_cols => $5
	};
	}
	elsif (/^CATALOG\((\w+),(\d+),(\w+)\)/)
	{
	$catalog{catname} = $1;
	$catalog{relation_oid} = $2;
	$catalog{relation_oid_macro} = $3;

	$catalog{bootstrap} = /BKI_BOOTSTRAP/ ? ' bootstrap' : '';
	$catalog{shared_relation} =
	/BKI_SHARED_RELATION/ ? ' shared_relation' : '';
	if (/BKI_ROWTYPE_OID\((\d+),(\w+)\)/)
	{
	$catalog{rowtype_oid} = $1;
	$catalog{rowtype_oid_clause} = " rowtype_oid $1";
	$catalog{rowtype_oid_macro} = $2;
	}
	else
	{
	$catalog{rowtype_oid} = '';
	$catalog{rowtype_oid_clause} = '';
	$catalog{rowtype_oid_macro} = '';
	}
	$catalog{schema_macro} = /BKI_SCHEMA_MACRO/ ? 1 : 0;
	$declaring_attributes = 1;
	}
	elsif ($is_client_code)
	{
	if (/^#endif/)
	{
	$is_client_code = 0;
	}
	else
	{
	push @{ $catalog{client_code} }, $_;
	}
	}
	elsif ($declaring_attributes)
	{
	next if (/^{\|^$/);
	if (/^}/)
	{
	$declaring_attributes = 0;
	}
	else
	{
	my %column;
	my @attopts = split /\s+/, $_;
	my $atttype = shift @attopts;
	my $attname = shift @attopts;
	die "parse error ($input_file)"
	unless ($attname and $atttype);

	if (exists $RENAME_ATTTYPE{$atttype})
	{
	$atttype = $RENAME_ATTTYPE{$atttype};
	}

	# If the C name ends with '[]' or '[digits]', we have
	# an array type, so we discard that from the name and
	# prepend '_' to the type.
	if ($attname =~ /(\w+)\[\d*\]/)
	{
	$attname = $1;
	$atttype = '_' . $atttype;
	}

	$column{type} = $atttype;
	$column{name} = $attname;
	$column{is_varlen} = 1 if $is_varlen;

	foreach my $attopt (@attopts)
	{
	if ($attopt eq 'BKI_FORCE_NULL')
	{
	$column{forcenull} = 1;
	}
	elsif ($attopt eq 'BKI_FORCE_NOT_NULL')
	{
	$column{forcenotnull} = 1;
	}

	# We use quotes for values like \0 and \054, to
	# make sure all compilers and syntax highlighters
	# can recognize them properly.
	elsif ($attopt =~ /BKI_DEFAULT\(['"]?([^'"]+)['"]?\)/)
	{
	$column{default} = $1;
	}
	elsif (
	$attopt =~ /BKI_ARRAY_DEFAULT\(['"]?([^'"]+)['"]?\)/)
	{
	$column{array_default} = $1;
	}
	elsif ($attopt =~ /BKI_LOOKUP(_OPT)?\((\w+)\)/)
	{
	$column{lookup} = $2;
	$column{lookup_opt} = $1 ? 1 : 0;
	# BKI_LOOKUP implicitly makes an FK reference
	push @{ $catalog{foreign_keys} },
	{
	is_array => (
	$atttype eq 'oidvector' \|\| $atttype eq '_oid')
	? 1
	: 0,
	is_opt => $column{lookup_opt},
	fk_cols => $attname,
	pk_table => $column{lookup},
	pk_cols => 'oid'
	};
	}
	else
	{
	die
	"unknown or misformatted column option $attopt on column $attname";
	}

	if ($column{forcenull} and $column{forcenotnull})
	{
	die "$attname is forced both null and not null";
	}
	}
	push @{ $catalog{columns} }, \%column;
	}
	}
	}
	close $ifh;
	return \%catalog;
	}

	# Parses a file containing Perl data structure literals, returning live data.
	#
	# The parameter $preserve_formatting needs to be set for callers that want
	# to work with non-data lines in the data files, such as comments and blank
	# lines. If a caller just wants to consume the data, leave it unset.
	sub ParseData
	{
	my ($input_file, $schema, $preserve_formatting) = @_;

	open(my $ifd, '<', $input_file) \|\| die "$input_file: $!";
	$input_file =~ /(\w+)\.dat$/
	or die "Input file $input_file needs to be a .dat file.\n";
	my $catname = $1;
	my $data = [];

	# Scan the input file.
	while (<$ifd>)
	{
	my $hash_ref;

	if (/{/)
	{
	# Capture the hash ref
	# NB: Assumes that the next hash ref can't start on the
	# same line where the present one ended.
	# Not foolproof, but we shouldn't need a full parser,
	# since we expect relatively well-behaved input.

	# Quick hack to detect when we have a full hash ref to
	# parse. We can't just use a regex because of values in
	# pg_aggregate and pg_proc like '{0,0}'. This will need
	# work if we ever need to allow unbalanced braces within
	# a field value.
	my $lcnt = tr/{//;
	my $rcnt = tr/}//;

	if ($lcnt == $rcnt)
	{
	# We're treating the input line as a piece of Perl, so we
	# need to use string eval here. Tell perlcritic we know what
	# we're doing.
	eval '$hash_ref = ' . $_; ## no critic (ProhibitStringyEval)
	if (!ref $hash_ref)
	{
	die "$input_file: error parsing line $.:\n$_\n";
	}

	# Annotate each hash with the source line number.
	$hash_ref->{line_number} = $.;

	# Expand tuples to their full representation.
	AddDefaultValues($hash_ref, $schema, $catname);
	}
	else
	{
	my $next_line = <$ifd>;
	die "$input_file: file ends within Perl hash\n"
	if !defined $next_line;
	$_ .= $next_line;
	redo;
	}
	}

	# If we found a hash reference, keep it, unless it is marked as
	# autogenerated; in that case it'd duplicate an entry we'll
	# autogenerate below. (This makes it safe for reformat_dat_file.pl
	# with --full-tuples to print autogenerated entries, which seems like
	# useful behavior for debugging.)
	#
	# Only keep non-data strings if we are told to preserve formatting.
	if (defined $hash_ref)
	{
	push @$data, $hash_ref if !$hash_ref->{autogenerated};
	}
	elsif ($preserve_formatting)
	{
	push @$data, $_;
	}
	}
	close $ifd;

	# If this is pg_type, auto-generate array types too.
	GenerateArrayTypes($schema, $data) if $catname eq 'pg_type';

	#
	# GPDB:
	# Bitmap Index AM supports all the same operations as the B-tree index, it
	# just stores the entries differently. Make copies of all the built-in B-tree
	# operator classes and families for the bitmap AM, too.
	#
	GenerateBitmapOpFamilies($schema, $data) if $catname eq 'pg_opfamily';
	GenerateBitmapOpClasses($schema, $data) if $catname eq 'pg_opclass';
	GenerateBitmapAmOps($schema, $data) if $catname eq 'pg_amop';
	GenerateBitmapAmProcs($schema, $data) if $catname eq 'pg_amproc';

	return $data;
	}

	# Fill in default values of a record using the given schema.
	# It's the caller's responsibility to specify other values beforehand.
	sub AddDefaultValues
	{
	my ($row, $schema, $catname) = @_;
	my @missing_fields;

	# Compute special-case column values.
	# Note: If you add new cases here, you must also teach
	# strip_default_values() in include/catalog/reformat_dat_file.pl
	# to delete them.
	if ($catname eq 'pg_proc')
	{
	# pg_proc.pronargs can be derived from proargtypes.
	if (defined $row->{proargtypes})
	{
	my @proargtypes = split /\s+/, $row->{proargtypes};
	$row->{pronargs} = scalar(@proargtypes);
	}
	}

	# Now fill in defaults, and note any columns that remain undefined.
	foreach my $column (@$schema)
	{
	my $attname = $column->{name};

	# No work if field already has a value.
	next if defined $row->{$attname};

	# Ignore 'oid' columns, they're handled elsewhere.
	next if $attname eq 'oid';

	# If column has a default value, fill that in.
	if (defined $column->{default})
	{
	$row->{$attname} = $column->{default};
	next;
	}

	# Failed to find a value for this field.
	push @missing_fields, $attname;
	}

	# Failure to provide all columns is a hard error.
	if (@missing_fields)
	{
	die sprintf "missing values for field(s) %s in %s.dat line %s\n",
	join(', ', @missing_fields), $catname, $row->{line_number};
	}
	}

	# If a pg_type entry has an array_type_oid metadata field,
	# auto-generate an entry for its array type.
	sub GenerateArrayTypes
	{
	my $pgtype_schema = shift;
	my $types = shift;
	my @array_types;

	foreach my $elem_type (@$types)
	{
	next if !(ref $elem_type eq 'HASH');
	next if !defined($elem_type->{array_type_oid});

	my %array_type;

	# Set up metadata fields for array type.
	$array_type{oid} = $elem_type->{array_type_oid};
	$array_type{autogenerated} = 1;
	$array_type{line_number} = $elem_type->{line_number};

	# Set up column values derived from the element type.
	$array_type{typname} = '_' . $elem_type->{typname};
	$array_type{typelem} = $elem_type->{typname};

	# Arrays require INT alignment, unless the element type requires
	# DOUBLE alignment.
	$array_type{typalign} = $elem_type->{typalign} eq 'd' ? 'd' : 'i';

	# Fill in the rest of the array entry's fields.
	foreach my $column (@$pgtype_schema)
	{
	my $attname = $column->{name};

	# Skip if we already set it above.
	next if defined $array_type{$attname};

	# Apply the BKI_ARRAY_DEFAULT setting if there is one,
	# otherwise copy the field from the element type.
	if (defined $column->{array_default})
	{
	$array_type{$attname} = $column->{array_default};
	}
	else
	{
	$array_type{$attname} = $elem_type->{$attname};
	}
	}

	# Lastly, cross-link the array to the element type.
	$elem_type->{typarray} = $array_type{typname};

	push @array_types, \%array_type;
	}

	push @$types, @array_types;

	return;
	}

	sub GenerateBitmapOpFamilies
	{
	my $pgopfamily_schema = shift;
	my $opfamilies = shift;
	my @bitmap_opfamilies;

	foreach my $opfamily (@$opfamilies)
	{
	next if !(ref $opfamily eq 'HASH');
	next if !($opfamily->{opfmethod} eq 'btree');

	my %bitmap_opfamily;

	# Set up metadata fields for the bitmap opfamily.
	%bitmap_opfamily = %{$opfamily};
	$bitmap_opfamily{autogenerated} = 1;
	$bitmap_opfamily{opfmethod} = 'bitmap';

	# Clear out OIDs
	delete $bitmap_opfamily{oid};
	delete $bitmap_opfamily{oid_symbol};

	push @bitmap_opfamilies, \%bitmap_opfamily;
	}

	push @$opfamilies, @bitmap_opfamilies;

	return;
	}

	sub GenerateBitmapOpClasses
	{
	my $pgopclass_schema = shift;
	my $opclasses = shift;
	my @bitmap_opclasses;

	foreach my $opclass (@$opclasses)
	{
	next if !(ref $opclass eq 'HASH');
	next if !($opclass->{opcmethod} eq 'btree');

	my %bitmap_opclass;

	# Set up metadata fields for the bitmap opclass.
	%bitmap_opclass = %{$opclass};
	$bitmap_opclass{autogenerated} = 1;
	$bitmap_opclass{opcmethod} = 'bitmap';
	$bitmap_opclass{opcfamily} =~ s/btree/bitmap/;

	# Undo the "ugly little hack" on name_ops opclass to use
	# 'cstring' as the key type for 'name' datatype.
	if ($opclass->{opcname} eq 'name_ops')
	{
	$bitmap_opclass{opckeytype} = 0;
	}

	# Clear out OIDs
	delete $bitmap_opclass{oid};
	delete $bitmap_opclass{oid_symbol};

	push @bitmap_opclasses, \%bitmap_opclass;
	}

	push @$opclasses, @bitmap_opclasses;

	return;
	}

	sub GenerateBitmapAmOps
	{
	my $pgamop_schema = shift;
	my $amops = shift;
	my @bitmap_amops;

	foreach my $amop (@$amops)
	{
	next if !(ref $amop eq 'HASH');
	next if !($amop->{amopmethod} eq 'btree');

	my %bitmap_amop;

	# Set up metadata fields for the bitmap opclass.
	%bitmap_amop = %{$amop};
	$bitmap_amop{autogenerated} = 1;
	$bitmap_amop{amopmethod} = 'bitmap';
	$bitmap_amop{amopfamily} =~ s/btree/bitmap/;

	# Clear out OIDs
	delete $bitmap_amop{oid};
	delete $bitmap_amop{oid_symbol};

	push @bitmap_amops, \%bitmap_amop;
	}

	push @$amops, @bitmap_amops;

	return;
	}

	sub GenerateBitmapAmProcs
	{
	my $pgamproc_schema = shift;
	my $amprocs = shift;
	my @bitmap_amprocs;

	foreach my $amproc (@$amprocs)
	{
	next if !(ref $amproc eq 'HASH');
	next if !($amproc->{amprocfamily} =~ '^btree/');

	my %bitmap_amproc;

	# Set up metadata fields for the bitmap opclass.
	%bitmap_amproc = %{$amproc};
	$bitmap_amproc{autogenerated} = 1;
	$bitmap_amproc{amprocfamily} =~ s/btree/bitmap/;

	# Clear out OIDs
	delete $bitmap_amproc{oid};
	delete $bitmap_amproc{oid_symbol};

	push @bitmap_amprocs, \%bitmap_amproc;
	}

	push @$amprocs, @bitmap_amprocs;

	return;
	}

	# Rename temporary files to final names.
	# Call this function with the final file name and the .tmp extension.
	#
	# If the final file already exists and has identical contents, don't
	# overwrite it; this behavior avoids unnecessary recompiles due to
	# updating the mod date on unchanged header files.
	#
	# Note: recommended extension is ".tmp$$", so that parallel make steps
	# can't use the same temp files.
	sub RenameTempFile
	{
	my $final_name = shift;
	my $extension = shift;
	my $temp_name = $final_name . $extension;

	if (-f $final_name
	&& compare($temp_name, $final_name) == 0)
	{
	unlink($temp_name) \|\| die "unlink: $temp_name: $!";
	}
	else
	{
	rename($temp_name, $final_name) \|\| die "rename: $temp_name: $!";
	}
	return;
	}

	# Find a symbol defined in a particular header file and extract the value.
	# include_path should be the path to src/include/.
	sub FindDefinedSymbol
	{
	my ($catalog_header, $include_path, $symbol) = @_;
	my $value;

	# Make sure include path ends in a slash.
	if (substr($include_path, -1) ne '/')
	{
	$include_path .= '/';
	}
	my $file = $include_path . $catalog_header;
	open(my $find_defined_symbol, '<', $file) \|\| die "$file: $!";
	while (<$find_defined_symbol>)
	{
	if (/^#define\s+\Q$symbol\E\s+(\S+)/)
	{
	$value = $1;
	last;
	}
	}
	close $find_defined_symbol;
	return $value if defined $value;
	die "$file: no definition found for $symbol\n";
	}

	# Similar to FindDefinedSymbol, but looks in the bootstrap metadata.
	sub FindDefinedSymbolFromData
	{
	my ($data, $symbol) = @_;
	foreach my $row (@{$data})
	{
	if ($row->{oid_symbol} eq $symbol)
	{
	return $row->{oid};
	}
	}
	die "no definition found for $symbol\n";
	}

	# Extract an array of all the OIDs assigned in the specified catalog headers
	# and their associated data files (if any).
	# Caution: genbki.pl contains equivalent logic; change it too if you need to
	# touch this.
	sub FindAllOidsFromHeaders
	{
	my @input_files = @_;

	my @oids = ();

	foreach my $header (@input_files)
	{
	$header =~ /(.+)\.h$/
	or die "Input files need to be header files.\n";
	my $datfile = "$1.dat";

	my $catalog = Catalog::ParseHeader($header);

	# We ignore the pg_class OID and rowtype OID of bootstrap catalogs,
	# as those are expected to appear in the initial data for pg_class
	# and pg_type. For regular catalogs, include these OIDs.
	if (!$catalog->{bootstrap})
	{
	push @oids, $catalog->{relation_oid}
	if ($catalog->{relation_oid});
	push @oids, $catalog->{rowtype_oid} if ($catalog->{rowtype_oid});
	}

	# Not all catalogs have a data file.
	if (-e $datfile)
	{
	my $catdata =
	Catalog::ParseData($datfile, $catalog->{columns}, 0);

	foreach my $row (@$catdata)
	{
	push @oids, $row->{oid} if defined $row->{oid};
	}
	}

	foreach my $toast (@{ $catalog->{toasting} })
	{
	push @oids, $toast->{toast_oid}, $toast->{toast_index_oid};
	}
	foreach my $index (@{ $catalog->{indexing} })
	{
	push @oids, $index->{index_oid};
	}
	foreach my $other (@{ $catalog->{other_oids} })
	{
	push @oids, $other->{other_oid};
	}
	}

	return \@oids;
	}

	1;