src/common/unicode/generate-unicode_combining_table.pl - cloudberry - Git at Google

 #!/usr/bin/perl
 #
 # Generate sorted list of non-overlapping intervals of non-spacing
 # characters, using Unicode data files as input.  Pass UnicodeData.txt
 # as argument.  The output is on stdout.
 #
 # Copyright (c) 2019-2021, PostgreSQL Global Development Group

 use strict;
 use warnings;

 # State carried across input lines:
 #   $range_start    - first codepoint of the run of combining characters
 #                     currently being collected; undef when no run is open.
 #   $prev_codepoint - most recent codepoint <= 0xFFFF that was processed;
 #                     it becomes the upper bound when a run is closed.
 my $range_start;
 my $prev_codepoint;

 print
   "/* generated by src/common/unicode/generate-unicode_combining_table.pl, do not edit */\n\n";

 print "static const struct mbinterval combining[] = {\n";

 # Read the input line by line; evaluating <ARGV> in list context (as a
 # foreach would) slurps the whole file into memory first.
 while (my $line = <ARGV>)
 {
 	chomp $line;

 	# Records are semicolon-separated: field 0 is the codepoint in hex,
 	# field 2 is the general category.  Skip blank or truncated lines
 	# rather than emitting "uninitialized value" warnings for them.
 	my @fields = split ';', $line;
 	next if @fields < 3;

 	my $codepoint = hex $fields[0];

 	# Only codepoints up to 0xFFFF go into the table.  Skipped lines must
 	# not touch $prev_codepoint, otherwise a run that is still open here
 	# would later be closed with an out-of-range upper bound.
 	next if $codepoint > 0xFFFF;

 	if ($fields[2] eq 'Me' || $fields[2] eq 'Mn')
 	{
 		# combining character: open a new run unless one is in progress
 		$range_start = $codepoint unless defined $range_start;
 	}
 	elsif (defined $range_start)
 	{
 		# not a combining character: the open run ended on the previous line
 		printf "\t{0x%04X, 0x%04X},\n", $range_start, $prev_codepoint;
 		$range_start = undef;
 	}

 	$prev_codepoint = $codepoint;
 }

 # A run that is still open when the input (or the <= 0xFFFF part of it)
 # ends has no following non-combining line to close it, so flush it here.
 if (defined $range_start)
 {
 	printf "\t{0x%04X, 0x%04X},\n", $range_start, $prev_codepoint;
 }

 print "};\n";
	#!/usr/bin/perl
	#
	# Generate sorted list of non-overlapping intervals of non-spacing
	# characters, using Unicode data files as input. Pass UnicodeData.txt
	# as argument. The output is on stdout.
	#
	# Copyright (c) 2019-2021, PostgreSQL Global Development Group

	use strict;
	use warnings;

	# State carried across input lines:
	#   $range_start    - first codepoint of the run of combining characters
	#                     currently being collected; undef when no run is open.
	#   $prev_codepoint - most recent codepoint <= 0xFFFF that was processed;
	#                     it becomes the upper bound when a run is closed.
	my $range_start;
	my $prev_codepoint;

	print
	"/* generated by src/common/unicode/generate-unicode_combining_table.pl, do not edit */\n\n";

	print "static const struct mbinterval combining[] = {\n";

	# Read the input line by line; evaluating <ARGV> in list context (as a
	# foreach would) slurps the whole file into memory first.
	while (my $line = <ARGV>)
	{
		chomp $line;

		# Records are semicolon-separated: field 0 is the codepoint in hex,
		# field 2 is the general category.  Skip blank or truncated lines
		# rather than emitting "uninitialized value" warnings for them.
		my @fields = split ';', $line;
		next if @fields < 3;

		my $codepoint = hex $fields[0];

		# Only codepoints up to 0xFFFF go into the table.  Skipped lines must
		# not touch $prev_codepoint, otherwise a run that is still open here
		# would later be closed with an out-of-range upper bound.
		next if $codepoint > 0xFFFF;

		# The category test uses a plain logical-or of two string comparisons.
		if ($fields[2] eq 'Me' || $fields[2] eq 'Mn')
		{
			# combining character: open a new run unless one is in progress
			$range_start = $codepoint unless defined $range_start;
		}
		elsif (defined $range_start)
		{
			# not a combining character: the open run ended on the previous line
			printf "\t{0x%04X, 0x%04X},\n", $range_start, $prev_codepoint;
			$range_start = undef;
		}

		$prev_codepoint = $codepoint;
	}

	# A run that is still open when the input (or the <= 0xFFFF part of it)
	# ends has no following non-combining line to close it, so flush it here.
	if (defined $range_start)
	{
		printf "\t{0x%04X, 0x%04X},\n", $range_start, $prev_codepoint;
	}

	print "};\n";