.github/scripts/benchmark_formatter.py - casbin - Git at Google

 import pathlib, re, sys

 try:
     p = pathlib.Path("comparison.md")
     if not p.exists():
         print("comparison.md not found, skipping post-processing.")
         sys.exit(0)

     lines = p.read_text(encoding="utf-8").splitlines()
     processed_lines = []
     in_code = False
     def strip_worker_suffix(text: str) -> str:
         return re.sub(r'(\S+?)-\d+(\s|$)', r'\1\2', text)

     def get_icon(diff_val: float) -> str:
         if diff_val > 10:
             return "🐌"
         if diff_val < -10:
             return "🚀"
         return "➡️"

     def clean_superscripts(text: str) -> str:
         return re.sub(r'[¹²³⁴⁵⁶⁷⁸⁹⁰]', '', text)

     def parse_val(token: str):
         if '%' in token or '=' in token:
             return None
         token = clean_superscripts(token)
         token = token.split('±')[0].strip()
         token = token.split('(')[0].strip()
         if not token:
             return None

         m = re.match(r'^([-+]?\d*\.?\d+)([a-zA-Zµ]+)?$', token)
         if not m:
             return None
         try:
             val = float(m.group(1))
         except ValueError:
             return None
         suffix = (m.group(2) or "").replace("µ", "u")
         multipliers = {
             "n": 1e-9,
             "ns": 1e-9,
             "u": 1e-6,
             "us": 1e-6,
             "m": 1e-3,
             "ms": 1e-3,
             "s": 1.0,
             "k": 1e3,
             "K": 1e3,
             "M": 1e6,
             "G": 1e9,
             "Ki": 1024.0,
             "Mi": 1024.0**2,
             "Gi": 1024.0**3,
             "Ti": 1024.0**4,
             "B": 1.0,
             "B/op": 1.0,
             "C": 1.0,  # tolerate degree/unit markers that don't affect ratio
         }
         return val * multipliers.get(suffix, 1.0)

     def extract_two_numbers(tokens):
         found = []
         for t in tokens[1:]:  # skip name
             if t in {"±", "∞", "~", "│", "│"}:
                 continue
             if '%' in t or '=' in t:
                 continue
             val = parse_val(t)
             if val is not None:
                 found.append(val)
                 if len(found) == 2:
                     break
         return found

     # Pass 0:
     # 1. find a header line with pipes to derive alignment hint
     # 2. calculate max content width to ensure right-most alignment
     max_content_width = 0

     for line in lines:
         if line.strip() == "```":
             in_code = not in_code
             continue
         if not in_code:
             continue

         # Skip footnotes/meta for width calculation
         if re.match(r'^\s*[¹²³⁴⁵⁶⁷⁸⁹⁰]', line) or re.search(r'need\s*>?=\s*\d+\s+samples', line):
             continue
         if not line.strip() or line.strip().startswith(('goos:', 'goarch:', 'pkg:', 'cpu:')):
             continue
         # Header lines are handled separately in Pass 1
         if '│' in line and ('vs base' in line or 'old' in line or 'new' in line):
             continue

         # It's likely a data line
         # Check if it has an existing percentage we might move/align
         curr_line = strip_worker_suffix(line).rstrip()
         pct_match = re.search(r'([+-]?\d+\.\d+)%', curr_line)
         if pct_match:
             # If we are going to realign this, we count width up to the percentage
             w = len(curr_line[:pct_match.start()].rstrip())
         else:
             w = len(curr_line)

         if w > max_content_width:
             max_content_width = w

     # Calculate global alignment target for Diff column
     # Ensure target column is beyond the longest line with some padding
     diff_col_start = max_content_width + 4

     # Calculate right boundary (pipe) position
     # Diff column width ~12 chars (e.g. "+100.00% 🚀")
     right_boundary = diff_col_start + 14

     # Reset code fence tracking state for Pass 1
     in_code = False
     for line in lines:

         if line.strip() == "```":
             in_code = not in_code
             processed_lines.append(line)
             continue

         if not in_code:
             processed_lines.append(line)
             continue

         # footnotes keep untouched
         if re.match(r'^\s*[¹²³⁴⁵⁶⁷⁸⁹⁰]', line) or re.search(r'need\s*>?=\s*\d+\s+samples', line):
             processed_lines.append(line)
             continue

         # header lines: ensure last column labeled Diff and force alignment
         if '│' in line and ('vs base' in line or 'old' in line or 'new' in line):
             # Strip trailing pipe and whitespace
             stripped_header = line.rstrip().rstrip('│').rstrip()

             # If "vs base" is present, ensure we don't duplicate "Diff" if it's already there
             # But we want to enforce OUR alignment, so we might strip existing Diff
             stripped_header = re.sub(r'\s+Diff\s*$', '', stripped_header, flags=re.IGNORECASE)
             stripped_header = re.sub(r'\s+Delta\b', '', stripped_header, flags=re.IGNORECASE)

             # Pad to diff_col_start
             if len(stripped_header) < diff_col_start:
                 new_header = stripped_header + " " * (diff_col_start - len(stripped_header))
             else:
                 new_header = stripped_header + "  "

             # Add Diff column header if it's the second header row (vs base)
             if 'vs base' in line:
                 new_header += "Diff"

             # Add closing pipe at the right boundary
             current_len = len(new_header)
             if current_len < right_boundary:
                 new_header += " " * (right_boundary - current_len)

             new_header += "│"
             processed_lines.append(new_header)
             continue

         # non-data meta lines
         if not line.strip() or line.strip().startswith(('goos:', 'goarch:', 'pkg:')):
             processed_lines.append(line)
             continue

         line = strip_worker_suffix(line)
         tokens = line.split()
         if not tokens:
             processed_lines.append(line)
             continue

         numbers = extract_two_numbers(tokens)
         pct_match = re.search(r'([+-]?\d+\.\d+)%', line)

         # Helper to align and append
         def append_aligned(left_part, content):
             if len(left_part) < diff_col_start:
                 aligned = left_part + " " * (diff_col_start - len(left_part))
             else:
                 aligned = left_part + "  "

             # Ensure content doesn't exceed right boundary (visual check only, we don't truncate)
             # But users asked not to exceed header pipe.
             # Header pipe is at right_boundary.
             # Content starts at diff_col_start.
             # So content length should be <= right_boundary - diff_col_start
             return f"{aligned}{content}"

         # Special handling for geomean when values missing or zero
         is_geomean = tokens[0] == "geomean"
         if is_geomean and (len(numbers) < 2 or any(v == 0 for v in numbers)) and not pct_match:
             leading = re.match(r'^\s*', line).group(0)
             left = f"{leading}geomean"
             processed_lines.append(append_aligned(left, "n/a (has zero)"))
             continue

         # when both values are zero, force diff = 0 and align
         if len(numbers) == 2 and numbers[0] == 0 and numbers[1] == 0:
             diff_val = 0.0
             icon = get_icon(diff_val)
             left = line.rstrip()
             processed_lines.append(append_aligned(left, f"{diff_val:+.2f}% {icon}"))
             continue

         # recompute diff when we have two numeric values
         if len(numbers) == 2 and numbers[0] != 0:
             diff_val = (numbers[1] - numbers[0]) / numbers[0] * 100
             icon = get_icon(diff_val)

             left = line
             if pct_match:
                 left = line[:pct_match.start()].rstrip()
             else:
                 left = line.rstrip()

             processed_lines.append(append_aligned(left, f"{diff_val:+.2f}% {icon}"))
             continue

         # fallback: align existing percentage to Diff column and (re)append icon
         if pct_match:
             try:
                 pct_val = float(pct_match.group(1))
                 icon = get_icon(pct_val)

                 left = line[:pct_match.start()].rstrip()
                 suffix = line[pct_match.end():]
                 # Remove any existing icon after the percentage to avoid duplicates
                 suffix = re.sub(r'\s*(🐌|🚀|➡️)', '', suffix)

                 processed_lines.append(append_aligned(left, f"{pct_val:+.2f}% {icon}{suffix}"))
             except ValueError:
                 processed_lines.append(line)
             continue

         # If we cannot parse numbers or percentages, keep the original (only worker suffix stripped)
         processed_lines.append(line)

     p.write_text("\n".join(processed_lines) + "\n", encoding="utf-8")

 except Exception as e:
     print(f"Error post-processing comparison.md: {e}")
     sys.exit(1)
	import pathlib, re, sys

	try:
	p = pathlib.Path("comparison.md")
	if not p.exists():
	print("comparison.md not found, skipping post-processing.")
	sys.exit(0)

	lines = p.read_text(encoding="utf-8").splitlines()
	processed_lines = []
	in_code = False
	def strip_worker_suffix(text: str) -> str:
	return re.sub(r'(\S+?)-\d+(\s\|$)', r'\1\2', text)

	def get_icon(diff_val: float) -> str:
	if diff_val > 10:
	return "🐌"
	if diff_val < -10:
	return "🚀"
	return "➡️"

	def clean_superscripts(text: str) -> str:
	return re.sub(r'[¹²³⁴⁵⁶⁷⁸⁹⁰]', '', text)

	def parse_val(token: str):
	if '%' in token or '=' in token:
	return None
	token = clean_superscripts(token)
	token = token.split('±')[0].strip()
	token = token.split('(')[0].strip()
	if not token:
	return None

	m = re.match(r'^([-+]?\d*\.?\d+)([a-zA-Zµ]+)?$', token)
	if not m:
	return None
	try:
	val = float(m.group(1))
	except ValueError:
	return None
	suffix = (m.group(2) or "").replace("µ", "u")
	multipliers = {
	"n": 1e-9,
	"ns": 1e-9,
	"u": 1e-6,
	"us": 1e-6,
	"m": 1e-3,
	"ms": 1e-3,
	"s": 1.0,
	"k": 1e3,
	"K": 1e3,
	"M": 1e6,
	"G": 1e9,
	"Ki": 1024.0,
	"Mi": 1024.0**2,
	"Gi": 1024.0**3,
	"Ti": 1024.0**4,
	"B": 1.0,
	"B/op": 1.0,
	"C": 1.0, # tolerate degree/unit markers that don't affect ratio
	}
	return val * multipliers.get(suffix, 1.0)

	def extract_two_numbers(tokens):
	found = []
	for t in tokens[1:]: # skip name
	if t in {"±", "∞", "~", "│", "│"}:
	continue
	if '%' in t or '=' in t:
	continue
	val = parse_val(t)
	if val is not None:
	found.append(val)
	if len(found) == 2:
	break
	return found

	# Pass 0:
	# 1. find a header line with pipes to derive alignment hint
	# 2. calculate max content width to ensure right-most alignment
	max_content_width = 0

	for line in lines:
	if line.strip() == "```":
	in_code = not in_code
	continue
	if not in_code:
	continue

	# Skip footnotes/meta for width calculation
	if re.match(r'^\s[¹²³⁴⁵⁶⁷⁸⁹⁰]', line) or re.search(r'need\s>?=\s*\d+\s+samples', line):
	continue
	if not line.strip() or line.strip().startswith(('goos:', 'goarch:', 'pkg:', 'cpu:')):
	continue
	# Header lines are handled separately in Pass 1
	if '│' in line and ('vs base' in line or 'old' in line or 'new' in line):
	continue

	# It's likely a data line
	# Check if it has an existing percentage we might move/align
	curr_line = strip_worker_suffix(line).rstrip()
	pct_match = re.search(r'([+-]?\d+\.\d+)%', curr_line)
	if pct_match:
	# If we are going to realign this, we count width up to the percentage
	w = len(curr_line[:pct_match.start()].rstrip())
	else:
	w = len(curr_line)

	if w > max_content_width:
	max_content_width = w

	# Calculate global alignment target for Diff column
	# Ensure target column is beyond the longest line with some padding
	diff_col_start = max_content_width + 4

	# Calculate right boundary (pipe) position
	# Diff column width ~12 chars (e.g. "+100.00% 🚀")
	right_boundary = diff_col_start + 14

	# Reset code fence tracking state for Pass 1
	in_code = False
	for line in lines:

	if line.strip() == "```":
	in_code = not in_code
	processed_lines.append(line)
	continue

	if not in_code:
	processed_lines.append(line)
	continue

	# footnotes keep untouched
	if re.match(r'^\s[¹²³⁴⁵⁶⁷⁸⁹⁰]', line) or re.search(r'need\s>?=\s*\d+\s+samples', line):
	processed_lines.append(line)
	continue

	# header lines: ensure last column labeled Diff and force alignment
	if '│' in line and ('vs base' in line or 'old' in line or 'new' in line):
	# Strip trailing pipe and whitespace
	stripped_header = line.rstrip().rstrip('│').rstrip()

	# If "vs base" is present, ensure we don't duplicate "Diff" if it's already there
	# But we want to enforce OUR alignment, so we might strip existing Diff
	stripped_header = re.sub(r'\s+Diff\s*$', '', stripped_header, flags=re.IGNORECASE)
	stripped_header = re.sub(r'\s+Delta\b', '', stripped_header, flags=re.IGNORECASE)

	# Pad to diff_col_start
	if len(stripped_header) < diff_col_start:
	new_header = stripped_header + " " * (diff_col_start - len(stripped_header))
	else:
	new_header = stripped_header + " "

	# Add Diff column header if it's the second header row (vs base)
	if 'vs base' in line:
	new_header += "Diff"

	# Add closing pipe at the right boundary
	current_len = len(new_header)
	if current_len < right_boundary:
	new_header += " " * (right_boundary - current_len)

	new_header += "│"
	processed_lines.append(new_header)
	continue

	# non-data meta lines
	if not line.strip() or line.strip().startswith(('goos:', 'goarch:', 'pkg:')):
	processed_lines.append(line)
	continue

	line = strip_worker_suffix(line)
	tokens = line.split()
	if not tokens:
	processed_lines.append(line)
	continue

	numbers = extract_two_numbers(tokens)
	pct_match = re.search(r'([+-]?\d+\.\d+)%', line)

	# Helper to align and append
	def append_aligned(left_part, content):
	if len(left_part) < diff_col_start:
	aligned = left_part + " " * (diff_col_start - len(left_part))
	else:
	aligned = left_part + " "

	# Ensure content doesn't exceed right boundary (visual check only, we don't truncate)
	# But users asked not to exceed header pipe.
	# Header pipe is at right_boundary.
	# Content starts at diff_col_start.
	# So content length should be <= right_boundary - diff_col_start
	return f"{aligned}{content}"

	# Special handling for geomean when values missing or zero
	is_geomean = tokens[0] == "geomean"
	if is_geomean and (len(numbers) < 2 or any(v == 0 for v in numbers)) and not pct_match:
	leading = re.match(r'^\s*', line).group(0)
	left = f"{leading}geomean"
	processed_lines.append(append_aligned(left, "n/a (has zero)"))
	continue

	# when both values are zero, force diff = 0 and align
	if len(numbers) == 2 and numbers[0] == 0 and numbers[1] == 0:
	diff_val = 0.0
	icon = get_icon(diff_val)
	left = line.rstrip()
	processed_lines.append(append_aligned(left, f"{diff_val:+.2f}% {icon}"))
	continue

	# recompute diff when we have two numeric values
	if len(numbers) == 2 and numbers[0] != 0:
	diff_val = (numbers[1] - numbers[0]) / numbers[0] * 100
	icon = get_icon(diff_val)

	left = line
	if pct_match:
	left = line[:pct_match.start()].rstrip()
	else:
	left = line.rstrip()

	processed_lines.append(append_aligned(left, f"{diff_val:+.2f}% {icon}"))
	continue

	# fallback: align existing percentage to Diff column and (re)append icon
	if pct_match:
	try:
	pct_val = float(pct_match.group(1))
	icon = get_icon(pct_val)

	left = line[:pct_match.start()].rstrip()
	suffix = line[pct_match.end():]
	# Remove any existing icon after the percentage to avoid duplicates
	suffix = re.sub(r'\s*(🐌\|🚀\|➡️)', '', suffix)

	processed_lines.append(append_aligned(left, f"{pct_val:+.2f}% {icon}{suffix}"))
	except ValueError:
	processed_lines.append(line)
	continue

	# If we cannot parse numbers or percentages, keep the original (only worker suffix stripped)
	processed_lines.append(line)

	p.write_text("\n".join(processed_lines) + "\n", encoding="utf-8")

	except Exception as e:
	print(f"Error post-processing comparison.md: {e}")
	sys.exit(1)