# blob: 89247d18fb1e0ac62f8063733c9a5ee9cec84e8d [file] [log] [blame]
import pathlib, re, sys
# NOTE(review): leading indentation appears to have been lost in this copy of
# the script. The `try:` opened here is closed by the `except Exception`
# handler at the end of the script.
try:
# The report is looked up relative to the current working directory.
p = pathlib.Path("comparison.md")
if not p.exists():
print("comparison.md not found, skipping post-processing.")
# A missing report is not an error: exit with status 0. SystemExit is not a
# subclass of Exception, so the handler at the end of the script does not
# intercept this exit.
sys.exit(0)
# splitlines() drops the line terminators; they are re-added on write-back.
lines = p.read_text(encoding="utf-8").splitlines()
# Output buffer filled by the rewrite pass.
processed_lines = []
# True while the scanner is between a pair of ``` fence lines.
in_code = False
def strip_worker_suffix(text: str) -> str:
    """Remove the trailing ``-<digits>`` suffix from the first token of *text*.

    Benchmark rows start with the benchmark name, e.g. ``BenchmarkFoo-8``;
    this turns it into ``BenchmarkFoo`` so rows line up regardless of the
    numeric suffix. Leading whitespace and everything after the first token
    are returned unchanged.

    The pattern is anchored to the start of the line on purpose: an
    unanchored pattern also rewrites later tokens that merely end in
    ``-<digits>`` (for example the ``E5-2673`` part of a CPU model name).

    Args:
        text: A single line of the report (no embedded newlines expected).

    Returns:
        The line with the first token's ``-<digits>`` suffix removed, or the
        line unchanged when the first token has no such suffix.
    """
    # \S+? cannot cross whitespace, so the match is confined to the first
    # token; the lookahead requires the digits to end the token.
    return re.sub(r'^(\s*\S+?)-\d+(?=\s|$)', r'\1', text)
def get_icon(diff_val: float) -> str:
    """Pick the emoji shown next to a percentage difference.

    Args:
        diff_val: Percentage difference.

    Returns:
        "🐌" when the value is strictly greater than 10, "🚀" when it is
        strictly less than -10, and "➡️" for everything else (including
        exactly 10 and exactly -10).
    """
    above_band = diff_val > 10
    below_band = diff_val < -10
    if above_band:
        return "🐌"
    return "🚀" if below_band else "➡️"
def clean_superscripts(text: str) -> str:
    """Return *text* with every superscript digit (¹ … ⁹, ⁰) removed.

    All other characters, including ordinary ASCII digits, are kept as-is.
    """
    superscript_digits = re.compile(r'[¹²³⁴⁵⁶⁷⁸⁹⁰]')
    return superscript_digits.sub('', text)
def parse_val(token: str):
    """Parse one whitespace-delimited cell into a unit-scaled float.

    The token is expected to look like ``<number>[<letters>]``, e.g.
    ``1.234n``, ``12.5Ki`` or ``42``. Before matching, superscript footnote
    digits are removed and anything from the first ``±`` or ``(`` onwards is
    cut off.

    Args:
        token: A single token taken from a report row.

    Returns:
        The numeric value multiplied by the factor of its suffix, or ``None``
        when the token contains ``%`` or ``=``, is empty after clean-up, or
        does not match the ``<number>[<letters>]`` shape. A suffix that is
        not in the table is treated as a factor of 1.0.
    """
    # Percentages and "p=…"/"n=…" annotations are never measurement values.
    if '%' in token or '=' in token:
        return None

    # Strip superscript footnote markers (same character set that
    # clean_superscripts removes) so "1.23n¹" parses like "1.23n".
    token = re.sub(r'[¹²³⁴⁵⁶⁷⁸⁹⁰]', '', token)
    token = token.split('±')[0].strip()
    token = token.split('(')[0].strip()
    if not token:
        return None

    # Both the micro sign (U+00B5) and the Greek letter mu (U+03BC) are
    # accepted as the "micro" prefix.
    m = re.match(r'^([-+]?\d*\.?\d+)([a-zA-Zµμ]+)?$', token)
    if not m:
        return None

    # The regex only admits text that float() can parse, so no ValueError
    # handling is needed here.
    val = float(m.group(1))
    suffix = (m.group(2) or "").replace("µ", "u").replace("μ", "u")

    multipliers = {
        # Sub-unit prefixes, with and without a trailing "s".
        "n": 1e-9,
        "ns": 1e-9,
        "u": 1e-6,
        "us": 1e-6,
        "m": 1e-3,
        "ms": 1e-3,
        "s": 1.0,
        # Decimal prefixes.
        "k": 1e3,
        "K": 1e3,
        "M": 1e6,
        "G": 1e9,
        "T": 1e12,
        # Binary prefixes.
        "Ki": 1024.0,
        "Mi": 1024.0**2,
        "Gi": 1024.0**3,
        "Ti": 1024.0**4,
        # Plain unit markers that do not scale the value.
        "B": 1.0,
        "C": 1.0,  # tolerate degree/unit markers that don't affect ratio
    }
    return val * multipliers.get(suffix, 1.0)
def extract_two_numbers(tokens):
    """Collect at most two numeric values from a tokenised report row.

    The first token (the row name) is never inspected. Separator glyphs and
    tokens containing ``%`` or ``=`` are passed over; every remaining token
    is handed to ``parse_val`` and kept when that returns a value. Scanning
    stops as soon as two values have been gathered.

    Args:
        tokens: The row split on whitespace, name first.

    Returns:
        A list with zero, one or two floats, in row order.
    """
    separators = {"±", "∞", "~", "│"}
    parsed = (
        parse_val(tok)
        for tok in tokens[1:]  # skip name
        if tok not in separators and '%' not in tok and '=' not in tok
    )
    found = []
    for value in parsed:
        if value is None:
            continue
        found.append(value)
        if len(found) == 2:
            break
    return found
# Pass 0: measure the widest data row found inside the code fences so the
# Diff column can be placed to the right of every row.
# Footnotes, sample-count notices, blank lines, goos/goarch/pkg/cpu metadata
# and pipe-delimited header rows do not take part in the measurement.
max_content_width = 0
for line in lines:
    body = line.strip()
    if body == "```":
        in_code = not in_code
        continue
    if not in_code or not body:
        continue

    is_footnote = re.match(r'^\s*[¹²³⁴⁵⁶⁷⁸⁹⁰]', line) or re.search(r'need\s*>?=\s*\d+\s+samples', line)
    is_meta = body.startswith(('goos:', 'goarch:', 'pkg:', 'cpu:'))
    is_header = '│' in line and ('vs base' in line or 'old' in line or 'new' in line)
    if is_footnote or is_meta or is_header:
        continue

    # Anything left is treated as a data row.
    row = strip_worker_suffix(line).rstrip()
    pct = re.search(r'([+-]?\d+\.\d+)%', row)
    # A row that already carries a percentage is measured only up to that
    # percentage, since the percentage itself gets re-aligned later.
    measured = row[:pct.start()].rstrip() if pct else row
    max_content_width = max(max_content_width, len(measured))

# The Diff column starts four columns past the widest measured row.
diff_col_start = max_content_width + 4
# Column of the closing pipe on header rows: 14 columns are reserved for the
# Diff cell (e.g. "+100.00% 🚀").
right_boundary = diff_col_start + 14
# Pass 1: rewrite the lines inside code fences so that every data row carries
# a percentage difference and icon aligned at diff_col_start. Lines outside
# fences, footnotes and metadata lines are copied through unchanged.
# Reset code fence tracking state for Pass 1.
in_code = False


def append_aligned(left_part, content):
    """Pad *left_part* out to the Diff column and append *content*.

    When *left_part* already reaches ``diff_col_start`` a single space is
    used as the separator instead. Nothing is truncated, so an over-long row
    can extend past the header's closing pipe at ``right_boundary``.
    """
    if len(left_part) < diff_col_start:
        aligned = left_part + " " * (diff_col_start - len(left_part))
    else:
        aligned = left_part + " "
    return f"{aligned}{content}"


for line in lines:
    if line.strip() == "```":
        in_code = not in_code
        processed_lines.append(line)
        continue
    if not in_code:
        processed_lines.append(line)
        continue

    # Footnotes and "need >= N samples" notices are kept untouched.
    if re.match(r'^\s*[¹²³⁴⁵⁶⁷⁸⁹⁰]', line) or re.search(r'need\s*>?=\s*\d+\s+samples', line):
        processed_lines.append(line)
        continue

    # Header lines: drop any existing Diff/Delta label, label the last column
    # "Diff" on the "vs base" row and close the row at right_boundary.
    if '│' in line and ('vs base' in line or 'old' in line or 'new' in line):
        # Strip trailing pipe and whitespace.
        stripped_header = line.rstrip().rstrip('│').rstrip()
        # Remove labels from a previous run so "Diff" is never duplicated and
        # this pass's alignment always wins.
        stripped_header = re.sub(r'\s+Diff\s*$', '', stripped_header, flags=re.IGNORECASE)
        stripped_header = re.sub(r'\s+Delta\b', '', stripped_header, flags=re.IGNORECASE)

        # Pad to diff_col_start.
        if len(stripped_header) < diff_col_start:
            new_header = stripped_header + " " * (diff_col_start - len(stripped_header))
        else:
            new_header = stripped_header + " "

        # Only the "vs base" header row gets the Diff column label.
        if 'vs base' in line:
            new_header += "Diff"

        # Add the closing pipe at the right boundary.
        current_len = len(new_header)
        if current_len < right_boundary:
            new_header += " " * (right_boundary - current_len)
        new_header += "│"
        processed_lines.append(new_header)
        continue

    # Non-data meta lines. 'cpu:' is listed here as it is in Pass 0;
    # otherwise a CPU description would be handled as a benchmark row (its
    # model name run through strip_worker_suffix and, when two
    # numeric-looking tokens are present, a meaningless diff appended).
    if not line.strip() or line.strip().startswith(('goos:', 'goarch:', 'pkg:', 'cpu:')):
        processed_lines.append(line)
        continue

    line = strip_worker_suffix(line)
    tokens = line.split()
    if not tokens:
        processed_lines.append(line)
        continue

    numbers = extract_two_numbers(tokens)
    pct_match = re.search(r'([+-]?\d+\.\d+)%', line)

    # Special handling for geomean when values are missing or zero and the
    # row has no percentage of its own.
    is_geomean = tokens[0] == "geomean"
    if is_geomean and (len(numbers) < 2 or any(v == 0 for v in numbers)) and not pct_match:
        leading = re.match(r'^\s*', line).group(0)
        left = f"{leading}geomean"
        processed_lines.append(append_aligned(left, "n/a (has zero)"))
        continue

    # When both values are zero, force diff = 0 and align.
    if len(numbers) == 2 and numbers[0] == 0 and numbers[1] == 0:
        diff_val = 0.0
        icon = get_icon(diff_val)
        left = line.rstrip()
        processed_lines.append(append_aligned(left, f"{diff_val:+.2f}% {icon}"))
        continue

    # Recompute the diff when there are two numeric values and a non-zero
    # first value. Everything from an existing percentage onwards is replaced.
    if len(numbers) == 2 and numbers[0] != 0:
        diff_val = (numbers[1] - numbers[0]) / numbers[0] * 100
        icon = get_icon(diff_val)
        if pct_match:
            left = line[:pct_match.start()].rstrip()
        else:
            left = line.rstrip()
        processed_lines.append(append_aligned(left, f"{diff_val:+.2f}% {icon}"))
        continue

    # Fallback: align the existing percentage to the Diff column and
    # (re)append the icon, keeping whatever followed the percentage.
    if pct_match:
        # The pattern guarantees digits '.' digits, so float() cannot fail.
        pct_val = float(pct_match.group(1))
        icon = get_icon(pct_val)
        left = line[:pct_match.start()].rstrip()
        suffix = line[pct_match.end():]
        # Remove any existing icon after the percentage to avoid duplicates.
        suffix = re.sub(r'\s*(🐌|🚀|➡️)', '', suffix)
        processed_lines.append(append_aligned(left, f"{pct_val:+.2f}% {icon}{suffix}"))
        continue

    # If neither numbers nor a percentage can be parsed, keep the line (only
    # the worker suffix has been stripped).
    processed_lines.append(line)
# Write the processed lines back over comparison.md, joined with "\n" and
# terminated by a final newline.
p.write_text("\n".join(processed_lines) + "\n", encoding="utf-8")
# Handler for the `try:` opened at the top of the script: any Exception raised
# while reading, processing or writing is printed and turned into exit
# status 1.
except Exception as e:
print(f"Error post-processing comparison.md: {e}")
sys.exit(1)