benchmarks/rust/benchmark_report.py - fory - Git at Google

 #!/usr/bin/env python3
 # Licensed to the Apache Software Foundation (ASF) under one
 # or more contributor license agreements.  See the NOTICE file
 # distributed with this work for additional information
 # regarding copyright ownership.  The ASF licenses this file
 # to you under the Apache License, Version 2.0 (the
 # "License"); you may not use this file except in compliance
 # with the License.  You may obtain a copy of the License at
 #
 #   http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing,
 # software distributed under the License is distributed on an
 # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 # KIND, either express or implied.  See the License for the
 # specific language governing permissions and limitations
 # under the License.

 import argparse
 import os
 import platform
 import re
 from collections import defaultdict
 from datetime import datetime

 import matplotlib.pyplot as plt
 import numpy as np

 # psutil is optional: when it is missing, the CPU-core and RAM rows are simply
 # left out of the system-info table (see get_system_info).
 try:
     import psutil

     HAS_PSUTIL = True
 except ImportError:
     HAS_PSUTIL = False

 # Bar colour (hex RGB) for each serializer in the charts.
 COLORS = {
     "fory": "#FF6f01",
     "protobuf": "#55BCC2",
 }
 # Serializers in the order they are drawn and listed in report columns.
 SERIALIZER_ORDER = ["fory", "protobuf"]
 # Display name of each serializer on tick labels and legends.
 SERIALIZER_LABELS = {
     "fory": "fory",
     "protobuf": "protobuf",
 }
 # Recognised benchmark groups, in report order; log entries for any other
 # group are ignored by load_benchmark_results.
 DATATYPE_ORDER = [
     "struct",
     "sample",
     "mediacontent",
     "structlist",
     "samplelist",
     "mediacontentlist",
 ]
 # Recognised operation suffixes of a benchmark name (<serializer>_<operation>).
 OPERATIONS = ["serialize", "deserialize"]
 # Multiplier that converts a value expressed in the given unit to nanoseconds.
 # "us" and the micro-sign spelling are both accepted for microseconds.
 UNIT_TO_NS = {
     "ps": 1e-3,
     "ns": 1.0,
     "us": 1e3,
     "µs": 1e3,
     "ms": 1e6,
     "s": 1e9,
 }


 def parse_args():
     """Parse the command-line options of the report generator.

     Returns:
         argparse.Namespace with ``log_file``, ``size_file``, ``output_dir``
         and ``plot_prefix``; every option is optional and has a default.
     """
     # (flag, default, help text), in the order shown by ``--help``.
     option_specs = (
         ("--log-file", "results/cargo_bench.log", "Criterion cargo bench log file"),
         (
             "--size-file",
             "results/serialized_sizes.txt",
             "Serialized size table generated by fory_profiler",
         ),
         ("--output-dir", "results", "Output directory for report artifacts"),
         ("--plot-prefix", "", "Image path prefix inside the markdown report"),
     )
     cli = argparse.ArgumentParser(
         description="Generate Rust benchmark report from Criterion output"
     )
     for flag, default_value, help_text in option_specs:
         cli.add_argument(flag, default=default_value, help=help_text)
     return cli.parse_args()


 def datatype_title(datatype):
     """Return the CamelCase display title for a lowercase datatype key.

     A trailing ``list`` becomes a ``List`` suffix, and the ``mediacontent``
     stem keeps its inner capital; any other stem is simply capitalized.
     """
     is_list = datatype.endswith("list")
     stem = datatype[:-4] if is_list else datatype
     # str.capitalize() would give "Mediacontent", hence the special case.
     head = "MediaContent" if stem == "mediacontent" else stem.capitalize()
     return head + ("List" if is_list else "")


 def datatype_plot_label(datatype):
     """Return the tick label for a datatype key.

     Same naming as the display title, except that the ``List`` suffix is put
     on its own line (separated by a newline) to keep tick labels narrow.
     """
     is_list = datatype.endswith("list")
     stem = datatype[:-4] if is_list else datatype
     head = "MediaContent" if stem == "mediacontent" else stem.capitalize()
     return f"{head}\nList" if is_list else head


 def get_system_info(log_file):
     """Collect host details for the report's hardware/OS table.

     Args:
         log_file: Path of the benchmark log. When it exists, its modification
             time is reported as the benchmark date.

     Returns:
         dict mapping row label to value, in display order. The CPU and RAM
         rows are only present when psutil could be imported.
     """
     details = {}
     details["OS"] = f"{platform.system()} {platform.release()}"
     details["Machine"] = platform.machine()
     # platform.processor() can be an empty string; show a placeholder instead.
     details["Processor"] = platform.processor() or "Unknown"

     if HAS_PSUTIL:
         details["CPU Cores (Physical)"] = psutil.cpu_count(logical=False)
         details["CPU Cores (Logical)"] = psutil.cpu_count(logical=True)
         total_bytes = psutil.virtual_memory().total
         details["Total RAM (GB)"] = round(total_bytes / (1024**3), 2)

     if os.path.exists(log_file):
         modified = datetime.fromtimestamp(os.path.getmtime(log_file))
         details["Benchmark Date"] = modified.isoformat(timespec="seconds")
     return details


 def parse_time_ns(measurement):
     """Convert the bracketed Criterion ``time:`` triple to nanoseconds.

     Args:
         measurement: Text found between the brackets, expected to be three
             value/unit pairs such as ``"1.20 ns 1.25 ns 1.31 ns"``.

     Returns:
         The second value of the triple (presumably Criterion's point
         estimate between the two bounds) as a float in nanoseconds.

     Raises:
         ValueError: If fewer than four tokens are present, the value is not
             a number, or the unit is not a key of ``UNIT_TO_NS``.
     """
     # Map the Greek letter mu (U+03BC) to the micro sign (U+00B5), which is
     # the spelling used as a key in UNIT_TO_NS.
     tokens = measurement.replace("μ", "µ").split()
     if len(tokens) < 4:
         raise ValueError(f"unexpected criterion timing format: {measurement}")

     # Token layout: value unit value unit value unit -> take the middle pair.
     value_text, unit = tokens[2], tokens[3]
     try:
         scale = UNIT_TO_NS[unit]
     except KeyError:
         # Report a bad unit the same way as any other malformed measurement
         # instead of leaking a bare KeyError without context.
         raise ValueError(
             f"unknown time unit {unit!r} in criterion timing: {measurement}"
         ) from None
     return float(value_text) * scale


 def load_benchmark_results(log_file):
     """Extract per-benchmark timings from a captured Criterion log.

     Only benchmarks named ``<datatype>/<serializer>_<operation>`` whose three
     parts appear in DATATYPE_ORDER, SERIALIZER_ORDER and OPERATIONS are kept.
     If the same benchmark occurs more than once, the last occurrence wins.

     Args:
         log_file: Path of the log file (read as UTF-8).

     Returns:
         Nested defaultdict ``results[datatype][operation][serializer]``
         holding the time in nanoseconds.
     """
     # DOTALL lets the lazy ``.*?`` span the lines between a
     # "Benchmarking <group>/<name>" header and the next "time: [...]" entry.
     entry_re = re.compile(
         r"Benchmarking\s+([A-Za-z0-9_]+)/([A-Za-z0-9_]+).*?time:\s+\[([^\]]+)\]",
         re.DOTALL,
     )
     timings = defaultdict(lambda: defaultdict(dict))

     with open(log_file, "r", encoding="utf-8") as handle:
         log_text = handle.read()

     for found in entry_re.finditer(log_text):
         group, bench_name, measurement = found.groups()
         if group not in DATATYPE_ORDER:
             continue
         # Split on the first underscore only: "<serializer>_<operation>".
         serializer, separator, operation = bench_name.partition("_")
         if not separator:
             continue
         if serializer in SERIALIZER_ORDER and operation in OPERATIONS:
             timings[group][operation][serializer] = parse_time_ns(measurement)
     return timings


 def load_serialized_sizes(size_file):
     """Read the serialized-size table from ``size_file``.

     Only lines shaped like ``| name | <digits> | <digits> |`` are parsed;
     every other line (header, separator, free text) is ignored, as is a row
     whose name is literally ``Datatype``.

     Args:
         size_file: Path of the size table (read as UTF-8).

     Returns:
         dict mapping the row name to ``{"fory": int, "protobuf": int}``, or
         an empty dict when the file does not exist.
     """
     if not os.path.exists(size_file):
         return {}

     row_re = re.compile(r"^\|\s*([^|]+?)\s*\|\s*(\d+)\s*\|\s*(\d+)\s*\|$")
     with open(size_file, "r", encoding="utf-8") as handle:
         candidates = (row_re.match(raw_line.strip()) for raw_line in handle)
         rows = [found.groups() for found in candidates if found]

     return {
         name: {"fory": int(fory_bytes), "protobuf": int(protobuf_bytes)}
         for name, fory_bytes, protobuf_bytes in rows
         if name != "Datatype"
     }


 def format_tps_label(tps):
     """Format an ops/sec value as a compact label such as ``1.50M``.

     Values of at least 1e9, 1e6 or 1e3 are scaled and suffixed with G, M or
     K (two decimals); smaller values are rounded to a whole number.
     """
     # Largest threshold first so the first match is the right magnitude.
     for threshold, suffix in ((1e9, "G"), (1e6, "M"), (1e3, "K")):
         if tps >= threshold:
             return f"{tps / threshold:.2f}{suffix}"
     return f"{tps:.0f}"


 def plot_datatype(ax, results, datatype, operation):
     """Draw one throughput bar per serializer for a datatype/operation pair.

     Args:
         ax: Matplotlib axes to draw on.
         results: ``results[datatype][operation][serializer]`` -> time in ns.
         datatype: Datatype key to plot.
         operation: Operation key to plot.

     If ``results`` has no entry for the pair, the axes are switched off and
     titled "<datatype> <operation> - No Data". Serializers without a positive
     timing are left out of the chart.
     """
     has_data = datatype in results and operation in results[datatype]
     if not has_data:
         ax.set_title(f"{datatype} {operation} - No Data")
         ax.axis("off")
         return

     timings = results[datatype][operation]
     libs = [name for name in SERIALIZER_ORDER if timings.get(name, 0) > 0]
     # ns per operation -> operations per second.
     throughput = [1e9 / timings[name] for name in libs]
     positions = np.arange(len(libs))
     bars = ax.bar(
         positions,
         throughput,
         color=[COLORS.get(name, "#888888") for name in libs],
         width=0.6,
     )

     ax.set_title(f"{operation.capitalize()} Throughput (higher is better)")
     ax.set_xticks(positions)
     ax.set_xticklabels([SERIALIZER_LABELS[name] for name in libs])
     ax.set_ylabel("Throughput (ops/sec)")
     ax.grid(True, axis="y", linestyle="--", alpha=0.5)
     ax.ticklabel_format(style="scientific", axis="y", scilimits=(0, 0))

     # Put the compact value label just above the top centre of each bar.
     for bar, ops_per_sec in zip(bars, throughput):
         center_x = bar.get_x() + bar.get_width() / 2
         ax.annotate(
             format_tps_label(ops_per_sec),
             xy=(center_x, ops_per_sec),
             xytext=(0, 3),
             textcoords="offset points",
             ha="center",
             va="bottom",
             fontsize=9,
         )


 def plot_combined_subplot(ax, results, datatypes, operation, title):
     """Draw grouped throughput bars: one group per datatype, one bar per serializer.

     Args:
         ax: Matplotlib axes to draw on.
         results: ``results[datatype][operation][serializer]`` -> time in ns.
         datatypes: Datatype keys to show, in x-axis order.
         operation: Operation key to plot.
         title: Axes title.

     If ``datatypes`` is empty or no serializer has a positive timing for any
     of them, the axes are switched off and titled "<title>\\nNo Data". A
     serializer lacking a timing for one datatype gets a zero-height bar there.
     """

     def elapsed_ns(datatype, serializer):
         return results[datatype][operation].get(serializer, 0)

     def mark_no_data():
         ax.set_title(f"{title}\nNo Data")
         ax.axis("off")

     if not datatypes:
         mark_no_data()
         return

     available = [
         serializer
         for serializer in SERIALIZER_ORDER
         if any(elapsed_ns(datatype, serializer) > 0 for datatype in datatypes)
     ]
     if not available:
         mark_no_data()
         return

     centers = np.arange(len(datatypes))
     # Each group occupies 0.8 of the tick spacing, shared by the serializers.
     bar_width = 0.8 / len(available)
     middle_slot = (len(available) - 1) / 2
     for slot, serializer in enumerate(available):
         heights = []
         for datatype in datatypes:
             time_ns = elapsed_ns(datatype, serializer)
             heights.append(1e9 / time_ns if time_ns > 0 else 0)
         # Shift so that the whole group is centred on its tick.
         shift = (slot - middle_slot) * bar_width
         ax.bar(
             centers + shift,
             heights,
             bar_width,
             label=SERIALIZER_LABELS[serializer],
             color=COLORS.get(serializer, "#888888"),
         )

     ax.set_title(title)
     ax.set_xticks(centers)
     ax.set_xticklabels([datatype_plot_label(datatype) for datatype in datatypes])
     ax.grid(True, axis="y", linestyle="--", alpha=0.5)
     ax.legend()
     ax.ticklabel_format(style="scientific", axis="y", scilimits=(0, 0))


 def generate_plots(results, output_dir):
     """Render all charts as PNG files in ``output_dir``.

     One ``<datatype>.png`` (serialize and deserialize side by side) is
     written per datatype present in ``results``, followed by a four-panel
     ``throughput.png`` overview (non-list and list datatypes, per operation).

     Args:
         results: ``results[datatype][operation][serializer]`` -> time in ns.
         output_dir: Target directory; created if it does not exist.

     Returns:
         List of ``(name, path)`` tuples in creation order; the overview is
         last and is named ``"throughput"``.
     """
     os.makedirs(output_dir, exist_ok=True)
     present = [datatype for datatype in DATATYPE_ORDER if datatype in results]
     plot_images = []

     for datatype in present:
         fig, axes = plt.subplots(1, 2, figsize=(12, 5))
         for axis, operation in zip(axes, OPERATIONS):
             plot_datatype(axis, results, datatype, operation)
         fig.suptitle(f"{datatype_title(datatype)} Throughput", fontsize=14)
         # Leave the top 5% of the figure free for the suptitle.
         fig.tight_layout(rect=[0, 0, 1, 0.95])
         plot_path = os.path.join(output_dir, f"{datatype}.png")
         plt.savefig(plot_path, dpi=150)
         plt.close(fig)
         plot_images.append((datatype, plot_path))

     non_list = [datatype for datatype in present if not datatype.endswith("list")]
     list_only = [datatype for datatype in present if datatype.endswith("list")]

     # (datatypes, operation, title) for the overview panels, left to right.
     panels = (
         (non_list, "serialize", "Serialize Throughput (higher is better)"),
         (non_list, "deserialize", "Deserialize Throughput (higher is better)"),
         (list_only, "serialize", "Serialize Throughput (*List)"),
         (list_only, "deserialize", "Deserialize Throughput (*List)"),
     )
     fig, axes = plt.subplots(1, 4, figsize=(28, 6))
     fig.supylabel("Throughput (ops/sec)")
     for axis, (datatypes, operation, title) in zip(axes, panels):
         plot_combined_subplot(axis, results, datatypes, operation, title)
     fig.tight_layout()
     throughput_path = os.path.join(output_dir, "throughput.png")
     plt.savefig(throughput_path, dpi=150)
     plt.close(fig)
     plot_images.append(("throughput", throughput_path))

     return plot_images


 def write_report(system_info, results, sizes, plot_images, output_dir, plot_prefix):
     """Write the Markdown report to ``<output_dir>/README.md``.

     Args:
         system_info: Mapping of label -> value for the hardware/OS table.
         results: ``results[datatype][operation][serializer]`` -> time in ns.
         sizes: Mapping of datatype title -> ``{"fory": n, "protobuf": n}``;
             the size section is omitted when this is empty.
         plot_images: ``(name, path)`` tuples; only the file name of each path
             is used in the image links.
         output_dir: Directory that receives ``README.md``.
         plot_prefix: Text put in front of every image file name in links.

     Returns:
         Path of the written report.
     """

     def table_row(datatype, operation, metric_cells, fastest):
         return (
             "| "
             + f"{datatype_title(datatype)} | {operation.capitalize()} | "
             + " | ".join(metric_cells)
             + f" | {fastest} |\n"
         )

     def plot_rank(item):
         # The overview plot goes first, the others follow alphabetically.
         name = item[0]
         return (0 if name == "throughput" else 1, name)

     lines = [
         "# Rust Benchmark Performance Report\n\n",
         f"_Generated on {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}_\n\n",
         "## How to Generate This Report\n\n",
         "```bash\n",
         "cd benchmarks/rust\n",
         "cargo bench --bench serialization_bench 2>&1 | tee results/cargo_bench.log\n",
         "cargo run --release --bin fory_profiler -- --print-all-serialized-sizes | tee results/serialized_sizes.txt\n",
         "python benchmark_report.py --log-file results/cargo_bench.log --size-file results/serialized_sizes.txt --output-dir results\n",
         "```\n\n",
         "## Hardware & OS Info\n\n",
         "| Key | Value |\n",
         "|-----|-------|\n",
     ]
     lines.extend(f"| {key} | {value} |\n" for key, value in system_info.items())

     lines.append("\n## Benchmark Plots\n")
     lines.append("\nAll class-level plots below show throughput (ops/sec).\n")
     for name, image_path in sorted(plot_images, key=plot_rank):
         heading = datatype_title(name)
         image_name = os.path.basename(image_path)
         lines.append(f"\n### {heading}\n\n")
         lines.append(f"![{heading}]({plot_prefix}{image_name})\n")

     # Every (datatype, operation) pair that gets a row in both result tables.
     measured = [
         (datatype, operation)
         for datatype in DATATYPE_ORDER
         if datatype in results
         for operation in OPERATIONS
     ]

     lines.append("\n## Benchmark Results\n\n")
     lines.append("### Timing Results (nanoseconds)\n\n")
     lines.append("| Datatype | Operation | fory (ns) | protobuf (ns) | Fastest |\n")
     lines.append("|----------|-----------|-----------|---------------|---------|\n")
     for datatype, operation in measured:
         recorded = results[datatype][operation]
         times = {name: recorded.get(name, 0) for name in SERIALIZER_ORDER}
         valid = {name: time_ns for name, time_ns in times.items() if time_ns > 0}
         # Lower time is better.
         fastest = min(valid, key=valid.get) if valid else "N/A"
         cells = [
             f"{times[name]:.1f}" if times[name] > 0 else "N/A"
             for name in SERIALIZER_ORDER
         ]
         lines.append(table_row(datatype, operation, cells, fastest))

     lines.append("\n### Throughput Results (ops/sec)\n\n")
     lines.append("| Datatype | Operation | fory TPS | protobuf TPS | Fastest |\n")
     lines.append("|----------|-----------|----------|--------------|---------|\n")
     for datatype, operation in measured:
         recorded = results[datatype][operation]
         throughput = {}
         for name in SERIALIZER_ORDER:
             time_ns = recorded.get(name, 0)
             throughput[name] = 1e9 / time_ns if time_ns > 0 else 0
         valid = {name: tps for name, tps in throughput.items() if tps > 0}
         # Higher throughput is better.
         fastest = max(valid, key=valid.get) if valid else "N/A"
         cells = [
             f"{throughput[name]:,.0f}" if throughput[name] > 0 else "N/A"
             for name in SERIALIZER_ORDER
         ]
         lines.append(table_row(datatype, operation, cells, fastest))

     if sizes:
         lines.append("\n### Serialized Data Sizes (bytes)\n\n")
         lines.append("| Datatype | fory | protobuf |\n")
         lines.append("|----------|------|----------|\n")
         for datatype in DATATYPE_ORDER:
             # The size table is keyed by display title, not by lowercase key.
             title = datatype_title(datatype)
             if title in sizes:
                 entry = sizes[title]
                 lines.append(
                     f"| {title} | {entry['fory']} | {entry['protobuf']} |\n"
                 )

     report_path = os.path.join(output_dir, "README.md")
     with open(report_path, "w", encoding="utf-8") as handle:
         handle.writelines(lines)
     return report_path


 def main():
     """Command-line entry point: parse inputs, render plots, write the report."""
     options = parse_args()
     log_path = options.log_file
     target_dir = options.output_dir

     timings = load_benchmark_results(log_path)
     size_table = load_serialized_sizes(options.size_file)
     host_info = get_system_info(log_path)
     images = generate_plots(timings, target_dir)
     report_path = write_report(
         system_info=host_info,
         results=timings,
         sizes=size_table,
         plot_images=images,
         output_dir=target_dir,
         plot_prefix=options.plot_prefix,
     )
     print(f"✅ Plots saved in: {target_dir}")
     print(f"📄 Markdown report generated at: {report_path}")


 if __name__ == "__main__":
     main()
	#!/usr/bin/env python3
	# Licensed to the Apache Software Foundation (ASF) under one
	# or more contributor license agreements. See the NOTICE file
	# distributed with this work for additional information
	# regarding copyright ownership. The ASF licenses this file
	# to you under the Apache License, Version 2.0 (the
	# "License"); you may not use this file except in compliance
	# with the License. You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing,
	# software distributed under the License is distributed on an
	# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
	# KIND, either express or implied. See the License for the
	# specific language governing permissions and limitations
	# under the License.

	import argparse
	import os
	import platform
	import re
	from collections import defaultdict
	from datetime import datetime

	import matplotlib.pyplot as plt
	import numpy as np

	# psutil is optional: when it is missing, the CPU-core and RAM rows are simply
	# left out of the system-info table (see get_system_info).
	try:
	    import psutil

	    HAS_PSUTIL = True
	except ImportError:
	    HAS_PSUTIL = False

	# Bar colour (hex RGB) for each serializer in the charts.
	COLORS = {
	    "fory": "#FF6f01",
	    "protobuf": "#55BCC2",
	}
	# Serializers in the order they are drawn and listed in report columns.
	SERIALIZER_ORDER = ["fory", "protobuf"]
	# Display name of each serializer on tick labels and legends.
	SERIALIZER_LABELS = {
	    "fory": "fory",
	    "protobuf": "protobuf",
	}
	# Recognised benchmark groups, in report order; log entries for any other
	# group are ignored by load_benchmark_results.
	DATATYPE_ORDER = [
	    "struct",
	    "sample",
	    "mediacontent",
	    "structlist",
	    "samplelist",
	    "mediacontentlist",
	]
	# Recognised operation suffixes of a benchmark name (<serializer>_<operation>).
	OPERATIONS = ["serialize", "deserialize"]
	# Multiplier that converts a value expressed in the given unit to nanoseconds.
	# "us" and the micro-sign spelling are both accepted for microseconds.
	UNIT_TO_NS = {
	    "ps": 1e-3,
	    "ns": 1.0,
	    "us": 1e3,
	    "µs": 1e3,
	    "ms": 1e6,
	    "s": 1e9,
	}


	def parse_args():
	    """Parse the command-line options of the report generator.

	    Returns:
	        argparse.Namespace with ``log_file``, ``size_file``, ``output_dir``
	        and ``plot_prefix``; every option is optional and has a default.
	    """
	    # (flag, default, help text), in the order shown by ``--help``.
	    option_specs = (
	        ("--log-file", "results/cargo_bench.log", "Criterion cargo bench log file"),
	        (
	            "--size-file",
	            "results/serialized_sizes.txt",
	            "Serialized size table generated by fory_profiler",
	        ),
	        ("--output-dir", "results", "Output directory for report artifacts"),
	        ("--plot-prefix", "", "Image path prefix inside the markdown report"),
	    )
	    cli = argparse.ArgumentParser(
	        description="Generate Rust benchmark report from Criterion output"
	    )
	    for flag, default_value, help_text in option_specs:
	        cli.add_argument(flag, default=default_value, help=help_text)
	    return cli.parse_args()


	def datatype_title(datatype):
	    """Return the CamelCase display title for a lowercase datatype key.

	    A trailing ``list`` becomes a ``List`` suffix, and the ``mediacontent``
	    stem keeps its inner capital; any other stem is simply capitalized.
	    """
	    is_list = datatype.endswith("list")
	    stem = datatype[:-4] if is_list else datatype
	    # str.capitalize() would give "Mediacontent", hence the special case.
	    head = "MediaContent" if stem == "mediacontent" else stem.capitalize()
	    return head + ("List" if is_list else "")


	def datatype_plot_label(datatype):
	    """Return the tick label for a datatype key.

	    Same naming as the display title, except that the ``List`` suffix is put
	    on its own line (separated by a newline) to keep tick labels narrow.
	    """
	    is_list = datatype.endswith("list")
	    stem = datatype[:-4] if is_list else datatype
	    head = "MediaContent" if stem == "mediacontent" else stem.capitalize()
	    return f"{head}\nList" if is_list else head


	def get_system_info(log_file):
	    """Collect host details for the report's hardware/OS table.

	    Args:
	        log_file: Path of the benchmark log. When it exists, its modification
	            time is reported as the benchmark date.

	    Returns:
	        dict mapping row label to value, in display order. The CPU and RAM
	        rows are only present when psutil could be imported.
	    """
	    details = {}
	    details["OS"] = f"{platform.system()} {platform.release()}"
	    details["Machine"] = platform.machine()
	    # platform.processor() can be an empty string; show a placeholder instead.
	    details["Processor"] = platform.processor() or "Unknown"

	    if HAS_PSUTIL:
	        details["CPU Cores (Physical)"] = psutil.cpu_count(logical=False)
	        details["CPU Cores (Logical)"] = psutil.cpu_count(logical=True)
	        total_bytes = psutil.virtual_memory().total
	        details["Total RAM (GB)"] = round(total_bytes / (1024**3), 2)

	    if os.path.exists(log_file):
	        modified = datetime.fromtimestamp(os.path.getmtime(log_file))
	        details["Benchmark Date"] = modified.isoformat(timespec="seconds")
	    return details


	def parse_time_ns(measurement):
	    """Convert the bracketed Criterion ``time:`` triple to nanoseconds.

	    Args:
	        measurement: Text found between the brackets, expected to be three
	            value/unit pairs such as ``"1.20 ns 1.25 ns 1.31 ns"``.

	    Returns:
	        The second value of the triple (presumably Criterion's point
	        estimate between the two bounds) as a float in nanoseconds.

	    Raises:
	        ValueError: If fewer than four tokens are present, the value is not
	            a number, or the unit is not a key of ``UNIT_TO_NS``.
	    """
	    # Map the Greek letter mu (U+03BC) to the micro sign (U+00B5), which is
	    # the spelling used as a key in UNIT_TO_NS.
	    tokens = measurement.replace("μ", "µ").split()
	    if len(tokens) < 4:
	        raise ValueError(f"unexpected criterion timing format: {measurement}")

	    # Token layout: value unit value unit value unit -> take the middle pair.
	    value_text, unit = tokens[2], tokens[3]
	    try:
	        scale = UNIT_TO_NS[unit]
	    except KeyError:
	        # Report a bad unit the same way as any other malformed measurement
	        # instead of leaking a bare KeyError without context.
	        raise ValueError(
	            f"unknown time unit {unit!r} in criterion timing: {measurement}"
	        ) from None
	    return float(value_text) * scale


	def load_benchmark_results(log_file):
	    """Extract per-benchmark timings from a captured Criterion log.

	    Only benchmarks named ``<datatype>/<serializer>_<operation>`` whose three
	    parts appear in DATATYPE_ORDER, SERIALIZER_ORDER and OPERATIONS are kept.
	    If the same benchmark occurs more than once, the last occurrence wins.

	    Args:
	        log_file: Path of the log file (read as UTF-8).

	    Returns:
	        Nested defaultdict ``results[datatype][operation][serializer]``
	        holding the time in nanoseconds.
	    """
	    # DOTALL lets the lazy ``.*?`` span the lines between a
	    # "Benchmarking <group>/<name>" header and the next "time: [...]" entry.
	    entry_re = re.compile(
	        r"Benchmarking\s+([A-Za-z0-9_]+)/([A-Za-z0-9_]+).*?time:\s+\[([^\]]+)\]",
	        re.DOTALL,
	    )
	    timings = defaultdict(lambda: defaultdict(dict))

	    with open(log_file, "r", encoding="utf-8") as handle:
	        log_text = handle.read()

	    for found in entry_re.finditer(log_text):
	        group, bench_name, measurement = found.groups()
	        if group not in DATATYPE_ORDER:
	            continue
	        # Split on the first underscore only: "<serializer>_<operation>".
	        serializer, separator, operation = bench_name.partition("_")
	        if not separator:
	            continue
	        if serializer in SERIALIZER_ORDER and operation in OPERATIONS:
	            timings[group][operation][serializer] = parse_time_ns(measurement)
	    return timings


	def load_serialized_sizes(size_file):
	    """Read the serialized-size table from ``size_file``.

	    Only lines shaped like ``| name | <digits> | <digits> |`` are parsed;
	    every other line (header, separator, free text) is ignored, as is a row
	    whose name is literally ``Datatype``.

	    Args:
	        size_file: Path of the size table (read as UTF-8).

	    Returns:
	        dict mapping the row name to ``{"fory": int, "protobuf": int}``, or
	        an empty dict when the file does not exist.
	    """
	    if not os.path.exists(size_file):
	        return {}

	    # Pipes are matched literally (``\|``) and padding is optional (``\s*``).
	    row_re = re.compile(r"^\|\s*([^|]+?)\s*\|\s*(\d+)\s*\|\s*(\d+)\s*\|$")
	    with open(size_file, "r", encoding="utf-8") as handle:
	        candidates = (row_re.match(raw_line.strip()) for raw_line in handle)
	        rows = [found.groups() for found in candidates if found]

	    return {
	        name: {"fory": int(fory_bytes), "protobuf": int(protobuf_bytes)}
	        for name, fory_bytes, protobuf_bytes in rows
	        if name != "Datatype"
	    }


	def format_tps_label(tps):
	    """Format an ops/sec value as a compact label such as ``1.50M``.

	    Values of at least 1e9, 1e6 or 1e3 are scaled and suffixed with G, M or
	    K (two decimals); smaller values are rounded to a whole number.
	    """
	    # Largest threshold first so the first match is the right magnitude.
	    for threshold, suffix in ((1e9, "G"), (1e6, "M"), (1e3, "K")):
	        if tps >= threshold:
	            return f"{tps / threshold:.2f}{suffix}"
	    return f"{tps:.0f}"


	def plot_datatype(ax, results, datatype, operation):
	    """Draw one throughput bar per serializer for a datatype/operation pair.

	    Args:
	        ax: Matplotlib axes to draw on.
	        results: ``results[datatype][operation][serializer]`` -> time in ns.
	        datatype: Datatype key to plot.
	        operation: Operation key to plot.

	    If ``results`` has no entry for the pair, the axes are switched off and
	    titled "<datatype> <operation> - No Data". Serializers without a positive
	    timing are left out of the chart.
	    """
	    has_data = datatype in results and operation in results[datatype]
	    if not has_data:
	        ax.set_title(f"{datatype} {operation} - No Data")
	        ax.axis("off")
	        return

	    timings = results[datatype][operation]
	    libs = [name for name in SERIALIZER_ORDER if timings.get(name, 0) > 0]
	    # ns per operation -> operations per second.
	    throughput = [1e9 / timings[name] for name in libs]
	    positions = np.arange(len(libs))
	    bars = ax.bar(
	        positions,
	        throughput,
	        color=[COLORS.get(name, "#888888") for name in libs],
	        width=0.6,
	    )

	    ax.set_title(f"{operation.capitalize()} Throughput (higher is better)")
	    ax.set_xticks(positions)
	    ax.set_xticklabels([SERIALIZER_LABELS[name] for name in libs])
	    ax.set_ylabel("Throughput (ops/sec)")
	    ax.grid(True, axis="y", linestyle="--", alpha=0.5)
	    ax.ticklabel_format(style="scientific", axis="y", scilimits=(0, 0))

	    # Put the compact value label just above the top centre of each bar.
	    for bar, ops_per_sec in zip(bars, throughput):
	        center_x = bar.get_x() + bar.get_width() / 2
	        ax.annotate(
	            format_tps_label(ops_per_sec),
	            xy=(center_x, ops_per_sec),
	            xytext=(0, 3),
	            textcoords="offset points",
	            ha="center",
	            va="bottom",
	            fontsize=9,
	        )


	def plot_combined_subplot(ax, results, datatypes, operation, title):
	    """Draw grouped throughput bars: one group per datatype, one bar per serializer.

	    Args:
	        ax: Matplotlib axes to draw on.
	        results: ``results[datatype][operation][serializer]`` -> time in ns.
	        datatypes: Datatype keys to show, in x-axis order.
	        operation: Operation key to plot.
	        title: Axes title.

	    If ``datatypes`` is empty or no serializer has a positive timing for any
	    of them, the axes are switched off and titled "<title>\\nNo Data". A
	    serializer lacking a timing for one datatype gets a zero-height bar there.
	    """

	    def elapsed_ns(datatype, serializer):
	        return results[datatype][operation].get(serializer, 0)

	    def mark_no_data():
	        ax.set_title(f"{title}\nNo Data")
	        ax.axis("off")

	    if not datatypes:
	        mark_no_data()
	        return

	    available = [
	        serializer
	        for serializer in SERIALIZER_ORDER
	        if any(elapsed_ns(datatype, serializer) > 0 for datatype in datatypes)
	    ]
	    if not available:
	        mark_no_data()
	        return

	    centers = np.arange(len(datatypes))
	    # Each group occupies 0.8 of the tick spacing, shared by the serializers.
	    bar_width = 0.8 / len(available)
	    middle_slot = (len(available) - 1) / 2
	    for slot, serializer in enumerate(available):
	        heights = []
	        for datatype in datatypes:
	            time_ns = elapsed_ns(datatype, serializer)
	            heights.append(1e9 / time_ns if time_ns > 0 else 0)
	        # Shift so that the whole group is centred on its tick.
	        shift = (slot - middle_slot) * bar_width
	        ax.bar(
	            centers + shift,
	            heights,
	            bar_width,
	            label=SERIALIZER_LABELS[serializer],
	            color=COLORS.get(serializer, "#888888"),
	        )

	    ax.set_title(title)
	    ax.set_xticks(centers)
	    ax.set_xticklabels([datatype_plot_label(datatype) for datatype in datatypes])
	    ax.grid(True, axis="y", linestyle="--", alpha=0.5)
	    ax.legend()
	    ax.ticklabel_format(style="scientific", axis="y", scilimits=(0, 0))


	def generate_plots(results, output_dir):
	    """Render all charts as PNG files in ``output_dir``.

	    One ``<datatype>.png`` (serialize and deserialize side by side) is
	    written per datatype present in ``results``, followed by a four-panel
	    ``throughput.png`` overview (non-list and list datatypes, per operation).

	    Args:
	        results: ``results[datatype][operation][serializer]`` -> time in ns.
	        output_dir: Target directory; created if it does not exist.

	    Returns:
	        List of ``(name, path)`` tuples in creation order; the overview is
	        last and is named ``"throughput"``.
	    """
	    os.makedirs(output_dir, exist_ok=True)
	    present = [datatype for datatype in DATATYPE_ORDER if datatype in results]
	    plot_images = []

	    for datatype in present:
	        fig, axes = plt.subplots(1, 2, figsize=(12, 5))
	        for axis, operation in zip(axes, OPERATIONS):
	            plot_datatype(axis, results, datatype, operation)
	        fig.suptitle(f"{datatype_title(datatype)} Throughput", fontsize=14)
	        # Leave the top 5% of the figure free for the suptitle.
	        fig.tight_layout(rect=[0, 0, 1, 0.95])
	        plot_path = os.path.join(output_dir, f"{datatype}.png")
	        plt.savefig(plot_path, dpi=150)
	        plt.close(fig)
	        plot_images.append((datatype, plot_path))

	    non_list = [datatype for datatype in present if not datatype.endswith("list")]
	    list_only = [datatype for datatype in present if datatype.endswith("list")]

	    # (datatypes, operation, title) for the overview panels, left to right.
	    panels = (
	        (non_list, "serialize", "Serialize Throughput (higher is better)"),
	        (non_list, "deserialize", "Deserialize Throughput (higher is better)"),
	        (list_only, "serialize", "Serialize Throughput (*List)"),
	        (list_only, "deserialize", "Deserialize Throughput (*List)"),
	    )
	    fig, axes = plt.subplots(1, 4, figsize=(28, 6))
	    fig.supylabel("Throughput (ops/sec)")
	    for axis, (datatypes, operation, title) in zip(axes, panels):
	        plot_combined_subplot(axis, results, datatypes, operation, title)
	    fig.tight_layout()
	    throughput_path = os.path.join(output_dir, "throughput.png")
	    plt.savefig(throughput_path, dpi=150)
	    plt.close(fig)
	    plot_images.append(("throughput", throughput_path))

	    return plot_images


	def write_report(system_info, results, sizes, plot_images, output_dir, plot_prefix):
	    """Write the Markdown report to ``<output_dir>/README.md``.

	    Args:
	        system_info: Mapping of label -> value for the hardware/OS table.
	        results: ``results[datatype][operation][serializer]`` -> time in ns.
	        sizes: Mapping of datatype title -> ``{"fory": n, "protobuf": n}``;
	            the size section is omitted when this is empty.
	        plot_images: ``(name, path)`` tuples; only the file name of each path
	            is used in the image links.
	        output_dir: Directory that receives ``README.md``.
	        plot_prefix: Text put in front of every image file name in links.

	    Returns:
	        Path of the written report.
	    """

	    def table_row(datatype, operation, metric_cells, fastest):
	        return (
	            "| "
	            + f"{datatype_title(datatype)} | {operation.capitalize()} | "
	            + " | ".join(metric_cells)
	            + f" | {fastest} |\n"
	        )

	    def plot_rank(item):
	        # The overview plot goes first, the others follow alphabetically.
	        name = item[0]
	        return (0 if name == "throughput" else 1, name)

	    # Pipes are written unescaped: they are Markdown table delimiters and
	    # shell pipes in the emitted text, not characters to be escaped.
	    lines = [
	        "# Rust Benchmark Performance Report\n\n",
	        f"_Generated on {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}_\n\n",
	        "## How to Generate This Report\n\n",
	        "```bash\n",
	        "cd benchmarks/rust\n",
	        "cargo bench --bench serialization_bench 2>&1 | tee results/cargo_bench.log\n",
	        "cargo run --release --bin fory_profiler -- --print-all-serialized-sizes | tee results/serialized_sizes.txt\n",
	        "python benchmark_report.py --log-file results/cargo_bench.log --size-file results/serialized_sizes.txt --output-dir results\n",
	        "```\n\n",
	        "## Hardware & OS Info\n\n",
	        "| Key | Value |\n",
	        "|-----|-------|\n",
	    ]
	    lines.extend(f"| {key} | {value} |\n" for key, value in system_info.items())

	    lines.append("\n## Benchmark Plots\n")
	    lines.append("\nAll class-level plots below show throughput (ops/sec).\n")
	    for name, image_path in sorted(plot_images, key=plot_rank):
	        heading = datatype_title(name)
	        image_name = os.path.basename(image_path)
	        lines.append(f"\n### {heading}\n\n")
	        lines.append(f"![{heading}]({plot_prefix}{image_name})\n")

	    # Every (datatype, operation) pair that gets a row in both result tables.
	    measured = [
	        (datatype, operation)
	        for datatype in DATATYPE_ORDER
	        if datatype in results
	        for operation in OPERATIONS
	    ]

	    lines.append("\n## Benchmark Results\n\n")
	    lines.append("### Timing Results (nanoseconds)\n\n")
	    lines.append("| Datatype | Operation | fory (ns) | protobuf (ns) | Fastest |\n")
	    lines.append("|----------|-----------|-----------|---------------|---------|\n")
	    for datatype, operation in measured:
	        recorded = results[datatype][operation]
	        times = {name: recorded.get(name, 0) for name in SERIALIZER_ORDER}
	        valid = {name: time_ns for name, time_ns in times.items() if time_ns > 0}
	        # Lower time is better.
	        fastest = min(valid, key=valid.get) if valid else "N/A"
	        cells = [
	            f"{times[name]:.1f}" if times[name] > 0 else "N/A"
	            for name in SERIALIZER_ORDER
	        ]
	        lines.append(table_row(datatype, operation, cells, fastest))

	    lines.append("\n### Throughput Results (ops/sec)\n\n")
	    lines.append("| Datatype | Operation | fory TPS | protobuf TPS | Fastest |\n")
	    lines.append("|----------|-----------|----------|--------------|---------|\n")
	    for datatype, operation in measured:
	        recorded = results[datatype][operation]
	        throughput = {}
	        for name in SERIALIZER_ORDER:
	            time_ns = recorded.get(name, 0)
	            throughput[name] = 1e9 / time_ns if time_ns > 0 else 0
	        valid = {name: tps for name, tps in throughput.items() if tps > 0}
	        # Higher throughput is better.
	        fastest = max(valid, key=valid.get) if valid else "N/A"
	        cells = [
	            f"{throughput[name]:,.0f}" if throughput[name] > 0 else "N/A"
	            for name in SERIALIZER_ORDER
	        ]
	        lines.append(table_row(datatype, operation, cells, fastest))

	    if sizes:
	        lines.append("\n### Serialized Data Sizes (bytes)\n\n")
	        lines.append("| Datatype | fory | protobuf |\n")
	        lines.append("|----------|------|----------|\n")
	        for datatype in DATATYPE_ORDER:
	            # The size table is keyed by display title, not by lowercase key.
	            title = datatype_title(datatype)
	            if title in sizes:
	                entry = sizes[title]
	                lines.append(
	                    f"| {title} | {entry['fory']} | {entry['protobuf']} |\n"
	                )

	    report_path = os.path.join(output_dir, "README.md")
	    with open(report_path, "w", encoding="utf-8") as handle:
	        handle.writelines(lines)
	    return report_path


	def main():
	    """Command-line entry point: parse inputs, render plots, write the report."""
	    options = parse_args()
	    log_path = options.log_file
	    target_dir = options.output_dir

	    timings = load_benchmark_results(log_path)
	    size_table = load_serialized_sizes(options.size_file)
	    host_info = get_system_info(log_path)
	    images = generate_plots(timings, target_dir)
	    report_path = write_report(
	        system_info=host_info,
	        results=timings,
	        sizes=size_table,
	        plot_images=images,
	        output_dir=target_dir,
	        plot_prefix=options.plot_prefix,
	    )
	    print(f"✅ Plots saved in: {target_dir}")
	    print(f"📄 Markdown report generated at: {report_path}")


	if __name__ == "__main__":
	    main()