# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
import argparse
import json
import os
import platform
import sys
from collections import defaultdict
from datetime import datetime
from pathlib import Path
import matplotlib.pyplot as plt
from matplotlib.ticker import FuncFormatter
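# Make the shared plot_style helpers (one directory above this script) importable.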
sys.path.insert(0, str(Path(__file__).resolve().parents[1]))
from plot_style import (  # noqa: E402
    BAR_EDGE_COLOR,
    GROUP_BAR_WIDTH,
    GROUP_X,
    add_compact_legend,
    apply_benchmark_style,
    format_markdown_with_prettier,
    format_throughput_tick,
    save_benchmark_figure,
    serializer_offset,
    set_grouped_operation_axis,
    style_throughput_axis,
)
apply_benchmark_style(plt)
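# psutil is optional; CPU core counts and RAM are omitted when it is not installed.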
try:
    import psutil
    HAS_PSUTIL = True
except ImportError:
    HAS_PSUTIL = False
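# Bar colors plus the fixed serializer and datatype ordering used throughout plots and tables.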
COLORS = {
"fory": "#FF6F01",
"protobuf": "#55BCC2",
"json": (0.55, 0.40, 0.45),
}
SERIALIZER_ORDER = ["fory", "protobuf", "json"]
SERIALIZER_LABELS = {
"fory": "fory",
"protobuf": "protobuf",
"json": "json",
}
DATATYPE_ORDER = [
"struct",
"sample",
"mediacontent",
"structlist",
"samplelist",
"mediacontentlist",
]
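# CLI options: results JSON path, output directory (defaults to a timestamp), and image path prefix for the report.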
parser = argparse.ArgumentParser(
description="Generate plots and Markdown report for JavaScript benchmark results"
)
parser.add_argument(
"--json-file", default="benchmark_results.json", help="Benchmark JSON output file"
)
parser.add_argument(
"--output-dir",
default="",
help="Output directory for plots and report",
)
parser.add_argument(
"--plot-prefix", default="", help="Image path prefix in Markdown report"
)
args = parser.parse_args()
output_dir = args.output_dir.strip() or datetime.now().strftime("%Y_%m_%d_%H_%M_%S")
os.makedirs(output_dir, exist_ok=True)
def get_system_info():
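    """Collect basic host details, adding core counts and RAM when psutil is available."""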
    info = {
        "OS": f"{platform.system()} {platform.release()}",
        "Machine": platform.machine(),
        "Processor": platform.processor() or "Unknown",
    }
    if HAS_PSUTIL:
        info["CPU Cores (Physical)"] = psutil.cpu_count(logical=False)
        info["CPU Cores (Logical)"] = psutil.cpu_count(logical=True)
        info["Total RAM (GB)"] = round(psutil.virtual_memory().total / (1024**3), 2)
    return info
def parse_benchmark_name(name):
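    """Split a "BM_<serializer>_<datatype>_<operation>" name into its lowercased parts,
    folding the numericstruct/numericstructlist datatypes into struct/structlist."""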
if name.startswith("BM_"):
name = name[3:]
parts = name.split("_")
if len(parts) >= 3:
datatype = parts[1].lower()
if datatype == "numericstruct":
datatype = "struct"
elif datatype == "numericstructlist":
datatype = "structlist"
return parts[0].lower(), datatype, parts[2].lower()
return None, None, None
def format_datatype_label(datatype):
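    """Return a display label for a datatype, wrapped onto two lines for the long names."""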
if datatype == "struct":
return "NumericStruct"
if datatype == "structlist":
return "NumericStruct\nList"
if datatype.endswith("list"):
base = datatype[: -len("list")]
if base == "mediacontent":
return "MediaContent\nList"
return f"{base.capitalize()}\nList"
if datatype == "mediacontent":
return "MediaContent"
return datatype.capitalize()
def format_datatype_table_label(datatype):
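    """Return a single-line datatype label for Markdown tables and subplot titles."""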
if datatype == "struct":
return "NumericStruct"
if datatype == "structlist":
return "NumericStructList"
if datatype.endswith("list"):
base = datatype[: -len("list")]
if base == "mediacontent":
return "MediaContentList"
return f"{base.capitalize()}List"
if datatype == "mediacontent":
return "MediaContent"
return datatype.capitalize()
with open(args.json_file, "r", encoding="utf-8") as handle:
    benchmark_data = json.load(handle)
data = defaultdict(lambda: defaultdict(dict))
sizes = {}
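# Index timings as data[datatype][operation][serializer]; size counters come from the PrintSerializedSizes entry.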
for bench in benchmark_data.get("benchmarks", []):
name = bench.get("name", "")
if "PrintSerializedSizes" in name:
for key, value in bench.items():
if key.endswith("_size"):
sizes[key] = int(value)
continue
serializer, datatype, operation = parse_benchmark_name(name)
if serializer and datatype and operation:
time_ns = bench.get("real_time", bench.get("cpu_time", 0))
data[datatype][operation][serializer] = time_ns
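# Fold metadata from the benchmark JSON "context" block into the system summary.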
system_info = get_system_info()
context = benchmark_data.get("context", {})
if context.get("date"):
system_info["Benchmark Date"] = context["date"]
if context.get("num_cpus"):
system_info["CPU Cores (from benchmark)"] = context["num_cpus"]
if context.get("node_version"):
system_info["Node.js"] = context["node_version"]
if context.get("v8_version"):
system_info["V8"] = context["v8_version"]
def format_tps_tick(tps, _position):
    return format_throughput_tick(tps, _position)
datatypes = [datatype for datatype in DATATYPE_ORDER if datatype in data]
operations = ["serialize", "deserialize"]
def plot_throughput_grid_subplot(ax, datatype):
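    """Draw grouped serialize/deserialize throughput bars (ops/sec) for one datatype on ``ax``."""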
    if datatype not in data:
        ax.set_title(f"{format_datatype_table_label(datatype)}\nNo Data")
        ax.axis("off")
        return
    available_libs = [
        lib
        for lib in SERIALIZER_ORDER
        if any(
            data[datatype][operation].get(lib, 0) > 0
            for operation in ["serialize", "deserialize"]
        )
    ]
    if not available_libs:
        ax.set_title(f"{format_datatype_table_label(datatype)}\nNo Data")
        ax.axis("off")
        return
    operations = ["serialize", "deserialize"]
    x = GROUP_X
    for idx, lib in enumerate(available_libs):
        times = [data[datatype][operation].get(lib, 0) for operation in operations]
        throughput = [1e9 / value if value > 0 else 0 for value in times]
        offset = serializer_offset(idx, len(available_libs))
        ax.bar(
            x + offset,
            throughput,
            GROUP_BAR_WIDTH,
            label=SERIALIZER_LABELS[lib],
            color=COLORS[lib],
            edgecolor=BAR_EDGE_COLOR,
            linewidth=0.8,
        )
    max_tps = max(
        1e9 / data[datatype][operation][lib]
        for operation in operations
        for lib in available_libs
        if data[datatype][operation].get(lib, 0) > 0
    )
    ax.set_ylim(0, max_tps * 1.12)
    ax.set_title(format_datatype_table_label(datatype), pad=8)
    set_grouped_operation_axis(ax)
    style_throughput_axis(ax)
    ax.yaxis.set_major_formatter(FuncFormatter(format_tps_tick))
    add_compact_legend(ax)
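# Lay out one panel per datatype on a 2x3 grid; only the leftmost column gets a y-axis label.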
fig, axes = plt.subplots(2, 3, figsize=(16.5, 9.0))
for index, (ax, datatype) in enumerate(zip(axes.flat, DATATYPE_ORDER)):
    plot_throughput_grid_subplot(ax, datatype)
    if index % 3 == 0:
        ax.set_ylabel("Throughput (ops/sec)", labelpad=10)
fig.suptitle(
"JavaScript Serialization Throughput", fontsize=15, fontweight="normal", y=0.955
)
fig.tight_layout(rect=[0.02, 0.02, 0.995, 0.965], w_pad=1.2, h_pad=1.25)
combined_plot_path = os.path.join(output_dir, "throughput.png")
save_benchmark_figure(fig, combined_plot_path)
plt.close()
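# Assemble the Markdown report as a list of lines, then write it out below.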
md_report = [
"# JavaScript Benchmark Performance Report\n\n",
f"_Generated on {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}_\n\n",
"## How to Generate This Report\n\n",
"```bash\n",
"cd benchmarks/javascript\n",
"./run.sh\n",
"```\n\n",
"## Benchmark Semantics\n\n",
"The timed serializer loops use serializer-native typed values. Fory receives "
"the pre-normalized Fory value used by its schema, protobuf receives the "
"prebuilt protobuf-shaped value, and JSON receives the benchmark JavaScript "
"object. Protobuf timings do not include `toProto`, `fromProto`, "
"`protobufjs.create`, or `toObject` conversion work.\n\n",
"## Benchmark Plot\n\n",
"The plot shows throughput (ops/sec); higher is better.\n\n",
f"![Throughput]({args.plot_prefix}throughput.png)\n\n",
"## Hardware & OS Info\n\n",
"| Key | Value |\n",
"|-----|-------|\n",
]
for key, value in system_info.items():
md_report.append(f"| {key} | {value} |\n")
md_report.append("\n## Benchmark Results\n\n")
md_report.append("### Timing Results (nanoseconds)\n\n")
md_report.append(
"| Datatype | Operation | fory (ns) | protobuf (ns) | json (ns) | Fastest |\n"
)
md_report.append(
"|----------|-----------|-----------|---------------|-----------|---------|\n"
)
for datatype in datatypes:
    for operation in operations:
        times = {lib: data[datatype][operation].get(lib, 0) for lib in SERIALIZER_ORDER}
        valid = {lib: value for lib, value in times.items() if value > 0}
        fastest = min(valid, key=valid.get) if valid else None
        md_report.append(
            "| "
            + f"{format_datatype_table_label(datatype)} | {operation.capitalize()} | "
            + " | ".join(
                f"{times[lib]:.1f}" if times[lib] > 0 else "N/A"
                for lib in SERIALIZER_ORDER
            )
            + f" | {SERIALIZER_LABELS[fastest] if fastest else 'N/A'} |\n"
        )
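# Throughput table: ops/sec derived as 1e9 / time_ns; higher is better.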
md_report.append("\n### Throughput Results (ops/sec)\n\n")
md_report.append(
"| Datatype | Operation | fory TPS | protobuf TPS | json TPS | Fastest |\n"
)
md_report.append(
"|----------|-----------|----------|--------------|----------|---------|\n"
)
for datatype in datatypes:
    for operation in operations:
        times = {lib: data[datatype][operation].get(lib, 0) for lib in SERIALIZER_ORDER}
        tps = {lib: (1e9 / value if value > 0 else 0) for lib, value in times.items()}
        valid = {lib: value for lib, value in tps.items() if value > 0}
        fastest = max(valid, key=valid.get) if valid else None
        md_report.append(
            "| "
            + f"{format_datatype_table_label(datatype)} | {operation.capitalize()} | "
            + " | ".join(
                f"{tps[lib]:,.0f}" if tps[lib] > 0 else "N/A"
                for lib in SERIALIZER_ORDER
            )
            + f" | {SERIALIZER_LABELS[fastest] if fastest else 'N/A'} |\n"
        )
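# Serialized payload sizes are only available when the PrintSerializedSizes entry reported *_size fields.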
if sizes:
md_report.append("\n### Serialized Data Sizes (bytes)\n\n")
md_report.append("| Datatype | fory | protobuf | json |\n")
md_report.append("|----------|------|----------|------|\n")
size_datatypes = [
("struct", "NumericStruct"),
("sample", "Sample"),
("media", "MediaContent"),
("struct_list", "NumericStructList"),
("sample_list", "SampleList"),
("media_list", "MediaContentList"),
]
for datatype_key, datatype_label in size_datatypes:
row = []
has_value = False
for serializer in SERIALIZER_ORDER:
value = sizes.get(f"{serializer}_{datatype_key}_size")
if value is None:
row.append("N/A")
else:
row.append(str(value))
has_value = True
if has_value:
md_report.append(f"| {datatype_label} | " + " | ".join(row) + " |\n")
report_path = os.path.join(output_dir, "README.md")
with open(report_path, "w", encoding="utf-8") as handle:
    handle.writelines(md_report)
format_markdown_with_prettier(report_path)
print(f"Plots saved in: {output_dir}")
print(f"Markdown report generated at: {report_path}")