# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

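"""Generate throughput plots and a Markdown performance report for the JavaScript
serialization benchmarks, comparing fory, protobuf, and plain JSON results read from
a benchmark results JSON file."""
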
import argparse
import json
import os
import platform
import sys
from collections import defaultdict
from datetime import datetime
from pathlib import Path

import matplotlib.pyplot as plt
from matplotlib.ticker import FuncFormatter

sys.path.insert(0, str(Path(__file__).resolve().parents[1]))
from plot_style import (  # noqa: E402
    BAR_EDGE_COLOR,
    GROUP_BAR_WIDTH,
    GROUP_X,
    add_compact_legend,
    apply_benchmark_style,
    format_markdown_with_prettier,
    format_throughput_tick,
    save_benchmark_figure,
    serializer_offset,
    set_grouped_operation_axis,
    style_throughput_axis,
)

apply_benchmark_style(plt)

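# psutil is optional: without it the report simply omits CPU core counts and RAM size.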
try:
    import psutil

    HAS_PSUTIL = True
except ImportError:
    HAS_PSUTIL = False

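# Bar colors, serializer ordering, and display labels shared by every plot and table below.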
COLORS = {
    "fory": "#FF6F01",
    "protobuf": "#55BCC2",
    "json": (0.55, 0.40, 0.45),
}
SERIALIZER_ORDER = ["fory", "protobuf", "json"]
SERIALIZER_LABELS = {
    "fory": "fory",
    "protobuf": "protobuf",
    "json": "json",
}
DATATYPE_ORDER = [
    "struct",
    "sample",
    "mediacontent",
    "structlist",
    "samplelist",
    "mediacontentlist",
]

parser = argparse.ArgumentParser(
    description="Generate plots and Markdown report for JavaScript benchmark results"
)
parser.add_argument(
    "--json-file", default="benchmark_results.json", help="Benchmark JSON output file"
)
parser.add_argument(
    "--output-dir",
    default="",
    help="Output directory for plots and report",
)
parser.add_argument(
    "--plot-prefix", default="", help="Image path prefix in Markdown report"
)
args = parser.parse_args()

output_dir = args.output_dir.strip() or datetime.now().strftime("%Y_%m_%d_%H_%M_%S")
os.makedirs(output_dir, exist_ok=True)


def get_system_info():
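    """Collect host OS/CPU details, adding core counts and total RAM when psutil is available."""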
    info = {
        "OS": f"{platform.system()} {platform.release()}",
        "Machine": platform.machine(),
        "Processor": platform.processor() or "Unknown",
    }
    if HAS_PSUTIL:
        info["CPU Cores (Physical)"] = psutil.cpu_count(logical=False)
        info["CPU Cores (Logical)"] = psutil.cpu_count(logical=True)
        info["Total RAM (GB)"] = round(psutil.virtual_memory().total / (1024**3), 2)
    return info


def parse_benchmark_name(name):
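    """Split a benchmark name such as ``BM_fory_struct_serialize`` into its parts.

    Returns lowercase ``(serializer, datatype, operation)``, mapping the
    "numericstruct"/"numericstructlist" datatypes to "struct"/"structlist", or
    ``(None, None, None)`` when the name has fewer than three "_"-separated parts.
    """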
    if name.startswith("BM_"):
        name = name[3:]
    parts = name.split("_")
    if len(parts) >= 3:
        datatype = parts[1].lower()
        if datatype == "numericstruct":
            datatype = "struct"
        elif datatype == "numericstructlist":
            datatype = "structlist"
        return parts[0].lower(), datatype, parts[2].lower()
    return None, None, None


def format_datatype_label(datatype):
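    """Return a display label for a datatype, wrapping list types onto a second line."""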
| if datatype == "struct": |
| return "NumericStruct" |
| if datatype == "structlist": |
| return "NumericStruct\nList" |
| if datatype.endswith("list"): |
| base = datatype[: -len("list")] |
| if base == "mediacontent": |
| return "MediaContent\nList" |
| return f"{base.capitalize()}\nList" |
| if datatype == "mediacontent": |
| return "MediaContent" |
| return datatype.capitalize() |
| |
| |
| def format_datatype_table_label(datatype): |
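    """Return a single-line label for a datatype, as used in report tables and subplot titles."""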
| if datatype == "struct": |
| return "NumericStruct" |
| if datatype == "structlist": |
| return "NumericStructList" |
| if datatype.endswith("list"): |
| base = datatype[: -len("list")] |
| if base == "mediacontent": |
| return "MediaContentList" |
| return f"{base.capitalize()}List" |
| if datatype == "mediacontent": |
| return "MediaContent" |
| return datatype.capitalize() |
| |
| |
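# Load the raw benchmark JSON; timing entries are keyed as data[datatype][operation][serializer]
# (nanoseconds) and serialized payload sizes (keys ending in "_size") are collected in sizes.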
with open(args.json_file, "r", encoding="utf-8") as handle:
    benchmark_data = json.load(handle)

data = defaultdict(lambda: defaultdict(dict))
sizes = {}

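# Entries named "PrintSerializedSizes" carry per-serializer payload sizes; all other entries are timings.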
for bench in benchmark_data.get("benchmarks", []):
    name = bench.get("name", "")
    if "PrintSerializedSizes" in name:
        for key, value in bench.items():
            if key.endswith("_size"):
                sizes[key] = int(value)
        continue
    serializer, datatype, operation = parse_benchmark_name(name)
    if serializer and datatype and operation:
        time_ns = bench.get("real_time", bench.get("cpu_time", 0))
        data[datatype][operation][serializer] = time_ns

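# Fold selected fields from the benchmark "context" block into the system-info table.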
system_info = get_system_info()
context = benchmark_data.get("context", {})
if context.get("date"):
    system_info["Benchmark Date"] = context["date"]
if context.get("num_cpus"):
    system_info["CPU Cores (from benchmark)"] = context["num_cpus"]
if context.get("node_version"):
    system_info["Node.js"] = context["node_version"]
if context.get("v8_version"):
    system_info["V8"] = context["v8_version"]


def format_tps_tick(tps, _position):
    return format_throughput_tick(tps, _position)


datatypes = [datatype for datatype in DATATYPE_ORDER if datatype in data]
operations = ["serialize", "deserialize"]


def plot_throughput_grid_subplot(ax, datatype):
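    """Draw grouped serialize/deserialize throughput bars for one datatype on ``ax``.

    Shows a "No Data" title and hides the axes when no serializer has timing data.
    """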
    if datatype not in data:
        ax.set_title(f"{format_datatype_table_label(datatype)}\nNo Data")
        ax.axis("off")
        return

    available_libs = [
        lib
        for lib in SERIALIZER_ORDER
        if any(
            data[datatype][operation].get(lib, 0) > 0
            for operation in ["serialize", "deserialize"]
        )
    ]
    if not available_libs:
        ax.set_title(f"{format_datatype_table_label(datatype)}\nNo Data")
        ax.axis("off")
        return

    operations = ["serialize", "deserialize"]
    x = GROUP_X
    for idx, lib in enumerate(available_libs):
        times = [data[datatype][operation].get(lib, 0) for operation in operations]
        throughput = [1e9 / value if value > 0 else 0 for value in times]
        offset = serializer_offset(idx, len(available_libs))
        ax.bar(
            x + offset,
            throughput,
            GROUP_BAR_WIDTH,
            label=SERIALIZER_LABELS[lib],
            color=COLORS[lib],
            edgecolor=BAR_EDGE_COLOR,
            linewidth=0.8,
        )

    max_tps = max(
        1e9 / data[datatype][operation][lib]
        for operation in operations
        for lib in available_libs
        if data[datatype][operation].get(lib, 0) > 0
    )
    ax.set_ylim(0, max_tps * 1.12)
    ax.set_title(format_datatype_table_label(datatype), pad=8)
    set_grouped_operation_axis(ax)
    style_throughput_axis(ax)
    ax.yaxis.set_major_formatter(FuncFormatter(format_tps_tick))
    add_compact_legend(ax)

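# Combined 2x3 grid: one throughput subplot per datatype, with the y-label only on the first column.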
fig, axes = plt.subplots(2, 3, figsize=(16.5, 9.0))
for index, (ax, datatype) in enumerate(zip(axes.flat, DATATYPE_ORDER)):
    plot_throughput_grid_subplot(ax, datatype)
    if index % 3 == 0:
        ax.set_ylabel("Throughput (ops/sec)", labelpad=10)
fig.suptitle(
    "JavaScript Serialization Throughput", fontsize=15, fontweight="normal", y=0.955
)
fig.tight_layout(rect=[0.02, 0.02, 0.995, 0.965], w_pad=1.2, h_pad=1.25)
combined_plot_path = os.path.join(output_dir, "throughput.png")
save_benchmark_figure(fig, combined_plot_path)
plt.close(fig)

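# Assemble the Markdown report as a list of lines; it is written out in one pass below.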
md_report = [
    "# JavaScript Benchmark Performance Report\n\n",
    f"_Generated on {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}_\n\n",
    "## How to Generate This Report\n\n",
    "```bash\n",
    "cd benchmarks/javascript\n",
    "./run.sh\n",
    "```\n\n",
    "## Benchmark Semantics\n\n",
    "The timed serializer loops use serializer-native typed values. Fory receives "
    "the pre-normalized Fory value used by its schema, protobuf receives the "
    "prebuilt protobuf-shaped value, and JSON receives the benchmark JavaScript "
    "object. Protobuf timings do not include `toProto`, `fromProto`, "
    "`protobufjs.create`, or `toObject` conversion work.\n\n",
    "## Benchmark Plot\n\n",
    "The plot shows throughput (ops/sec); higher is better.\n\n",
    f"![Throughput]({args.plot_prefix}throughput.png)\n\n",
    "## Hardware & OS Info\n\n",
    "| Key | Value |\n",
    "|-----|-------|\n",
]

for key, value in system_info.items():
    md_report.append(f"| {key} | {value} |\n")

md_report.append("\n## Benchmark Results\n\n")
md_report.append("### Timing Results (nanoseconds)\n\n")
md_report.append(
    "| Datatype | Operation | fory (ns) | protobuf (ns) | json (ns) | Fastest |\n"
)
md_report.append(
    "|----------|-----------|-----------|---------------|-----------|---------|\n"
)

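# Timing table: per-operation times in nanoseconds; the lowest time is flagged as fastest.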
for datatype in datatypes:
    for operation in operations:
        times = {lib: data[datatype][operation].get(lib, 0) for lib in SERIALIZER_ORDER}
        valid = {lib: value for lib, value in times.items() if value > 0}
        fastest = min(valid, key=valid.get) if valid else None
        md_report.append(
            "| "
            + f"{format_datatype_table_label(datatype)} | {operation.capitalize()} | "
            + " | ".join(
                f"{times[lib]:.1f}" if times[lib] > 0 else "N/A"
                for lib in SERIALIZER_ORDER
            )
            + f" | {SERIALIZER_LABELS[fastest] if fastest else 'N/A'} |\n"
        )

md_report.append("\n### Throughput Results (ops/sec)\n\n")
md_report.append(
    "| Datatype | Operation | fory TPS | protobuf TPS | json TPS | Fastest |\n"
)
md_report.append(
    "|----------|-----------|----------|--------------|----------|---------|\n"
)

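# Throughput table: ops/sec computed as 1e9 / time_ns; the highest value is flagged as fastest.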
for datatype in datatypes:
    for operation in operations:
        times = {lib: data[datatype][operation].get(lib, 0) for lib in SERIALIZER_ORDER}
        tps = {lib: (1e9 / value if value > 0 else 0) for lib, value in times.items()}
        valid = {lib: value for lib, value in tps.items() if value > 0}
        fastest = max(valid, key=valid.get) if valid else None
        md_report.append(
            "| "
            + f"{format_datatype_table_label(datatype)} | {operation.capitalize()} | "
            + " | ".join(
                f"{tps[lib]:,.0f}" if tps[lib] > 0 else "N/A"
                for lib in SERIALIZER_ORDER
            )
            + f" | {SERIALIZER_LABELS[fastest] if fastest else 'N/A'} |\n"
        )

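# The serialized-size table is emitted only when the size benchmark produced any values.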
if sizes:
    md_report.append("\n### Serialized Data Sizes (bytes)\n\n")
    md_report.append("| Datatype | fory | protobuf | json |\n")
    md_report.append("|----------|------|----------|------|\n")
    size_datatypes = [
        ("struct", "NumericStruct"),
        ("sample", "Sample"),
        ("media", "MediaContent"),
        ("struct_list", "NumericStructList"),
        ("sample_list", "SampleList"),
        ("media_list", "MediaContentList"),
    ]
    for datatype_key, datatype_label in size_datatypes:
        row = []
        has_value = False
        for serializer in SERIALIZER_ORDER:
            value = sizes.get(f"{serializer}_{datatype_key}_size")
            if value is None:
                row.append("N/A")
            else:
                row.append(str(value))
                has_value = True
        if has_value:
            md_report.append(f"| {datatype_label} | " + " | ".join(row) + " |\n")

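# Write the report, then normalize its formatting with the shared prettier helper.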
report_path = os.path.join(output_dir, "README.md")
with open(report_path, "w", encoding="utf-8") as handle:
    handle.writelines(md_report)

format_markdown_with_prettier(report_path)

print(f"Plots saved in: {output_dir}")
print(f"Markdown report generated at: {report_path}")