blob: 6fe360993ab8be0cccacd2ffac268f58cea4133d [file]
#!/usr/bin/env python3
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
"""Generate plots and a markdown report from Swift benchmark JSON output."""
from __future__ import annotations
import argparse
import json
import os
import sys
from collections import defaultdict
from pathlib import Path
import matplotlib.pyplot as plt
from matplotlib.ticker import FuncFormatter
sys.path.insert(0, str(Path(__file__).resolve().parents[1]))
from plot_style import ( # noqa: E402
BAR_EDGE_COLOR,
GROUP_BAR_WIDTH,
GROUP_X,
add_compact_legend,
apply_benchmark_style,
format_markdown_with_prettier,
format_throughput_label,
format_throughput_tick,
save_benchmark_figure,
serializer_offset,
set_grouped_operation_axis,
style_throughput_axis,
)
apply_benchmark_style(plt)
SERIALIZER_ORDER = ["fory", "protobuf", "json"]
COLORS = {
"fory": "#FF6f01",
"protobuf": "#55BCC2",
"json": "#8C6F6D",
}
DATATYPE_ORDER = [
"struct",
"sample",
"mediacontent",
"structlist",
"samplelist",
"mediacontentlist",
]
OPERATIONS = ["serialize", "deserialize"]
def parse_args() -> argparse.Namespace:
parser = argparse.ArgumentParser(
description="Generate report for Swift benchmark results"
)
parser.add_argument(
"--json-file",
default="results/benchmark_results.json",
help="Benchmark JSON output file",
)
parser.add_argument(
"--output-dir",
default="results",
help="Directory for report output",
)
parser.add_argument(
"--plot-prefix",
default="",
help="Prefix for image paths in markdown report",
)
return parser.parse_args()
def load_json(path: str) -> dict:
with open(path, "r", encoding="utf-8") as f:
return json.load(f)
def normalize_datatype(datatype: str) -> str:
key = datatype.lower()
if key == "numericstruct":
return "struct"
if key == "numericstructlist":
return "structlist"
return key
def datatype_title(datatype: str) -> str:
datatype = normalize_datatype(datatype)
if datatype == "struct":
return "NumericStruct"
if datatype == "structlist":
return "NumericStructList"
if datatype == "mediacontent":
return "MediaContent"
if datatype == "mediacontentlist":
return "MediaContentList"
if datatype.endswith("list"):
return f"{datatype[:-4].capitalize()}List"
return datatype.capitalize()
def datatype_plot_label(datatype: str) -> str:
datatype = normalize_datatype(datatype)
if datatype == "struct":
return "NumericStruct"
if datatype == "structlist":
return "NumericStruct\nList"
if datatype == "mediacontent":
return "MediaContent"
if datatype == "mediacontentlist":
return "MediaContent\nList"
if datatype.endswith("list"):
return f"{datatype[:-4].capitalize()}\nList"
return datatype.capitalize()
def format_tps(value) -> str:
return f"{value:,.0f}" if value is not None and value > 0 else "N/A"
def format_size(value) -> str:
return str(value) if isinstance(value, int) and value >= 0 else "N/A"
def format_tps_label(value: float) -> str:
return format_throughput_label(value)
def format_tps_tick(value: float, _position) -> str:
return format_throughput_tick(value, _position)
def collect_results(payload: dict) -> dict:
results: dict = defaultdict(lambda: defaultdict(dict))
for bench in payload.get("benchmarks", []):
serializer = bench.get("serializer", "")
datatype = bench.get("dataType", "")
operation = bench.get("operation", "")
ops = float(bench.get("opsPerSec", 0.0))
if serializer and datatype and operation:
results[datatype][operation][serializer] = ops
return results
def plot_group(ax, results: dict, datatype: str) -> None:
if datatype not in results:
ax.set_title(f"{datatype_title(datatype)}\nNo Data")
ax.axis("off")
return
available_serializers = [
serializer
for serializer in SERIALIZER_ORDER
if any(
results.get(datatype, {}).get(operation, {}).get(serializer, 0.0) > 0
for operation in OPERATIONS
)
]
if not available_serializers:
ax.set_title(f"{datatype_title(datatype)}\nNo Data")
ax.axis("off")
return
x = GROUP_X
for index, serializer in enumerate(available_serializers):
values = [
results.get(datatype, {}).get(operation, {}).get(serializer, 0.0)
for operation in OPERATIONS
]
offset = serializer_offset(index, len(available_serializers))
ax.bar(
x + offset,
values,
width=GROUP_BAR_WIDTH,
label=serializer,
color=COLORS.get(serializer, "#888888"),
edgecolor=BAR_EDGE_COLOR,
linewidth=0.8,
)
max_value = max(
results.get(datatype, {}).get(operation, {}).get(serializer, 0.0)
for operation in OPERATIONS
for serializer in available_serializers
)
ax.set_ylim(0, max_value * 1.12)
ax.set_title(datatype_title(datatype), pad=8)
set_grouped_operation_axis(ax)
style_throughput_axis(ax)
ax.yaxis.set_major_formatter(FuncFormatter(format_tps_tick))
add_compact_legend(ax)
def render_plot(results: dict, output_dir: str) -> str:
fig, axes = plt.subplots(2, 3, figsize=(16.5, 9.0))
fig.suptitle(
"Swift Serialization Throughput", fontsize=15, fontweight="normal", y=0.955
)
for index, (ax, datatype) in enumerate(zip(axes.flat, DATATYPE_ORDER)):
plot_group(ax, results, datatype)
if index % 3 == 0:
ax.set_ylabel("Throughput (ops/sec)", labelpad=10)
fig.tight_layout(rect=[0.02, 0.02, 0.995, 0.965], w_pad=1.2, h_pad=1.25)
output_path = os.path.join(output_dir, "throughput.png")
save_benchmark_figure(fig, output_path)
plt.close(fig)
return output_path
def winner_cell(throughputs: dict) -> str:
rows = [
(serializer, value) for serializer, value in throughputs.items() if value > 0
]
if not rows:
return "-"
rows.sort(key=lambda pair: pair[1], reverse=True)
best_serializer, best_value = rows[0]
if len(rows) == 1:
return f"{best_serializer}"
ratio = best_value / rows[1][1] if rows[1][1] > 0 else 0
return f"{best_serializer} ({ratio:.2f}x)"
def write_report(
payload: dict,
results: dict,
throughput_plot: str,
output_dir: str,
plot_prefix: str,
) -> str:
context = payload.get("context", {})
sizes = payload.get("serializedSizes", [])
lines: list[str] = []
lines.append("# Fory Swift Benchmark")
lines.append("")
lines.append(
"This benchmark compares serialization and deserialization throughput for "
"Apache Fory, Protocol Buffers, and JSON in Swift."
)
lines.append("")
lines.append("## Throughput Plot")
lines.append("")
plot_name = os.path.basename(throughput_plot)
if plot_prefix:
image_path = f"{plot_prefix.rstrip('/')}/{plot_name}"
else:
image_path = plot_name
lines.append(f"![Throughput]({image_path})")
lines.append("")
lines.append("## Hardware and Runtime Info")
lines.append("")
lines.append("| Key | Value |")
lines.append("| --- | --- |")
lines.append(f"| Timestamp | {context.get('timestamp', '-')} |")
lines.append(f"| OS | {context.get('os', '-')} |")
lines.append(f"| Host | {context.get('host', '-')} |")
lines.append(f"| CPU Cores (Logical) | {context.get('cpuCoresLogical', '-')} |")
memory = context.get("memoryGB")
memory_str = f"{memory:.2f}" if isinstance(memory, (int, float)) else "-"
lines.append(f"| Memory (GB) | {memory_str} |")
lines.append(f"| Duration per case (s) | {context.get('durationSeconds', '-')} |")
lines.append("")
lines.append("## Throughput Results")
lines.append("")
lines.append(
"| Datatype | Operation | Fory TPS | Protobuf TPS | JSON TPS | Fastest |"
)
lines.append("| --- | --- | ---: | ---: | ---: | --- |")
for datatype in DATATYPE_ORDER:
if datatype not in results:
continue
for operation in OPERATIONS:
throughputs = results.get(datatype, {}).get(operation, {})
fory = throughputs.get("fory")
protobuf = throughputs.get("protobuf")
json_tps = throughputs.get("json")
lines.append(
"| "
+ f"{datatype_title(datatype)} | {operation.capitalize()} | "
+ f"{format_tps(fory)} | {format_tps(protobuf)} | {format_tps(json_tps)} | "
+ f"{winner_cell(throughputs)} |"
)
lines.append("")
lines.append("## Serialized Size (bytes)")
lines.append("")
lines.append("| Datatype | Fory | Protobuf | JSON |")
lines.append("| --- | ---: | ---: | ---: |")
sizes_by_datatype = {
normalize_datatype(str(entry.get("dataType", ""))): entry for entry in sizes
}
for datatype in DATATYPE_ORDER:
entry = sizes_by_datatype.get(datatype)
if entry is None:
continue
datatype_label = datatype_title(datatype)
lines.append(
"| "
+ f"{datatype_label} | "
+ f"{format_size(entry.get('fory'))} | "
+ f"{format_size(entry.get('protobuf'))} | "
+ f"{format_size(entry.get('json'))} |"
)
report_path = os.path.join(output_dir, "README.md")
legacy_report_path = os.path.join(output_dir, "REPORT.md")
report_text = "\n".join(lines) + "\n"
with open(report_path, "w", encoding="utf-8") as f:
f.write(report_text)
with open(legacy_report_path, "w", encoding="utf-8") as f:
f.write(report_text)
format_markdown_with_prettier(report_path, legacy_report_path)
return report_path
def main() -> int:
args = parse_args()
Path(args.output_dir).mkdir(parents=True, exist_ok=True)
payload = load_json(args.json_file)
results = collect_results(payload)
throughput_plot = render_plot(results, args.output_dir)
report = write_report(
payload, results, throughput_plot, args.output_dir, args.plot_prefix
)
print(f"Generated report: {report}")
print(f"Generated plot: {throughput_plot}")
return 0
if __name__ == "__main__":
raise SystemExit(main())