blob: eb0327a6bfbb0c4de1fad9dcacf1194de7010b2b [file]
#!/usr/bin/env python3
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
"""Generate a Java xlang throughput plot from JMH JSON output."""
from __future__ import annotations
import argparse
import json
import os
import re
import shutil
import sys
from collections import defaultdict
from pathlib import Path
from typing import Any
import matplotlib.pyplot as plt
from matplotlib.ticker import FuncFormatter
sys.path.insert(0, str(Path(__file__).resolve().parents[1]))
from plot_style import ( # noqa: E402
BAR_EDGE_COLOR,
GROUP_BAR_WIDTH,
GROUP_X,
add_compact_legend,
apply_benchmark_style,
format_markdown_with_prettier,
format_throughput_tick,
save_benchmark_figure,
serializer_offset,
set_grouped_operation_axis,
style_throughput_axis,
)
apply_benchmark_style(plt)
SERIALIZER_ORDER = ["fory-codegen=true", "fory-codegen=false", "protobuf", "flatbuffer"]
COLORS = {
"fory-codegen=true": "#FF6f01",
"fory-codegen=false": "#C94700",
"protobuf": "#55BCC2",
"flatbuffer": (0.55, 0.40, 0.45),
}
DATATYPE_ORDER = [
"struct",
"sample",
"mediacontent",
"structlist",
"samplelist",
"mediacontentlist",
]
OPERATIONS = ["serialize", "deserialize"]
BENCHMARK_PATTERN = re.compile(
r"(?:^|[.])BM_(?P<serializer>Fory|Protobuf|Flatbuffer)_"
r"(?P<datatype>NumericStruct|Sample|MediaContent|NumericStructList|SampleList|MediaContentList)_"
r"(?P<operation>Serialize|Deserialize)$"
)
def parse_args() -> argparse.Namespace:
parser = argparse.ArgumentParser(
description="Generate throughput.png for Java xlang benchmark results"
)
parser.add_argument(
"--json-file",
default="reports/benchmark_results.json",
help="JMH JSON output file",
)
parser.add_argument(
"--output-dir",
default="reports",
help="Local directory for throughput.png",
)
parser.add_argument(
"--docs-output-dir",
default=None,
help="Optional docs directory to receive only a copied throughput.png",
)
return parser.parse_args()
def load_json(path: str) -> Any:
with open(path, "r", encoding="utf-8") as f:
return json.load(f)
def datatype_key(name: str) -> str:
key = name.lower()
if key == "numericstruct":
return "struct"
if key == "numericstructlist":
return "structlist"
return key
def datatype_title(datatype: str) -> str:
if datatype == "struct":
return "NumericStruct"
if datatype == "structlist":
return "NumericStructList"
if datatype == "mediacontent":
return "MediaContent"
if datatype == "mediacontentlist":
return "MediaContentList"
if datatype.endswith("list"):
return f"{datatype[:-4].capitalize()}List"
return datatype.capitalize()
def score_to_ops_per_sec(score: float, unit: str) -> float:
if unit == "ops/s":
return score
if unit == "ops/ms":
return score * 1_000
if unit == "ops/us":
return score * 1_000_000
if unit == "ops/ns":
return score * 1_000_000_000
return score
def collect_results(payload: Any) -> dict:
results: dict = defaultdict(lambda: defaultdict(dict))
benchmarks = payload if isinstance(payload, list) else payload.get("benchmarks", [])
for bench in benchmarks:
benchmark_name = bench.get("benchmark") or bench.get("name", "")
match = BENCHMARK_PATTERN.search(benchmark_name)
if match is None:
continue
metric = bench.get("primaryMetric", {})
score = float(metric.get("score", bench.get("opsPerSec", 0.0)))
unit = metric.get("scoreUnit", "ops/s")
serializer = match.group("serializer").lower()
if serializer == "fory":
codegen = str(bench.get("params", {}).get("codegen", "unknown")).lower()
serializer = f"fory-codegen={codegen}"
datatype = datatype_key(match.group("datatype"))
operation = match.group("operation").lower()
results[datatype][operation][serializer] = score_to_ops_per_sec(score, unit)
return results
def format_tps(value: float, _position) -> str:
return format_throughput_tick(value, _position)
def plot_group(ax, results: dict, datatype: str) -> None:
if datatype not in results:
ax.set_title(f"{datatype_title(datatype)}\nNo Data")
ax.axis("off")
return
serializers = [
serializer
for serializer in SERIALIZER_ORDER
if any(
results.get(datatype, {}).get(operation, {}).get(serializer, 0.0) > 0
for operation in OPERATIONS
)
]
if not serializers:
ax.set_title(f"{datatype_title(datatype)}\nNo Data")
ax.axis("off")
return
x = GROUP_X
for index, serializer in enumerate(serializers):
values = [
results.get(datatype, {}).get(operation, {}).get(serializer, 0.0)
for operation in OPERATIONS
]
offset = serializer_offset(index, len(serializers))
ax.bar(
x + offset,
values,
width=GROUP_BAR_WIDTH,
label=serializer,
color=COLORS.get(serializer, "#888888"),
edgecolor=BAR_EDGE_COLOR,
linewidth=0.8,
)
max_value = max(
results.get(datatype, {}).get(operation, {}).get(serializer, 0.0)
for operation in OPERATIONS
for serializer in serializers
)
ax.set_ylim(0, max_value * 1.12)
ax.set_title(datatype_title(datatype), fontweight="normal", pad=8)
set_grouped_operation_axis(ax)
style_throughput_axis(ax)
ax.yaxis.set_major_formatter(FuncFormatter(format_tps))
add_compact_legend(ax)
def render_plot(results: dict, output_dir: str) -> str:
fig, axes = plt.subplots(2, 3, figsize=(16.5, 9.0))
fig.suptitle(
"Java Xlang Serialization Throughput",
fontsize=15,
fontweight="normal",
y=0.955,
)
for index, (ax, datatype) in enumerate(zip(axes.flat, DATATYPE_ORDER)):
plot_group(ax, results, datatype)
if index % 3 == 0:
ax.set_ylabel("Throughput (ops/sec)", labelpad=10)
fig.tight_layout(rect=[0.02, 0.02, 0.995, 0.965], w_pad=1.2, h_pad=1.25)
output_path = os.path.join(output_dir, "throughput.png")
save_benchmark_figure(fig, output_path)
plt.close(fig)
return output_path
def format_table_value(value: float) -> str:
return f"{value:,.0f}" if value > 0 else "N/A"
def serializer_title(serializer: str) -> str:
if serializer.startswith("fory-codegen="):
return "Fory " + serializer[len("fory-") :]
return serializer.capitalize()
def winner_cell(values: dict) -> str:
positive = {name: value for name, value in values.items() if value > 0}
if not positive:
return "N/A"
winner = max(positive, key=positive.get)
return serializer_title(winner)
def build_xlang_section(results: dict, image_name: str) -> str:
lines = [
"## Xlang Benchmark\n\n",
"Run from `benchmarks/java/run.sh`. Raw JMH JSON stays under the ignored local "
"`benchmarks/java/reports/` directory; `throughput.png` and this xlang "
"section are synced into `docs/benchmarks/java/`.\n\n",
"```bash\n",
"cd benchmarks/java\n",
"./run.sh\n",
"```\n\n",
"JMH parameters: `-f 1 -wi 3 -i 3 -t 1 -w 3s -r 3s -bm thrpt -tu s`. "
"Higher throughput is better.\n\n",
f"![Java Xlang Serialization Throughput]({image_name})\n\n",
"| Data type | Operation | "
+ " | ".join(
f"{serializer_title(serializer)} ops/sec" for serializer in SERIALIZER_ORDER
)
+ " | Fastest |\n",
"|-----------|-----------|"
+ "|".join("---:" for _ in SERIALIZER_ORDER)
+ "|---------|\n",
]
for datatype in DATATYPE_ORDER:
for operation in OPERATIONS:
values = {
serializer: results.get(datatype, {})
.get(operation, {})
.get(serializer, 0.0)
for serializer in SERIALIZER_ORDER
}
lines.append(
f"| {datatype_title(datatype)} | {operation.capitalize()} | "
+ " | ".join(
format_table_value(values[serializer])
for serializer in SERIALIZER_ORDER
)
+ f" | {winner_cell(values)} |\n"
)
return "".join(lines)
def write_local_readme(output_dir: Path, section: str) -> Path:
report_path = output_dir / "README.md"
report_path.write_text(
"# Java Xlang Benchmark Report\n\n" + section, encoding="utf-8"
)
run_prettier(report_path)
return report_path
def update_docs_readme(docs_output_dir: Path, section: str) -> Path:
docs_readme = docs_output_dir / "README.md"
if docs_readme.exists():
content = docs_readme.read_text(encoding="utf-8").rstrip()
marker = "\n## Xlang Benchmark\n"
if marker in content:
prefix = content.split(marker, 1)[0].rstrip()
content = prefix + "\n\n" + section
else:
content = content + "\n\n" + section
else:
content = "# Java Benchmarks\n\n" + section
docs_readme.write_text(content.rstrip() + "\n", encoding="utf-8")
run_prettier(docs_readme)
return docs_readme
def run_prettier(path: Path) -> None:
format_markdown_with_prettier(path)
def main() -> None:
args = parse_args()
output_dir = Path(args.output_dir)
output_dir.mkdir(parents=True, exist_ok=True)
results = collect_results(load_json(args.json_file))
output_path = render_plot(results, str(output_dir))
section = build_xlang_section(results, os.path.basename(output_path))
report_path = write_local_readme(output_dir, section)
print(f"Generated {report_path}")
print(f"Generated {output_path}")
if args.docs_output_dir is not None:
docs_output_dir = Path(args.docs_output_dir)
docs_output_dir.mkdir(parents=True, exist_ok=True)
docs_output_path = docs_output_dir / "throughput.png"
shutil.copy2(output_path, docs_output_path)
print(f"Copied {docs_output_path}")
docs_readme = update_docs_readme(docs_output_dir, section)
print(f"Updated {docs_readme}")
if __name__ == "__main__":
main()