blob: 75e09b662e3e1b2be8ea38ae19130f18c83df8f6 [file] [log] [blame]
#!/usr/bin/env python
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
"""
Converts a given json to LineProtocol format that can be
visualised by grafana/other systems that support LineProtocol.
Usage example:
$ python3 lineprotocol.py sort.json
benchmark,name=sort,version=28.0.0,datafusion_version=28.0.0,num_cpus=8 query="sort utf8",iteration=0,row_count=10838832,elapsed_ms=85626006 1691105678000000000
benchmark,name=sort,version=28.0.0,datafusion_version=28.0.0,num_cpus=8 query="sort utf8",iteration=1,row_count=10838832,elapsed_ms=68694468 1691105678000000000
benchmark,name=sort,version=28.0.0,datafusion_version=28.0.0,num_cpus=8 query="sort utf8",iteration=2,row_count=10838832,elapsed_ms=63392883 1691105678000000000
benchmark,name=sort,version=28.0.0,datafusion_version=28.0.0,num_cpus=8 query="sort utf8",iteration=3,row_count=10838832,elapsed_ms=66388367 1691105678000000000
"""
# sort.json
"""
{
"queries": [
{
"iterations": [
{
"elapsed": 85626.006132,
"row_count": 10838832
},
{
"elapsed": 68694.467851,
"row_count": 10838832
},
{
"elapsed": 63392.883406,
"row_count": 10838832
},
{
"elapsed": 66388.367387,
"row_count": 10838832
}
],
"query": "sort utf8",
"start_time": 1691105678
}
],
"context": {
"arguments": [
"sort",
"--path",
"benchmarks/data",
"--scale-factor",
"1.0",
"--iterations",
"4",
"-o",
"sort.json"
],
"benchmark_version": "28.0.0",
"datafusion_version": "28.0.0",
"num_cpus": 8,
"start_time": 1691105678
}
}
"""
from __future__ import annotations
import json
from dataclasses import dataclass
from typing import Dict, List, Any
from pathlib import Path
from argparse import ArgumentParser
import sys
# Shadow the builtin print with sys.stdout.write for this module: the
# replacement takes a single string and does not append a newline.
print = sys.stdout.write
@dataclass
class QueryResult:
    """Elapsed time and row count recorded for one iteration of a query."""

    elapsed: float
    row_count: int

    @classmethod
    def load_from(cls, data: Dict[str, Any]) -> QueryResult:
        """Build a ``QueryResult`` from one entry of an ``iterations`` list.

        Only the ``elapsed`` and ``row_count`` keys are read; a missing key
        raises ``KeyError``.
        """
        elapsed = data["elapsed"]
        row_count = data["row_count"]
        return cls(elapsed, row_count)
@dataclass
class QueryRun:
    """All iterations recorded for a single benchmark query."""

    # Query label as it appears in the results file (e.g. "sort utf8").
    query: str
    iterations: List[QueryResult]
    # Copied verbatim from the entry's "start_time" key.
    start_time: int

    @classmethod
    def load_from(cls, data: Dict[str, Any]) -> QueryRun:
        """Build a ``QueryRun`` from one entry of the ``queries`` list.

        Reads the ``query``, ``iterations`` and ``start_time`` keys; a missing
        key raises ``KeyError``.
        """
        return cls(
            query=data["query"],
            # Go through QueryResult.load_from, like the other loaders in this
            # module, so extra keys in an iteration entry are ignored instead
            # of being rejected by the dataclass constructor.
            iterations=[
                QueryResult.load_from(iteration)
                for iteration in data["iterations"]
            ],
            start_time=data["start_time"],
        )

    @property
    def execution_time(self) -> float:
        """Return the smallest ``elapsed`` value across all iterations.

        Raises:
            ValueError: if the run has no iterations.
        """
        # Explicit check rather than `assert`, which is stripped under -O.
        if not self.iterations:
            raise ValueError(f"query {self.query!r} has no iterations")
        # Use minimum execution time to account for variations / other
        # things the system was doing
        return min(iteration.elapsed for iteration in self.iterations)
@dataclass
class Context:
    """Run-level metadata taken from the ``context`` object of a results file."""

    benchmark_version: str
    datafusion_version: str
    num_cpus: int
    start_time: int
    arguments: List[str]
    # First element of ``arguments``.
    name: str

    @classmethod
    def load_from(cls, data: Dict[str, Any]) -> Context:
        """Build a ``Context`` from the ``context`` mapping.

        Every field except ``name`` is copied from the key of the same name;
        ``name`` is the first entry of ``arguments``.
        """
        copied_keys = (
            "benchmark_version",
            "datafusion_version",
            "num_cpus",
            "start_time",
            "arguments",
        )
        fields = {key: data[key] for key in copied_keys}
        return cls(name=fields["arguments"][0], **fields)
@dataclass
class BenchmarkRun:
    """A complete results file: the run context plus one entry per query."""

    context: Context
    queries: List[QueryRun]

    @classmethod
    def load_from(cls, data: Dict[str, Any]) -> BenchmarkRun:
        """Build a ``BenchmarkRun`` from the decoded top-level JSON object.

        Reads the ``context`` and ``queries`` keys; a missing key raises
        ``KeyError``.
        """
        return cls(
            context=Context.load_from(data["context"]),
            queries=[QueryRun.load_from(result) for result in data["queries"]],
        )

    @classmethod
    def load_from_file(cls, path: Path) -> BenchmarkRun:
        """Read the JSON file at ``path`` and build a ``BenchmarkRun`` from it."""
        # Decode explicitly as UTF-8 instead of relying on the platform's
        # default text encoding, which is not UTF-8 everywhere.
        with open(path, "r", encoding="utf-8") as f:
            return cls.load_from(json.load(f))
# Characters that delimit tags in line protocol and therefore need a
# backslash in front of them when they occur inside a tag value.
_TAG_VALUE_ESCAPES = str.maketrans({",": "\\,", "=": "\\=", " ": "\\ "})

# Characters that need a backslash inside a double-quoted string field value.
_STRING_FIELD_ESCAPES = str.maketrans({"\\": "\\\\", '"': '\\"'})


def _escape_tag_value(value: Any) -> str:
    """Return ``value`` as text that is safe to use as a tag value."""
    return str(value).translate(_TAG_VALUE_ESCAPES)


def _escape_string_field(value: Any) -> str:
    """Return ``value`` as text that is safe inside a quoted string field."""
    return str(value).translate(_STRING_FIELD_ESCAPES)


def lineformat(
    baseline: Path,
) -> None:
    """Write the benchmark file at ``baseline`` to stdout as line protocol.

    One line is written per iteration of every query. Each line carries the
    ``benchmark`` measurement, the tags ``name``, ``version``,
    ``datafusion_version`` and ``num_cpus``, the fields ``query``,
    ``iteration``, ``row_count`` and ``elapsed_ms``, and the query's
    ``start_time`` multiplied by 10**9 as the timestamp.

    Tag values and the ``query`` string are escaped so that spaces, commas,
    equals signs, quotes and backslashes in the input do not produce
    malformed lines; values without those characters are written unchanged.
    """
    run = BenchmarkRun.load_from_file(baseline)
    context = run.context
    benchmark_str = (
        "benchmark"
        f",name={_escape_tag_value(context.name)}"
        f",version={_escape_tag_value(context.benchmark_version)}"
        f",datafusion_version={_escape_tag_value(context.datafusion_version)}"
        f",num_cpus={_escape_tag_value(context.num_cpus)}"
    )
    for query in run.queries:
        query_str = f'query="{_escape_string_field(query.query)}"'
        timestamp = f"{query.start_time*10**9}"
        for iter_num, result in enumerate(query.iterations):
            # NOTE(review): the field is named elapsed_ms but the value written
            # is elapsed * 1000 -- confirm the unit of `elapsed` in the input.
            # Write through sys.stdout directly so the trailing newline is
            # explicit and no rebinding of `print` is required.
            sys.stdout.write(
                f"{benchmark_str} {query_str},iteration={iter_num},"
                f"row_count={result.row_count},"
                f"elapsed_ms={result.elapsed*1000:.0f} {timestamp}\n"
            )
def main() -> None:
    """Parse the command line and write the given benchmark file as line protocol."""
    parser = ArgumentParser()
    parser.add_argument(
        "path",
        type=Path,
        help="Path to the benchmark file.",
    )
    options = parser.parse_args()
    # The positional argument is declared as "path", so argparse stores it on
    # options.path; no "baseline_path" attribute exists on the namespace.
    lineformat(options.path)


if __name__ == "__main__":
    main()