blob: c0cd70cc8cf46bf7327d629a8bbc3f1f88a4f245 [file]
# SPDX-License-Identifier: Apache-2.0
#
# The OpenSearch Contributors require contributions made to
# this file be licensed under the Apache-2.0 license or a
# compatible open source license.
# Modifications Copyright OpenSearch Contributors. See
# GitHub history for details.
# Licensed to Elasticsearch B.V. under one or more contributor
# license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright
# ownership. Elasticsearch B.V. licenses this file to you under
# the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# Simple helper script to create graphs based on multiple
# test_run.json files. Each test_run.json file is a summary of
# the results of a single test run and is
# stored in ~/.solr-orbit/results/<test_run_id>/.
# This script is not integrated into solr-orbit and it is also not
# installed with solr-orbit.
#
# It requires matplotlib (install with pip3 install matplotlib).
#
#
# Usage:
# python3 analyze.py [--label=LABEL] /path1/to/test_run.json /path2/to/test_run.json
#
# Output: A bunch of .png files in the current directory.
# Each graph shows one data series per test_run.
# The label key is chosen based on the
# command line parameter `--label`
#
import argparse
import json
import sys
try:
import matplotlib.pyplot as plt
except ImportError:
print("This script requires matplotlib. Please install with 'pip3 install matplotlib' and retry.", file=sys.stderr)
sys.exit(1)
def create_plot():
    """Create a new figure with a single axes, sized 18 x 10 inches.

    Matplotlib's rc parameters are reset to their defaults before the
    figure is created.

    :return: The ``(figure, axes)`` pair obtained from ``plt.subplots()``.
    """
    plt.rcdefaults()
    figure, axes = plt.subplots()
    figure.set_size_inches(18, 10)
    return figure, axes
def present(a_plot, name):
    """Save the plot to ``<name>.png`` and close it afterwards.

    :param a_plot: Object offering ``savefig`` and ``close``; the callers
        in this file pass the ``matplotlib.pyplot`` module.
    :param name: Name of the target file without the ``.png`` suffix.
    """
    target_file = "%s.png" % name
    a_plot.savefig(target_file, bbox_inches="tight")
    # Alternative: call a_plot.show() here to only display the graph.
    # Close explicitly so that the resources held by the plot are freed.
    a_plot.close()
def decode_percentile_key(k):
    """Convert an encoded percentile key such as ``"99_9"`` to a float.

    :param k: Key in which every underscore stands for a decimal point.
    :return: The decoded key as a float, e.g. ``99.9``.
    :raises ValueError: If the decoded text is not a valid float literal.
    """
    decoded = ".".join(k.split("_"))
    return float(decoded)
def data_series_name(d, label_key):
    """Build the label of a data series from one loaded test run document.

    ``label_key`` is a comma-separated list of dotted paths. Each path is
    resolved against ``d`` by descending one key per path segment, and the
    resolved values are joined with commas.

    :param d: The loaded test run document (nested mappings).
    :param label_key: Comma-separated dotted paths, e.g.
        ``"environment,user-tags.name"``.
    :return: The resolved values joined by ``","``.
    :raises KeyError: If a path segment is missing from the document.
    """
    resolved = []
    for lbl in label_key.split(","):
        doc = d
        for k in lbl.split("."):
            doc = doc[k]
        # str.join() only accepts strings; convert so that paths resolving to
        # numbers or nested objects produce a label instead of a TypeError.
        resolved.append(str(doc))
    return ",".join(resolved)
def include(series):
    """Decide whether a loaded test run takes part in the graphs.

    This is a filter hook that currently accepts every test run.

    :param series: A loaded test run document (not inspected).
    :return: Always ``True``.
    """
    return True
def plot_service_time(raw_data, label_key):
    """Write one service time graph per operation.

    For every operation found in ``results.op_metrics`` of the given test
    runs, a graph with one line per test run is saved as
    ``service_time_<operation>.png``. The x-axis shows the percentile and the
    y-axis the service time.

    :param raw_data: Iterable of loaded test run documents.
    :param label_key: Label specification passed to ``data_series_name``.
    """
    series_by_operation = {}
    for test_run in raw_data:
        series_label = data_series_name(test_run, label_key)
        for op_metrics in test_run["results"]["op_metrics"]:
            operation = op_metrics["operation"]
            service_time = op_metrics["service_time"]
            # NOTE(review): every key of the service time metrics is decoded
            # as a float, so all keys have to be encoded percentiles.
            entry = {
                "data_series": series_label,
                "percentiles": [decode_percentile_key(key) for key in service_time],
                "percentile_values": list(service_time.values()),
            }
            series_by_operation.setdefault(operation, []).append(entry)

    for op, candidates in series_by_operation.items():
        _, ax = create_plot()
        handles = []
        labels = []
        for candidate in candidates:
            label = candidate["data_series"]
            lines = ax.plot(
                candidate["percentiles"],
                candidate["percentile_values"],
                marker=".",
                label=label,
            )
            handles.append(lines[0])
            labels.append(label)

        ax.set_ylabel("Service Time [ms]")
        ax.set_xlabel("Percentile")
        ax.set_title("Service Time of %s" % op)
        ax.set_ylim(ymin=0)

        # Shrink the axes to 80% of their width and put the legend to the
        # right of them.
        position = ax.get_position()
        ax.set_position([position.x0, position.y0, position.width * 0.8, position.height])
        ax.legend(handles, labels, loc="center left", bbox_to_anchor=(1, 0.5))

        present(plt, "service_time_%s" % op)
def plot_throughput(raw_data, label_key):
    """Write one throughput bar chart per operation.

    For every operation found in ``results.op_metrics`` of the given test
    runs, a chart with one bar per test run is saved as
    ``throughput_<operation>.png``. The bar height is the median throughput
    and the error bar spans from the minimum to the maximum throughput.

    :param raw_data: Iterable of loaded test run documents.
    :param label_key: Label specification passed to ``data_series_name``.
    """
    throughput_per_op = {}
    for d in raw_data:
        data_series = data_series_name(d, label_key)
        for op_metrics in d["results"]["op_metrics"]:
            operation = op_metrics["operation"]
            throughput_metrics = op_metrics["throughput"]
            throughput_per_op.setdefault(operation, []).append({
                "data_series": data_series,
                "max": throughput_metrics["max"],
                "median": throughput_metrics["median"],
                "min": throughput_metrics["min"],
                "unit": throughput_metrics["unit"],
            })

    for op, results in throughput_per_op.items():
        _, ax = create_plot()
        x_tick_labels = []
        throughput = []
        # Error bar lengths below / above the median.
        min_throughput = []
        max_throughput = []
        width = 0.35
        unit = ""
        for candidate in results:
            x_tick_labels.append(candidate["data_series"])
            cmin = candidate["min"]
            cmedian = candidate["median"]
            cmax = candidate["max"]
            # all units per op are the same but they can change across operations.
            unit = candidate["unit"]

            # Only a missing value (None) makes the sample unusable. A value
            # of 0 is a valid measurement and must not blank out the bar.
            if cmin is None or cmedian is None or cmax is None:
                min_throughput.append(0)
                throughput.append(0)
                max_throughput.append(0)
            else:
                min_throughput.append(cmedian - cmin)
                throughput.append(cmedian)
                max_throughput.append(cmax - cmedian)

        indices = range(len(throughput))
        ax.bar(indices, throughput, width, yerr=[min_throughput, max_throughput])
        ax.set_xticks(indices)
        ax.set_xticklabels(x_tick_labels)
        ax.set_ylabel("Throughput [%s]" % unit)
        ax.set_title("Throughput of %s" % op)
        ax.set_ylim(ymin=0)

        present(plt, "throughput_%s" % op)
def plot_gc_times(raw_data, label_key):
    """Write a grouped bar chart of old and young GC times to ``gc_times.png``.

    Every test run contributes a pair of bars: ``results.old_gc_time`` and,
    right next to it, ``results.young_gc_time``.

    :param raw_data: Iterable of loaded test run documents.
    :param label_key: Label specification passed to ``data_series_name``.
    """
    _, ax = create_plot()
    bar_width = 0.35
    labels = []
    old_times = []
    young_times = []
    for test_run in raw_data:
        labels.append(data_series_name(test_run, label_key))
        gc_results = test_run["results"]
        old_times.append(gc_results["old_gc_time"])
        young_times.append(gc_results["young_gc_time"])

    old_positions = range(len(old_times))
    # The tick sits in the middle of each pair; the young GC bar is shifted
    # by one bar width so that it is drawn next to the old GC bar.
    tick_positions = [pos + bar_width / 2 for pos in old_positions]
    young_positions = [pos + bar_width for pos in old_positions]

    old_bar = ax.bar(old_positions, old_times, bar_width)
    ax.set_xticks(tick_positions)
    ax.set_xticklabels(labels)
    ax.set_ylabel("Total Duration [ms]")
    ax.set_title("GC Times")
    young_bar = ax.bar(young_positions, young_times, bar_width)

    # Shrink the axes to 80% of their width and put the legend to the right.
    position = ax.get_position()
    ax.set_position([position.x0, position.y0, position.width * 0.8, position.height])
    ax.legend(
        [old_bar[0], young_bar[0]],
        ["Old GC", "Young GC"],
        loc="center left",
        bbox_to_anchor=(1, 0.5),
    )
    ax.set_ylim(ymin=0)
    present(plt, "gc_times")
def plot(raw_data, label_key):
    """Create all graphs: GC times, then throughput, then service time.

    :param raw_data: Iterable of loaded test run documents.
    :param label_key: Label specification used to name each data series.
    """
    for create_graphs in (plot_gc_times, plot_throughput, plot_service_time):
        create_graphs(raw_data, label_key)
def parse_args():
    """Parse the command line of this script.

    :return: The parsed arguments with the attributes ``label`` (defaults to
        ``"test-run-timestamp"``) and ``path`` (a list with at least one entry).
    """
    arg_parser = argparse.ArgumentParser(description="Turns test_run.json files into graphs")
    # choices=["environment", "test-run-timestamp", "user-tags", "test_procedure", "cluster-config-instance"],
    label_help = "defines which attribute to use for labelling data series (default: test-run-timestamp)."
    arg_parser.add_argument("--label", help=label_help, default="test-run-timestamp")
    path_help = "Full path to one or more test_run.json files"
    arg_parser.add_argument("path", nargs="+", help=path_help)
    return arg_parser.parse_args()
def main():
    """Load the test run files given on the command line and plot them.

    Every path is parsed as JSON. Documents accepted by ``include`` are
    collected and handed to ``plot`` together with the ``--label`` value.
    """
    args = parse_args()
    series = []
    for f in args.path:
        # The context manager closes the file handle as soon as the document
        # has been parsed instead of leaving that to the garbage collector.
        with open(f, "rt") as results_file:
            a_series = json.load(results_file)
        if include(a_series):
            series.append(a_series)
    plot(series, args.label)
# Run the script only when it is executed directly, not when it is imported.
if __name__ == "__main__":
    main()