| # SPDX-License-Identifier: Apache-2.0 |
| # |
| # The OpenSearch Contributors require contributions made to |
| # this file be licensed under the Apache-2.0 license or a |
| # compatible open source license. |
| # Modifications Copyright OpenSearch Contributors. See |
| # GitHub history for details. |
| # Licensed to Elasticsearch B.V. under one or more contributor |
| # license agreements. See the NOTICE file distributed with |
| # this work for additional information regarding copyright |
| # ownership. Elasticsearch B.V. licenses this file to you under |
| # the Apache License, Version 2.0 (the "License"); you may |
| # not use this file except in compliance with the License. |
| # You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, |
| # software distributed under the License is distributed on an |
| # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| # KIND, either express or implied. See the License for the |
| # specific language governing permissions and limitations |
| # under the License. |
| |
| # Simple helper script to create graphs based on multiple |
| # test_run.json files (it's a summary of the results of |
| # a single test_run which is |
| # stored in ~/.solr-orbit/results/<test_run_id>/). |
| # There is no specific integration into solr-orbit and it is also not |
| # installed with solr-orbit. |
| # |
| # It requires matplotlib (install with pip3 install matplotlib). |
| # |
| # |
| # Usage: |
| # python3 analyze.py [--label=LABEL] /path1/to/test_run.json /path2/to/test_run.json |
| # |
| # Output: A bunch of .png files in the current directory. |
| # Each graph shows one data series per test_run. |
| # The label key is chosen based on the |
| # command line parameter `--label` |
| # |
| |
| |
| import argparse |
| import json |
| import sys |
| |
| try: |
| import matplotlib.pyplot as plt |
| except ImportError: |
| print("This script requires matplotlib. Please install with 'pip3 install matplotlib' and retry.", file=sys.stderr) |
| sys.exit(1) |
| |
| |
def create_plot():
    """Create a fresh figure with a single set of axes.

    Matplotlib's rc parameters are reset to their defaults first, then the
    figure is resized to 18 x 10 inches.

    :return: The ``(figure, axes)`` pair obtained from ``plt.subplots()``.
    """
    plt.rcdefaults()
    figure, axes = plt.subplots()
    figure.set_size_inches(18, 10)
    return figure, axes
| |
| |
def present(a_plot, name):
    """Save the plot as ``<name>.png`` and close it afterwards.

    :param a_plot: Object providing ``savefig`` and ``close``; the plot functions
        in this script pass ``matplotlib.pyplot``.
    :param name: Name of the image file without the ``.png`` extension.
    """
    target_file = "%s.png" % name
    a_plot.savefig(target_file, bbox_inches='tight')
    # To only display the graph instead of storing it, use plt.show() here.
    # Close explicitly so the resources held by the figure are freed.
    a_plot.close()
| |
| |
def decode_percentile_key(k):
    """Turn an encoded percentile key back into a number.

    Underscores in the key stand in for the decimal point, e.g. ``"99_9"``
    is decoded to ``99.9``.

    :param k: Encoded percentile key (a string).
    :return: The percentile as a float.
    :raises ValueError: If the decoded text is not a valid float literal.
    """
    decimal_text = k.replace("_", ".")
    return float(decimal_text)
| |
| |
def data_series_name(d, label_key):
    """Build the display name of a data series from one test run document.

    :param d: Parsed test run document (nested dicts).
    :param label_key: One or more comma-separated attribute paths. A path may use
        dots to descend into nested dicts, e.g. ``"a.b,c"`` looks up ``d["a"]["b"]``
        and ``d["c"]``.
    :return: The values found at the given paths, joined by commas.
    :raises KeyError: If an element of a path is not present in the document.
    """
    values = []
    for lbl in label_key.split(","):
        doc = d
        for k in lbl.split("."):
            doc = doc[k]
        # The value at a path is not necessarily a string (it may be a number or
        # a nested structure); convert it so that str.join() cannot raise TypeError.
        values.append(str(doc))
    return ",".join(values)
| |
| |
def include(series):
    """Decide whether a loaded test run should be plotted.

    ``main()`` calls this for every loaded file and only keeps the test runs for
    which it returns a truthy value. Currently every test run is accepted.

    :param series: Parsed test run document (ignored at the moment).
    :return: Always ``True``.
    """
    return True
| |
| |
def plot_service_time(raw_data, label_key):
    """Create one service time graph per operation.

    Each graph plots percentile (x-axis) against service time (y-axis) with one
    line per test run and is stored as ``service_time_<operation>.png``.

    :param raw_data: List of parsed test run documents. Each one is read via
        ``d["results"]["op_metrics"]``, whose entries provide ``"operation"`` and
        ``"service_time"``.
    :param label_key: Label specification handed to ``data_series_name`` in order
        to name the data series of each test run.
    """
    service_time_per_op = {}

    for d in raw_data:
        data_series = data_series_name(d, label_key)
        for op_metrics in d["results"]["op_metrics"]:
            operation = op_metrics["operation"]
            points = []
            for key, value in op_metrics["service_time"].items():
                try:
                    percentile = decode_percentile_key(key)
                except ValueError:
                    # Not a percentile key - presumably a summary entry such as a
                    # unit or a mean (verify against the result format). There is
                    # nothing to plot for it, so skip it instead of aborting.
                    continue
                points.append((percentile, value))
            # Draw the line from left to right regardless of the key order in the
            # file. Only the percentile is compared so values are never compared.
            points.sort(key=lambda point: point[0])
            service_time_per_op.setdefault(operation, []).append({
                "data_series": data_series,
                "percentiles": [percentile for percentile, _ in points],
                "percentile_values": [value for _, value in points],
            })

    for op, results in service_time_per_op.items():
        _, ax = create_plot()
        legend_handles = []
        legend_labels = []

        for candidate in results:
            label = candidate["data_series"]
            series = ax.plot(candidate["percentiles"], candidate["percentile_values"], marker='.', label=label)
            # ax.plot() returns a list of lines; one x/y pair yields exactly one line
            legend_handles.append(series[0])
            legend_labels.append(label)

        ax.set_ylabel("Service Time [ms]")
        ax.set_xlabel("Percentile")
        ax.set_title("Service Time of %s" % op)
        ax.set_ylim(ymin=0)

        # shrink the axes to 80% of their width to make room for the legend on the right
        box = ax.get_position()
        ax.set_position([box.x0, box.y0, box.width * 0.8, box.height])
        ax.legend(legend_handles, legend_labels, loc='center left', bbox_to_anchor=(1, 0.5))

        present(plt, "service_time_%s" % op)
| |
| |
def plot_throughput(raw_data, label_key):
    """Create one throughput bar chart per operation.

    Each chart shows one bar per test run at the median throughput, with error
    bars reaching down to the minimum and up to the maximum. It is stored as
    ``throughput_<operation>.png``.

    :param raw_data: List of parsed test run documents. Each one is read via
        ``d["results"]["op_metrics"]``, whose entries provide ``"operation"`` and
        ``"throughput"`` (with ``"min"``, ``"median"``, ``"max"`` and ``"unit"``).
    :param label_key: Label specification handed to ``data_series_name`` in order
        to name the data series of each test run.
    """
    throughput_per_op = {}

    for d in raw_data:
        data_series = data_series_name(d, label_key)
        for op_metrics in d["results"]["op_metrics"]:
            operation = op_metrics["operation"]
            throughput_metrics = op_metrics["throughput"]
            throughput_per_op.setdefault(operation, []).append({
                "data_series": data_series,
                "max": throughput_metrics["max"],
                "median": throughput_metrics["median"],
                "min": throughput_metrics["min"],
                "unit": throughput_metrics["unit"],
            })

    for op, results in throughput_per_op.items():
        _, ax = create_plot()
        x_tick_labels = []
        throughput = []
        min_throughput = []
        max_throughput = []
        width = 0.35
        unit = ""

        for candidate in results:
            x_tick_labels.append(candidate["data_series"])
            cmin = candidate["min"]
            cmedian = candidate["median"]
            cmax = candidate["max"]
            # all units per op are the same but they can change across operations.
            unit = candidate["unit"]
            # Only a missing (None) value invalidates the sample. A plain truthiness
            # test would also discard legitimate measurements of 0.
            if cmin is None or cmedian is None or cmax is None:
                min_throughput.append(0)
                throughput.append(0)
                max_throughput.append(0)
            else:
                # error bars are given as distances from the bar height (the median)
                min_throughput.append(cmedian - cmin)
                throughput.append(cmedian)
                max_throughput.append(cmax - cmedian)

        indices = range(len(throughput))

        ax.bar(indices, throughput, width, yerr=[min_throughput, max_throughput])
        ax.set_xticks(indices)
        ax.set_xticklabels(x_tick_labels)
        ax.set_ylabel("Throughput [%s]" % unit)
        ax.set_title("Throughput of %s" % op)
        ax.set_ylim(ymin=0)

        present(plt, "throughput_%s" % op)
| |
| |
def plot_gc_times(raw_data, label_key):
    """Create a grouped bar chart of old and young GC times.

    Every test run contributes two adjacent bars (old GC, young GC). The chart is
    stored as ``gc_times.png``.

    :param raw_data: List of parsed test run documents; ``d["results"]["old_gc_time"]``
        and ``d["results"]["young_gc_time"]`` are read from each of them.
    :param label_key: Label specification handed to ``data_series_name`` in order
        to name each test run on the x-axis.
    """
    _, ax = create_plot()
    bar_width = 0.35

    # one (label, old gc time, young gc time) row per test run
    rows = [
        (data_series_name(run, label_key), run["results"]["old_gc_time"], run["results"]["young_gc_time"])
        for run in raw_data
    ]
    run_labels = [row[0] for row in rows]
    old_gc_times = [row[1] for row in rows]
    young_gc_times = [row[2] for row in rows]

    old_positions = range(len(old_gc_times))
    # the young GC bar sits directly to the right of the old GC bar of the same run
    young_positions = [position + bar_width for position in old_positions]

    old_bar = ax.bar(old_positions, old_gc_times, bar_width)
    # place the tick halfway between the two bar positions of a test run
    ax.set_xticks([position + bar_width / 2 for position in old_positions])
    ax.set_xticklabels(run_labels)
    ax.set_ylabel("Total Duration [ms]")
    ax.set_title("GC Times")

    young_bar = ax.bar(young_positions, young_gc_times, bar_width)

    # shrink the axes to 80% of their width to make room for the legend on the right
    bounds = ax.get_position()
    ax.set_position([bounds.x0, bounds.y0, bounds.width * 0.8, bounds.height])

    ax.legend([old_bar[0], young_bar[0]], ["Old GC", "Young GC"], loc='center left', bbox_to_anchor=(1, 0.5))
    ax.set_ylim(ymin=0)

    present(plt, "gc_times")
| |
| |
def plot(raw_data, label_key):
    """Create all graphs: GC times, then throughput, then service time.

    :param raw_data: List of parsed test run documents.
    :param label_key: Label specification that is passed on unchanged to every
        plot function.
    """
    for create_graphs in (plot_gc_times, plot_throughput, plot_service_time):
        create_graphs(raw_data, label_key)
| |
| |
def parse_args():
    """Parse the command line arguments of this script.

    :return: The ``argparse`` namespace with ``label`` (a string, defaulting to
        ``"test-run-timestamp"``) and ``path`` (a list of one or more file paths).
    """
    arg_parser = argparse.ArgumentParser(description="Turns test_run.json files into graphs")

    # No `choices` are configured for --label, so any value is accepted here.
    arg_parser.add_argument(
        "--label",
        default="test-run-timestamp",
        help="defines which attribute to use for labelling data series (default: test-run-timestamp).",
    )
    arg_parser.add_argument(
        "path",
        nargs="+",
        help="Full path to one or more test_run.json files",
    )

    parsed = arg_parser.parse_args()
    return parsed
| |
| |
def main():
    """Load the test run files given on the command line and plot them.

    Every path is parsed as JSON; test runs rejected by ``include`` are dropped
    and the remaining ones are handed to ``plot`` together with the label
    specification from the command line.
    """
    args = parse_args()
    series = []

    for f in args.path:
        # The context manager closes each file as soon as it has been parsed
        # instead of leaving that to the garbage collector. JSON is read as UTF-8
        # rather than in the platform's default encoding.
        with open(f, "rt", encoding="utf-8") as results_file:
            a_series = json.load(results_file)
        if include(a_series):
            series.append(a_series)
    plot(series, args.label)
| |
| |
# Only create graphs when executed as a script, not when imported as a module.
if __name__ == "__main__":
    main()