blob: 065fafaa64d57e94a36223b04759692725685bc5 [file] [log] [blame]
#!/usr/bin/env python3
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
'''
This script takes the output of the network_profiling script and turns it into a hierarchical clustering tree.
The input (result.json) holds the qperf measurements of network bandwidth and latency between each pair of nodes.
For every pair, the distance ( = latency / bandwidth) is computed, the nodes are clustered hierarchically on these
distances, and the result is written to labeldict.json. The labeldict maps each index to a [name, distance] pair:
the first entries are the individual nodes, [nodename, "0"], and the following entries are the merged groups,
["i+j", distance], where i and j are the indices of the two entries that were merged at that distance.
'''
import json
from scipy.cluster.hierarchy import dendrogram, linkage
def sort_measurements(dic):
    '''Return the pairwise measurements of *dic*, ordered by node-name pair.

    Only entries whose value is a dict holding both a 'latency' and a 'bw'
    field are kept, so bookkeeping entries such as "slaves" and pairs without
    a complete measurement are dropped. Each kept item looks like:
    {"willy-3/willy-4": {"bw": "922517504 bytes/sec", "latency": "36828 ns"}}

    :param dic: the parsed content of the profiling result file.
    :return: a dict of the kept entries, sorted by (first node, second node).
    :raises IndexError: if a kept key does not have the "first/second" form.
    '''
    def pair_of(entry):
        # Sort on the entry being compared; the sort key must be derived
        # from its own argument, not from an outer loop variable.
        names = entry[0].split('/')
        return names[0], names[1]

    measured = [(key, val) for key, val in dic.items()
                if isinstance(val, dict) and 'latency' in val and 'bw' in val]
    return dict(sorted(measured, key=pair_of))


def compute_distances(sorted_dic):
    '''Return latency / bandwidth for every measurement, in iteration order.

    The numeric part is the first whitespace-separated token of each field
    (e.g. "36828 ns" -> 36828).

    :param sorted_dic: measurements as returned by sort_measurements().
    :return: a list of floats, one per measured pair.
    :raises ZeroDivisionError: if a measured bandwidth is 0.
    '''
    return [int(val['latency'].split()[0]) / int(val['bw'].split()[0])
            for val in sorted_dic.values()]


def build_labeldict(slaves, Z):
    '''Build the index -> (label, distance) mapping for nodes and clusters.

    The nodes come first, each with distance "0". Every row of the linkage
    matrix then gets the next free index, labeled "i+j" after the two indices
    it merges, together with the merge distance formatted with 8 decimals.

    :param slaves: the node names, in the order used for the distances.
    :param Z: the linkage matrix; only the first three columns are read.
    :return: the labeldict.
    '''
    labeldict = {idx: (name, "0") for idx, name in enumerate(slaves)}
    for row in Z:
        left, right, distance = int(row[0]), int(row[1]), row[2]
        labeldict[len(labeldict)] = ("{}+{}".format(left, right), "{:.8f}".format(distance))
    return labeldict


def main(result_path='result.json', output_path='labeldict.json'):
    '''Cluster the nodes of *result_path* and write the tree to *output_path*.

    :param result_path: JSON file produced by the network profiling run.
    :param output_path: JSON file the labeldict is written to.
    '''
    with open(result_path) as json_file:
        dic = json.load(json_file)

    # Sort according to node ids.
    # NOTE(review): for the distances to line up with `slaves`, every key is
    # assumed to be "a/b" with a sorted before b -- confirm against the
    # profiling script.
    sorted_dic = sort_measurements(dic)
    slaves = sorted(dic['slaves'].split('/'))

    # Upper triangle of the connection graph
    dist = compute_distances(sorted_dic)

    # Use library to generate the linkage matrix
    Z = linkage(dist)

    # Visualize result
    # from matplotlib import pyplot as plt
    # fig = plt.figure(figsize=(25, 10))
    # dn = dendrogram(Z, labels=slaves)
    # plt.show()

    labeldict = build_labeldict(slaves, Z)

    # json.dump writes the integer keys as strings and the tuples as lists.
    with open(output_path, 'w') as fp:
        json.dump(labeldict, fp)


if __name__ == '__main__':
    main()
# def closest_clusters_between(source, destination, labeldict):
# for v in labeldict.values():
# lst = v.split('+')
# if source in lst and destination in lst:
# return lst
#
# print("Closest clusters between {} and {} are {}".format('willy-10', 'willy-13', closest_clusters_between('willy-10', 'willy-13', labeldict)))