blob: 5a4cc0749974eb3d35877541dadf162aff85d441 [file] [log] [blame]
#!/usr/bin/env impala-python
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# This script provides help with parsing and reporting of perf results. It currently
# provides three main capabilities:
# 1) Printing perf results to console in 'pretty' format
# 2) Comparing two perf result sets together and displaying comparison results to console
# 3) Outputting the perf results in JUnit format which is useful for plugging in to
# Jenkins perf reporting.
# By default in Python if you divide an int by another int (5 / 2), the result will also
# be an int (2). The following line changes this behavior so that float will be returned
# if necessary (2.5).
from __future__ import division
import difflib
import json
import logging
import os
import prettytable
import re
from collections import defaultdict
from datetime import date, datetime
from optparse import OptionParser
from tests.util.calculation_util import (
calculate_tval, calculate_avg, calculate_stddev, calculate_geomean, calculate_mwu)
LOG = logging.getLogger(__name__)
# String constants
# Dictionary keys used by this script: fields of the query result dictionaries, columns
# of execution summary rows, and the derived statistics (e.g. AVG, STDDEV, SORTED) that
# the script stores next to them. Using the names instead of literals keeps the keys
# consistent between the code that writes a value and the code that reads it.
AVG = 'avg'
AVG_TIME = 'avg_time'
BASELINE_AVG = 'baseline_avg'
BASELINE_MAX = 'baseline_max'
CLIENT_NAME = 'client_name'
COMPRESSION_CODEC = 'compression_codec'
COMPRESSION_TYPE = 'compression_type'
DELTA_AVG = 'delta_avg'
DELTA_MAX = 'delta_max'
DELTA_RSTD = 'delta_rstd'
DETAIL = 'detail'
EST_NUM_ROWS = 'est_num_rows'
EST_PEAK_MEM = 'est_peak_mem'
EXECUTOR_NAME = 'executor_name'
EXEC_SUMMARY = 'exec_summary'
FILE_FORMAT = 'file_format'
GEOMEAN = 'geomean'
ITERATIONS = 'iterations'
MAX_TIME = 'max_time'
NAME = 'name'
NUM_CLIENTS = 'num_clients'
NUM_HOSTS = 'num_hosts'
NUM_INSTANCES = 'num_instances'
NUM_ROWS = 'num_rows'
OPERATOR = 'operator'
PEAK_MEM = 'peak_mem'
PERCENT_OF_QUERY = 'percent_of_query'
PREFIX = 'prefix'
QUERY = 'query'
QUERY_STR = 'query_str'
REF_RSTD = 'ref_rstd'
RESULT_LIST = 'result_list'
RSTD = 'rstd'
RUNTIME_PROFILE = 'runtime_profile'
SCALE_FACTOR = 'scale_factor'
SORTED = 'sorted'
STDDEV = 'stddev'
STDDEV_TIME = 'stddev_time'
TEST_VECTOR = 'test_vector'
TIME_TAKEN = 'time_taken'
WORKLOAD_NAME = 'workload_name'
# Command line options.
# NOTE: the default result file locations read os.environ['IMPALA_HOME'] while the
# options are being declared, so IMPALA_HOME must be set even when explicit paths are
# given on the command line.
parser = OptionParser()
parser.add_option("--input_result_file", dest="result_file",
                  default=os.environ['IMPALA_HOME'] + '/benchmark_results.json',
                  help="The input JSON file with benchmark results")
parser.add_option("--hive_results", dest="hive_results", action="store_true",
                  help="Process results generated from queries ran against Hive")
parser.add_option("--reference_result_file", dest="reference_result_file",
                  default=os.environ['IMPALA_HOME'] + '/reference_benchmark_results.json',
                  help="The input JSON file with reference benchmark results")
parser.add_option("--junit_output_file", dest="junit_output_file", default='',
                  help='If set, outputs results in Junit format to the specified file')
# NOTE(review): the help text below describes the opposite of what the flag name
# suggests; confirm the intended meaning before rewording it.
parser.add_option("--no_output_table", dest="no_output_table", action="store_true",
                  default=False, help='Outputs results in table format to the console')
parser.add_option("--report_description", dest="report_description", default=None,
                  help='Optional description for the report.')
parser.add_option("--cluster_name", dest="cluster_name", default='UNKNOWN',
                  help="Name of the cluster the results are from (ex. Bolt)")
parser.add_option("--verbose", "-v", dest="verbose", action="store_true",
                  default=False, help='Outputs to console with increased verbosity')
parser.add_option("--output_all_summary_nodes", dest="output_all_summary_nodes",
                  action="store_true", default=False,
                  help='Print all execution summary nodes')
parser.add_option("--build_version", dest="build_version", default='UNKNOWN',
                  help="Build/version info about the Impalad instance results are from.")
parser.add_option("--lab_run_info", dest="lab_run_info", default='UNKNOWN',
                  help="Information about the lab run (name/id) that published "
                       "the results.")
parser.add_option("--run_user_name", dest="run_user_name", default='anonymous',
                  help="User name that this run is associated with in the perf database")
parser.add_option("--tval_threshold", dest="tval_threshold", default=3.0,
                  type="float", help="The ttest t-value at which a performance change "
                                     "will be flagged as significant.")
parser.add_option("--zval_threshold", dest="zval_threshold", default=3.0, type="float",
                  help="The Mann-Whitney Z-value at which a performance change will be "
                       "flagged as significant.")
parser.add_option("--min_percent_change_threshold",
                  dest="min_percent_change_threshold", default=5.0,
                  type="float", help="Any performance changes below this threshold"
                                     " will not be classified as significant. If the user"
                                     " specifies an empty value, the threshold will be set"
                                     " to 0")
parser.add_option("--max_percent_change_threshold",
                  dest="max_percent_change_threshold", default=float("inf"),
                  type="float", help="Any performance changes above this threshold"
                                     " will be classified as significant. If the user"
                                     " specifies an empty value, the threshold will be set"
                                     " to positive infinity")
# Adjacent string literals are used for the help text: a backslash continuation inside
# a literal would embed the next line's leading characters into the message.
parser.add_option("--allowed_latency_diff_secs",
                  dest="allowed_latency_diff_secs", default=0.0, type="float",
                  help="If specified, only a timing change that differs by more than "
                       "this value will be considered significant.")
options, args = parser.parse_args()
def get_dict_from_json(filename):
    """Given a JSON file, return a nested dictionary of the successful query results.

    Everything in this file is based on the nested dictionary data structure. The
    dictionary is structured as follows: the top level maps to workloads, each workload
    maps to file formats, and each file format maps to queries. Each query contains a
    key "result_list" that maps to a list of query result dictionaries. The compute
    stats step adds additional keys such as "avg" or "stddev" next to it.

    Here's how the keys are structured:
      To get a workload, the key looks like this:
        (('workload_name', 'tpch'), ('scale_factor', '300gb'))
      Each workload has a key that looks like this:
        (('file_format', 'text'), ('compression_codec', 'zip'),
         ('compression_type', 'block'))
      Each query has a key like this:
        (('name', 'TPCH_Q10'),)

    This is useful for finding queries in a certain category and computing stats.

    Args:
      filename (str): path to the JSON file

    Returns:
      dict: a nested dictionary with grouped queries. Results whose 'success' field is
      falsy are left out.
    """
    # For every nesting level, the paths (inside a single query result) of the values
    # that make up the key of that level. Built once instead of once per result.
    key_paths_by_level = (
        # Outer level: group by workload name and scale factor
        (('query', 'workload_name'), ('query', 'scale_factor')),
        # Middle level: group by file format and compression
        (('query', 'test_vector', 'file_format'),
         ('query', 'test_vector', 'compression_codec'),
         ('query', 'test_vector', 'compression_type')),
        # Bottom level: group by query name
        (('query', 'name'),),
    )

    def get_nested_val(query_result, path):
        """Follow path through the nested query_result and return the value found.

        For example, compression_type is reached through
        "query" -> "test_vector" -> "compression_type".
        """
        cur = query_result
        for step in path:
            cur = cur[step]
        return cur

    def get_key(query_result, level_num):
        """Build the key of nesting level level_num from the values in query_result."""
        return tuple((path[-1], get_nested_val(query_result, path))
                     for path in key_paths_by_level[level_num])

    def add_result(query_result):
        """Append query_result to the result list it belongs to, creating the
        intermediate dictionaries on the way if they do not exist yet.
        """
        cur = grouped
        for level_num in range(len(key_paths_by_level)):
            cur = cur[get_key(query_result, level_num)]
        cur[RESULT_LIST].append(query_result)

    with open(filename, "r") as f:
        data = json.load(f)

    # workload -> file format -> query -> {RESULT_LIST: [...]}
    grouped = defaultdict(
        lambda: defaultdict(lambda: defaultdict(lambda: defaultdict(list))))
    # The top level keys of the JSON document are not needed: the workload name is
    # taken from each query result itself.
    for workload in data.values():
        for query_result in workload:
            if query_result['success']:
                add_result(query_result)
    return grouped
def all_query_results(grouped):
    """Generate the results dictionary of every query in the nested dictionary.

    Args:
      grouped (dict): workload -> file format -> query name -> results

    Yields:
      The innermost value (the per-query results) for every query, in dictionary
      iteration order.
    """
    # Only the values are needed, the keys of the three levels are ignored.
    for workload in grouped.values():
        for queries in workload.values():
            for results in queries.values():
                yield results
def get_commit_date(commit_sha):
    """Best-effort lookup of the date of a commit.

    Args:
      commit_sha (str): identifier of the commit, appended to the lookup URL.

    Returns:
      str: the first 10 characters of the committer date found in the JSON response
      (data['commit']['committer']['date']), or an empty string if the lookup fails
      for any reason.
    """
    try:
        import urllib2 as url_lib  # Python 2
    except ImportError:
        import urllib.request as url_lib  # Python 3

    # NOTE(review): the base of the URL is empty in this revision, so every lookup
    # fails and '' is returned. It must be set to the endpoint of the commit API
    # whose response has the commit/committer/date structure read below.
    url = '' + commit_sha
    try:
        request = url_lib.Request(url)
        response = url_lib.urlopen(request).read()
        data = json.loads(response.decode('utf8'))
        return data['commit']['committer']['date'][:10]
    except Exception:
        # The date only decorates the report header; a failed lookup (bad URL, no
        # network, unexpected response) must not prevent the report from being built.
        return ''
def get_impala_version(grouped):
    """Figure out Impala version by looking at query profile.

    The runtime profile of the first result of the first query is searched for a line
    of the form "Impala Version: <version> (build <commit sha>)".

    Args:
      grouped (dict): nested dictionary of query results.

    Returns:
      str: '<version> (<commit date>)', where the date comes from get_commit_date()
      and may be empty. 'N/A' is returned when there are no results or the profile
      does not contain a version line.
    """
    # next() with a default works for both Python 2 and 3 generators and avoids a
    # StopIteration when there are no results at all.
    first_result = next(all_query_results(grouped), None)
    if first_result is None:
        return 'N/A'
    profile = first_result['result_list'][0]['runtime_profile']
    match = re.search(r'Impala Version:\s(.*)\s\(build\s(.*)\)', profile)
    if match is None:
        return 'N/A'
    version = match.group(1)
    commit_sha = match.group(2)
    commit_date = get_commit_date(commit_sha)
    return '{0} ({1})'.format(version, commit_date)
def calculate_time_stats(grouped):
    """Add statistics to the nested dictionary.

    Each query is supplemented with the average, standard deviation, number of clients,
    iterations, and a sorted list of the time taken to complete each run. The earliest
    run of a query is dropped from its result list first (when there is more than one
    run), so this function must be called only once per dictionary.

    Args:
      grouped (dict): nested dictionary of query results; modified in place.
    """

    def remove_first_run(result_list):
        """Remove the run with the smallest start time, because the performance is
        much worse on the first run. A single run is kept as it is.
        """
        if len(result_list) > 1:
            result_list.remove(min(result_list, key=lambda result: result['start_time']))

    for workload in grouped.values():
        for queries in workload.values():
            for results in queries.values():
                result_list = results[RESULT_LIST]
                # Counted before the first run is dropped, so that the number of
                # iterations is right even when there was nothing to drop.
                total_runs = len(result_list)
                remove_first_run(result_list)

                times = [query_results[TIME_TAKEN] for query_results in result_list]
                num_clients = max(
                    int(query_results[CLIENT_NAME]) for query_results in result_list)

                results[AVG] = calculate_avg(times)
                results[STDDEV] = calculate_stddev(times)
                results[NUM_CLIENTS] = num_clients
                results[ITERATIONS] = int(total_runs / num_clients)
                # Sorted, because the median is later read from the middle of this list.
                results[SORTED] = sorted(times)
class Report(object):
    """Comparison of benchmark results against optional reference results.

    All queries are analyzed when the report is constructed. str() renders, in order:
      1. an overview table per workload and file format,
      2. a table comparing every query with its reference,
      3. details for the queries with a significant performance change,
      4. details for the queries with significant variability.

    Both dictionaries must already contain the statistics that calculate_time_stats()
    adds to each query (AVG, STDDEV, NUM_CLIENTS, ITERATIONS, SORTED).
    """

    # Becomes True, for the class as a whole, once any compared query shows a
    # significant performance change.
    significant_perf_change = False

    class FileFormatComparisonRow(object):
        """Represents a row in the overview table, where queries are grouped together
        and average and geomean are calculated per workload and file format (first
        table in the report).
        """

        def __init__(self, workload_scale, file_format, queries, ref_queries):
            """
            Args:
              workload_scale (tuple): key of the workload level of the nested dict.
              file_format (tuple): key of the file format level of the nested dict.
              queries (dict): query key -> results for this workload and file format.
              ref_queries (dict): the same for the reference results.
            """
            time_list = []
            ref_time_list = []
            for query_name, results in queries.items():
                # A query contributes to the average and geomean only if it is in
                # both the results and the reference results.
                if query_name in ref_queries:
                    time_list.extend(
                        query_results[TIME_TAKEN]
                        for query_results in results[RESULT_LIST])
                    ref_time_list.extend(
                        ref_query_results[TIME_TAKEN]
                        for ref_query_results in ref_queries[query_name][RESULT_LIST])

            self.workload_name = '{0}({1})'.format(
                workload_scale[0][1].upper(), workload_scale[1][1])
            self.file_format = '{0} / {1} / {2}'.format(
                file_format[0][1], file_format[1][1], file_format[2][1])

            self.avg = calculate_avg(time_list)
            ref_avg = calculate_avg(ref_time_list)
            self.delta_avg = calculate_change(self.avg, ref_avg)

            self.geomean = calculate_geomean(time_list)
            ref_geomean = calculate_geomean(ref_time_list)
            self.delta_geomean = calculate_change(self.geomean, ref_geomean)

    class QueryComparisonRow(object):
        """Represents a row in the table where individual queries are shown (second
        table in the report).
        """

        def __init__(self, results, ref_results):
            """
            Args:
              results (dict): results of one query, including computed statistics.
              ref_results (dict or None): reference results of the same query.
            """
            query = results[RESULT_LIST][0][QUERY]
            test_vector = query[TEST_VECTOR]
            self.workload_name = '{0}({1})'.format(
                query[WORKLOAD_NAME].upper(), query[SCALE_FACTOR])
            self.query_name = query[NAME]
            self.file_format = '{0} / {1} / {2}'.format(
                test_vector[FILE_FORMAT],
                test_vector[COMPRESSION_CODEC],
                test_vector[COMPRESSION_TYPE])
            self.avg = results[AVG]
            self.rsd = results[STDDEV] / self.avg if self.avg > 0 else 0.0
            self.significant_variability = self.rsd > 0.1
            self.num_clients = results[NUM_CLIENTS]
            self.iters = results[ITERATIONS]

            if ref_results is None:
                self.perf_change = False
                self.zval = 0
                self.tval = 0
                # Without reference results the comparison columns hold -inf, which
                # is rendered as N/A and sorts after every real value.
                self.base_avg = float('-inf')
                self.base_rsd = float('-inf')
                self.delta_avg = float('-inf')
                self.median_diff = float('-inf')
                self.perf_change_str = ''
                return

            median = results[SORTED][int(len(results[SORTED]) / 2)]
            # Median of all pairwise differences, relative to the median run time.
            all_diffs = sorted(
                x - y for x in results[SORTED] for y in ref_results[SORTED])
            median_of_diffs = all_diffs[int(len(all_diffs) / 2)]
            self.median_diff = median_of_diffs / median if median != 0 else 0.0
            self.perf_change, self.zval, self.tval = (
                self.__check_perf_change_significance(results, ref_results))
            self.base_avg = ref_results[AVG]
            self.base_rsd = (
                ref_results[STDDEV] / self.base_avg if self.base_avg > 0 else 0.0)
            self.delta_avg = calculate_change(self.avg, self.base_avg)
            if self.perf_change:
                self.perf_change_str = self.__build_perf_change_str(
                    results, ref_results, self.zval, self.tval)
                Report.significant_perf_change = True
            else:
                self.perf_change_str = ''

            if not options.hive_results:
                try:
                    save_runtime_diffs(
                        results, ref_results, self.perf_change, self.zval, self.tval)
                except Exception as e:
                    # The html diff is a convenience; failing to write it must not
                    # abort the report.
                    LOG.error('Could not generate an html diff: {0}'.format(e))

        def __check_perf_change_significance(self, stat, ref_stat):
            """Decide whether the difference between stat and ref_stat is significant.

            Returns:
              (bool, zval, tval): the decision, the Mann-Whitney value and the t-value.
            """
            zval = calculate_mwu(stat[SORTED], ref_stat[SORTED])
            tval = calculate_tval(stat[AVG], stat[STDDEV], stat[ITERATIONS],
                                  ref_stat[AVG], ref_stat[STDDEV], ref_stat[ITERATIONS])
            try:
                percent_difference = (
                    abs(ref_stat[AVG] - stat[AVG]) * 100 / ref_stat[AVG])
            except ZeroDivisionError:
                percent_difference = 0.0

            absolute_difference = abs(ref_stat[AVG] - stat[AVG])
            if absolute_difference < options.allowed_latency_diff_secs:
                return False, zval, tval
            if percent_difference < options.min_percent_change_threshold:
                return False, zval, tval
            # Changes above the maximum threshold are significant regardless of the
            # statistical tests. The default threshold is infinity, so this has no
            # effect unless the option is given.
            if percent_difference > options.max_percent_change_threshold:
                return True, zval, tval
            return (abs(zval) > options.zval_threshold
                    or abs(tval) > options.tval_threshold,
                    zval, tval)

        def __build_perf_change_str(self, result, ref_result, zval, tval):
            """Build a performance change string, followed by the execution summary
            comparison of the query.

            The first line has the form:
              (R) Regression: <workload> <query> [<format> / <codec> / <type>]
              (<reference avg>s -> <avg>s [<change>%])
            """
            if zval >= 0 and tval >= 0:
                perf_change_type = "(R) Regression"
            elif zval <= 0 and tval <= 0:
                perf_change_type = "(I) Improvement"
            else:
                # The two tests disagree about the direction of the change.
                perf_change_type = "(?) Anomoly"
            query = result[RESULT_LIST][0][QUERY]

            workload_name = '{0}({1})'.format(
                query[WORKLOAD_NAME].upper(), query[SCALE_FACTOR])
            query_name = query[NAME]
            file_format = query[TEST_VECTOR][FILE_FORMAT]
            compression_codec = query[TEST_VECTOR][COMPRESSION_CODEC]
            compression_type = query[TEST_VECTOR][COMPRESSION_TYPE]

            template = ("{perf_change_type}: "
                        "{workload_name} {query_name} "
                        "[{file_format} / {compression_codec} / {compression_type}] "
                        "({ref_avg:.2f}s -> {avg:.2f}s [{delta:+.2%}])\n")

            perf_change_str = template.format(
                perf_change_type=perf_change_type,
                workload_name=workload_name,
                query_name=query_name,
                file_format=file_format,
                compression_codec=compression_codec,
                compression_type=compression_type,
                ref_avg=ref_result[AVG],
                avg=result[AVG],
                delta=calculate_change(result[AVG], ref_result[AVG]))

            perf_change_str += build_exec_summary_str(result, ref_result)
            return perf_change_str + '\n'

    class QueryVariabilityRow(object):
        """Represents a row in the query variability table."""

        def __init__(self, results, ref_results):
            """
            Args:
              results (dict): results of one query, including computed statistics.
              ref_results (dict or None): reference results of the same query.
            """
            if ref_results is None:
                self.base_rel_stddev = float('inf')
            else:
                # The guard must look at the reference average (the divisor), not at
                # the reference dictionary itself.
                self.base_rel_stddev = (
                    ref_results[STDDEV] / ref_results[AVG]
                    if ref_results[AVG] > 0 else 0.0)

            query = results[RESULT_LIST][0][QUERY]
            self.workload_name = '{0}({1})'.format(
                query[WORKLOAD_NAME].upper(), query[SCALE_FACTOR])
            self.query_name = query[NAME]
            self.file_format = query[TEST_VECTOR][FILE_FORMAT]
            self.compression = (query[TEST_VECTOR][COMPRESSION_CODEC] + ' / ' +
                                query[TEST_VECTOR][COMPRESSION_TYPE])

            self.rel_stddev = (
                results[STDDEV] / results[AVG] if results[AVG] > 0 else 0.0)
            self.significant_variability = self.rel_stddev > 0.1

            variability_template = (
                "(V) Significant Variability: "
                "{workload_name} {query_name} [{file_format} / {compression}] "
                "({base_rel_stddev:.2%} -> {rel_stddev:.2%})\n")

            if self.significant_variability and ref_results:
                # If ref_results do not exist, variability analysis is not conducted
                self.variability_str = variability_template.format(
                    workload_name=self.workload_name,
                    query_name=self.query_name,
                    file_format=self.file_format,
                    compression=self.compression,
                    base_rel_stddev=self.base_rel_stddev,
                    rel_stddev=self.rel_stddev)
                self.exec_summary_str = build_exec_summary_str(
                    results, ref_results, for_variability=True)
            else:
                self.variability_str = str()
                self.exec_summary_str = str()

        def __str__(self):
            return self.variability_str + self.exec_summary_str

    def __init__(self, grouped, ref_grouped):
        """
        Args:
          grouped (dict): nested dictionary of the current results.
          ref_grouped (dict or None): nested dictionary of the reference results.
        """
        self.grouped = grouped
        self.ref_grouped = ref_grouped
        self.query_comparison_rows = []
        self.file_format_comparison_rows = []
        self.query_variability_rows = []
        self.__analyze()

    def __analyze(self):
        """Generates the comparison data that can be printed later."""
        for workload_scale, workload in self.grouped.items():
            for file_format, queries in workload.items():
                # Membership is tested before indexing so that looking things up
                # does not insert keys into a defaultdict of reference results.
                ref_queries = None
                if (self.ref_grouped is not None
                        and workload_scale in self.ref_grouped
                        and file_format in self.ref_grouped[workload_scale]):
                    ref_queries = self.ref_grouped[workload_scale][file_format]
                    self.file_format_comparison_rows.append(
                        Report.FileFormatComparisonRow(
                            workload_scale, file_format, queries, ref_queries))

                for query_name, results in queries.items():
                    # If not present in reference results, set to None
                    ref_results = None
                    if ref_queries is not None and query_name in ref_queries:
                        ref_results = ref_queries[query_name]
                    self.query_comparison_rows.append(
                        Report.QueryComparisonRow(results, ref_results))
                    self.query_variability_rows.append(
                        Report.QueryVariabilityRow(results, ref_results))

    def __str__(self):
        output = str()

        # Per file format analysis overview table
        table = prettytable.PrettyTable(
            ['Workload', 'File Format', 'Avg (s)', 'Delta(Avg)',
             'GeoMean(s)', 'Delta(GeoMean)'])
        table.float_format = '.2'
        table.align = 'l'
        self.file_format_comparison_rows.sort(
            key=lambda row: row.delta_geomean, reverse=True)
        for row in self.file_format_comparison_rows:
            table.add_row([
                row.workload_name,
                row.file_format,
                row.avg,
                '{0:+.2%}'.format(row.delta_avg),
                row.geomean,
                '{0:+.2%}'.format(row.delta_geomean)])
        output += str(table) + '\n\n'

        # Main comparison table
        detailed_performance_change_analysis_str = str()
        table = prettytable.PrettyTable(
            ['Workload', 'Query', 'File Format', 'Avg(s)',
             'Base Avg(s)', 'Delta(Avg)', 'StdDev(%)',
             'Base StdDev(%)', 'Iters', 'Median Diff(%)',
             'MW Zval', 'Tval'])
        table.float_format = '.2'
        table.align = 'l'
        # Sort table from worst to best regression
        self.query_comparison_rows.sort(
            key=lambda row: row.delta_avg + row.median_diff, reverse=True)
        for row in self.query_comparison_rows:
            # Mark significant changes: R(egression), I(mprovement), or ? when the
            # two tests disagree about the direction.
            if not row.perf_change:
                delta_avg_template = ' {0:+.2%}'
            elif row.zval >= 0 and row.tval >= 0:
                delta_avg_template = 'R {0:+.2%}'
            elif row.zval <= 0 and row.tval <= 0:
                delta_avg_template = 'I {0:+.2%}'
            else:
                delta_avg_template = '? {0:+.2%}'

            table.add_row([
                row.workload_name,
                row.query_name,
                row.file_format,
                row.avg,
                row.base_avg if row.base_avg != float('-inf') else 'N/A',
                ' N/A' if row.delta_avg == float('-inf')
                else delta_avg_template.format(row.delta_avg),
                ('* {0:.2%} *' if row.rsd > 0.1 else ' {0:.2%} ').format(row.rsd),
                ' N/A' if row.base_rsd == float('-inf') else (
                    '* {0:.2%} *' if row.base_rsd > 0.1
                    else ' {0:.2%} ').format(row.base_rsd),
                row.iters,
                ' N/A' if row.median_diff == float('-inf')
                else delta_avg_template.format(row.median_diff),
                row.zval,
                row.tval])
            detailed_performance_change_analysis_str += row.perf_change_str

        output += str(table) + '\n\n'
        output += detailed_performance_change_analysis_str

        # Variability analysis, most variable queries first
        self.query_variability_rows.sort(key=lambda row: row.rel_stddev, reverse=True)
        output += ''.join(str(row) for row in self.query_variability_rows)

        if Report.significant_perf_change:
            output += 'Significant perf change detected'

        return output
class CombinedExecSummaries(object):
    """All execution summaries for each query are combined into this object.

    The overall average time is calculated for each node by averaging the average time
    from each execution summary. The max time is calculated by getting the max time of
    max times.

    This object can be compared to another one and ExecSummaryComparison can be
    generated.

    Args:
      exec_summaries (list of list of dict): A list of exec summaries; each exec
        summary is a list of dicts, one dict per node.

    Attributes:
      error_str (str): why the summaries could not be combined, empty if they could.
      rows (list of dict): each dict represents a row in the summary table and has the
        following keys:
          prefix (str)
          operator (str)
          num_hosts (int)
          num_instances (int)
          num_rows (int)
          est_num_rows (int)
          detail (str)
          avg_time (float): average of average times in all the execution summaries
          stddev_time: standard deviation of times in all the execution summaries
          max_time: maximum of max times in all the execution summaries
          peak_mem (int)
          est_peak_mem (int)
    """

    def __init__(self, exec_summaries):
        # We want to make sure that all execution summaries have the same structure
        # before we can combine them. If not, err_str will contain the reason why we
        # can't combine the exec summaries.
        ok, err_str = self.__check_exec_summary_schema(exec_summaries)
        self.error_str = err_str

        self.rows = []
        if ok:
            self.__build_rows(exec_summaries)

    def __build_rows(self, exec_summaries):
        """Fill self.rows with one combined row per node of the exec summaries."""
        # Values that do not depend on the run are taken from the first exec summary.
        fixed_keys = [PREFIX, OPERATOR, NUM_HOSTS, NUM_INSTANCES, NUM_ROWS,
                      EST_NUM_ROWS, DETAIL]

        first_exec_summary = exec_summaries[0]
        for row_num, row in enumerate(first_exec_summary):
            combined_row = dict((key, row[key]) for key in fixed_keys)

            # The same node in every exec summary
            node_rows = [exec_summary[row_num] for exec_summary in exec_summaries]
            avg_times = [node_row[AVG_TIME] for node_row in node_rows]
            max_times = [node_row[MAX_TIME] for node_row in node_rows]
            peak_mems = [node_row[PEAK_MEM] for node_row in node_rows]
            est_peak_mems = [node_row[EST_PEAK_MEM] for node_row in node_rows]

            # Set the calculated values
            combined_row[AVG_TIME] = calculate_avg(avg_times)
            combined_row[STDDEV_TIME] = calculate_stddev(avg_times)
            combined_row[MAX_TIME] = max(max_times)
            combined_row[PEAK_MEM] = max(peak_mems)
            combined_row[EST_PEAK_MEM] = max(est_peak_mems)
            self.rows.append(combined_row)

    def is_same_schema(self, reference):
        """Check if the reference CombinedExecSummaries summary has the same schema as
        this one. (For example, the operator names are the same for each node).

        The purpose of this is to check if it makes sense to combine this object with a
        reference one to produce ExecSummaryComparison.

        Args:
          reference (CombinedExecSummaries): comparison

        Returns:
          bool: True if the schemas are similar enough to be compared, False otherwise.
        """
        if len(self.rows) != len(reference.rows):
            return False

        for row, ref_row in zip(self.rows, reference.rows):
            if row[OPERATOR] != ref_row[OPERATOR]:
                return False

        return True

    def __str__(self):
        """Render the combined rows as a table, or the error if there are none."""
        if self.error_str:
            return self.error_str

        table = prettytable.PrettyTable(
            ["Operator",
             "#Hosts",
             "#Inst",
             "Avg Time",
             "Std Dev",
             "Max Time",
             "#Rows",
             "Est #Rows"])
        table.align = 'l'
        table.float_format = '.2'

        for row in self.rows:
            table.add_row([
                row[PREFIX] + row[OPERATOR],
                prettyprint_values(row[NUM_HOSTS]),
                prettyprint_values(row[NUM_INSTANCES]),
                prettyprint_time(row[AVG_TIME]),
                prettyprint_time(row[STDDEV_TIME]),
                prettyprint_time(row[MAX_TIME]),
                prettyprint_values(row[NUM_ROWS]),
                prettyprint_values(row[EST_NUM_ROWS])])

        return str(table)

    def total_runtime(self):
        """Return the sum of the average times of all rows."""
        return sum(row[AVG_TIME] for row in self.rows)

    def __check_exec_summary_schema(self, exec_summaries):
        """Check if all given exec summaries have the same structure.

        This method is called to check if it is possible to build a single
        CombinedExecSummaries from the list of exec_summaries. (For example all exec
        summaries must have the same number of nodes.)

        This method is somewhat similar to is_same_schema. The difference is that
        is_same_schema() checks if two CombinedExecSummaries have the same structure
        and this method checks if all exec summaries in the list have the same
        structure.

        Args:
          exec_summaries (list of list of dict): the exec summaries to check

        Returns:
          (bool, str): True if all exec summaries have the same structure, otherwise
            False followed by a string containing the explanation.
        """
        err = 'Summaries cannot be combined: '

        if len(exec_summaries) < 1:
            return False, err + 'no exec summaries Found'

        first_exec_summary = exec_summaries[0]

        # This check is for Metadata queries which don't have summaries
        if len(first_exec_summary) < 1:
            return False, err + 'exec summary contains no nodes'

        for exec_summary in exec_summaries:
            if len(exec_summary) != len(first_exec_summary):
                return False, err + 'different number of nodes in exec summaries'

            for row, comp_row in zip(exec_summary, first_exec_summary):
                if row[OPERATOR] != comp_row[OPERATOR]:
                    return False, err + 'different operator'

        return True, str()
class ExecSummaryComparison(object):
    """Represents a comparison between two CombinedExecSummaries.

    Args:
      combined_summary (CombinedExecSummaries): current summary.
      ref_combined_summary (CombinedExecSummaries or None): reference summary.
      for_variability (bool): build the variability comparison instead of the
        performance comparison.

    Attributes:
      rows (list of dict): Each dict represents a single row. For the performance
        comparison each dict has the keys of the combined summary row (prefix,
        operator, num_hosts, num_instances, avg_time, stddev_time, max_time, peak_mem,
        num_rows, est_num_rows, est_peak_mem, detail) plus:
          percent_of_query (float): avg time as a fraction of the total of the query
          rstd (float): stddev_time relative to avg_time
          baseline_avg (float): avg time of the reference
          delta_avg (float): change in avg time compared to reference
          baseline_max (float): max time of the reference
          delta_max (float): change in max time compared to reference
        For the variability comparison each dict has the keys operator,
        percent_of_query, rstd, ref_rstd, delta_rstd, num_hosts, num_instances,
        num_rows and est_num_rows.
      combined_summary (CombinedExecSummaries): original combined summary
      ref_combined_summary (CombinedExecSummaries): original reference combined
        summary. If the comparison cannot be constructed, these summaries are printed.
      error_str (str): why the comparison could not be built, empty if it could.
    """

    def __init__(self, combined_summary, ref_combined_summary, for_variability=False):
        # Store the original summaries, in case we can't build a comparison
        self.combined_summary = combined_summary
        self.ref_combined_summary = ref_combined_summary

        # If some error happened during calculations, store it here
        self.error_str = str()
        self.for_variability = for_variability

        self.rows = []
        if for_variability:
            self.__build_rows_variability()
        else:
            self.__build_rows()

    def __is_comparable(self):
        """Return True if there is a reference summary with the same schema."""
        return (self.ref_combined_summary is not None
                and self.combined_summary.is_same_schema(self.ref_combined_summary))

    def __percent_of_query(self, row):
        """Return the avg time of row as a fraction of the total query runtime."""
        # total_runtime is a method: it has to be called, comparing the method object
        # itself with 0 does not protect against a division by zero.
        total_runtime = self.combined_summary.total_runtime()
        return row[AVG_TIME] / total_runtime if total_runtime > 0 else 0.0

    def __build_rows(self):
        """Fill self.rows for the performance comparison, or set error_str."""
        if not self.__is_comparable():
            self.error_str = 'Execution summary structures are different'
            return

        copied_keys = [PREFIX, OPERATOR, NUM_HOSTS, NUM_INSTANCES, AVG_TIME,
                       STDDEV_TIME, MAX_TIME, PEAK_MEM, NUM_ROWS, EST_NUM_ROWS,
                       EST_PEAK_MEM, DETAIL]
        for i, row in enumerate(self.combined_summary.rows):
            ref_row = self.ref_combined_summary.rows[i]

            comparison_row = dict((key, row[key]) for key in copied_keys)
            comparison_row[PERCENT_OF_QUERY] = self.__percent_of_query(row)
            comparison_row[RSTD] = (
                row[STDDEV_TIME] / row[AVG_TIME] if row[AVG_TIME] > 0 else 0.0)
            comparison_row[BASELINE_AVG] = ref_row[AVG_TIME]
            comparison_row[DELTA_AVG] = calculate_change(
                row[AVG_TIME], ref_row[AVG_TIME])
            comparison_row[BASELINE_MAX] = ref_row[MAX_TIME]
            comparison_row[DELTA_MAX] = calculate_change(
                row[MAX_TIME], ref_row[MAX_TIME])
            self.rows.append(comparison_row)

    def __str__(self):
        """Construct a PrettyTable containing the comparison"""
        if self.for_variability:
            return str(self.__build_table_variability())
        return str(self.__build_table())

    def __build_rows_variability(self):
        """Fill self.rows for the variability comparison, or set error_str."""
        if not self.__is_comparable():
            self.error_str = 'Execution summary structures are different'
            return

        for i, row in enumerate(self.combined_summary.rows):
            ref_row = self.ref_combined_summary.rows[i]

            comparison_row = {}
            comparison_row[OPERATOR] = row[OPERATOR]
            comparison_row[PERCENT_OF_QUERY] = self.__percent_of_query(row)
            comparison_row[RSTD] = (
                row[STDDEV_TIME] / row[AVG_TIME] if row[AVG_TIME] > 0 else 0.0)
            comparison_row[REF_RSTD] = (
                ref_row[STDDEV_TIME] / ref_row[AVG_TIME]
                if ref_row[AVG_TIME] > 0 else 0.0)
            comparison_row[DELTA_RSTD] = calculate_change(
                comparison_row[RSTD], comparison_row[REF_RSTD])
            comparison_row[NUM_HOSTS] = row[NUM_HOSTS]
            comparison_row[NUM_INSTANCES] = row[NUM_INSTANCES]
            comparison_row[NUM_ROWS] = row[NUM_ROWS]
            comparison_row[EST_NUM_ROWS] = row[EST_NUM_ROWS]
            self.rows.append(comparison_row)

    def __both_summaries_str(self):
        """Return the error followed by both original summaries.

        Used when the summary comparison could not be constructed.
        """
        output = self.error_str + '\n'
        output += 'Execution Summary: \n'
        output += str(self.combined_summary) + '\n'
        output += 'Reference Execution Summary: \n'
        output += str(self.ref_combined_summary)
        return output

    def __build_table_variability(self):
        """Return the variability comparison as text."""

        def is_significant(row):
            """Check if the variability in the row was significant"""
            return options.output_all_summary_nodes or (
                row[RSTD] > 0.1 and row[PERCENT_OF_QUERY] > 0.02)

        if self.error_str:
            return self.__both_summaries_str()

        table = prettytable.PrettyTable(
            ['Operator',
             '% of Query',
             'StdDev(%)',
             'Base StdDev(%)',
             'Delta(StdDev(%))',
             '#Hosts',
             '#Inst',
             '#Rows',
             'Est #Rows'])
        table.align = 'l'
        table.float_format = '.2'
        table_contains_at_least_one_row = False
        for row in self.rows:
            if not is_significant(row):
                continue
            table.add_row([
                row[OPERATOR],
                prettyprint_percent(row[PERCENT_OF_QUERY]),
                ('* {0:.2%} *' if row[RSTD] > 0.1 else ' {0:.2%} ').format(row[RSTD]),
                ('* {0:.2%} *' if row[REF_RSTD] > 0.1
                 else ' {0:.2%} ').format(row[REF_RSTD]),
                prettyprint_percent(row[DELTA_RSTD]),
                prettyprint_values(row[NUM_HOSTS]),
                prettyprint_values(row[NUM_INSTANCES]),
                prettyprint_values(row[NUM_ROWS]),
                prettyprint_values(row[EST_NUM_ROWS])])
            table_contains_at_least_one_row = True

        if table_contains_at_least_one_row:
            return str(table) + '\n'
        return 'No Nodes with significant StdDev %\n'

    def __build_table(self):
        """Return the performance comparison as text."""

        def is_significant(row):
            """Check if the performance change in the row was significant"""
            # NOTE(review): 100000000 is presumably in nanoseconds (prettyprint_time
            # starts at "ns"), i.e. 100ms; confirm the unit of max_time.
            return options.output_all_summary_nodes or (
                row[MAX_TIME] > 100000000 and
                row[PERCENT_OF_QUERY] > 0.02)

        if self.error_str:
            return self.__both_summaries_str()

        table = prettytable.PrettyTable(
            ['Operator',
             '% of Query',
             'Avg',
             'Base Avg',
             'Delta(Avg)',
             'StdDev(%)',
             'Max',
             'Base Max',
             'Delta(Max)',
             '#Hosts',
             '#Inst',
             '#Rows',
             'Est #Rows'])
        table.align = 'l'
        table.float_format = '.2'

        for row in self.rows:
            if is_significant(row):
                table.add_row([
                    row[OPERATOR],
                    prettyprint_percent(row[PERCENT_OF_QUERY]),
                    prettyprint_time(row[AVG_TIME]),
                    prettyprint_time(row[BASELINE_AVG]),
                    prettyprint_percent(row[DELTA_AVG]),
                    ('* {0:.2%} *' if row[RSTD] > 0.1
                     else ' {0:.2%} ').format(row[RSTD]),
                    prettyprint_time(row[MAX_TIME]),
                    prettyprint_time(row[BASELINE_MAX]),
                    prettyprint_percent(row[DELTA_MAX]),
                    prettyprint_values(row[NUM_HOSTS]),
                    prettyprint_values(row[NUM_INSTANCES]),
                    prettyprint_values(row[NUM_ROWS]),
                    prettyprint_values(row[EST_NUM_ROWS])])

        return str(table)
def calculate_change(val, ref_val):
    """Return the change of val relative to ref_val, as a fraction of ref_val.

    A reference value of zero gives 0.0 instead of a division by zero.
    """
    if ref_val != 0:
        difference = val - ref_val
        return difference / ref_val
    return 0.0
def prettyprint(val, units, divisor):
    """Print a value in human readable format along with its unit.

    We start at the leftmost unit in the list and keep dividing the value by divisor
    until the value is less than divisor. The value is then printed along with the unit
    type. A value too large even for the last unit is printed in that last unit rather
    than being dropped.

    Args:
      val (int or float): Value to be printed.
      units (list of str): Unit names for different sizes.
      divisor (float): ratio between two consecutive units.

    Returns:
      str: the value followed by its unit; an integer for the first unit, two decimals
      for every other unit. None if units is empty.
    """
    last_index = len(units) - 1
    for index, unit in enumerate(units):
        # The last unit takes whatever is left, so that large values are still shown.
        if abs(val) < divisor or index == last_index:
            if index == 0:
                return "%d%s" % (val, unit)
            return "%3.2f%s" % (val, unit)
        val /= divisor
def prettyprint_bytes(byte_val):
    """Format a number of bytes, stepping through B/KB/MB/GB/TB by factors of 1024."""
    byte_units = ['B', 'KB', 'MB', 'GB', 'TB']
    return prettyprint(byte_val, byte_units, 1024.0)
def prettyprint_values(unit_val):
    """Format a plain count with the suffixes "", K, M and B, in steps of 1000."""
    count_suffixes = ["", "K", "M", "B"]
    return prettyprint(unit_val, count_suffixes, 1000.0)
def prettyprint_time(time_val):
    """Format a time given in the smallest unit (ns) as ns/us/ms/s, in steps of 1000."""
    time_units = ["ns", "us", "ms", "s"]
    return prettyprint(time_val, time_units, 1000.0)
def prettyprint_percent(percent_val):
    """Format a fraction as a signed percentage with two decimals (0.5 -> '+50.00%')."""
    signed_percent_spec = '+.2%'
    return format(percent_val, signed_percent_spec)
def save_runtime_diffs(results, ref_results, change_significant, zval, tval):
    """Given results and reference results, generate and output an HTML file
    containing the Runtime Profile diff.

    The file is written to $IMPALA_HOME/results, which is created if needed. Its name
    starts with a prefix that tells the kind of change: 'neu' (no significant change),
    'reg' (regression), 'imp' (improvement) or '???' (significant, but the two tests
    disagree about the direction).

    Args:
      results (dict): results of one query.
      ref_results (dict): reference results of the same query.
      change_significant (bool): whether the performance change is significant.
      zval, tval: test values, their signs select the prefix.

    Raises:
      RuntimeError: if $IMPALA_HOME/results exists but is not a directory.
    """
    diff = difflib.HtmlDiff(wrapcolumn=90, linejunk=difflib.IS_LINE_JUNK)

    # We are comparing last queries in each run because they should have the most
    # stable performance (unlike the first queries)
    runtime_profile = results[RESULT_LIST][-1][RUNTIME_PROFILE]
    ref_runtime_profile = ref_results[RESULT_LIST][-1][RUNTIME_PROFILE]

    template = ('{prefix}-{query_name}-{scale_factor}-{file_format}-{compression_codec}'
                '-{compression_type}-runtime_profile.html')

    query = results[RESULT_LIST][-1][QUERY]

    # Neutral - no improvement or regression
    prefix = 'neu'
    if change_significant:
        prefix = '???'
        if zval >= 0 and tval >= 0:
            prefix = 'reg'
        elif zval <= 0 and tval <= 0:
            prefix = 'imp'

    runtime_profile_file_name = template.format(
        prefix=prefix,
        query_name=query[NAME],
        scale_factor=query[SCALE_FACTOR],
        file_format=query[TEST_VECTOR][FILE_FORMAT],
        compression_codec=query[TEST_VECTOR][COMPRESSION_CODEC],
        compression_type=query[TEST_VECTOR][COMPRESSION_TYPE])

    # Go into results dir
    dir_path = os.path.join(os.environ["IMPALA_HOME"], 'results')

    if not os.path.exists(dir_path):
        os.mkdir(dir_path)
    elif not os.path.isdir(dir_path):
        raise RuntimeError("Unable to create $IMPALA_HOME/results, results file exists")

    runtime_profile_file_path = os.path.join(dir_path, runtime_profile_file_name)

    # The reference profile is the "from" side, matching the descriptions below.
    runtime_profile_diff = diff.make_file(
        ref_runtime_profile.splitlines(),
        runtime_profile.splitlines(),
        fromdesc="Baseline Runtime Profile",
        todesc="Current Runtime Profile")

    with open(runtime_profile_file_path, 'w+') as f:
        f.write(runtime_profile_diff)
def build_exec_summary_str(results, ref_results, for_variability=False):
    """Return the execution summary comparison of a query as text.

    Args:
      results (dict): results of one query.
      ref_results (dict or None): reference results of the same query.
      for_variability (bool): build the variability comparison instead of the
        performance comparison.

    Returns:
      str: the comparison followed by a newline, or an empty string when no execution
      summary is available.
    """
    # There is no summary available for Hive after query execution, so the results
    # are not even inspected in that case.
    if options.hive_results:
        return ""

    exec_summaries = [result[EXEC_SUMMARY] for result in results[RESULT_LIST]]
    # Metadata queries don't have execution summary
    if not exec_summaries or exec_summaries[0] is None:
        return ""

    combined_summary = CombinedExecSummaries(exec_summaries)

    if ref_results is None:
        ref_combined_summary = None
    else:
        ref_exec_summaries = [result[EXEC_SUMMARY]
                              for result in ref_results[RESULT_LIST]]
        ref_combined_summary = CombinedExecSummaries(ref_exec_summaries)

    comparison = ExecSummaryComparison(
        combined_summary, ref_combined_summary, for_variability)

    return str(comparison) + '\n'
def build_summary_header(current_impala_version, ref_impala_version):
    """Return the header of the report.

    The header contains the generation date followed by the run description, cluster
    name and lab run info (each only if the option is set), and the versions unless
    the results come from Hive.

    Args:
      current_impala_version (str): version of the Impala that produced the results.
      ref_impala_version (str): version of the Impala that produced the reference.
    """
    summary = "Report Generated on {0}\n".format(date.today())
    if options.report_description:
        summary += 'Run Description: {0}\n'.format(options.report_description)
    if options.cluster_name:
        summary += '\nCluster Name: {0}\n'.format(options.cluster_name)
    if options.lab_run_info:
        summary += 'Lab Run Info: {0}\n'.format(options.lab_run_info)
    if not options.hive_results:
        summary += 'Impala Version: {0}\n'.format(current_impala_version)
        summary += 'Baseline Impala Version: {0}\n'.format(ref_impala_version)
    return summary
if __name__ == "__main__":
    # Workflow:
    # 1. Build a nested dictionary for the current result JSON and reference result
    #    JSON.
    # 2. Calculate runtime statistics for each query for both results and reference
    #    results.
    # 3. Construct a string with an overview of workload runtime and detailed
    #    performance comparison for queries with significant performance change.

    logging.basicConfig(level=logging.DEBUG if options.verbose else logging.INFO)

    # Generate a dictionary based on the JSON file
    grouped = get_dict_from_json(options.result_file)
    # The report reads the statistics, so they have to be computed before it is built.
    calculate_time_stats(grouped)

    try:
        # Generate a dictionary based on the reference JSON file
        ref_grouped = get_dict_from_json(options.reference_result_file)
        calculate_time_stats(ref_grouped)
    except Exception as e:
        # If reference result file could not be read we can still continue and
        # report on the current results alone.
        LOG.error('Could not read reference result file: {0}'.format(e))
        ref_grouped = None

    report = Report(grouped, ref_grouped)

    ref_impala_version = 'N/A'
    if options.hive_results:
        current_impala_version = 'N/A'
    else:
        current_impala_version = get_impala_version(grouped)
        if ref_grouped:
            ref_impala_version = get_impala_version(ref_grouped)

    # A single parenthesized argument prints the same with the Python 2 print
    # statement and the Python 3 print function.
    print(build_summary_header(current_impala_version, ref_impala_version))
    print(report)