IMPALA-9215: report_benchmark_results.py fails with missing key

report_benchmark_results.py failed with a missing key because it tried to look up 'num_instances' in the perf result JSON file. The JSON file contained the exec summary generated by impala_beeswax.py::__build_summary_table(), which omitted the number of instances.

This patch adds 'num_instances' to the summary table created by impala_beeswax.py. To keep report_benchmark_results.py simple, it assumes that both perf JSON files contain 'num_instances', i.e. if a user runs single_node_perf_run.py to compare two commits, both of them must contain this fix.

I tested the patch set manually.

Change-Id: I822c86f621f5a348b56d672c263a2cf9321767ee
Reviewed-on: http://gerrit.cloudera.org:8080/14830
Reviewed-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>

commit: 92aa2c16f172cf15cf6b026ab7af9c72576772f5 [log] [tgz]
author: Zoltan Borok-Nagy <boroknagyz@cloudera.com> Wed Dec 04 18:49:48 2019 +0100
committer: Impala Public Jenkins <impala-public-jenkins@cloudera.com> Thu Dec 05 17:07:55 2019 +0000
tree: 5e4b39bf1719ad7d13ad1504586cbc446c58515c
parent: 30c7a6a18c85574ff76dc750dfef94475f1c9796 [diff]
diff --git a/tests/beeswax/impala_beeswax.py b/tests/beeswax/impala_beeswax.py
index b788a38..ab10023 100644
--- a/tests/beeswax/impala_beeswax.py
+++ b/tests/beeswax/impala_beeswax.py

@@ -284,7 +284,8 @@
       else:
         avg_time = 0
 
-      row["num_hosts"] = len(node.exec_stats)
+      row["num_instances"] = len(node.exec_stats)
+      row["num_hosts"] = node.num_hosts
       row["avg_time"] = avg_time
 
     is_sink = node.node_id == -1

diff --git a/tests/benchmark/report_benchmark_results.py b/tests/benchmark/report_benchmark_results.py
index 0274e64..5a4cc07 100755
--- a/tests/benchmark/report_benchmark_results.py
+++ b/tests/benchmark/report_benchmark_results.py

@@ -338,7 +338,6 @@
     the report).
     """
     def __init__(self, results, ref_results):
-
       self.workload_name = '{0}({1})'.format(
           results[RESULT_LIST][0][QUERY][WORKLOAD_NAME].upper(),
           results[RESULT_LIST][0][QUERY][SCALE_FACTOR])
@@ -407,7 +406,6 @@
       For example:
       Regression: TPCDS-Q52 [parquet/none/none] (1.390s -> 1.982s [+42.59%])
       """
-
       perf_change_type = ("(R) Regression" if zval >= 0 and tval >= 0
                           else "(I) Improvement" if zval <= 0 and tval <= 0
                           else "(?) Anomoly")
@@ -613,6 +611,7 @@
       prefix (str)
       operator (str)
       num_hosts (int)
+      num_instances (int)
       num_rows (int)
       est_num_rows (int)
       detail (str)
@@ -641,7 +640,8 @@
     for row_num, row in enumerate(first_exec_summary):
       combined_row = {}
       # Copy fixed values from the first exec summary
-      for key in [PREFIX, OPERATOR, NUM_HOSTS, NUM_ROWS, EST_NUM_ROWS, DETAIL]:
+      for key in [PREFIX, OPERATOR, NUM_HOSTS, NUM_INSTANCES, NUM_ROWS, EST_NUM_ROWS,
+                  DETAIL]:
         combined_row[key] = row[key]
 
       avg_times = [exec_summary[row_num][AVG_TIME] for exec_summary in exec_summaries]
@@ -769,6 +769,7 @@
       prefix (str)
       operator (str)
       num_hosts (int)
+      num_instances (int)
       avg_time (float)
       stddev_time (float)
       avg_time_change (float): % change in avg time compared to reference
commit	92aa2c16f172cf15cf6b026ab7af9c72576772f5	[log] [tgz]
author	Zoltan Borok-Nagy <boroknagyz@cloudera.com>	Wed Dec 04 18:49:48 2019 +0100
committer	Impala Public Jenkins <impala-public-jenkins@cloudera.com>	Thu Dec 05 17:07:55 2019 +0000
tree	5e4b39bf1719ad7d13ad1504586cbc446c58515c
parent	30c7a6a18c85574ff76dc750dfef94475f1c9796 [diff]