 # dev/archery/archery/benchmark/compare.py - arrow-experimental-rs-parquet2 - Git at Google

 # Licensed to the Apache Software Foundation (ASF) under one
 # or more contributor license agreements.  See the NOTICE file
 # distributed with this work for additional information
 # regarding copyright ownership.  The ASF licenses this file
 # to you under the Apache License, Version 2.0 (the
 # "License"); you may not use this file except in compliance
 # with the License.  You may obtain a copy of the License at
 #
 #   http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing,
 # software distributed under the License is distributed on an
 # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 # KIND, either express or implied.  See the License for the
 # specific language governing permissions and limitations
 # under the License.


 # Default regression threshold, expressed as a fraction (0.05 == 5%).
 # BenchmarkComparator reports a regression when the relative change, taken
 # in the unfavourable direction, is strictly greater than this value.
 # The 5% figure is purely subjective and flawed: every comparison looks at
 # one contender against one baseline, so cumulative regression is not tracked.
 DEFAULT_THRESHOLD = 0.05


 def items_per_seconds_fmt(value):
     """ Format an items/sec rate using decimal (power of 1000) prefixes.

     Values below 1000 are printed unchanged; larger values are scaled to
     K, M or G and printed with three decimals.
     """
     if value < 1000:
         return "{} items/sec".format(value)

     # (exclusive upper bound, template, divisor) for each scaled prefix.
     scaled_prefixes = (
         (1000**2, "{:.3f}K items/sec", 1000),
         (1000**3, "{:.3f}M items/sec", 1000**2),
     )
     for upper_bound, template, divisor in scaled_prefixes:
         if value < upper_bound:
             return template.format(value / divisor)

     # Anything at or above 1000**3 is reported in G, with no upper cap.
     return "{:.3f}G items/sec".format(value / 1000**3)


 def bytes_per_seconds_fmt(value):
     """ Format a bytes/sec rate using binary (power of 1024) prefixes.

     Values below 1024 are printed unchanged; larger values are scaled to
     KiB, MiB, GiB or TiB and printed with three decimals.
     """
     if value < 1024:
         return "{} bytes/sec".format(value)

     # (exclusive upper bound, template, divisor) for each scaled prefix.
     scaled_prefixes = (
         (1024**2, "{:.3f} KiB/sec", 1024),
         (1024**3, "{:.3f} MiB/sec", 1024**2),
         (1024**4, "{:.3f} GiB/sec", 1024**3),
     )
     for upper_bound, template, divisor in scaled_prefixes:
         if value < upper_bound:
             return template.format(value / divisor)

     # Anything at or above 1024**4 is reported in TiB, with no upper cap.
     return "{:.3f} TiB/sec".format(value / 1024**4)


 def change_fmt(value):
     """ Render a fractional change as a percentage with three decimals. """
     percent_template = "{:.3%}"
     return percent_template.format(value)


 def formatter_for_unit(unit):
     """ Pick the function used to display a value measured in `unit`.

     Returns the bytes/sec or items/sec formatter for the matching unit
     name, and a pass-through function for any other unit.
     """
     if unit == "bytes_per_second":
         return bytes_per_seconds_fmt
     if unit == "items_per_second":
         return items_per_seconds_fmt
     # Unknown units: hand the value back untouched.
     return lambda x: x


 class BenchmarkComparator:
     """ Compares a contender benchmark against a baseline benchmark.

     Holds the pair of benchmarks together with the regression threshold,
     and derives the relative change and the regression verdict from them.
     Descriptive attributes (name, unit, less_is_better, counters) are
     always read from the baseline.
     """

     def __init__(self, contender, baseline, threshold=DEFAULT_THRESHOLD,
                  suite_name=None):
         self.contender, self.baseline = contender, baseline
         self.threshold = threshold
         self.suite_name = suite_name

     @property
     def name(self):
         """ Benchmark name, as reported by the baseline. """
         return self.baseline.name

     @property
     def less_is_better(self):
         """ The baseline's `less_is_better` flag. """
         return self.baseline.less_is_better

     @property
     def unit(self):
         """ Unit of the measured value, as reported by the baseline. """
         return self.baseline.unit

     @property
     def change(self):
         """ Relative change from baseline to contender, as a fraction.

         A zero baseline yields 0.0 whatever the contender value is, which
         also avoids dividing by zero.
         """
         contender_value = self.contender.value
         baseline_value = self.baseline.value

         if baseline_value == 0:
             return 0.0

         delta = float(contender_value - baseline_value)
         return delta / abs(baseline_value)

     @property
     def confidence(self):
         """ Indicate if a comparison of benchmarks should be trusted.

         This implementation always answers True.
         """
         return True

     @property
     def regression(self):
         """ Whether the contender is worse than the baseline by more than
         the threshold.

         The sign of the change is flipped when bigger values are better, so
         that a positive number always means "got worse".
         """
         worsening = self.change
         if not self.less_is_better:
             worsening = -worsening
         return (self.confidence and worsening > self.threshold)

     @property
     def formatted(self):
         """ Comparison as a dict of display-ready values.

         Same keys as `compare()`, with the change rendered as a percentage,
         the baseline/contender values rendered by the unit's formatter, and
         the counters converted with `str`.
         """
         render = formatter_for_unit(self.unit)
         return dict(
             benchmark=self.name,
             change=change_fmt(self.change),
             regression=self.regression,
             baseline=render(self.baseline.value),
             contender=render(self.contender.value),
             unit=self.unit,
             less_is_better=self.less_is_better,
             counters=str(self.baseline.counters),
         )

     def compare(self, comparator=None):
         """ Comparison as a dict of raw (unformatted) values.

         `comparator` is accepted but not used by this implementation.
         """
         return dict(
             benchmark=self.name,
             change=self.change,
             regression=self.regression,
             baseline=self.baseline.value,
             contender=self.contender.value,
             unit=self.unit,
             less_is_better=self.less_is_better,
             counters=self.baseline.counters,
         )

     def __call__(self, **kwargs):
         """ Calling the comparator is a shorthand for `compare(**kwargs)`. """
         return self.compare(**kwargs)


 def pairwise_compare(contender, baseline):
     """ Pair up the elements of two iterables that share the same name.

     Both arguments are iterables of objects exposing a `name` attribute.
     Yields `(name, (contender_element, baseline_element))` for every name
     found on both sides; names present on only one side are skipped. When
     a name is repeated within one iterable, its last element is used.

     Pairs are produced in the order in which names first appear in
     `contender`, so repeated runs report results in the same order.
     """
     dict_contender = {e.name: e for e in contender}
     dict_baseline = {e.name: e for e in baseline}

     # Walk the contender mapping instead of a set intersection of the keys:
     # a set of strings iterates in a hash-dependent order that can change
     # from one interpreter run to the next.
     for name, element in dict_contender.items():
         if name in dict_baseline:
             yield name, (element, dict_baseline[name])


 class RunnerComparator:
     """ Compares suites/benchmarks coming from two runners.

     It is up to the caller to ensure that the runners are compatible (both
     from the same language implementation).
     """

     def __init__(self, contender, baseline, threshold=DEFAULT_THRESHOLD):
         self.contender, self.baseline = contender, baseline
         self.threshold = threshold

     @property
     def comparisons(self):
         """ Yield a BenchmarkComparator per benchmark found in both runners.

         Suites are matched by name first, then benchmarks are matched by
         name within each pair of suites. Every comparator receives this
         runner comparator's threshold and the name of the suite.
         """
         matched_suites = pairwise_compare(self.contender.suites,
                                           self.baseline.suites)

         for suite_name, (cont_suite, base_suite) in matched_suites:
             matched_benchmarks = pairwise_compare(cont_suite.benchmarks,
                                                   base_suite.benchmarks)

             for _, (cont_bench, base_bench) in matched_benchmarks:
                 yield BenchmarkComparator(cont_bench, base_bench,
                                           threshold=self.threshold,
                                           suite_name=suite_name)
	# Licensed to the Apache Software Foundation (ASF) under one
	# or more contributor license agreements. See the NOTICE file
	# distributed with this work for additional information
	# regarding copyright ownership. The ASF licenses this file
	# to you under the Apache License, Version 2.0 (the
	# "License"); you may not use this file except in compliance
	# with the License. You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing,
	# software distributed under the License is distributed on an
	# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
	# KIND, either express or implied. See the License for the
	# specific language governing permissions and limitations
	# under the License.


	# Default regression threshold, expressed as a fraction (0.05 == 5%).
	# A benchmark comparison reports a regression when the relative change,
	# taken in the unfavourable direction, is strictly greater than this value.
	# The 5% figure is purely subjective and flawed; cumulative regression is
	# not tracked.
	DEFAULT_THRESHOLD = 0.05


	def items_per_seconds_fmt(value):
	    """ Format an items/sec rate using decimal (power of 1000) prefixes.

	    Values below 1000 are printed unchanged; larger values are scaled to
	    K, M or G and printed with three decimals.
	    """
	    if value < 1000:
	        return "{} items/sec".format(value)
	    if value < 1000**2:
	        return "{:.3f}K items/sec".format(value / 1000)
	    if value < 1000**3:
	        return "{:.3f}M items/sec".format(value / 1000**2)
	    # Anything at or above 1000**3 is reported in G, with no upper cap.
	    return "{:.3f}G items/sec".format(value / 1000**3)


	def bytes_per_seconds_fmt(value):
	    """ Format a bytes/sec rate using binary (power of 1024) prefixes.

	    Values below 1024 are printed unchanged; larger values are scaled to
	    KiB, MiB, GiB or TiB and printed with three decimals.
	    """
	    if value < 1024:
	        return "{} bytes/sec".format(value)
	    if value < 1024**2:
	        return "{:.3f} KiB/sec".format(value / 1024)
	    if value < 1024**3:
	        return "{:.3f} MiB/sec".format(value / 1024**2)
	    if value < 1024**4:
	        return "{:.3f} GiB/sec".format(value / 1024**3)
	    # Anything at or above 1024**4 is reported in TiB, with no upper cap.
	    return "{:.3f} TiB/sec".format(value / 1024**4)


	def change_fmt(value):
	    """ Render a fractional change as a percentage with three decimals. """
	    return "{:.3%}".format(value)


	def formatter_for_unit(unit):
	    """ Pick the function used to display a value measured in `unit`.

	    Returns the bytes/sec or items/sec formatter for the matching unit
	    name, and a pass-through function for any other unit.
	    """
	    if unit == "bytes_per_second":
	        return bytes_per_seconds_fmt
	    elif unit == "items_per_second":
	        return items_per_seconds_fmt
	    else:
	        # Unknown units: hand the value back untouched.
	        return lambda x: x


	class BenchmarkComparator:
	    """ Compares two benchmarks.

	    Encodes the logic of comparing two benchmarks and taking a decision on
	    whether the contender induces a regression. Descriptive attributes
	    (name, unit, less_is_better, counters) are read from the baseline.
	    """

	    def __init__(self, contender, baseline, threshold=DEFAULT_THRESHOLD,
	                 suite_name=None):
	        self.contender = contender
	        self.baseline = baseline
	        self.threshold = threshold
	        self.suite_name = suite_name

	    @property
	    def name(self):
	        """ Benchmark name, as reported by the baseline. """
	        return self.baseline.name

	    @property
	    def less_is_better(self):
	        """ The baseline's `less_is_better` flag. """
	        return self.baseline.less_is_better

	    @property
	    def unit(self):
	        """ Unit of the measured value, as reported by the baseline. """
	        return self.baseline.unit

	    @property
	    def change(self):
	        """ Relative change from baseline to contender, as a fraction. """
	        new = self.contender.value
	        old = self.baseline.value

	        # A zero baseline gives no meaningful ratio (and would divide by
	        # zero); it is reported as "no change" whatever the contender is.
	        if old == 0:
	            return 0.0

	        return float(new - old) / abs(old)

	    @property
	    def confidence(self):
	        """ Indicate if a comparison of benchmarks should be trusted. """
	        return True

	    @property
	    def regression(self):
	        """ Whether the contender is worse than the baseline by more than
	        the threshold.
	        """
	        change = self.change
	        # Flip the sign when bigger is better, so that a positive number
	        # always means "got worse".
	        adjusted_change = change if self.less_is_better else -change
	        return (self.confidence and adjusted_change > self.threshold)

	    @property
	    def formatted(self):
	        """ Comparison as a dict of display-ready values. """
	        fmt = formatter_for_unit(self.unit)
	        return {
	            "benchmark": self.name,
	            "change": change_fmt(self.change),
	            "regression": self.regression,
	            "baseline": fmt(self.baseline.value),
	            "contender": fmt(self.contender.value),
	            "unit": self.unit,
	            "less_is_better": self.less_is_better,
	            "counters": str(self.baseline.counters)
	        }

	    def compare(self, comparator=None):
	        """ Comparison as a dict of raw (unformatted) values.

	        `comparator` is accepted but not used by this implementation.
	        """
	        return {
	            "benchmark": self.name,
	            "change": self.change,
	            "regression": self.regression,
	            "baseline": self.baseline.value,
	            "contender": self.contender.value,
	            "unit": self.unit,
	            "less_is_better": self.less_is_better,
	            "counters": self.baseline.counters
	        }

	    def __call__(self, **kwargs):
	        """ Calling the comparator is a shorthand for `compare(**kwargs)`. """
	        return self.compare(**kwargs)


	def pairwise_compare(contender, baseline):
	    """ Pair up the elements of two iterables that share the same name.

	    Both arguments are iterables of objects exposing a `name` attribute.
	    Yields `(name, (contender_element, baseline_element))` for every name
	    found on both sides; names present on only one side are skipped.

	    Pairs are produced in the order in which names first appear in
	    `contender`, so repeated runs report results in the same order.
	    """
	    dict_contender = {e.name: e for e in contender}
	    dict_baseline = {e.name: e for e in baseline}

	    # Walk the contender mapping instead of a set intersection of the keys:
	    # a set of strings iterates in a hash-dependent order that can change
	    # from one interpreter run to the next.
	    for name, element in dict_contender.items():
	        if name in dict_baseline:
	            yield name, (element, dict_baseline[name])


	class RunnerComparator:
	    """ Compares suites/benchmarks from runners.

	    It is up to the caller to ensure that runners are compatible (both from
	    the same language implementation).
	    """

	    def __init__(self, contender, baseline, threshold=DEFAULT_THRESHOLD):
	        self.contender = contender
	        self.baseline = baseline
	        self.threshold = threshold

	    @property
	    def comparisons(self):
	        """ Yield a BenchmarkComparator per benchmark found in both runners.

	        Suites are matched by name first, then benchmarks are matched by
	        name within each pair of suites.
	        """
	        contender = self.contender.suites
	        baseline = self.baseline.suites
	        suites = pairwise_compare(contender, baseline)

	        for suite_name, (suite_cont, suite_base) in suites:
	            benchmarks = pairwise_compare(
	                suite_cont.benchmarks, suite_base.benchmarks)

	            for _, (bench_cont, bench_base) in benchmarks:
	                yield BenchmarkComparator(bench_cont, bench_base,
	                                          threshold=self.threshold,
	                                          suite_name=suite_name)