blob: b56a8bd8f2da94f9cd4dcb3a03f89def4efdd1a8 [file] [log] [blame]
<!DOCTYPE html>
<html class="writer-html5" lang="en" >
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Benchmarks &mdash; Apache Arrow v2.0.0</title>
<link rel="stylesheet" href="../_static/css/theme.css" type="text/css" />
<link rel="stylesheet" href="../_static/pygments.css" type="text/css" />
<!--[if lt IE 9]>
<script src="../_static/js/html5shiv.min.js"></script>
<![endif]-->
<script type="text/javascript" id="documentation_options" data-url_root="../" src="../_static/documentation_options.js"></script>
<script src="../_static/jquery.js"></script>
<script src="../_static/underscore.js"></script>
<script src="../_static/doctools.js"></script>
<script src="../_static/language_data.js"></script>
<script type="text/javascript" src="../_static/js/theme.js"></script>
<link rel="canonical" href="https://arrow.apache.org/docs/developers/benchmarks.html" />
<link rel="index" title="Index" href="../genindex.html" />
<link rel="search" title="Search" href="../search.html" />
<link rel="next" title="Building the Documentation" href="documentation.html" />
<link rel="prev" title="Running Docker Builds" href="docker.html" />
<!-- Matomo -->
<script>
// Matomo command queue: reuse window._paq if it is already defined,
// otherwise start from an empty array.
var _paq = window._paq = window._paq || [];
/* tracker methods like "setCustomDimension" should be called before "trackPageView" */
// NOTE(review): the next two commands presumably make the tracker honor the
// browser's Do Not Track setting and avoid setting cookies — confirm against
// the Matomo JavaScript tracker documentation.
_paq.push(["setDoNotTrack", true]);
_paq.push(["disableCookies"]);
_paq.push(['trackPageView']);
_paq.push(['enableLinkTracking']);
(function() {
// Base URL shared by the tracking endpoint and the tracker script.
var u="https://analytics.apache.org/";
_paq.push(['setTrackerUrl', u+'matomo.php']);
_paq.push(['setSiteId', '20']);
// Create an async script element for matomo.js and insert it before the
// first script element found in the document.
var d=document, g=d.createElement('script'), s=d.getElementsByTagName('script')[0];
g.async=true; g.src=u+'matomo.js'; s.parentNode.insertBefore(g,s);
})();
</script>
<!-- End Matomo Code -->
</head>
<body class="wy-body-for-nav">
<div class="wy-grid-for-nav">
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
<div class="wy-side-scroll">
<div class="wy-side-nav-search" >
<a href="../index.html" class="icon icon-home" title="Documentation Home"> Apache Arrow
</a>
<div class="version">
2.0.0
</div>
<div role="search">
<form id="rtd-search-form" class="wy-form" action="../search.html" method="get">
<input type="text" name="q" placeholder="Search docs" aria-label="Search docs" />
<input type="hidden" name="check_keywords" value="yes" />
<input type="hidden" name="area" value="default" />
</form>
</div>
</div>
<div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="main navigation">
<p class="caption"><span class="caption-text">Specifications and Protocols</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../format/Versioning.html">Format Versioning and Stability</a></li>
<li class="toctree-l1"><a class="reference internal" href="../format/Columnar.html">Arrow Columnar Format</a></li>
<li class="toctree-l1"><a class="reference internal" href="../format/Flight.html">Arrow Flight RPC</a></li>
<li class="toctree-l1"><a class="reference internal" href="../format/Integration.html">Integration Testing</a></li>
<li class="toctree-l1"><a class="reference internal" href="../format/CDataInterface.html">The Arrow C data interface</a></li>
<li class="toctree-l1"><a class="reference internal" href="../format/CStreamInterface.html">The Arrow C stream interface</a></li>
<li class="toctree-l1"><a class="reference internal" href="../format/Other.html">Other Data Structures</a></li>
</ul>
<p class="caption"><span class="caption-text">Libraries</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../status.html">Implementation Status</a></li>
<li class="toctree-l1"><a class="reference external" href="https://arrow.apache.org/docs/c_glib/">C/GLib</a></li>
<li class="toctree-l1"><a class="reference internal" href="../cpp/index.html">C++</a></li>
<li class="toctree-l1"><a class="reference external" href="https://github.com/apache/arrow/blob/master/csharp/README.md">C#</a></li>
<li class="toctree-l1"><a class="reference external" href="https://godoc.org/github.com/apache/arrow/go/arrow">Go</a></li>
<li class="toctree-l1"><a class="reference internal" href="../java/index.html">Java</a></li>
<li class="toctree-l1"><a class="reference external" href="https://arrow.apache.org/docs/js/">JavaScript</a></li>
<li class="toctree-l1"><a class="reference external" href="https://github.com/apache/arrow/blob/master/matlab/README.md">MATLAB</a></li>
<li class="toctree-l1"><a class="reference internal" href="../python/index.html">Python</a></li>
<li class="toctree-l1"><a class="reference external" href="https://arrow.apache.org/docs/r/">R</a></li>
<li class="toctree-l1"><a class="reference external" href="https://github.com/apache/arrow/blob/master/ruby/README.md">Ruby</a></li>
<li class="toctree-l1"><a class="reference external" href="https://docs.rs/crate/arrow/">Rust</a></li>
</ul>
<p class="caption"><span class="caption-text">Development</span></p>
<ul class="current">
<li class="toctree-l1"><a class="reference internal" href="contributing.html">Contributing to Apache Arrow</a></li>
<li class="toctree-l1"><a class="reference internal" href="cpp/index.html">C++ Development</a></li>
<li class="toctree-l1"><a class="reference internal" href="python.html">Python Development</a></li>
<li class="toctree-l1"><a class="reference internal" href="archery.html">Daily Development using Archery</a></li>
<li class="toctree-l1"><a class="reference internal" href="crossbow.html">Packaging and Testing with Crossbow</a></li>
<li class="toctree-l1"><a class="reference internal" href="docker.html">Running Docker Builds</a></li>
<li class="toctree-l1 current"><a class="current reference internal" href="#">Benchmarks</a><ul>
<li class="toctree-l2"><a class="reference internal" href="#setup">Setup</a></li>
<li class="toctree-l2"><a class="reference internal" href="#running-the-benchmark-suite">Running the benchmark suite</a></li>
<li class="toctree-l2"><a class="reference internal" href="#comparison">Comparison</a><ul>
<li class="toctree-l3"><a class="reference internal" href="#iterating-efficiently">Iterating efficiently</a></li>
</ul>
</li>
<li class="toctree-l2"><a class="reference internal" href="#regression-detection">Regression detection</a><ul>
<li class="toctree-l3"><a class="reference internal" href="#writing-a-benchmark">Writing a benchmark</a></li>
</ul>
</li>
<li class="toctree-l2"><a class="reference internal" href="#scripting">Scripting</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="documentation.html">Building the Documentation</a></li>
</ul>
</div>
</div>
</nav>
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap">
<nav class="wy-nav-top" aria-label="top navigation">
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
<a href="../index.html">Apache Arrow</a>
</nav>
<div class="wy-nav-content">
<div class="rst-content">
<div role="navigation" aria-label="breadcrumbs navigation">
<ul class="wy-breadcrumbs">
<li><a href="../index.html" class="icon icon-home" aria-label="Documentation home"></a> &raquo;</li>
<li>Benchmarks</li>
<li class="wy-breadcrumbs-aside">
<a href="../_sources/developers/benchmarks.rst.txt" rel="nofollow"> View page source</a>
</li>
</ul>
<hr/>
</div>
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
<div itemprop="articleBody">
<div class="section" id="benchmarks">
<span id="id1"></span><h1>Benchmarks<a class="headerlink" href="#benchmarks" title="Permalink to this headline"></a></h1>
<div class="section" id="setup">
<h2>Setup<a class="headerlink" href="#setup" title="Permalink to this headline"></a></h2>
<p>First install the <a class="reference internal" href="archery.html#archery"><span class="std std-ref">Archery</span></a> utility to run the benchmark suite.</p>
</div>
<div class="section" id="running-the-benchmark-suite">
<h2>Running the benchmark suite<a class="headerlink" href="#running-the-benchmark-suite" title="Permalink to this headline"></a></h2>
<p>The benchmark suites can be run with the <code class="docutils literal notranslate"><span class="pre">benchmark</span> <span class="pre">run</span></code> sub-command.</p>
<div class="highlight-shell notranslate"><div class="highlight"><pre><span></span><span class="c1"># Run benchmarks in the current git workspace</span>
archery benchmark run
<span class="c1"># Storing the results in a file</span>
archery benchmark run --output<span class="o">=</span>run.json
</pre></div>
</div>
<p>Sometimes, it is required to pass custom CMake flags, e.g.</p>
<div class="highlight-shell notranslate"><div class="highlight"><pre><span></span><span class="nb">export</span> <span class="nv">CC</span><span class="o">=</span>clang-8 <span class="nv">CXX</span><span class="o">=</span>clang++-8
archery benchmark run --cmake-extras<span class="o">=</span><span class="s2">&quot;-DARROW_SIMD_LEVEL=NONE&quot;</span>
</pre></div>
</div>
</div>
<div class="section" id="comparison">
<h2>Comparison<a class="headerlink" href="#comparison" title="Permalink to this headline"></a></h2>
<p>One goal with benchmarking is to detect performance regressions. To this end,
<code class="docutils literal notranslate"><span class="pre">archery</span></code> implements a benchmark comparison facility via the <code class="docutils literal notranslate"><span class="pre">benchmark</span>
<span class="pre">diff</span></code> sub-command.</p>
<p>In the default invocation, it will compare the current source (known as the
current workspace in git) with the local master branch.</p>
<p>For more information, invoke the <code class="docutils literal notranslate"><span class="pre">archery</span> <span class="pre">benchmark</span> <span class="pre">diff</span> <span class="pre">--help</span></code> command for
multiple examples of invocation.</p>
<div class="section" id="iterating-efficiently">
<h3>Iterating efficiently<a class="headerlink" href="#iterating-efficiently" title="Permalink to this headline"></a></h3>
<p>Iterating with benchmark development can be a tedious process due to long
build times and long run times. Multiple tricks can be used with
<code class="docutils literal notranslate"><span class="pre">archery</span> <span class="pre">benchmark</span> <span class="pre">diff</span></code> to reduce this overhead.</p>
<p>First, the benchmark command supports comparing existing
build directories. This can be paired with the <code class="docutils literal notranslate"><span class="pre">--preserve</span></code> flag to
avoid rebuilding sources from scratch.</p>
<div class="highlight-shell notranslate"><div class="highlight"><pre><span></span><span class="c1"># The first invocation clones and checks out the sources in a temporary</span>
<span class="c1"># directory. The directory is preserved with --preserve</span>
archery benchmark diff --preserve
<span class="c1"># Modify C++ sources</span>
<span class="c1"># Re-run benchmark in the previously created build directory.</span>
archery benchmark diff /tmp/arrow-bench*/<span class="o">{</span>WORKSPACE,master<span class="o">}</span>/build
</pre></div>
</div>
<p>Second, a benchmark run result can be saved in a JSON file. This avoids not
only rebuilding the sources, but also re-executing the (sometimes) heavy
benchmarks. This technique can be used as a poor man’s cache.</p>
<div class="highlight-shell notranslate"><div class="highlight"><pre><span></span><span class="c1"># Run the benchmarks on a given commit and save the result</span>
archery benchmark run --output<span class="o">=</span>run-head-1.json HEAD~1
<span class="c1"># Compare the previous captured result with HEAD</span>
archery benchmark diff HEAD run-head-1.json
</pre></div>
</div>
<p>Third, the benchmark command supports filtering suites (<code class="docutils literal notranslate"><span class="pre">--suite-filter</span></code>)
and benchmarks (<code class="docutils literal notranslate"><span class="pre">--benchmark-filter</span></code>); both options support regular
expressions.</p>
<div class="highlight-shell notranslate"><div class="highlight"><pre><span></span><span class="c1"># Taking over a previous run, but only filtering for benchmarks matching</span>
<span class="c1"># `Kernel` and suite matching `compute-aggregate`.</span>
archery benchmark diff <span class="se">\</span>
--suite-filter<span class="o">=</span>compute-aggregate --benchmark-filter<span class="o">=</span>Kernel <span class="se">\</span>
/tmp/arrow-bench*/<span class="o">{</span>WORKSPACE,master<span class="o">}</span>/build
</pre></div>
</div>
</div>
</div>
<div class="section" id="regression-detection">
<h2>Regression detection<a class="headerlink" href="#regression-detection" title="Permalink to this headline"></a></h2>
<div class="section" id="writing-a-benchmark">
<h3>Writing a benchmark<a class="headerlink" href="#writing-a-benchmark" title="Permalink to this headline"></a></h3>
<ol class="arabic simple">
<li><p>The benchmark command will filter (by default) benchmarks with the regular
expression <code class="docutils literal notranslate"><span class="pre">^Regression</span></code>. This way, not all benchmarks are run by default.
Thus, if you want your benchmark to be verified for regression
automatically, the name must match.</p></li>
<li><p>The benchmark command will run with the <code class="docutils literal notranslate"><span class="pre">--benchmark_repetitions=K</span></code>
option for statistical significance. Thus, a benchmark should not override
the repetitions in the (C++) benchmark’s arguments definition.</p></li>
<li><p>Due to #2, a benchmark should run sufficiently fast. Often, when the input
does not fit in the CPU cache (L2/L3), the benchmark will be memory bound instead
of CPU bound. In this case, the input can be downsized.</p></li>
<li><p>By default, Google’s benchmark library will use the cputime metric, which
is the sum of runtime dedicated on the CPU for all threads of the process,
in contrast to realtime, which is the wall clock time, i.e. the difference
end_time - start_time. In a single-threaded model, the cputime is
preferable since it is less affected by context switching. In a multi-threaded
scenario, the cputime will give incorrect results since it’ll
be inflated by the number of threads and can be far off realtime. Thus, if
the benchmark is multi-threaded, it might be better to use
<code class="docutils literal notranslate"><span class="pre">SetRealtime()</span></code>, see this <a class="reference external" href="https://github.com/apache/arrow/blob/a9582ea6ab2db055656809a2c579165fe6a811ba/cpp/src/arrow/io/memory-benchmark.cc#L223-L227">example</a>.</p></li>
</ol>
</div>
</div>
<div class="section" id="scripting">
<h2>Scripting<a class="headerlink" href="#scripting" title="Permalink to this headline"></a></h2>
<p><code class="docutils literal notranslate"><span class="pre">archery</span></code> is written as a Python library with a command line frontend. The
library can be imported to automate some tasks.</p>
<p>Some invocations of the command line interface can be quite verbose due to build
output. This can be controlled/avoided with the <code class="docutils literal notranslate"><span class="pre">--quiet</span></code> option, e.g.</p>
<div class="highlight-shell notranslate"><div class="highlight"><pre><span></span>archery --quiet benchmark diff --benchmark-filter<span class="o">=</span>Kernel
<span class="o">{</span><span class="s2">&quot;benchmark&quot;</span>: <span class="s2">&quot;BenchSumKernel/32768/0&quot;</span>, <span class="s2">&quot;change&quot;</span>: -0.6498, <span class="s2">&quot;regression&quot;</span>: true, ...
<span class="o">{</span><span class="s2">&quot;benchmark&quot;</span>: <span class="s2">&quot;BenchSumKernel/32768/1&quot;</span>, <span class="s2">&quot;change&quot;</span>: <span class="m">0</span>.01553, <span class="s2">&quot;regression&quot;</span>: false, ...
...
</pre></div>
</div>
<p>or the <code class="docutils literal notranslate"><span class="pre">--output=&lt;file&gt;</span></code> option can be used, e.g.</p>
<div class="highlight-shell notranslate"><div class="highlight"><pre><span></span>archery benchmark diff --benchmark-filter<span class="o">=</span>Kernel --output<span class="o">=</span>compare.json
...
</pre></div>
</div>
</div>
</div>
</div>
</div>
<footer>
<div class="rst-footer-buttons" role="navigation" aria-label="footer navigation">
<a href="documentation.html" class="btn btn-neutral float-right" title="Building the Documentation" accesskey="n" rel="next">Next <span class="fa fa-arrow-circle-right"></span></a>
<a href="docker.html" class="btn btn-neutral float-left" title="Running Docker Builds" accesskey="p" rel="prev"><span class="fa fa-arrow-circle-left"></span> Previous</a>
</div>
<hr/>
<div role="contentinfo">
<p>
&copy; Copyright 2016-2019 Apache Software Foundation
</p>
</div>
Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a
<a href="https://github.com/rtfd/sphinx_rtd_theme">theme</a>
provided by <a href="https://readthedocs.org">Read the Docs</a>.
</footer>
</div>
</div>
</section>
</div>
<script type="text/javascript">
// When the DOM is ready, call the theme's Navigation.enable with `true`.
jQuery(document).ready(function () {
  SphinxRtdTheme.Navigation.enable(true);
});
</script>
<script type="text/javascript" src="/docs/_static/versionwarning.js"></script></body>
</html>