blob: 8119e4f8ff8cfe6557e54844f5c2d86bb3724a71 [file] [log] [blame]
<!DOCTYPE html>
<html class="writer-html5" lang="en" >
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Packaging and Testing with Crossbow &mdash; Apache Arrow v2.0.0</title>
<link rel="stylesheet" href="../_static/css/theme.css" type="text/css" />
<link rel="stylesheet" href="../_static/pygments.css" type="text/css" />
<!--[if lt IE 9]>
<script src="../_static/js/html5shiv.min.js"></script>
<![endif]-->
<script type="text/javascript" id="documentation_options" data-url_root="../" src="../_static/documentation_options.js"></script>
<script src="../_static/jquery.js"></script>
<script src="../_static/underscore.js"></script>
<script src="../_static/doctools.js"></script>
<script src="../_static/language_data.js"></script>
<script type="text/javascript" src="../_static/js/theme.js"></script>
<link rel="canonical" href="https://arrow.apache.org/docs/developers/crossbow.html" />
<link rel="index" title="Index" href="../genindex.html" />
<link rel="search" title="Search" href="../search.html" />
<link rel="next" title="Running Docker Builds" href="docker.html" />
<link rel="prev" title="Daily Development using Archery" href="archery.html" />
<!-- Matomo -->
<script>
/*
 * Matomo analytics bootstrap.
 * Reuses (or creates) the global `_paq` command queue, queues the tracker
 * configuration, and then injects the asynchronous matomo.js loader ahead of
 * the first <script> element in the document.
 */
var _paq = window._paq = window._paq || [];
(function() {
  var trackerBase = "https://analytics.apache.org/";
  /* tracker methods like "setCustomDimension" should be called before "trackPageView" */
  var queuedCommands = [
    ["setDoNotTrack", true],
    ["disableCookies"],
    ['trackPageView'],
    ['enableLinkTracking'],
    ['setTrackerUrl', trackerBase + 'matomo.php'],
    ['setSiteId', '20']
  ];
  /* Push one command at a time so the queue receives them in the listed order. */
  for (var idx = 0; idx < queuedCommands.length; idx += 1) {
    _paq.push(queuedCommands[idx]);
  }
  var loaderTag = document.createElement('script');
  var firstScriptTag = document.getElementsByTagName('script')[0];
  loaderTag.async = true;
  loaderTag.src = trackerBase + 'matomo.js';
  firstScriptTag.parentNode.insertBefore(loaderTag, firstScriptTag);
})();
</script>
<!-- End Matomo Code -->
</head>
<body class="wy-body-for-nav">
<div class="wy-grid-for-nav">
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
<div class="wy-side-scroll">
<div class="wy-side-nav-search" >
<a href="../index.html" class="icon icon-home" alt="Documentation Home"> Apache Arrow
</a>
<div class="version">
2.0.0
</div>
<div role="search">
<form id="rtd-search-form" class="wy-form" action="../search.html" method="get">
<input type="text" name="q" placeholder="Search docs" />
<input type="hidden" name="check_keywords" value="yes" />
<input type="hidden" name="area" value="default" />
</form>
</div>
</div>
<div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="main navigation">
<p class="caption"><span class="caption-text">Specifications and Protocols</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../format/Versioning.html">Format Versioning and Stability</a></li>
<li class="toctree-l1"><a class="reference internal" href="../format/Columnar.html">Arrow Columnar Format</a></li>
<li class="toctree-l1"><a class="reference internal" href="../format/Flight.html">Arrow Flight RPC</a></li>
<li class="toctree-l1"><a class="reference internal" href="../format/Integration.html">Integration Testing</a></li>
<li class="toctree-l1"><a class="reference internal" href="../format/CDataInterface.html">The Arrow C data interface</a></li>
<li class="toctree-l1"><a class="reference internal" href="../format/CStreamInterface.html">The Arrow C stream interface</a></li>
<li class="toctree-l1"><a class="reference internal" href="../format/Other.html">Other Data Structures</a></li>
</ul>
<p class="caption"><span class="caption-text">Libraries</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../status.html">Implementation Status</a></li>
<li class="toctree-l1"><a class="reference external" href="https://arrow.apache.org/docs/c_glib/">C/GLib</a></li>
<li class="toctree-l1"><a class="reference internal" href="../cpp/index.html">C++</a></li>
<li class="toctree-l1"><a class="reference external" href="https://github.com/apache/arrow/blob/master/csharp/README.md">C#</a></li>
<li class="toctree-l1"><a class="reference external" href="https://godoc.org/github.com/apache/arrow/go/arrow">Go</a></li>
<li class="toctree-l1"><a class="reference internal" href="../java/index.html">Java</a></li>
<li class="toctree-l1"><a class="reference external" href="https://arrow.apache.org/docs/js/">JavaScript</a></li>
<li class="toctree-l1"><a class="reference external" href="https://github.com/apache/arrow/blob/master/matlab/README.md">MATLAB</a></li>
<li class="toctree-l1"><a class="reference internal" href="../python/index.html">Python</a></li>
<li class="toctree-l1"><a class="reference external" href="https://arrow.apache.org/docs/r/">R</a></li>
<li class="toctree-l1"><a class="reference external" href="https://github.com/apache/arrow/blob/master/ruby/README.md">Ruby</a></li>
<li class="toctree-l1"><a class="reference external" href="https://docs.rs/crate/arrow/">Rust</a></li>
</ul>
<p class="caption"><span class="caption-text">Development</span></p>
<ul class="current">
<li class="toctree-l1"><a class="reference internal" href="contributing.html">Contributing to Apache Arrow</a></li>
<li class="toctree-l1"><a class="reference internal" href="cpp/index.html">C++ Development</a></li>
<li class="toctree-l1"><a class="reference internal" href="python.html">Python Development</a></li>
<li class="toctree-l1"><a class="reference internal" href="archery.html">Daily Development using Archery</a></li>
<li class="toctree-l1 current"><a class="current reference internal" href="#">Packaging and Testing with Crossbow</a><ul>
<li class="toctree-l2"><a class="reference internal" href="#architecture">Architecture</a><ul>
<li class="toctree-l3"><a class="reference internal" href="#executors">Executors</a></li>
<li class="toctree-l3"><a class="reference internal" href="#queue">Queue</a></li>
<li class="toctree-l3"><a class="reference internal" href="#scheduler">Scheduler</a></li>
</ul>
</li>
<li class="toctree-l2"><a class="reference internal" href="#install">Install</a></li>
<li class="toctree-l2"><a class="reference internal" href="#usage">Usage</a><ul>
<li class="toctree-l3"><a class="reference internal" href="#query-the-build-status">Query the build status</a></li>
<li class="toctree-l3"><a class="reference internal" href="#download-the-build-artifacts">Download the build artifacts</a></li>
<li class="toctree-l3"><a class="reference internal" href="#examples">Examples</a></li>
</ul>
</li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="docker.html">Running Docker Builds</a></li>
<li class="toctree-l1"><a class="reference internal" href="benchmarks.html">Benchmarks</a></li>
<li class="toctree-l1"><a class="reference internal" href="documentation.html">Building the Documentation</a></li>
</ul>
</div>
</div>
</nav>
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap">
<nav class="wy-nav-top" aria-label="top navigation">
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
<a href="../index.html">Apache Arrow</a>
</nav>
<div class="wy-nav-content">
<div class="rst-content">
<div role="navigation" aria-label="breadcrumbs navigation">
<ul class="wy-breadcrumbs">
<li><a href="../index.html" class="icon icon-home"></a> &raquo;</li>
<li>Packaging and Testing with Crossbow</li>
<li class="wy-breadcrumbs-aside">
<a href="../_sources/developers/crossbow.rst.txt" rel="nofollow"> View page source</a>
</li>
</ul>
<hr/>
</div>
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
<div itemprop="articleBody">
<div class="section" id="packaging-and-testing-with-crossbow">
<h1>Packaging and Testing with Crossbow<a class="headerlink" href="#packaging-and-testing-with-crossbow" title="Permalink to this headline"></a></h1>
<p>The contents of the <code class="docutils literal notranslate"><span class="pre">arrow/dev/tasks</span></code> directory aim to automate the process of
Arrow packaging and integration testing.</p>
<dl class="simple">
<dt>Packages:</dt><dd><ul class="simple">
<li><p>C++ and Python <a class="reference external" href="conda-recipes">conda-forge packages</a> for Linux, Mac and Windows</p></li>
<li><p>Python <a class="reference external" href="python-wheels">Wheels</a> for Linux, Mac and Windows</p></li>
<li><p>C++ and GLib <a class="reference external" href="linux-packages">Linux packages</a> for multiple distributions</p></li>
<li><p>Java for Gandiva</p></li>
</ul>
</dd>
<dt>Integration tests:</dt><dd><ul class="simple">
<li><p>Various docker tests</p></li>
<li><p>Pandas</p></li>
<li><p>Dask</p></li>
<li><p>Turbodbc</p></li>
<li><p>HDFS</p></li>
<li><p>Spark</p></li>
</ul>
</dd>
</dl>
<div class="section" id="architecture">
<h2>Architecture<a class="headerlink" href="#architecture" title="Permalink to this headline"></a></h2>
<div class="section" id="executors">
<h3>Executors<a class="headerlink" href="#executors" title="Permalink to this headline"></a></h3>
<p>Individual jobs are executed on public CI services, currently:</p>
<ul class="simple">
<li><p>Linux: TravisCI, CircleCI, Azure Pipelines</p></li>
<li><p>Mac: TravisCI, Azure Pipelines</p></li>
<li><p>Windows: AppVeyor, Azure Pipelines</p></li>
</ul>
</div>
<div class="section" id="queue">
<h3>Queue<a class="headerlink" href="#queue" title="Permalink to this headline"></a></h3>
<p>Because of the nature of how the CI services work, the scheduling of
jobs happens through an additional git repository, which acts like a job
queue for the tasks. Anyone can host a <code class="docutils literal notranslate"><span class="pre">queue</span></code> repository, which is usually
called <code class="docutils literal notranslate"><span class="pre">crossbow</span></code>.</p>
<p>A job is a git commit on a particular git branch, containing only the required
configuration file to run the requested build (like <code class="docutils literal notranslate"><span class="pre">.travis.yml</span></code>,
<code class="docutils literal notranslate"><span class="pre">appveyor.yml</span></code> or <code class="docutils literal notranslate"><span class="pre">azure-pipelines.yml</span></code>).</p>
</div>
<div class="section" id="scheduler">
<h3>Scheduler<a class="headerlink" href="#scheduler" title="Permalink to this headline"></a></h3>
<p><a class="reference external" href="crossbow.py">Crossbow.py</a> handles version generation, task rendering and
submission. The tasks are defined in <code class="docutils literal notranslate"><span class="pre">tasks.yml</span></code>.</p>
</div>
</div>
<div class="section" id="install">
<h2>Install<a class="headerlink" href="#install" title="Permalink to this headline"></a></h2>
<blockquote>
<div><p>The following guide depends on GitHub, but theoretically any git
server can be used.</p>
</div></blockquote>
<ol class="arabic">
<li><p><a class="reference external" href="https://help.github.com/articles/creating-a-new-repository">Create the queue repository</a></p></li>
<li><p>Enable <a class="reference external" href="https://travis-ci.org/getting_started">TravisCI</a>, <a class="reference external" href="https://www.appveyor.com/docs/">Appveyor</a>, <a class="reference external" href="https://azure.microsoft.com/en-us/services/devops/pipelines/">Azure Pipelines</a> and <a class="reference external" href="https://circleci.com/docs/2.0/getting-started/">CircleCI</a>
integrations for the newly created queue repository.</p>
<ul class="simple">
<li><p>turn off Travis’ <a class="reference external" href="https://docs.travis-ci.com/user/customizing-the-build/#Building-only-the-latest-commit">auto cancellation</a> feature on branches</p></li>
</ul>
</li>
<li><p>Clone the newly created repository next to the arrow repository:</p>
<p>By default the script looks for <code class="docutils literal notranslate"><span class="pre">crossbow</span></code> next to the arrow repository, but
this can be configured through command line arguments.</p>
<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>git clone https://github.com/&lt;user&gt;/crossbow crossbow
</pre></div>
</div>
<p><strong>Important note:</strong> Crossbow only supports GitHub token based
authentication. Although it overwrites the repository urls provided with ssh
protocol, it’s advisable to use the HTTPS repository URLs.</p>
</li>
<li><p><a class="reference external" href="https://help.github.com/articles/creating-a-personal-access-token-for-the-command-line/">Create a Personal Access Token</a> with <code class="docutils literal notranslate"><span class="pre">repo</span></code> permissions (other
permissions are not needed)</p></li>
<li><p>Locally export the token as an environment variable:</p>
<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span><span class="nb">export</span> <span class="nv">CROSSBOW_GITHUB_TOKEN</span><span class="o">=</span>&lt;token&gt;
</pre></div>
</div>
<blockquote>
<div><p>or pass as an argument to the CLI script <code class="docutils literal notranslate"><span class="pre">--github-token</span></code></p>
</div></blockquote>
</li>
<li><p>Export the previously created GitHub token on each CI service:</p>
<p>Use <code class="docutils literal notranslate"><span class="pre">CROSSBOW_GITHUB_TOKEN</span></code> encrypted environment variable. You can
set them at the following URLs, where <code class="docutils literal notranslate"><span class="pre">ghuser</span></code> is the GitHub
username and <code class="docutils literal notranslate"><span class="pre">ghrepo</span></code> is the GitHub repository name (typically
<code class="docutils literal notranslate"><span class="pre">crossbow</span></code>):</p>
<ul class="simple">
<li><p>TravisCI: <code class="docutils literal notranslate"><span class="pre">https://travis-ci.org/&lt;ghuser&gt;/&lt;ghrepo&gt;/settings</span></code></p></li>
<li><p>Appveyor:
<code class="docutils literal notranslate"><span class="pre">https://ci.appveyor.com/project/&lt;ghuser&gt;/&lt;ghrepo&gt;/settings/environment</span></code></p></li>
<li><p>CircleCI:
<code class="docutils literal notranslate"><span class="pre">https://circleci.com/gh/&lt;ghuser&gt;/&lt;ghrepo&gt;/edit#env-vars</span></code></p></li>
</ul>
<p>On Appveyor check the <code class="docutils literal notranslate"><span class="pre">skip</span> <span class="pre">branches</span> <span class="pre">without</span> <span class="pre">appveyor.yml</span></code> checkbox
on the web UI under crossbow repository’s settings.</p>
</li>
<li><p>Install Python (minimum supported version is 3.6):</p>
<p>Miniconda is preferred, see installation instructions:
<a class="reference external" href="https://conda.io/docs/user-guide/install/index.html">https://conda.io/docs/user-guide/install/index.html</a></p>
</li>
<li><p>Install the python dependencies for the script:</p>
<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>conda install -c conda-forge -y --file arrow/ci/conda_env_crossbow.txt
</pre></div>
</div>
<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span><span class="c1"># pygit2 requires libgit2: http://www.pygit2.org/install.html</span>
pip install <span class="se">\</span>
jinja2 <span class="se">\</span>
pygit2 <span class="se">\</span>
click <span class="se">\</span>
ruamel.yaml <span class="se">\</span>
setuptools_scm <span class="se">\</span>
github3.py <span class="se">\</span>
toolz <span class="se">\</span>
jira
</pre></div>
</div>
</li>
<li><p>Try running it:</p>
<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>$ python crossbow.py --help
</pre></div>
</div>
</li>
</ol>
</div>
<div class="section" id="usage">
<h2>Usage<a class="headerlink" href="#usage" title="Permalink to this headline"></a></h2>
<p>The script does the following:</p>
<ol class="arabic">
<li><p>Detects the current repository, thus supports forks. The following
snippet will build kszucs’s fork instead of the upstream apache/arrow
repository.</p>
<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>$ git clone https://github.com/kszucs/arrow
$ git clone https://github.com/kszucs/crossbow
$ <span class="nb">cd</span> arrow/dev/tasks
$ python crossbow.py submit --help <span class="c1"># show the available options</span>
$ python crossbow.py submit conda-win conda-linux conda-osx
</pre></div>
</div>
</li>
<li><p>Gets the HEAD commit of the currently checked out branch and
generates the version number based on <a class="reference external" href="https://pypi.python.org/pypi/setuptools_scm">setuptools_scm</a>. So to build
a particular branch, check it out before running the script:</p>
<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>git checkout ARROW-&lt;ticket number&gt;
python dev/tasks/crossbow.py submit --dry-run conda-linux conda-osx
</pre></div>
</div>
<blockquote>
<div><p>Note that the arrow branch must be pushed beforehand, because the
script will clone the selected branch.</p>
</div></blockquote>
</li>
<li><p>Reads and renders the required build configurations with the
parameters substituted.</p></li>
<li><p>Creates a branch per task, prefixed with the job id. For example to
build conda recipes on linux it will create a new branch:
<code class="docutils literal notranslate"><span class="pre">crossbow&#64;build-&lt;id&gt;-conda-linux</span></code>.</p></li>
<li><p>Pushes the modified branches to GitHub which triggers the builds. For
authentication it uses GitHub OAuth tokens described in the install
section.</p></li>
</ol>
<div class="section" id="query-the-build-status">
<h3>Query the build status<a class="headerlink" href="#query-the-build-status" title="Permalink to this headline"></a></h3>
<p>The build id (which has a corresponding branch in the queue repository) is returned
by the <code class="docutils literal notranslate"><span class="pre">submit</span></code> command.</p>
<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>python crossbow.py status &lt;build id / branch name&gt;
</pre></div>
</div>
</div>
<div class="section" id="download-the-build-artifacts">
<h3>Download the build artifacts<a class="headerlink" href="#download-the-build-artifacts" title="Permalink to this headline"></a></h3>
<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>python crossbow.py artifacts &lt;build id / branch name&gt;
</pre></div>
</div>
</div>
<div class="section" id="examples">
<h3>Examples<a class="headerlink" href="#examples" title="Permalink to this headline"></a></h3>
<p>The submit command accepts a list of task names and/or a list of task-group names
to select which tasks to build.</p>
<p>Run multiple builds:</p>
<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>$ python crossbow.py submit debian-stretch conda-linux-gcc-py37
Repository: https://github.com/kszucs/arrow@tasks
Commit SHA: 810a718836bb3a8cefc053055600bdcc440e6702
Version: <span class="m">0</span>.9.1.dev48+g810a7188.d20180414
Pushed branches:
- debian-stretch
- conda-linux-gcc-py37
</pre></div>
</div>
<p>Just render without applying or committing the changes:</p>
<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>$ python crossbow.py submit --dry-run task_name
</pre></div>
</div>
<p>Run only <code class="docutils literal notranslate"><span class="pre">conda</span></code> package builds and a Linux one:</p>
<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>$ python crossbow.py submit --group conda centos-7
</pre></div>
</div>
<p>Run <code class="docutils literal notranslate"><span class="pre">wheel</span></code> builds:</p>
<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>$ python crossbow.py submit --group wheel
</pre></div>
</div>
<p>There are multiple task groups in the <code class="docutils literal notranslate"><span class="pre">tasks.yml</span></code> like docker, integration
and cpp-python for running docker based tests.</p>
<p><code class="docutils literal notranslate"><span class="pre">python</span> <span class="pre">crossbow.py</span> <span class="pre">submit</span></code> supports multiple options and arguments; for more details,
see its help page:</p>
<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>$ python crossbow.py submit --help
</pre></div>
</div>
</div>
</div>
</div>
</div>
</div>
<footer>
<div class="rst-footer-buttons" role="navigation" aria-label="footer navigation">
<a href="docker.html" class="btn btn-neutral float-right" title="Running Docker Builds" accesskey="n" rel="next">Next <span class="fa fa-arrow-circle-right"></span></a>
<a href="archery.html" class="btn btn-neutral float-left" title="Daily Development using Archery" accesskey="p" rel="prev"><span class="fa fa-arrow-circle-left"></span> Previous</a>
</div>
<hr/>
<div role="contentinfo">
<p>
&copy; Copyright 2016-2019 Apache Software Foundation
</p>
</div>
Built with <a href="http://sphinx-doc.org/">Sphinx</a> using a
<a href="https://github.com/rtfd/sphinx_rtd_theme">theme</a>
provided by <a href="https://readthedocs.org">Read the Docs</a>.
</footer>
</div>
</div>
</section>
</div>
<script type="text/javascript">
// Once the DOM is ready, switch on the theme's navigation behaviour.
// NOTE(review): the meaning of the `true` argument is defined in theme.js
// (presumably sticky navigation) - confirm there before changing it.
jQuery(document).ready(function () {
  SphinxRtdTheme.Navigation.enable(true);
});
</script>
<script type="text/javascript" src="/docs/_static/versionwarning.js"></script></body>
</html>