<!DOCTYPE html>
<html class="writer-html5" lang="en" >
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Auto-scheduling a Neural Network for x86 CPU &mdash; tvm 0.17.dev0 documentation</title>
<link rel="stylesheet" href="https://maxcdn.bootstrapcdn.com/bootstrap/4.0.0/css/bootstrap.min.css" integrity="sha384-Gn5384xqQ1aoWXA+058RXPxPg6fy4IWvTNh0E263XmFcJlSAwiGgFAW/dAiS6JXm" crossorigin="anonymous">
<link rel="stylesheet" href="../../_static/css/theme.css" type="text/css" />
<link rel="stylesheet" href="../../_static/pygments.css" type="text/css" />
<link rel="stylesheet" href="../../_static/css/theme.css" type="text/css" />
<link rel="stylesheet" href="../../_static/sg_gallery.css" type="text/css" />
<link rel="stylesheet" href="../../_static/sg_gallery-binder.css" type="text/css" />
<link rel="stylesheet" href="../../_static/sg_gallery-dataframe.css" type="text/css" />
<link rel="stylesheet" href="../../_static/sg_gallery-rendered-html.css" type="text/css" />
<link rel="stylesheet" href="../../_static/pygments.css" type="text/css" />
<link rel="stylesheet" href="../../_static/css/tlcpack_theme.css" type="text/css" />
<link rel="shortcut icon" href="../../_static/tvm-logo-square.png"/>
<script type="text/javascript" id="documentation_options" data-url_root="../../" src="../../_static/documentation_options.js"></script>
<script src="../../_static/jquery.js"></script>
<script src="../../_static/underscore.js"></script>
<script src="../../_static/doctools.js"></script>
<script type="text/javascript" src="../../_static/js/theme.js"></script>
<script type="text/javascript" src="../../_static/js/tlcpack_theme.js"></script>
<link rel="index" title="Index" href="../../genindex.html" />
<link rel="search" title="Search" href="../../search.html" />
<link rel="next" title="Auto-scheduling a Neural Network for NVIDIA GPU" href="tune_network_cuda.html" />
<link rel="prev" title="Auto-scheduling a Convolution Layer for GPU" href="tune_conv2d_layer_cuda.html" />
</head>
<body class="wy-body-for-nav">
<div class="wy-grid-for-nav">
<header class="header">
<div class="innercontainer">
<div class="headerInner d-flex justify-content-between align-items-center">
<div class="headerLogo">
<a href="https://tvm.apache.org/"><img src=https://tvm.apache.org/assets/images/logo.svg alt="logo"></a>
</div>
<div id="headMenu" class="headerNav">
<button type="button" id="closeHeadMenu" class="navCloseBtn"><img src="../../_static/img/close-icon.svg" alt="Close"></button>
<ul class="nav">
<li class="nav-item">
<a class="nav-link" href=https://tvm.apache.org/community>Community</a>
</li>
<li class="nav-item">
<a class="nav-link" href=https://tvm.apache.org/download>Download</a>
</li>
<li class="nav-item">
<a class="nav-link" href=https://tvm.apache.org/vta>VTA</a>
</li>
<li class="nav-item">
<a class="nav-link" href=https://tvm.apache.org/blog>Blog</a>
</li>
<li class="nav-item">
<a class="nav-link" href=https://tvm.apache.org/docs>Docs</a>
</li>
<li class="nav-item">
<a class="nav-link" href=https://tvmconf.org>Conference</a>
</li>
<li class="nav-item">
<a class="nav-link" href=https://github.com/apache/tvm/>Github</a>
</li>
</ul>
<div class="responsivetlcdropdown">
<button type="button" class="btn-link">
ASF
</button>
<ul>
<li>
<a href="https://apache.org/">Apache Homepage</a>
</li>
<li>
<a href="https://www.apache.org/licenses/">License</a>
</li>
<li>
<a href="https://www.apache.org/foundation/sponsorship.html">Sponsorship</a>
</li>
<li>
<a href="https://www.apache.org/security/">Security</a>
</li>
<li>
<a href="https://www.apache.org/foundation/thanks.html">Thanks</a>
</li>
<li>
<a href="https://www.apache.org/events/current-event">Events</a>
</li>
</ul>
</div>
</div>
<div class="responsiveMenuIcon">
<button type="button" id="menuBtn" class="btn-menu"><img src="../../_static/img/menu-icon.svg" alt="Menu Icon"></button>
</div>
<div class="tlcDropdown">
<div class="dropdown">
<button type="button" class="btn-link dropdown-toggle" data-toggle="dropdown" aria-haspopup="true" aria-expanded="false">
ASF
</button>
<div class="dropdown-menu dropdown-menu-right">
<ul>
<li>
<a href="https://apache.org/">Apache Homepage</a>
</li>
<li>
<a href="https://www.apache.org/licenses/">License</a>
</li>
<li>
<a href="https://www.apache.org/foundation/sponsorship.html">Sponsorship</a>
</li>
<li>
<a href="https://www.apache.org/security/">Security</a>
</li>
<li>
<a href="https://www.apache.org/foundation/thanks.html">Thanks</a>
</li>
<li>
<a href="https://www.apache.org/events/current-event">Events</a>
</li>
</ul>
</div>
</div>
</div>
</div>
</div>
</header>
<nav data-toggle="wy-nav-shift" class="wy-nav-side fixed">
<div class="wy-side-scroll">
<div class="wy-side-nav-search" >
<a href="../../index.html">
<img src="../../_static/tvm-logo-small.png" class="logo" alt="Logo"/>
</a>
<input type="checkbox" class="version-toggle-box" hidden id="version-toggle">
<label for="version-toggle" class="version-toggle-label">
<div tabindex="0" class="version version-selector version-selector-show">
0.17.dev0 <span class="chevron versions-hidden"><svg fill="none" height="24" viewBox="0 0 24 24" width="24" xmlns="http://www.w3.org/2000/svg"><path d="m8 4 8 8-8 8" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="2"/></svg></span><span class="chevron versions-shown"><svg fill="none" height="24" viewBox="0 0 24 24" width="24" xmlns="http://www.w3.org/2000/svg"><path d="m4 8 8 8 8-8" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="2"/></svg></span>
</div>
</label>
<div class="version-details wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="main navigation">
<p class="caption" role="heading"><span class="caption-text">Versions</span></p>
<ol style="text-align: left">
<li><div class="version"><a style="font-size: 0.8em; padding: 4px" href="/">0.17.dev0 (main)</a></div></li>
<li><div class="version"><a style="font-size: 0.8em; padding: 4px" href="v0.8.0/">v0.8.0</a></div></li>
<li><div class="version"><a style="font-size: 0.8em; padding: 4px" href="v0.9.0/">v0.9.0</a></div></li>
<li><div class="version"><a style="font-size: 0.8em; padding: 4px" href="v0.10.0/">v0.10.0</a></div></li>
<li><div class="version"><a style="font-size: 0.8em; padding: 4px" href="v0.11.0/">v0.11.0</a></div></li>
<li><div class="version"><a style="font-size: 0.8em; padding: 4px" href="v0.12.0/">v0.12.0</a></div></li>
<li><div class="version"><a style="font-size: 0.8em; padding: 4px" href="v0.13.0/">v0.13.0</a></div></li>
<li><div class="version"><a style="font-size: 0.8em; padding: 4px" href="v0.14.0/">v0.14.0</a></div></li>
</ol>
</div>
<div role="search">
<form id="rtd-search-form" class="wy-form" action="../../search.html" method="get">
<input type="text" name="q" placeholder="Search docs" aria-label="Search docs" />
<input type="hidden" name="check_keywords" value="yes" />
<input type="hidden" name="area" value="default" />
</form>
</div>
</div>
<div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="main navigation">
<p class="caption" role="heading"><span class="caption-text">Getting Started</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../install/index.html">Installing TVM</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../contribute/index.html">Contributor Guide</a></li>
</ul>
<p class="caption" role="heading"><span class="caption-text">User Guide</span></p>
<ul class="current">
<li class="toctree-l1"><a class="reference internal" href="../../tutorial/index.html">User Tutorial</a></li>
<li class="toctree-l1 current"><a class="reference internal" href="../index.html">How To Guides</a><ul class="current">
<li class="toctree-l2"><a class="reference internal" href="../compile_models/index.html">Compile Deep Learning Models</a></li>
<li class="toctree-l2"><a class="reference internal" href="../deploy/index.html">Deploy Models and Integrate TVM</a></li>
<li class="toctree-l2"><a class="reference internal" href="../work_with_relay/index.html">Work With Relay</a></li>
<li class="toctree-l2"><a class="reference internal" href="../work_with_schedules/index.html">Work With Tensor Expression and Schedules</a></li>
<li class="toctree-l2"><a class="reference internal" href="../optimize_operators/index.html">Optimize Tensor Operators</a></li>
<li class="toctree-l2"><a class="reference internal" href="../tune_with_autotvm/index.html">Auto-Tune with Templates and AutoTVM</a></li>
<li class="toctree-l2 current"><a class="reference internal" href="index.html">Use AutoScheduler for Template-Free Scheduling</a><ul class="current">
<li class="toctree-l3"><a class="reference internal" href="tune_conv2d_layer_cuda.html">Auto-scheduling a Convolution Layer for GPU</a></li>
<li class="toctree-l3 current"><a class="current reference internal" href="#">Auto-scheduling a Neural Network for x86 CPU</a><ul>
<li class="toctree-l4"><a class="reference internal" href="#define-a-network">Define a Network</a></li>
<li class="toctree-l4"><a class="reference internal" href="#extract-search-tasks">Extract Search Tasks</a></li>
<li class="toctree-l4"><a class="reference internal" href="#begin-tuning">Begin Tuning</a></li>
<li class="toctree-l4"><a class="reference internal" href="#compile-and-evaluate">Compile and Evaluate</a></li>
<li class="toctree-l4"><a class="reference internal" href="#other-tips">Other Tips</a></li>
</ul>
</li>
<li class="toctree-l3"><a class="reference internal" href="tune_network_cuda.html">Auto-scheduling a Neural Network for NVIDIA GPU</a></li>
<li class="toctree-l3"><a class="reference internal" href="tune_network_arm.html">Auto-scheduling a Neural Network for ARM CPU</a></li>
<li class="toctree-l3"><a class="reference internal" href="tune_network_mali.html">Auto-scheduling a Neural Network for mali GPU</a></li>
<li class="toctree-l3"><a class="reference internal" href="tune_sparse_x86.html">Auto-scheduling Sparse Matrix Multiplication on CPU with Custom Sketch Rule</a></li>
</ul>
</li>
<li class="toctree-l2"><a class="reference internal" href="../work_with_microtvm/index.html">Work With microTVM</a></li>
<li class="toctree-l2"><a class="reference internal" href="../extend_tvm/index.html">Extend TVM</a></li>
<li class="toctree-l2"><a class="reference internal" href="../profile/index.html">Profile Models</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../errors.html">Handle TVM Errors</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../faq.html">Frequently Asked Questions</a></li>
</ul>
</li>
</ul>
<p class="caption" role="heading"><span class="caption-text">Developer Guide</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../dev/tutorial/index.html">Developer Tutorial</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../dev/how_to/how_to.html">Developer How-To Guide</a></li>
</ul>
<p class="caption" role="heading"><span class="caption-text">Architecture Guide</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../arch/index.html">Design and Architecture</a></li>
</ul>
<p class="caption" role="heading"><span class="caption-text">Topic Guides</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../topic/microtvm/index.html">microTVM: TVM on bare-metal</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../topic/vta/index.html">VTA: Versatile Tensor Accelerator</a></li>
</ul>
<p class="caption" role="heading"><span class="caption-text">Reference Guide</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../reference/langref/index.html">Language Reference</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../reference/api/python/index.html">Python API</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../reference/api/links.html">Other APIs</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../reference/publications.html">Publications</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../genindex.html">Index</a></li>
</ul>
</div>
</div>
</nav>
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap">
<nav class="wy-nav-top" aria-label="top navigation" data-toggle="wy-nav-top">
<div class="togglemenu">
</div>
<div class="nav-content">
<!-- tvm -->
Table of Contents
</div>
</nav>
<div class="wy-nav-content">
<div class="rst-content">
<div role="navigation" aria-label="breadcrumbs navigation">
<ul class="wy-breadcrumbs">
<li><a href="../../index.html">Docs</a> <span class="br-arrow">></span></li>
<li><a href="../index.html">How To Guides</a> <span class="br-arrow">></span></li>
<li><a href="index.html">Use AutoScheduler for Template-Free Scheduling</a> <span class="br-arrow">></span></li>
<li>Auto-scheduling a Neural Network for x86 CPU</li>
<li class="wy-breadcrumbs-aside">
<a href="https://github.com/apache/tvm/edit/main/docs/how_to/tune_with_autoscheduler/tune_network_x86.rst" class="fa fa-github"> Edit on GitHub</a>
</li>
</ul>
<hr/>
</div>
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
<div itemprop="articleBody">
<div class="sphx-glr-download-link-note admonition note">
<p class="admonition-title">Note</p>
<p>This tutorial can be used interactively with Google Colab! You can also click
<a class="reference internal" href="#sphx-glr-download-how-to-tune-with-autoscheduler-tune-network-x86-py"><span class="std std-ref">here</span></a> to run the Jupyter notebook locally.</p>
<a class="reference external image-reference" href="https://colab.research.google.com/github/apache/tvm-site/blob/asf-site/docs/_downloads/ad2a7f55d615d188ad664d56696815a6/tune_network_x86.ipynb"><img alt="https://raw.githubusercontent.com/tlc-pack/web-data/main/images/utilities/colab_button.svg" class="align-center" src="https://raw.githubusercontent.com/tlc-pack/web-data/main/images/utilities/colab_button.svg" width="300px" /></a>
</div>
<div class="sphx-glr-example-title section" id="auto-scheduling-a-neural-network-for-x86-cpu">
<span id="sphx-glr-how-to-tune-with-autoscheduler-tune-network-x86-py"></span><h1>Auto-scheduling a Neural Network for x86 CPU<a class="headerlink" href="#auto-scheduling-a-neural-network-for-x86-cpu" title="Permalink to this headline"></a></h1>
<p><strong>Author</strong>: <a class="reference external" href="https://github.com/merrymercy">Lianmin Zheng</a>, <a class="reference external" href="https://github.com/jcf94/">Chengfan Jia</a></p>
<p>Auto-tuning for specific devices and workloads is critical for getting the
best performance. This tutorial shows how to tune an entire neural
network for an x86 CPU with the auto-scheduler.</p>
<p>To auto-tune a neural network, we partition the network into small subgraphs and
tune them independently. Each subgraph is treated as one search task.
A task scheduler slices the total tuning time and dynamically allocates it to
these tasks: it predicts the impact of each task on the end-to-end execution
time and prioritizes the tasks that can reduce that time the most.</p>
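<p>Concretely, this time slicing is driven by <code class="code docutils literal notranslate"><span class="pre">auto_scheduler.TaskScheduler</span></code>. The <a class="reference internal" href="#begin-tuning"><span class="std std-ref">Begin Tuning</span></a> section configures it in full; as a preview, a minimal sketch (assuming <code class="code docutils literal notranslate"><span class="pre">tasks</span></code>, <code class="code docutils literal notranslate"><span class="pre">task_weights</span></code>, and <code class="code docutils literal notranslate"><span class="pre">log_file</span></code> as defined later in this tutorial) looks like:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span># Preview sketch only; see the Begin Tuning section for the full setup.
tuner = auto_scheduler.TaskScheduler(tasks, task_weights)
tune_option = auto_scheduler.TuningOptions(
    num_measure_trials=200,  # total measurement trials shared across all tasks
    runner=auto_scheduler.LocalRunner(repeat=10, enable_cpu_cache_flush=True),
    measure_callbacks=[auto_scheduler.RecordToFile(log_file)],
)
tuner.tune(tune_option)
</pre></div>
</div>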
<p>For each subgraph, we use the compute declaration in <code class="code docutils literal notranslate"><span class="pre">tvm/python/topi</span></code> to
get the computational DAG in the tensor expression form.
We then use the auto-scheduler to construct a search space of this DAG and search
for good schedules (low-level optimizations).</p>
<p>Unlike the template-based <a class="reference internal" href="../tune_with_autotvm/index.html#tutorials-autotvm-sec"><span class="std std-ref">autotvm</span></a>, which relies on
manual templates to define the search space, the auto-scheduler does not require any
schedule templates. In other words, the auto-scheduler uses only the compute declarations
in <code class="code docutils literal notranslate"><span class="pre">tvm/python/topi</span></code> and does not rely on existing schedule templates.</p>
<p>Note that this tutorial will not run on Windows or recent versions of macOS. To
get it to run, you will need to wrap the body of this tutorial in a <code class="code docutils literal notranslate"><span class="pre">if</span>
<span class="pre">__name__</span> <span class="pre">==</span> <span class="pre">&quot;__main__&quot;:</span></code> block.</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="kn">import</span> <span class="nn">sys</span>
<span class="kn">import</span> <span class="nn">numpy</span> <span class="k">as</span> <span class="nn">np</span>
<span class="kn">import</span> <span class="nn">tvm</span>
<span class="kn">from</span> <span class="nn">tvm</span> <span class="kn">import</span> <span class="n">relay</span><span class="p">,</span> <span class="n">auto_scheduler</span>
<span class="kn">from</span> <span class="nn">tvm.relay</span> <span class="kn">import</span> <span class="n">data_dep_optimization</span> <span class="k">as</span> <span class="n">ddo</span>
<span class="kn">import</span> <span class="nn">tvm.relay.testing</span>
<span class="kn">from</span> <span class="nn">tvm.contrib</span> <span class="kn">import</span> <span class="n">graph_executor</span>
</pre></div>
</div>
<div class="section" id="define-a-network">
<h2>Define a Network<a class="headerlink" href="#define-a-network" title="Permalink to this headline"></a></h2>
<p>First, we need to define the network with the Relay frontend API.
We can load a pre-defined network from <code class="code docutils literal notranslate"><span class="pre">tvm.relay.testing</span></code>.
We can also load models from MXNet, ONNX, PyTorch, and TensorFlow
(see <a class="reference internal" href="../compile_models/index.html#tutorial-frontend"><span class="std std-ref">front end tutorials</span></a>).</p>
<p>For convolutional neural networks, although the auto-scheduler works correctly
with any layout, we found that the best performance is typically achieved with the NHWC layout,
and we have implemented more auto-scheduler optimizations for NHWC.
It is therefore recommended to convert your models to NHWC layout before using the auto-scheduler.
You can use the <a class="reference internal" href="../../arch/convert_layout.html#convert-layout-usage"><span class="std std-ref">ConvertLayout</span></a> pass to do the layout conversion in TVM, as sketched below.</p>
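<p>A minimal sketch of the conversion (the desired layouts below are one common choice, not the only one):</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span># Convert conv2d ops to NHWC; "default" lets TVM pick the kernel layout.
desired_layouts = {"nn.conv2d": ["NHWC", "default"]}
seq = tvm.transform.Sequential(
    [
        relay.transform.RemoveUnusedFunctions(),
        relay.transform.ConvertLayout(desired_layouts),
    ]
)
with tvm.transform.PassContext(opt_level=3):
    mod = seq(mod)
</pre></div>
</div>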
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="k">def</span> <span class="nf">get_network</span><span class="p">(</span><span class="n">name</span><span class="p">,</span> <a href="https://docs.python.org/3/library/functions.html#int" title="builtins.int" class="sphx-glr-backref-module-builtins sphx-glr-backref-type-py-class sphx-glr-backref-instance"><span class="n">batch_size</span></a><span class="p">,</span> <a href="https://docs.python.org/3/library/stdtypes.html#str" title="builtins.str" class="sphx-glr-backref-module-builtins sphx-glr-backref-type-py-class sphx-glr-backref-instance"><span class="n">layout</span></a><span class="o">=</span><span class="s2">&quot;NHWC&quot;</span><span class="p">,</span> <a href="https://docs.python.org/3/library/stdtypes.html#str" title="builtins.str" class="sphx-glr-backref-module-builtins sphx-glr-backref-type-py-class sphx-glr-backref-instance"><span class="n">dtype</span></a><span class="o">=</span><span class="s2">&quot;float32&quot;</span><span class="p">,</span> <a href="https://docs.python.org/3/library/functions.html#bool" title="builtins.bool" class="sphx-glr-backref-module-builtins sphx-glr-backref-type-py-class sphx-glr-backref-instance"><span class="n">use_sparse</span></a><span class="o">=</span><span class="kc">False</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;Get the symbol definition and random weight of a network&quot;&quot;&quot;</span>
<span class="c1"># auto-scheduler prefers NHWC layout</span>
<span class="k">if</span> <a href="https://docs.python.org/3/library/stdtypes.html#str" title="builtins.str" class="sphx-glr-backref-module-builtins sphx-glr-backref-type-py-class sphx-glr-backref-instance"><span class="n">layout</span></a> <span class="o">==</span> <span class="s2">&quot;NHWC&quot;</span><span class="p">:</span>
<span class="n">image_shape</span> <span class="o">=</span> <span class="p">(</span><span class="mi">224</span><span class="p">,</span> <span class="mi">224</span><span class="p">,</span> <span class="mi">3</span><span class="p">)</span>
<span class="k">elif</span> <a href="https://docs.python.org/3/library/stdtypes.html#str" title="builtins.str" class="sphx-glr-backref-module-builtins sphx-glr-backref-type-py-class sphx-glr-backref-instance"><span class="n">layout</span></a> <span class="o">==</span> <span class="s2">&quot;NCHW&quot;</span><span class="p">:</span>
<span class="n">image_shape</span> <span class="o">=</span> <span class="p">(</span><span class="mi">3</span><span class="p">,</span> <span class="mi">224</span><span class="p">,</span> <span class="mi">224</span><span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s2">&quot;Invalid layout: &quot;</span> <span class="o">+</span> <a href="https://docs.python.org/3/library/stdtypes.html#str" title="builtins.str" class="sphx-glr-backref-module-builtins sphx-glr-backref-type-py-class sphx-glr-backref-instance"><span class="n">layout</span></a><span class="p">)</span>
<a href="https://docs.python.org/3/library/stdtypes.html#tuple" title="builtins.tuple" class="sphx-glr-backref-module-builtins sphx-glr-backref-type-py-class sphx-glr-backref-instance"><span class="n">input_shape</span></a> <span class="o">=</span> <span class="p">(</span><a href="https://docs.python.org/3/library/functions.html#int" title="builtins.int" class="sphx-glr-backref-module-builtins sphx-glr-backref-type-py-class sphx-glr-backref-instance"><span class="n">batch_size</span></a><span class="p">,)</span> <span class="o">+</span> <span class="n">image_shape</span>
<a href="https://docs.python.org/3/library/stdtypes.html#tuple" title="builtins.tuple" class="sphx-glr-backref-module-builtins sphx-glr-backref-type-py-class sphx-glr-backref-instance"><span class="n">output_shape</span></a> <span class="o">=</span> <span class="p">(</span><a href="https://docs.python.org/3/library/functions.html#int" title="builtins.int" class="sphx-glr-backref-module-builtins sphx-glr-backref-type-py-class sphx-glr-backref-instance"><span class="n">batch_size</span></a><span class="p">,</span> <span class="mi">1000</span><span class="p">)</span>
<span class="k">if</span> <span class="n">name</span><span class="o">.</span><span class="n">startswith</span><span class="p">(</span><span class="s2">&quot;resnet-&quot;</span><span class="p">):</span>
<span class="n">n_layer</span> <span class="o">=</span> <span class="nb">int</span><span class="p">(</span><span class="n">name</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="s2">&quot;-&quot;</span><span class="p">)[</span><span class="mi">1</span><span class="p">])</span>
<span class="n">mod</span><span class="p">,</span> <a href="https://docs.python.org/3/library/stdtypes.html#dict" title="builtins.dict" class="sphx-glr-backref-module-builtins sphx-glr-backref-type-py-class sphx-glr-backref-instance"><span class="n">params</span></a> <span class="o">=</span> <a href="../../reference/api/python/relay/testing.html#tvm.relay.testing.resnet.get_workload" title="tvm.relay.testing.resnet.get_workload" class="sphx-glr-backref-module-tvm-relay-testing-resnet sphx-glr-backref-type-py-function"><span class="n">relay</span><span class="o">.</span><span class="n">testing</span><span class="o">.</span><span class="n">resnet</span><span class="o">.</span><span class="n">get_workload</span></a><span class="p">(</span>
<span class="n">num_layers</span><span class="o">=</span><span class="n">n_layer</span><span class="p">,</span>
<a href="https://docs.python.org/3/library/functions.html#int" title="builtins.int" class="sphx-glr-backref-module-builtins sphx-glr-backref-type-py-class sphx-glr-backref-instance"><span class="n">batch_size</span></a><span class="o">=</span><a href="https://docs.python.org/3/library/functions.html#int" title="builtins.int" class="sphx-glr-backref-module-builtins sphx-glr-backref-type-py-class sphx-glr-backref-instance"><span class="n">batch_size</span></a><span class="p">,</span>
<a href="https://docs.python.org/3/library/stdtypes.html#str" title="builtins.str" class="sphx-glr-backref-module-builtins sphx-glr-backref-type-py-class sphx-glr-backref-instance"><span class="n">layout</span></a><span class="o">=</span><a href="https://docs.python.org/3/library/stdtypes.html#str" title="builtins.str" class="sphx-glr-backref-module-builtins sphx-glr-backref-type-py-class sphx-glr-backref-instance"><span class="n">layout</span></a><span class="p">,</span>
<a href="https://docs.python.org/3/library/stdtypes.html#str" title="builtins.str" class="sphx-glr-backref-module-builtins sphx-glr-backref-type-py-class sphx-glr-backref-instance"><span class="n">dtype</span></a><span class="o">=</span><a href="https://docs.python.org/3/library/stdtypes.html#str" title="builtins.str" class="sphx-glr-backref-module-builtins sphx-glr-backref-type-py-class sphx-glr-backref-instance"><span class="n">dtype</span></a><span class="p">,</span>
<span class="n">image_shape</span><span class="o">=</span><span class="n">image_shape</span><span class="p">,</span>
<span class="p">)</span>
<span class="k">elif</span> <span class="n">name</span><span class="o">.</span><span class="n">startswith</span><span class="p">(</span><span class="s2">&quot;resnet3d-&quot;</span><span class="p">):</span>
<span class="n">n_layer</span> <span class="o">=</span> <span class="nb">int</span><span class="p">(</span><span class="n">name</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="s2">&quot;-&quot;</span><span class="p">)[</span><span class="mi">1</span><span class="p">])</span>
<span class="n">mod</span><span class="p">,</span> <a href="https://docs.python.org/3/library/stdtypes.html#dict" title="builtins.dict" class="sphx-glr-backref-module-builtins sphx-glr-backref-type-py-class sphx-glr-backref-instance"><span class="n">params</span></a> <span class="o">=</span> <a href="../../reference/api/python/relay/testing.html#tvm.relay.testing.resnet.get_workload" title="tvm.relay.testing.resnet.get_workload" class="sphx-glr-backref-module-tvm-relay-testing-resnet sphx-glr-backref-type-py-function"><span class="n">relay</span><span class="o">.</span><span class="n">testing</span><span class="o">.</span><span class="n">resnet</span><span class="o">.</span><span class="n">get_workload</span></a><span class="p">(</span>
<span class="n">num_layers</span><span class="o">=</span><span class="n">n_layer</span><span class="p">,</span>
<a href="https://docs.python.org/3/library/functions.html#int" title="builtins.int" class="sphx-glr-backref-module-builtins sphx-glr-backref-type-py-class sphx-glr-backref-instance"><span class="n">batch_size</span></a><span class="o">=</span><a href="https://docs.python.org/3/library/functions.html#int" title="builtins.int" class="sphx-glr-backref-module-builtins sphx-glr-backref-type-py-class sphx-glr-backref-instance"><span class="n">batch_size</span></a><span class="p">,</span>
<a href="https://docs.python.org/3/library/stdtypes.html#str" title="builtins.str" class="sphx-glr-backref-module-builtins sphx-glr-backref-type-py-class sphx-glr-backref-instance"><span class="n">layout</span></a><span class="o">=</span><a href="https://docs.python.org/3/library/stdtypes.html#str" title="builtins.str" class="sphx-glr-backref-module-builtins sphx-glr-backref-type-py-class sphx-glr-backref-instance"><span class="n">layout</span></a><span class="p">,</span>
<a href="https://docs.python.org/3/library/stdtypes.html#str" title="builtins.str" class="sphx-glr-backref-module-builtins sphx-glr-backref-type-py-class sphx-glr-backref-instance"><span class="n">dtype</span></a><span class="o">=</span><a href="https://docs.python.org/3/library/stdtypes.html#str" title="builtins.str" class="sphx-glr-backref-module-builtins sphx-glr-backref-type-py-class sphx-glr-backref-instance"><span class="n">dtype</span></a><span class="p">,</span>
<span class="n">image_shape</span><span class="o">=</span><span class="n">image_shape</span><span class="p">,</span>
<span class="p">)</span>
<span class="k">elif</span> <span class="n">name</span> <span class="o">==</span> <span class="s2">&quot;mobilenet&quot;</span><span class="p">:</span>
<span class="n">mod</span><span class="p">,</span> <a href="https://docs.python.org/3/library/stdtypes.html#dict" title="builtins.dict" class="sphx-glr-backref-module-builtins sphx-glr-backref-type-py-class sphx-glr-backref-instance"><span class="n">params</span></a> <span class="o">=</span> <a href="../../reference/api/python/relay/testing.html#tvm.relay.testing.mobilenet.get_workload" title="tvm.relay.testing.mobilenet.get_workload" class="sphx-glr-backref-module-tvm-relay-testing-mobilenet sphx-glr-backref-type-py-function"><span class="n">relay</span><span class="o">.</span><span class="n">testing</span><span class="o">.</span><span class="n">mobilenet</span><span class="o">.</span><span class="n">get_workload</span></a><span class="p">(</span>
<a href="https://docs.python.org/3/library/functions.html#int" title="builtins.int" class="sphx-glr-backref-module-builtins sphx-glr-backref-type-py-class sphx-glr-backref-instance"><span class="n">batch_size</span></a><span class="o">=</span><a href="https://docs.python.org/3/library/functions.html#int" title="builtins.int" class="sphx-glr-backref-module-builtins sphx-glr-backref-type-py-class sphx-glr-backref-instance"><span class="n">batch_size</span></a><span class="p">,</span> <a href="https://docs.python.org/3/library/stdtypes.html#str" title="builtins.str" class="sphx-glr-backref-module-builtins sphx-glr-backref-type-py-class sphx-glr-backref-instance"><span class="n">layout</span></a><span class="o">=</span><a href="https://docs.python.org/3/library/stdtypes.html#str" title="builtins.str" class="sphx-glr-backref-module-builtins sphx-glr-backref-type-py-class sphx-glr-backref-instance"><span class="n">layout</span></a><span class="p">,</span> <a href="https://docs.python.org/3/library/stdtypes.html#str" title="builtins.str" class="sphx-glr-backref-module-builtins sphx-glr-backref-type-py-class sphx-glr-backref-instance"><span class="n">dtype</span></a><span class="o">=</span><a href="https://docs.python.org/3/library/stdtypes.html#str" title="builtins.str" class="sphx-glr-backref-module-builtins sphx-glr-backref-type-py-class sphx-glr-backref-instance"><span class="n">dtype</span></a><span class="p">,</span> <span class="n">image_shape</span><span class="o">=</span><span class="n">image_shape</span>
<span class="p">)</span>
<span class="k">elif</span> <span class="n">name</span> <span class="o">==</span> <span class="s2">&quot;squeezenet_v1.1&quot;</span><span class="p">:</span>
<span class="k">assert</span> <a href="https://docs.python.org/3/library/stdtypes.html#str" title="builtins.str" class="sphx-glr-backref-module-builtins sphx-glr-backref-type-py-class sphx-glr-backref-instance"><span class="n">layout</span></a> <span class="o">==</span> <span class="s2">&quot;NCHW&quot;</span><span class="p">,</span> <span class="s2">&quot;squeezenet_v1.1 only supports NCHW layout&quot;</span>
<span class="n">mod</span><span class="p">,</span> <a href="https://docs.python.org/3/library/stdtypes.html#dict" title="builtins.dict" class="sphx-glr-backref-module-builtins sphx-glr-backref-type-py-class sphx-glr-backref-instance"><span class="n">params</span></a> <span class="o">=</span> <a href="../../reference/api/python/relay/testing.html#tvm.relay.testing.squeezenet.get_workload" title="tvm.relay.testing.squeezenet.get_workload" class="sphx-glr-backref-module-tvm-relay-testing-squeezenet sphx-glr-backref-type-py-function"><span class="n">relay</span><span class="o">.</span><span class="n">testing</span><span class="o">.</span><span class="n">squeezenet</span><span class="o">.</span><span class="n">get_workload</span></a><span class="p">(</span>
<span class="n">version</span><span class="o">=</span><span class="s2">&quot;1.1&quot;</span><span class="p">,</span>
<a href="https://docs.python.org/3/library/functions.html#int" title="builtins.int" class="sphx-glr-backref-module-builtins sphx-glr-backref-type-py-class sphx-glr-backref-instance"><span class="n">batch_size</span></a><span class="o">=</span><a href="https://docs.python.org/3/library/functions.html#int" title="builtins.int" class="sphx-glr-backref-module-builtins sphx-glr-backref-type-py-class sphx-glr-backref-instance"><span class="n">batch_size</span></a><span class="p">,</span>
<a href="https://docs.python.org/3/library/stdtypes.html#str" title="builtins.str" class="sphx-glr-backref-module-builtins sphx-glr-backref-type-py-class sphx-glr-backref-instance"><span class="n">dtype</span></a><span class="o">=</span><a href="https://docs.python.org/3/library/stdtypes.html#str" title="builtins.str" class="sphx-glr-backref-module-builtins sphx-glr-backref-type-py-class sphx-glr-backref-instance"><span class="n">dtype</span></a><span class="p">,</span>
<span class="n">image_shape</span><span class="o">=</span><span class="n">image_shape</span><span class="p">,</span>
<span class="p">)</span>
<span class="k">elif</span> <span class="n">name</span> <span class="o">==</span> <span class="s2">&quot;inception_v3&quot;</span><span class="p">:</span>
<a href="https://docs.python.org/3/library/stdtypes.html#tuple" title="builtins.tuple" class="sphx-glr-backref-module-builtins sphx-glr-backref-type-py-class sphx-glr-backref-instance"><span class="n">input_shape</span></a> <span class="o">=</span> <span class="p">(</span><a href="https://docs.python.org/3/library/functions.html#int" title="builtins.int" class="sphx-glr-backref-module-builtins sphx-glr-backref-type-py-class sphx-glr-backref-instance"><span class="n">batch_size</span></a><span class="p">,</span> <span class="mi">3</span><span class="p">,</span> <span class="mi">299</span><span class="p">,</span> <span class="mi">299</span><span class="p">)</span> <span class="k">if</span> <a href="https://docs.python.org/3/library/stdtypes.html#str" title="builtins.str" class="sphx-glr-backref-module-builtins sphx-glr-backref-type-py-class sphx-glr-backref-instance"><span class="n">layout</span></a> <span class="o">==</span> <span class="s2">&quot;NCHW&quot;</span> <span class="k">else</span> <span class="p">(</span><a href="https://docs.python.org/3/library/functions.html#int" title="builtins.int" class="sphx-glr-backref-module-builtins sphx-glr-backref-type-py-class sphx-glr-backref-instance"><span class="n">batch_size</span></a><span class="p">,</span> <span class="mi">299</span><span class="p">,</span> <span class="mi">299</span><span class="p">,</span> <span class="mi">3</span><span class="p">)</span>
<span class="n">mod</span><span class="p">,</span> <a href="https://docs.python.org/3/library/stdtypes.html#dict" title="builtins.dict" class="sphx-glr-backref-module-builtins sphx-glr-backref-type-py-class sphx-glr-backref-instance"><span class="n">params</span></a> <span class="o">=</span> <a href="../../reference/api/python/relay/testing.html#tvm.relay.testing.inception_v3.get_workload" title="tvm.relay.testing.inception_v3.get_workload" class="sphx-glr-backref-module-tvm-relay-testing-inception_v3 sphx-glr-backref-type-py-function"><span class="n">relay</span><span class="o">.</span><span class="n">testing</span><span class="o">.</span><span class="n">inception_v3</span><span class="o">.</span><span class="n">get_workload</span></a><span class="p">(</span><a href="https://docs.python.org/3/library/functions.html#int" title="builtins.int" class="sphx-glr-backref-module-builtins sphx-glr-backref-type-py-class sphx-glr-backref-instance"><span class="n">batch_size</span></a><span class="o">=</span><a href="https://docs.python.org/3/library/functions.html#int" title="builtins.int" class="sphx-glr-backref-module-builtins sphx-glr-backref-type-py-class sphx-glr-backref-instance"><span class="n">batch_size</span></a><span class="p">,</span> <a href="https://docs.python.org/3/library/stdtypes.html#str" title="builtins.str" class="sphx-glr-backref-module-builtins sphx-glr-backref-type-py-class sphx-glr-backref-instance"><span class="n">dtype</span></a><span class="o">=</span><a href="https://docs.python.org/3/library/stdtypes.html#str" title="builtins.str" class="sphx-glr-backref-module-builtins sphx-glr-backref-type-py-class sphx-glr-backref-instance"><span class="n">dtype</span></a><span class="p">)</span>
<span class="k">elif</span> <span class="n">name</span> <span class="o">==</span> <span class="s2">&quot;mlp&quot;</span><span class="p">:</span>
<span class="n">mod</span><span class="p">,</span> <a href="https://docs.python.org/3/library/stdtypes.html#dict" title="builtins.dict" class="sphx-glr-backref-module-builtins sphx-glr-backref-type-py-class sphx-glr-backref-instance"><span class="n">params</span></a> <span class="o">=</span> <a href="../../reference/api/python/relay/testing.html#tvm.relay.testing.mlp.get_workload" title="tvm.relay.testing.mlp.get_workload" class="sphx-glr-backref-module-tvm-relay-testing-mlp sphx-glr-backref-type-py-function"><span class="n">relay</span><span class="o">.</span><span class="n">testing</span><span class="o">.</span><span class="n">mlp</span><span class="o">.</span><span class="n">get_workload</span></a><span class="p">(</span>
<a href="https://docs.python.org/3/library/functions.html#int" title="builtins.int" class="sphx-glr-backref-module-builtins sphx-glr-backref-type-py-class sphx-glr-backref-instance"><span class="n">batch_size</span></a><span class="o">=</span><a href="https://docs.python.org/3/library/functions.html#int" title="builtins.int" class="sphx-glr-backref-module-builtins sphx-glr-backref-type-py-class sphx-glr-backref-instance"><span class="n">batch_size</span></a><span class="p">,</span> <a href="https://docs.python.org/3/library/stdtypes.html#str" title="builtins.str" class="sphx-glr-backref-module-builtins sphx-glr-backref-type-py-class sphx-glr-backref-instance"><span class="n">dtype</span></a><span class="o">=</span><a href="https://docs.python.org/3/library/stdtypes.html#str" title="builtins.str" class="sphx-glr-backref-module-builtins sphx-glr-backref-type-py-class sphx-glr-backref-instance"><span class="n">dtype</span></a><span class="p">,</span> <span class="n">image_shape</span><span class="o">=</span><span class="n">image_shape</span><span class="p">,</span> <span class="n">num_classes</span><span class="o">=</span><span class="mi">1000</span>
<span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s2">&quot;Network not found.&quot;</span><span class="p">)</span>
<span class="k">if</span> <a href="https://docs.python.org/3/library/functions.html#bool" title="builtins.bool" class="sphx-glr-backref-module-builtins sphx-glr-backref-type-py-class sphx-glr-backref-instance"><span class="n">use_sparse</span></a><span class="p">:</span>
<span class="kn">from</span> <span class="nn">tvm.topi.sparse.utils</span> <span class="kn">import</span> <span class="n">convert_model_dense_to_sparse</span>
<span class="n">mod</span><span class="p">,</span> <a href="https://docs.python.org/3/library/stdtypes.html#dict" title="builtins.dict" class="sphx-glr-backref-module-builtins sphx-glr-backref-type-py-class sphx-glr-backref-instance"><span class="n">params</span></a> <span class="o">=</span> <span class="n">convert_model_dense_to_sparse</span><span class="p">(</span><span class="n">mod</span><span class="p">,</span> <a href="https://docs.python.org/3/library/stdtypes.html#dict" title="builtins.dict" class="sphx-glr-backref-module-builtins sphx-glr-backref-type-py-class sphx-glr-backref-instance"><span class="n">params</span></a><span class="p">,</span> <span class="n">bs_r</span><span class="o">=</span><span class="mi">4</span><span class="p">,</span> <span class="n">random_params</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
<span class="k">return</span> <span class="n">mod</span><span class="p">,</span> <a href="https://docs.python.org/3/library/stdtypes.html#dict" title="builtins.dict" class="sphx-glr-backref-module-builtins sphx-glr-backref-type-py-class sphx-glr-backref-instance"><span class="n">params</span></a><span class="p">,</span> <a href="https://docs.python.org/3/library/stdtypes.html#tuple" title="builtins.tuple" class="sphx-glr-backref-module-builtins sphx-glr-backref-type-py-class sphx-glr-backref-instance"><span class="n">input_shape</span></a><span class="p">,</span> <a href="https://docs.python.org/3/library/stdtypes.html#tuple" title="builtins.tuple" class="sphx-glr-backref-module-builtins sphx-glr-backref-type-py-class sphx-glr-backref-instance"><span class="n">output_shape</span></a>
<span class="c1"># Define the neural network and compilation target.</span>
<span class="c1"># If the target machine supports avx512 instructions, replace the</span>
<span class="c1"># &quot;llvm -mcpu=core-avx2&quot; with &quot;llvm -mcpu=skylake-avx512&quot;</span>
<a href="https://docs.python.org/3/library/stdtypes.html#str" title="builtins.str" class="sphx-glr-backref-module-builtins sphx-glr-backref-type-py-class sphx-glr-backref-instance"><span class="n">network</span></a> <span class="o">=</span> <span class="s2">&quot;resnet-50&quot;</span>
<a href="https://docs.python.org/3/library/functions.html#bool" title="builtins.bool" class="sphx-glr-backref-module-builtins sphx-glr-backref-type-py-class sphx-glr-backref-instance"><span class="n">use_sparse</span></a> <span class="o">=</span> <span class="kc">False</span>
<a href="https://docs.python.org/3/library/functions.html#int" title="builtins.int" class="sphx-glr-backref-module-builtins sphx-glr-backref-type-py-class sphx-glr-backref-instance"><span class="n">batch_size</span></a> <span class="o">=</span> <span class="mi">1</span>
<a href="https://docs.python.org/3/library/stdtypes.html#str" title="builtins.str" class="sphx-glr-backref-module-builtins sphx-glr-backref-type-py-class sphx-glr-backref-instance"><span class="n">layout</span></a> <span class="o">=</span> <span class="s2">&quot;NHWC&quot;</span>
<a href="../../reference/api/python/target.html#tvm.target.Target" title="tvm.target.Target" class="sphx-glr-backref-module-tvm-target sphx-glr-backref-type-py-class sphx-glr-backref-instance"><span class="n">target</span></a> <span class="o">=</span> <a href="../../reference/api/python/target.html#tvm.target.Target" title="tvm.target.Target" class="sphx-glr-backref-module-tvm-target sphx-glr-backref-type-py-class"><span class="n">tvm</span><span class="o">.</span><span class="n">target</span><span class="o">.</span><span class="n">Target</span></a><span class="p">(</span><span class="s2">&quot;llvm -mcpu=core-avx2&quot;</span><span class="p">)</span>
<a href="https://docs.python.org/3/library/stdtypes.html#str" title="builtins.str" class="sphx-glr-backref-module-builtins sphx-glr-backref-type-py-class sphx-glr-backref-instance"><span class="n">dtype</span></a> <span class="o">=</span> <span class="s2">&quot;float32&quot;</span>
<a href="https://docs.python.org/3/library/stdtypes.html#str" title="builtins.str" class="sphx-glr-backref-module-builtins sphx-glr-backref-type-py-class sphx-glr-backref-instance"><span class="n">log_file</span></a> <span class="o">=</span> <span class="s2">&quot;</span><span class="si">%s</span><span class="s2">-</span><span class="si">%s</span><span class="s2">-B</span><span class="si">%d</span><span class="s2">-</span><span class="si">%s</span><span class="s2">.json&quot;</span> <span class="o">%</span> <span class="p">(</span><a href="https://docs.python.org/3/library/stdtypes.html#str" title="builtins.str" class="sphx-glr-backref-module-builtins sphx-glr-backref-type-py-class sphx-glr-backref-instance"><span class="n">network</span></a><span class="p">,</span> <a href="https://docs.python.org/3/library/stdtypes.html#str" title="builtins.str" class="sphx-glr-backref-module-builtins sphx-glr-backref-type-py-class sphx-glr-backref-instance"><span class="n">layout</span></a><span class="p">,</span> <a href="https://docs.python.org/3/library/functions.html#int" title="builtins.int" class="sphx-glr-backref-module-builtins sphx-glr-backref-type-py-class sphx-glr-backref-instance"><span class="n">batch_size</span></a><span class="p">,</span> <a href="../../reference/api/python/runtime.html#tvm.runtime.String" title="tvm.runtime.String" class="sphx-glr-backref-module-tvm-runtime sphx-glr-backref-type-py-class sphx-glr-backref-instance"><span class="n">target</span><span class="o">.</span><span class="n">kind</span><span class="o">.</span><span class="n">name</span></a><span class="p">)</span>
</pre></div>
</div>
</div>
<div class="section" id="extract-search-tasks">
<h2>Extract Search Tasks<a class="headerlink" href="#extract-search-tasks" title="Permalink to this headline"></a></h2>
<p>Next, we extract the search tasks and their weights from a network.
The weight of a task is the number of times the task’s subgraph appears
in the whole network.
Using the weights, we can approximate the end-to-end latency of the network
as <code class="code docutils literal notranslate"><span class="pre">sum(latency[t]</span> <span class="pre">*</span> <span class="pre">weight[t])</span></code>, where <code class="code docutils literal notranslate"><span class="pre">latency[t]</span></code> is the
latency of task <code class="code docutils literal notranslate"><span class="pre">t</span></code> and <code class="code docutils literal notranslate"><span class="pre">weight[t]</span></code> is its weight.
The task scheduler optimizes exactly this objective.</p>
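<p>As a toy numeric illustration of this objective (the latencies and weights below are made up):</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span># Hypothetical per-task latencies (seconds) and subgraph appearance counts.
latency = [0.0012, 0.0034, 0.0008]
weight = [3, 1, 4]
print(sum(l * w for l, w in zip(latency, weight)))  # approximately 0.0102
</pre></div>
</div>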
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="c1"># Extract tasks from the network</span>
<span class="nb">print</span><span class="p">(</span><span class="s2">&quot;Get model...&quot;</span><span class="p">)</span>
<span class="n">mod</span><span class="p">,</span> <a href="https://docs.python.org/3/library/stdtypes.html#dict" title="builtins.dict" class="sphx-glr-backref-module-builtins sphx-glr-backref-type-py-class sphx-glr-backref-instance"><span class="n">params</span></a><span class="p">,</span> <a href="https://docs.python.org/3/library/stdtypes.html#tuple" title="builtins.tuple" class="sphx-glr-backref-module-builtins sphx-glr-backref-type-py-class sphx-glr-backref-instance"><span class="n">input_shape</span></a><span class="p">,</span> <a href="https://docs.python.org/3/library/stdtypes.html#tuple" title="builtins.tuple" class="sphx-glr-backref-module-builtins sphx-glr-backref-type-py-class sphx-glr-backref-instance"><span class="n">output_shape</span></a> <span class="o">=</span> <span class="n">get_network</span><span class="p">(</span>
<a href="https://docs.python.org/3/library/stdtypes.html#str" title="builtins.str" class="sphx-glr-backref-module-builtins sphx-glr-backref-type-py-class sphx-glr-backref-instance"><span class="n">network</span></a><span class="p">,</span>
<a href="https://docs.python.org/3/library/functions.html#int" title="builtins.int" class="sphx-glr-backref-module-builtins sphx-glr-backref-type-py-class sphx-glr-backref-instance"><span class="n">batch_size</span></a><span class="p">,</span>
<a href="https://docs.python.org/3/library/stdtypes.html#str" title="builtins.str" class="sphx-glr-backref-module-builtins sphx-glr-backref-type-py-class sphx-glr-backref-instance"><span class="n">layout</span></a><span class="p">,</span>
<a href="https://docs.python.org/3/library/stdtypes.html#str" title="builtins.str" class="sphx-glr-backref-module-builtins sphx-glr-backref-type-py-class sphx-glr-backref-instance"><span class="n">dtype</span></a><span class="o">=</span><a href="https://docs.python.org/3/library/stdtypes.html#str" title="builtins.str" class="sphx-glr-backref-module-builtins sphx-glr-backref-type-py-class sphx-glr-backref-instance"><span class="n">dtype</span></a><span class="p">,</span>
<a href="https://docs.python.org/3/library/functions.html#bool" title="builtins.bool" class="sphx-glr-backref-module-builtins sphx-glr-backref-type-py-class sphx-glr-backref-instance"><span class="n">use_sparse</span></a><span class="o">=</span><a href="https://docs.python.org/3/library/functions.html#bool" title="builtins.bool" class="sphx-glr-backref-module-builtins sphx-glr-backref-type-py-class sphx-glr-backref-instance"><span class="n">use_sparse</span></a><span class="p">,</span>
<span class="p">)</span>
<span class="nb">print</span><span class="p">(</span><span class="s2">&quot;Extract tasks...&quot;</span><span class="p">)</span>
<a href="https://docs.python.org/3/library/stdtypes.html#list" title="builtins.list" class="sphx-glr-backref-module-builtins sphx-glr-backref-type-py-class sphx-glr-backref-instance"><span class="n">tasks</span></a><span class="p">,</span> <a href="https://docs.python.org/3/library/stdtypes.html#list" title="builtins.list" class="sphx-glr-backref-module-builtins sphx-glr-backref-type-py-class sphx-glr-backref-instance"><span class="n">task_weights</span></a> <span class="o">=</span> <a href="../../reference/api/python/auto_scheduler.html#tvm.auto_scheduler.extract_tasks" title="tvm.auto_scheduler.extract_tasks" class="sphx-glr-backref-module-tvm-auto_scheduler sphx-glr-backref-type-py-function"><span class="n">auto_scheduler</span><span class="o">.</span><span class="n">extract_tasks</span></a><span class="p">(</span><span class="n">mod</span><span class="p">[</span><span class="s2">&quot;main&quot;</span><span class="p">],</span> <a href="https://docs.python.org/3/library/stdtypes.html#dict" title="builtins.dict" class="sphx-glr-backref-module-builtins sphx-glr-backref-type-py-class sphx-glr-backref-instance"><span class="n">params</span></a><span class="p">,</span> <a href="../../reference/api/python/target.html#tvm.target.Target" title="tvm.target.Target" class="sphx-glr-backref-module-tvm-target sphx-glr-backref-type-py-class sphx-glr-backref-instance"><span class="n">target</span></a><span class="p">)</span>
<span class="k">for</span> <a href="https://docs.python.org/3/library/functions.html#int" title="builtins.int" class="sphx-glr-backref-module-builtins sphx-glr-backref-type-py-class sphx-glr-backref-instance"><span class="n">idx</span></a><span class="p">,</span> <a href="../../reference/api/python/auto_scheduler.html#tvm.auto_scheduler.SearchTask" title="tvm.auto_scheduler.SearchTask" class="sphx-glr-backref-module-tvm-auto_scheduler sphx-glr-backref-type-py-class sphx-glr-backref-instance"><span class="n">task</span></a> <span class="ow">in</span> <span class="nb">enumerate</span><span class="p">(</span><a href="https://docs.python.org/3/library/stdtypes.html#list" title="builtins.list" class="sphx-glr-backref-module-builtins sphx-glr-backref-type-py-class sphx-glr-backref-instance"><span class="n">tasks</span></a><span class="p">):</span>
<span class="nb">print</span><span class="p">(</span><span class="s2">&quot;========== Task </span><span class="si">%d</span><span class="s2"> (workload key: </span><span class="si">%s</span><span class="s2">) ==========&quot;</span> <span class="o">%</span> <span class="p">(</span><a href="https://docs.python.org/3/library/functions.html#int" title="builtins.int" class="sphx-glr-backref-module-builtins sphx-glr-backref-type-py-class sphx-glr-backref-instance"><span class="n">idx</span></a><span class="p">,</span> <a href="../../reference/api/python/runtime.html#tvm.runtime.String" title="tvm.runtime.String" class="sphx-glr-backref-module-tvm-runtime sphx-glr-backref-type-py-class sphx-glr-backref-instance"><span class="n">task</span><span class="o">.</span><span class="n">workload_key</span></a><span class="p">))</span>
<span class="nb">print</span><span class="p">(</span><a href="../../reference/api/python/auto_scheduler.html#tvm.auto_scheduler.ComputeDAG" title="tvm.auto_scheduler.ComputeDAG" class="sphx-glr-backref-module-tvm-auto_scheduler sphx-glr-backref-type-py-class sphx-glr-backref-instance"><span class="n">task</span><span class="o">.</span><span class="n">compute_dag</span></a><span class="p">)</span>
</pre></div>
</div>
<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Get model...
Extract tasks...
========== Task 0 (workload key: [&quot;6d628209072e3e3dd8f49359935acea6&quot;, [1, 28, 28, 512], [1, 1, 512, 128], [1, 1, 1, 128], [1, 28, 28, 128]]) ==========
p0 = PLACEHOLDER [1, 28, 28, 512]
pad_temp(i0, i1, i2, i3) = p0[i0, i1, i2, i3]
p1 = PLACEHOLDER [1, 1, 512, 128]
conv2d_nhwc(nn, yy, xx, ff) += (pad_temp[nn, (yy + ry), (xx + rx), rc]*p1[ry, rx, rc, ff])
p2 = PLACEHOLDER [1, 1, 1, 128]
T_add(ax0, ax1, ax2, ax3) = (conv2d_nhwc[ax0, ax1, ax2, ax3] + p2[ax0, 0, 0, ax3])
T_relu(ax0, ax1, ax2, ax3) = max(T_add[ax0, ax1, ax2, ax3], 0f)
========== Task 1 (workload key: [&quot;3060808fc5c74e18b1276729071fbae0&quot;, [1, 56, 56, 64], [1, 1, 64, 256], [1, 56, 56, 256], [1, 56, 56, 256]]) ==========
p0 = PLACEHOLDER [1, 56, 56, 64]
pad_temp(i0, i1, i2, i3) = p0[i0, i1, i2, i3]
p1 = PLACEHOLDER [1, 1, 64, 256]
conv2d_nhwc(nn, yy, xx, ff) += (pad_temp[nn, (yy + ry), (xx + rx), rc]*p1[ry, rx, rc, ff])
p2 = PLACEHOLDER [1, 56, 56, 256]
T_add(ax0, ax1, ax2, ax3) = (conv2d_nhwc[ax0, ax1, ax2, ax3] + p2[ax0, ax1, ax2, ax3])
========== Task 2 (workload key: [&quot;2d10de6646307f0e3e5cf4b31c20e69b&quot;, [1, 56, 56, 64], [1, 1, 64, 256], [1, 56, 56, 256]]) ==========
p0 = PLACEHOLDER [1, 56, 56, 64]
pad_temp(i0, i1, i2, i3) = p0[i0, i1, i2, i3]
p1 = PLACEHOLDER [1, 1, 64, 256]
conv2d_nhwc(nn, yy, xx, ff) += (pad_temp[nn, (yy + ry), (xx + rx), rc]*p1[ry, rx, rc, ff])
========== Task 3 (workload key: [&quot;6d628209072e3e3dd8f49359935acea6&quot;, [1, 56, 56, 64], [1, 1, 64, 64], [1, 1, 1, 64], [1, 56, 56, 64]]) ==========
p0 = PLACEHOLDER [1, 56, 56, 64]
pad_temp(i0, i1, i2, i3) = p0[i0, i1, i2, i3]
p1 = PLACEHOLDER [1, 1, 64, 64]
conv2d_nhwc(nn, yy, xx, ff) += (pad_temp[nn, (yy + ry), (xx + rx), rc]*p1[ry, rx, rc, ff])
p2 = PLACEHOLDER [1, 1, 1, 64]
T_add(ax0, ax1, ax2, ax3) = (conv2d_nhwc[ax0, ax1, ax2, ax3] + p2[ax0, 0, 0, ax3])
T_relu(ax0, ax1, ax2, ax3) = max(T_add[ax0, ax1, ax2, ax3], 0f)
========== Task 4 (workload key: [&quot;08f7449d79e570b7274174709e5e5e01&quot;, [1, 2048], [1000, 2048], [1, 1000], [1, 1000]]) ==========
p0 = PLACEHOLDER [1, 2048]
p1 = PLACEHOLDER [1000, 2048]
T_matmul_NT(i0, i1) += (p0[i0, k]*p1[i1, k])
p2 = PLACEHOLDER [1, 1000]
T_add(ax0, ax1) = (T_matmul_NT[ax0, ax1] + p2[ax0, ax1])
========== Task 5 (workload key: [&quot;76afb7bf408a1ffa0b8b7bc09d077dc3&quot;, [1, 56, 56, 64], [1, 1, 64, 256], [1, 56, 56, 256], [1, 1, 1, 256], [1, 56, 56, 256]]) ==========
p0 = PLACEHOLDER [1, 56, 56, 64]
pad_temp(i0, i1, i2, i3) = p0[i0, i1, i2, i3]
p1 = PLACEHOLDER [1, 1, 64, 256]
conv2d_nhwc(nn, yy, xx, ff) += (pad_temp[nn, (yy + ry), (xx + rx), rc]*p1[ry, rx, rc, ff])
p2 = PLACEHOLDER [1, 56, 56, 256]
T_add(ax0, ax1, ax2, ax3) = (conv2d_nhwc[ax0, ax1, ax2, ax3] + p2[ax0, ax1, ax2, ax3])
p3 = PLACEHOLDER [1, 1, 1, 256]
T_add(ax0, ax1, ax2, ax3) = (T_add[ax0, ax1, ax2, ax3] + p3[ax0, 0, 0, ax3])
T_relu(ax0, ax1, ax2, ax3) = max(T_add[ax0, ax1, ax2, ax3], 0f)
========== Task 6 (workload key: [&quot;8c53ca2904398da2889aa7508082d7bb&quot;, [1, 7, 7, 2048], [1, 1, 1, 2048]]) ==========
p0 = PLACEHOLDER [1, 7, 7, 2048]
adaptive_pool_sum(ax0, ax1, ax2, ax3) += p0[ax0, ((ax1*7) + rv0), ((ax2*7) + rv1), ax3]
adaptive_pool_avg(ax0, ax1, ax2, ax3) = (adaptive_pool_sum[ax0, ax1, ax2, ax3]/(float32((select((bool)1, ((ax1 + 1)*7), (((ax1 + 1)*7) + 1)) - (ax1*7)))*float32((select((bool)1, ((ax2 + 1)*7), (((ax2 + 1)*7) + 1)) - (ax2*7)))))
========== Task 7 (workload key: [&quot;2beb39e9afe4c74822fffbcbb8533595&quot;, [1, 14, 14, 1024], [1, 1, 1024, 512], [1, 1, 1, 512], [1, 7, 7, 512]]) ==========
p0 = PLACEHOLDER [1, 14, 14, 1024]
pad_temp(i0, i1, i2, i3) = p0[i0, i1, i2, i3]
p1 = PLACEHOLDER [1, 1, 1024, 512]
conv2d_nhwc(nn, yy, xx, ff) += (pad_temp[nn, ((yy*2) + ry), ((xx*2) + rx), rc]*p1[ry, rx, rc, ff])
p2 = PLACEHOLDER [1, 1, 1, 512]
T_add(ax0, ax1, ax2, ax3) = (conv2d_nhwc[ax0, ax1, ax2, ax3] + p2[ax0, 0, 0, ax3])
T_relu(ax0, ax1, ax2, ax3) = max(T_add[ax0, ax1, ax2, ax3], 0f)
========== Task 8 (workload key: [&quot;0fad1b42d0d33418e0a8d15d3bbad3c9&quot;, [1, 14, 14, 1024], [1, 1, 1024, 2048], [1, 7, 7, 2048]]) ==========
p0 = PLACEHOLDER [1, 14, 14, 1024]
pad_temp(i0, i1, i2, i3) = p0[i0, i1, i2, i3]
p1 = PLACEHOLDER [1, 1, 1024, 2048]
conv2d_nhwc(nn, yy, xx, ff) += (pad_temp[nn, ((yy*2) + ry), ((xx*2) + rx), rc]*p1[ry, rx, rc, ff])
========== Task 9 (workload key: [&quot;3060808fc5c74e18b1276729071fbae0&quot;, [1, 7, 7, 512], [1, 1, 512, 2048], [1, 7, 7, 2048], [1, 7, 7, 2048]]) ==========
p0 = PLACEHOLDER [1, 7, 7, 512]
pad_temp(i0, i1, i2, i3) = p0[i0, i1, i2, i3]
p1 = PLACEHOLDER [1, 1, 512, 2048]
conv2d_nhwc(nn, yy, xx, ff) += (pad_temp[nn, (yy + ry), (xx + rx), rc]*p1[ry, rx, rc, ff])
p2 = PLACEHOLDER [1, 7, 7, 2048]
T_add(ax0, ax1, ax2, ax3) = (conv2d_nhwc[ax0, ax1, ax2, ax3] + p2[ax0, ax1, ax2, ax3])
========== Task 10 (workload key: [&quot;2beb39e9afe4c74822fffbcbb8533595&quot;, [1, 56, 56, 256], [1, 1, 256, 128], [1, 1, 1, 128], [1, 28, 28, 128]]) ==========
p0 = PLACEHOLDER [1, 56, 56, 256]
pad_temp(i0, i1, i2, i3) = p0[i0, i1, i2, i3]
p1 = PLACEHOLDER [1, 1, 256, 128]
conv2d_nhwc(nn, yy, xx, ff) += (pad_temp[nn, ((yy*2) + ry), ((xx*2) + rx), rc]*p1[ry, rx, rc, ff])
p2 = PLACEHOLDER [1, 1, 1, 128]
T_add(ax0, ax1, ax2, ax3) = (conv2d_nhwc[ax0, ax1, ax2, ax3] + p2[ax0, 0, 0, ax3])
T_relu(ax0, ax1, ax2, ax3) = max(T_add[ax0, ax1, ax2, ax3], 0f)
========== Task 11 (workload key: [&quot;76afb7bf408a1ffa0b8b7bc09d077dc3&quot;, [1, 28, 28, 128], [1, 1, 128, 512], [1, 28, 28, 512], [1, 1, 1, 512], [1, 28, 28, 512]]) ==========
p0 = PLACEHOLDER [1, 28, 28, 128]
pad_temp(i0, i1, i2, i3) = p0[i0, i1, i2, i3]
p1 = PLACEHOLDER [1, 1, 128, 512]
conv2d_nhwc(nn, yy, xx, ff) += (pad_temp[nn, (yy + ry), (xx + rx), rc]*p1[ry, rx, rc, ff])
p2 = PLACEHOLDER [1, 28, 28, 512]
T_add(ax0, ax1, ax2, ax3) = (conv2d_nhwc[ax0, ax1, ax2, ax3] + p2[ax0, ax1, ax2, ax3])
p3 = PLACEHOLDER [1, 1, 1, 512]
T_add(ax0, ax1, ax2, ax3) = (T_add[ax0, ax1, ax2, ax3] + p3[ax0, 0, 0, ax3])
T_relu(ax0, ax1, ax2, ax3) = max(T_add[ax0, ax1, ax2, ax3], 0f)
========== Task 12 (workload key: [&quot;0fad1b42d0d33418e0a8d15d3bbad3c9&quot;, [1, 56, 56, 256], [1, 1, 256, 512], [1, 28, 28, 512]]) ==========
p0 = PLACEHOLDER [1, 56, 56, 256]
pad_temp(i0, i1, i2, i3) = p0[i0, i1, i2, i3]
p1 = PLACEHOLDER [1, 1, 256, 512]
conv2d_nhwc(nn, yy, xx, ff) += (pad_temp[nn, ((yy*2) + ry), ((xx*2) + rx), rc]*p1[ry, rx, rc, ff])
========== Task 13 (workload key: [&quot;7d79c516e212fe1d73f5dbb90eaca2cf&quot;, [1, 1000], [1, 1000]]) ==========
p0 = PLACEHOLDER [1, 1000]
T_softmax_maxelem(i0) max= p0[i0, k]
T_softmax_exp(i0, i1) = tir.exp((p0[i0, i1] - T_softmax_maxelem[i0]))
T_softmax_expsum(i0) += T_softmax_exp[i0, k]
T_softmax_norm(i0, i1) = (T_softmax_exp[i0, i1]/T_softmax_expsum[i0])
========== Task 14 (workload key: [&quot;3060808fc5c74e18b1276729071fbae0&quot;, [1, 14, 14, 256], [1, 1, 256, 1024], [1, 14, 14, 1024], [1, 14, 14, 1024]]) ==========
p0 = PLACEHOLDER [1, 14, 14, 256]
pad_temp(i0, i1, i2, i3) = p0[i0, i1, i2, i3]
p1 = PLACEHOLDER [1, 1, 256, 1024]
conv2d_nhwc(nn, yy, xx, ff) += (pad_temp[nn, (yy + ry), (xx + rx), rc]*p1[ry, rx, rc, ff])
p2 = PLACEHOLDER [1, 14, 14, 1024]
T_add(ax0, ax1, ax2, ax3) = (conv2d_nhwc[ax0, ax1, ax2, ax3] + p2[ax0, ax1, ax2, ax3])
========== Task 15 (workload key: [&quot;07f9fcad27bdd3233f86fe35a5185d33&quot;, [1, 224, 224, 3], [7, 7, 3, 64], [1, 1, 1, 64], [1, 112, 112, 64]]) ==========
p0 = PLACEHOLDER [1, 224, 224, 3]
pad_temp(i0, i1, i2, i3) = tir.if_then_else(((((i1 &gt;= 3) &amp;&amp; (i1 &lt; 227)) &amp;&amp; (i2 &gt;= 3)) &amp;&amp; (i2 &lt; 227)), p0[i0, (i1 - 3), (i2 - 3), i3], 0f)
p1 = PLACEHOLDER [7, 7, 3, 64]
conv2d_nhwc(nn, yy, xx, ff) += (pad_temp[nn, ((yy*2) + ry), ((xx*2) + rx), rc]*p1[ry, rx, rc, ff])
p2 = PLACEHOLDER [1, 1, 1, 64]
T_add(ax0, ax1, ax2, ax3) = (conv2d_nhwc[ax0, ax1, ax2, ax3] + p2[ax0, 0, 0, ax3])
T_relu(ax0, ax1, ax2, ax3) = max(T_add[ax0, ax1, ax2, ax3], 0f)
========== Task 16 (workload key: [&quot;6d012ba18a086c11ee2b85c7324e16f2&quot;, [1, 112, 112, 64], [1, 1, 1, 64], [1, 56, 56, 64]]) ==========
p0 = PLACEHOLDER [1, 112, 112, 64]
pad_temp(ax0, ax1, ax2, ax3) = tir.if_then_else(((((ax1 &gt;= 1) &amp;&amp; (ax1 &lt; 113)) &amp;&amp; (ax2 &gt;= 1)) &amp;&amp; (ax2 &lt; 113)), p0[ax0, (ax1 - 1), (ax2 - 1), ax3], -3.40282e+38f)
pool_max(ax0, ax1, ax2, ax3) max= pad_temp[ax0, ((ax1*2) + rv0), ((ax2*2) + rv1), ax3]
p1 = PLACEHOLDER [1, 1, 1, 64]
T_add(ax0, ax1, ax2, ax3) = (pool_max[ax0, ax1, ax2, ax3] + p1[ax0, 0, 0, ax3])
T_relu(ax0, ax1, ax2, ax3) = max(T_add[ax0, ax1, ax2, ax3], 0f)
========== Task 17 (workload key: [&quot;6d628209072e3e3dd8f49359935acea6&quot;, [1, 7, 7, 2048], [1, 1, 2048, 512], [1, 1, 1, 512], [1, 7, 7, 512]]) ==========
p0 = PLACEHOLDER [1, 7, 7, 2048]
pad_temp(i0, i1, i2, i3) = p0[i0, i1, i2, i3]
p1 = PLACEHOLDER [1, 1, 2048, 512]
conv2d_nhwc(nn, yy, xx, ff) += (pad_temp[nn, (yy + ry), (xx + rx), rc]*p1[ry, rx, rc, ff])
p2 = PLACEHOLDER [1, 1, 1, 512]
T_add(ax0, ax1, ax2, ax3) = (conv2d_nhwc[ax0, ax1, ax2, ax3] + p2[ax0, 0, 0, ax3])
T_relu(ax0, ax1, ax2, ax3) = max(T_add[ax0, ax1, ax2, ax3], 0f)
========== Task 18 (workload key: [&quot;6d628209072e3e3dd8f49359935acea6&quot;, [1, 14, 14, 1024], [1, 1, 1024, 256], [1, 1, 1, 256], [1, 14, 14, 256]]) ==========
p0 = PLACEHOLDER [1, 14, 14, 1024]
pad_temp(i0, i1, i2, i3) = p0[i0, i1, i2, i3]
p1 = PLACEHOLDER [1, 1, 1024, 256]
conv2d_nhwc(nn, yy, xx, ff) += (pad_temp[nn, (yy + ry), (xx + rx), rc]*p1[ry, rx, rc, ff])
p2 = PLACEHOLDER [1, 1, 1, 256]
T_add(ax0, ax1, ax2, ax3) = (conv2d_nhwc[ax0, ax1, ax2, ax3] + p2[ax0, 0, 0, ax3])
T_relu(ax0, ax1, ax2, ax3) = max(T_add[ax0, ax1, ax2, ax3], 0f)
========== Task 19 (workload key: [&quot;38552500208b25b4035682b0e93cbce3&quot;, [1, 14, 14, 256], [6, 6, 256, 256], [1, 1, 1, 256], [1, 14, 14, 256]]) ==========
p0 = PLACEHOLDER [1, 14, 14, 256]
data_pad(i0, i1, i2, i3) = tir.if_then_else(((((i1 &gt;= 1) &amp;&amp; (i1 &lt; 15)) &amp;&amp; (i2 &gt;= 1)) &amp;&amp; (i2 &lt; 15)), p0[i0, (i1 - 1), (i2 - 1), i3], 0f)
input_tile(eps, nu, p, ci) = data_pad[floordiv(p, 16), ((floormod(floordiv(p, 4), 4)*4) + eps), ((floormod(p, 4)*4) + nu), ci]
B(i, j) = select(((floormod(i, 6) == 5) &amp;&amp; (floormod(j, 6) == 5)), 1f, select(((floormod(i, 6) == 5) &amp;&amp; (floormod(j, 6) == 4)), ..(OMITTED).. (floormod(j, 6) == 1)), 0f, select(((floormod(i, 6) == 0) &amp;&amp; (floormod(j, 6) == 0)), 1f, 0f))))))))))))))))))))))))))))))))))))
data_pack(eps, nu, p, ci) += ((input_tile[r_a, r_b, p, ci]*B[r_a, eps])*B[r_b, nu])
p1 = PLACEHOLDER [6, 6, 256, 256]
bgemm(eps, nu, p, co) += (data_pack[eps, nu, p, ci]*p1[eps, nu, co, ci])
A(i, j) = select(((floormod(i, 6) == 5) &amp;&amp; (floormod(j, 4) == 3)), 1f, select(((floormod(i, 6) == 5) &amp;&amp; (floormod(j, 4) == 2)), ..(OMITTED).. 6) == 0) &amp;&amp; (floormod(j, 4) == 1)), 0f, select(((floormod(i, 6) == 0) &amp;&amp; (floormod(j, 4) == 0)), 1f, 0f))))))))))))))))))))))))
inverse(vh, vw, p, co) += ((bgemm[r_a, r_b, p, co]*A[r_a, vh])*A[r_b, vw])
conv2d_winograd(n, h, w, co) = inverse[floormod(h, 4), floormod(w, 4), ((((n*4)*4) + (floordiv(h, 4)*4)) + floordiv(w, 4)), co]
p2 = PLACEHOLDER [1, 1, 1, 256]
T_add(ax0, ax1, ax2, ax3) = (conv2d_winograd[ax0, ax1, ax2, ax3] + p2[ax0, 0, 0, ax3])
T_relu(ax0, ax1, ax2, ax3) = max(T_add[ax0, ax1, ax2, ax3], 0f)
========== Task 20 (workload key: [&quot;6d628209072e3e3dd8f49359935acea6&quot;, [1, 56, 56, 256], [1, 1, 256, 64], [1, 1, 1, 64], [1, 56, 56, 64]]) ==========
p0 = PLACEHOLDER [1, 56, 56, 256]
pad_temp(i0, i1, i2, i3) = p0[i0, i1, i2, i3]
p1 = PLACEHOLDER [1, 1, 256, 64]
conv2d_nhwc(nn, yy, xx, ff) += (pad_temp[nn, (yy + ry), (xx + rx), rc]*p1[ry, rx, rc, ff])
p2 = PLACEHOLDER [1, 1, 1, 64]
T_add(ax0, ax1, ax2, ax3) = (conv2d_nhwc[ax0, ax1, ax2, ax3] + p2[ax0, 0, 0, ax3])
T_relu(ax0, ax1, ax2, ax3) = max(T_add[ax0, ax1, ax2, ax3], 0f)
========== Task 21 (workload key: [&quot;f07e228ef5f642b386d23a62df615e7b&quot;, [1, 7, 7, 512], [1, 1, 512, 2048], [1, 7, 7, 2048], [1, 1, 1, 2048], [1, 1, 1, 2048], [1, 7, 7, 2048]]) ==========
p0 = PLACEHOLDER [1, 7, 7, 512]
pad_temp(i0, i1, i2, i3) = p0[i0, i1, i2, i3]
p1 = PLACEHOLDER [1, 1, 512, 2048]
conv2d_nhwc(nn, yy, xx, ff) += (pad_temp[nn, (yy + ry), (xx + rx), rc]*p1[ry, rx, rc, ff])
p2 = PLACEHOLDER [1, 7, 7, 2048]
T_add(ax0, ax1, ax2, ax3) = (conv2d_nhwc[ax0, ax1, ax2, ax3] + p2[ax0, ax1, ax2, ax3])
p3 = PLACEHOLDER [1, 1, 1, 2048]
T_multiply(ax0, ax1, ax2, ax3) = (T_add[ax0, ax1, ax2, ax3]*p3[ax0, 0, 0, ax3])
p4 = PLACEHOLDER [1, 1, 1, 2048]
T_add(ax0, ax1, ax2, ax3) = (T_multiply[ax0, ax1, ax2, ax3] + p4[ax0, 0, 0, ax3])
T_relu(ax0, ax1, ax2, ax3) = max(T_add[ax0, ax1, ax2, ax3], 0f)
========== Task 22 (workload key: [&quot;3060808fc5c74e18b1276729071fbae0&quot;, [1, 28, 28, 128], [1, 1, 128, 512], [1, 28, 28, 512], [1, 28, 28, 512]]) ==========
p0 = PLACEHOLDER [1, 28, 28, 128]
pad_temp(i0, i1, i2, i3) = p0[i0, i1, i2, i3]
p1 = PLACEHOLDER [1, 1, 128, 512]
conv2d_nhwc(nn, yy, xx, ff) += (pad_temp[nn, (yy + ry), (xx + rx), rc]*p1[ry, rx, rc, ff])
p2 = PLACEHOLDER [1, 28, 28, 512]
T_add(ax0, ax1, ax2, ax3) = (conv2d_nhwc[ax0, ax1, ax2, ax3] + p2[ax0, ax1, ax2, ax3])
========== Task 23 (workload key: [&quot;0fad1b42d0d33418e0a8d15d3bbad3c9&quot;, [1, 28, 28, 512], [1, 1, 512, 1024], [1, 14, 14, 1024]]) ==========
p0 = PLACEHOLDER [1, 28, 28, 512]
pad_temp(i0, i1, i2, i3) = p0[i0, i1, i2, i3]
p1 = PLACEHOLDER [1, 1, 512, 1024]
conv2d_nhwc(nn, yy, xx, ff) += (pad_temp[nn, ((yy*2) + ry), ((xx*2) + rx), rc]*p1[ry, rx, rc, ff])
========== Task 24 (workload key: [&quot;2beb39e9afe4c74822fffbcbb8533595&quot;, [1, 28, 28, 512], [1, 1, 512, 256], [1, 1, 1, 256], [1, 14, 14, 256]]) ==========
p0 = PLACEHOLDER [1, 28, 28, 512]
pad_temp(i0, i1, i2, i3) = p0[i0, i1, i2, i3]
p1 = PLACEHOLDER [1, 1, 512, 256]
conv2d_nhwc(nn, yy, xx, ff) += (pad_temp[nn, ((yy*2) + ry), ((xx*2) + rx), rc]*p1[ry, rx, rc, ff])
p2 = PLACEHOLDER [1, 1, 1, 256]
T_add(ax0, ax1, ax2, ax3) = (conv2d_nhwc[ax0, ax1, ax2, ax3] + p2[ax0, 0, 0, ax3])
T_relu(ax0, ax1, ax2, ax3) = max(T_add[ax0, ax1, ax2, ax3], 0f)
========== Task 25 (workload key: [&quot;76afb7bf408a1ffa0b8b7bc09d077dc3&quot;, [1, 14, 14, 256], [1, 1, 256, 1024], [1, 14, 14, 1024], [1, 1, 1, 1024], [1, 14, 14, 1024]]) ==========
p0 = PLACEHOLDER [1, 14, 14, 256]
pad_temp(i0, i1, i2, i3) = p0[i0, i1, i2, i3]
p1 = PLACEHOLDER [1, 1, 256, 1024]
conv2d_nhwc(nn, yy, xx, ff) += (pad_temp[nn, (yy + ry), (xx + rx), rc]*p1[ry, rx, rc, ff])
p2 = PLACEHOLDER [1, 14, 14, 1024]
T_add(ax0, ax1, ax2, ax3) = (conv2d_nhwc[ax0, ax1, ax2, ax3] + p2[ax0, ax1, ax2, ax3])
p3 = PLACEHOLDER [1, 1, 1, 1024]
T_add(ax0, ax1, ax2, ax3) = (T_add[ax0, ax1, ax2, ax3] + p3[ax0, 0, 0, ax3])
T_relu(ax0, ax1, ax2, ax3) = max(T_add[ax0, ax1, ax2, ax3], 0f)
========== Task 26 (workload key: [&quot;d37380659057397544e056461ea3bad3&quot;, [1, 56, 56, 64], [3, 3, 64, 64], [1, 1, 1, 64], [1, 56, 56, 64]]) ==========
p0 = PLACEHOLDER [1, 56, 56, 64]
pad_temp(i0, i1, i2, i3) = tir.if_then_else(((((i1 &gt;= 1) &amp;&amp; (i1 &lt; 57)) &amp;&amp; (i2 &gt;= 1)) &amp;&amp; (i2 &lt; 57)), p0[i0, (i1 - 1), (i2 - 1), i3], 0f)
p1 = PLACEHOLDER [3, 3, 64, 64]
conv2d_nhwc(nn, yy, xx, ff) += (pad_temp[nn, (yy + ry), (xx + rx), rc]*p1[ry, rx, rc, ff])
p2 = PLACEHOLDER [1, 1, 1, 64]
T_add(ax0, ax1, ax2, ax3) = (conv2d_nhwc[ax0, ax1, ax2, ax3] + p2[ax0, 0, 0, ax3])
T_relu(ax0, ax1, ax2, ax3) = max(T_add[ax0, ax1, ax2, ax3], 0f)
========== Task 27 (workload key: [&quot;cfd09cf1ca9e943f0ee12a18813a5c75&quot;, [1, 28, 28, 128], [6, 6, 128, 128], [1, 1, 1, 128], [1, 28, 28, 128]]) ==========
p0 = PLACEHOLDER [1, 28, 28, 128]
data_pad(i0, i1, i2, i3) = tir.if_then_else(((((i1 &gt;= 1) &amp;&amp; (i1 &lt; 29)) &amp;&amp; (i2 &gt;= 1)) &amp;&amp; (i2 &lt; 29)), p0[i0, (i1 - 1), (i2 - 1), i3], 0f)
input_tile(eps, nu, p, ci) = data_pad[floordiv(p, 49), ((floormod(floordiv(p, 7), 7)*4) + eps), ((floormod(p, 7)*4) + nu), ci]
B(i, j) = select(((floormod(i, 6) == 5) &amp;&amp; (floormod(j, 6) == 5)), 1f, select(((floormod(i, 6) == 5) &amp;&amp; (floormod(j, 6) == 4)), ..(OMITTED).. (floormod(j, 6) == 1)), 0f, select(((floormod(i, 6) == 0) &amp;&amp; (floormod(j, 6) == 0)), 1f, 0f))))))))))))))))))))))))))))))))))))
data_pack(eps, nu, p, ci) += ((input_tile[r_a, r_b, p, ci]*B[r_a, eps])*B[r_b, nu])
p1 = PLACEHOLDER [6, 6, 128, 128]
bgemm(eps, nu, p, co) += (data_pack[eps, nu, p, ci]*p1[eps, nu, co, ci])
A(i, j) = select(((floormod(i, 6) == 5) &amp;&amp; (floormod(j, 4) == 3)), 1f, select(((floormod(i, 6) == 5) &amp;&amp; (floormod(j, 4) == 2)), ..(OMITTED).. 6) == 0) &amp;&amp; (floormod(j, 4) == 1)), 0f, select(((floormod(i, 6) == 0) &amp;&amp; (floormod(j, 4) == 0)), 1f, 0f))))))))))))))))))))))))
inverse(vh, vw, p, co) += ((bgemm[r_a, r_b, p, co]*A[r_a, vh])*A[r_b, vw])
conv2d_winograd(n, h, w, co) = inverse[floormod(h, 4), floormod(w, 4), ((((n*7)*7) + (floordiv(h, 4)*7)) + floordiv(w, 4)), co]
p2 = PLACEHOLDER [1, 1, 1, 128]
T_add(ax0, ax1, ax2, ax3) = (conv2d_winograd[ax0, ax1, ax2, ax3] + p2[ax0, 0, 0, ax3])
T_relu(ax0, ax1, ax2, ax3) = max(T_add[ax0, ax1, ax2, ax3], 0f)
========== Task 28 (workload key: [&quot;d37380659057397544e056461ea3bad3&quot;, [1, 7, 7, 512], [3, 3, 512, 512], [1, 1, 1, 512], [1, 7, 7, 512]]) ==========
p0 = PLACEHOLDER [1, 7, 7, 512]
pad_temp(i0, i1, i2, i3) = tir.if_then_else(((((i1 &gt;= 1) &amp;&amp; (i1 &lt; 8)) &amp;&amp; (i2 &gt;= 1)) &amp;&amp; (i2 &lt; 8)), p0[i0, (i1 - 1), (i2 - 1), i3], 0f)
p1 = PLACEHOLDER [3, 3, 512, 512]
conv2d_nhwc(nn, yy, xx, ff) += (pad_temp[nn, (yy + ry), (xx + rx), rc]*p1[ry, rx, rc, ff])
p2 = PLACEHOLDER [1, 1, 1, 512]
T_add(ax0, ax1, ax2, ax3) = (conv2d_nhwc[ax0, ax1, ax2, ax3] + p2[ax0, 0, 0, ax3])
T_relu(ax0, ax1, ax2, ax3) = max(T_add[ax0, ax1, ax2, ax3], 0f)
</pre></div>
</div>
</div>
<div class="section" id="begin-tuning">
<h2>Begin Tuning<a class="headerlink" href="#begin-tuning" title="Permalink to this headline"></a></h2>
<p>Now we set some options for tuning and launch the search tasks:</p>
<ul class="simple">
<li><p><code class="code docutils literal notranslate"><span class="pre">num_measure_trials</span></code> is the number of measurement trials we can use during the tuning.
You can set it to a small number (e.g., 200) for a fast demonstrative run.
In practice, we recommend setting it around <code class="code docutils literal notranslate"><span class="pre">800</span> <span class="pre">*</span> <span class="pre">len(tasks)</span></code>,
which is typically enough for the search to converge.
For example, there are 29 tasks in resnet-50, so we can set it as 20000.
You can adjust this parameter according to your time budget.</p></li>
<li><p>In addition, we use <code class="code docutils literal notranslate"><span class="pre">RecordToFile</span></code> to dump measurement records into a log file.
These records can be used to query the history best, resume the search,
and perform further analyses later.</p></li>
<li><p>See <a class="reference internal" href="../../reference/api/python/auto_scheduler.html#tvm.auto_scheduler.TuningOptions" title="tvm.auto_scheduler.TuningOptions"><code class="xref any py py-class docutils literal notranslate"><span class="pre">auto_scheduler.TuningOptions</span></code></a> and
<a class="reference internal" href="../../reference/api/python/auto_scheduler.html#tvm.auto_scheduler.LocalRunner" title="tvm.auto_scheduler.LocalRunner"><code class="xref any py py-class docutils literal notranslate"><span class="pre">auto_scheduler.LocalRunner</span></code></a> for more parameters.</p></li>
</ul>
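<p>For instance, the rule of thumb above can be written out directly; this is a sketch only, where <code class="code docutils literal notranslate"><span class="pre">tasks</span></code> is the task list extracted earlier:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span># Sketch of the rule of thumb above: roughly 800 trials per task.
# With the 29 ResNet-50 tasks here, this gives 23200, i.e. about 20000.
num_measure_trials = 800 * len(tasks)
</pre></div>
</div>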
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="k">def</span> <span class="nf">run_tuning</span><span class="p">():</span>
<span class="nb">print</span><span class="p">(</span><span class="s2">&quot;Begin tuning...&quot;</span><span class="p">)</span>
<span class="n">tuner</span> <span class="o">=</span> <a href="../../reference/api/python/auto_scheduler.html#tvm.auto_scheduler.TaskScheduler" title="tvm.auto_scheduler.TaskScheduler" class="sphx-glr-backref-module-tvm-auto_scheduler sphx-glr-backref-type-py-class"><span class="n">auto_scheduler</span><span class="o">.</span><span class="n">TaskScheduler</span></a><span class="p">(</span><a href="https://docs.python.org/3/library/stdtypes.html#list" title="builtins.list" class="sphx-glr-backref-module-builtins sphx-glr-backref-type-py-class sphx-glr-backref-instance"><span class="n">tasks</span></a><span class="p">,</span> <a href="https://docs.python.org/3/library/stdtypes.html#list" title="builtins.list" class="sphx-glr-backref-module-builtins sphx-glr-backref-type-py-class sphx-glr-backref-instance"><span class="n">task_weights</span></a><span class="p">)</span>
<span class="n">tune_option</span> <span class="o">=</span> <a href="../../reference/api/python/auto_scheduler.html#tvm.auto_scheduler.TuningOptions" title="tvm.auto_scheduler.TuningOptions" class="sphx-glr-backref-module-tvm-auto_scheduler sphx-glr-backref-type-py-class"><span class="n">auto_scheduler</span><span class="o">.</span><span class="n">TuningOptions</span></a><span class="p">(</span>
<span class="n">num_measure_trials</span><span class="o">=</span><span class="mi">200</span><span class="p">,</span> <span class="c1"># change this to 20000 to achieve the best performance</span>
<span class="n">runner</span><span class="o">=</span><a href="../../reference/api/python/auto_scheduler.html#tvm.auto_scheduler.LocalRunner" title="tvm.auto_scheduler.LocalRunner" class="sphx-glr-backref-module-tvm-auto_scheduler sphx-glr-backref-type-py-class"><span class="n">auto_scheduler</span><span class="o">.</span><span class="n">LocalRunner</span></a><span class="p">(</span><span class="n">repeat</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">enable_cpu_cache_flush</span><span class="o">=</span><span class="kc">True</span><span class="p">),</span>
<span class="n">measure_callbacks</span><span class="o">=</span><span class="p">[</span><a href="../../reference/api/python/auto_scheduler.html#tvm.auto_scheduler.RecordToFile" title="tvm.auto_scheduler.RecordToFile" class="sphx-glr-backref-module-tvm-auto_scheduler sphx-glr-backref-type-py-class"><span class="n">auto_scheduler</span><span class="o">.</span><span class="n">RecordToFile</span></a><span class="p">(</span><a href="https://docs.python.org/3/library/stdtypes.html#str" title="builtins.str" class="sphx-glr-backref-module-builtins sphx-glr-backref-type-py-class sphx-glr-backref-instance"><span class="n">log_file</span></a><span class="p">)],</span>
<span class="p">)</span>
<span class="k">if</span> <a href="https://docs.python.org/3/library/functions.html#bool" title="builtins.bool" class="sphx-glr-backref-module-builtins sphx-glr-backref-type-py-class sphx-glr-backref-instance"><span class="n">use_sparse</span></a><span class="p">:</span>
<span class="kn">from</span> <span class="nn">tvm.topi.sparse.utils</span> <span class="kn">import</span> <span class="n">sparse_sketch_rules</span>
<span class="n">search_policy</span> <span class="o">=</span> <span class="p">[</span>
<a href="../../reference/api/python/auto_scheduler.html#tvm.auto_scheduler.SketchPolicy" title="tvm.auto_scheduler.SketchPolicy" class="sphx-glr-backref-module-tvm-auto_scheduler sphx-glr-backref-type-py-class"><span class="n">auto_scheduler</span><span class="o">.</span><span class="n">SketchPolicy</span></a><span class="p">(</span>
<a href="../../reference/api/python/auto_scheduler.html#tvm.auto_scheduler.SearchTask" title="tvm.auto_scheduler.SearchTask" class="sphx-glr-backref-module-tvm-auto_scheduler sphx-glr-backref-type-py-class sphx-glr-backref-instance"><span class="n">task</span></a><span class="p">,</span>
<span class="n">program_cost_model</span><span class="o">=</span><a href="../../reference/api/python/auto_scheduler.html#tvm.auto_scheduler.XGBModel" title="tvm.auto_scheduler.XGBModel" class="sphx-glr-backref-module-tvm-auto_scheduler sphx-glr-backref-type-py-class"><span class="n">auto_scheduler</span><span class="o">.</span><span class="n">XGBModel</span></a><span class="p">(),</span>
<span class="n">init_search_callbacks</span><span class="o">=</span><span class="n">sparse_sketch_rules</span><span class="p">(),</span>
<span class="p">)</span>
<span class="k">for</span> <a href="../../reference/api/python/auto_scheduler.html#tvm.auto_scheduler.SearchTask" title="tvm.auto_scheduler.SearchTask" class="sphx-glr-backref-module-tvm-auto_scheduler sphx-glr-backref-type-py-class sphx-glr-backref-instance"><span class="n">task</span></a> <span class="ow">in</span> <a href="https://docs.python.org/3/library/stdtypes.html#list" title="builtins.list" class="sphx-glr-backref-module-builtins sphx-glr-backref-type-py-class sphx-glr-backref-instance"><span class="n">tasks</span></a>
<span class="p">]</span>
<span class="n">tuner</span><span class="o">.</span><span class="n">tune</span><span class="p">(</span><span class="n">tune_option</span><span class="p">,</span> <span class="n">search_policy</span><span class="o">=</span><span class="n">search_policy</span><span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">tuner</span><span class="o">.</span><span class="n">tune</span><span class="p">(</span><span class="n">tune_option</span><span class="p">)</span>
<span class="c1"># We do not run the tuning in our webpage server since it takes too long.</span>
<span class="c1"># Uncomment the following line to run it by yourself.</span>
<span class="c1"># run_tuning()</span>
</pre></div>
</div>
<div class="admonition note">
<p class="admonition-title">Note</p>
<p>Explanation of the information printed during tuning</p>
<p>During tuning, a lot of information is printed to the console.
It is mainly useful for debugging. The most important part is the output
of the task scheduler. The following table is a sample output.</p>
<div class="highlight-c notranslate"><div class="highlight"><pre><span></span><span class="o">----------------------------------------------------------------------</span>
<span class="o">------------------------------</span><span class="w"> </span><span class="p">[</span><span class="w"> </span><span class="n">Task</span><span class="w"> </span><span class="n">Scheduler</span><span class="w"> </span><span class="p">]</span>
<span class="o">----------------------------------------------------------------------</span>
<span class="o">|</span><span class="w"> </span><span class="n">ID</span><span class="w"> </span><span class="o">|</span><span class="w"> </span><span class="n">Latency</span><span class="w"> </span><span class="p">(</span><span class="n">ms</span><span class="p">)</span><span class="w"> </span><span class="o">|</span><span class="w"> </span><span class="n">Speed</span><span class="w"> </span><span class="p">(</span><span class="n">GFLOPS</span><span class="p">)</span><span class="w"> </span><span class="o">|</span><span class="w"> </span><span class="n">Trials</span><span class="w"> </span><span class="o">|</span>
<span class="o">-------------------------------------------------</span>
<span class="o">|</span><span class="w"> </span><span class="mi">0</span><span class="w"> </span><span class="o">|</span><span class="w"> </span><span class="mf">0.010</span><span class="w"> </span><span class="o">|</span><span class="w"> </span><span class="mf">0.40</span><span class="w"> </span><span class="o">|</span><span class="w"> </span><span class="mi">64</span><span class="w"> </span><span class="o">|</span>
<span class="o">|</span><span class="w"> </span><span class="mi">1</span><span class="w"> </span><span class="o">|</span><span class="w"> </span><span class="mf">0.087</span><span class="w"> </span><span class="o">|</span><span class="w"> </span><span class="mf">47.19</span><span class="w"> </span><span class="o">|</span><span class="w"> </span><span class="mi">64</span><span class="w"> </span><span class="o">|</span>
<span class="o">|</span><span class="w"> </span><span class="mi">2</span><span class="w"> </span><span class="o">|</span><span class="w"> </span><span class="mf">0.008</span><span class="w"> </span><span class="o">|</span><span class="w"> </span><span class="mf">-0.00</span><span class="w"> </span><span class="o">|</span><span class="w"> </span><span class="mi">64</span><span class="w"> </span><span class="o">|</span>
<span class="o">|</span><span class="w"> </span><span class="mi">3</span><span class="w"> </span><span class="o">|</span><span class="w"> </span><span class="mf">0.177</span><span class="w"> </span><span class="o">|</span><span class="w"> </span><span class="mf">582.07</span><span class="w"> </span><span class="o">|</span><span class="w"> </span><span class="mi">64</span><span class="w"> </span><span class="o">|</span>
<span class="o">|</span><span class="w"> </span><span class="mi">4</span><span class="w"> </span><span class="o">|</span><span class="w"> </span><span class="mf">0.268</span><span class="w"> </span><span class="o">|</span><span class="w"> </span><span class="mf">862.37</span><span class="w"> </span><span class="o">|</span><span class="w"> </span><span class="mi">256</span><span class="w"> </span><span class="o">|</span>
<span class="o">|</span><span class="w"> </span><span class="mi">5</span><span class="w"> </span><span class="o">|</span><span class="w"> </span><span class="mf">0.166</span><span class="w"> </span><span class="o">|</span><span class="w"> </span><span class="mf">621.13</span><span class="w"> </span><span class="o">|</span><span class="w"> </span><span class="mi">128</span><span class="w"> </span><span class="o">|</span>
<span class="o">|</span><span class="w"> </span><span class="mi">6</span><span class="w"> </span><span class="o">|</span><span class="w"> </span><span class="mf">0.170</span><span class="w"> </span><span class="o">|</span><span class="w"> </span><span class="mf">605.10</span><span class="w"> </span><span class="o">|</span><span class="w"> </span><span class="mi">128</span><span class="w"> </span><span class="o">|</span>
<span class="o">|</span><span class="w"> </span><span class="mi">7</span><span class="w"> </span><span class="o">|</span><span class="w"> </span><span class="mf">0.128</span><span class="w"> </span><span class="o">|</span><span class="w"> </span><span class="mf">403.20</span><span class="w"> </span><span class="o">|</span><span class="w"> </span><span class="mi">64</span><span class="w"> </span><span class="o">|</span>
<span class="o">|</span><span class="w"> </span><span class="mi">8</span><span class="w"> </span><span class="o">|</span><span class="w"> </span><span class="mf">0.189</span><span class="w"> </span><span class="o">|</span><span class="w"> </span><span class="mf">545.71</span><span class="w"> </span><span class="o">|</span><span class="w"> </span><span class="mi">64</span><span class="w"> </span><span class="o">|</span>
<span class="o">|</span><span class="w"> </span><span class="mi">9</span><span class="w"> </span><span class="o">|</span><span class="w"> </span><span class="mf">0.231</span><span class="w"> </span><span class="o">|</span><span class="w"> </span><span class="mf">1001.01</span><span class="w"> </span><span class="o">|</span><span class="w"> </span><span class="mi">448</span><span class="w"> </span><span class="o">|</span>
<span class="o">|</span><span class="w"> </span><span class="mi">10</span><span class="w"> </span><span class="o">|</span><span class="w"> </span><span class="mf">0.155</span><span class="w"> </span><span class="o">|</span><span class="w"> </span><span class="mf">664.80</span><span class="w"> </span><span class="o">|</span><span class="w"> </span><span class="mi">256</span><span class="w"> </span><span class="o">|</span>
<span class="o">|</span><span class="w"> </span><span class="mi">11</span><span class="w"> </span><span class="o">|</span><span class="w"> </span><span class="mf">0.155</span><span class="w"> </span><span class="o">|</span><span class="w"> </span><span class="mf">662.86</span><span class="w"> </span><span class="o">|</span><span class="w"> </span><span class="mi">256</span><span class="w"> </span><span class="o">|</span>
<span class="o">|</span><span class="w"> </span><span class="mi">12</span><span class="w"> </span><span class="o">|</span><span class="w"> </span><span class="mf">0.119</span><span class="w"> </span><span class="o">|</span><span class="w"> </span><span class="mf">434.08</span><span class="w"> </span><span class="o">|</span><span class="w"> </span><span class="mi">64</span><span class="w"> </span><span class="o">|</span>
<span class="o">|</span><span class="w"> </span><span class="mi">13</span><span class="w"> </span><span class="o">|</span><span class="w"> </span><span class="mf">0.199</span><span class="w"> </span><span class="o">|</span><span class="w"> </span><span class="mf">522.13</span><span class="w"> </span><span class="o">|</span><span class="w"> </span><span class="mi">64</span><span class="w"> </span><span class="o">|</span>
<span class="o">|</span><span class="w"> </span><span class="mi">14</span><span class="w"> </span><span class="o">|</span><span class="w"> </span><span class="mf">0.235</span><span class="w"> </span><span class="o">|</span><span class="w"> </span><span class="mf">986.56</span><span class="w"> </span><span class="o">|</span><span class="w"> </span><span class="mi">320</span><span class="w"> </span><span class="o">|</span>
<span class="o">|</span><span class="w"> </span><span class="mi">15</span><span class="w"> </span><span class="o">|</span><span class="w"> </span><span class="mf">0.149</span><span class="w"> </span><span class="o">|</span><span class="w"> </span><span class="mf">689.13</span><span class="w"> </span><span class="o">|</span><span class="w"> </span><span class="mi">128</span><span class="w"> </span><span class="o">|</span>
<span class="o">|</span><span class="w"> </span><span class="mi">16</span><span class="w"> </span><span class="o">|</span><span class="w"> </span><span class="mf">0.155</span><span class="w"> </span><span class="o">|</span><span class="w"> </span><span class="mf">664.80</span><span class="w"> </span><span class="o">|</span><span class="w"> </span><span class="mi">192</span><span class="w"> </span><span class="o">|</span>
<span class="o">|</span><span class="w"> </span><span class="mi">17</span><span class="w"> </span><span class="o">|</span><span class="w"> </span><span class="mf">0.151</span><span class="w"> </span><span class="o">|</span><span class="w"> </span><span class="mf">340.64</span><span class="w"> </span><span class="o">|</span><span class="w"> </span><span class="mi">64</span><span class="w"> </span><span class="o">|</span>
<span class="o">|</span><span class="w"> </span><span class="mi">18</span><span class="w"> </span><span class="o">|</span><span class="w"> </span><span class="mf">0.176</span><span class="w"> </span><span class="o">|</span><span class="w"> </span><span class="mf">597.55</span><span class="w"> </span><span class="o">|</span><span class="w"> </span><span class="mi">128</span><span class="w"> </span><span class="o">|</span>
<span class="o">|</span><span class="w"> </span><span class="mi">19</span><span class="w"> </span><span class="o">|</span><span class="w"> </span><span class="mf">0.220</span><span class="w"> </span><span class="o">|</span><span class="w"> </span><span class="mf">1054.37</span><span class="w"> </span><span class="o">|</span><span class="w"> </span><span class="mi">192</span><span class="w"> </span><span class="o">|</span>
<span class="o">|</span><span class="w"> </span><span class="mi">20</span><span class="w"> </span><span class="o">|</span><span class="w"> </span><span class="mf">0.150</span><span class="w"> </span><span class="o">|</span><span class="w"> </span><span class="mf">686.01</span><span class="w"> </span><span class="o">|</span><span class="w"> </span><span class="mi">128</span><span class="w"> </span><span class="o">|</span>
<span class="o">|</span><span class="w"> </span><span class="mi">21</span><span class="w"> </span><span class="o">|</span><span class="w"> </span><span class="mf">0.159</span><span class="w"> </span><span class="o">|</span><span class="w"> </span><span class="mf">650.88</span><span class="w"> </span><span class="o">|</span><span class="w"> </span><span class="mi">128</span><span class="w"> </span><span class="o">|</span>
<span class="o">|</span><span class="w"> </span><span class="mi">22</span><span class="w"> </span><span class="o">|</span><span class="w"> </span><span class="mf">0.073</span><span class="w"> </span><span class="o">|</span><span class="w"> </span><span class="mf">358.19</span><span class="w"> </span><span class="o">|</span><span class="w"> </span><span class="mi">64</span><span class="w"> </span><span class="o">|</span>
<span class="o">|</span><span class="w"> </span><span class="mi">23</span><span class="w"> </span><span class="o">|</span><span class="w"> </span><span class="mf">0.031</span><span class="w"> </span><span class="o">|</span><span class="w"> </span><span class="mf">70.63</span><span class="w"> </span><span class="o">|</span><span class="w"> </span><span class="mi">64</span><span class="w"> </span><span class="o">|</span>
<span class="o">|</span><span class="w"> </span><span class="mi">24</span><span class="w"> </span><span class="o">|</span><span class="w"> </span><span class="mf">0.251</span><span class="w"> </span><span class="o">|</span><span class="w"> </span><span class="mf">947.73</span><span class="w"> </span><span class="o">|</span><span class="w"> </span><span class="mi">128</span><span class="w"> </span><span class="o">|</span>
<span class="o">|</span><span class="w"> </span><span class="mi">25</span><span class="w"> </span><span class="o">|</span><span class="w"> </span><span class="mf">0.157</span><span class="w"> </span><span class="o">|</span><span class="w"> </span><span class="mf">652.47</span><span class="w"> </span><span class="o">|</span><span class="w"> </span><span class="mi">128</span><span class="w"> </span><span class="o">|</span>
<span class="o">|</span><span class="w"> </span><span class="mi">26</span><span class="w"> </span><span class="o">|</span><span class="w"> </span><span class="mf">0.215</span><span class="w"> </span><span class="o">|</span><span class="w"> </span><span class="mf">954.84</span><span class="w"> </span><span class="o">|</span><span class="w"> </span><span class="mi">128</span><span class="w"> </span><span class="o">|</span>
<span class="o">|</span><span class="w"> </span><span class="mi">27</span><span class="w"> </span><span class="o">|</span><span class="w"> </span><span class="mf">0.237</span><span class="w"> </span><span class="o">|</span><span class="w"> </span><span class="mf">868.92</span><span class="w"> </span><span class="o">|</span><span class="w"> </span><span class="mi">128</span><span class="w"> </span><span class="o">|</span>
<span class="o">|</span><span class="w"> </span><span class="mi">28</span><span class="w"> </span><span class="o">|</span><span class="w"> </span><span class="mf">0.266</span><span class="w"> </span><span class="o">|</span><span class="w"> </span><span class="mf">774.06</span><span class="w"> </span><span class="o">|</span><span class="w"> </span><span class="mi">128</span><span class="w"> </span><span class="o">|</span>
<span class="o">-------------------------------------------------</span>
<span class="n">Estimated</span><span class="w"> </span><span class="n">total</span><span class="w"> </span><span class="n">latency</span><span class="o">:</span><span class="w"> </span><span class="mf">10.016</span><span class="w"> </span><span class="n">ms</span><span class="w"> </span><span class="n">Trials</span><span class="o">:</span><span class="w"> </span><span class="mi">3992</span><span class="w"> </span><span class="n">Used</span><span class="w"> </span><span class="n">time</span><span class="w"> </span><span class="o">:</span><span class="w"> </span><span class="mi">1131</span><span class="w"> </span><span class="n">s</span><span class="w"> </span><span class="n">Next</span><span class="w"> </span><span class="n">ID</span><span class="o">:</span><span class="w"> </span><span class="mi">15</span>
</pre></div>
</div>
<p>This table lists the latency and (estimated) speed of every task,
as well as the allocation of measurement trials across tasks.
The last line prints the total weighted latency of these tasks,
which can serve as a rough estimate of the end-to-end execution time
of the network, followed by the total number of measurement trials,
the total time spent on auto-tuning, and the id of the next task to tune.</p>
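<p>Conceptually, the estimated total latency is a weighted sum over tasks. The sketch below is illustrative only, not the scheduler's internal code; <code class="code docutils literal notranslate"><span class="pre">best_task_latencies_ms</span></code> is an assumed list of each task's best measured latency:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span># Illustrative sketch: weight each task's best latency by how many times
# its workload appears in the network, then sum over all tasks.
# `best_task_latencies_ms` is a hypothetical list, one entry per task.
estimated_total_ms = sum(
    weight * latency_ms
    for weight, latency_ms in zip(task_weights, best_task_latencies_ms)
)
</pre></div>
</div>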
<p>There will also be some “tvm::Error” messages, because the
auto-scheduler tries some invalid schedules.
You can safely ignore them as long as the tuning continues, because these
errors are isolated from the main process.</p>
</div>
<div class="admonition note">
<p class="admonition-title">Note</p>
<p>Terminate the tuning earlier</p>
<p>You can terminate the tuning early by forcibly killing this process.
As long as the log file contains at least one valid schedule for each task,
you should be able to do the compilation (the section below).
A sketch for checking the log follows this note.</p>
</div>
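<p>As a quick check before stopping, a minimal sketch (assuming the <code class="code docutils literal notranslate"><span class="pre">log_file</span></code> used above) can count the valid records per workload with <code class="code docutils literal notranslate"><span class="pre">auto_scheduler.load_records</span></code>:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span># Minimal sketch: count valid measurement records per workload in the log.
from collections import Counter

from tvm import auto_scheduler

valid_counts = Counter()
for inp, res in auto_scheduler.load_records(log_file):
    if res.error_no == 0:  # 0 means the measurement succeeded
        valid_counts[inp.task.workload_key] += 1
print(valid_counts)
</pre></div>
</div>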
</div>
<div class="section" id="compile-and-evaluate">
<h2>Compile and Evaluate<a class="headerlink" href="#compile-and-evaluate" title="Permalink to this headline"></a></h2>
<p>After auto-tuning, we can compile the network with the best schedules we found.
All measurement records are dumped into the log file during auto-tuning,
so we can read the log file and load the best schedules.</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="c1"># Compile with the history best</span>
<span class="nb">print</span><span class="p">(</span><span class="s2">&quot;Compile...&quot;</span><span class="p">)</span>
<span class="k">with</span> <a href="../../reference/api/python/auto_scheduler.html#tvm.auto_scheduler.ApplyHistoryBest" title="tvm.auto_scheduler.ApplyHistoryBest" class="sphx-glr-backref-module-tvm-auto_scheduler sphx-glr-backref-type-py-class"><span class="n">auto_scheduler</span><span class="o">.</span><span class="n">ApplyHistoryBest</span></a><span class="p">(</span><a href="https://docs.python.org/3/library/stdtypes.html#str" title="builtins.str" class="sphx-glr-backref-module-builtins sphx-glr-backref-type-py-class sphx-glr-backref-instance"><span class="n">log_file</span></a><span class="p">):</span>
<span class="k">with</span> <a href="../../reference/api/python/ir.html#tvm.transform.PassContext" title="tvm.transform.PassContext" class="sphx-glr-backref-module-tvm-transform sphx-glr-backref-type-py-class sphx-glr-backref-instance"><span class="n">tvm</span><span class="o">.</span><span class="n">transform</span><span class="o">.</span><span class="n">PassContext</span></a><span class="p">(</span><span class="n">opt_level</span><span class="o">=</span><span class="mi">3</span><span class="p">,</span> <span class="n">config</span><span class="o">=</span><span class="p">{</span><span class="s2">&quot;relay.backend.use_auto_scheduler&quot;</span><span class="p">:</span> <span class="kc">True</span><span class="p">}):</span>
<span class="n">lib</span> <span class="o">=</span> <span class="n">relay</span><span class="o">.</span><span class="n">build</span><span class="p">(</span><span class="n">mod</span><span class="p">,</span> <a href="../../reference/api/python/target.html#tvm.target.Target" title="tvm.target.Target" class="sphx-glr-backref-module-tvm-target sphx-glr-backref-type-py-class sphx-glr-backref-instance"><span class="n">target</span></a><span class="o">=</span><a href="../../reference/api/python/target.html#tvm.target.Target" title="tvm.target.Target" class="sphx-glr-backref-module-tvm-target sphx-glr-backref-type-py-class sphx-glr-backref-instance"><span class="n">target</span></a><span class="p">,</span> <a href="https://docs.python.org/3/library/stdtypes.html#dict" title="builtins.dict" class="sphx-glr-backref-module-builtins sphx-glr-backref-type-py-class sphx-glr-backref-instance"><span class="n">params</span></a><span class="o">=</span><a href="https://docs.python.org/3/library/stdtypes.html#dict" title="builtins.dict" class="sphx-glr-backref-module-builtins sphx-glr-backref-type-py-class sphx-glr-backref-instance"><span class="n">params</span></a><span class="p">)</span>
<span class="c1"># Create graph executor</span>
<span class="n">dev</span> <span class="o">=</span> <span class="n">tvm</span><span class="o">.</span><span class="n">device</span><span class="p">(</span><span class="nb">str</span><span class="p">(</span><a href="../../reference/api/python/target.html#tvm.target.Target" title="tvm.target.Target" class="sphx-glr-backref-module-tvm-target sphx-glr-backref-type-py-class sphx-glr-backref-instance"><span class="n">target</span></a><span class="p">),</span> <span class="mi">0</span><span class="p">)</span>
<a href="../../reference/api/python/graph_executor.html#tvm.contrib.graph_executor.GraphModule" title="tvm.contrib.graph_executor.GraphModule" class="sphx-glr-backref-module-tvm-contrib-graph_executor sphx-glr-backref-type-py-class sphx-glr-backref-instance"><span class="n">module</span></a> <span class="o">=</span> <a href="../../reference/api/python/graph_executor.html#tvm.contrib.graph_executor.GraphModule" title="tvm.contrib.graph_executor.GraphModule" class="sphx-glr-backref-module-tvm-contrib-graph_executor sphx-glr-backref-type-py-class"><span class="n">graph_executor</span><span class="o">.</span><span class="n">GraphModule</span></a><span class="p">(</span><span class="n">lib</span><span class="p">[</span><span class="s2">&quot;default&quot;</span><span class="p">](</span><span class="n">dev</span><span class="p">))</span>
<span class="n">data_tvm</span> <span class="o">=</span> <a href="../../reference/api/python/ndarray.html#tvm.nd.array" title="tvm.nd.array" class="sphx-glr-backref-module-tvm-nd sphx-glr-backref-type-py-function"><span class="n">tvm</span><span class="o">.</span><span class="n">nd</span><span class="o">.</span><span class="n">array</span></a><span class="p">((</span><span class="n">np</span><span class="o">.</span><span class="n">random</span><span class="o">.</span><span class="n">uniform</span><span class="p">(</span><span class="n">size</span><span class="o">=</span><a href="https://docs.python.org/3/library/stdtypes.html#tuple" title="builtins.tuple" class="sphx-glr-backref-module-builtins sphx-glr-backref-type-py-class sphx-glr-backref-instance"><span class="n">input_shape</span></a><span class="p">))</span><span class="o">.</span><span class="n">astype</span><span class="p">(</span><a href="https://docs.python.org/3/library/stdtypes.html#str" title="builtins.str" class="sphx-glr-backref-module-builtins sphx-glr-backref-type-py-class sphx-glr-backref-instance"><span class="n">dtype</span></a><span class="p">))</span>
<a href="../../reference/api/python/graph_executor.html#tvm.contrib.graph_executor.GraphModule.set_input" title="tvm.contrib.graph_executor.GraphModule.set_input" class="sphx-glr-backref-module-tvm-contrib-graph_executor sphx-glr-backref-type-py-method"><span class="n">module</span><span class="o">.</span><span class="n">set_input</span></a><span class="p">(</span><span class="s2">&quot;data&quot;</span><span class="p">,</span> <span class="n">data_tvm</span><span class="p">)</span>
<span class="c1"># Evaluate</span>
<span class="nb">print</span><span class="p">(</span><span class="s2">&quot;Evaluate inference time cost...&quot;</span><span class="p">)</span>
<span class="nb">print</span><span class="p">(</span><a href="../../reference/api/python/graph_executor.html#tvm.contrib.graph_executor.GraphModule.benchmark" title="tvm.contrib.graph_executor.GraphModule.benchmark" class="sphx-glr-backref-module-tvm-contrib-graph_executor sphx-glr-backref-type-py-method"><span class="n">module</span><span class="o">.</span><span class="n">benchmark</span></a><span class="p">(</span><span class="n">dev</span><span class="p">,</span> <span class="n">repeat</span><span class="o">=</span><span class="mi">3</span><span class="p">,</span> <span class="n">min_repeat_ms</span><span class="o">=</span><span class="mi">500</span><span class="p">))</span>
</pre></div>
</div>
<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Compile...
Evaluate inference time cost...
Execution time summary:
mean (ms) median (ms) max (ms) min (ms) std (ms)
760.8692 760.1869 764.7312 757.6894 2.9150
</pre></div>
</div>
</div>
<div class="section" id="other-tips">
<h2>Other Tips<a class="headerlink" href="#other-tips" title="Permalink to this headline"></a></h2>
<ol class="arabic simple">
<li><p>During tuning, the auto-scheduler needs to compile many programs and
extract features from them. This part is CPU-intensive,
so a high-performance CPU with many cores is recommended for a faster search.</p></li>
<li><p>You can use <code class="code docutils literal notranslate"><span class="pre">python3</span> <span class="pre">-m</span> <span class="pre">tvm.auto_scheduler.measure_record</span> <span class="pre">--mode</span> <span class="pre">distill</span> <span class="pre">-i</span> <span class="pre">log.json</span></code>
to distill the large log file and save only the best records.</p></li>
<li><p>You can resume a search from a previous log file. You only need to
add the argument <code class="code docutils literal notranslate"><span class="pre">load_log_file</span></code> when creating the task scheduler
in function <code class="code docutils literal notranslate"><span class="pre">run_tuning</span></code>. For example,
<code class="code docutils literal notranslate"><span class="pre">tuner</span> <span class="pre">=</span> <span class="pre">auto_scheduler.TaskScheduler(tasks,</span> <span class="pre">task_weights,</span> <span class="pre">load_log_file=log_file)</span></code></p></li>
<li><p>If you have multiple target CPUs, you can use all of them to
parallelize the measurements. Check this <a class="reference internal" href="../tune_with_autotvm/tune_relay_cuda.html#tutorials-autotvm-scale-up-rpc-tracker"><span class="std std-ref">section</span></a>
to learn how to use the RPC Tracker and RPC Server.
To use the RPC Tracker in the auto-scheduler, replace the runner in <code class="code docutils literal notranslate"><span class="pre">TuningOptions</span></code>
with <a class="reference internal" href="../../reference/api/python/auto_scheduler.html#tvm.auto_scheduler.RPCRunner" title="tvm.auto_scheduler.RPCRunner"><code class="xref any py py-class docutils literal notranslate"><span class="pre">auto_scheduler.RPCRunner</span></code></a>, as in the sketch after this list.</p></li>
</ol>
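<p>A minimal sketch of that replacement follows; the device key <code class="code docutils literal notranslate"><span class="pre">&quot;my-cpu&quot;</span></code> and the tracker host/port are assumptions, so substitute the values your devices were registered with:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span># Sketch: dispatch measurements to devices registered in an RPC tracker
# instead of running them locally.
tune_option = auto_scheduler.TuningOptions(
    num_measure_trials=20000,
    runner=auto_scheduler.RPCRunner(
        &quot;my-cpu&quot;,          # device key registered in the tracker (assumption)
        host=&quot;127.0.0.1&quot;,  # tracker host (assumption)
        port=9190,         # tracker port (assumption)
        repeat=10,
        enable_cpu_cache_flush=True,
    ),
    measure_callbacks=[auto_scheduler.RecordToFile(log_file)],
)
</pre></div>
</div>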
<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 1 minutes 44.224 seconds)</p>
<div class="sphx-glr-footer sphx-glr-footer-example docutils container" id="sphx-glr-download-how-to-tune-with-autoscheduler-tune-network-x86-py">
<div class="sphx-glr-download sphx-glr-download-python docutils container">
<p><a class="reference download internal" download="" href="../../_downloads/e416b94ca1090b0897c0f6e0df95b911/tune_network_x86.py"><code class="xref download docutils literal notranslate"><span class="pre">Download</span> <span class="pre">Python</span> <span class="pre">source</span> <span class="pre">code:</span> <span class="pre">tune_network_x86.py</span></code></a></p>
</div>
<div class="sphx-glr-download sphx-glr-download-jupyter docutils container">
<p><a class="reference download internal" download="" href="../../_downloads/ad2a7f55d615d188ad664d56696815a6/tune_network_x86.ipynb"><code class="xref download docutils literal notranslate"><span class="pre">Download</span> <span class="pre">Jupyter</span> <span class="pre">notebook:</span> <span class="pre">tune_network_x86.ipynb</span></code></a></p>
</div>
</div>
<p class="sphx-glr-signature"><a class="reference external" href="https://sphinx-gallery.github.io">Gallery generated by Sphinx-Gallery</a></p>
</div>
</div>
</div>
</div>
<footer>
<div class="rst-footer-buttons" role="navigation" aria-label="footer navigation">
<a href="tune_network_cuda.html" class="btn btn-neutral float-right" title="Auto-scheduling a Neural Network for NVIDIA GPU" accesskey="n" rel="next">Next <span class="fa fa-arrow-circle-right"></span></a>
<a href="tune_conv2d_layer_cuda.html" class="btn btn-neutral float-left" title="Auto-scheduling a Convolution Layer for GPU" accesskey="p" rel="prev"><span class="fa fa-arrow-circle-left"></span> Previous</a>
</div>
<div id="button" class="backtop"><img src="../../_static/img/right.svg" alt="backtop"/> </div>
<section class="footerSec">
<div class="footerHeader">
<div class="d-flex align-md-items-center justify-content-between flex-column flex-md-row">
<div class="copywrite d-flex align-items-center">
<h5 id="copy-right-info">© 2023 Apache Software Foundation | All rights reserved</h5>
</div>
</div>
</div>
<div>
<div class="footernote">Copyright © 2023 The Apache Software Foundation. Apache TVM, Apache, the Apache feather, and the Apache TVM project logo are either trademarks or registered trademarks of the Apache Software Foundation.</div>
</div>
</section>
</footer>
</div>
</div>
</section>
</div>
<script src="https://cdnjs.cloudflare.com/ajax/libs/popper.js/1.12.9/umd/popper.min.js" integrity="sha384-ApNbgh9B+Y1QKtv3Rn7W3mgPxhU9K/ScQsAP7hUibX39j7fakFPskvXusvfa0b4Q" crossorigin="anonymous"></script>
<script src="https://maxcdn.bootstrapcdn.com/bootstrap/4.0.0/js/bootstrap.min.js" integrity="sha384-JZR6Spejh4U02d8jOt6vLEHfe/JQGiRRSQQxSfFWpi1MquVdAyjUar5+76PVCmYl" crossorigin="anonymous"></script>
<script type="text/javascript">
jQuery(function () {
SphinxRtdTheme.Navigation.enable(true);
});
</script>
<!-- Theme Analytics -->
<script>
(function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
(i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
})(window,document,'script','https://www.google-analytics.com/analytics.js','ga');
ga('create', 'UA-75982049-2', 'auto');
ga('send', 'pageview');
</script>
</body>
</html>