<div class="sphx-glr-download-link-note admonition note">
<p class="admonition-title">Note</p>
<p>This tutorial can be used interactively with Google Colab! You can also click
<a class="reference internal" href="#sphx-glr-download-how-to-deploy-models-deploy-prequantized-py"><span class="std std-ref">here</span></a> to run the Jupyter notebook locally.</p>
<a class="reference external image-reference" href="https://colab.research.google.com/github/apache/tvm-site/blob/asf-site/docs/_downloads/c20f81a94729f461f33b52cc110fd9d6/deploy_prequantized.ipynb"><img alt="https://raw.githubusercontent.com/tlc-pack/web-data/main/images/utilities/colab_button.svg" class="align-center" src="https://raw.githubusercontent.com/tlc-pack/web-data/main/images/utilities/colab_button.svg" width="300px" /></a>
</div>
<div class="sphx-glr-example-title section" id="deploy-a-framework-prequantized-model-with-tvm">
<span id="sphx-glr-how-to-deploy-models-deploy-prequantized-py"></span><h1>Deploy a Framework-prequantized Model with TVM<a class="headerlink" href="#deploy-a-framework-prequantized-model-with-tvm" title="Permalink to this headline"></a></h1>
<p><strong>Author</strong>: <a class="reference external" href="https://github.com/masahi">Masahiro Masuda</a></p>
This is a tutorial on loading models quantized by deep learning frameworks into TVM. Pre-quantized model import is one of the forms of quantization support we have in TVM. More details on the quantization story in TVM can be found [here](https://discuss.tvm.apache.org/t/quantization-story/3920).

Here, we demonstrate how to load and run models quantized by PyTorch, MXNet, and TFLite. Once loaded, we can run compiled, quantized models on any hardware TVM supports.

First, the necessary imports:
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="kn">from</span> <span class="nn">PIL</span> <span class="kn">import</span> <span class="n">Image</span>
<span class="kn">import</span> <span class="nn">numpy</span> <span class="k">as</span> <span class="nn">np</span>
<span class="kn">import</span> <span class="nn">torch</span>
<span class="kn">from</span> <span class="nn">torchvision.models.quantization</span> <span class="kn">import</span> <span class="n">mobilenet</span> <span class="k">as</span> <span class="n">qmobilenet</span>
<span class="kn">import</span> <span class="nn">tvm</span>
<span class="kn">from</span> <span class="nn">tvm</span> <span class="kn">import</span> <span class="n">relay</span>
<span class="kn">from</span> <span class="nn">tvm.contrib.download</span> <span class="kn">import</span> <span class="n">download_testdata</span>
</pre></div>
</div>
Helper functions to run the demo:

```python
def get_transform():
    import torchvision.transforms as transforms

    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    return transforms.Compose(
        [
            transforms.Resize(256),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            normalize,
        ]
    )


def get_real_image(im_height, im_width):
    img_url = "https://github.com/dmlc/mxnet.js/blob/main/data/cat.png?raw=true"
    img_path = download_testdata(img_url, "cat.png", module="data")
    return Image.open(img_path).resize((im_height, im_width))


def get_imagenet_input():
    im = get_real_image(224, 224)
    preprocess = get_transform()
    pt_tensor = preprocess(im)
    return np.expand_dims(pt_tensor.numpy(), 0)


def get_synset():
    synset_url = "".join(
        [
            "https://gist.githubusercontent.com/zhreshold/",
            "4d0b62f3d01426887599d4f7ede23ee5/raw/",
            "596b27d23537e5a1b5751d2b0481ef172f58b539/",
            "imagenet1000_clsid_to_human.txt",
        ]
    )
    synset_name = "imagenet1000_clsid_to_human.txt"
    synset_path = download_testdata(synset_url, synset_name, module="data")
    with open(synset_path) as f:
        return eval(f.read())


def run_tvm_model(mod, params, input_name, inp, target="llvm"):
    with tvm.transform.PassContext(opt_level=3):
        lib = relay.build(mod, target=target, params=params)

    runtime = tvm.contrib.graph_executor.GraphModule(lib["default"](tvm.device(target, 0)))

    runtime.set_input(input_name, inp)
    runtime.run()
    return runtime.get_output(0).numpy(), runtime
```
A mapping from label to class name, to verify that the outputs from the models below are reasonable:

```python
synset = get_synset()
```
Everyone's favorite cat image for demonstration:

```python
inp = get_imagenet_input()
```
<div class="section" id="deploy-a-quantized-pytorch-model">
<h2>Deploy a quantized PyTorch Model<a class="headerlink" href="#deploy-a-quantized-pytorch-model" title="Permalink to this headline"></a></h2>
<p>First, we demonstrate how to load deep learning models quantized by PyTorch,
using our PyTorch frontend.</p>
<p>Please refer to the PyTorch static quantization tutorial below to learn about
their quantization workflow.
<a class="reference external" href="https://pytorch.org/tutorials/advanced/static_quantization_tutorial.html">https://pytorch.org/tutorials/advanced/static_quantization_tutorial.html</a></p>
<p>We use this function to quantize PyTorch models.
In short, this function takes a floating point model and converts it to uint8.
The model is per-channel quantized.</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="k">def</span> <span class="nf">quantize_model</span><span class="p">(</span><span class="n">model</span><span class="p">,</span> <span class="n">inp</span><span class="p">):</span>
<span class="n">model</span><span class="o">.</span><span class="n">fuse_model</span><span class="p">()</span>
<span class="n">model</span><span class="o">.</span><span class="n">qconfig</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">quantization</span><span class="o">.</span><span class="n">get_default_qconfig</span><span class="p">(</span><span class="s2">&quot;fbgemm&quot;</span><span class="p">)</span>
<span class="n">torch</span><span class="o">.</span><span class="n">quantization</span><span class="o">.</span><span class="n">prepare</span><span class="p">(</span><span class="n">model</span><span class="p">,</span> <span class="n">inplace</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
<span class="c1"># Dummy calibration</span>
<span class="n">model</span><span class="p">(</span><span class="n">inp</span><span class="p">)</span>
<span class="n">torch</span><span class="o">.</span><span class="n">quantization</span><span class="o">.</span><span class="n">convert</span><span class="p">(</span><span class="n">model</span><span class="p">,</span> <span class="n">inplace</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
</pre></div>
</div>
</div>
<div class="section" id="load-quantization-ready-pretrained-mobilenet-v2-model-from-torchvision">
<h2>Load quantization-ready, pretrained Mobilenet v2 model from torchvision<a class="headerlink" href="#load-quantization-ready-pretrained-mobilenet-v2-model-from-torchvision" title="Permalink to this headline"></a></h2>
<p>We choose mobilenet v2 because this model was trained with quantization aware
training. Other models require a full post training calibration.</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">qmodel</span> <span class="o">=</span> <span class="n">qmobilenet</span><span class="o">.</span><span class="n">mobilenet_v2</span><span class="p">(</span><span class="n">pretrained</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span><span class="o">.</span><span class="n">eval</span><span class="p">()</span>
</pre></div>
</div>
<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>/venv/apache-tvm-py3.8/lib/python3.8/site-packages/torchvision/models/_utils.py:208: UserWarning: The parameter &#39;pretrained&#39; is deprecated since 0.13 and may be removed in the future, please use &#39;weights&#39; instead.
warnings.warn(
/venv/apache-tvm-py3.8/lib/python3.8/site-packages/torchvision/models/_utils.py:223: UserWarning: Arguments other than a weight enum or `None` for &#39;weights&#39; are deprecated since 0.13 and may be removed in the future. The current behavior is equivalent to passing `weights=MobileNet_V2_Weights.IMAGENET1K_V1`. You can also use `weights=MobileNet_V2_Weights.DEFAULT` to get the most up-to-date weights.
warnings.warn(msg)
Downloading: &quot;https://download.pytorch.org/models/mobilenet_v2-b0353104.pth&quot; to /workspace/.cache/torch/hub/checkpoints/mobilenet_v2-b0353104.pth
0%| | 0.00/13.6M [00:00&lt;?, ?B/s]
47%|####6 | 6.30M/13.6M [00:00&lt;00:00, 31.8MB/s]
69%|######8 | 9.34M/13.6M [00:00&lt;00:00, 21.2MB/s]
90%|########9 | 12.2M/13.6M [00:00&lt;00:00, 22.4MB/s]
100%|##########| 13.6M/13.6M [00:00&lt;00:00, 25.6MB/s]
</pre></div>
</div>
</div>
<div class="section" id="quantize-trace-and-run-the-pytorch-mobilenet-v2-model">
<h2>Quantize, trace and run the PyTorch Mobilenet v2 model<a class="headerlink" href="#quantize-trace-and-run-the-pytorch-mobilenet-v2-model" title="Permalink to this headline"></a></h2>
<p>The details are out of scope for this tutorial. Please refer to the tutorials
on the PyTorch website to learn about quantization and jit.</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">pt_inp</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">from_numpy</span><span class="p">(</span><span class="n">inp</span><span class="p">)</span>
<span class="n">quantize_model</span><span class="p">(</span><span class="n">qmodel</span><span class="p">,</span> <span class="n">pt_inp</span><span class="p">)</span>
<span class="n">script_module</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">jit</span><span class="o">.</span><span class="n">trace</span><span class="p">(</span><span class="n">qmodel</span><span class="p">,</span> <span class="n">pt_inp</span><span class="p">)</span><span class="o">.</span><span class="n">eval</span><span class="p">()</span>
<span class="k">with</span> <span class="n">torch</span><span class="o">.</span><span class="n">no_grad</span><span class="p">():</span>
<span class="n">pt_result</span> <span class="o">=</span> <span class="n">script_module</span><span class="p">(</span><span class="n">pt_inp</span><span class="p">)</span><span class="o">.</span><span class="n">numpy</span><span class="p">()</span>
</pre></div>
</div>
<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>/venv/apache-tvm-py3.8/lib/python3.8/site-packages/torch/ao/quantization/observer.py:214: UserWarning: Please use quant_min and quant_max to specify the range for observers. reduce_range will be deprecated in a future release of PyTorch.
warnings.warn(
/venv/apache-tvm-py3.8/lib/python3.8/site-packages/torch/ao/quantization/observer.py:1209: UserWarning: must run observer before calling calculate_qparams. Returning default scale and zero point
warnings.warn(
</pre></div>
</div>
</div>
<div class="section" id="convert-quantized-mobilenet-v2-to-relay-qnn-using-the-pytorch-frontend">
<h2>Convert quantized Mobilenet v2 to Relay-QNN using the PyTorch frontend<a class="headerlink" href="#convert-quantized-mobilenet-v2-to-relay-qnn-using-the-pytorch-frontend" title="Permalink to this headline"></a></h2>
<p>The PyTorch frontend has support for converting a quantized PyTorch model to
an equivalent Relay module enriched with quantization-aware operators.
We call this representation Relay QNN dialect.</p>
<p>You can print the output from the frontend to see how quantized models are
represented.</p>
<p>You would see operators specific to quantization such as
qnn.quantize, qnn.dequantize, qnn.requantize, and qnn.conv2d etc.</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><a href="https://docs.python.org/3/library/stdtypes.html#str" title="builtins.str" class="sphx-glr-backref-module-builtins sphx-glr-backref-type-py-class sphx-glr-backref-instance"><span class="n">input_name</span></a> <span class="o">=</span> <span class="s2">&quot;input&quot;</span> <span class="c1"># the input name can be arbitrary for PyTorch frontend.</span>
<a href="https://docs.python.org/3/library/stdtypes.html#list" title="builtins.list" class="sphx-glr-backref-module-builtins sphx-glr-backref-type-py-class sphx-glr-backref-instance"><span class="n">input_shapes</span></a> <span class="o">=</span> <span class="p">[(</span><a href="https://docs.python.org/3/library/stdtypes.html#str" title="builtins.str" class="sphx-glr-backref-module-builtins sphx-glr-backref-type-py-class sphx-glr-backref-instance"><span class="n">input_name</span></a><span class="p">,</span> <span class="p">(</span><span class="mi">1</span><span class="p">,</span> <span class="mi">3</span><span class="p">,</span> <span class="mi">224</span><span class="p">,</span> <span class="mi">224</span><span class="p">))]</span>
<span class="n">mod</span><span class="p">,</span> <a href="https://docs.python.org/3/library/stdtypes.html#dict" title="builtins.dict" class="sphx-glr-backref-module-builtins sphx-glr-backref-type-py-class sphx-glr-backref-instance"><span class="n">params</span></a> <span class="o">=</span> <a href="../../reference/api/python/relay/frontend.html#tvm.relay.frontend.from_pytorch" title="tvm.relay.frontend.from_pytorch" class="sphx-glr-backref-module-tvm-relay-frontend sphx-glr-backref-type-py-function"><span class="n">relay</span><span class="o">.</span><span class="n">frontend</span><span class="o">.</span><span class="n">from_pytorch</span></a><span class="p">(</span><span class="n">script_module</span><span class="p">,</span> <a href="https://docs.python.org/3/library/stdtypes.html#list" title="builtins.list" class="sphx-glr-backref-module-builtins sphx-glr-backref-type-py-class sphx-glr-backref-instance"><span class="n">input_shapes</span></a><span class="p">)</span>
<span class="c1"># print(mod) # comment in to see the QNN IR dump</span>
</pre></div>
</div>
</div>
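To make the point about QNN operators concrete without dumping the whole IR, here is a small sketch (an addition to this tutorial, not part of the original) that walks the imported module and collects the distinct qnn.* operator names. It assumes `mod` is the module returned by `relay.frontend.from_pytorch` above.

```python
# A minimal sketch: list the distinct QNN operators in the imported module.
qnn_op_names = set()


def collect_qnn_ops(node):
    # Operator calls carry a tvm.ir.Op; keep only those in the qnn.* namespace.
    if isinstance(node, relay.Call) and isinstance(node.op, tvm.ir.Op):
        if node.op.name.startswith("qnn."):
            qnn_op_names.add(node.op.name)


relay.analysis.post_order_visit(mod["main"], collect_qnn_ops)
print(sorted(qnn_op_names))  # expect names like qnn.conv2d, qnn.dequantize, qnn.requantize
```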
<div class="section" id="compile-and-run-the-relay-module">
<h2>Compile and run the Relay module<a class="headerlink" href="#compile-and-run-the-relay-module" title="Permalink to this headline"></a></h2>
<p>Once we obtained the quantized Relay module, the rest of the workflow
is the same as running floating point models. Please refer to other
tutorials for more details.</p>
<p>Under the hood, quantization specific operators are lowered to a sequence of
standard Relay operators before compilation.</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><a href="https://docs.python.org/3/library/stdtypes.html#str" title="builtins.str" class="sphx-glr-backref-module-builtins sphx-glr-backref-type-py-class sphx-glr-backref-instance"><span class="n">target</span></a> <span class="o">=</span> <span class="s2">&quot;llvm&quot;</span>
<span class="n">tvm_result</span><span class="p">,</span> <a href="../../reference/api/python/graph_executor.html#tvm.contrib.graph_executor.GraphModule" title="tvm.contrib.graph_executor.GraphModule" class="sphx-glr-backref-module-tvm-contrib-graph_executor sphx-glr-backref-type-py-class sphx-glr-backref-instance"><span class="n">rt_mod</span></a> <span class="o">=</span> <span class="n">run_tvm_model</span><span class="p">(</span><span class="n">mod</span><span class="p">,</span> <a href="https://docs.python.org/3/library/stdtypes.html#dict" title="builtins.dict" class="sphx-glr-backref-module-builtins sphx-glr-backref-type-py-class sphx-glr-backref-instance"><span class="n">params</span></a><span class="p">,</span> <a href="https://docs.python.org/3/library/stdtypes.html#str" title="builtins.str" class="sphx-glr-backref-module-builtins sphx-glr-backref-type-py-class sphx-glr-backref-instance"><span class="n">input_name</span></a><span class="p">,</span> <span class="n">inp</span><span class="p">,</span> <a href="https://docs.python.org/3/library/stdtypes.html#str" title="builtins.str" class="sphx-glr-backref-module-builtins sphx-glr-backref-type-py-class sphx-glr-backref-instance"><span class="n">target</span></a><span class="o">=</span><a href="https://docs.python.org/3/library/stdtypes.html#str" title="builtins.str" class="sphx-glr-backref-module-builtins sphx-glr-backref-type-py-class sphx-glr-backref-instance"><span class="n">target</span></a><span class="p">)</span>
</pre></div>
</div>
</div>
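If you want to observe that lowering before building, you can apply the QNN canonicalization pass by hand. This is a sketch added for illustration (relay.build runs the equivalent lowering for you); `CanonicalizeOps` rewrites qnn.* calls into standard Relay operators.

```python
# A minimal sketch: lower QNN ops into standard Relay ops and inspect the result.
qnn_lowered = relay.qnn.transform.CanonicalizeOps()(mod)
# print(qnn_lowered)  # comment in: the qnn.* calls are now plain Relay operators
```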
<div class="section" id="compare-the-output-labels">
<h2>Compare the output labels<a class="headerlink" href="#compare-the-output-labels" title="Permalink to this headline"></a></h2>
<p>We should see identical labels printed.</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">pt_top3_labels</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">argsort</span><span class="p">(</span><span class="n">pt_result</span><span class="p">[</span><span class="mi">0</span><span class="p">])[::</span><span class="o">-</span><span class="mi">1</span><span class="p">][:</span><span class="mi">3</span><span class="p">]</span>
<span class="n">tvm_top3_labels</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">argsort</span><span class="p">(</span><span class="n">tvm_result</span><span class="p">[</span><span class="mi">0</span><span class="p">])[::</span><span class="o">-</span><span class="mi">1</span><span class="p">][:</span><span class="mi">3</span><span class="p">]</span>
<span class="nb">print</span><span class="p">(</span><span class="s2">&quot;PyTorch top3 labels:&quot;</span><span class="p">,</span> <span class="p">[</span><a href="https://docs.python.org/3/library/stdtypes.html#dict" title="builtins.dict" class="sphx-glr-backref-module-builtins sphx-glr-backref-type-py-class sphx-glr-backref-instance"><span class="n">synset</span></a><span class="p">[</span><span class="n">label</span><span class="p">]</span> <span class="k">for</span> <span class="n">label</span> <span class="ow">in</span> <span class="n">pt_top3_labels</span><span class="p">])</span>
<span class="nb">print</span><span class="p">(</span><span class="s2">&quot;TVM top3 labels:&quot;</span><span class="p">,</span> <span class="p">[</span><a href="https://docs.python.org/3/library/stdtypes.html#dict" title="builtins.dict" class="sphx-glr-backref-module-builtins sphx-glr-backref-type-py-class sphx-glr-backref-instance"><span class="n">synset</span></a><span class="p">[</span><span class="n">label</span><span class="p">]</span> <span class="k">for</span> <span class="n">label</span> <span class="ow">in</span> <span class="n">tvm_top3_labels</span><span class="p">])</span>
</pre></div>
</div>
<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>PyTorch top3 labels: [&#39;tiger cat&#39;, &#39;Egyptian cat&#39;, &#39;lynx, catamount&#39;]
TVM top3 labels: [&#39;tiger cat&#39;, &#39;Egyptian cat&#39;, &#39;tabby, tabby cat&#39;]
</pre></div>
</div>
However, due to the difference in numerics, in general the raw floating point outputs are not expected to be identical. Here, we print how many floating point output values are identical out of 1000 outputs from mobilenet v2.

```python
print("%d in 1000 raw floating outputs identical." % np.sum(tvm_result[0] == pt_result[0]))
```

Out:

```
122 in 1000 raw floating outputs identical.
```
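Exact equality is a blunt measure for floating point values, so it can also help to look at how far apart the two outputs actually are. This short check is an addition to the tutorial:

```python
# A minimal sketch: tolerance-based comparison of the raw outputs.
print("max abs difference:", np.max(np.abs(tvm_result[0] - pt_result[0])))
print("allclose with atol=0.1:", np.allclose(tvm_result[0], pt_result[0], atol=0.1))
```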
<div class="section" id="measure-performance">
<h2>Measure performance<a class="headerlink" href="#measure-performance" title="Permalink to this headline"></a></h2>
<p>Here we give an example of how to measure performance of TVM compiled models.</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><a href="https://docs.python.org/3/library/functions.html#int" title="builtins.int" class="sphx-glr-backref-module-builtins sphx-glr-backref-type-py-class sphx-glr-backref-instance"><span class="n">n_repeat</span></a> <span class="o">=</span> <span class="mi">100</span> <span class="c1"># should be bigger to make the measurement more accurate</span>
<span class="n">dev</span> <span class="o">=</span> <span class="n">tvm</span><span class="o">.</span><span class="n">cpu</span><span class="p">(</span><span class="mi">0</span><span class="p">)</span>
<span class="nb">print</span><span class="p">(</span><a href="../../reference/api/python/graph_executor.html#tvm.contrib.graph_executor.GraphModule.benchmark" title="tvm.contrib.graph_executor.GraphModule.benchmark" class="sphx-glr-backref-module-tvm-contrib-graph_executor sphx-glr-backref-type-py-method"><span class="n">rt_mod</span><span class="o">.</span><span class="n">benchmark</span></a><span class="p">(</span><span class="n">dev</span><span class="p">,</span> <span class="n">number</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">repeat</span><span class="o">=</span><a href="https://docs.python.org/3/library/functions.html#int" title="builtins.int" class="sphx-glr-backref-module-builtins sphx-glr-backref-type-py-class sphx-glr-backref-instance"><span class="n">n_repeat</span></a><span class="p">))</span>
</pre></div>
</div>
<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Execution time summary:
mean (ms) median (ms) max (ms) min (ms) std (ms)
87.9379 87.9031 91.7352 87.4494 0.4585
</pre></div>
</div>
<div class="admonition note">
<p class="admonition-title">Note</p>
<p>We recommend this method for the following reasons:</p>
<blockquote>
<div><ul class="simple">
<li><p>Measurements are done in C++, so there is no Python overhead</p></li>
<li><p>It includes several warm up runs</p></li>
<li><p>The same method can be used to profile on remote devices (android etc.).</p></li>
</ul>
</div></blockquote>
</div>
<div class="admonition note">
<p class="admonition-title">Note</p>
<p>Unless the hardware has special support for fast 8 bit instructions, quantized models are
not expected to be any faster than FP32 models. Without fast 8 bit instructions, TVM does
quantized convolution in 16 bit, even if the model itself is 8 bit.</p>
<p>For x86, the best performance can be achieved on CPUs with AVX512 instructions set.
In this case, TVM utilizes the fastest available 8 bit instructions for the given target.
This includes support for the VNNI 8 bit dot product instruction (CascadeLake or newer).</p>
<p>Moreover, the following general tips for CPU performance equally applies:</p>
<blockquote>
<div><ul class="simple">
<li><p>Set the environment variable TVM_NUM_THREADS to the number of physical cores</p></li>
<li><p>Choose the best target for your hardware, such as “llvm -mcpu=skylake-avx512” or
“llvm -mcpu=cascadelake” (more CPUs with AVX512 would come in the future)</p></li>
</ul>
</div></blockquote>
</div>
</div>
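As a concrete illustration of those two tips, the following sketch (added here; the core count is a placeholder, adjust both it and the target to your machine) applies them before compiling:

```python
# A minimal sketch: apply the CPU performance tips above before building the model.
import os

# Placeholder: set to your physical core count. The variable is read when the
# TVM runtime thread pool starts, so set it before the first model run.
os.environ["TVM_NUM_THREADS"] = "4"

# Both target strings come from the note above; pick the one matching your CPU.
avx512_target = "llvm -mcpu=cascadelake"  # or "llvm -mcpu=skylake-avx512"
tvm_result, rt_mod = run_tvm_model(mod, params, input_name, inp, target=avx512_target)
```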
<div class="section" id="deploy-a-quantized-mxnet-model">
<h2>Deploy a quantized MXNet Model<a class="headerlink" href="#deploy-a-quantized-mxnet-model" title="Permalink to this headline"></a></h2>
<p>TODO</p>
</div>
<div class="section" id="deploy-a-quantized-tflite-model">
<h2>Deploy a quantized TFLite Model<a class="headerlink" href="#deploy-a-quantized-tflite-model" title="Permalink to this headline"></a></h2>
<p>TODO</p>
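This part is covered in depth by the next tutorial in this series, [Deploy a Framework-prequantized Model with TVM - Part 3 (TFLite)](deploy_prequantized_tflite.html). As a rough, hedged sketch of the overall flow (the model URL, file names, and input name below follow that companion tutorial and should be treated as assumptions):

```python
# A rough sketch: import a pre-quantized TFLite model through the TFLite frontend.
import os
import tarfile

import tflite  # the "tflite" package provides the flatbuffer parser

model_url = (
    "https://storage.googleapis.com/download.tensorflow.org/models/"
    "mobilenet_v1_1.0_224_quant.tgz"
)
model_path = download_testdata(model_url, "mobilenet_v1_1.0_224_quant.tgz", module="data")
model_dir = os.path.dirname(model_path)
with tarfile.open(model_path) as tar:
    tar.extractall(model_dir)

with open(os.path.join(model_dir, "mobilenet_v1_1.0_224_quant.tflite"), "rb") as f:
    tflite_model = tflite.Model.GetRootAsModel(f.read(), 0)

# TFLite models use NHWC layout; this one takes a uint8 input named "input".
mod, params = relay.frontend.from_tflite(
    tflite_model, shape_dict={"input": (1, 224, 224, 3)}, dtype_dict={"input": "uint8"}
)
```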
<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 1 minutes 35.965 seconds)</p>
<div class="sphx-glr-footer sphx-glr-footer-example docutils container" id="sphx-glr-download-how-to-deploy-models-deploy-prequantized-py">
<div class="sphx-glr-download sphx-glr-download-python docutils container">
<p><a class="reference download internal" download="" href="../../_downloads/fb8217c13f4351224c6cf3aacf1a87fc/deploy_prequantized.py"><code class="xref download docutils literal notranslate"><span class="pre">Download</span> <span class="pre">Python</span> <span class="pre">source</span> <span class="pre">code:</span> <span class="pre">deploy_prequantized.py</span></code></a></p>
</div>
<div class="sphx-glr-download sphx-glr-download-jupyter docutils container">
<p><a class="reference download internal" download="" href="../../_downloads/c20f81a94729f461f33b52cc110fd9d6/deploy_prequantized.ipynb"><code class="xref download docutils literal notranslate"><span class="pre">Download</span> <span class="pre">Jupyter</span> <span class="pre">notebook:</span> <span class="pre">deploy_prequantized.ipynb</span></code></a></p>
</div>
</div>
<p class="sphx-glr-signature"><a class="reference external" href="https://sphinx-gallery.github.io">Gallery generated by Sphinx-Gallery</a></p>
</div>
</div>
</div>
</div>
<footer>
<div class="rst-footer-buttons" role="navigation" aria-label="footer navigation">
<a href="deploy_prequantized_tflite.html" class="btn btn-neutral float-right" title="Deploy a Framework-prequantized Model with TVM - Part 3 (TFLite)" accesskey="n" rel="next">Next <span class="fa fa-arrow-circle-right"></span></a>
<a href="deploy_object_detection_pytorch.html" class="btn btn-neutral float-left" title="Compile PyTorch Object Detection Models" accesskey="p" rel="prev"><span class="fa fa-arrow-circle-left"></span> Previous</a>
</div>
<div id="button" class="backtop"><img src="../../_static/img/right.svg" alt="backtop"/> </div>
<section class="footerSec">
<div class="footerHeader">
<div class="d-flex align-md-items-center justify-content-between flex-column flex-md-row">
<div class="copywrite d-flex align-items-center">
<h5 id="copy-right-info">© 2023 Apache Software Foundation | All rights reserved</h5>
</div>
</div>
</div>
<div>
<div class="footernote">Copyright © 2023 The Apache Software Foundation. Apache TVM, Apache, the Apache feather, and the Apache TVM project logo are either trademarks or registered trademarks of the Apache Software Foundation.</div>
</div>
</section>
</footer>
</div>
</div>
</section>
</div>
<script src="https://cdnjs.cloudflare.com/ajax/libs/popper.js/1.12.9/umd/popper.min.js" integrity="sha384-ApNbgh9B+Y1QKtv3Rn7W3mgPxhU9K/ScQsAP7hUibX39j7fakFPskvXusvfa0b4Q" crossorigin="anonymous"></script>
<script src="https://maxcdn.bootstrapcdn.com/bootstrap/4.0.0/js/bootstrap.min.js" integrity="sha384-JZR6Spejh4U02d8jOt6vLEHfe/JQGiRRSQQxSfFWpi1MquVdAyjUar5+76PVCmYl" crossorigin="anonymous"></script>
</body>
<script type="text/javascript">
jQuery(function () {
SphinxRtdTheme.Navigation.enable(true);
});
</script>
<!-- Theme Analytics -->
<script>
(function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
(i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
})(window,document,'script','https://www.google-analytics.com/analytics.js','ga');
ga('create', 'UA-75982049-2', 'auto');
ga('send', 'pageview');
</script>
</body>
</html>