blob: 0f038163de4c63735131b25504ce847afe7c738f [file] [log] [blame]
<!doctype html><html lang=en class=no-js><head><meta charset=utf-8><meta name=viewport content="width=device-width,initial-scale=1,shrink-to-fit=no"><meta name=generator content="Hugo 0.111.3"><link rel=alternate type=text/html href=/update-site/docs/++version++/getting-started-python/_print/><link rel=alternate type=application/rss+xml href=/update-site/docs/++version++/getting-started-python/index.xml><meta name=robots content="index, follow"><link rel=apple-touch-icon sizes=57x57 href=https://apache.org/favicons/apple-touch-icon-57x57.png><link rel=apple-touch-icon sizes=60x60 href=https://apache.org/favicons/apple-touch-icon-60x60.png><link rel=apple-touch-icon sizes=72x72 href=https://apache.org/favicons/apple-touch-icon-72x72.png><link rel=apple-touch-icon sizes=76x76 href=https://apache.org/favicons/apple-touch-icon-76x76.png><link rel=apple-touch-icon sizes=114x114 href=https://apache.org/favicons/apple-touch-icon-114x114.png><link rel=apple-touch-icon sizes=120x120 href=https://apache.org/favicons/apple-touch-icon-120x120.png><link rel=apple-touch-icon sizes=144x144 href=https://apache.org/favicons/apple-touch-icon-144x144.png><link rel=apple-touch-icon sizes=152x152 href=https://apache.org/favicons/apple-touch-icon-152x152.png><link rel=apple-touch-icon sizes=180x180 href=https://apache.org/favicons/apple-touch-icon-180x180.png><link rel=icon type=image/png href=https://apache.org/favicons/favicon-32x32.png sizes=32x32><link rel=icon type=image/png href=https://apache.org/favicons/favicon-194x194.png sizes=194x194><link rel=icon type=image/png href=https://apache.org/favicons/favicon-96x96.png sizes=96x96><link rel=icon type=image/png href=https://apache.org/favicons/android-chrome-192x192.png sizes=192x192><link rel=icon type=image/png href=https://apache.org/favicons/favicon-16x16.png sizes=16x16><link rel=manifest href=https://apache.org/favicons/manifest.json><link rel="shortcut icon" href=https://apache.org/favicons/favicon.ico><title>Getting Started 
(Python) | Apache Avro</title><meta name=description content><meta property="og:title" content="Getting Started (Python)"><meta property="og:description" content><meta property="og:type" content="website"><meta property="og:url" content="/update-site/docs/++version++/getting-started-python/"><meta property="og:site_name" content="Apache Avro"><meta itemprop=name content="Getting Started (Python)"><meta itemprop=description content><meta name=twitter:card content="summary"><meta name=twitter:title content="Getting Started (Python)"><meta name=twitter:description content><link rel=preload href=/update-site/scss/main.min.6deb8a211453721a965671b611280fb11af8ef2def6b7a2b0a34f6a94939360f.css as=style><link href=/update-site/scss/main.min.6deb8a211453721a965671b611280fb11af8ef2def6b7a2b0a34f6a94939360f.css rel=stylesheet integrity><script src=https://code.jquery.com/jquery-3.5.1.min.js integrity="sha256-9/aliU8dGd2tb6OSsuzixeV4y/faTqgFtohetphbbj0=" crossorigin=anonymous></script>
<link rel=stylesheet href=/css/prism.css></head><body class=td-section><header><nav class="js-navbar-scroll navbar navbar-expand navbar-dark flex-column flex-md-row td-navbar"><a class=navbar-brand href=/update-site/><span class=navbar-logo><img src=/docs/++version++/logo.svg width=100 height=30 style="margin:0 10px"></span><span class="text-uppercase font-weight-bold">Apache Avro</span></a><div class="td-navbar-nav-scroll ml-md-auto" id=main_navbar><ul class="navbar-nav mt-2 mt-lg-0"><li class="nav-item mr-4 mb-2 mb-lg-0"><a class=nav-link href=/update-site/project/><span>Project</span></a></li><li class="nav-item mr-4 mb-2 mb-lg-0"><a class=nav-link href=/update-site/blog/><span>Blog</span></a></li><li class="nav-item mr-4 mb-2 mb-lg-0"><a class=nav-link href=/update-site/community/><span>Community</span></a></li><li class="nav-item dropdown mr-4 d-none d-lg-block"><a class="nav-link dropdown-toggle" href=# id=navbarDropdown role=button data-toggle=dropdown aria-haspopup=true aria-expanded=false>Documentation</a><div class=dropdown-menu aria-labelledby=navbarDropdownMenuLink><a class=dropdown-item href=./docs/++version++/>++version++ (Current)</a>
<a class=dropdown-item href=https://avro.apache.org/docs/1.11.0/>1.11.0</a>
<a class=dropdown-item href=https://avro.apache.org/docs/1.10.2/>1.10.2</a>
<a class=dropdown-item href=https://avro.apache.org/docs/1.10.1/>1.10.1</a>
<a class=dropdown-item href=https://avro.apache.org/docs/1.10.0/>1.10.0</a>
<a class=dropdown-item href=https://avro.apache.org/docs/1.9.2/>1.9.2</a>
<a class=dropdown-item href=https://avro.apache.org/docs/1.9.1/>1.9.1</a>
<a class=dropdown-item href=https://avro.apache.org/docs/1.9.0/>1.9.0</a>
<a class=dropdown-item href=https://avro.apache.org/docs/1.8.2/>1.8.2</a>
<a class=dropdown-item href=https://avro.apache.org/docs/1.8.1/>1.8.1</a>
<a class=dropdown-item href=https://avro.apache.org/docs/1.8.0/>1.8.0</a>
<a class=dropdown-item href=https://avro.apache.org/docs/1.7.7/>1.7.7</a>
<a class=dropdown-item href=https://avro.apache.org/docs/1.7.6/>1.7.6</a>
<a class=dropdown-item href=https://avro.apache.org/docs/1.7.5/>1.7.5</a>
<a class=dropdown-item href=https://avro.apache.org/docs/1.7.4/>1.7.4</a>
<a class=dropdown-item href=https://avro.apache.org/docs/1.7.3/>1.7.3</a>
<a class=dropdown-item href=https://avro.apache.org/docs/1.7.2/>1.7.2</a>
<a class=dropdown-item href=https://avro.apache.org/docs/1.7.1/>1.7.1</a>
<a class=dropdown-item href=https://avro.apache.org/docs/1.7.0/>1.7.0</a>
<a class=dropdown-item href=https://avro.apache.org/docs/1.6.3/>1.6.3</a>
<a class=dropdown-item href=https://avro.apache.org/docs/1.6.2/>1.6.2</a>
<a class=dropdown-item href=https://avro.apache.org/docs/1.6.1/>1.6.1</a>
<a class=dropdown-item href=https://avro.apache.org/docs/1.6.0/>1.6.0</a>
<a class=dropdown-item href=https://avro.apache.org/docs/1.5.4/>1.5.4</a>
<a class=dropdown-item href=https://avro.apache.org/docs/1.5.3/>1.5.3</a>
<a class=dropdown-item href=https://avro.apache.org/docs/1.5.2/>1.5.2</a>
<a class=dropdown-item href=https://avro.apache.org/docs/1.5.1/>1.5.1</a>
<a class=dropdown-item href=https://avro.apache.org/docs/1.5.0/>1.5.0</a>
<a class=dropdown-item href=https://avro.apache.org/docs/1.4.1/>1.4.1</a>
<a class=dropdown-item href=https://avro.apache.org/docs/1.4.0/>1.4.0</a>
<a class=dropdown-item href=https://avro.apache.org/docs/1.3.3/>1.3.3</a>
<a class=dropdown-item href=https://avro.apache.org/docs/1.3.2/>1.3.2</a>
<a class=dropdown-item href=https://avro.apache.org/docs/1.3.1/>1.3.1</a>
<a class=dropdown-item href=https://avro.apache.org/docs/1.3.0/>1.3.0</a>
<a class=dropdown-item href=https://avro.apache.org/docs/1.2.0/>1.2.0</a>
<a class=dropdown-item href=https://avro.apache.org/docs/1.1.0/>1.1.0</a>
<a class=dropdown-item href=https://avro.apache.org/docs/1.0.0/>1.0.0</a></div></li><li class="nav-item dropdown mr-4 d-none d-lg-block"><a class="nav-link dropdown-toggle" href=# id=navbarDropdownAsf role=button data-toggle=dropdown aria-haspopup=true aria-expanded=false>ASF links</a><div class=dropdown-menu aria-labelledby=navbarDropdownAsf><a class=dropdown-item href=http://www.apache.org/ target=_blank>ASF Web Site</a>
<a class=dropdown-item href=http://www.apache.org/licenses/ target=_blank>License</a>
<a class=dropdown-item href=http://www.apache.org/foundation/sponsorship.html target=_blank>Donate</a>
<a class=dropdown-item href=http://www.apache.org/foundation/thanks.html target=_blank>Thanks</a>
<a class=dropdown-item href=http://www.apache.org/security/ target=_blank>Security</a></div></li></ul></div><div class="navbar-nav d-none d-lg-block"></div></nav></header><div class="container-fluid td-outer"><div class=td-main><div class="row flex-xl-nowrap"><aside class="col-12 col-md-3 col-xl-2 td-sidebar d-print-none"><div id=td-sidebar-menu class=td-sidebar__inner><div id=content-mobile><form class="td-sidebar__search d-flex align-items-center"><button class="btn btn-link td-sidebar__toggle d-md-none p-0 ml-3 fas fa-bars" type=button data-toggle=collapse data-target=#td-section-nav aria-controls=td-docs-nav aria-expanded=false aria-label="Toggle section navigation"></button></form></div><div id=content-desktop></div><nav class="collapse td-sidebar-nav foldable-nav" id=td-section-nav><ul class="td-sidebar-nav__section pr-md-3 ul-0"><li class="td-sidebar-nav__section-title td-sidebar-nav__section with-child active-path" id=m-update-sitedocs-li><a href=/update-site/docs/ class="align-left pl-0 td-sidebar-link td-sidebar-link__section tree-root" id=m-update-sitedocs><span>Documentation</span></a><ul class=ul-1><li class="td-sidebar-nav__section-title td-sidebar-nav__section with-child active-path" id=m-update-sitedocsversion-li><input type=checkbox id=m-update-sitedocsversion-check checked>
<label for=m-update-sitedocsversion-check><a href=/update-site/docs/++version++/ title="Apache Avro™ ++version++ Documentation" class="align-left pl-0 td-sidebar-link td-sidebar-link__section" id=m-update-sitedocsversion><span>++version++</span></a></label><ul class="ul-2 foldable"><li class="td-sidebar-nav__section-title td-sidebar-nav__section without-child" id=m-update-sitedocsversiongetting-started-java-li><input type=checkbox id=m-update-sitedocsversiongetting-started-java-check>
<label for=m-update-sitedocsversiongetting-started-java-check><a href=/update-site/docs/++version++/getting-started-java/ class="align-left pl-0 td-sidebar-link td-sidebar-link__section" id=m-update-sitedocsversiongetting-started-java><span>Getting Started (Java)</span></a></label></li><li class="td-sidebar-nav__section-title td-sidebar-nav__section without-child" id=m-update-sitedocsversiongetting-started-python-li><input type=checkbox id=m-update-sitedocsversiongetting-started-python-check>
<label for=m-update-sitedocsversiongetting-started-python-check><a href=/update-site/docs/++version++/getting-started-python/ class="align-left pl-0 active td-sidebar-link td-sidebar-link__section" id=m-update-sitedocsversiongetting-started-python><span class=td-sidebar-nav-active-item>Getting Started (Python)</span></a></label></li><li class="td-sidebar-nav__section-title td-sidebar-nav__section without-child" id=m-update-sitedocsversionspecification-li><input type=checkbox id=m-update-sitedocsversionspecification-check>
<label for=m-update-sitedocsversionspecification-check><a href=/update-site/docs/++version++/specification/ class="align-left pl-0 td-sidebar-link td-sidebar-link__section" id=m-update-sitedocsversionspecification><span>Specification</span></a></label></li><li class="td-sidebar-nav__section-title td-sidebar-nav__section without-child" id=m-update-sitedocsversionapi-java-li><input type=checkbox id=m-update-sitedocsversionapi-java-check>
<label for=m-update-sitedocsversionapi-java-check><a href=./api/java/ class="align-left pl-0 td-sidebar-link td-sidebar-link__page" id=m-update-sitedocsversionapi-java><span>Java API</span></a></label></li><li class="td-sidebar-nav__section-title td-sidebar-nav__section without-child" id=m-update-sitedocsversionapi-c-li><input type=checkbox id=m-update-sitedocsversionapi-c-check>
<label for=m-update-sitedocsversionapi-c-check><a href=./api/c/ class="align-left pl-0 td-sidebar-link td-sidebar-link__page" id=m-update-sitedocsversionapi-c><span>C API</span></a></label></li><li class="td-sidebar-nav__section-title td-sidebar-nav__section without-child" id=m-update-sitedocsversionapi-cpp-li><input type=checkbox id=m-update-sitedocsversionapi-cpp-check>
<label for=m-update-sitedocsversionapi-cpp-check><a href=./api/cpp/html/ class="align-left pl-0 td-sidebar-link td-sidebar-link__page" id=m-update-sitedocsversionapi-cpp><span>C++ API</span></a></label></li><li class="td-sidebar-nav__section-title td-sidebar-nav__section without-child" id=m-update-sitedocsversionapi-csharp-li><input type=checkbox id=m-update-sitedocsversionapi-csharp-check>
<label for=m-update-sitedocsversionapi-csharp-check><a href=./api/csharp/html/ class="align-left pl-0 td-sidebar-link td-sidebar-link__page" id=m-update-sitedocsversionapi-csharp><span>C# API</span></a></label></li><li class="td-sidebar-nav__section-title td-sidebar-nav__section without-child" id=m-update-sitedocsversionmapreduce-guide-li><input type=checkbox id=m-update-sitedocsversionmapreduce-guide-check>
<label for=m-update-sitedocsversionmapreduce-guide-check><a href=/update-site/docs/++version++/mapreduce-guide/ class="align-left pl-0 td-sidebar-link td-sidebar-link__section" id=m-update-sitedocsversionmapreduce-guide><span>MapReduce guide</span></a></label></li><li class="td-sidebar-nav__section-title td-sidebar-nav__section without-child" id=m-update-sitedocsversionidl-language-li><input type=checkbox id=m-update-sitedocsversionidl-language-check>
<label for=m-update-sitedocsversionidl-language-check><a href=/update-site/docs/++version++/idl-language/ class="align-left pl-0 td-sidebar-link td-sidebar-link__section" id=m-update-sitedocsversionidl-language><span>IDL Language</span></a></label></li><li class="td-sidebar-nav__section-title td-sidebar-nav__section without-child" id=m-update-sitedocsversionsasl-profile-li><input type=checkbox id=m-update-sitedocsversionsasl-profile-check>
<label for=m-update-sitedocsversionsasl-profile-check><a href=/update-site/docs/++version++/sasl-profile/ class="align-left pl-0 td-sidebar-link td-sidebar-link__section" id=m-update-sitedocsversionsasl-profile><span>SASL profile</span></a></label></li></ul></li></ul></li></ul></nav></div></aside><aside class="d-none d-xl-block col-xl-2 td-sidebar-toc d-print-none"><div class="td-page-meta ml-2 pb-1 pt-2 mb-0"><a href=https://github.com/apache/avro/tree/master/doc/content/en/docs/++version++/Getting%20started%20%28Python%29/_index.md class=td-page-meta--view target=_blank rel=noopener><i class="fa fa-file-alt fa-fw"></i> View page source</a>
<a href=https://github.com/apache/avro/edit/master/doc/content/en/docs/++version++/Getting%20started%20%28Python%29/_index.md class=td-page-meta--edit target=_blank rel=noopener><i class="fa fa-edit fa-fw"></i> Edit this page</a>
<a href="https://github.com/apache/avro/new/master/doc/content/en/docs/++version++/Getting%20started%20%28Python%29/_index.md?filename=change-me.md&amp;value=---%0Atitle%3A+%22Long+Page+Title%22%0AlinkTitle%3A+%22Short+Nav+Title%22%0Aweight%3A+100%0Adescription%3A+%3E-%0A+++++Page+description+for+heading+and+indexes.%0A---%0A%0A%23%23+Heading%0A%0AEdit+this+template+to+create+your+new+page.%0A%0A%2A+Give+it+a+good+name%2C+ending+in+%60.md%60+-+e.g.+%60getting-started.md%60%0A%2A+Edit+the+%22front+matter%22+section+at+the+top+of+the+page+%28weight+controls+how+its+ordered+amongst+other+pages+in+the+same+directory%3B+lowest+number+first%29.%0A%2A+Add+a+good+commit+message+at+the+bottom+of+the+page+%28%3C80+characters%3B+use+the+extended+description+field+for+more+detail%29.%0A%2A+Create+a+new+branch+so+you+can+preview+your+new+file+and+request+a+review+via+Pull+Request.%0A" class=td-page-meta--child target=_blank rel=noopener><i class="fa fa-edit fa-fw"></i> Create child page</a>
<a href="https://github.com/apache/avro/issues/new?title=Getting%20Started%20%28Python%29" class=td-page-meta--issue target=_blank rel=noopener><i class="fab fa-github fa-fw"></i> Create documentation issue</a>
<a href=https://github.com/apache/avro/issues/new class=td-page-meta--project-issue target=_blank rel=noopener><i class="fas fa-tasks fa-fw"></i> Create project issue</a>
<a id=print href=/update-site/docs/++version++/getting-started-python/_print/><i class="fa fa-print fa-fw"></i> Print entire section</a></div><div class=td-toc><nav id=TableOfContents><ul><li><a href=#notice-for-python-3-users>Notice for Python 3 users</a></li><li><a href=#download>Download</a></li><li><a href=#defining-a-schema>Defining a schema</a></li><li><a href=#serializing-and-deserializing-without-code-generation>Serializing and deserializing without code generation</a></li></ul></nav></div><div class="taxonomy taxonomy-terms-cloud taxo-tags"><h5 class=taxonomy-title>Tag Cloud</h5><ul class=taxonomy-terms><li><a class=taxonomy-term href=/update-site/tags/java/ data-taxonomy-term=java><span class=taxonomy-label>java</span><span class=taxonomy-count>1</span></a></li><li><a class=taxonomy-term href=/update-site/tags/python/ data-taxonomy-term=python><span class=taxonomy-label>python</span><span class=taxonomy-count>1</span></a></li></ul></div></aside><main class="col-12 col-md-9 col-xl-8 pl-md-5" role=main><nav aria-label=breadcrumb class=td-breadcrumbs><ol class=breadcrumb><li class=breadcrumb-item><a href=/update-site/docs/>Documentation</a></li><li class=breadcrumb-item><a href=/update-site/docs/++version++/>++version++</a></li><li class="breadcrumb-item active" aria-current=page><a href=/update-site/docs/++version++/getting-started-python/>Getting Started (Python)</a></li></ol></nav><div class=td-content><h1>Getting Started (Python)</h1><header class=article-meta><div class="taxonomy taxonomy-terms-article taxo-tags"><h5 class=taxonomy-title>Tags:</h5><ul class=taxonomy-terms><li><a class=taxonomy-term href=/update-site/tags/python/ data-taxonomy-term=python><span class=taxonomy-label>python</span></a></li></ul></div><p class=reading-time><i class="fa fa-clock" aria-hidden=true></i>&nbsp; 5 minute read &nbsp;</p></header><p>This is a short guide for getting started with Apache Avro™ using Python. 
This guide only covers using Avro for data serialization; see Patrick Hunt&rsquo;s Avro RPC Quick Start for a good introduction to using Avro for RPC.</p><h2 id=notice-for-python-3-users>Notice for Python 3 users</h2><p>A package called &ldquo;avro-python3&rdquo; had been provided to support Python 3 previously, but the codebase was consolidated into the &ldquo;avro&rdquo; package and that supports both Python 2 and 3 now. The avro-python3 package will be removed in the near future, so users should use the &ldquo;avro&rdquo; package instead. They are mostly API compatible, but there are a few minor differences (e.g., function name capitalization, such as avro.schema.Parse vs avro.schema.parse).</p><h2 id=download>Download</h2><p>For Python, the easiest way to get started is to install it from PyPI. Python&rsquo;s Avro API is available on PyPI.</p><div class=highlight><pre tabindex=0 style=background-color:#f8f8f8;-moz-tab-size:4;-o-tab-size:4;tab-size:4><code class=language-shell data-lang=shell><span style=display:flex><span>$ python3 -m pip install avro
</span></span></code></pre></div><p>The official releases of the Avro implementations for C, C++, C#, Java, PHP, Python, and Ruby can be downloaded from the Apache Avro™ Releases page. This guide uses Avro ++version++, the latest version at the time of writing. Download and unzip avro-++version++.tar.gz, and install via python setup.py (this will probably require root privileges). Ensure that you can import avro from a Python prompt.</p><div class=highlight><pre tabindex=0 style=background-color:#f8f8f8;-moz-tab-size:4;-o-tab-size:4;tab-size:4><code class=language-shell data-lang=shell><span style=display:flex><span>$ tar xvf avro-&#43;&#43;version&#43;&#43;.tar.gz
</span></span><span style=display:flex><span>$ <span style=color:#204a87>cd</span> avro-&#43;&#43;version&#43;&#43;
</span></span><span style=display:flex><span>$ python setup.py install
</span></span><span style=display:flex><span>$ python
</span></span><span style=display:flex><span>&gt;&gt;&gt; import avro <span style=color:#8f5902;font-style:italic># should not raise ImportError</span>
</span></span></code></pre></div><p>Alternatively, you may build the Avro Python library from source. From the root Avro directory, run the commands:</p><div class=highlight><pre tabindex=0 style=background-color:#f8f8f8;-moz-tab-size:4;-o-tab-size:4;tab-size:4><code class=language-shell data-lang=shell><span style=display:flex><span>$ <span style=color:#204a87>cd</span> lang/py/
</span></span><span style=display:flex><span>$ python3 -m pip install -e .
</span></span><span style=display:flex><span>$ python
</span></span></code></pre></div><h2 id=defining-a-schema>Defining a schema</h2><p>Avro schemas are defined using JSON. Schemas are composed of primitive types (null, boolean, int, long, float, double, bytes, and string) and complex types (record, enum, array, map, union, and fixed). You can learn more about Avro schemas and types from the specification, but for now let&rsquo;s start with a simple schema example, user.avsc:</p><div class=highlight><pre tabindex=0 style=background-color:#f8f8f8;-moz-tab-size:4;-o-tab-size:4;tab-size:4><code class=language-json data-lang=json><span style=display:flex><span><span style=color:#000;font-weight:700>{</span><span style=color:#204a87;font-weight:700>&#34;namespace&#34;</span><span style=color:#000;font-weight:700>:</span> <span style=color:#4e9a06>&#34;example.avro&#34;</span><span style=color:#000;font-weight:700>,</span>
</span></span><span style=display:flex><span> <span style=color:#204a87;font-weight:700>&#34;type&#34;</span><span style=color:#000;font-weight:700>:</span> <span style=color:#4e9a06>&#34;record&#34;</span><span style=color:#000;font-weight:700>,</span>
</span></span><span style=display:flex><span> <span style=color:#204a87;font-weight:700>&#34;name&#34;</span><span style=color:#000;font-weight:700>:</span> <span style=color:#4e9a06>&#34;User&#34;</span><span style=color:#000;font-weight:700>,</span>
</span></span><span style=display:flex><span> <span style=color:#204a87;font-weight:700>&#34;fields&#34;</span><span style=color:#000;font-weight:700>:</span> <span style=color:#000;font-weight:700>[</span>
</span></span><span style=display:flex><span> <span style=color:#000;font-weight:700>{</span><span style=color:#204a87;font-weight:700>&#34;name&#34;</span><span style=color:#000;font-weight:700>:</span> <span style=color:#4e9a06>&#34;name&#34;</span><span style=color:#000;font-weight:700>,</span> <span style=color:#204a87;font-weight:700>&#34;type&#34;</span><span style=color:#000;font-weight:700>:</span> <span style=color:#4e9a06>&#34;string&#34;</span><span style=color:#000;font-weight:700>},</span>
</span></span><span style=display:flex><span> <span style=color:#000;font-weight:700>{</span><span style=color:#204a87;font-weight:700>&#34;name&#34;</span><span style=color:#000;font-weight:700>:</span> <span style=color:#4e9a06>&#34;favorite_number&#34;</span><span style=color:#000;font-weight:700>,</span> <span style=color:#204a87;font-weight:700>&#34;type&#34;</span><span style=color:#000;font-weight:700>:</span> <span style=color:#000;font-weight:700>[</span><span style=color:#4e9a06>&#34;int&#34;</span><span style=color:#000;font-weight:700>,</span> <span style=color:#4e9a06>&#34;null&#34;</span><span style=color:#000;font-weight:700>]},</span>
</span></span><span style=display:flex><span> <span style=color:#000;font-weight:700>{</span><span style=color:#204a87;font-weight:700>&#34;name&#34;</span><span style=color:#000;font-weight:700>:</span> <span style=color:#4e9a06>&#34;favorite_color&#34;</span><span style=color:#000;font-weight:700>,</span> <span style=color:#204a87;font-weight:700>&#34;type&#34;</span><span style=color:#000;font-weight:700>:</span> <span style=color:#000;font-weight:700>[</span><span style=color:#4e9a06>&#34;string&#34;</span><span style=color:#000;font-weight:700>,</span> <span style=color:#4e9a06>&#34;null&#34;</span><span style=color:#000;font-weight:700>]}</span>
</span></span><span style=display:flex><span> <span style=color:#000;font-weight:700>]</span>
</span></span><span style=display:flex><span><span style=color:#000;font-weight:700>}</span>
</span></span></code></pre></div><p>This schema defines a record representing a hypothetical user. (Note that a schema file can only contain a single schema definition.) At minimum, a record definition must include its type (&ldquo;type&rdquo;: &ldquo;record&rdquo;), a name (&ldquo;name&rdquo;: &ldquo;User&rdquo;), and fields, in this case name, favorite_number, and favorite_color. We also define a namespace (&ldquo;namespace&rdquo;: &ldquo;example.avro&rdquo;), which together with the name attribute defines the &ldquo;full name&rdquo; of the schema (example.avro.User in this case).</p><p>Fields are defined via an array of objects, each of which defines a name and type (other attributes are optional, see the record specification for more details). The type attribute of a field is another schema object, which can be either a primitive or complex type. For example, the name field of our User schema is the primitive type string, whereas the favorite_number and favorite_color fields are both unions, represented by JSON arrays. unions are a complex type that can be any of the types listed in the array; e.g., favorite_number can either be an int or null, essentially making it an optional field.</p><h2 id=serializing-and-deserializing-without-code-generation>Serializing and deserializing without code generation</h2><p>Data in Avro is always stored with its corresponding schema, meaning we can always read a serialized item, regardless of whether we know the schema ahead of time. This allows us to perform serialization and deserialization without code generation. 
Note that the Avro Python library does not support code generation.</p><p>Try running the following code snippet, which serializes two users to a data file on disk, and then reads back and deserializes the data file:</p><div class=highlight><pre tabindex=0 style=background-color:#f8f8f8;-moz-tab-size:4;-o-tab-size:4;tab-size:4><code class=language-python data-lang=python><span style=display:flex><span><span style=color:#204a87;font-weight:700>import</span> <span style=color:#000>avro.schema</span>
</span></span><span style=display:flex><span><span style=color:#204a87;font-weight:700>from</span> <span style=color:#000>avro.datafile</span> <span style=color:#204a87;font-weight:700>import</span> <span style=color:#000>DataFileReader</span><span style=color:#000;font-weight:700>,</span> <span style=color:#000>DataFileWriter</span>
</span></span><span style=display:flex><span><span style=color:#204a87;font-weight:700>from</span> <span style=color:#000>avro.io</span> <span style=color:#204a87;font-weight:700>import</span> <span style=color:#000>DatumReader</span><span style=color:#000;font-weight:700>,</span> <span style=color:#000>DatumWriter</span>
</span></span><span style=display:flex><span>
</span></span><span style=display:flex><span><span style=color:#000>schema</span> <span style=color:#ce5c00;font-weight:700>=</span> <span style=color:#000>avro</span><span style=color:#ce5c00;font-weight:700>.</span><span style=color:#000>schema</span><span style=color:#ce5c00;font-weight:700>.</span><span style=color:#000>parse</span><span style=color:#000;font-weight:700>(</span><span style=color:#204a87>open</span><span style=color:#000;font-weight:700>(</span><span style=color:#4e9a06>&#34;user.avsc&#34;</span><span style=color:#000;font-weight:700>,</span> <span style=color:#4e9a06>&#34;rb&#34;</span><span style=color:#000;font-weight:700>)</span><span style=color:#ce5c00;font-weight:700>.</span><span style=color:#000>read</span><span style=color:#000;font-weight:700>())</span>
</span></span><span style=display:flex><span>
</span></span><span style=display:flex><span><span style=color:#000>writer</span> <span style=color:#ce5c00;font-weight:700>=</span> <span style=color:#000>DataFileWriter</span><span style=color:#000;font-weight:700>(</span><span style=color:#204a87>open</span><span style=color:#000;font-weight:700>(</span><span style=color:#4e9a06>&#34;users.avro&#34;</span><span style=color:#000;font-weight:700>,</span> <span style=color:#4e9a06>&#34;wb&#34;</span><span style=color:#000;font-weight:700>),</span> <span style=color:#000>DatumWriter</span><span style=color:#000;font-weight:700>(),</span> <span style=color:#000>schema</span><span style=color:#000;font-weight:700>)</span>
</span></span><span style=display:flex><span><span style=color:#000>writer</span><span style=color:#ce5c00;font-weight:700>.</span><span style=color:#000>append</span><span style=color:#000;font-weight:700>({</span><span style=color:#4e9a06>&#34;name&#34;</span><span style=color:#000;font-weight:700>:</span> <span style=color:#4e9a06>&#34;Alyssa&#34;</span><span style=color:#000;font-weight:700>,</span> <span style=color:#4e9a06>&#34;favorite_number&#34;</span><span style=color:#000;font-weight:700>:</span> <span style=color:#0000cf;font-weight:700>256</span><span style=color:#000;font-weight:700>})</span>
</span></span><span style=display:flex><span><span style=color:#000>writer</span><span style=color:#ce5c00;font-weight:700>.</span><span style=color:#000>append</span><span style=color:#000;font-weight:700>({</span><span style=color:#4e9a06>&#34;name&#34;</span><span style=color:#000;font-weight:700>:</span> <span style=color:#4e9a06>&#34;Ben&#34;</span><span style=color:#000;font-weight:700>,</span> <span style=color:#4e9a06>&#34;favorite_number&#34;</span><span style=color:#000;font-weight:700>:</span> <span style=color:#0000cf;font-weight:700>7</span><span style=color:#000;font-weight:700>,</span> <span style=color:#4e9a06>&#34;favorite_color&#34;</span><span style=color:#000;font-weight:700>:</span> <span style=color:#4e9a06>&#34;red&#34;</span><span style=color:#000;font-weight:700>})</span>
</span></span><span style=display:flex><span><span style=color:#000>writer</span><span style=color:#ce5c00;font-weight:700>.</span><span style=color:#000>close</span><span style=color:#000;font-weight:700>()</span>
</span></span><span style=display:flex><span>
</span></span><span style=display:flex><span><span style=color:#000>reader</span> <span style=color:#ce5c00;font-weight:700>=</span> <span style=color:#000>DataFileReader</span><span style=color:#000;font-weight:700>(</span><span style=color:#204a87>open</span><span style=color:#000;font-weight:700>(</span><span style=color:#4e9a06>&#34;users.avro&#34;</span><span style=color:#000;font-weight:700>,</span> <span style=color:#4e9a06>&#34;rb&#34;</span><span style=color:#000;font-weight:700>),</span> <span style=color:#000>DatumReader</span><span style=color:#000;font-weight:700>())</span>
</span></span><span style=display:flex><span><span style=color:#204a87;font-weight:700>for</span> <span style=color:#000>user</span> <span style=color:#204a87;font-weight:700>in</span> <span style=color:#000>reader</span><span style=color:#000;font-weight:700>:</span>
</span></span><span style=display:flex><span> <span style=color:#204a87>print</span><span style=color:#000;font-weight:700>(</span><span style=color:#000>user</span><span style=color:#000;font-weight:700>)</span>
</span></span><span style=display:flex><span><span style=color:#000>reader</span><span style=color:#ce5c00;font-weight:700>.</span><span style=color:#000>close</span><span style=color:#000;font-weight:700>()</span>
</span></span></code></pre></div><p>This outputs:</p><div class=highlight><pre tabindex=0 style=background-color:#f8f8f8;-moz-tab-size:4;-o-tab-size:4;tab-size:4><code class=language-json data-lang=json><span style=display:flex><span><span style=color:#000;font-weight:700>{</span><span style=color:#a40000>u&#39;favorite_color&#39;:</span> <span style=color:#a40000>None,</span> <span style=color:#a40000>u&#39;favorite_number&#39;:</span> <span style=color:#a40000>256,</span> <span style=color:#a40000>u&#39;name&#39;:</span> <span style=color:#a40000>u&#39;Alyssa&#39;</span><span style=color:#000;font-weight:700>}</span>
</span></span><span style=display:flex><span><span style=color:#000;font-weight:700>{</span><span style=color:#a40000>u&#39;favorite_color&#39;:</span> <span style=color:#a40000>u&#39;red&#39;,</span> <span style=color:#a40000>u&#39;favorite_number&#39;:</span> <span style=color:#a40000>7,</span> <span style=color:#a40000>u&#39;name&#39;:</span> <span style=color:#a40000>u&#39;Ben&#39;</span><span style=color:#000;font-weight:700>}</span>
</span></span></code></pre></div><p>Do make sure that you open your files in binary mode (i.e. using the modes wb or rb respectively). Otherwise you might generate corrupt files due to automatic replacement of newline characters with the platform-specific representations.</p><p>Let&rsquo;s take a closer look at what&rsquo;s going on here.</p><div class=highlight><pre tabindex=0 style=background-color:#f8f8f8;-moz-tab-size:4;-o-tab-size:4;tab-size:4><code class=language-python data-lang=python><span style=display:flex><span><span style=color:#000>schema</span> <span style=color:#ce5c00;font-weight:700>=</span> <span style=color:#000>avro</span><span style=color:#ce5c00;font-weight:700>.</span><span style=color:#000>schema</span><span style=color:#ce5c00;font-weight:700>.</span><span style=color:#000>parse</span><span style=color:#000;font-weight:700>(</span><span style=color:#204a87>open</span><span style=color:#000;font-weight:700>(</span><span style=color:#4e9a06>&#34;user.avsc&#34;</span><span style=color:#000;font-weight:700>,</span> <span style=color:#4e9a06>&#34;rb&#34;</span><span style=color:#000;font-weight:700>)</span><span style=color:#ce5c00;font-weight:700>.</span><span style=color:#000>read</span><span style=color:#000;font-weight:700>())</span>
</span></span></code></pre></div><p>avro.schema.parse takes a string containing a JSON schema definition as input and outputs an avro.schema.Schema object (specifically a subclass of Schema, in this case RecordSchema). We&rsquo;re passing in the contents of our user.avsc schema file here.</p><div class=highlight><pre tabindex=0 style=background-color:#f8f8f8;-moz-tab-size:4;-o-tab-size:4;tab-size:4><code class=language-python data-lang=python><span style=display:flex><span><span style=color:#000>writer</span> <span style=color:#ce5c00;font-weight:700>=</span> <span style=color:#000>DataFileWriter</span><span style=color:#000;font-weight:700>(</span><span style=color:#204a87>open</span><span style=color:#000;font-weight:700>(</span><span style=color:#4e9a06>&#34;users.avro&#34;</span><span style=color:#000;font-weight:700>,</span> <span style=color:#4e9a06>&#34;wb&#34;</span><span style=color:#000;font-weight:700>),</span> <span style=color:#000>DatumWriter</span><span style=color:#000;font-weight:700>(),</span> <span style=color:#000>schema</span><span style=color:#000;font-weight:700>)</span>
</span></span></code></pre></div><p>We create a DataFileWriter, which we&rsquo;ll use to write serialized items to a data file on disk. The DataFileWriter constructor takes three arguments:</p><ul><li>The file we&rsquo;ll serialize to</li><li>A DatumWriter, which is responsible for actually serializing the items to Avro&rsquo;s binary format (DatumWriters can be used separately from DataFileWriters, e.g., to perform IPC with Avro).</li><li>The schema we&rsquo;re using. The DataFileWriter needs the schema both to write the schema to the data file, and to verify that the items we write are valid items and write the appropriate fields.</li></ul><div class=highlight><pre tabindex=0 style=background-color:#f8f8f8;-moz-tab-size:4;-o-tab-size:4;tab-size:4><code class=language-python data-lang=python><span style=display:flex><span><span style=color:#000>writer</span><span style=color:#ce5c00;font-weight:700>.</span><span style=color:#000>append</span><span style=color:#000;font-weight:700>({</span><span style=color:#4e9a06>&#34;name&#34;</span><span style=color:#000;font-weight:700>:</span> <span style=color:#4e9a06>&#34;Alyssa&#34;</span><span style=color:#000;font-weight:700>,</span> <span style=color:#4e9a06>&#34;favorite_number&#34;</span><span style=color:#000;font-weight:700>:</span> <span style=color:#0000cf;font-weight:700>256</span><span style=color:#000;font-weight:700>})</span>
</span></span><span style=display:flex><span><span style=color:#000>writer</span><span style=color:#ce5c00;font-weight:700>.</span><span style=color:#000>append</span><span style=color:#000;font-weight:700>({</span><span style=color:#4e9a06>&#34;name&#34;</span><span style=color:#000;font-weight:700>:</span> <span style=color:#4e9a06>&#34;Ben&#34;</span><span style=color:#000;font-weight:700>,</span> <span style=color:#4e9a06>&#34;favorite_number&#34;</span><span style=color:#000;font-weight:700>:</span> <span style=color:#0000cf;font-weight:700>7</span><span style=color:#000;font-weight:700>,</span> <span style=color:#4e9a06>&#34;favorite_color&#34;</span><span style=color:#000;font-weight:700>:</span> <span style=color:#4e9a06>&#34;red&#34;</span><span style=color:#000;font-weight:700>})</span>
</span></span></code></pre></div><p>We use DataFileWriter.append to add items to our data file. Avro records are represented as Python dicts. Since the field favorite_color has type [&ldquo;string&rdquo;, &ldquo;null&rdquo;], we are not required to specify this field, as shown in the first append. Were we to omit the required name field, an exception would be raised. Any extra entries in the dict that do not correspond to a field are ignored.</p><div class=highlight><pre tabindex=0 style=background-color:#f8f8f8;-moz-tab-size:4;-o-tab-size:4;tab-size:4><code class=language-python data-lang=python><span style=display:flex><span><span style=color:#000>reader</span> <span style=color:#ce5c00;font-weight:700>=</span> <span style=color:#000>DataFileReader</span><span style=color:#000;font-weight:700>(</span><span style=color:#204a87>open</span><span style=color:#000;font-weight:700>(</span><span style=color:#4e9a06>&#34;users.avro&#34;</span><span style=color:#000;font-weight:700>,</span> <span style=color:#4e9a06>&#34;rb&#34;</span><span style=color:#000;font-weight:700>),</span> <span style=color:#000>DatumReader</span><span style=color:#000;font-weight:700>())</span>
</span></span></code></pre></div><p>We open the file again, this time for reading back from disk. We use a DataFileReader and DatumReader analogous to the DataFileWriter and DatumWriter above.</p><div class=highlight><pre tabindex=0 style=background-color:#f8f8f8;-moz-tab-size:4;-o-tab-size:4;tab-size:4><code class=language-python data-lang=python><span style=display:flex><span><span style=color:#204a87;font-weight:700>for</span> <span style=color:#000>user</span> <span style=color:#204a87;font-weight:700>in</span> <span style=color:#000>reader</span><span style=color:#000;font-weight:700>:</span>
</span></span><span style=display:flex><span> <span style=color:#204a87>print</span><span style=color:#000;font-weight:700>(</span><span style=color:#000>user</span><span style=color:#000;font-weight:700>)</span>
</span></span></code></pre></div><p>The DataFileReader is an iterator that returns dicts corresponding to the serialized items.</p><div class=section-index></div><div class="text-muted mt-5 pt-3 border-top">Last modified April 13, 2023: <a href=https://github.com/apache/avro/commit/8b181dcaa392cfc9bf9ed903deb586de9878f938>Fix the path for Java javadoc HTMLs (8b181dc)</a></div></div></main></div></div><footer class="bg-dark py-5 row d-print-none"><div class="container-fluid mx-sm-5"><div class=row><div class="col-4 col-sm-3 text-xs-center order-sm-2"><ul class="list-inline mb-0"><li class="list-inline-item mx-2 h3" data-toggle=tooltip data-placement=top title="User mailing list" aria-label="User mailing list"><a class=text-white target=_blank rel=noopener href=mailto:user@avro.apache.org aria-label="User mailing list"><i class="fa fa-envelope"></i></a></li><li class="list-inline-item mx-2 h3" data-toggle=tooltip data-placement=top title=Twitter aria-label=Twitter><a class=text-white target=_blank rel=noopener href=https://twitter.com/ApacheAvro aria-label=Twitter><i class="fab fa-twitter"></i></a></li><li class="list-inline-item mx-2 h3" data-toggle=tooltip data-placement=top title="Stack Overflow" aria-label="Stack Overflow"><a class=text-white target=_blank rel=noopener href=https://stackoverflow.com/questions/tagged/avro aria-label="Stack Overflow"><i class="fab fa-stack-overflow"></i></a></li></ul></div><div class="col-4 col-sm-3 text-right text-xs-center order-sm-3"><ul class="list-inline mb-0"><li class="list-inline-item mx-2 h3" data-toggle=tooltip data-placement=top title=GitHub aria-label=GitHub><a class=text-white target=_blank rel=noopener href=https://github.com/apache/avro aria-label=GitHub><i class="fab fa-github"></i></a></li><li class="list-inline-item mx-2 h3" data-toggle=tooltip data-placement=top title=Issues aria-label=Issues><a class=text-white target=_blank rel=noopener href=https://issues.apache.org/jira/projects/AVRO/issues 
aria-label=Issues><i class="fab fa-jira"></i></a></li><li class="list-inline-item mx-2 h3" data-toggle=tooltip data-placement=top title="Chat with other project developers at Slack" aria-label="Chat with other project developers at Slack"><a class=text-white target=_blank rel=noopener href=https://the-asf.slack.com/ aria-label="Chat with other project developers at Slack"><i class="fab fa-slack"></i></a></li><li class="list-inline-item mx-2 h3" data-toggle=tooltip data-placement=top title="Developer mailing list" aria-label="Developer mailing list"><a class=text-white target=_blank rel=noopener href=mailto:dev@avro.apache.org aria-label="Developer mailing list"><i class="fa fa-envelope"></i></a></li></ul></div><div class="col-10 col-sm-3 text-center py-2 order-sm-2"><a href=https://www.apache.org/><small class=text-white>&copy; 2023 The Apache Software Foundation </small></a><small class=text-white>All Rights Reserved</small><p><small class=text-white>Apache Avro, Avro&trade;, Apache&reg;, and the Apache feather logo are either registered trademarks or trademarks of The Apache Software Foundation.</small></p></div><div class="col-5 col-sm-3 order-sm-2"><a href=https://www.apache.org/events/current-event.html><img src=https://www.apache.org/events/current-event-234x60.png alt="Current Apache Software Foundation event"></a></div></div></div></footer></div><script src=https://cdn.jsdelivr.net/npm/popper.js@1.16.1/dist/umd/popper.min.js integrity=sha384-9/reFTGAW83EW2RDu2S0VKaIzap3H66lZH81PoYlFhbGU+6BZp6G7niu735Sk7lN crossorigin=anonymous></script>
<script src=https://cdn.jsdelivr.net/npm/bootstrap@4.6.1/dist/js/bootstrap.min.js integrity="sha512-UR25UO94eTnCVwjbXozyeVd6ZqpaAE9naiEUBK/A+QDbfSTQFhPGj5lOR6d8tsgbBk84Ggb5A3EkjsOgPRPcKA==" crossorigin=anonymous></script>
<script src=/js/tabpane-persist.js></script>
<script src=/update-site/js/main.min.b0910468256f44515fad3d1c8b5cf64a439da3abc1acef42ad39b9ceac3ae705.js integrity="sha256-sJEEaCVvRFFfrT0ci1z2SkOdo6vBrO9CrTm5zqw65wU=" crossorigin=anonymous></script>
<script src=/js/prism.js></script></body></html>