| <!DOCTYPE HTML> |
| <html lang="en-US"> |
| <head> |
| <meta charset="UTF-8"> |
| <title>Dask</title> |
| <meta name="viewport" content="width=device-width,initial-scale=1"> |
| <meta name="generator" content="Jekyll v4.3.4"> |
| <link rel="stylesheet" href="//fonts.googleapis.com/css?family=Lato:300,300italic,400,400italic,700,700italic,900"> |
| <link rel="stylesheet" href="/css/screen.css"> |
| <link rel="icon" type="image/x-icon" href="/favicon.ico"> |
| <!--[if lt IE 9]> |
| <script src="/js/html5shiv.min.js"></script> |
| <script src="/js/respond.min.js"></script> |
| <![endif]--> |
| <!-- Matomo --> |
| <script> |
| var _paq = window._paq = window._paq || []; |
| /* tracker methods like "setCustomDimension" should be called before "trackPageView" */ |
| _paq.push(["setDoNotTrack", true]); |
| _paq.push(["disableCookies"]); |
| _paq.push(['trackPageView']); |
| _paq.push(['enableLinkTracking']); |
| (function() { |
| var u="https://analytics.apache.org/"; |
| _paq.push(['setTrackerUrl', u+'matomo.php']); |
| _paq.push(['setSiteId', '68']); |
| var d=document, g=d.createElement('script'), s=d.getElementsByTagName('script')[0]; |
| g.async=true; g.src=u+'matomo.js'; s.parentNode.insertBefore(g,s); |
| })(); |
| </script> |
| <!-- End Matomo Code --> |
| </head> |
| |
| |
| <body class="wrap"> |
| <header role="banner"> |
| <nav class="mobile-nav show-on-mobiles"> |
| <ul> |
| <li class=""> |
| <a href="/">Home</a> |
| </li> |
| <li class=""> |
| <a href="/releases/"><span class="show-on-mobiles">Rel</span> |
| <span class="hide-on-mobiles">Releases</span></a> |
| </li> |
| <li class="current"> |
| <a href="/docs/"><span class="show-on-mobiles">Doc</span> |
| <span class="hide-on-mobiles">Documentation</span></a> |
| </li> |
| <li class=""> |
| <a href="/talks/"><span class="show-on-mobiles">Talk</span> |
| <span class="hide-on-mobiles">Talks</span></a> |
| </li> |
| <li class=""> |
| <a href="/news/">News</a> |
| </li> |
| <li class=""> |
| <a href="/develop/"><span class="show-on-mobiles">Dev</span> |
| <span class="hide-on-mobiles">Develop</span></a> |
| </li> |
| <li class=""> |
| <a href="/help/">Help</a> |
| </li> |
| </ul> |
| |
| </nav> |
| <div class="grid"> |
| <div class="unit one-quarter center-on-mobiles"> |
| <h1> |
| <a href="/"> |
| <span class="sr-only">Apache ORC</span> |
| <img src="/img/logo.png" width="249" height="101" alt="ORC Logo"> |
| </a> |
| </h1> |
| </div> |
| <nav class="main-nav unit three-quarters hide-on-mobiles"> |
| <ul> |
| <li class=""> |
| <a href="/">Home</a> |
| </li> |
| <li class=""> |
| <a href="/releases/"><span class="show-on-mobiles">Rel</span> |
| <span class="hide-on-mobiles">Releases</span></a> |
| </li> |
| <li class="current"> |
| <a href="/docs/"><span class="show-on-mobiles">Doc</span> |
| <span class="hide-on-mobiles">Documentation</span></a> |
| </li> |
| <li class=""> |
| <a href="/talks/"><span class="show-on-mobiles">Talk</span> |
| <span class="hide-on-mobiles">Talks</span></a> |
| </li> |
| <li class=""> |
| <a href="/news/">News</a> |
| </li> |
| <li class=""> |
| <a href="/develop/"><span class="show-on-mobiles">Dev</span> |
| <span class="hide-on-mobiles">Develop</span></a> |
| </li> |
| <li class=""> |
| <a href="/help/">Help</a> |
| </li> |
| </ul> |
| |
| </nav> |
| </div> |
| </header> |
| |
| |
| <section class="docs"> |
| <div class="grid"> |
| |
| <div class="docs-nav-mobile unit whole show-on-mobiles"> |
| <select onchange="if (this.value) window.location.href=this.value"> |
| <option value="">Navigate the docs…</option> |
| |
| <optgroup label="Overview"> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <option value="/docs/index.html">Background</option> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <option value="/docs/adopters.html">ORC Adopters</option> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <option value="/docs/types.html">Types</option> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <option value="/docs/indexes.html">Indexes</option> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <option value="/docs/acid.html">ACID support</option> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| </optgroup> |
| |
| <optgroup label="Installing"> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <option value="/docs/building.html">Building ORC</option> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| </optgroup> |
| |
| <optgroup label="Using in Spark"> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <option value="/docs/spark-ddl.html">Spark DDL</option> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <option value="/docs/spark-config.html">Spark Configuration</option> |
| |
| |
| |
| |
| |
| |
| |
| |
| </optgroup> |
| |
| <optgroup label="Using in Python"> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <option value="/docs/pyarrow.html">PyArrow</option> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <option value="/docs/dask.html">Dask</option> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| </optgroup> |
| |
| <optgroup label="Using in Hive"> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <option value="/docs/hive-ddl.html">Hive DDL</option> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <option value="/docs/hive-config.html">Hive Configuration</option> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| </optgroup> |
| |
| <optgroup label="Using in MapReduce"> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <option value="/docs/mapred.html">Using in MapRed</option> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <option value="/docs/mapreduce.html">Using in MapReduce</option> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| </optgroup> |
| |
| <optgroup label="Using ORC Core"> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <option value="/docs/core-java.html">Using Core Java</option> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <option value="/docs/core-cpp.html">Using Core C++</option> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <option value="/docs/core-java-config.html">ORC Java configuration</option> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| </optgroup> |
| |
| <optgroup label="Tools"> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <option value="/docs/cpp-tools.html">C++ Tools</option> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <option value="/docs/java-tools.html">Java Tools</option> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| </optgroup> |
| |
| </select> |
| </div> |
| |
| |
| <div class="unit four-fifths"> |
| <article> |
| <h1>Dask</h1> |
| <h2 id="how-to-install">How to install</h2> |
| |
| <p><a href="https://dask.org">Dask</a> also supports Apache ORC.</p> |
| |
| <div class="language-plaintext highlighter-rouge"><div class="highlight"><pre class="highlight"><code>pip3 install "dask[dataframe]==2025.5.1" |
| pip3 install pandas |
| </code></pre></div></div> |
| |
| <h2 id="how-to-write-and-read-an-orc-file">How to write and read an ORC file</h2> |
| |
| <div class="language-plaintext highlighter-rouge"><div class="highlight"><pre class="highlight"><code>In [1]: import pandas as pd |
| |
| In [2]: import dask.dataframe as dd |
| |
| In [3]: pf = pd.DataFrame(data={"col1": [1, 2, 3], "col2": ["a", "b", None]}) |
| |
| In [4]: dd.to_orc(dd.from_pandas(pf, npartitions=2), path="/tmp/orc") |
| Out[4]: (None, None) |
| |
| In [5]: dd.read_orc(path="/tmp/orc").compute() |
| Out[5]: |
| col1 col2 |
| 0 1 a |
| 1 2 b |
| 0 3 <NA> |
| |
| In [6]: dd.read_orc(path="/tmp/orc", columns=["col1"]).compute() |
| Out[6]: |
| col1 |
| 0 1 |
| 1 2 |
| 0 3 |
| </code></pre></div></div> |
| |
| <p><a href="https://docs.dask.org/en/stable/10-minutes-to-dask.html">10 Minutes to Dask</a> page |
| provides a short overview.</p> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <div class="section-nav"> |
| <div class="left align-right"> |
| |
| |
| |
| <a href="/docs/pyarrow.html" class="prev">Back</a> |
| |
| </div> |
| <div class="right align-left"> |
| |
| |
| |
| <a href="/docs/hive-ddl.html" class="next">Next</a> |
| |
| </div> |
| </div> |
| <div class="clear"></div> |
| |
| |
| </article> |
| </div> |
| |
| <div class="unit one-fifth hide-on-mobiles"> |
| <aside> |
| |
| <h4>Overview</h4> |
| |
| |
| <ul> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class=""><a href="/docs/index.html">Background</a></li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class=""><a href="/docs/adopters.html">ORC Adopters</a></li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class=""><a href="/docs/types.html">Types</a></li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class=""><a href="/docs/indexes.html">Indexes</a></li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class=""><a href="/docs/acid.html">ACID support</a></li> |
| |
| |
| |
| </ul> |
| |
| |
| <h4>Installing</h4> |
| |
| |
| <ul> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class=""><a href="/docs/building.html">Building ORC</a></li> |
| |
| |
| |
| </ul> |
| |
| |
| <h4>Using in Spark</h4> |
| |
| |
| <ul> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class=""><a href="/docs/spark-ddl.html">Spark DDL</a></li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class=""><a href="/docs/spark-config.html">Spark Configuration</a></li> |
| |
| |
| |
| </ul> |
| |
| |
| <h4>Using in Python</h4> |
| |
| |
| <ul> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class=""><a href="/docs/pyarrow.html">PyArrow</a></li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="current"><a href="/docs/dask.html">Dask</a></li> |
| |
| |
| |
| </ul> |
| |
| |
| <h4>Using in Hive</h4> |
| |
| |
| <ul> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class=""><a href="/docs/hive-ddl.html">Hive DDL</a></li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class=""><a href="/docs/hive-config.html">Hive Configuration</a></li> |
| |
| |
| |
| </ul> |
| |
| |
| <h4>Using in MapReduce</h4> |
| |
| |
| <ul> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class=""><a href="/docs/mapred.html">Using in MapRed</a></li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class=""><a href="/docs/mapreduce.html">Using in MapReduce</a></li> |
| |
| |
| |
| </ul> |
| |
| |
| <h4>Using ORC Core</h4> |
| |
| |
| <ul> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class=""><a href="/docs/core-java.html">Using Core Java</a></li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class=""><a href="/docs/core-cpp.html">Using Core C++</a></li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class=""><a href="/docs/core-java-config.html">ORC Java configuration</a></li> |
| |
| |
| |
| </ul> |
| |
| |
| <h4>Tools</h4> |
| |
| |
| <ul> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class=""><a href="/docs/cpp-tools.html">C++ Tools</a></li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class=""><a href="/docs/java-tools.html">Java Tools</a></li> |
| |
| |
| |
| </ul> |
| |
| |
| </aside> |
| </div> |
| |
| |
| <div class="clear"></div> |
| |
| </div> |
| </section> |
| |
| |
| <footer role="contentinfo"> |
| <p style="margin-left: 20px; margin-right; 20px; text-align: center">The contents of this website are © 2025 |
| <a href="https://www.apache.org/">Apache Software Foundation</a> |
| under the terms of the <a |
| href="https://www.apache.org/licenses/LICENSE-2.0.html"> |
| Apache License v2</a>. Apache ORC and its logo are trademarks |
| of the Apache Software Foundation.</p> |
| </footer> |
| |
| <script> |
| var anchorForId = function (id) { |
| var anchor = document.createElement("a"); |
| anchor.className = "header-link"; |
| anchor.href = "#" + id; |
| anchor.innerHTML = "<span class=\"sr-only\">Permalink</span><i class=\"fa fa-link\"></i>"; |
| anchor.title = "Permalink"; |
| return anchor; |
| }; |
| |
| var linkifyAnchors = function (level, containingElement) { |
| var headers = containingElement.getElementsByTagName("h" + level); |
| for (var h = 0; h < headers.length; h++) { |
| var header = headers[h]; |
| |
| if (typeof header.id !== "undefined" && header.id !== "") { |
| header.appendChild(anchorForId(header.id)); |
| } |
| } |
| }; |
| |
| document.onreadystatechange = function () { |
| if (this.readyState === "complete") { |
| var contentBlock = document.getElementsByClassName("docs")[0] || document.getElementsByClassName("news")[0]; |
| if (!contentBlock) { |
| return; |
| } |
| for (var level = 1; level <= 6; level++) { |
| linkifyAnchors(level, contentBlock); |
| } |
| } |
| }; |
| </script> |
| |
| |
| </body> |
| </html> |