| <!doctype html> |
| <html class="no-js" lang="en" data-content_root="./"> |
| <head><meta charset="utf-8"/> |
| <meta name="viewport" content="width=device-width,initial-scale=1"/> |
| <meta name="color-scheme" content="light dark"> |
| <link rel="index" title="Index" href="genindex.html" /><link rel="search" title="Search" href="search.html" /><link rel="next" title="Substrait" href="substrait.html" /><link rel="prev" title="Arrow Flight SQL JDBC Driver" href="flight_sql_jdbc_driver.html" /> |
| |
| <!-- Generated with Sphinx 8.1.3 and Furo 2024.08.06 --> |
| <title>Dataset - arrow-java 18.1.0 documentation</title> |
| <link rel="stylesheet" type="text/css" href="_static/pygments.css?v=8f2a1f02" /> |
| <link rel="stylesheet" type="text/css" href="_static/styles/furo.css?v=354aac6f" /> |
| <link rel="stylesheet" type="text/css" href="_static/styles/furo-extensions.css?v=302659d7" /> |
| |
| |
| |
| |
| <style> |
| body { |
| --color-code-background: #f8f8f8; |
| --color-code-foreground: black; |
| |
| } |
| @media not print { |
| body[data-theme="dark"] { |
| --color-code-background: #202020; |
| --color-code-foreground: #d0d0d0; |
| |
| } |
| @media (prefers-color-scheme: dark) { |
| body:not([data-theme="light"]) { |
| --color-code-background: #202020; |
| --color-code-foreground: #d0d0d0; |
| |
| } |
| } |
| } |
| </style></head> |
| <body> |
| |
| <script> |
| document.body.dataset.theme = localStorage.getItem("theme") || "auto"; |
| </script> |
| |
| |
| <svg xmlns="http://www.w3.org/2000/svg" style="display: none;"> |
| <symbol id="svg-toc" viewBox="0 0 24 24"> |
| <title>Contents</title> |
| <svg stroke="currentColor" fill="currentColor" stroke-width="0" viewBox="0 0 1024 1024"> |
| <path d="M408 442h480c4.4 0 8-3.6 8-8v-56c0-4.4-3.6-8-8-8H408c-4.4 0-8 3.6-8 8v56c0 4.4 3.6 8 8 8zm-8 204c0 4.4 3.6 8 8 8h480c4.4 0 8-3.6 8-8v-56c0-4.4-3.6-8-8-8H408c-4.4 0-8 3.6-8 8v56zm504-486H120c-4.4 0-8 3.6-8 8v56c0 4.4 3.6 8 8 8h784c4.4 0 8-3.6 8-8v-56c0-4.4-3.6-8-8-8zm0 632H120c-4.4 0-8 3.6-8 8v56c0 4.4 3.6 8 8 8h784c4.4 0 8-3.6 8-8v-56c0-4.4-3.6-8-8-8zM115.4 518.9L271.7 642c5.8 4.6 14.4.5 14.4-6.9V388.9c0-7.4-8.5-11.5-14.4-6.9L115.4 505.1a8.74 8.74 0 0 0 0 13.8z"/> |
| </svg> |
| </symbol> |
| <symbol id="svg-menu" viewBox="0 0 24 24"> |
| <title>Menu</title> |
| <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="none" stroke="currentColor" |
| stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="feather-menu"> |
| <line x1="3" y1="12" x2="21" y2="12"></line> |
| <line x1="3" y1="6" x2="21" y2="6"></line> |
| <line x1="3" y1="18" x2="21" y2="18"></line> |
| </svg> |
| </symbol> |
| <symbol id="svg-arrow-right" viewBox="0 0 24 24"> |
| <title>Expand</title> |
| <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="none" stroke="currentColor" |
| stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="feather-chevron-right"> |
| <polyline points="9 18 15 12 9 6"></polyline> |
| </svg> |
| </symbol> |
| <symbol id="svg-sun" viewBox="0 0 24 24"> |
| <title>Light mode</title> |
| <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="none" stroke="currentColor" |
| stroke-width="1" stroke-linecap="round" stroke-linejoin="round" class="feather-sun"> |
| <circle cx="12" cy="12" r="5"></circle> |
| <line x1="12" y1="1" x2="12" y2="3"></line> |
| <line x1="12" y1="21" x2="12" y2="23"></line> |
| <line x1="4.22" y1="4.22" x2="5.64" y2="5.64"></line> |
| <line x1="18.36" y1="18.36" x2="19.78" y2="19.78"></line> |
| <line x1="1" y1="12" x2="3" y2="12"></line> |
| <line x1="21" y1="12" x2="23" y2="12"></line> |
| <line x1="4.22" y1="19.78" x2="5.64" y2="18.36"></line> |
| <line x1="18.36" y1="5.64" x2="19.78" y2="4.22"></line> |
| </svg> |
| </symbol> |
| <symbol id="svg-moon" viewBox="0 0 24 24"> |
| <title>Dark mode</title> |
| <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="none" stroke="currentColor" |
| stroke-width="1" stroke-linecap="round" stroke-linejoin="round" class="icon-tabler-moon"> |
| <path stroke="none" d="M0 0h24v24H0z" fill="none" /> |
| <path d="M12 3c.132 0 .263 0 .393 0a7.5 7.5 0 0 0 7.92 12.446a9 9 0 1 1 -8.313 -12.454z" /> |
| </svg> |
| </symbol> |
| <symbol id="svg-sun-with-moon" viewBox="0 0 24 24"> |
| <title>Auto light/dark, in light mode</title> |
| <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="none" stroke="currentColor" |
| stroke-width="1" stroke-linecap="round" stroke-linejoin="round" |
| class="icon-custom-derived-from-feather-sun-and-tabler-moon"> |
| <path style="opacity: 50%" d="M 5.411 14.504 C 5.471 14.504 5.532 14.504 5.591 14.504 C 3.639 16.319 4.383 19.569 6.931 20.352 C 7.693 20.586 8.512 20.551 9.25 20.252 C 8.023 23.207 4.056 23.725 2.11 21.184 C 0.166 18.642 1.702 14.949 4.874 14.536 C 5.051 14.512 5.231 14.5 5.411 14.5 L 5.411 14.504 Z"/> |
| <line x1="14.5" y1="3.25" x2="14.5" y2="1.25"/> |
| <line x1="14.5" y1="15.85" x2="14.5" y2="17.85"/> |
| <line x1="10.044" y1="5.094" x2="8.63" y2="3.68"/> |
| <line x1="19" y1="14.05" x2="20.414" y2="15.464"/> |
| <line x1="8.2" y1="9.55" x2="6.2" y2="9.55"/> |
| <line x1="20.8" y1="9.55" x2="22.8" y2="9.55"/> |
| <line x1="10.044" y1="14.006" x2="8.63" y2="15.42"/> |
| <line x1="19" y1="5.05" x2="20.414" y2="3.636"/> |
| <circle cx="14.5" cy="9.55" r="3.6"/> |
| </svg> |
| </symbol> |
| <symbol id="svg-moon-with-sun" viewBox="0 0 24 24"> |
| <title>Auto light/dark, in dark mode</title> |
| <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="none" stroke="currentColor" |
| stroke-width="1" stroke-linecap="round" stroke-linejoin="round" |
| class="icon-custom-derived-from-feather-sun-and-tabler-moon"> |
| <path d="M 8.282 7.007 C 8.385 7.007 8.494 7.007 8.595 7.007 C 5.18 10.184 6.481 15.869 10.942 17.24 C 12.275 17.648 13.706 17.589 15 17.066 C 12.851 22.236 5.91 23.143 2.505 18.696 C -0.897 14.249 1.791 7.786 7.342 7.063 C 7.652 7.021 7.965 7 8.282 7 L 8.282 7.007 Z"/> |
| <line style="opacity: 50%" x1="18" y1="3.705" x2="18" y2="2.5"/> |
| <line style="opacity: 50%" x1="18" y1="11.295" x2="18" y2="12.5"/> |
| <line style="opacity: 50%" x1="15.316" y1="4.816" x2="14.464" y2="3.964"/> |
| <line style="opacity: 50%" x1="20.711" y1="10.212" x2="21.563" y2="11.063"/> |
| <line style="opacity: 50%" x1="14.205" y1="7.5" x2="13.001" y2="7.5"/> |
| <line style="opacity: 50%" x1="21.795" y1="7.5" x2="23" y2="7.5"/> |
| <line style="opacity: 50%" x1="15.316" y1="10.184" x2="14.464" y2="11.036"/> |
| <line style="opacity: 50%" x1="20.711" y1="4.789" x2="21.563" y2="3.937"/> |
| <circle style="opacity: 50%" cx="18" cy="7.5" r="2.169"/> |
| </svg> |
| </symbol> |
| <symbol id="svg-pencil" viewBox="0 0 24 24"> |
| <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="none" stroke="currentColor" |
| stroke-width="1" stroke-linecap="round" stroke-linejoin="round" class="icon-tabler-pencil-code"> |
| <path d="M4 20h4l10.5 -10.5a2.828 2.828 0 1 0 -4 -4l-10.5 10.5v4" /> |
| <path d="M13.5 6.5l4 4" /> |
| <path d="M20 21l2 -2l-2 -2" /> |
| <path d="M17 17l-2 2l2 2" /> |
| </svg> |
| </symbol> |
| <symbol id="svg-eye" viewBox="0 0 24 24"> |
| <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="none" stroke="currentColor" |
| stroke-width="1" stroke-linecap="round" stroke-linejoin="round" class="icon-tabler-eye-code"> |
| <path stroke="none" d="M0 0h24v24H0z" fill="none" /> |
| <path d="M10 12a2 2 0 1 0 4 0a2 2 0 0 0 -4 0" /> |
| <path |
| d="M11.11 17.958c-3.209 -.307 -5.91 -2.293 -8.11 -5.958c2.4 -4 5.4 -6 9 -6c3.6 0 6.6 2 9 6c-.21 .352 -.427 .688 -.647 1.008" /> |
| <path d="M20 21l2 -2l-2 -2" /> |
| <path d="M17 17l-2 2l2 2" /> |
| </svg> |
| </symbol> |
| </svg> |
| |
| <input type="checkbox" class="sidebar-toggle" name="__navigation" id="__navigation"> |
| <input type="checkbox" class="sidebar-toggle" name="__toc" id="__toc"> |
| <label class="overlay sidebar-overlay" for="__navigation"> |
| <div class="visually-hidden">Hide navigation sidebar</div> |
| </label> |
| <label class="overlay toc-overlay" for="__toc"> |
| <div class="visually-hidden">Hide table of contents sidebar</div> |
| </label> |
| |
| <a class="skip-to-content muted-link" href="#furo-main-content">Skip to content</a> |
| |
| |
| |
| <div class="page"> |
| <header class="mobile-header"> |
| <div class="header-left"> |
| <label class="nav-overlay-icon" for="__navigation"> |
| <div class="visually-hidden">Toggle site navigation sidebar</div> |
| <i class="icon"><svg><use href="#svg-menu"></use></svg></i> |
| </label> |
| </div> |
| <div class="header-center"> |
| <a href="index.html"><div class="brand">arrow-java 18.1.0 documentation</div></a> |
| </div> |
| <div class="header-right"> |
| <div class="theme-toggle-container theme-toggle-header"> |
| <button class="theme-toggle"> |
| <div class="visually-hidden">Toggle Light / Dark / Auto color theme</div> |
| <svg class="theme-icon-when-auto-light"><use href="#svg-sun-with-moon"></use></svg> |
| <svg class="theme-icon-when-auto-dark"><use href="#svg-moon-with-sun"></use></svg> |
| <svg class="theme-icon-when-dark"><use href="#svg-moon"></use></svg> |
| <svg class="theme-icon-when-light"><use href="#svg-sun"></use></svg> |
| </button> |
| </div> |
| <label class="toc-overlay-icon toc-header-icon" for="__toc"> |
| <div class="visually-hidden">Toggle table of contents sidebar</div> |
| <i class="icon"><svg><use href="#svg-toc"></use></svg></i> |
| </label> |
| </div> |
| </header> |
| <aside class="sidebar-drawer"> |
| <div class="sidebar-container"> |
| |
| <div class="sidebar-sticky"><a class="sidebar-brand" href="index.html"> |
| |
| |
| <span class="sidebar-brand-text">arrow-java 18.1.0 documentation</span> |
| |
| </a><form class="sidebar-search-container" method="get" action="search.html" role="search"> |
| <input class="sidebar-search" placeholder="Search" name="q" aria-label="Search"> |
| <input type="hidden" name="check_keywords" value="yes"> |
| <input type="hidden" name="area" value="default"> |
| </form> |
| <div id="searchbox"></div><div class="sidebar-scroll"><div class="sidebar-tree"> |
| <ul class="current"> |
| <li class="toctree-l1"><a class="reference internal" href="quickstartguide.html">Quick Start Guide</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="overview.html">High-Level Overview</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="install.html">Installing Java Modules</a></li> |
| <li class="toctree-l1 has-children"><a class="reference internal" href="developers/index.html">Java Development</a><input class="toctree-checkbox" id="toctree-checkbox-1" name="toctree-checkbox-1" role="switch" type="checkbox"/><label for="toctree-checkbox-1"><div class="visually-hidden">Toggle navigation of Java Development</div><i class="icon"><svg><use href="#svg-arrow-right"></use></svg></i></label><ul> |
| <li class="toctree-l2"><a class="reference internal" href="developers/building.html">Building Arrow Java</a></li> |
| <li class="toctree-l2"><a class="reference internal" href="developers/development.html">Development Guidelines</a></li> |
| </ul> |
| </li> |
| <li class="toctree-l1"><a class="reference internal" href="memory.html">Memory Management</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="vector.html">ValueVector</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="vector_schema_root.html">Tabular Data</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="table.html">Table</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="ipc.html">Reading/Writing IPC formats</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="algorithm.html">Java Algorithms</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="flight.html">Arrow Flight RPC</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="flight_sql.html">Arrow Flight SQL</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="flight_sql_jdbc_driver.html">Arrow Flight SQL JDBC Driver</a></li> |
| <li class="toctree-l1 current current-page"><a class="current reference internal" href="#">Dataset</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="substrait.html">Substrait</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="cdata.html">C Data Interface</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="jdbc.html">Arrow JDBC Adapter</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="reference/index.html">Reference (javadoc)</a></li> |
| <li class="toctree-l1"><a class="reference external" href="https://arrow.apache.org/cookbook/java/">Cookbook</a></li> |
| </ul> |
| |
| </div> |
| </div> |
| |
| </div> |
| |
| </div> |
| </aside> |
| <div class="main"> |
| <div class="content"> |
| <div class="article-container"> |
| <a href="#" class="back-to-top muted-link"> |
| <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"> |
| <path d="M13 20h-2V8l-5.5 5.5-1.42-1.42L12 4.16l7.92 7.92-1.42 1.42L13 8v12z"></path> |
| </svg> |
| <span>Back to top</span> |
| </a> |
| <div class="content-icon-container"> |
| <div class="view-this-page"> |
| <a class="muted-link" href="_sources/dataset.rst.txt" title="View this page"> |
| <svg><use href="#svg-eye"></use></svg> |
| <span class="visually-hidden">View this page</span> |
| </a> |
| </div> |
| <div class="theme-toggle-container theme-toggle-content"> |
| <button class="theme-toggle"> |
| <div class="visually-hidden">Toggle Light / Dark / Auto color theme</div> |
| <svg class="theme-icon-when-auto-light"><use href="#svg-sun-with-moon"></use></svg> |
| <svg class="theme-icon-when-auto-dark"><use href="#svg-moon-with-sun"></use></svg> |
| <svg class="theme-icon-when-dark"><use href="#svg-moon"></use></svg> |
| <svg class="theme-icon-when-light"><use href="#svg-sun"></use></svg> |
| </button> |
| </div> |
| <label class="toc-overlay-icon toc-content-icon" for="__toc"> |
| <div class="visually-hidden">Toggle table of contents sidebar</div> |
| <i class="icon"><svg><use href="#svg-toc"></use></svg></i> |
| </label> |
| </div> |
| <article role="main" id="furo-main-content"> |
| <section id="dataset"> |
| <h1>Dataset<a class="headerlink" href="#dataset" title="Link to this heading">¶</a></h1> |
| <div class="admonition warning"> |
| <p class="admonition-title">Warning</p> |
| <p>Experimental: The Java module <code class="docutils literal notranslate"><span class="pre">dataset</span></code> is currently under early |
| development. The API may change in each release of Apache Arrow until it |
| matures.</p> |
| </div> |
| <p>Dataset is a universal layer in Apache Arrow for querying data in different |
| formats or with different partitioning strategies. The data to be queried |
| is usually located in a traditional file system; however, Arrow Dataset |
| is not designed only for querying files and can be extended to serve all |
| possible data sources, such as inter-process communication or other |
| network locations.</p> |
| <section id="getting-started"> |
| <h2>Getting Started<a class="headerlink" href="#getting-started" title="Link to this heading">¶</a></h2> |
| <p>Currently supported file formats are:</p> |
| <ul class="simple"> |
| <li><p>Apache Arrow (<code class="docutils literal notranslate"><span class="pre">.arrow</span></code>)</p></li> |
| <li><p>Apache ORC (<code class="docutils literal notranslate"><span class="pre">.orc</span></code>)</p></li> |
| <li><p>Apache Parquet (<code class="docutils literal notranslate"><span class="pre">.parquet</span></code>)</p></li> |
| <li><p>Comma-Separated Values (<code class="docutils literal notranslate"><span class="pre">.csv</span></code>)</p></li> |
| <li><p>Line-delimited JSON Values (<code class="docutils literal notranslate"><span class="pre">.json</span></code>)</p></li> |
| </ul> |
| <p>The following is a minimal example of using Dataset to query a Parquet file in Java:</p> |
| <div class="highlight-Java notranslate"><div class="highlight"><pre><span></span><span class="c1">// read data from file /opt/example.parquet</span> |
| <span class="n">String</span><span class="w"> </span><span class="n">uri</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="s">"file:/opt/example.parquet"</span><span class="p">;</span> |
| <span class="n">ScanOptions</span><span class="w"> </span><span class="n">options</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="k">new</span><span class="w"> </span><span class="n">ScanOptions</span><span class="p">(</span><span class="cm">/*batchSize*/</span><span class="w"> </span><span class="mi">32768</span><span class="p">);</span> |
| <span class="k">try</span><span class="w"> </span><span class="p">(</span> |
| <span class="w"> </span><span class="n">BufferAllocator</span><span class="w"> </span><span class="n">allocator</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="k">new</span><span class="w"> </span><span class="n">RootAllocator</span><span class="p">();</span> |
| <span class="w"> </span><span class="n">DatasetFactory</span><span class="w"> </span><span class="n">datasetFactory</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="k">new</span><span class="w"> </span><span class="n">FileSystemDatasetFactory</span><span class="p">(</span> |
| <span class="w"> </span><span class="n">allocator</span><span class="p">,</span><span class="w"> </span><span class="n">NativeMemoryPool</span><span class="p">.</span><span class="na">getDefault</span><span class="p">(),</span> |
| <span class="w"> </span><span class="n">FileFormat</span><span class="p">.</span><span class="na">PARQUET</span><span class="p">,</span><span class="w"> </span><span class="n">uri</span><span class="p">);</span> |
| <span class="w"> </span><span class="n">Dataset</span><span class="w"> </span><span class="n">dataset</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">datasetFactory</span><span class="p">.</span><span class="na">finish</span><span class="p">();</span> |
| <span class="w"> </span><span class="n">Scanner</span><span class="w"> </span><span class="n">scanner</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">dataset</span><span class="p">.</span><span class="na">newScan</span><span class="p">(</span><span class="n">options</span><span class="p">);</span> |
| <span class="w"> </span><span class="n">ArrowReader</span><span class="w"> </span><span class="n">reader</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">scanner</span><span class="p">.</span><span class="na">scanBatches</span><span class="p">()</span> |
| <span class="p">)</span><span class="w"> </span><span class="p">{</span> |
| <span class="w"> </span><span class="n">List</span><span class="o"><</span><span class="n">ArrowRecordBatch</span><span class="o">></span><span class="w"> </span><span class="n">batches</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="k">new</span><span class="w"> </span><span class="n">ArrayList</span><span class="o"><></span><span class="p">();</span> |
| <span class="w"> </span><span class="k">while</span><span class="w"> </span><span class="p">(</span><span class="n">reader</span><span class="p">.</span><span class="na">loadNextBatch</span><span class="p">())</span><span class="w"> </span><span class="p">{</span> |
| <span class="w"> </span><span class="k">try</span><span class="w"> </span><span class="p">(</span><span class="n">VectorSchemaRoot</span><span class="w"> </span><span class="n">root</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">reader</span><span class="p">.</span><span class="na">getVectorSchemaRoot</span><span class="p">())</span><span class="w"> </span><span class="p">{</span> |
| <span class="w"> </span><span class="kd">final</span><span class="w"> </span><span class="n">VectorUnloader</span><span class="w"> </span><span class="n">unloader</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="k">new</span><span class="w"> </span><span class="n">VectorUnloader</span><span class="p">(</span><span class="n">root</span><span class="p">);</span> |
| <span class="w"> </span><span class="n">batches</span><span class="p">.</span><span class="na">add</span><span class="p">(</span><span class="n">unloader</span><span class="p">.</span><span class="na">getRecordBatch</span><span class="p">());</span> |
| <span class="w"> </span><span class="p">}</span> |
| <span class="w"> </span><span class="p">}</span> |
| |
| <span class="w"> </span><span class="c1">// do something with read record batches, for example:</span> |
| <span class="w"> </span><span class="n">analyzeArrowData</span><span class="p">(</span><span class="n">batches</span><span class="p">);</span> |
| |
| <span class="w"> </span><span class="c1">// finished the analysis of the data, close all resources:</span> |
| <span class="w"> </span><span class="n">AutoCloseables</span><span class="p">.</span><span class="na">close</span><span class="p">(</span><span class="n">batches</span><span class="p">);</span> |
| <span class="p">}</span><span class="w"> </span><span class="k">catch</span><span class="w"> </span><span class="p">(</span><span class="n">Exception</span><span class="w"> </span><span class="n">e</span><span class="p">)</span><span class="w"> </span><span class="p">{</span> |
| <span class="w"> </span><span class="n">e</span><span class="p">.</span><span class="na">printStackTrace</span><span class="p">();</span> |
| <span class="p">}</span> |
| </pre></div> |
| </div> |
| <div class="admonition note"> |
| <p class="admonition-title">Note</p> |
| <p><code class="docutils literal notranslate"><span class="pre">ArrowRecordBatch</span></code> is a low-level composite Arrow data exchange format |
| that doesn’t provide an API to read typed data from it directly. |
| It’s recommended to use the <code class="docutils literal notranslate"><span class="pre">VectorLoader</span></code> utility to load it into a schema-aware |
| container, <code class="docutils literal notranslate"><span class="pre">VectorSchemaRoot</span></code>, through which users can access |
| decoded data conveniently in Java.</p> |
| <p>The <code class="docutils literal notranslate"><span class="pre">ScanOptions</span> <span class="pre">batchSize</span></code> argument takes effect only if it is set to a value |
| smaller than the number of rows in the record batch.</p> |
| </div> |
| <div class="admonition seealso"> |
| <p class="admonition-title">See also</p> |
| <p>Load record batches with <a class="reference internal" href="vector_schema_root.html"><span class="doc">VectorSchemaRoot</span></a>.</p> |
| </div> |
| </section> |
| <section id="schema"> |
| <h2>Schema<a class="headerlink" href="#schema" title="Link to this heading">¶</a></h2> |
| <p>The schema of the data to be queried can be inspected via the method |
| <code class="docutils literal notranslate"><span class="pre">DatasetFactory#inspect()</span></code> before actually reading the data. For example:</p> |
| <div class="highlight-Java notranslate"><div class="highlight"><pre><span></span><span class="c1">// read data from local file /opt/example.parquet</span> |
| <span class="n">String</span><span class="w"> </span><span class="n">uri</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="s">"file:/opt/example.parquet"</span><span class="p">;</span> |
| <span class="n">BufferAllocator</span><span class="w"> </span><span class="n">allocator</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="k">new</span><span class="w"> </span><span class="n">RootAllocator</span><span class="p">(</span><span class="n">Long</span><span class="p">.</span><span class="na">MAX_VALUE</span><span class="p">);</span> |
| <span class="n">DatasetFactory</span><span class="w"> </span><span class="n">factory</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="k">new</span><span class="w"> </span><span class="n">FileSystemDatasetFactory</span><span class="p">(</span><span class="n">allocator</span><span class="p">,</span> |
| <span class="w"> </span><span class="n">NativeMemoryPool</span><span class="p">.</span><span class="na">getDefault</span><span class="p">(),</span><span class="w"> </span><span class="n">FileFormat</span><span class="p">.</span><span class="na">PARQUET</span><span class="p">,</span><span class="w"> </span><span class="n">uri</span><span class="p">);</span> |
| |
| <span class="c1">// inspect schema</span> |
| <span class="n">Schema</span><span class="w"> </span><span class="n">schema</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">factory</span><span class="p">.</span><span class="na">inspect</span><span class="p">();</span> |
| </pre></div> |
| </div> |
| <p>For data formats that are compatible with a user-defined schema, users |
| can use the method <code class="docutils literal notranslate"><span class="pre">DatasetFactory#finish(Schema</span> <span class="pre">schema)</span></code> to create the dataset:</p> |
| <div class="highlight-Java notranslate"><div class="highlight"><pre><span></span><span class="n">Schema</span><span class="w"> </span><span class="n">schema</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">createUserSchema</span><span class="p">();</span> |
| <span class="n">Dataset</span><span class="w"> </span><span class="n">dataset</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">factory</span><span class="p">.</span><span class="na">finish</span><span class="p">(</span><span class="n">schema</span><span class="p">);</span> |
| </pre></div> |
| </div> |
| <p>Otherwise, when the no-argument method <code class="docutils literal notranslate"><span class="pre">DatasetFactory#finish()</span></code> is called, |
| the schema is inferred automatically from the data source and is the same as the result |
| of <code class="docutils literal notranslate"><span class="pre">DatasetFactory#inspect()</span></code>.</p> |
| <p>Also, if a projection is specified during scanning (see next section |
| <a class="reference internal" href="#java-dataset-projection"><span class="std std-ref">Projection (Subset of Columns)</span></a>), the actual schema of the output data can be obtained |
| from the method <code class="docutils literal notranslate"><span class="pre">Scanner::schema()</span></code>:</p> |
| <div class="highlight-Java notranslate"><div class="highlight"><pre><span></span><span class="n">Scanner</span><span class="w"> </span><span class="n">scanner</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">dataset</span><span class="p">.</span><span class="na">newScan</span><span class="p">(</span> |
| <span class="w"> </span><span class="k">new</span><span class="w"> </span><span class="n">ScanOptions</span><span class="p">(</span><span class="mi">32768</span><span class="p">,</span><span class="w"> </span><span class="n">Optional</span><span class="p">.</span><span class="na">of</span><span class="p">(</span><span class="k">new</span><span class="w"> </span><span class="n">String</span><span class="o">[]</span><span class="w"> </span><span class="p">{</span><span class="s">"id"</span><span class="p">,</span><span class="w"> </span><span class="s">"name"</span><span class="p">})));</span> |
| <span class="n">Schema</span><span class="w"> </span><span class="n">projectedSchema</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">scanner</span><span class="p">.</span><span class="na">schema</span><span class="p">();</span> |
| </pre></div> |
| </div> |
| </section> |
| <section id="projection-subset-of-columns"> |
| <span id="java-dataset-projection"></span><h2>Projection (Subset of Columns)<a class="headerlink" href="#projection-subset-of-columns" title="Link to this heading">¶</a></h2> |
| <p>Users can specify projections in ScanOptions. For example:</p> |
| <div class="highlight-Java notranslate"><div class="highlight"><pre><span></span><span class="n">String</span><span class="o">[]</span><span class="w"> </span><span class="n">projection</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="k">new</span><span class="w"> </span><span class="n">String</span><span class="o">[]</span><span class="w"> </span><span class="p">{</span><span class="s">"id"</span><span class="p">,</span><span class="w"> </span><span class="s">"name"</span><span class="p">};</span> |
| <span class="n">ScanOptions</span><span class="w"> </span><span class="n">options</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="k">new</span><span class="w"> </span><span class="n">ScanOptions</span><span class="p">(</span><span class="mi">32768</span><span class="p">,</span><span class="w"> </span><span class="n">Optional</span><span class="p">.</span><span class="na">of</span><span class="p">(</span><span class="n">projection</span><span class="p">));</span> |
| </pre></div> |
| </div> |
| <p>If no projection is needed, leave the optional projection argument absent in |
| ScanOptions:</p> |
| <div class="highlight-Java notranslate"><div class="highlight"><pre><span></span><span class="n">ScanOptions</span><span class="w"> </span><span class="n">options</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="k">new</span><span class="w"> </span><span class="n">ScanOptions</span><span class="p">(</span><span class="mi">32768</span><span class="p">,</span><span class="w"> </span><span class="n">Optional</span><span class="p">.</span><span class="na">empty</span><span class="p">());</span> |
| </pre></div> |
| </div> |
| <p>Or use shortcut constructor:</p> |
| <div class="highlight-Java notranslate"><div class="highlight"><pre><span></span><span class="n">ScanOptions</span><span class="w"> </span><span class="n">options</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="k">new</span><span class="w"> </span><span class="n">ScanOptions</span><span class="p">(</span><span class="mi">32768</span><span class="p">);</span> |
| </pre></div> |
| </div> |
| <p>Then all columns will be emitted during scanning.</p> |
| </section> |
| <section id="projection-produce-new-columns-and-filters"> |
| <h2>Projection (Produce New Columns) and Filters<a class="headerlink" href="#projection-produce-new-columns-and-filters" title="Link to this heading">¶</a></h2> |
| <p>Users can specify projections (new columns) or filters in ScanOptions using Substrait. For example:</p> |
| <div class="highlight-Java notranslate"><div class="highlight"><pre><span></span><span class="n">ByteBuffer</span><span class="w"> </span><span class="n">substraitExpressionFilter</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">getSubstraitExpressionFilter</span><span class="p">();</span> |
| <span class="n">ByteBuffer</span><span class="w"> </span><span class="n">substraitExpressionProject</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">getSubstraitExpressionProjection</span><span class="p">();</span> |
| <span class="c1">// Use Substrait APIs to create an Expression and serialize to a ByteBuffer</span> |
| <span class="n">ScanOptions</span><span class="w"> </span><span class="n">options</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="k">new</span><span class="w"> </span><span class="n">ScanOptions</span><span class="p">.</span><span class="na">Builder</span><span class="p">(</span><span class="cm">/*batchSize*/</span><span class="w"> </span><span class="mi">32768</span><span class="p">)</span> |
| <span class="w"> </span><span class="p">.</span><span class="na">columns</span><span class="p">(</span><span class="n">Optional</span><span class="p">.</span><span class="na">empty</span><span class="p">())</span> |
| <span class="w"> </span><span class="p">.</span><span class="na">substraitExpressionFilter</span><span class="p">(</span><span class="n">substraitExpressionFilter</span><span class="p">)</span> |
| <span class="w"> </span><span class="p">.</span><span class="na">substraitExpressionProjection</span><span class="p">(</span><span class="n">substraitExpressionProject</span><span class="p">)</span> |
| <span class="w"> </span><span class="p">.</span><span class="na">build</span><span class="p">();</span> |
| </pre></div> |
| </div> |
| <div class="admonition seealso"> |
| <p class="admonition-title">See also</p> |
| <dl class="simple"> |
| <dt><a class="reference internal" href="substrait.html"><span class="doc">Executing Projections and Filters Using Extended Expressions</span></a></dt><dd><p>Projections and Filters using Substrait.</p> |
| </dd> |
| </dl> |
| </div> |
| </section> |
| <section id="read-data-from-hdfs"> |
| <h2>Read Data from HDFS<a class="headerlink" href="#read-data-from-hdfs" title="Link to this heading">¶</a></h2> |
| <p><code class="docutils literal notranslate"><span class="pre">FileSystemDataset</span></code> supports reading data from non-local file systems. HDFS |
| support is included in the official Apache Arrow Java package releases and |
| can be used directly without re-building the source code.</p> |
| <p>To access HDFS data using Dataset API, pass a general HDFS URI to |
| <code class="docutils literal notranslate"><span class="pre">FileSystemDatasetFactory</span></code>:</p> |
| <div class="highlight-Java notranslate"><div class="highlight"><pre><span></span><span class="n">String</span><span class="w"> </span><span class="n">uri</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="s">"hdfs://{hdfs_host}:{port}/data/example.parquet"</span><span class="p">;</span> |
| <span class="n">BufferAllocator</span><span class="w"> </span><span class="n">allocator</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="k">new</span><span class="w"> </span><span class="n">RootAllocator</span><span class="p">(</span><span class="n">Long</span><span class="p">.</span><span class="na">MAX_VALUE</span><span class="p">);</span> |
| <span class="n">DatasetFactory</span><span class="w"> </span><span class="n">factory</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="k">new</span><span class="w"> </span><span class="n">FileSystemDatasetFactory</span><span class="p">(</span><span class="n">allocator</span><span class="p">,</span> |
| <span class="w"> </span><span class="n">NativeMemoryPool</span><span class="p">.</span><span class="na">getDefault</span><span class="p">(),</span><span class="w"> </span><span class="n">FileFormat</span><span class="p">.</span><span class="na">PARQUET</span><span class="p">,</span><span class="w"> </span><span class="n">uri</span><span class="p">);</span> |
| </pre></div> |
| </div> |
| </section> |
| <section id="native-memory-management"> |
| <h2>Native Memory Management<a class="headerlink" href="#native-memory-management" title="Link to this heading">¶</a></h2> |
| <p>To gain better performance and reduce code complexity, Java |
| <code class="docutils literal notranslate"><span class="pre">FileSystemDataset</span></code> internally relies on C++ |
| <code class="docutils literal notranslate"><span class="pre">arrow::dataset::FileSystemDataset</span></code> via JNI. |
| As a result, all Arrow data read from <code class="docutils literal notranslate"><span class="pre">FileSystemDataset</span></code> is supposed to be |
| allocated off the JVM heap. To manage this part of memory, a utility class |
| <code class="docutils literal notranslate"><span class="pre">NativeMemoryPool</span></code> is provided to users.</p> |
| <p>As a basic example, by using a listenable <code class="docutils literal notranslate"><span class="pre">NativeMemoryPool</span></code>, users can pass |
| a listener hooking on C++ buffer allocation/deallocation:</p> |
| <div class="highlight-Java notranslate"><div class="highlight"><pre><span></span><span class="n">AtomicLong</span><span class="w"> </span><span class="n">reserved</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="k">new</span><span class="w"> </span><span class="n">AtomicLong</span><span class="p">(</span><span class="mi">0</span><span class="n">L</span><span class="p">);</span> |
| <span class="n">ReservationListener</span><span class="w"> </span><span class="n">listener</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="k">new</span><span class="w"> </span><span class="n">ReservationListener</span><span class="p">()</span><span class="w"> </span><span class="p">{</span> |
| <span class="w"> </span><span class="nd">@Override</span> |
| <span class="w"> </span><span class="kd">public</span><span class="w"> </span><span class="kt">void</span><span class="w"> </span><span class="nf">reserve</span><span class="p">(</span><span class="kt">long</span><span class="w"> </span><span class="n">size</span><span class="p">)</span><span class="w"> </span><span class="p">{</span> |
| <span class="w"> </span><span class="n">reserved</span><span class="p">.</span><span class="na">getAndAdd</span><span class="p">(</span><span class="n">size</span><span class="p">);</span> |
| <span class="w"> </span><span class="p">}</span> |
| |
| <span class="w"> </span><span class="nd">@Override</span> |
| <span class="w"> </span><span class="kd">public</span><span class="w"> </span><span class="kt">void</span><span class="w"> </span><span class="nf">unreserve</span><span class="p">(</span><span class="kt">long</span><span class="w"> </span><span class="n">size</span><span class="p">)</span><span class="w"> </span><span class="p">{</span> |
| <span class="w"> </span><span class="n">reserved</span><span class="p">.</span><span class="na">getAndAdd</span><span class="p">(</span><span class="o">-</span><span class="n">size</span><span class="p">);</span> |
| <span class="w"> </span><span class="p">}</span> |
| <span class="p">};</span> |
| <span class="n">NativeMemoryPool</span><span class="w"> </span><span class="n">pool</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">NativeMemoryPool</span><span class="p">.</span><span class="na">createListenable</span><span class="p">(</span><span class="n">listener</span><span class="p">);</span> |
| <span class="n">FileSystemDatasetFactory</span><span class="w"> </span><span class="n">factory</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="k">new</span><span class="w"> </span><span class="n">FileSystemDatasetFactory</span><span class="p">(</span><span class="n">allocator</span><span class="p">,</span> |
| <span class="w"> </span><span class="n">pool</span><span class="p">,</span><span class="w"> </span><span class="n">FileFormat</span><span class="p">.</span><span class="na">PARQUET</span><span class="p">,</span><span class="w"> </span><span class="n">uri</span><span class="p">);</span> |
| </pre></div> |
| </div> |
| <p>Also, it’s a very common case to reserve the same amount of JVM direct memory |
| for the data read from datasets. For this use case, a built-in utility |
| class <code class="docutils literal notranslate"><span class="pre">DirectReservationListener</span></code> is provided:</p> |
| <div class="highlight-Java notranslate"><div class="highlight"><pre><span></span><span class="n">NativeMemoryPool</span><span class="w"> </span><span class="n">pool</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">NativeMemoryPool</span><span class="p">.</span><span class="na">createListenable</span><span class="p">(</span> |
| <span class="w"> </span><span class="n">DirectReservationListener</span><span class="p">.</span><span class="na">instance</span><span class="p">());</span> |
| </pre></div> |
| </div> |
| <p>This way, once the allocated byte count of Arrow buffers reaches the limit of |
| JVM direct memory, <code class="docutils literal notranslate"><span class="pre">OutOfMemoryError:</span> <span class="pre">Direct</span> <span class="pre">buffer</span> <span class="pre">memory</span></code> will |
| be thrown during scanning.</p> |
| <div class="admonition note"> |
| <p class="admonition-title">Note</p> |
| <p>The default instance <code class="docutils literal notranslate"><span class="pre">NativeMemoryPool.getDefault()</span></code> does |
| nothing on buffer allocation/deallocation. It’s OK to use it in |
| the case of POC or testing, but for production use in complex environments, |
| it’s recommended to manage memory by using a listenable memory pool.</p> |
| </div> |
| <div class="admonition note"> |
| <p class="admonition-title">Note</p> |
| <p>The <code class="docutils literal notranslate"><span class="pre">BufferAllocator</span></code> instance passed to <code class="docutils literal notranslate"><span class="pre">FileSystemDatasetFactory</span></code>’s |
| constructor is also aware of the overall memory usage of the produced |
| dataset instances. Once the Java buffers are created the passed allocator |
| will become their parent allocator.</p> |
| </div> |
| </section> |
| <section id="usage-notes"> |
| <h2>Usage Notes<a class="headerlink" href="#usage-notes" title="Link to this heading">¶</a></h2> |
| <section id="native-object-resource-management"> |
| <h3>Native Object Resource Management<a class="headerlink" href="#native-object-resource-management" title="Link to this heading">¶</a></h3> |
| <p>As another result of relying on JNI, all components related to |
| <code class="docutils literal notranslate"><span class="pre">FileSystemDataset</span></code> should be closed manually or via try-with-resources to |
| release the corresponding native objects after use. For example:</p> |
| <div class="highlight-Java notranslate"><div class="highlight"><pre><span></span><span class="n">String</span><span class="w"> </span><span class="n">uri</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="s">"file:/opt/example.parquet"</span><span class="p">;</span> |
| <span class="n">ScanOptions</span><span class="w"> </span><span class="n">options</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="k">new</span><span class="w"> </span><span class="n">ScanOptions</span><span class="p">(</span><span class="cm">/*batchSize*/</span><span class="w"> </span><span class="mi">32768</span><span class="p">);</span> |
| <span class="k">try</span><span class="w"> </span><span class="p">(</span> |
| <span class="w"> </span><span class="n">BufferAllocator</span><span class="w"> </span><span class="n">allocator</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="k">new</span><span class="w"> </span><span class="n">RootAllocator</span><span class="p">();</span> |
| <span class="w"> </span><span class="n">DatasetFactory</span><span class="w"> </span><span class="n">factory</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="k">new</span><span class="w"> </span><span class="n">FileSystemDatasetFactory</span><span class="p">(</span> |
| <span class="w"> </span><span class="n">allocator</span><span class="p">,</span><span class="w"> </span><span class="n">NativeMemoryPool</span><span class="p">.</span><span class="na">getDefault</span><span class="p">(),</span> |
| <span class="w"> </span><span class="n">FileFormat</span><span class="p">.</span><span class="na">PARQUET</span><span class="p">,</span><span class="w"> </span><span class="n">uri</span><span class="p">);</span> |
| <span class="w"> </span><span class="n">Dataset</span><span class="w"> </span><span class="n">dataset</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">factory</span><span class="p">.</span><span class="na">finish</span><span class="p">();</span> |
| <span class="w"> </span><span class="n">Scanner</span><span class="w"> </span><span class="n">scanner</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">dataset</span><span class="p">.</span><span class="na">newScan</span><span class="p">(</span><span class="n">options</span><span class="p">)</span> |
| <span class="p">)</span><span class="w"> </span><span class="p">{</span> |
| |
| <span class="w"> </span><span class="c1">// do something</span> |
| |
| <span class="p">}</span><span class="w"> </span><span class="k">catch</span><span class="w"> </span><span class="p">(</span><span class="n">Exception</span><span class="w"> </span><span class="n">e</span><span class="p">)</span><span class="w"> </span><span class="p">{</span> |
| <span class="w"> </span><span class="n">e</span><span class="p">.</span><span class="na">printStackTrace</span><span class="p">();</span> |
| <span class="p">}</span> |
| </pre></div> |
| </div> |
| <p>If users forget to close them, the native objects may be leaked.</p> |
| </section> |
| <section id="batchsize"> |
| <h3>BatchSize<a class="headerlink" href="#batchsize" title="Link to this heading">¶</a></h3> |
| <p>The <code class="docutils literal notranslate"><span class="pre">batchSize</span></code> argument of <code class="docutils literal notranslate"><span class="pre">ScanOptions</span></code> is a limit on the size of an individual batch.</p> |
| <p>For example, let’s try to read a Parquet file with gzip compression and 3 row groups:</p> |
| <div class="highlight-default notranslate"><div class="highlight"><pre><span></span># Let's configure ScanOptions as: |
| ScanOptions options = new ScanOptions(/*batchSize*/ 32768); |
| |
| $ parquet-tools meta data4_3rg_gzip.parquet |
| file schema: schema |
| age: OPTIONAL INT64 R:0 D:1 |
| name: OPTIONAL BINARY L:STRING R:0 D:1 |
| row group 1: RC:4 TS:182 OFFSET:4 |
| row group 2: RC:4 TS:190 OFFSET:420 |
| row group 3: RC:3 TS:179 OFFSET:838 |
| </pre></div> |
| </div> |
| <p>Here, we set the batchSize in ScanOptions to 32768. Because that’s greater |
| than the number of rows in the next batch (4 rows, since the first row group |
| has only 4 rows), the program gets only 4 rows. The scanner |
| will not combine smaller batches to reach the limit, but it will split |
| large batches to stay under the limit. So if a row group had more |
| than 32768 rows, it would be split into blocks of 32768 rows or fewer.</p> |
| </section> |
| </section> |
| </section> |
| |
| </article> |
| </div> |
| <footer> |
| |
| <div class="related-pages"> |
| <a class="next-page" href="substrait.html"> |
| <div class="page-info"> |
| <div class="context"> |
| <span>Next</span> |
| </div> |
| <div class="title">Substrait</div> |
| </div> |
| <svg class="furo-related-icon"><use href="#svg-arrow-right"></use></svg> |
| </a> |
| <a class="prev-page" href="flight_sql_jdbc_driver.html"> |
| <svg class="furo-related-icon"><use href="#svg-arrow-right"></use></svg> |
| <div class="page-info"> |
| <div class="context"> |
| <span>Previous</span> |
| </div> |
| |
| <div class="title">Arrow Flight SQL JDBC Driver</div> |
| |
| </div> |
| </a> |
| </div> |
| <div class="bottom-of-page"> |
| <div class="left-details"> |
| <div class="copyright"> |
| Copyright © 2025, Apache Arrow Developers |
| </div> |
| Made with <a href="https://www.sphinx-doc.org/">Sphinx</a> and <a class="muted-link" href="https://pradyunsg.me">@pradyunsg</a>'s |
| |
| <a href="https://github.com/pradyunsg/furo">Furo</a> |
| |
| </div> |
| <div class="right-details"> |
| |
| </div> |
| </div> |
| |
| </footer> |
| </div> |
| <aside class="toc-drawer"> |
| |
| |
| <div class="toc-sticky toc-scroll"> |
| <div class="toc-title-container"> |
| <span class="toc-title"> |
| On this page |
| </span> |
| </div> |
| <div class="toc-tree-container"> |
| <div class="toc-tree"> |
| <ul> |
| <li><a class="reference internal" href="#">Dataset</a><ul> |
| <li><a class="reference internal" href="#getting-started">Getting Started</a></li> |
| <li><a class="reference internal" href="#schema">Schema</a></li> |
| <li><a class="reference internal" href="#projection-subset-of-columns">Projection (Subset of Columns)</a></li> |
| <li><a class="reference internal" href="#projection-produce-new-columns-and-filters">Projection (Produce New Columns) and Filters</a></li> |
| <li><a class="reference internal" href="#read-data-from-hdfs">Read Data from HDFS</a></li> |
| <li><a class="reference internal" href="#native-memory-management">Native Memory Management</a></li> |
| <li><a class="reference internal" href="#usage-notes">Usage Notes</a><ul> |
| <li><a class="reference internal" href="#native-object-resource-management">Native Object Resource Management</a></li> |
| <li><a class="reference internal" href="#batchsize">BatchSize</a></li> |
| </ul> |
| </li> |
| </ul> |
| </li> |
| </ul> |
| |
| </div> |
| </div> |
| </div> |
| |
| |
| </aside> |
| </div> |
| </div><script src="_static/documentation_options.js?v=c4c92189"></script> |
| <script src="_static/doctools.js?v=9bcbadda"></script> |
| <script src="_static/sphinx_highlight.js?v=dc90522c"></script> |
| <script src="_static/scripts/furo.js?v=5fa4622c"></script> |
| </body> |
| </html> |