| <!DOCTYPE html> |
| |
| <html lang="en" data-content_root="../../"> |
| <head> |
| <meta charset="utf-8" /> |
| <meta name="viewport" content="width=device-width, initial-scale=1.0" /> |
| |
| <title>User-Defined Functions — Apache Arrow DataFusion documentation</title> |
| |
| <link href="../../_static/styles/theme.css?digest=1999514e3f237ded88cf" rel="stylesheet"> |
| <link href="../../_static/styles/pydata-sphinx-theme.css?digest=1999514e3f237ded88cf" rel="stylesheet"> |
| |
| |
| <link rel="stylesheet" |
| href="../../_static/vendor/fontawesome/5.13.0/css/all.min.css"> |
| <link rel="preload" as="font" type="font/woff2" crossorigin |
| href="../../_static/vendor/fontawesome/5.13.0/webfonts/fa-solid-900.woff2"> |
| <link rel="preload" as="font" type="font/woff2" crossorigin |
| href="../../_static/vendor/fontawesome/5.13.0/webfonts/fa-brands-400.woff2"> |
| |
| |
| |
| |
| |
| <link rel="stylesheet" type="text/css" href="../../_static/pygments.css?v=8f2a1f02" /> |
| <link rel="stylesheet" type="text/css" href="../../_static/styles/pydata-sphinx-theme.css?v=1140d252" /> |
| <link rel="stylesheet" type="text/css" href="../../_static/graphviz.css?v=4ae1632d" /> |
| <link rel="stylesheet" type="text/css" href="../../_static/theme_overrides.css?v=dca7052a" /> |
| |
| <link rel="preload" as="script" href="../../_static/scripts/pydata-sphinx-theme.js?digest=1999514e3f237ded88cf"> |
| |
| <script src="../../_static/documentation_options.js?v=8a448e45"></script> |
| <script src="../../_static/doctools.js?v=9bcbadda"></script> |
| <script src="../../_static/sphinx_highlight.js?v=dc90522c"></script> |
| <link rel="index" title="Index" href="../../genindex.html" /> |
| <link rel="search" title="Search" href="../../search.html" /> |
| <link rel="next" title="IO" href="../io/index.html" /> |
| <link rel="prev" title="Window Functions" href="windows.html" /> |
| <meta name="viewport" content="width=device-width, initial-scale=1" /> |
| <meta name="docsearch:language" content="en"> |
| |
| |
| <!-- Google Analytics --> |
| |
| </head> |
| <body data-spy="scroll" data-target="#bd-toc-nav" data-offset="80"> |
| |
| <div class="container-fluid" id="banner"></div> |
| |
| |
| |
| |
| <div class="container-xl"> |
| <div class="row"> |
| |
| |
| <!-- Only show if we have sidebars configured, else just a small margin --> |
| <div class="col-12 col-md-3 bd-sidebar"> |
| <div class="sidebar-start-items"> |
| <a class="navbar-brand" href="../../index.html"> |
| <img src="../../_static/images/2x_bgwhite_original.png" class="logo" alt="logo"> |
| </a> |
| |
| <form class="bd-search d-flex align-items-center" action="../../search.html" method="get"> |
| <i class="icon fas fa-search"></i> |
| <input type="search" class="form-control" name="q" id="search-input" placeholder="Search the docs ..." aria-label="Search the docs ..." autocomplete="off" > |
| </form> |
| |
| <nav class="bd-links" id="bd-docs-nav" aria-label="Main navigation"> |
| <div class="bd-toc-item active"> |
| |
| <p aria-level="2" class="caption" role="heading"> |
| <span class="caption-text"> |
| LINKS |
| </span> |
| </p> |
| <ul class="nav bd-sidenav"> |
| <li class="toctree-l1"> |
| <a class="reference external" href="https://github.com/apache/datafusion-python"> |
| GitHub and Issue Tracker |
| </a> |
| </li> |
| <li class="toctree-l1"> |
| <a class="reference external" href="https://docs.rs/datafusion/latest/datafusion/"> |
| Rust's API Docs |
| </a> |
| </li> |
| <li class="toctree-l1"> |
| <a class="reference external" href="https://github.com/apache/datafusion/blob/main/CODE_OF_CONDUCT.md"> |
| Code of conduct |
| </a> |
| </li> |
| <li class="toctree-l1"> |
| <a class="reference external" href="https://github.com/apache/datafusion-python/tree/main/examples"> |
| Examples |
| </a> |
| </li> |
| </ul> |
| <p aria-level="2" class="caption" role="heading"> |
| <span class="caption-text"> |
| USER GUIDE |
| </span> |
| </p> |
| <ul class="current nav bd-sidenav"> |
| <li class="toctree-l1"> |
| <a class="reference internal" href="../introduction.html"> |
| Introduction |
| </a> |
| </li> |
| <li class="toctree-l1"> |
| <a class="reference internal" href="../basics.html"> |
| Concepts |
| </a> |
| </li> |
| <li class="toctree-l1"> |
| <a class="reference internal" href="../data-sources.html"> |
| Data Sources |
| </a> |
| </li> |
| <li class="toctree-l1 has-children"> |
| <a class="reference internal" href="../dataframe/index.html"> |
| DataFrames |
| </a> |
| <input class="toctree-checkbox" id="toctree-checkbox-1" name="toctree-checkbox-1" type="checkbox"/> |
| <label for="toctree-checkbox-1"> |
| <i class="fas fa-chevron-down"> |
| </i> |
| </label> |
| <ul> |
| <li class="toctree-l2"> |
| <a class="reference internal" href="../dataframe/rendering.html"> |
| HTML Rendering in Jupyter |
| </a> |
| </li> |
| </ul> |
| </li> |
| <li class="toctree-l1 current active has-children"> |
| <a class="reference internal" href="index.html"> |
| Common Operations |
| </a> |
| <input checked="" class="toctree-checkbox" id="toctree-checkbox-2" name="toctree-checkbox-2" type="checkbox"/> |
| <label for="toctree-checkbox-2"> |
| <i class="fas fa-chevron-down"> |
| </i> |
| </label> |
| <ul class="current"> |
| <li class="toctree-l2"> |
| <a class="reference internal" href="views.html"> |
| Registering Views |
| </a> |
| </li> |
| <li class="toctree-l2"> |
| <a class="reference internal" href="basic-info.html"> |
| Basic Operations |
| </a> |
| </li> |
| <li class="toctree-l2"> |
| <a class="reference internal" href="select-and-filter.html"> |
| Column Selections |
| </a> |
| </li> |
| <li class="toctree-l2"> |
| <a class="reference internal" href="expressions.html"> |
| Expressions |
| </a> |
| </li> |
| <li class="toctree-l2"> |
| <a class="reference internal" href="joins.html"> |
| Joins |
| </a> |
| </li> |
| <li class="toctree-l2"> |
| <a class="reference internal" href="functions.html"> |
| Functions |
| </a> |
| </li> |
| <li class="toctree-l2"> |
| <a class="reference internal" href="aggregations.html"> |
| Aggregation |
| </a> |
| </li> |
| <li class="toctree-l2"> |
| <a class="reference internal" href="windows.html"> |
| Window Functions |
| </a> |
| </li> |
| <li class="toctree-l2 current active"> |
| <a class="current reference internal" href="#"> |
| User-Defined Functions |
| </a> |
| </li> |
| </ul> |
| </li> |
| <li class="toctree-l1 has-children"> |
| <a class="reference internal" href="../io/index.html"> |
| IO |
| </a> |
| <input class="toctree-checkbox" id="toctree-checkbox-3" name="toctree-checkbox-3" type="checkbox"/> |
| <label for="toctree-checkbox-3"> |
| <i class="fas fa-chevron-down"> |
| </i> |
| </label> |
| <ul> |
| <li class="toctree-l2"> |
| <a class="reference internal" href="../io/arrow.html"> |
| Arrow |
| </a> |
| </li> |
| <li class="toctree-l2"> |
| <a class="reference internal" href="../io/avro.html"> |
| Avro |
| </a> |
| </li> |
| <li class="toctree-l2"> |
| <a class="reference internal" href="../io/csv.html"> |
| CSV |
| </a> |
| </li> |
| <li class="toctree-l2"> |
| <a class="reference internal" href="../io/json.html"> |
| JSON |
| </a> |
| </li> |
| <li class="toctree-l2"> |
| <a class="reference internal" href="../io/parquet.html"> |
| Parquet |
| </a> |
| </li> |
| <li class="toctree-l2"> |
| <a class="reference internal" href="../io/table_provider.html"> |
| Custom Table Provider |
| </a> |
| </li> |
| </ul> |
| </li> |
| <li class="toctree-l1"> |
| <a class="reference internal" href="../configuration.html"> |
| Configuration |
| </a> |
| </li> |
| <li class="toctree-l1"> |
| <a class="reference internal" href="../sql.html"> |
| SQL |
| </a> |
| </li> |
| </ul> |
| <p aria-level="2" class="caption" role="heading"> |
| <span class="caption-text"> |
| CONTRIBUTOR GUIDE |
| </span> |
| </p> |
| <ul class="nav bd-sidenav"> |
| <li class="toctree-l1"> |
| <a class="reference internal" href="../../contributor-guide/introduction.html"> |
| Introduction |
| </a> |
| </li> |
| <li class="toctree-l1"> |
| <a class="reference internal" href="../../contributor-guide/ffi.html"> |
| Python Extensions |
| </a> |
| </li> |
| </ul> |
| <p aria-level="2" class="caption" role="heading"> |
| <span class="caption-text"> |
| API |
| </span> |
| </p> |
| <ul class="nav bd-sidenav"> |
| <li class="toctree-l1 has-children"> |
| <a class="reference internal" href="../../autoapi/index.html"> |
| API Reference |
| </a> |
| <input class="toctree-checkbox" id="toctree-checkbox-4" name="toctree-checkbox-4" type="checkbox"/> |
| <label for="toctree-checkbox-4"> |
| <i class="fas fa-chevron-down"> |
| </i> |
| </label> |
| <ul> |
| <li class="toctree-l2 has-children"> |
| <a class="reference internal" href="../../autoapi/datafusion/index.html"> |
| datafusion |
| </a> |
| <input class="toctree-checkbox" id="toctree-checkbox-5" name="toctree-checkbox-5" type="checkbox"/> |
| <label for="toctree-checkbox-5"> |
| <i class="fas fa-chevron-down"> |
| </i> |
| </label> |
| <ul> |
| <li class="toctree-l3"> |
| <a class="reference internal" href="../../autoapi/datafusion/catalog/index.html"> |
| datafusion.catalog |
| </a> |
| </li> |
| <li class="toctree-l3"> |
| <a class="reference internal" href="../../autoapi/datafusion/context/index.html"> |
| datafusion.context |
| </a> |
| </li> |
| <li class="toctree-l3"> |
| <a class="reference internal" href="../../autoapi/datafusion/dataframe/index.html"> |
| datafusion.dataframe |
| </a> |
| </li> |
| <li class="toctree-l3"> |
| <a class="reference internal" href="../../autoapi/datafusion/dataframe_formatter/index.html"> |
| datafusion.dataframe_formatter |
| </a> |
| </li> |
| <li class="toctree-l3"> |
| <a class="reference internal" href="../../autoapi/datafusion/expr/index.html"> |
| datafusion.expr |
| </a> |
| </li> |
| <li class="toctree-l3"> |
| <a class="reference internal" href="../../autoapi/datafusion/functions/index.html"> |
| datafusion.functions |
| </a> |
| </li> |
| <li class="toctree-l3"> |
| <a class="reference internal" href="../../autoapi/datafusion/html_formatter/index.html"> |
| datafusion.html_formatter |
| </a> |
| </li> |
| <li class="toctree-l3 has-children"> |
| <a class="reference internal" href="../../autoapi/datafusion/input/index.html"> |
| datafusion.input |
| </a> |
| <input class="toctree-checkbox" id="toctree-checkbox-6" name="toctree-checkbox-6" type="checkbox"/> |
| <label for="toctree-checkbox-6"> |
| <i class="fas fa-chevron-down"> |
| </i> |
| </label> |
| <ul> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../../autoapi/datafusion/input/base/index.html"> |
| datafusion.input.base |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../../autoapi/datafusion/input/location/index.html"> |
| datafusion.input.location |
| </a> |
| </li> |
| </ul> |
| </li> |
| <li class="toctree-l3"> |
| <a class="reference internal" href="../../autoapi/datafusion/io/index.html"> |
| datafusion.io |
| </a> |
| </li> |
| <li class="toctree-l3"> |
| <a class="reference internal" href="../../autoapi/datafusion/object_store/index.html"> |
| datafusion.object_store |
| </a> |
| </li> |
| <li class="toctree-l3"> |
| <a class="reference internal" href="../../autoapi/datafusion/plan/index.html"> |
| datafusion.plan |
| </a> |
| </li> |
| <li class="toctree-l3"> |
| <a class="reference internal" href="../../autoapi/datafusion/record_batch/index.html"> |
| datafusion.record_batch |
| </a> |
| </li> |
| <li class="toctree-l3"> |
| <a class="reference internal" href="../../autoapi/datafusion/substrait/index.html"> |
| datafusion.substrait |
| </a> |
| </li> |
| <li class="toctree-l3"> |
| <a class="reference internal" href="../../autoapi/datafusion/unparser/index.html"> |
| datafusion.unparser |
| </a> |
| </li> |
| <li class="toctree-l3"> |
| <a class="reference internal" href="../../autoapi/datafusion/user_defined/index.html"> |
| datafusion.user_defined |
| </a> |
| </li> |
| </ul> |
| </li> |
| </ul> |
| </li> |
| </ul> |
| |
| |
| </div> |
| </nav> |
| </div> |
| <div class="sidebar-end-items"> |
| </div> |
| </div> |
| |
| |
| |
| |
| <div class="d-none d-xl-block col-xl-2 bd-toc"> |
| |
| |
| <div class="toc-item"> |
| |
| <div class="tocsection onthispage pt-5 pb-3"> |
| <i class="fas fa-list"></i> On this page |
| </div> |
| |
| <nav id="bd-toc-nav"> |
| <ul class="visible nav section-nav flex-column"> |
| <li class="toc-h2 nav-item toc-entry"> |
| <a class="reference internal nav-link" href="#scalar-functions"> |
| Scalar Functions |
| </a> |
| </li> |
| <li class="toc-h2 nav-item toc-entry"> |
| <a class="reference internal nav-link" href="#aggregate-functions"> |
| Aggregate Functions |
| </a> |
| </li> |
| <li class="toc-h2 nav-item toc-entry"> |
| <a class="reference internal nav-link" href="#window-functions"> |
| Window Functions |
| </a> |
| <ul class="visible nav section-nav flex-column"> |
| <li class="toc-h3 nav-item toc-entry"> |
| <a class="reference internal nav-link" href="#udwf-options"> |
| UDWF options |
| </a> |
| </li> |
| </ul> |
| </li> |
| <li class="toc-h2 nav-item toc-entry"> |
| <a class="reference internal nav-link" href="#table-functions"> |
| Table Functions |
| </a> |
| </li> |
| </ul> |
| |
| </nav> |
| </div> |
| |
| <div class="toc-item"> |
| |
| </div> |
| |
| |
| </div> |
| |
| |
| |
| |
| |
| |
| <main class="col-12 col-md-9 col-xl-7 py-md-5 pl-md-5 pr-md-4 bd-content" role="main"> |
| |
| <div> |
| |
| <section id="user-defined-functions"> |
| <h1>User-Defined Functions<a class="headerlink" href="#user-defined-functions" title="Link to this heading">¶</a></h1> |
| <p>DataFusion provides powerful expressions and functions, reducing the need for custom Python |
| functions. However, you can still incorporate your own functions, i.e., User-Defined Functions (UDFs).</p> |
| <section id="scalar-functions"> |
| <h2>Scalar Functions<a class="headerlink" href="#scalar-functions" title="Link to this heading">¶</a></h2> |
| <p>User-defined functions that operate on a row-by-row basis are called Scalar |
| Functions. You can define your own scalar function by calling |
| <a class="reference internal" href="../../autoapi/datafusion/user_defined/index.html#datafusion.user_defined.ScalarUDF.udf" title="datafusion.user_defined.ScalarUDF.udf"><code class="xref py py-func docutils literal notranslate"><span class="pre">udf()</span></code></a>.</p> |
| <p>The basic definition of a scalar UDF is a Python function that takes one or more |
| <a class="reference external" href="https://arrow.apache.org/docs/python/index.html">pyarrow</a> arrays and returns a single array as |
| output. DataFusion scalar UDFs operate on an entire batch of records at a time, though the |
| evaluation of those records should be on a row-by-row basis. In the following example, we compute |
| whether each value in the input array is null.</p> |
| <div class="highlight-ipython notranslate"><div class="highlight"><pre><span></span><span class="n">In</span> <span class="p">[</span><span class="mi">1</span><span class="p">]:</span> <span class="kn">import</span><span class="w"> </span><span class="nn">pyarrow</span> |
| |
| <span class="n">In</span> <span class="p">[</span><span class="mi">2</span><span class="p">]:</span> <span class="kn">import</span><span class="w"> </span><span class="nn">datafusion</span> |
| |
| <span class="n">In</span> <span class="p">[</span><span class="mi">3</span><span class="p">]:</span> <span class="kn">from</span><span class="w"> </span><span class="nn">datafusion</span><span class="w"> </span><span class="kn">import</span> <span class="n">udf</span><span class="p">,</span> <span class="n">col</span> |
| |
| <span class="n">In</span> <span class="p">[</span><span class="mi">4</span><span class="p">]:</span> <span class="k">def</span><span class="w"> </span><span class="nf">is_null</span><span class="p">(</span><span class="n">array</span><span class="p">:</span> <span class="n">pyarrow</span><span class="o">.</span><span class="n">Array</span><span class="p">)</span> <span class="o">-></span> <span class="n">pyarrow</span><span class="o">.</span><span class="n">Array</span><span class="p">:</span> |
| <span class="o">...</span><span class="p">:</span> <span class="k">return</span> <span class="n">array</span><span class="o">.</span><span class="n">is_null</span><span class="p">()</span> |
| <span class="o">...</span><span class="p">:</span> |
| |
| <span class="n">In</span> <span class="p">[</span><span class="mi">5</span><span class="p">]:</span> <span class="n">is_null_arr</span> <span class="o">=</span> <span class="n">udf</span><span class="p">(</span><span class="n">is_null</span><span class="p">,</span> <span class="p">[</span><span class="n">pyarrow</span><span class="o">.</span><span class="n">int64</span><span class="p">()],</span> <span class="n">pyarrow</span><span class="o">.</span><span class="n">bool_</span><span class="p">(),</span> <span class="s1">'stable'</span><span class="p">)</span> |
| |
| <span class="n">In</span> <span class="p">[</span><span class="mi">6</span><span class="p">]:</span> <span class="n">ctx</span> <span class="o">=</span> <span class="n">datafusion</span><span class="o">.</span><span class="n">SessionContext</span><span class="p">()</span> |
| |
| <span class="n">In</span> <span class="p">[</span><span class="mi">7</span><span class="p">]:</span> <span class="n">batch</span> <span class="o">=</span> <span class="n">pyarrow</span><span class="o">.</span><span class="n">RecordBatch</span><span class="o">.</span><span class="n">from_arrays</span><span class="p">(</span> |
| <span class="o">...</span><span class="p">:</span> <span class="p">[</span><span class="n">pyarrow</span><span class="o">.</span><span class="n">array</span><span class="p">([</span><span class="mi">1</span><span class="p">,</span> <span class="kc">None</span><span class="p">,</span> <span class="mi">3</span><span class="p">]),</span> <span class="n">pyarrow</span><span class="o">.</span><span class="n">array</span><span class="p">([</span><span class="mi">4</span><span class="p">,</span> <span class="mi">5</span><span class="p">,</span> <span class="mi">6</span><span class="p">])],</span> |
| <span class="o">...</span><span class="p">:</span> <span class="n">names</span><span class="o">=</span><span class="p">[</span><span class="s2">"a"</span><span class="p">,</span> <span class="s2">"b"</span><span class="p">],</span> |
| <span class="o">...</span><span class="p">:</span> <span class="p">)</span> |
| <span class="o">...</span><span class="p">:</span> |
| |
| <span class="n">In</span> <span class="p">[</span><span class="mi">8</span><span class="p">]:</span> <span class="n">df</span> <span class="o">=</span> <span class="n">ctx</span><span class="o">.</span><span class="n">create_dataframe</span><span class="p">([[</span><span class="n">batch</span><span class="p">]],</span> <span class="n">name</span><span class="o">=</span><span class="s2">"batch_array"</span><span class="p">)</span> |
| |
| <span class="n">In</span> <span class="p">[</span><span class="mi">9</span><span class="p">]:</span> <span class="n">df</span><span class="o">.</span><span class="n">select</span><span class="p">(</span><span class="n">col</span><span class="p">(</span><span class="s2">"a"</span><span class="p">),</span> <span class="n">is_null_arr</span><span class="p">(</span><span class="n">col</span><span class="p">(</span><span class="s2">"a"</span><span class="p">))</span><span class="o">.</span><span class="n">alias</span><span class="p">(</span><span class="s2">"is_null"</span><span class="p">))</span><span class="o">.</span><span class="n">show</span><span class="p">()</span> |
| <span class="n">DataFrame</span><span class="p">()</span> |
| <span class="o">+---+---------+</span> |
| <span class="o">|</span> <span class="n">a</span> <span class="o">|</span> <span class="n">is_null</span> <span class="o">|</span> |
| <span class="o">+---+---------+</span> |
| <span class="o">|</span> <span class="mi">1</span> <span class="o">|</span> <span class="n">false</span> <span class="o">|</span> |
| <span class="o">|</span> <span class="o">|</span> <span class="n">true</span> <span class="o">|</span> |
| <span class="o">|</span> <span class="mi">3</span> <span class="o">|</span> <span class="n">false</span> <span class="o">|</span> |
| <span class="o">+---+---------+</span> |
| </pre></div> |
| </div> |
| <p>In the previous example, we used the fact that pyarrow provides a variety of built-in array |
| functions such as <code class="docutils literal notranslate"><span class="pre">is_null()</span></code>. There are additional pyarrow |
| <a class="reference external" href="https://arrow.apache.org/docs/python/compute.html">compute functions</a> available. When possible, |
| it is highly recommended to use these functions because they can perform computations without doing |
| any copy operations from the original arrays. This leads to greatly improved performance.</p> |
| <p>If you need to perform an operation in Python that is not available with the pyarrow compute |
| functions, you will need to convert the record batch into Python values, perform your operation, |
| and construct an array. This operation of converting the built-in data type of the array into a |
| Python object can be one of the slowest operations in DataFusion, so it should be done sparingly.</p> |
| <p>The following example performs the same operation as before with <code class="docutils literal notranslate"><span class="pre">is_null</span></code> but demonstrates |
| converting to Python objects to do the evaluation.</p> |
| <div class="highlight-ipython notranslate"><div class="highlight"><pre><span></span><span class="n">In</span> <span class="p">[</span><span class="mi">10</span><span class="p">]:</span> <span class="kn">import</span><span class="w"> </span><span class="nn">pyarrow</span> |
| |
| <span class="n">In</span> <span class="p">[</span><span class="mi">11</span><span class="p">]:</span> <span class="kn">import</span><span class="w"> </span><span class="nn">datafusion</span> |
| |
| <span class="n">In</span> <span class="p">[</span><span class="mi">12</span><span class="p">]:</span> <span class="kn">from</span><span class="w"> </span><span class="nn">datafusion</span><span class="w"> </span><span class="kn">import</span> <span class="n">udf</span><span class="p">,</span> <span class="n">col</span> |
| |
| <span class="n">In</span> <span class="p">[</span><span class="mi">13</span><span class="p">]:</span> <span class="k">def</span><span class="w"> </span><span class="nf">is_null</span><span class="p">(</span><span class="n">array</span><span class="p">:</span> <span class="n">pyarrow</span><span class="o">.</span><span class="n">Array</span><span class="p">)</span> <span class="o">-></span> <span class="n">pyarrow</span><span class="o">.</span><span class="n">Array</span><span class="p">:</span> |
| <span class="o">....</span><span class="p">:</span> <span class="k">return</span> <span class="n">pyarrow</span><span class="o">.</span><span class="n">array</span><span class="p">([</span><span class="n">value</span><span class="o">.</span><span class="n">as_py</span><span class="p">()</span> <span class="ow">is</span> <span class="kc">None</span> <span class="k">for</span> <span class="n">value</span> <span class="ow">in</span> <span class="n">array</span><span class="p">])</span> |
| <span class="o">....</span><span class="p">:</span> |
| |
| <span class="n">In</span> <span class="p">[</span><span class="mi">14</span><span class="p">]:</span> <span class="n">is_null_arr</span> <span class="o">=</span> <span class="n">udf</span><span class="p">(</span><span class="n">is_null</span><span class="p">,</span> <span class="p">[</span><span class="n">pyarrow</span><span class="o">.</span><span class="n">int64</span><span class="p">()],</span> <span class="n">pyarrow</span><span class="o">.</span><span class="n">bool_</span><span class="p">(),</span> <span class="s1">'stable'</span><span class="p">)</span> |
| |
| <span class="n">In</span> <span class="p">[</span><span class="mi">15</span><span class="p">]:</span> <span class="n">ctx</span> <span class="o">=</span> <span class="n">datafusion</span><span class="o">.</span><span class="n">SessionContext</span><span class="p">()</span> |
| |
| <span class="n">In</span> <span class="p">[</span><span class="mi">16</span><span class="p">]:</span> <span class="n">batch</span> <span class="o">=</span> <span class="n">pyarrow</span><span class="o">.</span><span class="n">RecordBatch</span><span class="o">.</span><span class="n">from_arrays</span><span class="p">(</span> |
| <span class="o">....</span><span class="p">:</span> <span class="p">[</span><span class="n">pyarrow</span><span class="o">.</span><span class="n">array</span><span class="p">([</span><span class="mi">1</span><span class="p">,</span> <span class="kc">None</span><span class="p">,</span> <span class="mi">3</span><span class="p">]),</span> <span class="n">pyarrow</span><span class="o">.</span><span class="n">array</span><span class="p">([</span><span class="mi">4</span><span class="p">,</span> <span class="mi">5</span><span class="p">,</span> <span class="mi">6</span><span class="p">])],</span> |
| <span class="o">....</span><span class="p">:</span> <span class="n">names</span><span class="o">=</span><span class="p">[</span><span class="s2">"a"</span><span class="p">,</span> <span class="s2">"b"</span><span class="p">],</span> |
| <span class="o">....</span><span class="p">:</span> <span class="p">)</span> |
| <span class="o">....</span><span class="p">:</span> |
| |
| <span class="n">In</span> <span class="p">[</span><span class="mi">17</span><span class="p">]:</span> <span class="n">df</span> <span class="o">=</span> <span class="n">ctx</span><span class="o">.</span><span class="n">create_dataframe</span><span class="p">([[</span><span class="n">batch</span><span class="p">]],</span> <span class="n">name</span><span class="o">=</span><span class="s2">"batch_array"</span><span class="p">)</span> |
| |
| <span class="n">In</span> <span class="p">[</span><span class="mi">18</span><span class="p">]:</span> <span class="n">df</span><span class="o">.</span><span class="n">select</span><span class="p">(</span><span class="n">col</span><span class="p">(</span><span class="s2">"a"</span><span class="p">),</span> <span class="n">is_null_arr</span><span class="p">(</span><span class="n">col</span><span class="p">(</span><span class="s2">"a"</span><span class="p">))</span><span class="o">.</span><span class="n">alias</span><span class="p">(</span><span class="s2">"is_null"</span><span class="p">))</span><span class="o">.</span><span class="n">show</span><span class="p">()</span> |
| <span class="n">DataFrame</span><span class="p">()</span> |
| <span class="o">+---+---------+</span> |
| <span class="o">|</span> <span class="n">a</span> <span class="o">|</span> <span class="n">is_null</span> <span class="o">|</span> |
| <span class="o">+---+---------+</span> |
| <span class="o">|</span> <span class="mi">1</span> <span class="o">|</span> <span class="n">false</span> <span class="o">|</span> |
| <span class="o">|</span> <span class="o">|</span> <span class="n">true</span> <span class="o">|</span> |
| <span class="o">|</span> <span class="mi">3</span> <span class="o">|</span> <span class="n">false</span> <span class="o">|</span> |
| <span class="o">+---+---------+</span> |
| </pre></div> |
| </div> |
| </section> |
| <section id="aggregate-functions"> |
| <h2>Aggregate Functions<a class="headerlink" href="#aggregate-functions" title="Link to this heading">¶</a></h2> |
| <p>The <a class="reference internal" href="../../autoapi/datafusion/user_defined/index.html#datafusion.user_defined.AggregateUDF.udaf" title="datafusion.user_defined.AggregateUDF.udaf"><code class="xref py py-func docutils literal notranslate"><span class="pre">udaf()</span></code></a> function allows you to define User-Defined |
| Aggregate Functions (UDAFs). To use this you must implement an |
| <a class="reference internal" href="../../autoapi/datafusion/user_defined/index.html#datafusion.user_defined.Accumulator" title="datafusion.user_defined.Accumulator"><code class="xref py py-class docutils literal notranslate"><span class="pre">Accumulator</span></code></a> that determines how the aggregation is performed.</p> |
| <p>When defining a UDAF, there are four methods you need to implement. The <code class="docutils literal notranslate"><span class="pre">update</span></code> function takes the |
| array(s) of input and updates the internal state of the accumulator. You should define this function |
| to have as many input arguments as you will pass when calling the UDAF. Since aggregation may be |
| split into multiple batches, we must have a method to combine multiple batches. For this, we have |
| two functions, <code class="docutils literal notranslate"><span class="pre">state</span></code> and <code class="docutils literal notranslate"><span class="pre">merge</span></code>. <code class="docutils literal notranslate"><span class="pre">state</span></code> will return an array of scalar values that contain |
| the current state of a single batch accumulation. Then we must <code class="docutils literal notranslate"><span class="pre">merge</span></code> the results of these |
| different states. Finally, <code class="docutils literal notranslate"><span class="pre">evaluate</span></code> is the call that will return the final result after the |
| <code class="docutils literal notranslate"><span class="pre">merge</span></code> is complete.</p> |
| <p>In the following example, we want to define a custom aggregate function that will return the |
| difference between the sums of two columns. The state can be represented by a single value and we can |
| also see how the inputs to <code class="docutils literal notranslate"><span class="pre">update</span></code> and <code class="docutils literal notranslate"><span class="pre">merge</span></code> differ.</p> |
| <div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="kn">import</span><span class="w"> </span><span class="nn">pyarrow</span> |
| <span class="kn">import</span><span class="w"> </span><span class="nn">pyarrow.compute</span> |
| <span class="kn">import</span><span class="w"> </span><span class="nn">datafusion</span> |
| <span class="kn">from</span><span class="w"> </span><span class="nn">datafusion</span><span class="w"> </span><span class="kn">import</span> <span class="n">col</span><span class="p">,</span> <span class="n">udaf</span><span class="p">,</span> <span class="n">Accumulator</span> |
| <span class="kn">from</span><span class="w"> </span><span class="nn">typing</span><span class="w"> </span><span class="kn">import</span> <span class="n">List</span> |
| |
| <span class="k">class</span><span class="w"> </span><span class="nc">MyAccumulator</span><span class="p">(</span><span class="n">Accumulator</span><span class="p">):</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Interface of a user-defined accumulation.</span> |
| <span class="sd"> """</span> |
| <span class="k">def</span><span class="w"> </span><span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span> |
| <span class="bp">self</span><span class="o">.</span><span class="n">_sum</span> <span class="o">=</span> <span class="mf">0.0</span> |
| |
| <span class="k">def</span><span class="w"> </span><span class="nf">update</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">values_a</span><span class="p">:</span> <span class="n">pyarrow</span><span class="o">.</span><span class="n">Array</span><span class="p">,</span> <span class="n">values_b</span><span class="p">:</span> <span class="n">pyarrow</span><span class="o">.</span><span class="n">Array</span><span class="p">)</span> <span class="o">-></span> <span class="kc">None</span><span class="p">:</span> |
| <span class="bp">self</span><span class="o">.</span><span class="n">_sum</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_sum</span> <span class="o">+</span> <span class="n">pyarrow</span><span class="o">.</span><span class="n">compute</span><span class="o">.</span><span class="n">sum</span><span class="p">(</span><span class="n">values_a</span><span class="p">)</span><span class="o">.</span><span class="n">as_py</span><span class="p">()</span> <span class="o">-</span> <span class="n">pyarrow</span><span class="o">.</span><span class="n">compute</span><span class="o">.</span><span class="n">sum</span><span class="p">(</span><span class="n">values_b</span><span class="p">)</span><span class="o">.</span><span class="n">as_py</span><span class="p">()</span> |
| |
| <span class="k">def</span><span class="w"> </span><span class="nf">merge</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">states</span><span class="p">:</span> <span class="n">List</span><span class="p">[</span><span class="n">pyarrow</span><span class="o">.</span><span class="n">Array</span><span class="p">])</span> <span class="o">-></span> <span class="kc">None</span><span class="p">:</span> |
| <span class="bp">self</span><span class="o">.</span><span class="n">_sum</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_sum</span> <span class="o">+</span> <span class="n">pyarrow</span><span class="o">.</span><span class="n">compute</span><span class="o">.</span><span class="n">sum</span><span class="p">(</span><span class="n">states</span><span class="p">[</span><span class="mi">0</span><span class="p">])</span><span class="o">.</span><span class="n">as_py</span><span class="p">()</span> |
| |
| <span class="k">def</span><span class="w"> </span><span class="nf">state</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="n">pyarrow</span><span class="o">.</span><span class="n">Array</span><span class="p">:</span> |
| <span class="k">return</span> <span class="n">pyarrow</span><span class="o">.</span><span class="n">array</span><span class="p">([</span><span class="bp">self</span><span class="o">.</span><span class="n">_sum</span><span class="p">])</span> |
| |
| <span class="k">def</span><span class="w"> </span><span class="nf">evaluate</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="n">pyarrow</span><span class="o">.</span><span class="n">Scalar</span><span class="p">:</span> |
| <span class="k">return</span> <span class="n">pyarrow</span><span class="o">.</span><span class="n">scalar</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_sum</span><span class="p">)</span> |
| |
| <span class="n">ctx</span> <span class="o">=</span> <span class="n">datafusion</span><span class="o">.</span><span class="n">SessionContext</span><span class="p">()</span> |
| <span class="n">df</span> <span class="o">=</span> <span class="n">ctx</span><span class="o">.</span><span class="n">from_pydict</span><span class="p">(</span> |
| <span class="p">{</span> |
| <span class="s2">"a"</span><span class="p">:</span> <span class="p">[</span><span class="mi">4</span><span class="p">,</span> <span class="mi">5</span><span class="p">,</span> <span class="mi">6</span><span class="p">],</span> |
| <span class="s2">"b"</span><span class="p">:</span> <span class="p">[</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">3</span><span class="p">],</span> |
| <span class="p">}</span> |
| <span class="p">)</span> |
| |
| <span class="n">my_udaf</span> <span class="o">=</span> <span class="n">udaf</span><span class="p">(</span><span class="n">MyAccumulator</span><span class="p">,</span> <span class="p">[</span><span class="n">pyarrow</span><span class="o">.</span><span class="n">float64</span><span class="p">(),</span> <span class="n">pyarrow</span><span class="o">.</span><span class="n">float64</span><span class="p">()],</span> <span class="n">pyarrow</span><span class="o">.</span><span class="n">float64</span><span class="p">(),</span> <span class="p">[</span><span class="n">pyarrow</span><span class="o">.</span><span class="n">float64</span><span class="p">()],</span> <span class="s1">'stable'</span><span class="p">)</span> |
| |
| <span class="n">df</span><span class="o">.</span><span class="n">aggregate</span><span class="p">([],</span> <span class="p">[</span><span class="n">my_udaf</span><span class="p">(</span><span class="n">col</span><span class="p">(</span><span class="s2">"a"</span><span class="p">),</span> <span class="n">col</span><span class="p">(</span><span class="s2">"b"</span><span class="p">))</span><span class="o">.</span><span class="n">alias</span><span class="p">(</span><span class="s2">"col_diff"</span><span class="p">)])</span> |
| </pre></div> |
| </div> |
| </section> |
| <section id="window-functions"> |
| <h2>Window Functions<a class="headerlink" href="#window-functions" title="Link to this heading">¶</a></h2> |
| <p>To implement a User-Defined Window Function (UDWF) you must call the |
| <a class="reference internal" href="../../autoapi/datafusion/user_defined/index.html#datafusion.user_defined.WindowUDF.udwf" title="datafusion.user_defined.WindowUDF.udwf"><code class="xref py py-func docutils literal notranslate"><span class="pre">udwf()</span></code></a> function using a class that implements the abstract |
| class <a class="reference internal" href="../../autoapi/datafusion/user_defined/index.html#datafusion.user_defined.WindowEvaluator" title="datafusion.user_defined.WindowEvaluator"><code class="xref py py-class docutils literal notranslate"><span class="pre">WindowEvaluator</span></code></a>.</p> |
| <p>There are three methods of evaluation of UDWFs.</p> |
| <ul class="simple"> |
| <li><p><code class="docutils literal notranslate"><span class="pre">evaluate</span></code> is the simplest case, where you are given an array and are expected to calculate the |
| value for a single row of that array. It is also the least performant option.</p></li> |
| <li><p><code class="docutils literal notranslate"><span class="pre">evaluate_all</span></code> computes the values for all rows for an input array at a single time.</p></li> |
| <li><p><code class="docutils literal notranslate"><span class="pre">evaluate_all_with_rank</span></code> computes the values for all rows, but you only have the rank |
| information for the rows.</p></li> |
| </ul> |
| <p>Which method you implement depends on which of the following options are set.</p> |
| <table class="table"> |
| <thead> |
| <tr class="row-odd"><th class="head"><p><code class="docutils literal notranslate"><span class="pre">uses_window_frame</span></code></p></th> |
| <th class="head"><p><code class="docutils literal notranslate"><span class="pre">supports_bounded_execution</span></code></p></th> |
| <th class="head"><p><code class="docutils literal notranslate"><span class="pre">include_rank</span></code></p></th> |
| <th class="head"><p>function_to_implement</p></th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr class="row-even"><td><p>False (default)</p></td> |
| <td><p>False (default)</p></td> |
| <td><p>False (default)</p></td> |
| <td><p><code class="docutils literal notranslate"><span class="pre">evaluate_all</span></code></p></td> |
| </tr> |
| <tr class="row-odd"><td><p>False</p></td> |
| <td><p>True</p></td> |
| <td><p>False</p></td> |
| <td><p><code class="docutils literal notranslate"><span class="pre">evaluate</span></code></p></td> |
| </tr> |
| <tr class="row-even"><td><p>False</p></td> |
| <td><p>True/False</p></td> |
| <td><p>True</p></td> |
| <td><p><code class="docutils literal notranslate"><span class="pre">evaluate_all_with_rank</span></code></p></td> |
| </tr> |
| <tr class="row-odd"><td><p>True</p></td> |
| <td><p>True/False</p></td> |
| <td><p>True/False</p></td> |
| <td><p><code class="docutils literal notranslate"><span class="pre">evaluate</span></code></p></td> |
| </tr> |
| </tbody> |
| </table> |
| <section id="udwf-options"> |
| <h3>UDWF options<a class="headerlink" href="#udwf-options" title="Link to this heading">¶</a></h3> |
| <p>When you define your UDWF you can override the functions that return these values. They will |
| determine which evaluate functions are called.</p> |
| <ul class="simple"> |
| <li><p><code class="docutils literal notranslate"><span class="pre">uses_window_frame</span></code> is set for functions that compute based on the specified window frame. If |
| your function depends upon the specified frame, set this to <code class="docutils literal notranslate"><span class="pre">True</span></code>.</p></li> |
| <li><p><code class="docutils literal notranslate"><span class="pre">supports_bounded_execution</span></code> specifies if your function can be incrementally computed.</p></li> |
| <li><p><code class="docutils literal notranslate"><span class="pre">include_rank</span></code> is set to <code class="docutils literal notranslate"><span class="pre">True</span></code> for window functions that can be computed only using the rank |
| information.</p></li> |
| </ul> |
| <div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="kn">import</span><span class="w"> </span><span class="nn">pyarrow</span><span class="w"> </span><span class="k">as</span><span class="w"> </span><span class="nn">pa</span> |
| <span class="kn">from</span><span class="w"> </span><span class="nn">datafusion</span><span class="w"> </span><span class="kn">import</span> <span class="n">udwf</span><span class="p">,</span> <span class="n">col</span><span class="p">,</span> <span class="n">SessionContext</span> |
| <span class="kn">from</span><span class="w"> </span><span class="nn">datafusion.user_defined</span><span class="w"> </span><span class="kn">import</span> <span class="n">WindowEvaluator</span> |
| |
| <span class="k">class</span><span class="w"> </span><span class="nc">ExponentialSmooth</span><span class="p">(</span><span class="n">WindowEvaluator</span><span class="p">):</span> |
| <span class="k">def</span><span class="w"> </span><span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">alpha</span><span class="p">:</span> <span class="nb">float</span><span class="p">)</span> <span class="o">-></span> <span class="kc">None</span><span class="p">:</span> |
| <span class="bp">self</span><span class="o">.</span><span class="n">alpha</span> <span class="o">=</span> <span class="n">alpha</span> |
| |
| <span class="k">def</span><span class="w"> </span><span class="nf">evaluate_all</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">values</span><span class="p">:</span> <span class="nb">list</span><span class="p">[</span><span class="n">pa</span><span class="o">.</span><span class="n">Array</span><span class="p">],</span> <span class="n">num_rows</span><span class="p">:</span> <span class="nb">int</span><span class="p">)</span> <span class="o">-></span> <span class="n">pa</span><span class="o">.</span><span class="n">Array</span><span class="p">:</span> |
| <span class="n">results</span> <span class="o">=</span> <span class="p">[]</span> |
| <span class="n">curr_value</span> <span class="o">=</span> <span class="mf">0.0</span> |
| <span class="n">values</span> <span class="o">=</span> <span class="n">values</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span> |
| <span class="k">for</span> <span class="n">idx</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">num_rows</span><span class="p">):</span> |
| <span class="k">if</span> <span class="n">idx</span> <span class="o">==</span> <span class="mi">0</span><span class="p">:</span> |
| <span class="n">curr_value</span> <span class="o">=</span> <span class="n">values</span><span class="p">[</span><span class="n">idx</span><span class="p">]</span><span class="o">.</span><span class="n">as_py</span><span class="p">()</span> |
| <span class="k">else</span><span class="p">:</span> |
| <span class="n">curr_value</span> <span class="o">=</span> <span class="n">values</span><span class="p">[</span><span class="n">idx</span><span class="p">]</span><span class="o">.</span><span class="n">as_py</span><span class="p">()</span> <span class="o">*</span> <span class="bp">self</span><span class="o">.</span><span class="n">alpha</span> <span class="o">+</span> <span class="n">curr_value</span> <span class="o">*</span> <span class="p">(</span> |
| <span class="mf">1.0</span> <span class="o">-</span> <span class="bp">self</span><span class="o">.</span><span class="n">alpha</span> |
| <span class="p">)</span> |
| <span class="n">results</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">curr_value</span><span class="p">)</span> |
| |
| <span class="k">return</span> <span class="n">pa</span><span class="o">.</span><span class="n">array</span><span class="p">(</span><span class="n">results</span><span class="p">)</span> |
| |
| <span class="n">exp_smooth</span> <span class="o">=</span> <span class="n">udwf</span><span class="p">(</span> |
| <span class="n">ExponentialSmooth</span><span class="p">(</span><span class="mf">0.9</span><span class="p">),</span> |
| <span class="n">pa</span><span class="o">.</span><span class="n">float64</span><span class="p">(),</span> |
| <span class="n">pa</span><span class="o">.</span><span class="n">float64</span><span class="p">(),</span> |
| <span class="n">volatility</span><span class="o">=</span><span class="s2">"immutable"</span><span class="p">,</span> |
| <span class="p">)</span> |
| |
| <span class="n">ctx</span> <span class="o">=</span> <span class="n">SessionContext</span><span class="p">()</span> |
| |
| <span class="n">df</span> <span class="o">=</span> <span class="n">ctx</span><span class="o">.</span><span class="n">from_pydict</span><span class="p">({</span> |
| <span class="s2">"a"</span><span class="p">:</span> <span class="p">[</span><span class="mf">1.0</span><span class="p">,</span> <span class="mf">2.1</span><span class="p">,</span> <span class="mf">2.9</span><span class="p">,</span> <span class="mf">4.0</span><span class="p">,</span> <span class="mf">5.1</span><span class="p">,</span> <span class="mf">6.0</span><span class="p">,</span> <span class="mf">6.9</span><span class="p">,</span> <span class="mf">8.0</span><span class="p">]</span> |
| <span class="p">})</span> |
| |
| <span class="n">df</span><span class="o">.</span><span class="n">select</span><span class="p">(</span><span class="s2">"a"</span><span class="p">,</span> <span class="n">exp_smooth</span><span class="p">(</span><span class="n">col</span><span class="p">(</span><span class="s2">"a"</span><span class="p">))</span><span class="o">.</span><span class="n">alias</span><span class="p">(</span><span class="s2">"smooth_a"</span><span class="p">))</span><span class="o">.</span><span class="n">show</span><span class="p">()</span> |
| </pre></div> |
| </div> |
| </section> |
| </section> |
| <section id="table-functions"> |
| <h2>Table Functions<a class="headerlink" href="#table-functions" title="Link to this heading">¶</a></h2> |
| <p>User Defined Table Functions are slightly different than the other functions |
| described here. These functions take any number of <code class="docutils literal notranslate"><span class="pre">Expr</span></code> arguments, but only |
| literal expressions are supported. Table functions must return a Table |
| Provider as described in the custom table provider page of the IO section.</p> |
| <p>Once you have a table function, you can register it with the session context |
| by using <a class="reference internal" href="../../autoapi/datafusion/context/index.html#datafusion.context.SessionContext.register_udtf" title="datafusion.context.SessionContext.register_udtf"><code class="xref py py-func docutils literal notranslate"><span class="pre">datafusion.context.SessionContext.register_udtf()</span></code></a>.</p> |
| <p>There are examples of both Rust-backed and Python-based table functions in the |
| examples folder of the repository. If you have a Rust-backed table function |
| that you wish to expose via PyO3, you need to expose it as a <code class="docutils literal notranslate"><span class="pre">PyCapsule</span></code>.</p> |
| <div class="highlight-rust notranslate"><div class="highlight"><pre><span></span><span class="cp">#[pymethods]</span> |
| <span class="k">impl</span><span class="w"> </span><span class="n">MyTableFunction</span><span class="w"> </span><span class="p">{</span> |
| <span class="w"> </span><span class="k">fn</span><span class="w"> </span><span class="nf">__datafusion_table_function__</span><span class="o">&lt;'</span><span class="na">py</span><span class="o">&gt;</span><span class="p">(</span> |
| <span class="w"> </span><span class="o">&amp;</span><span class="bp">self</span><span class="p">,</span> |
| <span class="w"> </span><span class="n">py</span><span class="p">:</span><span class="w"> </span><span class="nc">Python</span><span class="o">&lt;'</span><span class="na">py</span><span class="o">&gt;</span><span class="p">,</span> |
| <span class="w"> </span><span class="p">)</span><span class="w"> </span><span class="p">-&gt;</span><span class="w"> </span><span class="nc">PyResult</span><span class="o">&lt;</span><span class="n">Bound</span><span class="o">&lt;'</span><span class="na">py</span><span class="p">,</span><span class="w"> </span><span class="n">PyCapsule</span><span class="o">&gt;&gt;</span><span class="w"> </span><span class="p">{</span> |
| <span class="w"> </span><span class="kd">let</span><span class="w"> </span><span class="n">name</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">cr</span><span class="s">"datafusion_table_function"</span><span class="p">.</span><span class="n">into</span><span class="p">();</span> |
| |
| <span class="w"> </span><span class="kd">let</span><span class="w"> </span><span class="n">func</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="bp">self</span><span class="p">.</span><span class="n">clone</span><span class="p">();</span> |
| <span class="w"> </span><span class="kd">let</span><span class="w"> </span><span class="n">provider</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">FFI_TableFunction</span><span class="p">::</span><span class="n">new</span><span class="p">(</span><span class="n">Arc</span><span class="p">::</span><span class="n">new</span><span class="p">(</span><span class="n">func</span><span class="p">),</span><span class="w"> </span><span class="nb">None</span><span class="p">);</span> |
| |
| <span class="w"> </span><span class="n">PyCapsule</span><span class="p">::</span><span class="n">new</span><span class="p">(</span><span class="n">py</span><span class="p">,</span><span class="w"> </span><span class="n">provider</span><span class="p">,</span><span class="w"> </span><span class="nb">Some</span><span class="p">(</span><span class="n">name</span><span class="p">))</span> |
| <span class="w"> </span><span class="p">}</span> |
| <span class="p">}</span> |
| </pre></div> |
| </div> |
| </section> |
| </section> |
| |
| |
| </div> |
| |
| |
| <!-- Previous / next buttons --> |
| <div class='prev-next-area'> |
| <a class='left-prev' id="prev-link" href="windows.html" title="previous page"> |
| <i class="fas fa-angle-left"></i> |
| <div class="prev-next-info"> |
| <p class="prev-next-subtitle">previous</p> |
| <p class="prev-next-title">Window Functions</p> |
| </div> |
| </a> |
| <a class='right-next' id="next-link" href="../io/index.html" title="next page"> |
| <div class="prev-next-info"> |
| <p class="prev-next-subtitle">next</p> |
| <p class="prev-next-title">IO</p> |
| </div> |
| <i class="fas fa-angle-right"></i> |
| </a> |
| </div> |
| |
| </main> |
| |
| |
| </div> |
| </div> |
| |
| <script src="../../_static/scripts/pydata-sphinx-theme.js?digest=1999514e3f237ded88cf"></script> |
| |
| <!-- Based on pydata_sphinx_theme/footer.html --> |
| <footer class="footer mt-5 mt-md-0"> |
| <div class="container"> |
| |
| <div class="footer-item"> |
| <p class="copyright"> |
| © Copyright 2019-2024, Apache Software Foundation.<br> |
| </p> |
| </div> |
| |
| <div class="footer-item"> |
| <p class="sphinx-version"> |
| Created using <a href="https://www.sphinx-doc.org/">Sphinx</a> 8.1.3.<br> |
| </p> |
| </div> |
| |
| <div class="footer-item"> |
| <p>Apache Arrow DataFusion, Arrow DataFusion, Apache, the Apache feather logo, and the Apache Arrow DataFusion project logo</p> |
| <p>are either registered trademarks or trademarks of The Apache Software Foundation in the United States and other countries.</p> |
| </div> |
| </div> |
| </footer> |
| |
| |
| </body> |
| </html> |