| <!DOCTYPE html> |
| |
| <html lang="en" data-content_root="../../../"> |
| <head> |
| <!-- Document metadata: character encoding first, then a single viewport declaration. --> |
| <meta charset="utf-8" /> |
| <meta name="viewport" content="width=device-width, initial-scale=1" /> |
| |
| <title>datafusion.dataframe — Apache Arrow DataFusion documentation</title> |
| |
| <!-- Theme stylesheets, cache-busted with a digest query string. --> |
| <link href="../../../_static/styles/theme.css?digest=1999514e3f237ded88cf" rel="stylesheet"> |
| <link href="../../../_static/styles/pydata-sphinx-theme.css?digest=1999514e3f237ded88cf" rel="stylesheet"> |
| |
| <!-- Font Awesome icon styles; the two webfonts are preloaded (crossorigin is required for font preloads). --> |
| <link rel="stylesheet" |
| href="../../../_static/vendor/fontawesome/5.13.0/css/all.min.css"> |
| <link rel="preload" as="font" type="font/woff2" crossorigin |
| href="../../../_static/vendor/fontawesome/5.13.0/webfonts/fa-solid-900.woff2"> |
| <link rel="preload" as="font" type="font/woff2" crossorigin |
| href="../../../_static/vendor/fontawesome/5.13.0/webfonts/fa-brands-400.woff2"> |
| |
| <!-- Extension stylesheets. NOTE(review): pydata-sphinx-theme.css is linked a second time here with a different query string; both links are kept so cascade order is unchanged. Confirm whether one can be dropped. --> |
| <link rel="stylesheet" type="text/css" href="../../../_static/pygments.css?v=8f2a1f02" /> |
| <link rel="stylesheet" type="text/css" href="../../../_static/styles/pydata-sphinx-theme.css?v=1140d252" /> |
| <link rel="stylesheet" type="text/css" href="../../../_static/graphviz.css?v=4ae1632d" /> |
| <link rel="stylesheet" type="text/css" href="../../../_static/theme_overrides.css?v=dca7052a" /> |
| |
| <link rel="preload" as="script" href="../../../_static/scripts/pydata-sphinx-theme.js?digest=1999514e3f237ded88cf"> |
| |
| <!-- Scripts are loaded synchronously, in this order. --> |
| <script src="../../../_static/documentation_options.js?v=8a448e45"></script> |
| <script src="../../../_static/doctools.js?v=9bcbadda"></script> |
| <script src="../../../_static/sphinx_highlight.js?v=dc90522c"></script> |
| <!-- Document relations: index, search, and the next/previous pages. --> |
| <link rel="index" title="Index" href="../../../genindex.html" /> |
| <link rel="search" title="Search" href="../../../search.html" /> |
| <link rel="next" title="datafusion.dataframe_formatter" href="../dataframe_formatter/index.html" /> |
| <link rel="prev" title="datafusion.context" href="../context/index.html" /> |
| <meta name="docsearch:language" content="en"> |
| |
| |
| <!-- Google Analytics --> |
| |
| </head> |
| <body data-spy="scroll" data-target="#bd-toc-nav" data-offset="80"> |
| |
| <!-- Empty full-width container with id "banner"; no content is defined in this markup. NOTE(review): presumably filled in by theme script or CSS; confirm. --> |
| <div class="container-fluid" id="banner"></div> |
| |
| |
| |
| |
| <div class="container-xl"> |
| <div class="row"> |
| |
| |
| <!-- Only show if we have sidebars configured, else just a small margin --> |
| <div class="col-12 col-md-3 bd-sidebar"> |
| <div class="sidebar-start-items"> |
| <a class="navbar-brand" href="../../../index.html"> |
| <!-- The logo image is the link's only content, so its alt text names the link destination. --> |
| <img src="../../../_static/images/2x_bgwhite_original.png" class="logo" alt="Apache Arrow DataFusion documentation home"> |
| </a> |
| |
| <!-- Site search: sends the query as the "q" parameter to search.html via GET. role="search" exposes the form as a search landmark. --> |
| <form class="bd-search d-flex align-items-center" action="../../../search.html" method="get" role="search"> |
| <!-- Decorative magnifier icon; hidden from assistive technology. --> |
| <i class="icon fas fa-search" aria-hidden="true"></i> |
| <!-- No visible label exists, so aria-label supplies the accessible name. --> |
| <input type="search" class="form-control" name="q" id="search-input" placeholder="Search the docs ..." aria-label="Search the docs ..." autocomplete="off"> |
| </form> |
| |
| <!-- Primary site navigation: captioned sections, each followed by a list of page links. --> |
| <!-- Entries with class "has-children" place a checkbox and label in front of their nested list; the checkboxes on the path to the current page are pre-checked. --> |
| <!-- NOTE(review): expand/collapse is presumably driven by theme CSS keyed on the checkbox state; confirm in the theme stylesheet. --> |
| <nav class="bd-links" id="bd-docs-nav" aria-label="Main navigation"> |
| <div class="bd-toc-item active"> |
| |
| <p aria-level="2" class="caption" role="heading"> |
| <span class="caption-text"> |
| LINKS |
| </span> |
| </p> |
| <ul class="nav bd-sidenav"> |
| <li class="toctree-l1"> |
| <a class="reference external" href="https://github.com/apache/datafusion-python"> |
| Github and Issue Tracker |
| </a> |
| </li> |
| <li class="toctree-l1"> |
| <a class="reference external" href="https://docs.rs/datafusion/latest/datafusion/"> |
| Rust's API Docs |
| </a> |
| </li> |
| <li class="toctree-l1"> |
| <a class="reference external" href="https://github.com/apache/datafusion/blob/main/CODE_OF_CONDUCT.md"> |
| Code of conduct |
| </a> |
| </li> |
| <li class="toctree-l1"> |
| <a class="reference external" href="https://github.com/apache/datafusion-python/tree/main/examples"> |
| Examples |
| </a> |
| </li> |
| </ul> |
| <p aria-level="2" class="caption" role="heading"> |
| <span class="caption-text"> |
| USER GUIDE |
| </span> |
| </p> |
| <ul class="nav bd-sidenav"> |
| <li class="toctree-l1"> |
| <a class="reference internal" href="../../../user-guide/introduction.html"> |
| Introduction |
| </a> |
| </li> |
| <li class="toctree-l1"> |
| <a class="reference internal" href="../../../user-guide/basics.html"> |
| Concepts |
| </a> |
| </li> |
| <li class="toctree-l1"> |
| <a class="reference internal" href="../../../user-guide/data-sources.html"> |
| Data Sources |
| </a> |
| </li> |
| <li class="toctree-l1 has-children"> |
| <a class="reference internal" href="../../../user-guide/dataframe/index.html"> |
| DataFrames |
| </a> |
| <!-- The label holds only an icon, so the checkbox gets its accessible name from aria-label. --> |
| <input class="toctree-checkbox" id="toctree-checkbox-1" name="toctree-checkbox-1" type="checkbox" aria-label="Toggle navigation of DataFrames"/> |
| <label for="toctree-checkbox-1"> |
| <i class="fas fa-chevron-down" aria-hidden="true"> |
| </i> |
| </label> |
| <ul> |
| <li class="toctree-l2"> |
| <a class="reference internal" href="../../../user-guide/dataframe/rendering.html"> |
| HTML Rendering in Jupyter |
| </a> |
| </li> |
| </ul> |
| </li> |
| <li class="toctree-l1 has-children"> |
| <a class="reference internal" href="../../../user-guide/common-operations/index.html"> |
| Common Operations |
| </a> |
| <input class="toctree-checkbox" id="toctree-checkbox-2" name="toctree-checkbox-2" type="checkbox" aria-label="Toggle navigation of Common Operations"/> |
| <label for="toctree-checkbox-2"> |
| <i class="fas fa-chevron-down" aria-hidden="true"> |
| </i> |
| </label> |
| <ul> |
| <li class="toctree-l2"> |
| <a class="reference internal" href="../../../user-guide/common-operations/views.html"> |
| Registering Views |
| </a> |
| </li> |
| <li class="toctree-l2"> |
| <a class="reference internal" href="../../../user-guide/common-operations/basic-info.html"> |
| Basic Operations |
| </a> |
| </li> |
| <li class="toctree-l2"> |
| <a class="reference internal" href="../../../user-guide/common-operations/select-and-filter.html"> |
| Column Selections |
| </a> |
| </li> |
| <li class="toctree-l2"> |
| <a class="reference internal" href="../../../user-guide/common-operations/expressions.html"> |
| Expressions |
| </a> |
| </li> |
| <li class="toctree-l2"> |
| <a class="reference internal" href="../../../user-guide/common-operations/joins.html"> |
| Joins |
| </a> |
| </li> |
| <li class="toctree-l2"> |
| <a class="reference internal" href="../../../user-guide/common-operations/functions.html"> |
| Functions |
| </a> |
| </li> |
| <li class="toctree-l2"> |
| <a class="reference internal" href="../../../user-guide/common-operations/aggregations.html"> |
| Aggregation |
| </a> |
| </li> |
| <li class="toctree-l2"> |
| <a class="reference internal" href="../../../user-guide/common-operations/windows.html"> |
| Window Functions |
| </a> |
| </li> |
| <li class="toctree-l2"> |
| <a class="reference internal" href="../../../user-guide/common-operations/udf-and-udfa.html"> |
| User-Defined Functions |
| </a> |
| </li> |
| </ul> |
| </li> |
| <li class="toctree-l1 has-children"> |
| <a class="reference internal" href="../../../user-guide/io/index.html"> |
| IO |
| </a> |
| <input class="toctree-checkbox" id="toctree-checkbox-3" name="toctree-checkbox-3" type="checkbox" aria-label="Toggle navigation of IO"/> |
| <label for="toctree-checkbox-3"> |
| <i class="fas fa-chevron-down" aria-hidden="true"> |
| </i> |
| </label> |
| <ul> |
| <li class="toctree-l2"> |
| <a class="reference internal" href="../../../user-guide/io/arrow.html"> |
| Arrow |
| </a> |
| </li> |
| <li class="toctree-l2"> |
| <a class="reference internal" href="../../../user-guide/io/avro.html"> |
| Avro |
| </a> |
| </li> |
| <li class="toctree-l2"> |
| <a class="reference internal" href="../../../user-guide/io/csv.html"> |
| CSV |
| </a> |
| </li> |
| <li class="toctree-l2"> |
| <a class="reference internal" href="../../../user-guide/io/json.html"> |
| JSON |
| </a> |
| </li> |
| <li class="toctree-l2"> |
| <a class="reference internal" href="../../../user-guide/io/parquet.html"> |
| Parquet |
| </a> |
| </li> |
| <li class="toctree-l2"> |
| <a class="reference internal" href="../../../user-guide/io/table_provider.html"> |
| Custom Table Provider |
| </a> |
| </li> |
| </ul> |
| </li> |
| <li class="toctree-l1"> |
| <a class="reference internal" href="../../../user-guide/configuration.html"> |
| Configuration |
| </a> |
| </li> |
| <li class="toctree-l1"> |
| <a class="reference internal" href="../../../user-guide/sql.html"> |
| SQL |
| </a> |
| </li> |
| </ul> |
| <p aria-level="2" class="caption" role="heading"> |
| <span class="caption-text"> |
| CONTRIBUTOR GUIDE |
| </span> |
| </p> |
| <ul class="nav bd-sidenav"> |
| <li class="toctree-l1"> |
| <a class="reference internal" href="../../../contributor-guide/introduction.html"> |
| Introduction |
| </a> |
| </li> |
| <li class="toctree-l1"> |
| <a class="reference internal" href="../../../contributor-guide/ffi.html"> |
| Python Extensions |
| </a> |
| </li> |
| </ul> |
| <p aria-level="2" class="caption" role="heading"> |
| <span class="caption-text"> |
| API |
| </span> |
| </p> |
| <!-- This section contains the current page: its ancestors carry the "current" classes and their checkboxes are pre-checked. --> |
| <ul class="current nav bd-sidenav"> |
| <li class="toctree-l1 current active has-children"> |
| <a class="reference internal" href="../../index.html"> |
| API Reference |
| </a> |
| <input checked="" class="toctree-checkbox" id="toctree-checkbox-4" name="toctree-checkbox-4" type="checkbox" aria-label="Toggle navigation of API Reference"/> |
| <label for="toctree-checkbox-4"> |
| <i class="fas fa-chevron-down" aria-hidden="true"> |
| </i> |
| </label> |
| <ul class="current"> |
| <li class="toctree-l2 current active has-children"> |
| <a class="reference internal" href="../index.html"> |
| datafusion |
| </a> |
| <input checked="" class="toctree-checkbox" id="toctree-checkbox-5" name="toctree-checkbox-5" type="checkbox" aria-label="Toggle navigation of datafusion"/> |
| <label for="toctree-checkbox-5"> |
| <i class="fas fa-chevron-down" aria-hidden="true"> |
| </i> |
| </label> |
| <ul class="current"> |
| <li class="toctree-l3"> |
| <a class="reference internal" href="../catalog/index.html"> |
| datafusion.catalog |
| </a> |
| </li> |
| <li class="toctree-l3"> |
| <a class="reference internal" href="../context/index.html"> |
| datafusion.context |
| </a> |
| </li> |
| <li class="toctree-l3 current active"> |
| <!-- Link to the page being viewed; aria-current announces it as the current page. --> |
| <a class="current reference internal" href="#" aria-current="page"> |
| datafusion.dataframe |
| </a> |
| </li> |
| <li class="toctree-l3"> |
| <a class="reference internal" href="../dataframe_formatter/index.html"> |
| datafusion.dataframe_formatter |
| </a> |
| </li> |
| <li class="toctree-l3"> |
| <a class="reference internal" href="../expr/index.html"> |
| datafusion.expr |
| </a> |
| </li> |
| <li class="toctree-l3"> |
| <a class="reference internal" href="../functions/index.html"> |
| datafusion.functions |
| </a> |
| </li> |
| <li class="toctree-l3"> |
| <a class="reference internal" href="../html_formatter/index.html"> |
| datafusion.html_formatter |
| </a> |
| </li> |
| <li class="toctree-l3 has-children"> |
| <a class="reference internal" href="../input/index.html"> |
| datafusion.input |
| </a> |
| <input class="toctree-checkbox" id="toctree-checkbox-6" name="toctree-checkbox-6" type="checkbox" aria-label="Toggle navigation of datafusion.input"/> |
| <label for="toctree-checkbox-6"> |
| <i class="fas fa-chevron-down" aria-hidden="true"> |
| </i> |
| </label> |
| <ul> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../input/base/index.html"> |
| datafusion.input.base |
| </a> |
| </li> |
| <li class="toctree-l4"> |
| <a class="reference internal" href="../input/location/index.html"> |
| datafusion.input.location |
| </a> |
| </li> |
| </ul> |
| </li> |
| <li class="toctree-l3"> |
| <a class="reference internal" href="../io/index.html"> |
| datafusion.io |
| </a> |
| </li> |
| <li class="toctree-l3"> |
| <a class="reference internal" href="../object_store/index.html"> |
| datafusion.object_store |
| </a> |
| </li> |
| <li class="toctree-l3"> |
| <a class="reference internal" href="../plan/index.html"> |
| datafusion.plan |
| </a> |
| </li> |
| <li class="toctree-l3"> |
| <a class="reference internal" href="../record_batch/index.html"> |
| datafusion.record_batch |
| </a> |
| </li> |
| <li class="toctree-l3"> |
| <a class="reference internal" href="../substrait/index.html"> |
| datafusion.substrait |
| </a> |
| </li> |
| <li class="toctree-l3"> |
| <a class="reference internal" href="../unparser/index.html"> |
| datafusion.unparser |
| </a> |
| </li> |
| <li class="toctree-l3"> |
| <a class="reference internal" href="../user_defined/index.html"> |
| datafusion.user_defined |
| </a> |
| </li> |
| </ul> |
| </li> |
| </ul> |
| </li> |
| </ul> |
| |
| |
| </div> |
| </nav> |
| </div> |
| <div class="sidebar-end-items"> |
| </div> |
| </div> |
| |
| |
| |
| |
| <div class="d-none d-xl-block col-xl-2 bd-toc"> |
| |
| |
| <div class="toc-item"> |
| |
| <div class="tocsection onthispage pt-5 pb-3"> |
| <i class="fas fa-list"></i> On this page |
| </div> |
| |
| <nav id="bd-toc-nav"> |
| <ul class="visible nav section-nav flex-column"> |
| <li class="toc-h2 nav-item toc-entry"> |
| <a class="reference internal nav-link" href="#classes"> |
| Classes |
| </a> |
| </li> |
| <li class="toc-h2 nav-item toc-entry"> |
| <a class="reference internal nav-link" href="#module-contents"> |
| Module Contents |
| </a> |
| <ul class="visible nav section-nav flex-column"> |
| <li class="toc-h3 nav-item toc-entry"> |
| <a class="reference internal nav-link" href="#datafusion.dataframe.Compression"> |
| <code class="docutils literal notranslate"> |
| <span class="pre"> |
| Compression |
| </span> |
| </code> |
| </a> |
| <ul class="nav section-nav flex-column"> |
| <li class="toc-h4 nav-item toc-entry"> |
| <a class="reference internal nav-link" href="#datafusion.dataframe.Compression.from_str"> |
| <code class="docutils literal notranslate"> |
| <span class="pre"> |
| Compression.from_str() |
| </span> |
| </code> |
| </a> |
| </li> |
| <li class="toc-h4 nav-item toc-entry"> |
| <a class="reference internal nav-link" href="#datafusion.dataframe.Compression.get_default_level"> |
| <code class="docutils literal notranslate"> |
| <span class="pre"> |
| Compression.get_default_level() |
| </span> |
| </code> |
| </a> |
| </li> |
| <li class="toc-h4 nav-item toc-entry"> |
| <a class="reference internal nav-link" href="#datafusion.dataframe.Compression.BROTLI"> |
| <code class="docutils literal notranslate"> |
| <span class="pre"> |
| Compression.BROTLI |
| </span> |
| </code> |
| </a> |
| </li> |
| <li class="toc-h4 nav-item toc-entry"> |
| <a class="reference internal nav-link" href="#datafusion.dataframe.Compression.GZIP"> |
| <code class="docutils literal notranslate"> |
| <span class="pre"> |
| Compression.GZIP |
| </span> |
| </code> |
| </a> |
| </li> |
| <li class="toc-h4 nav-item toc-entry"> |
| <a class="reference internal nav-link" href="#datafusion.dataframe.Compression.LZ4"> |
| <code class="docutils literal notranslate"> |
| <span class="pre"> |
| Compression.LZ4 |
| </span> |
| </code> |
| </a> |
| </li> |
| <li class="toc-h4 nav-item toc-entry"> |
| <a class="reference internal nav-link" href="#datafusion.dataframe.Compression.LZ4_RAW"> |
| <code class="docutils literal notranslate"> |
| <span class="pre"> |
| Compression.LZ4_RAW |
| </span> |
| </code> |
| </a> |
| </li> |
| <li class="toc-h4 nav-item toc-entry"> |
| <a class="reference internal nav-link" href="#datafusion.dataframe.Compression.SNAPPY"> |
| <code class="docutils literal notranslate"> |
| <span class="pre"> |
| Compression.SNAPPY |
| </span> |
| </code> |
| </a> |
| </li> |
| <li class="toc-h4 nav-item toc-entry"> |
| <a class="reference internal nav-link" href="#datafusion.dataframe.Compression.UNCOMPRESSED"> |
| <code class="docutils literal notranslate"> |
| <span class="pre"> |
| Compression.UNCOMPRESSED |
| </span> |
| </code> |
| </a> |
| </li> |
| <li class="toc-h4 nav-item toc-entry"> |
| <a class="reference internal nav-link" href="#datafusion.dataframe.Compression.ZSTD"> |
| <code class="docutils literal notranslate"> |
| <span class="pre"> |
| Compression.ZSTD |
| </span> |
| </code> |
| </a> |
| </li> |
| </ul> |
| </li> |
| <li class="toc-h3 nav-item toc-entry"> |
| <a class="reference internal nav-link" href="#datafusion.dataframe.DataFrame"> |
| <code class="docutils literal notranslate"> |
| <span class="pre"> |
| DataFrame |
| </span> |
| </code> |
| </a> |
| <ul class="nav section-nav flex-column"> |
| <li class="toc-h4 nav-item toc-entry"> |
| <a class="reference internal nav-link" href="#datafusion.dataframe.DataFrame.__arrow_c_stream__"> |
| <code class="docutils literal notranslate"> |
| <span class="pre"> |
| DataFrame.__arrow_c_stream__() |
| </span> |
| </code> |
| </a> |
| </li> |
| <li class="toc-h4 nav-item toc-entry"> |
| <a class="reference internal nav-link" href="#datafusion.dataframe.DataFrame.__getitem__"> |
| <code class="docutils literal notranslate"> |
| <span class="pre"> |
| DataFrame.__getitem__() |
| </span> |
| </code> |
| </a> |
| </li> |
| <li class="toc-h4 nav-item toc-entry"> |
| <a class="reference internal nav-link" href="#datafusion.dataframe.DataFrame.__repr__"> |
| <code class="docutils literal notranslate"> |
| <span class="pre"> |
| DataFrame.__repr__() |
| </span> |
| </code> |
| </a> |
| </li> |
| <li class="toc-h4 nav-item toc-entry"> |
| <a class="reference internal nav-link" href="#datafusion.dataframe.DataFrame._repr_html_"> |
| <code class="docutils literal notranslate"> |
| <span class="pre"> |
| DataFrame._repr_html_() |
| </span> |
| </code> |
| </a> |
| </li> |
| <li class="toc-h4 nav-item toc-entry"> |
| <a class="reference internal nav-link" href="#datafusion.dataframe.DataFrame.aggregate"> |
| <code class="docutils literal notranslate"> |
| <span class="pre"> |
| DataFrame.aggregate() |
| </span> |
| </code> |
| </a> |
| </li> |
| <li class="toc-h4 nav-item toc-entry"> |
| <a class="reference internal nav-link" href="#datafusion.dataframe.DataFrame.cache"> |
| <code class="docutils literal notranslate"> |
| <span class="pre"> |
| DataFrame.cache() |
| </span> |
| </code> |
| </a> |
| </li> |
| <li class="toc-h4 nav-item toc-entry"> |
| <a class="reference internal nav-link" href="#datafusion.dataframe.DataFrame.cast"> |
| <code class="docutils literal notranslate"> |
| <span class="pre"> |
| DataFrame.cast() |
| </span> |
| </code> |
| </a> |
| </li> |
| <li class="toc-h4 nav-item toc-entry"> |
| <a class="reference internal nav-link" href="#datafusion.dataframe.DataFrame.collect"> |
| <code class="docutils literal notranslate"> |
| <span class="pre"> |
| DataFrame.collect() |
| </span> |
| </code> |
| </a> |
| </li> |
| <li class="toc-h4 nav-item toc-entry"> |
| <a class="reference internal nav-link" href="#datafusion.dataframe.DataFrame.collect_partitioned"> |
| <code class="docutils literal notranslate"> |
| <span class="pre"> |
| DataFrame.collect_partitioned() |
| </span> |
| </code> |
| </a> |
| </li> |
| <li class="toc-h4 nav-item toc-entry"> |
| <a class="reference internal nav-link" href="#datafusion.dataframe.DataFrame.count"> |
| <code class="docutils literal notranslate"> |
| <span class="pre"> |
| DataFrame.count() |
| </span> |
| </code> |
| </a> |
| </li> |
| <li class="toc-h4 nav-item toc-entry"> |
| <a class="reference internal nav-link" href="#datafusion.dataframe.DataFrame.default_str_repr"> |
| <code class="docutils literal notranslate"> |
| <span class="pre"> |
| DataFrame.default_str_repr() |
| </span> |
| </code> |
| </a> |
| </li> |
| <li class="toc-h4 nav-item toc-entry"> |
| <a class="reference internal nav-link" href="#datafusion.dataframe.DataFrame.describe"> |
| <code class="docutils literal notranslate"> |
| <span class="pre"> |
| DataFrame.describe() |
| </span> |
| </code> |
| </a> |
| </li> |
| <li class="toc-h4 nav-item toc-entry"> |
| <a class="reference internal nav-link" href="#datafusion.dataframe.DataFrame.distinct"> |
| <code class="docutils literal notranslate"> |
| <span class="pre"> |
| DataFrame.distinct() |
| </span> |
| </code> |
| </a> |
| </li> |
| <li class="toc-h4 nav-item toc-entry"> |
| <a class="reference internal nav-link" href="#datafusion.dataframe.DataFrame.drop"> |
| <code class="docutils literal notranslate"> |
| <span class="pre"> |
| DataFrame.drop() |
| </span> |
| </code> |
| </a> |
| </li> |
| <li class="toc-h4 nav-item toc-entry"> |
| <a class="reference internal nav-link" href="#datafusion.dataframe.DataFrame.except_all"> |
| <code class="docutils literal notranslate"> |
| <span class="pre"> |
| DataFrame.except_all() |
| </span> |
| </code> |
| </a> |
| </li> |
| <li class="toc-h4 nav-item toc-entry"> |
| <a class="reference internal nav-link" href="#datafusion.dataframe.DataFrame.execute_stream"> |
| <code class="docutils literal notranslate"> |
| <span class="pre"> |
| DataFrame.execute_stream() |
| </span> |
| </code> |
| </a> |
| </li> |
| <li class="toc-h4 nav-item toc-entry"> |
| <a class="reference internal nav-link" href="#datafusion.dataframe.DataFrame.execute_stream_partitioned"> |
| <code class="docutils literal notranslate"> |
| <span class="pre"> |
| DataFrame.execute_stream_partitioned() |
| </span> |
| </code> |
| </a> |
| </li> |
| <li class="toc-h4 nav-item toc-entry"> |
| <a class="reference internal nav-link" href="#datafusion.dataframe.DataFrame.execution_plan"> |
| <code class="docutils literal notranslate"> |
| <span class="pre"> |
| DataFrame.execution_plan() |
| </span> |
| </code> |
| </a> |
| </li> |
| <li class="toc-h4 nav-item toc-entry"> |
| <a class="reference internal nav-link" href="#datafusion.dataframe.DataFrame.explain"> |
| <code class="docutils literal notranslate"> |
| <span class="pre"> |
| DataFrame.explain() |
| </span> |
| </code> |
| </a> |
| </li> |
| <li class="toc-h4 nav-item toc-entry"> |
| <a class="reference internal nav-link" href="#datafusion.dataframe.DataFrame.fill_null"> |
| <code class="docutils literal notranslate"> |
| <span class="pre"> |
| DataFrame.fill_null() |
| </span> |
| </code> |
| </a> |
| </li> |
| <li class="toc-h4 nav-item toc-entry"> |
| <a class="reference internal nav-link" href="#datafusion.dataframe.DataFrame.filter"> |
| <code class="docutils literal notranslate"> |
| <span class="pre"> |
| DataFrame.filter() |
| </span> |
| </code> |
| </a> |
| </li> |
| <li class="toc-h4 nav-item toc-entry"> |
| <a class="reference internal nav-link" href="#datafusion.dataframe.DataFrame.head"> |
| <code class="docutils literal notranslate"> |
| <span class="pre"> |
| DataFrame.head() |
| </span> |
| </code> |
| </a> |
| </li> |
| <li class="toc-h4 nav-item toc-entry"> |
| <a class="reference internal nav-link" href="#datafusion.dataframe.DataFrame.intersect"> |
| <code class="docutils literal notranslate"> |
| <span class="pre"> |
| DataFrame.intersect() |
| </span> |
| </code> |
| </a> |
| </li> |
| <li class="toc-h4 nav-item toc-entry"> |
| <a class="reference internal nav-link" href="#datafusion.dataframe.DataFrame.into_view"> |
| <code class="docutils literal notranslate"> |
| <span class="pre"> |
| DataFrame.into_view() |
| </span> |
| </code> |
| </a> |
| </li> |
| <li class="toc-h4 nav-item toc-entry"> |
| <a class="reference internal nav-link" href="#datafusion.dataframe.DataFrame.join"> |
| <code class="docutils literal notranslate"> |
| <span class="pre"> |
| DataFrame.join() |
| </span> |
| </code> |
| </a> |
| </li> |
| <li class="toc-h4 nav-item toc-entry"> |
| <a class="reference internal nav-link" href="#datafusion.dataframe.DataFrame.join_on"> |
| <code class="docutils literal notranslate"> |
| <span class="pre"> |
| DataFrame.join_on() |
| </span> |
| </code> |
| </a> |
| </li> |
| <li class="toc-h4 nav-item toc-entry"> |
| <a class="reference internal nav-link" href="#datafusion.dataframe.DataFrame.limit"> |
| <code class="docutils literal notranslate"> |
| <span class="pre"> |
| DataFrame.limit() |
| </span> |
| </code> |
| </a> |
| </li> |
| <li class="toc-h4 nav-item toc-entry"> |
| <a class="reference internal nav-link" href="#datafusion.dataframe.DataFrame.logical_plan"> |
| <code class="docutils literal notranslate"> |
| <span class="pre"> |
| DataFrame.logical_plan() |
| </span> |
| </code> |
| </a> |
| </li> |
| <li class="toc-h4 nav-item toc-entry"> |
| <a class="reference internal nav-link" href="#datafusion.dataframe.DataFrame.optimized_logical_plan"> |
| <code class="docutils literal notranslate"> |
| <span class="pre"> |
| DataFrame.optimized_logical_plan() |
| </span> |
| </code> |
| </a> |
| </li> |
| <li class="toc-h4 nav-item toc-entry"> |
| <a class="reference internal nav-link" href="#datafusion.dataframe.DataFrame.parse_sql_expr"> |
| <code class="docutils literal notranslate"> |
| <span class="pre"> |
| DataFrame.parse_sql_expr() |
| </span> |
| </code> |
| </a> |
| </li> |
| <li class="toc-h4 nav-item toc-entry"> |
| <a class="reference internal nav-link" href="#datafusion.dataframe.DataFrame.repartition"> |
| <code class="docutils literal notranslate"> |
| <span class="pre"> |
| DataFrame.repartition() |
| </span> |
| </code> |
| </a> |
| </li> |
| <li class="toc-h4 nav-item toc-entry"> |
| <a class="reference internal nav-link" href="#datafusion.dataframe.DataFrame.repartition_by_hash"> |
| <code class="docutils literal notranslate"> |
| <span class="pre"> |
| DataFrame.repartition_by_hash() |
| </span> |
| </code> |
| </a> |
| </li> |
| <li class="toc-h4 nav-item toc-entry"> |
| <a class="reference internal nav-link" href="#datafusion.dataframe.DataFrame.schema"> |
| <code class="docutils literal notranslate"> |
| <span class="pre"> |
| DataFrame.schema() |
| </span> |
| </code> |
| </a> |
| </li> |
| <li class="toc-h4 nav-item toc-entry"> |
| <a class="reference internal nav-link" href="#datafusion.dataframe.DataFrame.select"> |
| <code class="docutils literal notranslate"> |
| <span class="pre"> |
| DataFrame.select() |
| </span> |
| </code> |
| </a> |
| </li> |
| <li class="toc-h4 nav-item toc-entry"> |
| <a class="reference internal nav-link" href="#datafusion.dataframe.DataFrame.select_columns"> |
| <code class="docutils literal notranslate"> |
| <span class="pre"> |
| DataFrame.select_columns() |
| </span> |
| </code> |
| </a> |
| </li> |
| <li class="toc-h4 nav-item toc-entry"> |
| <a class="reference internal nav-link" href="#datafusion.dataframe.DataFrame.show"> |
| <code class="docutils literal notranslate"> |
| <span class="pre"> |
| DataFrame.show() |
| </span> |
| </code> |
| </a> |
| </li> |
| <li class="toc-h4 nav-item toc-entry"> |
| <a class="reference internal nav-link" href="#datafusion.dataframe.DataFrame.sort"> |
| <code class="docutils literal notranslate"> |
| <span class="pre"> |
| DataFrame.sort() |
| </span> |
| </code> |
| </a> |
| </li> |
| <li class="toc-h4 nav-item toc-entry"> |
| <a class="reference internal nav-link" href="#datafusion.dataframe.DataFrame.tail"> |
| <code class="docutils literal notranslate"> |
| <span class="pre"> |
| DataFrame.tail() |
| </span> |
| </code> |
| </a> |
| </li> |
| <li class="toc-h4 nav-item toc-entry"> |
| <a class="reference internal nav-link" href="#datafusion.dataframe.DataFrame.to_arrow_table"> |
| <code class="docutils literal notranslate"> |
| <span class="pre"> |
| DataFrame.to_arrow_table() |
| </span> |
| </code> |
| </a> |
| </li> |
| <li class="toc-h4 nav-item toc-entry"> |
| <a class="reference internal nav-link" href="#datafusion.dataframe.DataFrame.to_pandas"> |
| <code class="docutils literal notranslate"> |
| <span class="pre"> |
| DataFrame.to_pandas() |
| </span> |
| </code> |
| </a> |
| </li> |
| <li class="toc-h4 nav-item toc-entry"> |
| <a class="reference internal nav-link" href="#datafusion.dataframe.DataFrame.to_polars"> |
| <code class="docutils literal notranslate"> |
| <span class="pre"> |
| DataFrame.to_polars() |
| </span> |
| </code> |
| </a> |
| </li> |
| <li class="toc-h4 nav-item toc-entry"> |
| <a class="reference internal nav-link" href="#datafusion.dataframe.DataFrame.to_pydict"> |
| <code class="docutils literal notranslate"> |
| <span class="pre"> |
| DataFrame.to_pydict() |
| </span> |
| </code> |
| </a> |
| </li> |
| <li class="toc-h4 nav-item toc-entry"> |
| <a class="reference internal nav-link" href="#datafusion.dataframe.DataFrame.to_pylist"> |
| <code class="docutils literal notranslate"> |
| <span class="pre"> |
| DataFrame.to_pylist() |
| </span> |
| </code> |
| </a> |
| </li> |
| <li class="toc-h4 nav-item toc-entry"> |
| <a class="reference internal nav-link" href="#datafusion.dataframe.DataFrame.transform"> |
| <code class="docutils literal notranslate"> |
| <span class="pre"> |
| DataFrame.transform() |
| </span> |
| </code> |
| </a> |
| </li> |
| <li class="toc-h4 nav-item toc-entry"> |
| <a class="reference internal nav-link" href="#datafusion.dataframe.DataFrame.union"> |
| <code class="docutils literal notranslate"> |
| <span class="pre"> |
| DataFrame.union() |
| </span> |
| </code> |
| </a> |
| </li> |
| <li class="toc-h4 nav-item toc-entry"> |
| <a class="reference internal nav-link" href="#datafusion.dataframe.DataFrame.union_distinct"> |
| <code class="docutils literal notranslate"> |
| <span class="pre"> |
| DataFrame.union_distinct() |
| </span> |
| </code> |
| </a> |
| </li> |
| <li class="toc-h4 nav-item toc-entry"> |
| <a class="reference internal nav-link" href="#datafusion.dataframe.DataFrame.unnest_columns"> |
| <code class="docutils literal notranslate"> |
| <span class="pre"> |
| DataFrame.unnest_columns() |
| </span> |
| </code> |
| </a> |
| </li> |
| <li class="toc-h4 nav-item toc-entry"> |
| <a class="reference internal nav-link" href="#datafusion.dataframe.DataFrame.with_column"> |
| <code class="docutils literal notranslate"> |
| <span class="pre"> |
| DataFrame.with_column() |
| </span> |
| </code> |
| </a> |
| </li> |
| <li class="toc-h4 nav-item toc-entry"> |
| <a class="reference internal nav-link" href="#datafusion.dataframe.DataFrame.with_column_renamed"> |
| <code class="docutils literal notranslate"> |
| <span class="pre"> |
| DataFrame.with_column_renamed() |
| </span> |
| </code> |
| </a> |
| </li> |
| <li class="toc-h4 nav-item toc-entry"> |
| <a class="reference internal nav-link" href="#datafusion.dataframe.DataFrame.with_columns"> |
| <code class="docutils literal notranslate"> |
| <span class="pre"> |
| DataFrame.with_columns() |
| </span> |
| </code> |
| </a> |
| </li> |
| <li class="toc-h4 nav-item toc-entry"> |
| <a class="reference internal nav-link" href="#datafusion.dataframe.DataFrame.write_csv"> |
| <code class="docutils literal notranslate"> |
| <span class="pre"> |
| DataFrame.write_csv() |
| </span> |
| </code> |
| </a> |
| </li> |
| <li class="toc-h4 nav-item toc-entry"> |
| <a class="reference internal nav-link" href="#datafusion.dataframe.DataFrame.write_json"> |
| <code class="docutils literal notranslate"> |
| <span class="pre"> |
| DataFrame.write_json() |
| </span> |
| </code> |
| </a> |
| </li> |
| <li class="toc-h4 nav-item toc-entry"> |
| <a class="reference internal nav-link" href="#datafusion.dataframe.DataFrame.write_parquet"> |
| <code class="docutils literal notranslate"> |
| <span class="pre"> |
| DataFrame.write_parquet() |
| </span> |
| </code> |
| </a> |
| </li> |
| <li class="toc-h4 nav-item toc-entry"> |
| <a class="reference internal nav-link" href="#datafusion.dataframe.DataFrame.write_parquet_with_options"> |
| <code class="docutils literal notranslate"> |
| <span class="pre"> |
| DataFrame.write_parquet_with_options() |
| </span> |
| </code> |
| </a> |
| </li> |
| <li class="toc-h4 nav-item toc-entry"> |
| <a class="reference internal nav-link" href="#datafusion.dataframe.DataFrame.write_table"> |
| <code class="docutils literal notranslate"> |
| <span class="pre"> |
| DataFrame.write_table() |
| </span> |
| </code> |
| </a> |
| </li> |
| <li class="toc-h4 nav-item toc-entry"> |
| <a class="reference internal nav-link" href="#datafusion.dataframe.DataFrame.df"> |
| <code class="docutils literal notranslate"> |
| <span class="pre"> |
| DataFrame.df |
| </span> |
| </code> |
| </a> |
| </li> |
| </ul> |
| </li> |
| <li class="toc-h3 nav-item toc-entry"> |
| <a class="reference internal nav-link" href="#datafusion.dataframe.DataFrameWriteOptions"> |
| <code class="docutils literal notranslate"> |
| <span class="pre"> |
| DataFrameWriteOptions |
| </span> |
| </code> |
| </a> |
| <ul class="nav section-nav flex-column"> |
| <li class="toc-h4 nav-item toc-entry"> |
| <a class="reference internal nav-link" href="#datafusion.dataframe.DataFrameWriteOptions._raw_write_options"> |
| <code class="docutils literal notranslate"> |
| <span class="pre"> |
| DataFrameWriteOptions._raw_write_options |
| </span> |
| </code> |
| </a> |
| </li> |
| </ul> |
| </li> |
| <li class="toc-h3 nav-item toc-entry"> |
| <a class="reference internal nav-link" href="#datafusion.dataframe.InsertOp"> |
| <code class="docutils literal notranslate"> |
| <span class="pre"> |
| InsertOp |
| </span> |
| </code> |
| </a> |
| <ul class="nav section-nav flex-column"> |
| <li class="toc-h4 nav-item toc-entry"> |
| <a class="reference internal nav-link" href="#datafusion.dataframe.InsertOp.APPEND"> |
| <code class="docutils literal notranslate"> |
| <span class="pre"> |
| InsertOp.APPEND |
| </span> |
| </code> |
| </a> |
| </li> |
| <li class="toc-h4 nav-item toc-entry"> |
| <a class="reference internal nav-link" href="#datafusion.dataframe.InsertOp.OVERWRITE"> |
| <code class="docutils literal notranslate"> |
| <span class="pre"> |
| InsertOp.OVERWRITE |
| </span> |
| </code> |
| </a> |
| </li> |
| <li class="toc-h4 nav-item toc-entry"> |
| <a class="reference internal nav-link" href="#datafusion.dataframe.InsertOp.REPLACE"> |
| <code class="docutils literal notranslate"> |
| <span class="pre"> |
| InsertOp.REPLACE |
| </span> |
| </code> |
| </a> |
| </li> |
| </ul> |
| </li> |
| <li class="toc-h3 nav-item toc-entry"> |
| <a class="reference internal nav-link" href="#datafusion.dataframe.ParquetColumnOptions"> |
| <code class="docutils literal notranslate"> |
| <span class="pre"> |
| ParquetColumnOptions |
| </span> |
| </code> |
| </a> |
| <ul class="nav section-nav flex-column"> |
| <li class="toc-h4 nav-item toc-entry"> |
| <a class="reference internal nav-link" href="#datafusion.dataframe.ParquetColumnOptions.bloom_filter_enabled"> |
| <code class="docutils literal notranslate"> |
| <span class="pre"> |
| ParquetColumnOptions.bloom_filter_enabled |
| </span> |
| </code> |
| </a> |
| </li> |
| <li class="toc-h4 nav-item toc-entry"> |
| <a class="reference internal nav-link" href="#datafusion.dataframe.ParquetColumnOptions.bloom_filter_fpp"> |
| <code class="docutils literal notranslate"> |
| <span class="pre"> |
| ParquetColumnOptions.bloom_filter_fpp |
| </span> |
| </code> |
| </a> |
| </li> |
| <li class="toc-h4 nav-item toc-entry"> |
| <a class="reference internal nav-link" href="#datafusion.dataframe.ParquetColumnOptions.bloom_filter_ndv"> |
| <code class="docutils literal notranslate"> |
| <span class="pre"> |
| ParquetColumnOptions.bloom_filter_ndv |
| </span> |
| </code> |
| </a> |
| </li> |
| <li class="toc-h4 nav-item toc-entry"> |
| <a class="reference internal nav-link" href="#datafusion.dataframe.ParquetColumnOptions.compression"> |
| <code class="docutils literal notranslate"> |
| <span class="pre"> |
| ParquetColumnOptions.compression |
| </span> |
| </code> |
| </a> |
| </li> |
| <li class="toc-h4 nav-item toc-entry"> |
| <a class="reference internal nav-link" href="#datafusion.dataframe.ParquetColumnOptions.dictionary_enabled"> |
| <code class="docutils literal notranslate"> |
| <span class="pre"> |
| ParquetColumnOptions.dictionary_enabled |
| </span> |
| </code> |
| </a> |
| </li> |
| <li class="toc-h4 nav-item toc-entry"> |
| <a class="reference internal nav-link" href="#datafusion.dataframe.ParquetColumnOptions.encoding"> |
| <code class="docutils literal notranslate"> |
| <span class="pre"> |
| ParquetColumnOptions.encoding |
| </span> |
| </code> |
| </a> |
| </li> |
| <li class="toc-h4 nav-item toc-entry"> |
| <a class="reference internal nav-link" href="#datafusion.dataframe.ParquetColumnOptions.statistics_enabled"> |
| <code class="docutils literal notranslate"> |
| <span class="pre"> |
| ParquetColumnOptions.statistics_enabled |
| </span> |
| </code> |
| </a> |
| </li> |
| </ul> |
| </li> |
| <li class="toc-h3 nav-item toc-entry"> |
| <a class="reference internal nav-link" href="#datafusion.dataframe.ParquetWriterOptions"> |
| <code class="docutils literal notranslate"> |
| <span class="pre"> |
| ParquetWriterOptions |
| </span> |
| </code> |
| </a> |
| <ul class="nav section-nav flex-column"> |
| <li class="toc-h4 nav-item toc-entry"> |
| <a class="reference internal nav-link" href="#datafusion.dataframe.ParquetWriterOptions.allow_single_file_parallelism"> |
| <code class="docutils literal notranslate"> |
| <span class="pre"> |
| ParquetWriterOptions.allow_single_file_parallelism |
| </span> |
| </code> |
| </a> |
| </li> |
| <li class="toc-h4 nav-item toc-entry"> |
| <a class="reference internal nav-link" href="#datafusion.dataframe.ParquetWriterOptions.bloom_filter_fpp"> |
| <code class="docutils literal notranslate"> |
| <span class="pre"> |
| ParquetWriterOptions.bloom_filter_fpp |
| </span> |
| </code> |
| </a> |
| </li> |
| <li class="toc-h4 nav-item toc-entry"> |
| <a class="reference internal nav-link" href="#datafusion.dataframe.ParquetWriterOptions.bloom_filter_ndv"> |
| <code class="docutils literal notranslate"> |
| <span class="pre"> |
| ParquetWriterOptions.bloom_filter_ndv |
| </span> |
| </code> |
| </a> |
| </li> |
| <li class="toc-h4 nav-item toc-entry"> |
| <a class="reference internal nav-link" href="#datafusion.dataframe.ParquetWriterOptions.bloom_filter_on_write"> |
| <code class="docutils literal notranslate"> |
| <span class="pre"> |
| ParquetWriterOptions.bloom_filter_on_write |
| </span> |
| </code> |
| </a> |
| </li> |
| <li class="toc-h4 nav-item toc-entry"> |
| <a class="reference internal nav-link" href="#datafusion.dataframe.ParquetWriterOptions.column_index_truncate_length"> |
| <code class="docutils literal notranslate"> |
| <span class="pre"> |
| ParquetWriterOptions.column_index_truncate_length |
| </span> |
| </code> |
| </a> |
| </li> |
| <li class="toc-h4 nav-item toc-entry"> |
| <a class="reference internal nav-link" href="#datafusion.dataframe.ParquetWriterOptions.column_specific_options"> |
| <code class="docutils literal notranslate"> |
| <span class="pre"> |
| ParquetWriterOptions.column_specific_options |
| </span> |
| </code> |
| </a> |
| </li> |
| <li class="toc-h4 nav-item toc-entry"> |
| <a class="reference internal nav-link" href="#datafusion.dataframe.ParquetWriterOptions.created_by"> |
| <code class="docutils literal notranslate"> |
| <span class="pre"> |
| ParquetWriterOptions.created_by |
| </span> |
| </code> |
| </a> |
| </li> |
| <li class="toc-h4 nav-item toc-entry"> |
| <a class="reference internal nav-link" href="#datafusion.dataframe.ParquetWriterOptions.data_page_row_count_limit"> |
| <code class="docutils literal notranslate"> |
| <span class="pre"> |
| ParquetWriterOptions.data_page_row_count_limit |
| </span> |
| </code> |
| </a> |
| </li> |
| <li class="toc-h4 nav-item toc-entry"> |
| <a class="reference internal nav-link" href="#datafusion.dataframe.ParquetWriterOptions.data_pagesize_limit"> |
| <code class="docutils literal notranslate"> |
| <span class="pre"> |
| ParquetWriterOptions.data_pagesize_limit |
| </span> |
| </code> |
| </a> |
| </li> |
| <li class="toc-h4 nav-item toc-entry"> |
| <a class="reference internal nav-link" href="#datafusion.dataframe.ParquetWriterOptions.dictionary_enabled"> |
| <code class="docutils literal notranslate"> |
| <span class="pre"> |
| ParquetWriterOptions.dictionary_enabled |
| </span> |
| </code> |
| </a> |
| </li> |
| <li class="toc-h4 nav-item toc-entry"> |
| <a class="reference internal nav-link" href="#datafusion.dataframe.ParquetWriterOptions.dictionary_page_size_limit"> |
| <code class="docutils literal notranslate"> |
| <span class="pre"> |
| ParquetWriterOptions.dictionary_page_size_limit |
| </span> |
| </code> |
| </a> |
| </li> |
| <li class="toc-h4 nav-item toc-entry"> |
| <a class="reference internal nav-link" href="#datafusion.dataframe.ParquetWriterOptions.encoding"> |
| <code class="docutils literal notranslate"> |
| <span class="pre"> |
| ParquetWriterOptions.encoding |
| </span> |
| </code> |
| </a> |
| </li> |
| <li class="toc-h4 nav-item toc-entry"> |
| <a class="reference internal nav-link" href="#datafusion.dataframe.ParquetWriterOptions.max_row_group_size"> |
| <code class="docutils literal notranslate"> |
| <span class="pre"> |
| ParquetWriterOptions.max_row_group_size |
| </span> |
| </code> |
| </a> |
| </li> |
| <li class="toc-h4 nav-item toc-entry"> |
| <a class="reference internal nav-link" href="#datafusion.dataframe.ParquetWriterOptions.maximum_buffered_record_batches_per_stream"> |
| <code class="docutils literal notranslate"> |
| <span class="pre"> |
| ParquetWriterOptions.maximum_buffered_record_batches_per_stream |
| </span> |
| </code> |
| </a> |
| </li> |
| <li class="toc-h4 nav-item toc-entry"> |
| <a class="reference internal nav-link" href="#datafusion.dataframe.ParquetWriterOptions.maximum_parallel_row_group_writers"> |
| <code class="docutils literal notranslate"> |
| <span class="pre"> |
| ParquetWriterOptions.maximum_parallel_row_group_writers |
| </span> |
| </code> |
| </a> |
| </li> |
| <li class="toc-h4 nav-item toc-entry"> |
| <a class="reference internal nav-link" href="#datafusion.dataframe.ParquetWriterOptions.skip_arrow_metadata"> |
| <code class="docutils literal notranslate"> |
| <span class="pre"> |
| ParquetWriterOptions.skip_arrow_metadata |
| </span> |
| </code> |
| </a> |
| </li> |
| <li class="toc-h4 nav-item toc-entry"> |
| <a class="reference internal nav-link" href="#datafusion.dataframe.ParquetWriterOptions.statistics_enabled"> |
| <code class="docutils literal notranslate"> |
| <span class="pre"> |
| ParquetWriterOptions.statistics_enabled |
| </span> |
| </code> |
| </a> |
| </li> |
| <li class="toc-h4 nav-item toc-entry"> |
| <a class="reference internal nav-link" href="#datafusion.dataframe.ParquetWriterOptions.statistics_truncate_length"> |
| <code class="docutils literal notranslate"> |
| <span class="pre"> |
| ParquetWriterOptions.statistics_truncate_length |
| </span> |
| </code> |
| </a> |
| </li> |
| <li class="toc-h4 nav-item toc-entry"> |
| <a class="reference internal nav-link" href="#datafusion.dataframe.ParquetWriterOptions.write_batch_size"> |
| <code class="docutils literal notranslate"> |
| <span class="pre"> |
| ParquetWriterOptions.write_batch_size |
| </span> |
| </code> |
| </a> |
| </li> |
| <li class="toc-h4 nav-item toc-entry"> |
| <a class="reference internal nav-link" href="#datafusion.dataframe.ParquetWriterOptions.writer_version"> |
| <code class="docutils literal notranslate"> |
| <span class="pre"> |
| ParquetWriterOptions.writer_version |
| </span> |
| </code> |
| </a> |
| </li> |
| </ul> |
| </li> |
| </ul> |
| </li> |
| </ul> |
| |
| </nav> |
| </div> |
| |
| <div class="toc-item"> |
| |
| </div> |
| |
| |
| </div> |
| |
| |
| |
| |
| |
| |
| <main class="col-12 col-md-9 col-xl-7 py-md-5 pl-md-5 pr-md-4 bd-content" role="main"> |
| |
| <div> |
| |
| <section id="module-datafusion.dataframe"> |
| <span id="datafusion-dataframe"></span><h1>datafusion.dataframe<a class="headerlink" href="#module-datafusion.dataframe" title="Link to this heading">¶</a></h1> |
| <p><a class="reference internal" href="#datafusion.dataframe.DataFrame" title="datafusion.dataframe.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a> is one of the core concepts in DataFusion.</p> |
| <p>See <a class="reference internal" href="../../../user-guide/basics.html#user-guide-concepts"><span class="std std-ref">Concepts</span></a> in the online documentation for more information.</p> |
| <section id="classes"> |
| <h2>Classes<a class="headerlink" href="#classes" title="Link to this heading">¶</a></h2> |
| <table class="autosummary longtable table autosummary"> |
| <tbody> |
| <tr class="row-odd"><td><p><a class="reference internal" href="#datafusion.dataframe.Compression" title="datafusion.dataframe.Compression"><code class="xref py py-obj docutils literal notranslate"><span class="pre">Compression</span></code></a></p></td> |
| <td><p>Enum representing the available compression types for Parquet files.</p></td> |
| </tr> |
| <tr class="row-even"><td><p><a class="reference internal" href="#datafusion.dataframe.DataFrame" title="datafusion.dataframe.DataFrame"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrame</span></code></a></p></td> |
| <td><p>Two dimensional table representation of data.</p></td> |
| </tr> |
| <tr class="row-odd"><td><p><a class="reference internal" href="#datafusion.dataframe.DataFrameWriteOptions" title="datafusion.dataframe.DataFrameWriteOptions"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DataFrameWriteOptions</span></code></a></p></td> |
| <td><p>Writer options for DataFrame.</p></td> |
| </tr> |
| <tr class="row-even"><td><p><a class="reference internal" href="#datafusion.dataframe.InsertOp" title="datafusion.dataframe.InsertOp"><code class="xref py py-obj docutils literal notranslate"><span class="pre">InsertOp</span></code></a></p></td> |
| <td><p>Insert operation mode.</p></td> |
| </tr> |
| <tr class="row-odd"><td><p><a class="reference internal" href="#datafusion.dataframe.ParquetColumnOptions" title="datafusion.dataframe.ParquetColumnOptions"><code class="xref py py-obj docutils literal notranslate"><span class="pre">ParquetColumnOptions</span></code></a></p></td> |
| <td><p>Parquet options for individual columns.</p></td> |
| </tr> |
| <tr class="row-even"><td><p><a class="reference internal" href="#datafusion.dataframe.ParquetWriterOptions" title="datafusion.dataframe.ParquetWriterOptions"><code class="xref py py-obj docutils literal notranslate"><span class="pre">ParquetWriterOptions</span></code></a></p></td> |
| <td><p>Advanced parquet writer options.</p></td> |
| </tr> |
| </tbody> |
| </table> |
| </section> |
| <section id="module-contents"> |
| <h2>Module Contents<a class="headerlink" href="#module-contents" title="Link to this heading">¶</a></h2> |
| <dl class="py class"> |
| <dt class="sig sig-object py" id="datafusion.dataframe.Compression"> |
| <em class="property"><span class="pre">class</span><span class="w"> </span></em><span class="sig-prename descclassname"><span class="pre">datafusion.dataframe.</span></span><span class="sig-name descname"><span class="pre">Compression</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="o"><span class="pre">*</span></span><span class="n"><span class="pre">args</span></span></em>, <em class="sig-param"><span class="o"><span class="pre">**</span></span><span class="n"><span class="pre">kwds</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#datafusion.dataframe.Compression" title="Link to this definition">¶</a></dt> |
| <dd><p>Bases: <code class="xref py py-obj docutils literal notranslate"><span class="pre">enum.Enum</span></code></p> |
| <p>Enum representing the available compression types for Parquet files.</p> |
| <dl class="py method"> |
| <dt class="sig sig-object py" id="datafusion.dataframe.Compression.from_str"> |
| <em class="property"><span class="pre">classmethod</span><span class="w"> </span></em><span class="sig-name descname"><span class="pre">from_str</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">value</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">str</span></span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><a class="reference internal" href="#datafusion.dataframe.Compression" title="datafusion.dataframe.Compression"><span class="pre">Compression</span></a></span></span><a class="headerlink" href="#datafusion.dataframe.Compression.from_str" title="Link to this definition">¶</a></dt> |
| <dd><p>Convert a string to a Compression enum value.</p> |
| <dl class="field-list simple"> |
| <dt class="field-odd">Parameters<span class="colon">:</span></dt> |
| <dd class="field-odd"><p><strong>value</strong> – The string representation of the compression type.</p> |
| </dd> |
| <dt class="field-even">Returns<span class="colon">:</span></dt> |
| <dd class="field-even"><p>The matching Compression enum member (member values are lowercase strings).</p> |
| </dd> |
| <dt class="field-odd">Raises<span class="colon">:</span></dt> |
| <dd class="field-odd"><p><strong>ValueError</strong> – If the string does not match any Compression enum value.</p> |
| </dd> |
| </dl> |
| </dd></dl> |
| |
| <dl class="py method"> |
| <dt class="sig sig-object py" id="datafusion.dataframe.Compression.get_default_level"> |
| <span class="sig-name descname"><span class="pre">get_default_level</span></span><span class="sig-paren">(</span><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><span class="pre">int</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span></span><a class="headerlink" href="#datafusion.dataframe.Compression.get_default_level" title="Link to this definition">¶</a></dt> |
| <dd><p>Get the default compression level for the compression type.</p> |
| <dl class="field-list simple"> |
| <dt class="field-odd">Returns<span class="colon">:</span></dt> |
| <dd class="field-odd"><p>The default compression level for the compression type.</p> |
| </dd> |
| </dl> |
| </dd></dl> |
| |
| <dl class="py attribute"> |
| <dt class="sig sig-object py" id="datafusion.dataframe.Compression.BROTLI"> |
| <span class="sig-name descname"><span class="pre">BROTLI</span></span><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">'brotli'</span></em><a class="headerlink" href="#datafusion.dataframe.Compression.BROTLI" title="Link to this definition">¶</a></dt> |
| <dd></dd></dl> |
| |
| <dl class="py attribute"> |
| <dt class="sig sig-object py" id="datafusion.dataframe.Compression.GZIP"> |
| <span class="sig-name descname"><span class="pre">GZIP</span></span><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">'gzip'</span></em><a class="headerlink" href="#datafusion.dataframe.Compression.GZIP" title="Link to this definition">¶</a></dt> |
| <dd></dd></dl> |
| |
| <dl class="py attribute"> |
| <dt class="sig sig-object py" id="datafusion.dataframe.Compression.LZ4"> |
| <span class="sig-name descname"><span class="pre">LZ4</span></span><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">'lz4'</span></em><a class="headerlink" href="#datafusion.dataframe.Compression.LZ4" title="Link to this definition">¶</a></dt> |
| <dd></dd></dl> |
| |
| <dl class="py attribute"> |
| <dt class="sig sig-object py" id="datafusion.dataframe.Compression.LZ4_RAW"> |
| <span class="sig-name descname"><span class="pre">LZ4_RAW</span></span><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">'lz4_raw'</span></em><a class="headerlink" href="#datafusion.dataframe.Compression.LZ4_RAW" title="Link to this definition">¶</a></dt> |
| <dd></dd></dl> |
| |
| <dl class="py attribute"> |
| <dt class="sig sig-object py" id="datafusion.dataframe.Compression.SNAPPY"> |
| <span class="sig-name descname"><span class="pre">SNAPPY</span></span><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">'snappy'</span></em><a class="headerlink" href="#datafusion.dataframe.Compression.SNAPPY" title="Link to this definition">¶</a></dt> |
| <dd></dd></dl> |
| |
| <dl class="py attribute"> |
| <dt class="sig sig-object py" id="datafusion.dataframe.Compression.UNCOMPRESSED"> |
| <span class="sig-name descname"><span class="pre">UNCOMPRESSED</span></span><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">'uncompressed'</span></em><a class="headerlink" href="#datafusion.dataframe.Compression.UNCOMPRESSED" title="Link to this definition">¶</a></dt> |
| <dd></dd></dl> |
| |
| <dl class="py attribute"> |
| <dt class="sig sig-object py" id="datafusion.dataframe.Compression.ZSTD"> |
| <span class="sig-name descname"><span class="pre">ZSTD</span></span><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">'zstd'</span></em><a class="headerlink" href="#datafusion.dataframe.Compression.ZSTD" title="Link to this definition">¶</a></dt> |
| <dd></dd></dl> |
| |
| </dd></dl> |
| |
| <dl class="py class"> |
| <dt class="sig sig-object py" id="datafusion.dataframe.DataFrame"> |
| <em class="property"><span class="pre">class</span><span class="w"> </span></em><span class="sig-prename descclassname"><span class="pre">datafusion.dataframe.</span></span><span class="sig-name descname"><span class="pre">DataFrame</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">df</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">datafusion._internal.DataFrame</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#datafusion.dataframe.DataFrame" title="Link to this definition">¶</a></dt> |
| <dd><p>Two dimensional table representation of data.</p> |
| <p>See <a class="reference internal" href="../../../user-guide/basics.html#user-guide-concepts"><span class="std std-ref">Concepts</span></a> in the online documentation for more information.</p> |
| <p>This constructor is not to be used by the end user.</p> |
| <p>See <a class="reference internal" href="../context/index.html#datafusion.context.SessionContext" title="datafusion.context.SessionContext"><code class="xref py py-class docutils literal notranslate"><span class="pre">SessionContext</span></code></a> for methods to |
| create a <a class="reference internal" href="#datafusion.dataframe.DataFrame" title="datafusion.dataframe.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a>.</p> |
| <dl class="py method"> |
| <dt class="sig sig-object py" id="datafusion.dataframe.DataFrame.__arrow_c_stream__"> |
| <span class="sig-name descname"><span class="pre">__arrow_c_stream__</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">requested_schema</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">object</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><span class="pre">object</span></span></span><a class="headerlink" href="#datafusion.dataframe.DataFrame.__arrow_c_stream__" title="Link to this definition">¶</a></dt> |
| <dd><p>Export an Arrow PyCapsule Stream.</p> |
| <p>This will execute and collect the DataFrame. We will attempt to respect the |
| requested schema, but only trivial transformations will be applied such as only |
| returning the fields listed in the requested schema if their data types match |
| those in the DataFrame.</p> |
| <dl class="field-list simple"> |
| <dt class="field-odd">Parameters<span class="colon">:</span></dt> |
| <dd class="field-odd"><p><strong>requested_schema</strong> – Attempt to provide the DataFrame using this schema.</p> |
| </dd> |
| <dt class="field-even">Returns<span class="colon">:</span></dt> |
| <dd class="field-even"><p>Arrow PyCapsule object.</p> |
| </dd> |
| </dl> |
| </dd></dl> |
| |
| <dl class="py method"> |
| <dt class="sig sig-object py" id="datafusion.dataframe.DataFrame.__getitem__"> |
| <span class="sig-name descname"><span class="pre">__getitem__</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">key</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">str</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">list</span><span class="p"><span class="pre">[</span></span><span class="pre">str</span><span class="p"><span class="pre">]</span></span></span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><a class="reference internal" href="#datafusion.dataframe.DataFrame" title="datafusion.dataframe.DataFrame"><span class="pre">DataFrame</span></a></span></span><a class="headerlink" href="#datafusion.dataframe.DataFrame.__getitem__" title="Link to this definition">¶</a></dt> |
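| <dd><p>Return a new <a class="reference internal" href="#datafusion.dataframe.DataFrame" title="datafusion.dataframe.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a> with the specified column or columns.</p> |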
| <dl class="field-list simple"> |
| <dt class="field-odd">Parameters<span class="colon">:</span></dt> |
| <dd class="field-odd"><p><strong>key</strong> – Column name or list of column names to select.</p> |
| </dd> |
| <dt class="field-even">Returns<span class="colon">:</span></dt> |
| <dd class="field-even"><p>DataFrame with the specified column or columns.</p> |
| </dd> |
| </dl> |
| </dd></dl> |
| |
| <dl class="py method"> |
| <dt class="sig sig-object py" id="datafusion.dataframe.DataFrame.__repr__"> |
| <span class="sig-name descname"><span class="pre">__repr__</span></span><span class="sig-paren">(</span><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><span class="pre">str</span></span></span><a class="headerlink" href="#datafusion.dataframe.DataFrame.__repr__" title="Link to this definition">¶</a></dt> |
| <dd><p>Return a string representation of the DataFrame.</p> |
| <dl class="field-list simple"> |
| <dt class="field-odd">Returns<span class="colon">:</span></dt> |
| <dd class="field-odd"><p>String representation of the DataFrame.</p> |
| </dd> |
| </dl> |
| </dd></dl> |
| |
| <dl class="py method"> |
| <dt class="sig sig-object py" id="datafusion.dataframe.DataFrame._repr_html_"> |
| <span class="sig-name descname"><span class="pre">_repr_html_</span></span><span class="sig-paren">(</span><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><span class="pre">str</span></span></span><a class="headerlink" href="#datafusion.dataframe.DataFrame._repr_html_" title="Link to this definition">¶</a></dt> |
| <dd></dd></dl> |
| |
| <dl class="py method"> |
| <dt class="sig sig-object py" id="datafusion.dataframe.DataFrame.aggregate"> |
| <span class="sig-name descname"><span class="pre">aggregate</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">group_by</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">collections.abc.Sequence</span><span class="p"><span class="pre">[</span></span><a class="reference internal" href="../expr/index.html#datafusion.expr.Expr" title="datafusion.expr.Expr"><span class="pre">datafusion.expr.Expr</span></a><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">str</span><span class="p"><span class="pre">]</span></span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><a class="reference internal" href="../expr/index.html#datafusion.expr.Expr" title="datafusion.expr.Expr"><span class="pre">datafusion.expr.Expr</span></a><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">str</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">aggs</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">collections.abc.Sequence</span><span class="p"><span class="pre">[</span></span><a class="reference internal" href="../expr/index.html#datafusion.expr.Expr" title="datafusion.expr.Expr"><span class="pre">datafusion.expr.Expr</span></a><span class="p"><span class="pre">]</span></span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><a class="reference internal" href="../expr/index.html#datafusion.expr.Expr" title="datafusion.expr.Expr"><span class="pre">datafusion.expr.Expr</span></a></span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><a class="reference internal" 
href="#datafusion.dataframe.DataFrame" title="datafusion.dataframe.DataFrame"><span class="pre">DataFrame</span></a></span></span><a class="headerlink" href="#datafusion.dataframe.DataFrame.aggregate" title="Link to this definition">¶</a></dt> |
| <dd><p>Aggregates the rows of the current DataFrame.</p> |
| <dl class="field-list simple"> |
| <dt class="field-odd">Parameters<span class="colon">:</span></dt> |
| <dd class="field-odd"><ul class="simple"> |
| <li><p><strong>group_by</strong> – Expression or column name, or a sequence of them, to group by.</p></li> |
| <li><p><strong>aggs</strong> – Expression, or a sequence of expressions, to aggregate.</p></li> |
| </ul> |
| </dd> |
| <dt class="field-even">Returns<span class="colon">:</span></dt> |
| <dd class="field-even"><p>DataFrame after aggregation.</p> |
| </dd> |
| </dl> |
| </dd></dl> |
| |
| <dl class="py method"> |
| <dt class="sig sig-object py" id="datafusion.dataframe.DataFrame.cache"> |
| <span class="sig-name descname"><span class="pre">cache</span></span><span class="sig-paren">(</span><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><a class="reference internal" href="#datafusion.dataframe.DataFrame" title="datafusion.dataframe.DataFrame"><span class="pre">DataFrame</span></a></span></span><a class="headerlink" href="#datafusion.dataframe.DataFrame.cache" title="Link to this definition">¶</a></dt> |
| <dd><p>Cache the DataFrame as a memory table.</p> |
| <dl class="field-list simple"> |
| <dt class="field-odd">Returns<span class="colon">:</span></dt> |
| <dd class="field-odd"><p>Cached DataFrame.</p> |
| </dd> |
| </dl> |
| </dd></dl> |
| |
| <dl class="py method"> |
| <dt class="sig sig-object py" id="datafusion.dataframe.DataFrame.cast"> |
| <span class="sig-name descname"><span class="pre">cast</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">mapping</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">dict</span><span class="p"><span class="pre">[</span></span><span class="pre">str</span><span class="p"><span class="pre">,</span></span><span class="w"> </span><span class="pre">pyarrow.DataType</span><span class="p"><span class="pre">[</span></span><span class="pre">Any</span><span class="p"><span class="pre">]</span></span><span class="p"><span class="pre">]</span></span></span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><a class="reference internal" href="#datafusion.dataframe.DataFrame" title="datafusion.dataframe.DataFrame"><span class="pre">DataFrame</span></a></span></span><a class="headerlink" href="#datafusion.dataframe.DataFrame.cast" title="Link to this definition">¶</a></dt> |
| <dd><p>Cast one or more columns to a different data type.</p> |
| <dl class="field-list simple"> |
| <dt class="field-odd">Parameters<span class="colon">:</span></dt> |
| <dd class="field-odd"><p><strong>mapping</strong> – Mapping with column name as key and the data type to cast that column to as value.</p> |
| </dd> |
| <dt class="field-even">Returns<span class="colon">:</span></dt> |
| <dd class="field-even"><p>DataFrame after casting columns.</p> |
| </dd> |
| </dl> |
| </dd></dl> |
| |
| <dl class="py method"> |
| <dt class="sig sig-object py" id="datafusion.dataframe.DataFrame.collect"> |
| <span class="sig-name descname"><span class="pre">collect</span></span><span class="sig-paren">(</span><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><span class="pre">list</span><span class="p"><span class="pre">[</span></span><span class="pre">pyarrow.RecordBatch</span><span class="p"><span class="pre">]</span></span></span></span><a class="headerlink" href="#datafusion.dataframe.DataFrame.collect" title="Link to this definition">¶</a></dt> |
| <dd><p>Execute this <a class="reference internal" href="#datafusion.dataframe.DataFrame" title="datafusion.dataframe.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a> and collect results into memory.</p> |
| <p>Prior to calling <code class="docutils literal notranslate"><span class="pre">collect</span></code>, modifying a DataFrame simply updates a plan |
| (no actual computation is performed). Calling <code class="docutils literal notranslate"><span class="pre">collect</span></code> triggers the |
| computation.</p> |
| <dl class="field-list simple"> |
| <dt class="field-odd">Returns<span class="colon">:</span></dt> |
| <dd class="field-odd"><p>List of <code class="xref py py-class docutils literal notranslate"><span class="pre">pyarrow.RecordBatch</span></code> collected from the DataFrame.</p> |
| </dd> |
| </dl> |
| </dd></dl> |
| |
| <dl class="py method"> |
| <dt class="sig sig-object py" id="datafusion.dataframe.DataFrame.collect_partitioned"> |
| <span class="sig-name descname"><span class="pre">collect_partitioned</span></span><span class="sig-paren">(</span><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><span class="pre">list</span><span class="p"><span class="pre">[</span></span><span class="pre">list</span><span class="p"><span class="pre">[</span></span><span class="pre">pyarrow.RecordBatch</span><span class="p"><span class="pre">]</span></span><span class="p"><span class="pre">]</span></span></span></span><a class="headerlink" href="#datafusion.dataframe.DataFrame.collect_partitioned" title="Link to this definition">¶</a></dt> |
| <dd><p>Execute this DataFrame and collect all partitioned results.</p> |
| <p>This operation returns <code class="xref py py-class docutils literal notranslate"><span class="pre">pyarrow.RecordBatch</span></code> maintaining the input |
| partitioning.</p> |
| <dl class="field-list simple"> |
| <dt class="field-odd">Returns<span class="colon">:</span></dt> |
| <dd class="field-odd"><p>List of lists of <code class="xref py py-class docutils literal notranslate"><span class="pre">RecordBatch</span></code> collected from the DataFrame.</p> |
| </dd> |
| </dl> |
| </dd></dl> |
| |
| <dl class="py method"> |
| <dt class="sig sig-object py" id="datafusion.dataframe.DataFrame.count"> |
| <span class="sig-name descname"><span class="pre">count</span></span><span class="sig-paren">(</span><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><span class="pre">int</span></span></span><a class="headerlink" href="#datafusion.dataframe.DataFrame.count" title="Link to this definition">¶</a></dt> |
| <dd><p>Return the total number of rows in this <a class="reference internal" href="#datafusion.dataframe.DataFrame" title="datafusion.dataframe.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a>.</p> |
| <p>Note that this method will actually run a plan to calculate the |
| count, which may be slow for large or complicated DataFrames.</p> |
| <dl class="field-list simple"> |
| <dt class="field-odd">Returns<span class="colon">:</span></dt> |
| <dd class="field-odd"><p>Number of rows in the DataFrame.</p> |
| </dd> |
| </dl> |
| </dd></dl> |
| |
| <dl class="py method"> |
| <dt class="sig sig-object py" id="datafusion.dataframe.DataFrame.default_str_repr"> |
| <em class="property"><span class="pre">static</span><span class="w"> </span></em><span class="sig-name descname"><span class="pre">default_str_repr</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">batches</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">list</span><span class="p"><span class="pre">[</span></span><span class="pre">pyarrow.RecordBatch</span><span class="p"><span class="pre">]</span></span></span></em>, <em class="sig-param"><span class="n"><span class="pre">schema</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">pyarrow.Schema</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">has_more</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">bool</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">table_uuid</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">str</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><span class="pre">str</span></span></span><a class="headerlink" href="#datafusion.dataframe.DataFrame.default_str_repr" title="Link to this definition">¶</a></dt> |
| <dd><p>Return the default string representation of a DataFrame.</p> |
| <p>This method is used by the default formatter and implemented in Rust for |
| performance reasons.</p> |
| </dd></dl> |
| |
| <dl class="py method"> |
| <dt class="sig sig-object py" id="datafusion.dataframe.DataFrame.describe"> |
| <span class="sig-name descname"><span class="pre">describe</span></span><span class="sig-paren">(</span><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><a class="reference internal" href="#datafusion.dataframe.DataFrame" title="datafusion.dataframe.DataFrame"><span class="pre">DataFrame</span></a></span></span><a class="headerlink" href="#datafusion.dataframe.DataFrame.describe" title="Link to this definition">¶</a></dt> |
| <dd><p>Return the statistics for this DataFrame.</p> |
| <p>Only numeric datatypes are summarized at the moment; nulls are returned |
| for non-numeric datatypes.</p> |
| <p>The output format is modeled after pandas.</p> |
| <dl class="field-list simple"> |
| <dt class="field-odd">Returns<span class="colon">:</span></dt> |
| <dd class="field-odd"><p>A summary DataFrame containing statistics.</p> |
| </dd> |
| </dl> |
| </dd></dl> |
| |
| <dl class="py method"> |
| <dt class="sig sig-object py" id="datafusion.dataframe.DataFrame.distinct"> |
| <span class="sig-name descname"><span class="pre">distinct</span></span><span class="sig-paren">(</span><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><a class="reference internal" href="#datafusion.dataframe.DataFrame" title="datafusion.dataframe.DataFrame"><span class="pre">DataFrame</span></a></span></span><a class="headerlink" href="#datafusion.dataframe.DataFrame.distinct" title="Link to this definition">¶</a></dt> |
| <dd><p>Return a new <a class="reference internal" href="#datafusion.dataframe.DataFrame" title="datafusion.dataframe.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a> with all duplicated rows removed.</p> |
| <dl class="field-list simple"> |
| <dt class="field-odd">Returns<span class="colon">:</span></dt> |
| <dd class="field-odd"><p>DataFrame after removing duplicates.</p> |
| </dd> |
| </dl> |
| </dd></dl> |
| |
| <dl class="py method"> |
| <dt class="sig sig-object py" id="datafusion.dataframe.DataFrame.drop"> |
| <span class="sig-name descname"><span class="pre">drop</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="o"><span class="pre">*</span></span><span class="n"><span class="pre">columns</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">str</span></span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><a class="reference internal" href="#datafusion.dataframe.DataFrame" title="datafusion.dataframe.DataFrame"><span class="pre">DataFrame</span></a></span></span><a class="headerlink" href="#datafusion.dataframe.DataFrame.drop" title="Link to this definition">¶</a></dt> |
| <dd><p>Drop an arbitrary number of columns.</p> |
| <p>Column names are case-sensitive and do not require double quotes like |
| other operations such as <cite>select</cite>. Leading and trailing double quotes |
| are allowed and will be automatically stripped if present.</p> |
| <dl class="field-list simple"> |
| <dt class="field-odd">Parameters<span class="colon">:</span></dt> |
| <dd class="field-odd"><p><strong>columns</strong> – Column names to drop from the dataframe. Both <code class="docutils literal notranslate"><span class="pre">column_name</span></code> |
| and <code class="docutils literal notranslate"><span class="pre">"column_name"</span></code> are accepted.</p> |
| </dd> |
| <dt class="field-even">Returns<span class="colon">:</span></dt> |
| <dd class="field-even"><p>DataFrame with those columns removed in the projection.</p> |
| </dd> |
| </dl> |
| <p>Example Usage:</p> |
| <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">df</span><span class="o">.</span><span class="n">drop</span><span class="p">(</span><span class="s1">'ID_For_Students'</span><span class="p">)</span> <span class="c1"># Works</span> |
| <span class="n">df</span><span class="o">.</span><span class="n">drop</span><span class="p">(</span><span class="s1">'"ID_For_Students"'</span><span class="p">)</span> <span class="c1"># Also works (quotes stripped)</span> |
| </pre></div> |
| </div> |
| </dd></dl> |
| |
| <dl class="py method"> |
| <dt class="sig sig-object py" id="datafusion.dataframe.DataFrame.except_all"> |
| <span class="sig-name descname"><span class="pre">except_all</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">other</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#datafusion.dataframe.DataFrame" title="datafusion.dataframe.DataFrame"><span class="pre">DataFrame</span></a></span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><a class="reference internal" href="#datafusion.dataframe.DataFrame" title="datafusion.dataframe.DataFrame"><span class="pre">DataFrame</span></a></span></span><a class="headerlink" href="#datafusion.dataframe.DataFrame.except_all" title="Link to this definition">¶</a></dt> |
| <dd><p>Calculate the exception of two <a class="reference internal" href="#datafusion.dataframe.DataFrame" title="datafusion.dataframe.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a>.</p> |
| <p>The two <a class="reference internal" href="#datafusion.dataframe.DataFrame" title="datafusion.dataframe.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a> must have exactly the same schema.</p> |
| <dl class="field-list simple"> |
| <dt class="field-odd">Parameters<span class="colon">:</span></dt> |
| <dd class="field-odd"><p><strong>other</strong> – DataFrame to calculate exception with.</p> |
| </dd> |
| <dt class="field-even">Returns<span class="colon">:</span></dt> |
| <dd class="field-even"><p>DataFrame after exception.</p> |
| </dd> |
| </dl> |
| </dd></dl> |
| |
| <dl class="py method"> |
| <dt class="sig sig-object py" id="datafusion.dataframe.DataFrame.execute_stream"> |
| <span class="sig-name descname"><span class="pre">execute_stream</span></span><span class="sig-paren">(</span><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><a class="reference internal" href="../record_batch/index.html#datafusion.record_batch.RecordBatchStream" title="datafusion.record_batch.RecordBatchStream"><span class="pre">datafusion.record_batch.RecordBatchStream</span></a></span></span><a class="headerlink" href="#datafusion.dataframe.DataFrame.execute_stream" title="Link to this definition">¶</a></dt> |
| <dd><p>Executes this DataFrame and returns a stream over a single partition.</p> |
| <dl class="field-list simple"> |
| <dt class="field-odd">Returns<span class="colon">:</span></dt> |
| <dd class="field-odd"><p>Record Batch Stream over a single partition.</p> |
| </dd> |
| </dl> |
| </dd></dl> |
| |
| <dl class="py method"> |
| <dt class="sig sig-object py" id="datafusion.dataframe.DataFrame.execute_stream_partitioned"> |
| <span class="sig-name descname"><span class="pre">execute_stream_partitioned</span></span><span class="sig-paren">(</span><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><span class="pre">list</span><span class="p"><span class="pre">[</span></span><a class="reference internal" href="../record_batch/index.html#datafusion.record_batch.RecordBatchStream" title="datafusion.record_batch.RecordBatchStream"><span class="pre">datafusion.record_batch.RecordBatchStream</span></a><span class="p"><span class="pre">]</span></span></span></span><a class="headerlink" href="#datafusion.dataframe.DataFrame.execute_stream_partitioned" title="Link to this definition">¶</a></dt> |
| <dd><p>Executes this DataFrame and returns a stream for each partition.</p> |
| <dl class="field-list simple"> |
| <dt class="field-odd">Returns<span class="colon">:</span></dt> |
| <dd class="field-odd"><p>One record batch stream per partition.</p> |
| </dd> |
| </dl> |
| </dd></dl> |
| |
| <dl class="py method"> |
| <dt class="sig sig-object py" id="datafusion.dataframe.DataFrame.execution_plan"> |
| <span class="sig-name descname"><span class="pre">execution_plan</span></span><span class="sig-paren">(</span><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><a class="reference internal" href="../plan/index.html#datafusion.plan.ExecutionPlan" title="datafusion.plan.ExecutionPlan"><span class="pre">datafusion.plan.ExecutionPlan</span></a></span></span><a class="headerlink" href="#datafusion.dataframe.DataFrame.execution_plan" title="Link to this definition">¶</a></dt> |
| <dd><p>Return the execution/physical plan.</p> |
| <dl class="field-list simple"> |
| <dt class="field-odd">Returns<span class="colon">:</span></dt> |
| <dd class="field-odd"><p>Execution plan.</p> |
| </dd> |
| </dl> |
| </dd></dl> |
| |
| <dl class="py method"> |
| <dt class="sig sig-object py" id="datafusion.dataframe.DataFrame.explain"> |
| <span class="sig-name descname"><span class="pre">explain</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">verbose</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">bool</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">False</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">analyze</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">bool</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">False</span></span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><span class="pre">None</span></span></span><a class="headerlink" href="#datafusion.dataframe.DataFrame.explain" title="Link to this definition">¶</a></dt> |
| <dd><p>Print an explanation of the DataFrame’s plan so far.</p> |
| <p>If <code class="docutils literal notranslate"><span class="pre">analyze</span></code> is specified, runs the plan and reports metrics.</p> |
| <dl class="field-list simple"> |
| <dt class="field-odd">Parameters<span class="colon">:</span></dt> |
| <dd class="field-odd"><ul class="simple"> |
| <li><p><strong>verbose</strong> – If <code class="docutils literal notranslate"><span class="pre">True</span></code>, more details will be included.</p></li> |
| <li><p><strong>analyze</strong> – If <code class="docutils literal notranslate"><span class="pre">True</span></code>, the plan will run and metrics reported.</p></li> |
| </ul> |
| </dd> |
| </dl> |
| </dd></dl> |
| |
| <dl class="py method"> |
| <dt class="sig sig-object py" id="datafusion.dataframe.DataFrame.fill_null"> |
| <span class="sig-name descname"><span class="pre">fill_null</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">value</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">Any</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">subset</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">list</span><span class="p"><span class="pre">[</span></span><span class="pre">str</span><span class="p"><span class="pre">]</span></span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><a class="reference internal" href="#datafusion.dataframe.DataFrame" title="datafusion.dataframe.DataFrame"><span class="pre">DataFrame</span></a></span></span><a class="headerlink" href="#datafusion.dataframe.DataFrame.fill_null" title="Link to this definition">¶</a></dt> |
| <dd><p>Fill null values in specified columns with a value.</p> |
| <dl class="field-list simple"> |
| <dt class="field-odd">Parameters<span class="colon">:</span></dt> |
| <dd class="field-odd"><ul class="simple"> |
| <li><p><strong>value</strong> – Value to replace nulls with. Will be cast to match column type.</p></li> |
| <li><p><strong>subset</strong> – Optional list of column names to fill. If None, fills all columns.</p></li> |
| </ul> |
| </dd> |
| <dt class="field-even">Returns<span class="colon">:</span></dt> |
| <dd class="field-even"><p>DataFrame with null values replaced where type casting is possible.</p> |
| </dd> |
| </dl> |
| <p class="rubric">Examples</p> |
| <div class="doctest highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">df</span> <span class="o">=</span> <span class="n">df</span><span class="o">.</span><span class="n">fill_null</span><span class="p">(</span><span class="mi">0</span><span class="p">)</span> <span class="c1"># Fill all nulls with 0 where possible</span> |
| <span class="gp">>>> </span><span class="c1"># Fill nulls in specific string columns</span> |
| <span class="gp">>>> </span><span class="n">df</span> <span class="o">=</span> <span class="n">df</span><span class="o">.</span><span class="n">fill_null</span><span class="p">(</span><span class="s2">"missing"</span><span class="p">,</span> <span class="n">subset</span><span class="o">=</span><span class="p">[</span><span class="s2">"name"</span><span class="p">,</span> <span class="s2">"category"</span><span class="p">])</span> |
| </pre></div> |
| </div> |
| <p class="rubric">Notes</p> |
| <ul class="simple"> |
| <li><p>Only fills nulls in columns where the value can be cast to the column type</p></li> |
| <li><p>For columns where casting fails, the original column is kept unchanged</p></li> |
| <li><p>For columns not in subset, the original column is kept unchanged</p></li> |
| </ul> |
| </dd></dl> |
| |
| <dl class="py method"> |
| <dt class="sig sig-object py" id="datafusion.dataframe.DataFrame.filter"> |
| <span class="sig-name descname"><span class="pre">filter</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="o"><span class="pre">*</span></span><span class="n"><span class="pre">predicates</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="../expr/index.html#datafusion.expr.Expr" title="datafusion.expr.Expr"><span class="pre">datafusion.expr.Expr</span></a></span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><a class="reference internal" href="#datafusion.dataframe.DataFrame" title="datafusion.dataframe.DataFrame"><span class="pre">DataFrame</span></a></span></span><a class="headerlink" href="#datafusion.dataframe.DataFrame.filter" title="Link to this definition">¶</a></dt> |
| <dd><p>Return a DataFrame for which <code class="docutils literal notranslate"><span class="pre">predicate</span></code> evaluates to <code class="docutils literal notranslate"><span class="pre">True</span></code>.</p> |
| <p>Rows for which <code class="docutils literal notranslate"><span class="pre">predicate</span></code> evaluates to <code class="docutils literal notranslate"><span class="pre">False</span></code> or <code class="docutils literal notranslate"><span class="pre">None</span></code> are filtered |
| out. If more than one predicate is provided, these predicates will be |
| combined as a logical AND. Each <code class="docutils literal notranslate"><span class="pre">predicate</span></code> must be an |
| <a class="reference internal" href="../expr/index.html#datafusion.expr.Expr" title="datafusion.expr.Expr"><code class="xref py py-class docutils literal notranslate"><span class="pre">Expr</span></code></a> created using helper functions such as |
| <a class="reference internal" href="../index.html#datafusion.col" title="datafusion.col"><code class="xref py py-func docutils literal notranslate"><span class="pre">datafusion.col()</span></code></a> or <a class="reference internal" href="../index.html#datafusion.lit" title="datafusion.lit"><code class="xref py py-func docutils literal notranslate"><span class="pre">datafusion.lit()</span></code></a>. |
| If more complex logic is required, see the logical operations in |
| <a class="reference internal" href="../functions/index.html#module-datafusion.functions" title="datafusion.functions"><code class="xref py py-mod docutils literal notranslate"><span class="pre">functions</span></code></a>.</p> |
| <p>Example:</p> |
| <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="kn">from</span><span class="w"> </span><span class="nn">datafusion</span><span class="w"> </span><span class="kn">import</span> <span class="n">col</span><span class="p">,</span> <span class="n">lit</span> |
| <span class="n">df</span><span class="o">.</span><span class="n">filter</span><span class="p">(</span><span class="n">col</span><span class="p">(</span><span class="s2">"a"</span><span class="p">)</span> <span class="o">></span> <span class="n">lit</span><span class="p">(</span><span class="mi">1</span><span class="p">))</span> |
| </pre></div> |
| </div> |
| <dl class="field-list simple"> |
| <dt class="field-odd">Parameters<span class="colon">:</span></dt> |
| <dd class="field-odd"><p><strong>predicates</strong> – Predicate expression(s) to filter the DataFrame.</p> |
| </dd> |
| <dt class="field-even">Returns<span class="colon">:</span></dt> |
| <dd class="field-even"><p>DataFrame after filtering.</p> |
| </dd> |
| </dl> |
| </dd></dl> |
| |
| <dl class="py method"> |
| <dt class="sig sig-object py" id="datafusion.dataframe.DataFrame.head"> |
| <span class="sig-name descname"><span class="pre">head</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">n</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">5</span></span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><a class="reference internal" href="#datafusion.dataframe.DataFrame" title="datafusion.dataframe.DataFrame"><span class="pre">DataFrame</span></a></span></span><a class="headerlink" href="#datafusion.dataframe.DataFrame.head" title="Link to this definition">¶</a></dt> |
| <dd><p>Return a new <a class="reference internal" href="#datafusion.dataframe.DataFrame" title="datafusion.dataframe.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a> with a limited number of rows.</p> |
| <dl class="field-list simple"> |
| <dt class="field-odd">Parameters<span class="colon">:</span></dt> |
| <dd class="field-odd"><p><strong>n</strong> – Number of rows to take from the head of the DataFrame.</p> |
| </dd> |
| <dt class="field-even">Returns<span class="colon">:</span></dt> |
| <dd class="field-even"><p>DataFrame after limiting.</p> |
| </dd> |
| </dl> |
| </dd></dl> |
| |
| <dl class="py method"> |
| <dt class="sig sig-object py" id="datafusion.dataframe.DataFrame.intersect"> |
| <span class="sig-name descname"><span class="pre">intersect</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">other</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#datafusion.dataframe.DataFrame" title="datafusion.dataframe.DataFrame"><span class="pre">DataFrame</span></a></span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><a class="reference internal" href="#datafusion.dataframe.DataFrame" title="datafusion.dataframe.DataFrame"><span class="pre">DataFrame</span></a></span></span><a class="headerlink" href="#datafusion.dataframe.DataFrame.intersect" title="Link to this definition">¶</a></dt> |
| <dd><p>Calculate the intersection of two <a class="reference internal" href="#datafusion.dataframe.DataFrame" title="datafusion.dataframe.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a>.</p> |
| <p>The two <a class="reference internal" href="#datafusion.dataframe.DataFrame" title="datafusion.dataframe.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a> must have exactly the same schema.</p> |
| <dl class="field-list simple"> |
| <dt class="field-odd">Parameters<span class="colon">:</span></dt> |
| <dd class="field-odd"><p><strong>other</strong> – DataFrame to intersect with.</p> |
| </dd> |
| <dt class="field-even">Returns<span class="colon">:</span></dt> |
| <dd class="field-even"><p>DataFrame after intersection.</p> |
| </dd> |
| </dl> |
| </dd></dl> |
| |
| <dl class="py method"> |
| <dt class="sig sig-object py" id="datafusion.dataframe.DataFrame.into_view"> |
| <span class="sig-name descname"><span class="pre">into_view</span></span><span class="sig-paren">(</span><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><a class="reference internal" href="../catalog/index.html#datafusion.catalog.Table" title="datafusion.catalog.Table"><span class="pre">datafusion.catalog.Table</span></a></span></span><a class="headerlink" href="#datafusion.dataframe.DataFrame.into_view" title="Link to this definition">¶</a></dt> |
| <dd><p>Convert <code class="docutils literal notranslate"><span class="pre">DataFrame</span></code> into a <a class="reference internal" href="../index.html#datafusion.Table" title="datafusion.Table"><code class="xref py py-class docutils literal notranslate"><span class="pre">Table</span></code></a>.</p> |
| <p class="rubric">Examples</p> |
| <div class="doctest highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="kn">from</span><span class="w"> </span><span class="nn">datafusion</span><span class="w"> </span><span class="kn">import</span> <span class="n">SessionContext</span> |
| <span class="gp">>>> </span><span class="n">ctx</span> <span class="o">=</span> <span class="n">SessionContext</span><span class="p">()</span> |
| <span class="gp">>>> </span><span class="n">df</span> <span class="o">=</span> <span class="n">ctx</span><span class="o">.</span><span class="n">sql</span><span class="p">(</span><span class="s2">"SELECT 1 AS value"</span><span class="p">)</span> |
| <span class="gp">>>> </span><span class="n">view</span> <span class="o">=</span> <span class="n">df</span><span class="o">.</span><span class="n">into_view</span><span class="p">()</span> |
| <span class="gp">>>> </span><span class="n">ctx</span><span class="o">.</span><span class="n">register_table</span><span class="p">(</span><span class="s2">"values_view"</span><span class="p">,</span> <span class="n">view</span><span class="p">)</span> |
| <span class="gp">>>> </span><span class="n">df</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span> <span class="c1"># The DataFrame is still usable</span> |
| <span class="gp">>>> </span><span class="n">ctx</span><span class="o">.</span><span class="n">sql</span><span class="p">(</span><span class="s2">"SELECT value FROM values_view"</span><span class="p">)</span><span class="o">.</span><span class="n">collect</span><span class="p">()</span> |
| </pre></div> |
| </div> |
| </dd></dl> |
| |
| <dl class="py method"> |
| <dt class="sig sig-object py" id="datafusion.dataframe.DataFrame.join"> |
| <span class="sig-name descname"><span class="pre">join</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">right</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#datafusion.dataframe.DataFrame" title="datafusion.dataframe.DataFrame"><span class="pre">DataFrame</span></a></span></em>, <em class="sig-param"><span class="n"><span class="pre">on</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">str</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">collections.abc.Sequence</span><span class="p"><span class="pre">[</span></span><span class="pre">str</span><span class="p"><span class="pre">]</span></span></span></em>, <em class="sig-param"><span class="n"><span class="pre">how</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">Literal</span><span class="p"><span class="pre">[</span></span><span class="s"><span class="pre">'inner'</span></span><span class="p"><span class="pre">,</span></span><span class="w"> </span><span class="s"><span class="pre">'left'</span></span><span class="p"><span class="pre">,</span></span><span class="w"> </span><span class="s"><span class="pre">'right'</span></span><span class="p"><span class="pre">,</span></span><span class="w"> </span><span class="s"><span class="pre">'full'</span></span><span class="p"><span class="pre">,</span></span><span class="w"> </span><span class="s"><span class="pre">'semi'</span></span><span class="p"><span class="pre">,</span></span><span class="w"> </span><span class="s"><span class="pre">'anti'</span></span><span class="p"><span class="pre">]</span></span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span 
class="default_value"><span class="pre">'inner'</span></span></em>, <em class="sig-param"><span class="o"><span class="pre">*</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">left_on</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">right_on</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">join_keys</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><a class="reference internal" href="#datafusion.dataframe.DataFrame" title="datafusion.dataframe.DataFrame"><span class="pre">DataFrame</span></a></span></span><a class="headerlink" href="#datafusion.dataframe.DataFrame.join" title="Link to this definition">¶</a></dt> |
| <dt class="sig sig-object py"> |
| <span class="sig-name descname"><span class="pre">join</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">right</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#datafusion.dataframe.DataFrame" title="datafusion.dataframe.DataFrame"><span class="pre">DataFrame</span></a></span></em>, <em class="sig-param"><span class="n"><span class="pre">on</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">how</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">Literal</span><span class="p"><span class="pre">[</span></span><span class="s"><span class="pre">'inner'</span></span><span class="p"><span class="pre">,</span></span><span class="w"> </span><span class="s"><span class="pre">'left'</span></span><span class="p"><span class="pre">,</span></span><span class="w"> </span><span class="s"><span class="pre">'right'</span></span><span class="p"><span class="pre">,</span></span><span class="w"> </span><span class="s"><span class="pre">'full'</span></span><span class="p"><span class="pre">,</span></span><span class="w"> </span><span class="s"><span class="pre">'semi'</span></span><span class="p"><span class="pre">,</span></span><span class="w"> </span><span class="s"><span class="pre">'anti'</span></span><span class="p"><span class="pre">]</span></span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">'inner'</span></span></em>, <em class="sig-param"><span class="o"><span 
class="pre">*</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">left_on</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">str</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">collections.abc.Sequence</span><span class="p"><span class="pre">[</span></span><span class="pre">str</span><span class="p"><span class="pre">]</span></span></span></em>, <em class="sig-param"><span class="n"><span class="pre">right_on</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">str</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">collections.abc.Sequence</span><span class="p"><span class="pre">[</span></span><span class="pre">str</span><span class="p"><span class="pre">]</span></span></span></em>, <em class="sig-param"><span class="n"><span class="pre">join_keys</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">tuple</span><span class="p"><span class="pre">[</span></span><span class="pre">list</span><span class="p"><span class="pre">[</span></span><span class="pre">str</span><span class="p"><span class="pre">]</span></span><span class="p"><span class="pre">,</span></span><span class="w"> </span><span class="pre">list</span><span class="p"><span class="pre">[</span></span><span class="pre">str</span><span class="p"><span class="pre">]</span></span><span class="p"><span class="pre">]</span></span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em><span 
class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><a class="reference internal" href="#datafusion.dataframe.DataFrame" title="datafusion.dataframe.DataFrame"><span class="pre">DataFrame</span></a></span></span></dt> |
| <dt class="sig sig-object py"> |
| <span class="sig-name descname"><span class="pre">join</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">right</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#datafusion.dataframe.DataFrame" title="datafusion.dataframe.DataFrame"><span class="pre">DataFrame</span></a></span></em>, <em class="sig-param"><span class="n"><span class="pre">on</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">how</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">Literal</span><span class="p"><span class="pre">[</span></span><span class="s"><span class="pre">'inner'</span></span><span class="p"><span class="pre">,</span></span><span class="w"> </span><span class="s"><span class="pre">'left'</span></span><span class="p"><span class="pre">,</span></span><span class="w"> </span><span class="s"><span class="pre">'right'</span></span><span class="p"><span class="pre">,</span></span><span class="w"> </span><span class="s"><span class="pre">'full'</span></span><span class="p"><span class="pre">,</span></span><span class="w"> </span><span class="s"><span class="pre">'semi'</span></span><span class="p"><span class="pre">,</span></span><span class="w"> </span><span class="s"><span class="pre">'anti'</span></span><span class="p"><span class="pre">]</span></span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">'inner'</span></span></em>, <em class="sig-param"><span class="o"><span 
class="pre">*</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">join_keys</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">tuple</span><span class="p"><span class="pre">[</span></span><span class="pre">list</span><span class="p"><span class="pre">[</span></span><span class="pre">str</span><span class="p"><span class="pre">]</span></span><span class="p"><span class="pre">,</span></span><span class="w"> </span><span class="pre">list</span><span class="p"><span class="pre">[</span></span><span class="pre">str</span><span class="p"><span class="pre">]</span></span><span class="p"><span class="pre">]</span></span></span></em>, <em class="sig-param"><span class="n"><span class="pre">left_on</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">right_on</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><a class="reference internal" href="#datafusion.dataframe.DataFrame" title="datafusion.dataframe.DataFrame"><span class="pre">DataFrame</span></a></span></span></dt> |
| <dd><p>Join this <a class="reference internal" href="#datafusion.dataframe.DataFrame" title="datafusion.dataframe.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a> with another <a class="reference internal" href="#datafusion.dataframe.DataFrame" title="datafusion.dataframe.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a>.</p> |
| <p>Either <cite>on</cite> must be provided, or both <cite>left_on</cite> and <cite>right_on</cite> must be provided together.</p> |
| <dl class="field-list simple"> |
| <dt class="field-odd">Parameters<span class="colon">:</span></dt> |
| <dd class="field-odd"><ul class="simple"> |
| <li><p><strong>right</strong> – Other DataFrame to join with.</p></li> |
| <li><p><strong>on</strong> – Column names to join on in both dataframes.</p></li> |
| <li><p><strong>how</strong> – Type of join to perform. Supported types are “inner”, “left”, |
| “right”, “full”, “semi”, “anti”.</p></li> |
| <li><p><strong>left_on</strong> – Join column or columns of the left dataframe.</p></li> |
| <li><p><strong>right_on</strong> – Join column or columns of the right dataframe.</p></li> |
| <li><p><strong>join_keys</strong> – Tuple of two lists of column names to join on. [Deprecated]</p></li> |
| </ul> |
| </dd> |
| <dt class="field-even">Returns<span class="colon">:</span></dt> |
| <dd class="field-even"><p>DataFrame after join.</p> |
| </dd> |
| </dl> |
| </dd></dl> |
| |
| <dl class="py method"> |
| <dt class="sig sig-object py" id="datafusion.dataframe.DataFrame.join_on"> |
| <span class="sig-name descname"><span class="pre">join_on</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">right</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#datafusion.dataframe.DataFrame" title="datafusion.dataframe.DataFrame"><span class="pre">DataFrame</span></a></span></em>, <em class="sig-param"><span class="o"><span class="pre">*</span></span><span class="n"><span class="pre">on_exprs</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="../expr/index.html#datafusion.expr.Expr" title="datafusion.expr.Expr"><span class="pre">datafusion.expr.Expr</span></a></span></em>, <em class="sig-param"><span class="n"><span class="pre">how</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">Literal</span><span class="p"><span class="pre">[</span></span><span class="s"><span class="pre">'inner'</span></span><span class="p"><span class="pre">,</span></span><span class="w"> </span><span class="s"><span class="pre">'left'</span></span><span class="p"><span class="pre">,</span></span><span class="w"> </span><span class="s"><span class="pre">'right'</span></span><span class="p"><span class="pre">,</span></span><span class="w"> </span><span class="s"><span class="pre">'full'</span></span><span class="p"><span class="pre">,</span></span><span class="w"> </span><span class="s"><span class="pre">'semi'</span></span><span class="p"><span class="pre">,</span></span><span class="w"> </span><span class="s"><span class="pre">'anti'</span></span><span class="p"><span class="pre">]</span></span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">'inner'</span></span></em><span 
class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><a class="reference internal" href="#datafusion.dataframe.DataFrame" title="datafusion.dataframe.DataFrame"><span class="pre">DataFrame</span></a></span></span><a class="headerlink" href="#datafusion.dataframe.DataFrame.join_on" title="Link to this definition">¶</a></dt> |
| <dd><p>Join two <a class="reference internal" href="#datafusion.dataframe.DataFrame" title="datafusion.dataframe.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a> using the specified expressions.</p> |
| <p>Join predicates must be <a class="reference internal" href="../expr/index.html#datafusion.expr.Expr" title="datafusion.expr.Expr"><code class="xref py py-class docutils literal notranslate"><span class="pre">Expr</span></code></a> objects, typically |
| built with <a class="reference internal" href="../index.html#datafusion.col" title="datafusion.col"><code class="xref py py-func docutils literal notranslate"><span class="pre">datafusion.col()</span></code></a>. On expressions are used to support |
| inequality predicates. Equality predicates are correctly optimized.</p> |
| <p>Example:</p> |
| <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="kn">from</span><span class="w"> </span><span class="nn">datafusion</span><span class="w"> </span><span class="kn">import</span> <span class="n">col</span> |
| <span class="n">df</span><span class="o">.</span><span class="n">join_on</span><span class="p">(</span><span class="n">other_df</span><span class="p">,</span> <span class="n">col</span><span class="p">(</span><span class="s2">"id"</span><span class="p">)</span> <span class="o">==</span> <span class="n">col</span><span class="p">(</span><span class="s2">"other_id"</span><span class="p">))</span> |
| </pre></div> |
| </div> |
| <dl class="field-list simple"> |
| <dt class="field-odd">Parameters<span class="colon">:</span></dt> |
| <dd class="field-odd"><ul class="simple"> |
| <li><p><strong>right</strong> – Other DataFrame to join with.</p></li> |
| <li><p><strong>on_exprs</strong> – One or more (in)equality predicates.</p></li> |
| <li><p><strong>how</strong> – Type of join to perform. Supported types are “inner”, “left”, |
| “right”, “full”, “semi”, “anti”.</p></li> |
| </ul> |
| </dd> |
| <dt class="field-even">Returns<span class="colon">:</span></dt> |
| <dd class="field-even"><p>DataFrame after join.</p> |
| </dd> |
| </dl> |
| </dd></dl> |
| |
| <dl class="py method"> |
| <dt class="sig sig-object py" id="datafusion.dataframe.DataFrame.limit"> |
| <span class="sig-name descname"><span class="pre">limit</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">count</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">offset</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">0</span></span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><a class="reference internal" href="#datafusion.dataframe.DataFrame" title="datafusion.dataframe.DataFrame"><span class="pre">DataFrame</span></a></span></span><a class="headerlink" href="#datafusion.dataframe.DataFrame.limit" title="Link to this definition">¶</a></dt> |
| <dd><p>Return a new <a class="reference internal" href="#datafusion.dataframe.DataFrame" title="datafusion.dataframe.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a> with a limited number of rows.</p> |
| <dl class="field-list simple"> |
| <dt class="field-odd">Parameters<span class="colon">:</span></dt> |
| <dd class="field-odd"><ul class="simple"> |
| <li><p><strong>count</strong> – Number of rows to limit the DataFrame to.</p></li> |
| <li><p><strong>offset</strong> – Number of rows to skip.</p></li> |
| </ul> |
| </dd> |
| <dt class="field-even">Returns<span class="colon">:</span></dt> |
| <dd class="field-even"><p>DataFrame after limiting.</p> |
| </dd> |
| </dl> |
| </dd></dl> |
| |
| <dl class="py method"> |
| <dt class="sig sig-object py" id="datafusion.dataframe.DataFrame.logical_plan"> |
| <span class="sig-name descname"><span class="pre">logical_plan</span></span><span class="sig-paren">(</span><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><a class="reference internal" href="../plan/index.html#datafusion.plan.LogicalPlan" title="datafusion.plan.LogicalPlan"><span class="pre">datafusion.plan.LogicalPlan</span></a></span></span><a class="headerlink" href="#datafusion.dataframe.DataFrame.logical_plan" title="Link to this definition">¶</a></dt> |
| <dd><p>Return the unoptimized <code class="docutils literal notranslate"><span class="pre">LogicalPlan</span></code>.</p> |
| <dl class="field-list simple"> |
| <dt class="field-odd">Returns<span class="colon">:</span></dt> |
| <dd class="field-odd"><p>Unoptimized logical plan.</p> |
| </dd> |
| </dl> |
| </dd></dl> |
| |
| <dl class="py method"> |
| <dt class="sig sig-object py" id="datafusion.dataframe.DataFrame.optimized_logical_plan"> |
| <span class="sig-name descname"><span class="pre">optimized_logical_plan</span></span><span class="sig-paren">(</span><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><a class="reference internal" href="../plan/index.html#datafusion.plan.LogicalPlan" title="datafusion.plan.LogicalPlan"><span class="pre">datafusion.plan.LogicalPlan</span></a></span></span><a class="headerlink" href="#datafusion.dataframe.DataFrame.optimized_logical_plan" title="Link to this definition">¶</a></dt> |
| <dd><p>Return the optimized <code class="docutils literal notranslate"><span class="pre">LogicalPlan</span></code>.</p> |
| <dl class="field-list simple"> |
| <dt class="field-odd">Returns<span class="colon">:</span></dt> |
| <dd class="field-odd"><p>Optimized logical plan.</p> |
| </dd> |
| </dl> |
| </dd></dl> |
| |
| <dl class="py method"> |
| <dt class="sig sig-object py" id="datafusion.dataframe.DataFrame.parse_sql_expr"> |
| <span class="sig-name descname"><span class="pre">parse_sql_expr</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">expr</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">str</span></span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><a class="reference internal" href="../expr/index.html#datafusion.expr.Expr" title="datafusion.expr.Expr"><span class="pre">datafusion.expr.Expr</span></a></span></span><a class="headerlink" href="#datafusion.dataframe.DataFrame.parse_sql_expr" title="Link to this definition">¶</a></dt> |
| <dd><p>Create a logical expression from SQL query text.</p> |
| <p>The expression is created and processed against the current schema.</p> |
| <p>Example:</p> |
| <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="kn">from</span><span class="w"> </span><span class="nn">datafusion</span><span class="w"> </span><span class="kn">import</span> <span class="n">col</span><span class="p">,</span> <span class="n">lit</span> |
| <span class="n">df</span><span class="o">.</span><span class="n">parse_sql_expr</span><span class="p">(</span><span class="s2">"a > 1"</span><span class="p">)</span> |
| |
| <span class="c1"># should produce:</span> |
| |
| <span class="n">col</span><span class="p">(</span><span class="s2">"a"</span><span class="p">)</span> <span class="o">></span> <span class="n">lit</span><span class="p">(</span><span class="mi">1</span><span class="p">)</span> |
| </pre></div> |
| </div> |
| <dl class="field-list simple"> |
| <dt class="field-odd">Parameters<span class="colon">:</span></dt> |
| <dd class="field-odd"><p><strong>expr</strong> – Expression string to be converted to a DataFusion expression.</p> |
| </dd> |
| <dt class="field-even">Returns<span class="colon">:</span></dt> |
| <dd class="field-even"><p>Logical expression.</p> |
| </dd> |
| </dl> |
| </dd></dl> |
| |
| <dl class="py method"> |
| <dt class="sig sig-object py" id="datafusion.dataframe.DataFrame.repartition"> |
| <span class="sig-name descname"><span class="pre">repartition</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">num</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span></span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><a class="reference internal" href="#datafusion.dataframe.DataFrame" title="datafusion.dataframe.DataFrame"><span class="pre">DataFrame</span></a></span></span><a class="headerlink" href="#datafusion.dataframe.DataFrame.repartition" title="Link to this definition">¶</a></dt> |
| <dd><p>Repartition a DataFrame into <code class="docutils literal notranslate"><span class="pre">num</span></code> partitions.</p> |
| <p>Batches are allocated using a round-robin algorithm.</p> |
| <dl class="field-list simple"> |
| <dt class="field-odd">Parameters<span class="colon">:</span></dt> |
| <dd class="field-odd"><p><strong>num</strong> – Number of partitions to repartition the DataFrame into.</p> |
| </dd> |
| <dt class="field-even">Returns<span class="colon">:</span></dt> |
| <dd class="field-even"><p>Repartitioned DataFrame.</p> |
| </dd> |
| </dl> |
| </dd></dl> |
| |
| <dl class="py method"> |
| <dt class="sig sig-object py" id="datafusion.dataframe.DataFrame.repartition_by_hash"> |
| <span class="sig-name descname"><span class="pre">repartition_by_hash</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="o"><span class="pre">*</span></span><span class="n"><span class="pre">exprs</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="../expr/index.html#datafusion.expr.Expr" title="datafusion.expr.Expr"><span class="pre">datafusion.expr.Expr</span></a></span></em>, <em class="sig-param"><span class="n"><span class="pre">num</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span></span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><a class="reference internal" href="#datafusion.dataframe.DataFrame" title="datafusion.dataframe.DataFrame"><span class="pre">DataFrame</span></a></span></span><a class="headerlink" href="#datafusion.dataframe.DataFrame.repartition_by_hash" title="Link to this definition">¶</a></dt> |
| <dd><p>Repartition a DataFrame using a hash partitioning scheme.</p> |
| <dl class="field-list simple"> |
| <dt class="field-odd">Parameters<span class="colon">:</span></dt> |
| <dd class="field-odd"><ul class="simple"> |
| <li><p><strong>exprs</strong> – Expressions to evaluate and perform hashing on.</p></li> |
| <li><p><strong>num</strong> – Number of partitions to repartition the DataFrame into.</p></li> |
| </ul> |
| </dd> |
| <dt class="field-even">Returns<span class="colon">:</span></dt> |
| <dd class="field-even"><p>Repartitioned DataFrame.</p> |
| </dd> |
| </dl> |
| </dd></dl> |
| |
| <dl class="py method"> |
| <dt class="sig sig-object py" id="datafusion.dataframe.DataFrame.schema"> |
| <span class="sig-name descname"><span class="pre">schema</span></span><span class="sig-paren">(</span><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><span class="pre">pyarrow.Schema</span></span></span><a class="headerlink" href="#datafusion.dataframe.DataFrame.schema" title="Link to this definition">¶</a></dt> |
| <dd><p>Return the <code class="xref py py-class docutils literal notranslate"><span class="pre">pyarrow.Schema</span></code> of this DataFrame.</p> |
| <p>The output schema contains information on the name, data type, and |
| nullability for each column.</p> |
| <dl class="field-list simple"> |
| <dt class="field-odd">Returns<span class="colon">:</span></dt> |
| <dd class="field-odd"><p>Schema describing the DataFrame.</p> |
| </dd> |
| </dl> |
| </dd></dl> |
| |
| <dl class="py method"> |
| <dt class="sig sig-object py" id="datafusion.dataframe.DataFrame.select"> |
| <span class="sig-name descname"><span class="pre">select</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="o"><span class="pre">*</span></span><span class="n"><span class="pre">exprs</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="../expr/index.html#datafusion.expr.Expr" title="datafusion.expr.Expr"><span class="pre">datafusion.expr.Expr</span></a><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">str</span></span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><a class="reference internal" href="#datafusion.dataframe.DataFrame" title="datafusion.dataframe.DataFrame"><span class="pre">DataFrame</span></a></span></span><a class="headerlink" href="#datafusion.dataframe.DataFrame.select" title="Link to this definition">¶</a></dt> |
| <dd><p>Project arbitrary expressions into a new <a class="reference internal" href="#datafusion.dataframe.DataFrame" title="datafusion.dataframe.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a>.</p> |
| <dl class="field-list simple"> |
| <dt class="field-odd">Parameters<span class="colon">:</span></dt> |
| <dd class="field-odd"><p><strong>exprs</strong> – Either column names or <a class="reference internal" href="../expr/index.html#datafusion.expr.Expr" title="datafusion.expr.Expr"><code class="xref py py-class docutils literal notranslate"><span class="pre">Expr</span></code></a> to select.</p> |
| </dd> |
| <dt class="field-even">Returns<span class="colon">:</span></dt> |
| <dd class="field-even"><p>DataFrame after projection. It has one column for each expression.</p> |
| </dd> |
| </dl> |
| <p>Example usage:</p> |
| <p>The following example will return 3 columns from the original dataframe. |
| The first two columns will be the original columns <code class="docutils literal notranslate"><span class="pre">a</span></code> and <code class="docutils literal notranslate"><span class="pre">b</span></code> since the |
| string “a” is assumed to refer to column selection. Also a duplicate of |
| column <code class="docutils literal notranslate"><span class="pre">a</span></code> will be returned with the column name <code class="docutils literal notranslate"><span class="pre">alternate_a</span></code>:</p> |
| <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">df</span> <span class="o">=</span> <span class="n">df</span><span class="o">.</span><span class="n">select</span><span class="p">(</span><span class="s2">"a"</span><span class="p">,</span> <span class="n">col</span><span class="p">(</span><span class="s2">"b"</span><span class="p">),</span> <span class="n">col</span><span class="p">(</span><span class="s2">"a"</span><span class="p">)</span><span class="o">.</span><span class="n">alias</span><span class="p">(</span><span class="s2">"alternate_a"</span><span class="p">))</span> |
| </pre></div> |
| </div> |
| </dd></dl> |
| |
| <dl class="py method"> |
| <dt class="sig sig-object py" id="datafusion.dataframe.DataFrame.select_columns"> |
| <span class="sig-name descname"><span class="pre">select_columns</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="o"><span class="pre">*</span></span><span class="n"><span class="pre">args</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">str</span></span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><a class="reference internal" href="#datafusion.dataframe.DataFrame" title="datafusion.dataframe.DataFrame"><span class="pre">DataFrame</span></a></span></span><a class="headerlink" href="#datafusion.dataframe.DataFrame.select_columns" title="Link to this definition">¶</a></dt> |
| <dd><p>Filter the DataFrame by columns.</p> |
| <dl class="field-list simple"> |
| <dt class="field-odd">Returns<span class="colon">:</span></dt> |
| <dd class="field-odd"><p>DataFrame only containing the specified columns.</p> |
| </dd> |
| </dl> |
| </dd></dl> |
| |
| <dl class="py method"> |
| <dt class="sig sig-object py" id="datafusion.dataframe.DataFrame.show"> |
| <span class="sig-name descname"><span class="pre">show</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">num</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">20</span></span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><span class="pre">None</span></span></span><a class="headerlink" href="#datafusion.dataframe.DataFrame.show" title="Link to this definition">¶</a></dt> |
| <dd><p>Execute the DataFrame and print the result to the console.</p> |
| <dl class="field-list simple"> |
| <dt class="field-odd">Parameters<span class="colon">:</span></dt> |
| <dd class="field-odd"><p><strong>num</strong> – Number of lines to show.</p> |
| </dd> |
| </dl> |
| </dd></dl> |
| |
| <dl class="py method"> |
| <dt class="sig sig-object py" id="datafusion.dataframe.DataFrame.sort"> |
| <span class="sig-name descname"><span class="pre">sort</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="o"><span class="pre">*</span></span><span class="n"><span class="pre">exprs</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">datafusion.expr.SortKey</span></span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><a class="reference internal" href="#datafusion.dataframe.DataFrame" title="datafusion.dataframe.DataFrame"><span class="pre">DataFrame</span></a></span></span><a class="headerlink" href="#datafusion.dataframe.DataFrame.sort" title="Link to this definition">¶</a></dt> |
| <dd><p>Sort the DataFrame by the specified sorting expressions or column names.</p> |
| <p>Note that any expression can be turned into a sort expression by |
| calling its <code class="docutils literal notranslate"><span class="pre">sort</span></code> method.</p> |
| <dl class="field-list simple"> |
| <dt class="field-odd">Parameters<span class="colon">:</span></dt> |
| <dd class="field-odd"><p><strong>exprs</strong> – Sort expressions or column names, applied in order.</p> |
| </dd> |
| <dt class="field-even">Returns<span class="colon">:</span></dt> |
| <dd class="field-even"><p>DataFrame after sorting.</p> |
| </dd> |
| </dl> |
| </dd></dl> |
| |
| <dl class="py method"> |
| <dt class="sig sig-object py" id="datafusion.dataframe.DataFrame.tail"> |
| <span class="sig-name descname"><span class="pre">tail</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">n</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">5</span></span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><a class="reference internal" href="#datafusion.dataframe.DataFrame" title="datafusion.dataframe.DataFrame"><span class="pre">DataFrame</span></a></span></span><a class="headerlink" href="#datafusion.dataframe.DataFrame.tail" title="Link to this definition">¶</a></dt> |
| <dd><p>Return a new <a class="reference internal" href="#datafusion.dataframe.DataFrame" title="datafusion.dataframe.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a> with a limited number of rows.</p> |
| <p>Be aware this could be potentially expensive since the number of rows in the |
| DataFrame needs to be determined. This is done by collecting it.</p> |
| <dl class="field-list simple"> |
| <dt class="field-odd">Parameters<span class="colon">:</span></dt> |
| <dd class="field-odd"><p><strong>n</strong> – Number of rows to take from the tail of the DataFrame.</p> |
| </dd> |
| <dt class="field-even">Returns<span class="colon">:</span></dt> |
| <dd class="field-even"><p>DataFrame after limiting.</p> |
| </dd> |
| </dl> |
| </dd></dl> |
| |
| <dl class="py method"> |
| <dt class="sig sig-object py" id="datafusion.dataframe.DataFrame.to_arrow_table"> |
| <span class="sig-name descname"><span class="pre">to_arrow_table</span></span><span class="sig-paren">(</span><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><span class="pre">pyarrow.Table</span></span></span><a class="headerlink" href="#datafusion.dataframe.DataFrame.to_arrow_table" title="Link to this definition">¶</a></dt> |
| <dd><p>Execute the <a class="reference internal" href="#datafusion.dataframe.DataFrame" title="datafusion.dataframe.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a> and convert it into an Arrow Table.</p> |
| <dl class="field-list simple"> |
| <dt class="field-odd">Returns<span class="colon">:</span></dt> |
| <dd class="field-odd"><p>Arrow Table.</p> |
| </dd> |
| </dl> |
| </dd></dl> |
| |
| <dl class="py method"> |
| <dt class="sig sig-object py" id="datafusion.dataframe.DataFrame.to_pandas"> |
| <span class="sig-name descname"><span class="pre">to_pandas</span></span><span class="sig-paren">(</span><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><span class="pre">pandas.DataFrame</span></span></span><a class="headerlink" href="#datafusion.dataframe.DataFrame.to_pandas" title="Link to this definition">¶</a></dt> |
| <dd><p>Execute the <a class="reference internal" href="#datafusion.dataframe.DataFrame" title="datafusion.dataframe.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a> and convert it into a Pandas DataFrame.</p> |
| <dl class="field-list simple"> |
| <dt class="field-odd">Returns<span class="colon">:</span></dt> |
| <dd class="field-odd"><p>Pandas DataFrame.</p> |
| </dd> |
| </dl> |
| </dd></dl> |
| |
| <dl class="py method"> |
| <dt class="sig sig-object py" id="datafusion.dataframe.DataFrame.to_polars"> |
| <span class="sig-name descname"><span class="pre">to_polars</span></span><span class="sig-paren">(</span><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><span class="pre">polars.DataFrame</span></span></span><a class="headerlink" href="#datafusion.dataframe.DataFrame.to_polars" title="Link to this definition">¶</a></dt> |
| <dd><p>Execute the <a class="reference internal" href="#datafusion.dataframe.DataFrame" title="datafusion.dataframe.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a> and convert it into a Polars DataFrame.</p> |
| <dl class="field-list simple"> |
| <dt class="field-odd">Returns<span class="colon">:</span></dt> |
| <dd class="field-odd"><p>Polars DataFrame.</p> |
| </dd> |
| </dl> |
| </dd></dl> |
| |
| <dl class="py method"> |
| <dt class="sig sig-object py" id="datafusion.dataframe.DataFrame.to_pydict"> |
| <span class="sig-name descname"><span class="pre">to_pydict</span></span><span class="sig-paren">(</span><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><span class="pre">dict</span><span class="p"><span class="pre">[</span></span><span class="pre">str</span><span class="p"><span class="pre">,</span></span><span class="w"> </span><span class="pre">list</span><span class="p"><span class="pre">[</span></span><span class="pre">Any</span><span class="p"><span class="pre">]</span></span><span class="p"><span class="pre">]</span></span></span></span><a class="headerlink" href="#datafusion.dataframe.DataFrame.to_pydict" title="Link to this definition">¶</a></dt> |
| <dd><p>Execute the <a class="reference internal" href="#datafusion.dataframe.DataFrame" title="datafusion.dataframe.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a> and convert it into a dictionary of lists.</p> |
| <dl class="field-list simple"> |
| <dt class="field-odd">Returns<span class="colon">:</span></dt> |
| <dd class="field-odd"><p>Dictionary of lists.</p> |
| </dd> |
| </dl> |
| </dd></dl> |
| |
| <dl class="py method"> |
| <dt class="sig sig-object py" id="datafusion.dataframe.DataFrame.to_pylist"> |
| <span class="sig-name descname"><span class="pre">to_pylist</span></span><span class="sig-paren">(</span><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><span class="pre">list</span><span class="p"><span class="pre">[</span></span><span class="pre">dict</span><span class="p"><span class="pre">[</span></span><span class="pre">str</span><span class="p"><span class="pre">,</span></span><span class="w"> </span><span class="pre">Any</span><span class="p"><span class="pre">]</span></span><span class="p"><span class="pre">]</span></span></span></span><a class="headerlink" href="#datafusion.dataframe.DataFrame.to_pylist" title="Link to this definition">¶</a></dt> |
| <dd><p>Execute the <a class="reference internal" href="#datafusion.dataframe.DataFrame" title="datafusion.dataframe.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a> and convert it into a list of dictionaries.</p> |
| <dl class="field-list simple"> |
| <dt class="field-odd">Returns<span class="colon">:</span></dt> |
| <dd class="field-odd"><p>List of dictionaries.</p> |
| </dd> |
| </dl> |
| </dd></dl> |
| |
| <dl class="py method"> |
| <dt class="sig sig-object py" id="datafusion.dataframe.DataFrame.transform"> |
| <span class="sig-name descname"><span class="pre">transform</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">func</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">Callable</span><span class="p"><span class="pre">[</span></span><span class="pre">Ellipsis</span><span class="p"><span class="pre">,</span></span><span class="w"> </span><a class="reference internal" href="#datafusion.dataframe.DataFrame" title="datafusion.dataframe.DataFrame"><span class="pre">DataFrame</span></a><span class="p"><span class="pre">]</span></span></span></em>, <em class="sig-param"><span class="o"><span class="pre">*</span></span><span class="n"><span class="pre">args</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">Any</span></span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><a class="reference internal" href="#datafusion.dataframe.DataFrame" title="datafusion.dataframe.DataFrame"><span class="pre">DataFrame</span></a></span></span><a class="headerlink" href="#datafusion.dataframe.DataFrame.transform" title="Link to this definition">¶</a></dt> |
| <dd><p>Apply a function to the current DataFrame which returns another DataFrame.</p> |
| <p>This is useful for chaining together multiple functions. For example:</p> |
| <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="k">def</span><span class="w"> </span><span class="nf">add_3</span><span class="p">(</span><span class="n">df</span><span class="p">:</span> <span class="n">DataFrame</span><span class="p">)</span> <span class="o">-></span> <span class="n">DataFrame</span><span class="p">:</span> |
| <span class="k">return</span> <span class="n">df</span><span class="o">.</span><span class="n">with_column</span><span class="p">(</span><span class="s2">"modified"</span><span class="p">,</span> <span class="n">lit</span><span class="p">(</span><span class="mi">3</span><span class="p">))</span> |
| |
| <span class="k">def</span><span class="w"> </span><span class="nf">within_limit</span><span class="p">(</span><span class="n">df</span><span class="p">:</span> <span class="n">DataFrame</span><span class="p">,</span> <span class="n">limit</span><span class="p">:</span> <span class="nb">int</span><span class="p">)</span> <span class="o">-></span> <span class="n">DataFrame</span><span class="p">:</span> |
| <span class="k">return</span> <span class="n">df</span><span class="o">.</span><span class="n">filter</span><span class="p">(</span><span class="n">col</span><span class="p">(</span><span class="s2">"a"</span><span class="p">)</span> <span class="o">&lt;</span> <span class="n">lit</span><span class="p">(</span><span class="n">limit</span><span class="p">))</span><span class="o">.</span><span class="n">distinct</span><span class="p">()</span> |
| |
| <span class="n">df</span> <span class="o">=</span> <span class="n">df</span><span class="o">.</span><span class="n">transform</span><span class="p">(</span><span class="n">add_3</span><span class="p">)</span><span class="o">.</span><span class="n">transform</span><span class="p">(</span><span class="n">within_limit</span><span class="p">,</span> <span class="mi">4</span><span class="p">)</span> |
| </pre></div> |
| </div> |
| <dl class="field-list simple"> |
| <dt class="field-odd">Parameters<span class="colon">:</span></dt> |
| <dd class="field-odd"><ul class="simple"> |
| <li><p><strong>func</strong> – A callable function that takes a DataFrame as its first argument</p></li> |
| <li><p><strong>args</strong> – Zero or more arguments to pass to <cite>func</cite></p></li> |
| </ul> |
| </dd> |
| <dt class="field-even">Returns<span class="colon">:</span></dt> |
| <dd class="field-even"><p>DataFrame after applying func to the original dataframe.</p> |
| </dd> |
| <dt class="field-odd">Return type<span class="colon">:</span></dt> |
| <dd class="field-odd"><p><a class="reference internal" href="#datafusion.dataframe.DataFrame" title="datafusion.dataframe.DataFrame">DataFrame</a></p> |
| </dd> |
| </dl> |
| </dd></dl> |
| |
| <dl class="py method"> |
| <dt class="sig sig-object py" id="datafusion.dataframe.DataFrame.union"> |
| <span class="sig-name descname"><span class="pre">union</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">other</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#datafusion.dataframe.DataFrame" title="datafusion.dataframe.DataFrame"><span class="pre">DataFrame</span></a></span></em>, <em class="sig-param"><span class="n"><span class="pre">distinct</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">bool</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">False</span></span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><a class="reference internal" href="#datafusion.dataframe.DataFrame" title="datafusion.dataframe.DataFrame"><span class="pre">DataFrame</span></a></span></span><a class="headerlink" href="#datafusion.dataframe.DataFrame.union" title="Link to this definition">¶</a></dt> |
| <dd><p>Calculate the union of two <a class="reference internal" href="#datafusion.dataframe.DataFrame" title="datafusion.dataframe.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a>.</p> |
| <p>The two <a class="reference internal" href="#datafusion.dataframe.DataFrame" title="datafusion.dataframe.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a> must have exactly the same schema.</p> |
| <dl class="field-list simple"> |
| <dt class="field-odd">Parameters<span class="colon">:</span></dt> |
| <dd class="field-odd"><ul class="simple"> |
| <li><p><strong>other</strong> – DataFrame to union with.</p></li> |
| <li><p><strong>distinct</strong> – If <code class="docutils literal notranslate"><span class="pre">True</span></code>, duplicate rows will be removed.</p></li> |
| </ul> |
| </dd> |
| <dt class="field-even">Returns<span class="colon">:</span></dt> |
| <dd class="field-even"><p>DataFrame after union.</p> |
| </dd> |
| </dl> |
| </dd></dl> |
| |
| <dl class="py method"> |
| <dt class="sig sig-object py" id="datafusion.dataframe.DataFrame.union_distinct"> |
| <span class="sig-name descname"><span class="pre">union_distinct</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">other</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#datafusion.dataframe.DataFrame" title="datafusion.dataframe.DataFrame"><span class="pre">DataFrame</span></a></span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><a class="reference internal" href="#datafusion.dataframe.DataFrame" title="datafusion.dataframe.DataFrame"><span class="pre">DataFrame</span></a></span></span><a class="headerlink" href="#datafusion.dataframe.DataFrame.union_distinct" title="Link to this definition">¶</a></dt> |
| <dd><p>Calculate the distinct union of two <a class="reference internal" href="#datafusion.dataframe.DataFrame" title="datafusion.dataframe.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a>.</p> |
| <p>The two <a class="reference internal" href="#datafusion.dataframe.DataFrame" title="datafusion.dataframe.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a> must have exactly the same schema. |
| Any duplicate rows are discarded.</p> |
| <dl class="field-list simple"> |
| <dt class="field-odd">Parameters<span class="colon">:</span></dt> |
| <dd class="field-odd"><p><strong>other</strong> – DataFrame to union with.</p> |
| </dd> |
| <dt class="field-even">Returns<span class="colon">:</span></dt> |
| <dd class="field-even"><p>DataFrame after union.</p> |
| </dd> |
| </dl> |
| </dd></dl> |
| |
| <dl class="py method"> |
| <dt class="sig sig-object py" id="datafusion.dataframe.DataFrame.unnest_columns"> |
| <span class="sig-name descname"><span class="pre">unnest_columns</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="o"><span class="pre">*</span></span><span class="n"><span class="pre">columns</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">str</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">preserve_nulls</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">bool</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">True</span></span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><a class="reference internal" href="#datafusion.dataframe.DataFrame" title="datafusion.dataframe.DataFrame"><span class="pre">DataFrame</span></a></span></span><a class="headerlink" href="#datafusion.dataframe.DataFrame.unnest_columns" title="Link to this definition">¶</a></dt> |
| <dd><p>Expand columns of arrays into a single row per array element.</p> |
| <dl class="field-list simple"> |
| <dt class="field-odd">Parameters<span class="colon">:</span></dt> |
| <dd class="field-odd"><ul class="simple"> |
| <li><p><strong>columns</strong> – Column names to perform unnest operation on.</p></li> |
| <li><p><strong>preserve_nulls</strong> – If False, rows with null entries will not be |
| returned.</p></li> |
| </ul> |
| </dd> |
| <dt class="field-even">Returns<span class="colon">:</span></dt> |
| <dd class="field-even"><p>A DataFrame with the columns expanded.</p> |
| </dd> |
| </dl> |
| </dd></dl> |
| |
| <dl class="py method"> |
| <dt class="sig sig-object py" id="datafusion.dataframe.DataFrame.with_column"> |
| <span class="sig-name descname"><span class="pre">with_column</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">name</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">str</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">expr</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="../expr/index.html#datafusion.expr.Expr" title="datafusion.expr.Expr"><span class="pre">datafusion.expr.Expr</span></a></span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><a class="reference internal" href="#datafusion.dataframe.DataFrame" title="datafusion.dataframe.DataFrame"><span class="pre">DataFrame</span></a></span></span><a class="headerlink" href="#datafusion.dataframe.DataFrame.with_column" title="Link to this definition">¶</a></dt> |
| <dd><p>Add an additional column to the DataFrame.</p> |
| <p>The <code class="docutils literal notranslate"><span class="pre">expr</span></code> must be an <a class="reference internal" href="../expr/index.html#datafusion.expr.Expr" title="datafusion.expr.Expr"><code class="xref py py-class docutils literal notranslate"><span class="pre">Expr</span></code></a> constructed with |
| <a class="reference internal" href="../index.html#datafusion.col" title="datafusion.col"><code class="xref py py-func docutils literal notranslate"><span class="pre">datafusion.col()</span></code></a> or <a class="reference internal" href="../index.html#datafusion.lit" title="datafusion.lit"><code class="xref py py-func docutils literal notranslate"><span class="pre">datafusion.lit()</span></code></a>.</p> |
| <p>Example:</p> |
| <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="kn">from</span><span class="w"> </span><span class="nn">datafusion</span><span class="w"> </span><span class="kn">import</span> <span class="n">col</span><span class="p">,</span> <span class="n">lit</span> |
| <span class="n">df</span><span class="o">.</span><span class="n">with_column</span><span class="p">(</span><span class="s2">"b"</span><span class="p">,</span> <span class="n">col</span><span class="p">(</span><span class="s2">"a"</span><span class="p">)</span> <span class="o">+</span> <span class="n">lit</span><span class="p">(</span><span class="mi">1</span><span class="p">))</span> |
| </pre></div> |
| </div> |
| <dl class="field-list simple"> |
| <dt class="field-odd">Parameters<span class="colon">:</span></dt> |
| <dd class="field-odd"><ul class="simple"> |
| <li><p><strong>name</strong> – Name of the column to add.</p></li> |
| <li><p><strong>expr</strong> – Expression to compute the column.</p></li> |
| </ul> |
| </dd> |
| <dt class="field-even">Returns<span class="colon">:</span></dt> |
| <dd class="field-even"><p>DataFrame with the new column.</p> |
| </dd> |
| </dl> |
| </dd></dl> |
| |
| <dl class="py method"> |
| <dt class="sig sig-object py" id="datafusion.dataframe.DataFrame.with_column_renamed"> |
| <span class="sig-name descname"><span class="pre">with_column_renamed</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">old_name</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">str</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">new_name</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">str</span></span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><a class="reference internal" href="#datafusion.dataframe.DataFrame" title="datafusion.dataframe.DataFrame"><span class="pre">DataFrame</span></a></span></span><a class="headerlink" href="#datafusion.dataframe.DataFrame.with_column_renamed" title="Link to this definition">¶</a></dt> |
| <dd><p>Rename one column by applying a new projection.</p> |
| <p>This is a no-op if the column to be renamed does not exist.</p> |
| <p>The method supports case-sensitive renaming by wrapping the column name |
| in one of the following symbols (" or ' or `).</p> |
| <dl class="field-list simple"> |
| <dt class="field-odd">Parameters<span class="colon">:</span></dt> |
| <dd class="field-odd"><ul class="simple"> |
| <li><p><strong>old_name</strong> – Old column name.</p></li> |
| <li><p><strong>new_name</strong> – New column name.</p></li> |
| </ul> |
| </dd> |
| <dt class="field-even">Returns<span class="colon">:</span></dt> |
| <dd class="field-even"><p>DataFrame with the column renamed.</p> |
| </dd> |
| </dl> |
| </dd></dl> |
| |
| <dl class="py method"> |
| <dt class="sig sig-object py" id="datafusion.dataframe.DataFrame.with_columns"> |
| <span class="sig-name descname"><span class="pre">with_columns</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="o"><span class="pre">*</span></span><span class="n"><span class="pre">exprs</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="../expr/index.html#datafusion.expr.Expr" title="datafusion.expr.Expr"><span class="pre">datafusion.expr.Expr</span></a><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">Iterable</span><span class="p"><span class="pre">[</span></span><a class="reference internal" href="../expr/index.html#datafusion.expr.Expr" title="datafusion.expr.Expr"><span class="pre">datafusion.expr.Expr</span></a><span class="p"><span class="pre">]</span></span></span></em>, <em class="sig-param"><span class="o"><span class="pre">**</span></span><span class="n"><span class="pre">named_exprs</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="../expr/index.html#datafusion.expr.Expr" title="datafusion.expr.Expr"><span class="pre">datafusion.expr.Expr</span></a></span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><a class="reference internal" href="#datafusion.dataframe.DataFrame" title="datafusion.dataframe.DataFrame"><span class="pre">DataFrame</span></a></span></span><a class="headerlink" href="#datafusion.dataframe.DataFrame.with_columns" title="Link to this definition">¶</a></dt> |
| <dd><p>Add columns to the DataFrame.</p> |
| <p>Columns can be added by passing expressions, iterables of expressions, or named expressions. |
| All expressions must be <a class="reference internal" href="../expr/index.html#datafusion.expr.Expr" title="datafusion.expr.Expr"><code class="xref py py-class docutils literal notranslate"><span class="pre">Expr</span></code></a> objects created via |
| <a class="reference internal" href="../index.html#datafusion.col" title="datafusion.col"><code class="xref py py-func docutils literal notranslate"><span class="pre">datafusion.col()</span></code></a> or <a class="reference internal" href="../index.html#datafusion.lit" title="datafusion.lit"><code class="xref py py-func docutils literal notranslate"><span class="pre">datafusion.lit()</span></code></a>. |
| To pass named expressions use the form <code class="docutils literal notranslate"><span class="pre">name=Expr</span></code>.</p> |
| <p>Example usage: The following will add 4 columns labeled <code class="docutils literal notranslate"><span class="pre">a</span></code>, <code class="docutils literal notranslate"><span class="pre">b</span></code>, <code class="docutils literal notranslate"><span class="pre">c</span></code>, |
| and <code class="docutils literal notranslate"><span class="pre">d</span></code>:</p> |
| <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="kn">from</span><span class="w"> </span><span class="nn">datafusion</span><span class="w"> </span><span class="kn">import</span> <span class="n">col</span><span class="p">,</span> <span class="n">lit</span> |
| <span class="n">df</span> <span class="o">=</span> <span class="n">df</span><span class="o">.</span><span class="n">with_columns</span><span class="p">(</span> |
| <span class="n">col</span><span class="p">(</span><span class="s2">"x"</span><span class="p">)</span><span class="o">.</span><span class="n">alias</span><span class="p">(</span><span class="s2">"a"</span><span class="p">),</span> |
| <span class="p">[</span><span class="n">lit</span><span class="p">(</span><span class="mi">1</span><span class="p">)</span><span class="o">.</span><span class="n">alias</span><span class="p">(</span><span class="s2">"b"</span><span class="p">),</span> <span class="n">col</span><span class="p">(</span><span class="s2">"y"</span><span class="p">)</span><span class="o">.</span><span class="n">alias</span><span class="p">(</span><span class="s2">"c"</span><span class="p">)],</span> |
| <span class="n">d</span><span class="o">=</span><span class="n">lit</span><span class="p">(</span><span class="mi">3</span><span class="p">)</span> |
| <span class="p">)</span> |
| </pre></div> |
| </div> |
| <dl class="field-list simple"> |
| <dt class="field-odd">Parameters<span class="colon">:</span></dt> |
| <dd class="field-odd"><ul class="simple"> |
| <li><p><strong>exprs</strong> – Either a single expression or an iterable of expressions to add.</p></li> |
| <li><p><strong>named_exprs</strong> – Named expressions in the form of <code class="docutils literal notranslate"><span class="pre">name=expr</span></code></p></li> |
| </ul> |
| </dd> |
| <dt class="field-even">Returns<span class="colon">:</span></dt> |
| <dd class="field-even"><p>DataFrame with the new columns added.</p> |
| </dd> |
| </dl> |
| </dd></dl> |
| |
| <dl class="py method"> |
| <dt class="sig sig-object py" id="datafusion.dataframe.DataFrame.write_csv"> |
| <span class="sig-name descname"><span class="pre">write_csv</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">path</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">str</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">pathlib.Path</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">with_header</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">bool</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">False</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">write_options</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#datafusion.dataframe.DataFrameWriteOptions" title="datafusion.dataframe.DataFrameWriteOptions"><span class="pre">DataFrameWriteOptions</span></a><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><span class="pre">None</span></span></span><a class="headerlink" href="#datafusion.dataframe.DataFrame.write_csv" title="Link to this definition">¶</a></dt> |
| <dd><p>Execute the <a class="reference internal" href="#datafusion.dataframe.DataFrame" title="datafusion.dataframe.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a> and write the results to a CSV file.</p> |
| <dl class="field-list simple"> |
| <dt class="field-odd">Parameters<span class="colon">:</span></dt> |
| <dd class="field-odd"><ul class="simple"> |
| <li><p><strong>path</strong> – Path of the CSV file to write.</p></li> |
| <li><p><strong>with_header</strong> – If true, output the CSV header row.</p></li> |
| <li><p><strong>write_options</strong> – Options that impact how the DataFrame is written.</p></li> |
| </ul> |
| </dd> |
| </dl> |
| </dd></dl> |
| |
| <dl class="py method"> |
| <dt class="sig sig-object py" id="datafusion.dataframe.DataFrame.write_json"> |
| <span class="sig-name descname"><span class="pre">write_json</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">path</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">str</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">pathlib.Path</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">write_options</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#datafusion.dataframe.DataFrameWriteOptions" title="datafusion.dataframe.DataFrameWriteOptions"><span class="pre">DataFrameWriteOptions</span></a><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><span class="pre">None</span></span></span><a class="headerlink" href="#datafusion.dataframe.DataFrame.write_json" title="Link to this definition">¶</a></dt> |
| <dd><p>Execute the <a class="reference internal" href="#datafusion.dataframe.DataFrame" title="datafusion.dataframe.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a> and write the results to a JSON file.</p> |
| <dl class="field-list simple"> |
| <dt class="field-odd">Parameters<span class="colon">:</span></dt> |
| <dd class="field-odd"><ul class="simple"> |
| <li><p><strong>path</strong> – Path of the JSON file to write.</p></li> |
| <li><p><strong>write_options</strong> – Options that impact how the DataFrame is written.</p></li> |
| </ul> |
| </dd> |
| </dl> |
| </dd></dl> |
| |
| <dl class="py method"> |
| <dt class="sig sig-object py" id="datafusion.dataframe.DataFrame.write_parquet"> |
| <span class="sig-name descname"><span class="pre">write_parquet</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">path</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">str</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">pathlib.Path</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">compression</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">str</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">compression_level</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">write_options</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#datafusion.dataframe.DataFrameWriteOptions" title="datafusion.dataframe.DataFrameWriteOptions"><span class="pre">DataFrameWriteOptions</span></a><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><span class="pre">None</span></span></span><a 
class="headerlink" href="#datafusion.dataframe.DataFrame.write_parquet" title="Link to this definition">¶</a></dt> |
| <dt class="sig sig-object py"> |
| <span class="sig-name descname"><span class="pre">write_parquet</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">path</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">str</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">pathlib.Path</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">compression</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#datafusion.dataframe.Compression" title="datafusion.dataframe.Compression"><span class="pre">Compression</span></a></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">Compression.ZSTD</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">compression_level</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">write_options</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#datafusion.dataframe.DataFrameWriteOptions" title="datafusion.dataframe.DataFrameWriteOptions"><span class="pre">DataFrameWriteOptions</span></a><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span 
class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><span class="pre">None</span></span></span></dt> |
| <dt class="sig sig-object py"> |
| <span class="sig-name descname"><span class="pre">write_parquet</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">path</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">str</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">pathlib.Path</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">compression</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#datafusion.dataframe.ParquetWriterOptions" title="datafusion.dataframe.ParquetWriterOptions"><span class="pre">ParquetWriterOptions</span></a></span></em>, <em class="sig-param"><span class="n"><span class="pre">compression_level</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">write_options</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#datafusion.dataframe.DataFrameWriteOptions" title="datafusion.dataframe.DataFrameWriteOptions"><span class="pre">DataFrameWriteOptions</span></a><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><span 
class="pre">None</span></span></span></dt> |
| <dd><p>Execute the <a class="reference internal" href="#datafusion.dataframe.DataFrame" title="datafusion.dataframe.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a> and write the results to a Parquet file.</p> |
| <p>Available compression types are:</p> |
| <ul class="simple"> |
| <li><p>“uncompressed”: No compression.</p></li> |
| <li><p>“snappy”: Snappy compression.</p></li> |
| <li><p>“gzip”: Gzip compression.</p></li> |
| <li><p>“brotli”: Brotli compression.</p></li> |
| <li><p>“lz4”: LZ4 compression.</p></li> |
| <li><p>“lz4_raw”: LZ4_RAW compression.</p></li> |
| <li><p>“zstd”: Zstandard compression.</p></li> |
| </ul> |
| <p>LZO compression is not yet implemented in arrow-rs and is therefore |
| excluded.</p> |
| <dl class="field-list simple"> |
| <dt class="field-odd">Parameters<span class="colon">:</span></dt> |
| <dd class="field-odd"><ul class="simple"> |
| <li><p><strong>path</strong> – Path of the Parquet file to write.</p></li> |
| <li><p><strong>compression</strong> – Compression type to use. Default is “ZSTD”.</p></li> |
| <li><p><strong>compression_level</strong> – Compression level to use. For ZSTD, the |
| recommended range is 1 to 22, with the default being 4. Higher levels |
| provide better compression but slower speed.</p></li> |
| <li><p><strong>write_options</strong> – Options that impact how the DataFrame is written.</p></li> |
| </ul> |
| </dd> |
| </dl> |
| </dd></dl> |
| |
| <dl class="py method"> |
| <dt class="sig sig-object py" id="datafusion.dataframe.DataFrame.write_parquet_with_options"> |
| <span class="sig-name descname"><span class="pre">write_parquet_with_options</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">path</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">str</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">pathlib.Path</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">options</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#datafusion.dataframe.ParquetWriterOptions" title="datafusion.dataframe.ParquetWriterOptions"><span class="pre">ParquetWriterOptions</span></a></span></em>, <em class="sig-param"><span class="n"><span class="pre">write_options</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#datafusion.dataframe.DataFrameWriteOptions" title="datafusion.dataframe.DataFrameWriteOptions"><span class="pre">DataFrameWriteOptions</span></a><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><span class="pre">None</span></span></span><a class="headerlink" href="#datafusion.dataframe.DataFrame.write_parquet_with_options" title="Link to this definition">¶</a></dt> |
| <dd><p>Execute the <a class="reference internal" href="#datafusion.dataframe.DataFrame" title="datafusion.dataframe.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a> and write the results to a Parquet file.</p> |
| <p>Allows advanced writer options to be set with <cite>ParquetWriterOptions</cite>.</p> |
| <dl class="field-list simple"> |
| <dt class="field-odd">Parameters<span class="colon">:</span></dt> |
| <dd class="field-odd"><ul class="simple"> |
| <li><p><strong>path</strong> – Path of the Parquet file to write.</p></li> |
| <li><p><strong>options</strong> – Sets the writer parquet options (see <cite>ParquetWriterOptions</cite>).</p></li> |
| <li><p><strong>write_options</strong> – Options that impact how the DataFrame is written.</p></li> |
| </ul> |
| </dd> |
| </dl> |
| </dd></dl> |
| |
| <dl class="py method"> |
| <dt class="sig sig-object py" id="datafusion.dataframe.DataFrame.write_table"> |
| <span class="sig-name descname"><span class="pre">write_table</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">table_name</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">str</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">write_options</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#datafusion.dataframe.DataFrameWriteOptions" title="datafusion.dataframe.DataFrameWriteOptions"><span class="pre">DataFrameWriteOptions</span></a><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><span class="pre">None</span></span></span><a class="headerlink" href="#datafusion.dataframe.DataFrame.write_table" title="Link to this definition">¶</a></dt> |
| <dd><p>Execute the <a class="reference internal" href="#datafusion.dataframe.DataFrame" title="datafusion.dataframe.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a> and write the results to a table.</p> |
| <p>The table must be registered with the session to perform this operation. |
| Not all table providers support writing operations. See the individual |
| implementations for details.</p> |
| </dd></dl> |
| |
| <dl class="py attribute"> |
| <dt class="sig sig-object py" id="datafusion.dataframe.DataFrame.df"> |
| <span class="sig-name descname"><span class="pre">df</span></span><a class="headerlink" href="#datafusion.dataframe.DataFrame.df" title="Link to this definition">¶</a></dt> |
| <dd></dd></dl> |
| |
| </dd></dl> |
| |
| <dl class="py class"> |
| <dt class="sig sig-object py" id="datafusion.dataframe.DataFrameWriteOptions"> |
| <em class="property"><span class="pre">class</span><span class="w"> </span></em><span class="sig-prename descclassname"><span class="pre">datafusion.dataframe.</span></span><span class="sig-name descname"><span class="pre">DataFrameWriteOptions</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">insert_operation</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#datafusion.dataframe.InsertOp" title="datafusion.dataframe.InsertOp"><span class="pre">InsertOp</span></a><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">single_file_output</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">bool</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">False</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">partition_by</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">str</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">collections.abc.Sequence</span><span class="p"><span class="pre">[</span></span><span class="pre">str</span><span class="p"><span class="pre">]</span></span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span 
class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">sort_by</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="../expr/index.html#datafusion.expr.Expr" title="datafusion.expr.Expr"><span class="pre">datafusion.expr.Expr</span></a><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><a class="reference internal" href="../expr/index.html#datafusion.expr.SortExpr" title="datafusion.expr.SortExpr"><span class="pre">datafusion.expr.SortExpr</span></a><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">collections.abc.Sequence</span><span class="p"><span class="pre">[</span></span><a class="reference internal" href="../expr/index.html#datafusion.expr.Expr" title="datafusion.expr.Expr"><span class="pre">datafusion.expr.Expr</span></a><span class="p"><span class="pre">]</span></span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">collections.abc.Sequence</span><span class="p"><span class="pre">[</span></span><a class="reference internal" href="../expr/index.html#datafusion.expr.SortExpr" title="datafusion.expr.SortExpr"><span class="pre">datafusion.expr.SortExpr</span></a><span class="p"><span class="pre">]</span></span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#datafusion.dataframe.DataFrameWriteOptions" title="Link to this definition">¶</a></dt> |
| <dd><p>Writer options for DataFrame.</p> |
| <p>There is no guarantee the table provider supports all writer options. |
| See the individual implementation and documentation for details.</p> |
| <p>Instantiate writer options for DataFrame.</p> |
| <dl class="py attribute"> |
| <dt class="sig sig-object py" id="datafusion.dataframe.DataFrameWriteOptions._raw_write_options"> |
| <span class="sig-name descname"><span class="pre">_raw_write_options</span></span><a class="headerlink" href="#datafusion.dataframe.DataFrameWriteOptions._raw_write_options" title="Link to this definition">¶</a></dt> |
| <dd></dd></dl> |
| |
| </dd></dl> |
| |
| <dl class="py class"> |
| <dt class="sig sig-object py" id="datafusion.dataframe.InsertOp"> |
| <em class="property"><span class="pre">class</span><span class="w"> </span></em><span class="sig-prename descclassname"><span class="pre">datafusion.dataframe.</span></span><span class="sig-name descname"><span class="pre">InsertOp</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="o"><span class="pre">*</span></span><span class="n"><span class="pre">args</span></span></em>, <em class="sig-param"><span class="o"><span class="pre">**</span></span><span class="n"><span class="pre">kwds</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#datafusion.dataframe.InsertOp" title="Link to this definition">¶</a></dt> |
| <dd><p>Bases: <code class="xref py py-obj docutils literal notranslate"><span class="pre">enum.Enum</span></code></p> |
| <p>Insert operation mode.</p> |
| <p>These modes are used by the table writing feature to define how record |
| batches should be written to a table.</p> |
| <dl class="py attribute"> |
| <dt class="sig sig-object py" id="datafusion.dataframe.InsertOp.APPEND"> |
| <span class="sig-name descname"><span class="pre">APPEND</span></span><a class="headerlink" href="#datafusion.dataframe.InsertOp.APPEND" title="Link to this definition">¶</a></dt> |
| <dd><p>Appends new rows to the existing table without modifying any existing rows.</p> |
| </dd></dl> |
| |
| <dl class="py attribute"> |
| <dt class="sig sig-object py" id="datafusion.dataframe.InsertOp.OVERWRITE"> |
| <span class="sig-name descname"><span class="pre">OVERWRITE</span></span><a class="headerlink" href="#datafusion.dataframe.InsertOp.OVERWRITE" title="Link to this definition">¶</a></dt> |
| <dd><p>Overwrites all existing rows in the table with the new rows.</p> |
| </dd></dl> |
| |
| <dl class="py attribute"> |
| <dt class="sig sig-object py" id="datafusion.dataframe.InsertOp.REPLACE"> |
| <span class="sig-name descname"><span class="pre">REPLACE</span></span><a class="headerlink" href="#datafusion.dataframe.InsertOp.REPLACE" title="Link to this definition">¶</a></dt> |
| <dd><p>Replaces existing rows that collide with the inserted rows.</p> |
| <p>Replacement is typically based on a unique key or primary key.</p> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| <dl class="py class"> |
| <dt class="sig sig-object py" id="datafusion.dataframe.ParquetColumnOptions"> |
| <em class="property"><span class="pre">class</span><span class="w"> </span></em><span class="sig-prename descclassname"><span class="pre">datafusion.dataframe.</span></span><span class="sig-name descname"><span class="pre">ParquetColumnOptions</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">encoding</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">str</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">dictionary_enabled</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">bool</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">compression</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">str</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">statistics_enabled</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span 
class="pre">str</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">bloom_filter_enabled</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">bool</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">bloom_filter_fpp</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">float</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">bloom_filter_ndv</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#datafusion.dataframe.ParquetColumnOptions" title="Link to this definition">¶</a></dt> |
| <dd><p>Parquet options for individual columns.</p> |
| <p>Contains the available options that can be applied for an individual Parquet column, |
| replacing the global options in <code class="docutils literal notranslate"><span class="pre">ParquetWriterOptions</span></code>.</p> |
| <p>Initialize the ParquetColumnOptions.</p> |
| <dl class="field-list simple"> |
| <dt class="field-odd">Parameters<span class="colon">:</span></dt> |
| <dd class="field-odd"><ul class="simple"> |
| <li><p><strong>encoding</strong> – Sets encoding for the column path. Valid values are: <code class="docutils literal notranslate"><span class="pre">plain</span></code>, |
| <code class="docutils literal notranslate"><span class="pre">plain_dictionary</span></code>, <code class="docutils literal notranslate"><span class="pre">rle</span></code>, <code class="docutils literal notranslate"><span class="pre">bit_packed</span></code>, <code class="docutils literal notranslate"><span class="pre">delta_binary_packed</span></code>, |
| <code class="docutils literal notranslate"><span class="pre">delta_length_byte_array</span></code>, <code class="docutils literal notranslate"><span class="pre">delta_byte_array</span></code>, <code class="docutils literal notranslate"><span class="pre">rle_dictionary</span></code>, |
| and <code class="docutils literal notranslate"><span class="pre">byte_stream_split</span></code>. These values are not case-sensitive. If |
| <code class="docutils literal notranslate"><span class="pre">None</span></code>, uses the default parquet options.</p></li> |
| <li><p><strong>dictionary_enabled</strong> – Sets if dictionary encoding is enabled for the column |
| path. If <code class="docutils literal notranslate"><span class="pre">None</span></code>, uses the default parquet options.</p></li> |
| <li><p><strong>compression</strong> – Sets default parquet compression codec for the column path. |
| Valid values are <code class="docutils literal notranslate"><span class="pre">uncompressed</span></code>, <code class="docutils literal notranslate"><span class="pre">snappy</span></code>, <code class="docutils literal notranslate"><span class="pre">gzip(level)</span></code>, <code class="docutils literal notranslate"><span class="pre">lzo</span></code>, |
| <code class="docutils literal notranslate"><span class="pre">brotli(level)</span></code>, <code class="docutils literal notranslate"><span class="pre">lz4</span></code>, <code class="docutils literal notranslate"><span class="pre">zstd(level)</span></code>, and <code class="docutils literal notranslate"><span class="pre">lz4_raw</span></code>. These |
| values are not case-sensitive. If <code class="docutils literal notranslate"><span class="pre">None</span></code>, uses the default parquet |
| options.</p></li> |
| <li><p><strong>statistics_enabled</strong> – Sets if statistics are enabled for the column. Valid |
| values are: <code class="docutils literal notranslate"><span class="pre">none</span></code>, <code class="docutils literal notranslate"><span class="pre">chunk</span></code>, and <code class="docutils literal notranslate"><span class="pre">page</span></code>. These values are not |
| case-sensitive. If <code class="docutils literal notranslate"><span class="pre">None</span></code>, uses the default parquet options.</p></li> |
| <li><p><strong>bloom_filter_enabled</strong> – Sets if bloom filter is enabled for the column path. |
| If <code class="docutils literal notranslate"><span class="pre">None</span></code>, uses the default parquet options.</p></li> |
| <li><p><strong>bloom_filter_fpp</strong> – Sets bloom filter false positive probability for the |
| column path. If <code class="docutils literal notranslate"><span class="pre">None</span></code>, uses the default parquet options.</p></li> |
| <li><p><strong>bloom_filter_ndv</strong> – Sets bloom filter number of distinct values. If <code class="docutils literal notranslate"><span class="pre">None</span></code>, |
| uses the default parquet options.</p></li> |
| </ul> |
| </dd> |
| </dl> |
| <dl class="py attribute"> |
| <dt class="sig sig-object py" id="datafusion.dataframe.ParquetColumnOptions.bloom_filter_enabled"> |
| <span class="sig-name descname"><span class="pre">bloom_filter_enabled</span></span><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">None</span></em><a class="headerlink" href="#datafusion.dataframe.ParquetColumnOptions.bloom_filter_enabled" title="Link to this definition">¶</a></dt> |
| <dd></dd></dl> |
| |
| <dl class="py attribute"> |
| <dt class="sig sig-object py" id="datafusion.dataframe.ParquetColumnOptions.bloom_filter_fpp"> |
| <span class="sig-name descname"><span class="pre">bloom_filter_fpp</span></span><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">None</span></em><a class="headerlink" href="#datafusion.dataframe.ParquetColumnOptions.bloom_filter_fpp" title="Link to this definition">¶</a></dt> |
| <dd></dd></dl> |
| |
| <dl class="py attribute"> |
| <dt class="sig sig-object py" id="datafusion.dataframe.ParquetColumnOptions.bloom_filter_ndv"> |
| <span class="sig-name descname"><span class="pre">bloom_filter_ndv</span></span><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">None</span></em><a class="headerlink" href="#datafusion.dataframe.ParquetColumnOptions.bloom_filter_ndv" title="Link to this definition">¶</a></dt> |
| <dd></dd></dl> |
| |
| <dl class="py attribute"> |
| <dt class="sig sig-object py" id="datafusion.dataframe.ParquetColumnOptions.compression"> |
| <span class="sig-name descname"><span class="pre">compression</span></span><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">None</span></em><a class="headerlink" href="#datafusion.dataframe.ParquetColumnOptions.compression" title="Link to this definition">¶</a></dt> |
| <dd></dd></dl> |
| |
| <dl class="py attribute"> |
| <dt class="sig sig-object py" id="datafusion.dataframe.ParquetColumnOptions.dictionary_enabled"> |
| <span class="sig-name descname"><span class="pre">dictionary_enabled</span></span><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">None</span></em><a class="headerlink" href="#datafusion.dataframe.ParquetColumnOptions.dictionary_enabled" title="Link to this definition">¶</a></dt> |
| <dd></dd></dl> |
| |
| <dl class="py attribute"> |
| <dt class="sig sig-object py" id="datafusion.dataframe.ParquetColumnOptions.encoding"> |
| <span class="sig-name descname"><span class="pre">encoding</span></span><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">None</span></em><a class="headerlink" href="#datafusion.dataframe.ParquetColumnOptions.encoding" title="Link to this definition">¶</a></dt> |
| <dd></dd></dl> |
| |
| <dl class="py attribute"> |
| <dt class="sig sig-object py" id="datafusion.dataframe.ParquetColumnOptions.statistics_enabled"> |
| <span class="sig-name descname"><span class="pre">statistics_enabled</span></span><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">None</span></em><a class="headerlink" href="#datafusion.dataframe.ParquetColumnOptions.statistics_enabled" title="Link to this definition">¶</a></dt> |
| <dd></dd></dl> |
| |
| </dd></dl> |
| |
| <dl class="py class"> |
| <dt class="sig sig-object py" id="datafusion.dataframe.ParquetWriterOptions"> |
| <em class="property"><span class="pre">class</span><span class="w"> </span></em><span class="sig-prename descclassname"><span class="pre">datafusion.dataframe.</span></span><span class="sig-name descname"><span class="pre">ParquetWriterOptions</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">data_pagesize_limit</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">1024</span> <span class="pre">*</span> <span class="pre">1024</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">write_batch_size</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">1024</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">writer_version</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">str</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">'1.0'</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">skip_arrow_metadata</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">bool</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">False</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">compression</span></span><span class="p"><span 
class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">str</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">'zstd(3)'</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">compression_level</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">dictionary_enabled</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">bool</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">True</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">dictionary_page_size_limit</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">1024</span> <span class="pre">*</span> <span class="pre">1024</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">statistics_enabled</span></span><span class="p"><span 
class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">str</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">'page'</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">max_row_group_size</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">1024</span> <span class="pre">*</span> <span class="pre">1024</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">created_by</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">str</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">'datafusion-python'</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">column_index_truncate_length</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">64</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">statistics_truncate_length</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span><span class="w"> </span><span class="p"><span 
class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">data_page_row_count_limit</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">20000</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">encoding</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">str</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">bloom_filter_on_write</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">bool</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">False</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">bloom_filter_fpp</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">float</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span 
class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">bloom_filter_ndv</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">allow_single_file_parallelism</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">bool</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">True</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">maximum_parallel_row_group_writers</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">1</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">maximum_buffered_record_batches_per_stream</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">2</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">column_specific_options</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">dict</span><span class="p"><span class="pre">[</span></span><span 
class="pre">str</span><span class="p"><span class="pre">,</span></span><span class="w"> </span><a class="reference internal" href="#datafusion.dataframe.ParquetColumnOptions" title="datafusion.dataframe.ParquetColumnOptions"><span class="pre">ParquetColumnOptions</span></a><span class="p"><span class="pre">]</span></span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#datafusion.dataframe.ParquetWriterOptions" title="Link to this definition">¶</a></dt> |
| <dd><p>Advanced parquet writer options.</p> |
| <p>Allows setting the writer options that apply to the entire file. Some options can |
| also be set on a column by column basis, with the field <code class="docutils literal notranslate"><span class="pre">column_specific_options</span></code> |
| (see <code class="docutils literal notranslate"><span class="pre">ParquetColumnOptions</span></code>).</p> |
| <p>Initialize the ParquetWriterOptions.</p> |
| <dl class="field-list simple"> |
| <dt class="field-odd">Parameters<span class="colon">:</span></dt> |
| <dd class="field-odd"><ul class="simple"> |
| <li><p><strong>data_pagesize_limit</strong> – Sets best effort maximum size of data page in bytes.</p></li> |
| <li><p><strong>write_batch_size</strong> – Sets write_batch_size in bytes.</p></li> |
| <li><p><strong>writer_version</strong> – Sets parquet writer version. Valid values are <code class="docutils literal notranslate"><span class="pre">1.0</span></code> and |
| <code class="docutils literal notranslate"><span class="pre">2.0</span></code>.</p></li> |
| <li><p><strong>skip_arrow_metadata</strong> – Skip encoding the embedded arrow metadata in the |
| KV_meta.</p></li> |
| <li><p><strong>compression</strong> – Compression type to use. Default is <code class="docutils literal notranslate"><span class="pre">zstd(3)</span></code>. |
| Available compression types are:</p> |
| <ul> |
| <li><p><code class="docutils literal notranslate"><span class="pre">uncompressed</span></code>: No compression.</p></li> |
| <li><p><code class="docutils literal notranslate"><span class="pre">snappy</span></code>: Snappy compression.</p></li> |
| <li><p><code class="docutils literal notranslate"><span class="pre">gzip(n)</span></code>: Gzip compression with level n.</p></li> |
| <li><p><code class="docutils literal notranslate"><span class="pre">brotli(n)</span></code>: Brotli compression with level n.</p></li> |
| <li><p><code class="docutils literal notranslate"><span class="pre">lz4</span></code>: LZ4 compression.</p></li> |
| <li><p><code class="docutils literal notranslate"><span class="pre">lz4_raw</span></code>: LZ4_RAW compression.</p></li> |
| <li><p><code class="docutils literal notranslate"><span class="pre">zstd(n)</span></code>: Zstandard compression with level n.</p></li> |
| </ul> |
| </li> |
| <li><p><strong>compression_level</strong> – Compression level to set.</p></li> |
| <li><p><strong>dictionary_enabled</strong> – Sets if dictionary encoding is enabled. If <code class="docutils literal notranslate"><span class="pre">None</span></code>, |
| uses the default parquet writer setting.</p></li> |
| <li><p><strong>dictionary_page_size_limit</strong> – Sets best effort maximum dictionary page size, |
| in bytes.</p></li> |
| <li><p><strong>statistics_enabled</strong> – Sets if statistics are enabled for any column. Valid |
| values are <code class="docutils literal notranslate"><span class="pre">none</span></code>, <code class="docutils literal notranslate"><span class="pre">chunk</span></code>, and <code class="docutils literal notranslate"><span class="pre">page</span></code>. If <code class="docutils literal notranslate"><span class="pre">None</span></code>, uses the |
| default parquet writer setting.</p></li> |
| <li><p><strong>max_row_group_size</strong> – Target maximum number of rows in each row group |
| (defaults to 1M rows). Writing larger row groups requires more memory |
| to write, but can get better compression and be faster to read.</p></li> |
| <li><p><strong>created_by</strong> – Sets “created by” property.</p></li> |
| <li><p><strong>column_index_truncate_length</strong> – Sets column index truncate length.</p></li> |
| <li><p><strong>statistics_truncate_length</strong> – Sets statistics truncate length. If <code class="docutils literal notranslate"><span class="pre">None</span></code>, |
| uses the default parquet writer setting.</p></li> |
| <li><p><strong>data_page_row_count_limit</strong> – Sets best effort maximum number of rows in a data |
| page.</p></li> |
| <li><p><strong>encoding</strong> – Sets default encoding for any column. Valid values are <code class="docutils literal notranslate"><span class="pre">plain</span></code>, |
| <code class="docutils literal notranslate"><span class="pre">plain_dictionary</span></code>, <code class="docutils literal notranslate"><span class="pre">rle</span></code>, <code class="docutils literal notranslate"><span class="pre">bit_packed</span></code>, <code class="docutils literal notranslate"><span class="pre">delta_binary_packed</span></code>, |
| <code class="docutils literal notranslate"><span class="pre">delta_length_byte_array</span></code>, <code class="docutils literal notranslate"><span class="pre">delta_byte_array</span></code>, <code class="docutils literal notranslate"><span class="pre">rle_dictionary</span></code>, |
| and <code class="docutils literal notranslate"><span class="pre">byte_stream_split</span></code>. If <code class="docutils literal notranslate"><span class="pre">None</span></code>, uses the default parquet writer |
| setting.</p></li> |
| <li><p><strong>bloom_filter_on_write</strong> – Write bloom filters for all columns when creating |
| parquet files.</p></li> |
| <li><p><strong>bloom_filter_fpp</strong> – Sets bloom filter false positive probability. If <code class="docutils literal notranslate"><span class="pre">None</span></code>, |
| uses the default parquet writer setting.</p></li> |
| <li><p><strong>bloom_filter_ndv</strong> – Sets bloom filter number of distinct values. If <code class="docutils literal notranslate"><span class="pre">None</span></code>, |
| uses the default parquet writer setting.</p></li> |
| <li><p><strong>allow_single_file_parallelism</strong> – Controls whether DataFusion will attempt to |
| speed up writing parquet files by serializing them in parallel. Each |
| column in each row group in each output file is serialized in parallel |
| leveraging a maximum possible core count of |
| <code class="docutils literal notranslate"><span class="pre">n_files</span> <span class="pre">*</span> <span class="pre">n_row_groups</span> <span class="pre">*</span> <span class="pre">n_columns</span></code>.</p></li> |
| <li><p><strong>maximum_parallel_row_group_writers</strong> – By default parallel parquet writer is |
| tuned for minimum memory usage in a streaming execution plan. You may |
| see a performance benefit when writing large parquet files by increasing |
| <code class="docutils literal notranslate"><span class="pre">maximum_parallel_row_group_writers</span></code> and |
| <code class="docutils literal notranslate"><span class="pre">maximum_buffered_record_batches_per_stream</span></code> if your system has idle |
| cores and can tolerate additional memory usage. Boosting these values is |
| likely worthwhile when writing out already in-memory data, such as from |
| a cached data frame.</p></li> |
| <li><p><strong>maximum_buffered_record_batches_per_stream</strong> – See |
| <code class="docutils literal notranslate"><span class="pre">maximum_parallel_row_group_writers</span></code>.</p></li> |
| <li><p><strong>column_specific_options</strong> – Overrides options for specific columns. If a column |
| is not a part of this dictionary, it will use the parameters provided |
| here.</p></li> |
| </ul> |
| </dd> |
| </dl> |
| <dl class="py attribute"> |
| <dt class="sig sig-object py" id="datafusion.dataframe.ParquetWriterOptions.allow_single_file_parallelism"> |
| <span class="sig-name descname"><span class="pre">allow_single_file_parallelism</span></span><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">True</span></em><a class="headerlink" href="#datafusion.dataframe.ParquetWriterOptions.allow_single_file_parallelism" title="Link to this definition">¶</a></dt> |
| <dd></dd></dl> |
| |
| <dl class="py attribute"> |
| <dt class="sig sig-object py" id="datafusion.dataframe.ParquetWriterOptions.bloom_filter_fpp"> |
| <span class="sig-name descname"><span class="pre">bloom_filter_fpp</span></span><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">None</span></em><a class="headerlink" href="#datafusion.dataframe.ParquetWriterOptions.bloom_filter_fpp" title="Link to this definition">¶</a></dt> |
| <dd></dd></dl> |
| |
| <dl class="py attribute"> |
| <dt class="sig sig-object py" id="datafusion.dataframe.ParquetWriterOptions.bloom_filter_ndv"> |
| <span class="sig-name descname"><span class="pre">bloom_filter_ndv</span></span><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">None</span></em><a class="headerlink" href="#datafusion.dataframe.ParquetWriterOptions.bloom_filter_ndv" title="Link to this definition">¶</a></dt> |
| <dd></dd></dl> |
| |
| <dl class="py attribute"> |
| <dt class="sig sig-object py" id="datafusion.dataframe.ParquetWriterOptions.bloom_filter_on_write"> |
| <span class="sig-name descname"><span class="pre">bloom_filter_on_write</span></span><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">False</span></em><a class="headerlink" href="#datafusion.dataframe.ParquetWriterOptions.bloom_filter_on_write" title="Link to this definition">¶</a></dt> |
| <dd></dd></dl> |
| |
| <dl class="py attribute"> |
| <dt class="sig sig-object py" id="datafusion.dataframe.ParquetWriterOptions.column_index_truncate_length"> |
| <span class="sig-name descname"><span class="pre">column_index_truncate_length</span></span><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">64</span></em><a class="headerlink" href="#datafusion.dataframe.ParquetWriterOptions.column_index_truncate_length" title="Link to this definition">¶</a></dt> |
| <dd></dd></dl> |
| |
| <dl class="py attribute"> |
| <dt class="sig sig-object py" id="datafusion.dataframe.ParquetWriterOptions.column_specific_options"> |
| <span class="sig-name descname"><span class="pre">column_specific_options</span></span><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">None</span></em><a class="headerlink" href="#datafusion.dataframe.ParquetWriterOptions.column_specific_options" title="Link to this definition">¶</a></dt> |
| <dd></dd></dl> |
| |
| <dl class="py attribute"> |
| <dt class="sig sig-object py" id="datafusion.dataframe.ParquetWriterOptions.created_by"> |
| <span class="sig-name descname"><span class="pre">created_by</span></span><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">'datafusion-python'</span></em><a class="headerlink" href="#datafusion.dataframe.ParquetWriterOptions.created_by" title="Link to this definition">¶</a></dt> |
| <dd></dd></dl> |
| |
| <dl class="py attribute"> |
| <dt class="sig sig-object py" id="datafusion.dataframe.ParquetWriterOptions.data_page_row_count_limit"> |
| <span class="sig-name descname"><span class="pre">data_page_row_count_limit</span></span><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">20000</span></em><a class="headerlink" href="#datafusion.dataframe.ParquetWriterOptions.data_page_row_count_limit" title="Link to this definition">¶</a></dt> |
| <dd></dd></dl> |
| |
| <dl class="py attribute"> |
| <dt class="sig sig-object py" id="datafusion.dataframe.ParquetWriterOptions.data_pagesize_limit"> |
| <span class="sig-name descname"><span class="pre">data_pagesize_limit</span></span><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">1048576</span></em><a class="headerlink" href="#datafusion.dataframe.ParquetWriterOptions.data_pagesize_limit" title="Link to this definition">¶</a></dt> |
| <dd></dd></dl> |
| |
| <dl class="py attribute"> |
| <dt class="sig sig-object py" id="datafusion.dataframe.ParquetWriterOptions.dictionary_enabled"> |
| <span class="sig-name descname"><span class="pre">dictionary_enabled</span></span><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">True</span></em><a class="headerlink" href="#datafusion.dataframe.ParquetWriterOptions.dictionary_enabled" title="Link to this definition">¶</a></dt> |
| <dd></dd></dl> |
| |
| <dl class="py attribute"> |
| <dt class="sig sig-object py" id="datafusion.dataframe.ParquetWriterOptions.dictionary_page_size_limit"> |
| <span class="sig-name descname"><span class="pre">dictionary_page_size_limit</span></span><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">1048576</span></em><a class="headerlink" href="#datafusion.dataframe.ParquetWriterOptions.dictionary_page_size_limit" title="Link to this definition">¶</a></dt> |
| <dd></dd></dl> |
| |
| <dl class="py attribute"> |
| <dt class="sig sig-object py" id="datafusion.dataframe.ParquetWriterOptions.encoding"> |
| <span class="sig-name descname"><span class="pre">encoding</span></span><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">None</span></em><a class="headerlink" href="#datafusion.dataframe.ParquetWriterOptions.encoding" title="Link to this definition">¶</a></dt> |
| <dd></dd></dl> |
| |
| <dl class="py attribute"> |
| <dt class="sig sig-object py" id="datafusion.dataframe.ParquetWriterOptions.max_row_group_size"> |
| <span class="sig-name descname"><span class="pre">max_row_group_size</span></span><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">1048576</span></em><a class="headerlink" href="#datafusion.dataframe.ParquetWriterOptions.max_row_group_size" title="Link to this definition">¶</a></dt> |
| <dd></dd></dl> |
| |
| <dl class="py attribute"> |
| <dt class="sig sig-object py" id="datafusion.dataframe.ParquetWriterOptions.maximum_buffered_record_batches_per_stream"> |
| <span class="sig-name descname"><span class="pre">maximum_buffered_record_batches_per_stream</span></span><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">2</span></em><a class="headerlink" href="#datafusion.dataframe.ParquetWriterOptions.maximum_buffered_record_batches_per_stream" title="Link to this definition">¶</a></dt> |
| <dd></dd></dl> |
| |
| <dl class="py attribute"> |
| <dt class="sig sig-object py" id="datafusion.dataframe.ParquetWriterOptions.maximum_parallel_row_group_writers"> |
| <span class="sig-name descname"><span class="pre">maximum_parallel_row_group_writers</span></span><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">1</span></em><a class="headerlink" href="#datafusion.dataframe.ParquetWriterOptions.maximum_parallel_row_group_writers" title="Link to this definition">¶</a></dt> |
| <dd></dd></dl> |
| |
| <dl class="py attribute"> |
| <dt class="sig sig-object py" id="datafusion.dataframe.ParquetWriterOptions.skip_arrow_metadata"> |
| <span class="sig-name descname"><span class="pre">skip_arrow_metadata</span></span><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">False</span></em><a class="headerlink" href="#datafusion.dataframe.ParquetWriterOptions.skip_arrow_metadata" title="Link to this definition">¶</a></dt> |
| <dd></dd></dl> |
| |
| <dl class="py attribute"> |
| <dt class="sig sig-object py" id="datafusion.dataframe.ParquetWriterOptions.statistics_enabled"> |
| <span class="sig-name descname"><span class="pre">statistics_enabled</span></span><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">'page'</span></em><a class="headerlink" href="#datafusion.dataframe.ParquetWriterOptions.statistics_enabled" title="Link to this definition">¶</a></dt> |
| <dd></dd></dl> |
| |
| <dl class="py attribute"> |
| <dt class="sig sig-object py" id="datafusion.dataframe.ParquetWriterOptions.statistics_truncate_length"> |
| <span class="sig-name descname"><span class="pre">statistics_truncate_length</span></span><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">None</span></em><a class="headerlink" href="#datafusion.dataframe.ParquetWriterOptions.statistics_truncate_length" title="Link to this definition">¶</a></dt> |
| <dd></dd></dl> |
| |
| <dl class="py attribute"> |
| <dt class="sig sig-object py" id="datafusion.dataframe.ParquetWriterOptions.write_batch_size"> |
| <span class="sig-name descname"><span class="pre">write_batch_size</span></span><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">1024</span></em><a class="headerlink" href="#datafusion.dataframe.ParquetWriterOptions.write_batch_size" title="Link to this definition">¶</a></dt> |
| <dd></dd></dl> |
| |
| <dl class="py attribute"> |
| <dt class="sig sig-object py" id="datafusion.dataframe.ParquetWriterOptions.writer_version"> |
| <span class="sig-name descname"><span class="pre">writer_version</span></span><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">'1.0'</span></em><a class="headerlink" href="#datafusion.dataframe.ParquetWriterOptions.writer_version" title="Link to this definition">¶</a></dt> |
| <dd></dd></dl> |
| |
| </dd></dl> |
| |
| </section> |
| </section> |
| |
| |
| </div> |
| |
| |
| <!-- Previous / next buttons --> |
| <div class='prev-next-area'> |
| <a class='left-prev' id="prev-link" href="../context/index.html" title="previous page"> |
| <i class="fas fa-angle-left"></i> |
| <div class="prev-next-info"> |
| <p class="prev-next-subtitle">previous</p> |
| <p class="prev-next-title">datafusion.context</p> |
| </div> |
| </a> |
| <a class='right-next' id="next-link" href="../dataframe_formatter/index.html" title="next page"> |
| <div class="prev-next-info"> |
| <p class="prev-next-subtitle">next</p> |
| <p class="prev-next-title">datafusion.dataframe_formatter</p> |
| </div> |
| <i class="fas fa-angle-right"></i> |
| </a> |
| </div> |
| |
| </main> |
| |
| |
| </div> |
| </div> |
| |
| <script src="../../../_static/scripts/pydata-sphinx-theme.js?digest=1999514e3f237ded88cf"></script> |
| |
| <!-- Based on pydata_sphinx_theme/footer.html --> |
| <footer class="footer mt-5 mt-md-0"> |
| <div class="container"> |
| |
| <div class="footer-item"> |
| <p class="copyright"> |
| © Copyright 2019-2024, Apache Software Foundation.<br> |
| </p> |
| </div> |
| |
| <div class="footer-item"> |
| <p class="sphinx-version"> |
| Created using <a href="https://www.sphinx-doc.org/">Sphinx</a> 8.1.3.<br> |
| </p> |
| </div> |
| |
| <div class="footer-item"> |
| <p>Apache Arrow DataFusion, Arrow DataFusion, Apache, the Apache feather logo, and the Apache Arrow DataFusion project logo</p> |
| <p>are either registered trademarks or trademarks of The Apache Software Foundation in the United States and other countries.</p> |
| </div> |
| </div> |
| </footer> |
| |
| |
| </body> |
| </html> |