| |
| <!doctype html> |
| <html lang="en" class="no-js"> |
| <head> |
| |
| <meta charset="utf-8"> |
| <meta name="viewport" content="width=device-width,initial-scale=1"> |
| |
| |
| |
| <link rel="canonical" href="https://py.iceberg.apache.org/reference/pyiceberg/io/pyarrow/"> |
| |
| |
| <link rel="prev" href="../fsspec/"> |
| |
| |
| <link rel="next" href="../../manifest/"> |
| |
| |
| <link rel="icon" href="../../../../assets/images/iceberg-logo-icon.png"> |
| <meta name="generator" content="mkdocs-1.6.1, mkdocs-material-9.6.21"> |
| |
| |
| |
| <title>pyarrow - PyIceberg</title> |
| |
| |
| |
| <link rel="stylesheet" href="../../../../assets/stylesheets/main.2a3383ac.min.css"> |
| |
| |
| <link rel="stylesheet" href="../../../../assets/stylesheets/palette.06af60db.min.css"> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin> |
| <link rel="stylesheet" href="https://fonts.googleapis.com/css?family=Lato:300,300i,400,400i,700,700i%7CRoboto+Mono:400,400i,700,700i&display=fallback"> |
| <style>:root{--md-text-font:"Lato";--md-code-font:"Roboto Mono"}</style> |
| |
| |
| |
| <link rel="stylesheet" href="../../../../assets/_mkdocstrings.css"> |
| |
| <script> |
| /* Storage helpers shared by the theme's inline scripts. They are assigned as */ |
| /* undeclared globals so that later scripts can call them by bare name. */ |
| |
| /* URL four directory levels above this page; its pathname prefixes every storage key. */ |
| __md_scope = new URL("../../../..", location); |
| |
| /* Fold a string into a number: acc = (acc << 5) - acc + charCode, starting from 0. */ |
| __md_hash = text => { |
|   let acc = 0; |
|   for (const ch of text) { |
|     acc = (acc << 5) - acc + ch.charCodeAt(0); |
|   } |
|   return acc; |
| }; |
| |
| /* Read and JSON-decode the value stored under "<scope pathname>.<key>". */ |
| __md_get = (key, storage = localStorage, scope = __md_scope) => { |
|   const raw = storage.getItem(scope.pathname + "." + key); |
|   return JSON.parse(raw); |
| }; |
| |
| /* JSON-encode and store a value under "<scope pathname>.<key>"; failures are swallowed. */ |
| __md_set = (key, value, storage = localStorage, scope = __md_scope) => { |
|   try { |
|     storage.setItem(scope.pathname + "." + key, JSON.stringify(value)); |
|   } catch (err) {} |
| }; |
| </script> |
| |
| |
| |
| |
| |
| |
| |
| <!-- Matomo --> |
| <script> |
| /* Command queue read by the Matomo tracker; reuse an existing queue if one is already defined. */ |
| var _paq = window._paq = window._paq || []; |
| /* Configuration commands are queued ahead of "trackPageView", one push per command. */ |
| [ |
|   ["setDoNotTrack", true], |
|   ["disableCookies"], |
|   ["trackPageView"], |
|   ["enableLinkTracking"] |
| ].forEach(function (command) { |
|   _paq.push(command); |
| }); |
| (function () { |
|   var trackerBase = "https://analytics.apache.org/"; |
|   _paq.push(["setTrackerUrl", trackerBase + "matomo.php"]); |
|   _paq.push(["setSiteId", "82"]); |
|   /* Inject the tracker script asynchronously, ahead of the first script element in the document. */ |
|   var loader = document.createElement("script"); |
|   var firstScript = document.getElementsByTagName("script")[0]; |
|   loader.async = true; |
|   loader.src = trackerBase + "matomo.js"; |
|   firstScript.parentNode.insertBefore(loader, firstScript); |
| })(); |
| </script> |
| <!-- End Matomo --> |
| |
| </head> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <body dir="ltr" data-md-color-scheme="default" data-md-color-primary="indigo" data-md-color-accent="indigo"> |
| |
| |
| <input class="md-toggle" data-md-toggle="drawer" type="checkbox" id="__drawer" autocomplete="off"> |
| <input class="md-toggle" data-md-toggle="search" type="checkbox" id="__search" autocomplete="off"> |
| <label class="md-overlay" for="__drawer"></label> |
| <div data-md-component="skip"> |
| |
| |
| <a href="#pyiceberg.io.pyarrow" class="md-skip"> |
| Skip to content |
| </a> |
| |
| </div> |
| <div data-md-component="announce"> |
| |
| </div> |
| |
| |
| |
| |
| |
| |
| <header class="md-header md-header--shadow md-header--lifted" data-md-component="header"> |
| <nav class="md-header__inner md-grid" aria-label="Header"> |
| <a href="../../../.." title="PyIceberg" class="md-header__button md-logo" aria-label="PyIceberg" data-md-component="logo"> |
| |
| <img src="../../../../assets/images/iceberg-logo-icon.png" alt="logo"> |
| |
| </a> |
| <label class="md-header__button md-icon" for="__drawer"> |
| |
| <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M3 6h18v2H3zm0 5h18v2H3zm0 5h18v2H3z"/></svg> |
| </label> |
| <div class="md-header__title" data-md-component="header-title"> |
| <div class="md-header__ellipsis"> |
| <div class="md-header__topic"> |
| <span class="md-ellipsis"> |
| PyIceberg |
| </span> |
| </div> |
| <div class="md-header__topic" data-md-component="header-topic"> |
| <span class="md-ellipsis"> |
| |
| pyarrow |
| |
| </span> |
| </div> |
| </div> |
| </div> |
| |
| |
| <form class="md-header__option" data-md-component="palette"> |
| |
| |
| |
| |
| <input class="md-option" data-md-color-media="" data-md-color-scheme="default" data-md-color-primary="indigo" data-md-color-accent="indigo" aria-label="Switch to dark mode" type="radio" name="__palette" id="__palette_0"> |
| |
| <label class="md-header__button md-icon" title="Switch to dark mode" for="__palette_1" hidden> |
| <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M12 8a4 4 0 0 0-4 4 4 4 0 0 0 4 4 4 4 0 0 0 4-4 4 4 0 0 0-4-4m0 10a6 6 0 0 1-6-6 6 6 0 0 1 6-6 6 6 0 0 1 6 6 6 6 0 0 1-6 6m8-9.31V4h-4.69L12 .69 8.69 4H4v4.69L.69 12 4 15.31V20h4.69L12 23.31 15.31 20H20v-4.69L23.31 12z"/></svg> |
| </label> |
| |
| |
| |
| |
| |
| <input class="md-option" data-md-color-media="" data-md-color-scheme="slate" data-md-color-primary="indigo" data-md-color-accent="indigo" aria-label="Switch to light mode" type="radio" name="__palette" id="__palette_1"> |
| |
| <label class="md-header__button md-icon" title="Switch to light mode" for="__palette_0" hidden> |
| <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M12 18c-.89 0-1.74-.2-2.5-.55C11.56 16.5 13 14.42 13 12s-1.44-4.5-3.5-5.45C10.26 6.2 11.11 6 12 6a6 6 0 0 1 6 6 6 6 0 0 1-6 6m8-9.31V4h-4.69L12 .69 8.69 4H4v4.69L.69 12 4 15.31V20h4.69L12 23.31 15.31 20H20v-4.69L23.31 12z"/></svg> |
| </label> |
| |
| |
| </form> |
| |
| |
| |
| <script> |
| /* Re-apply the colour palette stored under "__palette" to the <body> element. */ |
| var palette = __md_get("__palette"); |
| if (palette && palette.color) { |
|   /* Block-scoped flag, so no additional global is introduced. */ |
|   let usable = true; |
|   if ("(prefers-color-scheme)" === palette.color.media) { |
|     /* The stored entry defers to the OS preference: resolve it to the palette */ |
|     /* input whose data-md-color-media matches the current light/dark setting. */ |
|     var media = matchMedia("(prefers-color-scheme: light)"), |
|         input = document.querySelector(media.matches |
|           ? "[data-md-color-media='(prefers-color-scheme: light)']" |
|           : "[data-md-color-media='(prefers-color-scheme: dark)']"); |
|     if (input) { |
|       palette.color.media = input.getAttribute("data-md-color-media"); |
|       palette.color.scheme = input.getAttribute("data-md-color-scheme"); |
|       palette.color.primary = input.getAttribute("data-md-color-primary"); |
|       palette.color.accent = input.getAttribute("data-md-color-accent"); |
|     } else { |
|       /* No input carries a media-qualified palette (the inputs on this page use */ |
|       /* data-md-color-media=""), so the stored entry cannot be resolved. Keep the */ |
|       /* attributes already rendered on <body> instead of throwing on a null input. */ |
|       usable = false; |
|     } |
|   } |
|   if (usable) { |
|     /* Mirror every stored colour property onto <body> as data-md-color-<key>. */ |
|     for (var [key, value] of Object.entries(palette.color)) { |
|       document.body.setAttribute("data-md-color-" + key, value); |
|     } |
|   } |
| } |
| </script> |
| |
| |
| |
| |
| |
| <label class="md-header__button md-icon" for="__search"> |
| |
| <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M9.5 3A6.5 6.5 0 0 1 16 9.5c0 1.61-.59 3.09-1.56 4.23l.27.27h.79l5 5-1.5 1.5-5-5v-.79l-.27-.27A6.52 6.52 0 0 1 9.5 16 6.5 6.5 0 0 1 3 9.5 6.5 6.5 0 0 1 9.5 3m0 2C7 5 5 7 5 9.5S7 14 9.5 14 14 12 14 9.5 12 5 9.5 5"/></svg> |
| </label> |
| <!-- Search dialog: aria-label gives the role="dialog" container an accessible name. --> |
| <div class="md-search" data-md-component="search" role="dialog" aria-label="Search"> |
| <label class="md-search__overlay" for="__search"></label> |
| <div class="md-search__inner" role="search"> |
| <form class="md-search__form" name="search"> |
| <input type="text" class="md-search__input" name="query" aria-label="Search" placeholder="Search" autocapitalize="off" autocorrect="off" autocomplete="off" spellcheck="false" data-md-component="search-query" required> |
| <label class="md-search__icon md-icon" for="__search"> |
| |
| <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M9.5 3A6.5 6.5 0 0 1 16 9.5c0 1.61-.59 3.09-1.56 4.23l.27.27h.79l5 5-1.5 1.5-5-5v-.79l-.27-.27A6.52 6.52 0 0 1 9.5 16 6.5 6.5 0 0 1 3 9.5 6.5 6.5 0 0 1 9.5 3m0 2C7 5 5 7 5 9.5S7 14 9.5 14 14 12 14 9.5 12 5 9.5 5"/></svg> |
| |
| <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M20 11v2H8l5.5 5.5-1.42 1.42L4.16 12l7.92-7.92L13.5 5.5 8 11z"/></svg> |
| </label> |
| <nav class="md-search__options" aria-label="Search"> |
| |
| <button type="reset" class="md-search__icon md-icon" title="Clear" aria-label="Clear" tabindex="-1"> |
| |
| <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M19 6.41 17.59 5 12 10.59 6.41 5 5 6.41 10.59 12 5 17.59 6.41 19 12 13.41 17.59 19 19 17.59 13.41 12z"/></svg> |
| </button> |
| </nav> |
| |
| </form> |
| <div class="md-search__output"> |
| <div class="md-search__scrollwrap" tabindex="0" data-md-scrollfix> |
| <div class="md-search-result" data-md-component="search-result"> |
| <div class="md-search-result__meta"> |
| Initializing search |
| </div> |
| <ol class="md-search-result__list" role="presentation"></ol> |
| </div> |
| </div> |
| </div> |
| </div> |
| </div> |
| |
| |
| |
| <div class="md-header__source"> |
| <a href="https://github.com/apache/iceberg-python" title="Go to repository" class="md-source" data-md-component="source"> |
| <div class="md-source__icon md-icon"> |
| |
| <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512"><!--! Font Awesome Free 7.0.1 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License) Copyright 2025 Fonticons, Inc.--><path d="M439.6 236.1 244 40.5c-5.4-5.5-12.8-8.5-20.4-8.5s-15 3-20.4 8.4L162.5 81l51.5 51.5c27.1-9.1 52.7 16.8 43.4 43.7l49.7 49.7c34.2-11.8 61.2 31 35.5 56.7-26.5 26.5-70.2-2.9-56-37.3L240.3 199v121.9c25.3 12.5 22.3 41.8 9.1 55-6.4 6.4-15.2 10.1-24.3 10.1s-17.8-3.6-24.3-10.1c-17.6-17.6-11.1-46.9 11.2-56v-123c-20.8-8.5-24.6-30.7-18.6-45L142.6 101 8.5 235.1C3 240.6 0 247.9 0 255.5s3 15 8.5 20.4l195.6 195.7c5.4 5.4 12.7 8.4 20.4 8.4s15-3 20.4-8.4l194.7-194.7c5.4-5.4 8.4-12.8 8.4-20.4s-3-15-8.4-20.4"/></svg> |
| </div> |
| <div class="md-source__repository"> |
| apache/iceberg-python |
| </div> |
| </a> |
| </div> |
| |
| </nav> |
| |
| |
| |
| <nav class="md-tabs" aria-label="Tabs" data-md-component="tabs"> |
| <div class="md-grid"> |
| <ul class="md-tabs__list"> |
| |
| |
| |
| |
| |
| |
| <li class="md-tabs__item"> |
| <a href="../../../.." class="md-tabs__link"> |
| |
| |
| |
| |
| |
| Getting started |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-tabs__item"> |
| <a href="../../../../configuration/" class="md-tabs__link"> |
| |
| |
| |
| |
| |
| Configuration |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-tabs__item"> |
| <a href="../../../../cli/" class="md-tabs__link"> |
| |
| |
| |
| |
| |
| CLI |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-tabs__item"> |
| <a href="../../../../api/" class="md-tabs__link"> |
| |
| |
| |
| |
| |
| API |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-tabs__item"> |
| <a href="../../../../contributing/" class="md-tabs__link"> |
| |
| |
| |
| |
| |
| Contributing |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-tabs__item"> |
| <a href="../../../../community/" class="md-tabs__link"> |
| |
| |
| |
| |
| |
| Community |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-tabs__item"> |
| <a href="../../../../verify-release/" class="md-tabs__link"> |
| |
| |
| |
| Releases |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-tabs__item md-tabs__item--active"> |
| <a href="../../" class="md-tabs__link"> |
| |
| |
| |
| Code Reference |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| </ul> |
| </div> |
| </nav> |
| |
| |
| </header> |
| |
| <div class="md-container" data-md-component="container"> |
| |
| |
| |
| |
| <main class="md-main" data-md-component="main"> |
| <div class="md-main__inner md-grid"> |
| |
| |
| |
| <div class="md-sidebar md-sidebar--primary" data-md-component="sidebar" data-md-type="navigation" > |
| <div class="md-sidebar__scrollwrap"> |
| <div class="md-sidebar__inner"> |
| |
| |
| |
| |
| |
| |
| <nav class="md-nav md-nav--primary md-nav--lifted" aria-label="Navigation" data-md-level="0"> |
| <label class="md-nav__title" for="__drawer"> |
| <a href="../../../.." title="PyIceberg" class="md-nav__button md-logo" aria-label="PyIceberg" data-md-component="logo"> |
| |
| <img src="../../../../assets/images/iceberg-logo-icon.png" alt="logo"> |
| |
| </a> |
| PyIceberg |
| </label> |
| |
| <div class="md-nav__source"> |
| <a href="https://github.com/apache/iceberg-python" title="Go to repository" class="md-source" data-md-component="source"> |
| <div class="md-source__icon md-icon"> |
| |
| <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512"><!--! Font Awesome Free 7.0.1 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License) Copyright 2025 Fonticons, Inc.--><path d="M439.6 236.1 244 40.5c-5.4-5.5-12.8-8.5-20.4-8.5s-15 3-20.4 8.4L162.5 81l51.5 51.5c27.1-9.1 52.7 16.8 43.4 43.7l49.7 49.7c34.2-11.8 61.2 31 35.5 56.7-26.5 26.5-70.2-2.9-56-37.3L240.3 199v121.9c25.3 12.5 22.3 41.8 9.1 55-6.4 6.4-15.2 10.1-24.3 10.1s-17.8-3.6-24.3-10.1c-17.6-17.6-11.1-46.9 11.2-56v-123c-20.8-8.5-24.6-30.7-18.6-45L142.6 101 8.5 235.1C3 240.6 0 247.9 0 255.5s3 15 8.5 20.4l195.6 195.7c5.4 5.4 12.7 8.4 20.4 8.4s15-3 20.4-8.4l194.7-194.7c5.4-5.4 8.4-12.8 8.4-20.4s-3-15-8.4-20.4"/></svg> |
| </div> |
| <div class="md-source__repository"> |
| apache/iceberg-python |
| </div> |
| </a> |
| </div> |
| |
| <ul class="md-nav__list" data-md-scrollfix> |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../../../.." class="md-nav__link"> |
| |
| |
| |
| <span class="md-ellipsis"> |
| Getting started |
| |
| </span> |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../../../../configuration/" class="md-nav__link"> |
| |
| |
| |
| <span class="md-ellipsis"> |
| Configuration |
| |
| </span> |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../../../../cli/" class="md-nav__link"> |
| |
| |
| |
| <span class="md-ellipsis"> |
| CLI |
| |
| </span> |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item md-nav__item--nested"> |
| |
| |
| |
| <input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_4" > |
| |
| |
| <div class="md-nav__link md-nav__container"> |
| <a href="../../../../api/" class="md-nav__link "> |
| |
| |
| |
| <span class="md-ellipsis"> |
| API |
| |
| </span> |
| |
| |
| </a> |
| |
| |
| <label class="md-nav__link " for="__nav_4" id="__nav_4_label" tabindex="0"> |
| <span class="md-nav__icon md-icon"></span> |
| </label> |
| |
| </div> |
| |
| <nav class="md-nav" data-md-level="1" aria-labelledby="__nav_4_label" aria-expanded="false"> |
| <label class="md-nav__title" for="__nav_4"> |
| <span class="md-nav__icon md-icon"></span> |
| API |
| </label> |
| <ul class="md-nav__list" data-md-scrollfix> |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../../../../row-filter-syntax/" class="md-nav__link"> |
| |
| |
| |
| <span class="md-ellipsis"> |
| Row Filter Syntax |
| |
| </span> |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../../../../expression-dsl/" class="md-nav__link"> |
| |
| |
| |
| <span class="md-ellipsis"> |
| Expression DSL |
| |
| </span> |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| </ul> |
| </nav> |
| |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../../../../contributing/" class="md-nav__link"> |
| |
| |
| |
| <span class="md-ellipsis"> |
| Contributing |
| |
| </span> |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../../../../community/" class="md-nav__link"> |
| |
| |
| |
| <span class="md-ellipsis"> |
| Community |
| |
| </span> |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item md-nav__item--nested"> |
| |
| |
| |
| <input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_7" > |
| |
| |
| <label class="md-nav__link" for="__nav_7" id="__nav_7_label" tabindex="0"> |
| |
| |
| |
| <span class="md-ellipsis"> |
| Releases |
| |
| </span> |
| |
| |
| <span class="md-nav__icon md-icon"></span> |
| </label> |
| |
| <nav class="md-nav" data-md-level="1" aria-labelledby="__nav_7_label" aria-expanded="false"> |
| <label class="md-nav__title" for="__nav_7"> |
| <span class="md-nav__icon md-icon"></span> |
| Releases |
| </label> |
| <ul class="md-nav__list" data-md-scrollfix> |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../../../../verify-release/" class="md-nav__link"> |
| |
| |
| |
| <span class="md-ellipsis"> |
| Verify a release |
| |
| </span> |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../../../../how-to-release/" class="md-nav__link"> |
| |
| |
| |
| <span class="md-ellipsis"> |
| How to release |
| |
| </span> |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="https://github.com/apache/iceberg-python/releases" class="md-nav__link"> |
| |
| |
| |
| <span class="md-ellipsis"> |
| Release Notes |
| |
| </span> |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../../../../nightly-build/" class="md-nav__link"> |
| |
| |
| |
| <span class="md-ellipsis"> |
| Nightly Build |
| |
| </span> |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| </ul> |
| </nav> |
| |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item md-nav__item--active md-nav__item--section md-nav__item--nested"> |
| |
| |
| |
| <input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_8" checked> |
| |
| |
| <!-- Label for the "Code Reference" section toggle (#__nav_8). It carries no tabindex: the previous empty value was not a valid integer. --> |
| <label class="md-nav__link" for="__nav_8" id="__nav_8_label"> |
| |
| <span class="md-ellipsis"> |
| Code Reference |
| |
| </span> |
| |
| <span class="md-nav__icon md-icon"></span> |
| </label> |
| |
| <nav class="md-nav" data-md-level="1" aria-labelledby="__nav_8_label" aria-expanded="true"> |
| <label class="md-nav__title" for="__nav_8"> |
| <span class="md-nav__icon md-icon"></span> |
| Code Reference |
| </label> |
| <ul class="md-nav__list" data-md-scrollfix> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item md-nav__item--active md-nav__item--nested"> |
| |
| |
| |
| <input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_8_1" checked> |
| |
| |
| <div class="md-nav__link md-nav__container"> |
| <a href="../../" class="md-nav__link "> |
| |
| |
| |
| <span class="md-ellipsis"> |
| pyiceberg |
| |
| </span> |
| |
| |
| </a> |
| |
| |
| <label class="md-nav__link " for="__nav_8_1" id="__nav_8_1_label" tabindex="0"> |
| <span class="md-nav__icon md-icon"></span> |
| </label> |
| |
| </div> |
| |
| <nav class="md-nav" data-md-level="2" aria-labelledby="__nav_8_1_label" aria-expanded="true"> |
| <label class="md-nav__title" for="__nav_8_1"> |
| <span class="md-nav__icon md-icon"></span> |
| pyiceberg |
| </label> |
| <ul class="md-nav__list" data-md-scrollfix> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item md-nav__item--nested"> |
| |
| |
| |
| <input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_8_1_1" > |
| |
| |
| <div class="md-nav__link md-nav__container"> |
| <a href="../../avro/" class="md-nav__link "> |
| |
| |
| |
| <span class="md-ellipsis"> |
| avro |
| |
| </span> |
| |
| |
| </a> |
| |
| |
| <label class="md-nav__link " for="__nav_8_1_1" id="__nav_8_1_1_label" tabindex="0"> |
| <span class="md-nav__icon md-icon"></span> |
| </label> |
| |
| </div> |
| |
| <nav class="md-nav" data-md-level="3" aria-labelledby="__nav_8_1_1_label" aria-expanded="false"> |
| <label class="md-nav__title" for="__nav_8_1_1"> |
| <span class="md-nav__icon md-icon"></span> |
| avro |
| </label> |
| <ul class="md-nav__list" data-md-scrollfix> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item md-nav__item--nested"> |
| |
| |
| |
| <input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_8_1_1_1" > |
| |
| |
| <div class="md-nav__link md-nav__container"> |
| <a href="../../avro/codecs/" class="md-nav__link "> |
| |
| |
| |
| <span class="md-ellipsis"> |
| codecs |
| |
| </span> |
| |
| |
| </a> |
| |
| |
| <label class="md-nav__link " for="__nav_8_1_1_1" id="__nav_8_1_1_1_label" tabindex="0"> |
| <span class="md-nav__icon md-icon"></span> |
| </label> |
| |
| </div> |
| |
| <nav class="md-nav" data-md-level="4" aria-labelledby="__nav_8_1_1_1_label" aria-expanded="false"> |
| <label class="md-nav__title" for="__nav_8_1_1_1"> |
| <span class="md-nav__icon md-icon"></span> |
| codecs |
| </label> |
| <ul class="md-nav__list" data-md-scrollfix> |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../../avro/codecs/bzip2/" class="md-nav__link"> |
| |
| |
| |
| <span class="md-ellipsis"> |
| bzip2 |
| |
| </span> |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../../avro/codecs/codec/" class="md-nav__link"> |
| |
| |
| |
| <span class="md-ellipsis"> |
| codec |
| |
| </span> |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../../avro/codecs/deflate/" class="md-nav__link"> |
| |
| |
| |
| <span class="md-ellipsis"> |
| deflate |
| |
| </span> |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../../avro/codecs/snappy_codec/" class="md-nav__link"> |
| |
| |
| |
| <span class="md-ellipsis"> |
| snappy_codec |
| |
| </span> |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../../avro/codecs/zstandard_codec/" class="md-nav__link"> |
| |
| |
| |
| <span class="md-ellipsis"> |
| zstandard_codec |
| |
| </span> |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| </ul> |
| </nav> |
| |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../../avro/decoder/" class="md-nav__link"> |
| |
| |
| |
| <span class="md-ellipsis"> |
| decoder |
| |
| </span> |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../../avro/encoder/" class="md-nav__link"> |
| |
| |
| |
| <span class="md-ellipsis"> |
| encoder |
| |
| </span> |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../../avro/file/" class="md-nav__link"> |
| |
| |
| |
| <span class="md-ellipsis"> |
| file |
| |
| </span> |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../../avro/reader/" class="md-nav__link"> |
| |
| |
| |
| <span class="md-ellipsis"> |
| reader |
| |
| </span> |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../../avro/resolver/" class="md-nav__link"> |
| |
| |
| |
| <span class="md-ellipsis"> |
| resolver |
| |
| </span> |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../../avro/writer/" class="md-nav__link"> |
| |
| |
| |
| <span class="md-ellipsis"> |
| writer |
| |
| </span> |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| </ul> |
| </nav> |
| |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item md-nav__item--nested"> |
| |
| |
| |
| <input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_8_1_2" > |
| |
| |
| <div class="md-nav__link md-nav__container"> |
| <a href="../../catalog/" class="md-nav__link "> |
| |
| |
| |
| <span class="md-ellipsis"> |
| catalog |
| |
| </span> |
| |
| |
| </a> |
| |
| |
| <label class="md-nav__link " for="__nav_8_1_2" id="__nav_8_1_2_label" tabindex="0"> |
| <span class="md-nav__icon md-icon"></span> |
| </label> |
| |
| </div> |
| |
| <nav class="md-nav" data-md-level="3" aria-labelledby="__nav_8_1_2_label" aria-expanded="false"> |
| <label class="md-nav__title" for="__nav_8_1_2"> |
| <span class="md-nav__icon md-icon"></span> |
| catalog |
| </label> |
| <ul class="md-nav__list" data-md-scrollfix> |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../../catalog/bigquery_metastore/" class="md-nav__link"> |
| |
| |
| |
| <span class="md-ellipsis"> |
| bigquery_metastore |
| |
| </span> |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../../catalog/dynamodb/" class="md-nav__link"> |
| |
| |
| |
| <span class="md-ellipsis"> |
| dynamodb |
| |
| </span> |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../../catalog/glue/" class="md-nav__link"> |
| |
| |
| |
| <span class="md-ellipsis"> |
| glue |
| |
| </span> |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../../catalog/hive/" class="md-nav__link"> |
| |
| |
| |
| <span class="md-ellipsis"> |
| hive |
| |
| </span> |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../../catalog/memory/" class="md-nav__link"> |
| |
| |
| |
| <span class="md-ellipsis"> |
| memory |
| |
| </span> |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../../catalog/noop/" class="md-nav__link"> |
| |
| |
| |
| <span class="md-ellipsis"> |
| noop |
| |
| </span> |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item md-nav__item--nested"> |
| |
| |
| |
| <input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_8_1_2_7" > |
| |
| |
| <div class="md-nav__link md-nav__container"> |
| <a href="../../catalog/rest/" class="md-nav__link "> |
| |
| |
| |
| <span class="md-ellipsis"> |
| rest |
| |
| </span> |
| |
| |
| </a> |
| |
| |
| <label class="md-nav__link " for="__nav_8_1_2_7" id="__nav_8_1_2_7_label" tabindex="0"> |
| <span class="md-nav__icon md-icon"></span> |
| </label> |
| |
| </div> |
| |
| <nav class="md-nav" data-md-level="4" aria-labelledby="__nav_8_1_2_7_label" aria-expanded="false"> |
| <label class="md-nav__title" for="__nav_8_1_2_7"> |
| <span class="md-nav__icon md-icon"></span> |
| rest |
| </label> |
| <ul class="md-nav__list" data-md-scrollfix> |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../../catalog/rest/auth/" class="md-nav__link"> |
| |
| |
| |
| <span class="md-ellipsis"> |
| auth |
| |
| </span> |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../../catalog/rest/response/" class="md-nav__link"> |
| |
| |
| |
| <span class="md-ellipsis"> |
| response |
| |
| </span> |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| </ul> |
| </nav> |
| |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../../catalog/sql/" class="md-nav__link"> |
| |
| |
| |
| <span class="md-ellipsis"> |
| sql |
| |
| </span> |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| </ul> |
| </nav> |
| |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item md-nav__item--nested"> |
| |
| |
| |
| <input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_8_1_3" > |
| |
| |
| <div class="md-nav__link md-nav__container"> |
| <a href="../../cli/" class="md-nav__link "> |
| |
| |
| |
| <span class="md-ellipsis"> |
| cli |
| |
| </span> |
| |
| |
| </a> |
| |
| |
| <label class="md-nav__link " for="__nav_8_1_3" id="__nav_8_1_3_label" tabindex="0"> |
| <span class="md-nav__icon md-icon"></span> |
| </label> |
| |
| </div> |
| |
| <nav class="md-nav" data-md-level="3" aria-labelledby="__nav_8_1_3_label" aria-expanded="false"> |
| <label class="md-nav__title" for="__nav_8_1_3"> |
| <span class="md-nav__icon md-icon"></span> |
| cli |
| </label> |
| <ul class="md-nav__list" data-md-scrollfix> |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../../cli/console/" class="md-nav__link"> |
| |
| |
| |
| <span class="md-ellipsis"> |
| console |
| |
| </span> |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../../cli/output/" class="md-nav__link"> |
| |
| |
| |
| <span class="md-ellipsis"> |
| output |
| |
| </span> |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| </ul> |
| </nav> |
| |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../../conversions/" class="md-nav__link"> |
| |
| |
| |
| <span class="md-ellipsis"> |
| conversions |
| |
| </span> |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../../exceptions/" class="md-nav__link"> |
| |
| |
| |
| <span class="md-ellipsis"> |
| exceptions |
| |
| </span> |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item md-nav__item--nested"> |
| |
| |
| |
| <input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_8_1_6" > |
| |
| |
| <div class="md-nav__link md-nav__container"> |
| <a href="../../expressions/" class="md-nav__link "> |
| |
| |
| |
| <span class="md-ellipsis"> |
| expressions |
| |
| </span> |
| |
| |
| </a> |
| |
| |
| <label class="md-nav__link " for="__nav_8_1_6" id="__nav_8_1_6_label" tabindex="0"> |
| <span class="md-nav__icon md-icon"></span> |
| </label> |
| |
| </div> |
| |
| <nav class="md-nav" data-md-level="3" aria-labelledby="__nav_8_1_6_label" aria-expanded="false"> |
| <label class="md-nav__title" for="__nav_8_1_6"> |
| <span class="md-nav__icon md-icon"></span> |
| expressions |
| </label> |
| <ul class="md-nav__list" data-md-scrollfix> |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../../expressions/literals/" class="md-nav__link"> |
| |
| |
| |
| <span class="md-ellipsis"> |
| literals |
| |
| </span> |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../../expressions/parser/" class="md-nav__link"> |
| |
| |
| |
| <span class="md-ellipsis"> |
| parser |
| |
| </span> |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../../expressions/visitors/" class="md-nav__link"> |
| |
| |
| |
| <span class="md-ellipsis"> |
| visitors |
| |
| </span> |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| </ul> |
| </nav> |
| |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item md-nav__item--active md-nav__item--nested"> |
| |
| |
| |
| <input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_8_1_7" checked> |
| |
| |
| <div class="md-nav__link md-nav__container"> |
| <a href="../" class="md-nav__link "> |
| |
| |
| |
| <span class="md-ellipsis"> |
| io |
| |
| </span> |
| |
| |
| </a> |
| |
| |
| <label class="md-nav__link " for="__nav_8_1_7" id="__nav_8_1_7_label" tabindex="0"> |
| <span class="md-nav__icon md-icon"></span> |
| </label> |
| |
| </div> |
| |
| <nav class="md-nav" data-md-level="3" aria-labelledby="__nav_8_1_7_label" aria-expanded="true"> |
| <label class="md-nav__title" for="__nav_8_1_7"> |
| <span class="md-nav__icon md-icon"></span> |
| io |
| </label> |
| <ul class="md-nav__list" data-md-scrollfix> |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../fsspec/" class="md-nav__link"> |
| |
| |
| |
| <span class="md-ellipsis"> |
| fsspec |
| |
| </span> |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item md-nav__item--active"> |
| |
| <input class="md-nav__toggle md-toggle" type="checkbox" id="__toc"> |
| |
| |
| |
| <label class="md-nav__link md-nav__link--active" for="__toc"> |
| |
| |
| |
| <span class="md-ellipsis"> |
| pyarrow |
| |
| </span> |
| |
| |
| <span class="md-nav__icon md-icon"></span> |
| </label> |
| |
| <a href="./" class="md-nav__link md-nav__link--active"> |
| |
| |
| |
| <span class="md-ellipsis"> |
| pyarrow |
| |
| </span> |
| |
| |
| </a> |
| |
| |
| |
| <nav class="md-nav md-nav--secondary" aria-label="Table of contents"> |
| |
| |
| |
| |
| <label class="md-nav__title" for="__toc"> |
| <span class="md-nav__icon md-icon"></span> |
| Table of contents |
| </label> |
| <ul class="md-nav__list" data-md-component="toc" data-md-scrollfix> |
| |
| <li class="md-nav__item"> |
| <a href="#pyiceberg.io.pyarrow" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| pyarrow |
| </span> |
| </a> |
| |
| </li> |
| |
| <li class="md-nav__item"> |
| <a href="#pyiceberg.io.pyarrow.ArrowScan" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| ArrowScan |
| </span> |
| </a> |
| |
| <nav class="md-nav" aria-label="ArrowScan"> |
| <ul class="md-nav__list"> |
| |
| <li class="md-nav__item"> |
| <a href="#pyiceberg.io.pyarrow.ArrowScan.to_record_batches" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| to_record_batches |
| </span> |
| </a> |
| |
| </li> |
| |
| <li class="md-nav__item"> |
| <a href="#pyiceberg.io.pyarrow.ArrowScan.to_table" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| to_table |
| </span> |
| </a> |
| |
| </li> |
| |
| </ul> |
| </nav> |
| |
| </li> |
| |
| <li class="md-nav__item"> |
| <a href="#pyiceberg.io.pyarrow.PyArrowFile" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| PyArrowFile |
| </span> |
| </a> |
| |
| <nav class="md-nav" aria-label="PyArrowFile"> |
| <ul class="md-nav__list"> |
| |
| <li class="md-nav__item"> |
| <a href="#pyiceberg.io.pyarrow.PyArrowFile.__len__" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| __len__ |
| </span> |
| </a> |
| |
| </li> |
| |
| <li class="md-nav__item"> |
| <a href="#pyiceberg.io.pyarrow.PyArrowFile.create" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| create |
| </span> |
| </a> |
| |
| </li> |
| |
| <li class="md-nav__item"> |
| <a href="#pyiceberg.io.pyarrow.PyArrowFile.exists" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| exists |
| </span> |
| </a> |
| |
| </li> |
| |
| <li class="md-nav__item"> |
| <a href="#pyiceberg.io.pyarrow.PyArrowFile.open" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| open |
| </span> |
| </a> |
| |
| </li> |
| |
| <li class="md-nav__item"> |
| <a href="#pyiceberg.io.pyarrow.PyArrowFile.to_input_file" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| to_input_file |
| </span> |
| </a> |
| |
| </li> |
| |
| </ul> |
| </nav> |
| |
| </li> |
| |
| <li class="md-nav__item"> |
| <a href="#pyiceberg.io.pyarrow.PyArrowFileIO" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| PyArrowFileIO |
| </span> |
| </a> |
| |
| <nav class="md-nav" aria-label="PyArrowFileIO"> |
| <ul class="md-nav__list"> |
| |
| <li class="md-nav__item"> |
| <a href="#pyiceberg.io.pyarrow.PyArrowFileIO.__getstate__" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| __getstate__ |
| </span> |
| </a> |
| |
| </li> |
| |
| <li class="md-nav__item"> |
| <a href="#pyiceberg.io.pyarrow.PyArrowFileIO.__setstate__" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| __setstate__ |
| </span> |
| </a> |
| |
| </li> |
| |
| <li class="md-nav__item"> |
| <a href="#pyiceberg.io.pyarrow.PyArrowFileIO.delete" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| delete |
| </span> |
| </a> |
| |
| </li> |
| |
| <li class="md-nav__item"> |
| <a href="#pyiceberg.io.pyarrow.PyArrowFileIO.new_input" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| new_input |
| </span> |
| </a> |
| |
| </li> |
| |
| <li class="md-nav__item"> |
| <a href="#pyiceberg.io.pyarrow.PyArrowFileIO.new_output" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| new_output |
| </span> |
| </a> |
| |
| </li> |
| |
| <li class="md-nav__item"> |
| <a href="#pyiceberg.io.pyarrow.PyArrowFileIO.parse_location" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| parse_location |
| </span> |
| </a> |
| |
| </li> |
| |
| </ul> |
| </nav> |
| |
| </li> |
| |
| <li class="md-nav__item"> |
| <a href="#pyiceberg.io.pyarrow.PyArrowSchemaVisitor" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| PyArrowSchemaVisitor |
| </span> |
| </a> |
| |
| <nav class="md-nav" aria-label="PyArrowSchemaVisitor"> |
| <ul class="md-nav__list"> |
| |
| <li class="md-nav__item"> |
| <a href="#pyiceberg.io.pyarrow.PyArrowSchemaVisitor.after_field" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| after_field |
| </span> |
| </a> |
| |
| </li> |
| |
| <li class="md-nav__item"> |
| <a href="#pyiceberg.io.pyarrow.PyArrowSchemaVisitor.after_list_element" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| after_list_element |
| </span> |
| </a> |
| |
| </li> |
| |
| <li class="md-nav__item"> |
| <a href="#pyiceberg.io.pyarrow.PyArrowSchemaVisitor.after_map_key" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| after_map_key |
| </span> |
| </a> |
| |
| </li> |
| |
| <li class="md-nav__item"> |
| <a href="#pyiceberg.io.pyarrow.PyArrowSchemaVisitor.after_map_value" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| after_map_value |
| </span> |
| </a> |
| |
| </li> |
| |
| <li class="md-nav__item"> |
| <a href="#pyiceberg.io.pyarrow.PyArrowSchemaVisitor.before_field" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| before_field |
| </span> |
| </a> |
| |
| </li> |
| |
| <li class="md-nav__item"> |
| <a href="#pyiceberg.io.pyarrow.PyArrowSchemaVisitor.before_list_element" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| before_list_element |
| </span> |
| </a> |
| |
| </li> |
| |
| <li class="md-nav__item"> |
| <a href="#pyiceberg.io.pyarrow.PyArrowSchemaVisitor.before_map_key" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| before_map_key |
| </span> |
| </a> |
| |
| </li> |
| |
| <li class="md-nav__item"> |
| <a href="#pyiceberg.io.pyarrow.PyArrowSchemaVisitor.before_map_value" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| before_map_value |
| </span> |
| </a> |
| |
| </li> |
| |
| <li class="md-nav__item"> |
| <a href="#pyiceberg.io.pyarrow.PyArrowSchemaVisitor.field" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| field |
| </span> |
| </a> |
| |
| </li> |
| |
| <li class="md-nav__item"> |
| <a href="#pyiceberg.io.pyarrow.PyArrowSchemaVisitor.list" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| list |
| </span> |
| </a> |
| |
| </li> |
| |
| <li class="md-nav__item"> |
| <a href="#pyiceberg.io.pyarrow.PyArrowSchemaVisitor.map" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| map |
| </span> |
| </a> |
| |
| </li> |
| |
| <li class="md-nav__item"> |
| <a href="#pyiceberg.io.pyarrow.PyArrowSchemaVisitor.primitive" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| primitive |
| </span> |
| </a> |
| |
| </li> |
| |
| <li class="md-nav__item"> |
| <a href="#pyiceberg.io.pyarrow.PyArrowSchemaVisitor.schema" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| schema |
| </span> |
| </a> |
| |
| </li> |
| |
| <li class="md-nav__item"> |
| <a href="#pyiceberg.io.pyarrow.PyArrowSchemaVisitor.struct" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| struct |
| </span> |
| </a> |
| |
| </li> |
| |
| </ul> |
| </nav> |
| |
| </li> |
| |
| <li class="md-nav__item"> |
| <a href="#pyiceberg.io.pyarrow.UnsupportedPyArrowTypeException" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| UnsupportedPyArrowTypeException |
| </span> |
| </a> |
| |
| </li> |
| |
| <li class="md-nav__item"> |
| <a href="#pyiceberg.io.pyarrow.compute_statistics_plan" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| compute_statistics_plan |
| </span> |
| </a> |
| |
| </li> |
| |
| <li class="md-nav__item"> |
| <a href="#pyiceberg.io.pyarrow.data_file_statistics_from_parquet_metadata" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| data_file_statistics_from_parquet_metadata |
| </span> |
| </a> |
| |
| </li> |
| |
| <li class="md-nav__item"> |
| <a href="#pyiceberg.io.pyarrow.parquet_path_to_id_mapping" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| parquet_path_to_id_mapping |
| </span> |
| </a> |
| |
| </li> |
| |
| <li class="md-nav__item"> |
| <a href="#pyiceberg.io.pyarrow.visit_pyarrow" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| visit_pyarrow |
| </span> |
| </a> |
| |
| </li> |
| |
| </ul> |
| |
| </nav> |
| |
| </li> |
| |
| |
| |
| |
| </ul> |
| </nav> |
| |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../../manifest/" class="md-nav__link"> |
| |
| |
| |
| <span class="md-ellipsis"> |
| manifest |
| |
| </span> |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../../partitioning/" class="md-nav__link"> |
| |
| |
| |
| <span class="md-ellipsis"> |
| partitioning |
| |
| </span> |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../../schema/" class="md-nav__link"> |
| |
| |
| |
| <span class="md-ellipsis"> |
| schema |
| |
| </span> |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../../serializers/" class="md-nav__link"> |
| |
| |
| |
| <span class="md-ellipsis"> |
| serializers |
| |
| </span> |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item md-nav__item--nested"> |
| |
| |
| |
| <input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_8_1_12" > |
| |
| |
| <div class="md-nav__link md-nav__container"> |
| <a href="../../table/" class="md-nav__link "> |
| |
| |
| |
| <span class="md-ellipsis"> |
| table |
| |
| </span> |
| |
| |
| </a> |
| |
| |
| <label class="md-nav__link " for="__nav_8_1_12" id="__nav_8_1_12_label" tabindex="0"> |
| <span class="md-nav__icon md-icon"></span> |
| </label> |
| |
| </div> |
| |
| <nav class="md-nav" data-md-level="3" aria-labelledby="__nav_8_1_12_label" aria-expanded="false"> |
| <label class="md-nav__title" for="__nav_8_1_12"> |
| <span class="md-nav__icon md-icon"></span> |
| table |
| </label> |
| <ul class="md-nav__list" data-md-scrollfix> |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../../table/inspect/" class="md-nav__link"> |
| |
| |
| |
| <span class="md-ellipsis"> |
| inspect |
| |
| </span> |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../../table/locations/" class="md-nav__link"> |
| |
| |
| |
| <span class="md-ellipsis"> |
| locations |
| |
| </span> |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../../table/maintenance/" class="md-nav__link"> |
| |
| |
| |
| <span class="md-ellipsis"> |
| maintenance |
| |
| </span> |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../../table/metadata/" class="md-nav__link"> |
| |
| |
| |
| <span class="md-ellipsis"> |
| metadata |
| |
| </span> |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../../table/name_mapping/" class="md-nav__link"> |
| |
| |
| |
| <span class="md-ellipsis"> |
| name_mapping |
| |
| </span> |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../../table/puffin/" class="md-nav__link"> |
| |
| |
| |
| <span class="md-ellipsis"> |
| puffin |
| |
| </span> |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../../table/refs/" class="md-nav__link"> |
| |
| |
| |
| <span class="md-ellipsis"> |
| refs |
| |
| </span> |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../../table/snapshots/" class="md-nav__link"> |
| |
| |
| |
| <span class="md-ellipsis"> |
| snapshots |
| |
| </span> |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../../table/sorting/" class="md-nav__link"> |
| |
| |
| |
| <span class="md-ellipsis"> |
| sorting |
| |
| </span> |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../../table/statistics/" class="md-nav__link"> |
| |
| |
| |
| <span class="md-ellipsis"> |
| statistics |
| |
| </span> |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item md-nav__item--nested"> |
| |
| |
| |
| <input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_8_1_12_11" > |
| |
| |
| <div class="md-nav__link md-nav__container"> |
| <a href="../../table/update/" class="md-nav__link "> |
| |
| |
| |
| <span class="md-ellipsis"> |
| update |
| |
| </span> |
| |
| |
| </a> |
| |
| |
| <label class="md-nav__link " for="__nav_8_1_12_11" id="__nav_8_1_12_11_label" tabindex="0"> |
| <span class="md-nav__icon md-icon"></span> |
| </label> |
| |
| </div> |
| |
| <nav class="md-nav" data-md-level="4" aria-labelledby="__nav_8_1_12_11_label" aria-expanded="false"> |
| <label class="md-nav__title" for="__nav_8_1_12_11"> |
| <span class="md-nav__icon md-icon"></span> |
| update |
| </label> |
| <ul class="md-nav__list" data-md-scrollfix> |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../../table/update/schema/" class="md-nav__link"> |
| |
| |
| |
| <span class="md-ellipsis"> |
| schema |
| |
| </span> |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../../table/update/snapshot/" class="md-nav__link"> |
| |
| |
| |
| <span class="md-ellipsis"> |
| snapshot |
| |
| </span> |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../../table/update/sorting/" class="md-nav__link"> |
| |
| |
| |
| <span class="md-ellipsis"> |
| sorting |
| |
| </span> |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../../table/update/spec/" class="md-nav__link"> |
| |
| |
| |
| <span class="md-ellipsis"> |
| spec |
| |
| </span> |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../../table/update/statistics/" class="md-nav__link"> |
| |
| |
| |
| <span class="md-ellipsis"> |
| statistics |
| |
| </span> |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../../table/update/validate/" class="md-nav__link"> |
| |
| |
| |
| <span class="md-ellipsis"> |
| validate |
| |
| </span> |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| </ul> |
| </nav> |
| |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../../table/upsert_util/" class="md-nav__link"> |
| |
| |
| |
| <span class="md-ellipsis"> |
| upsert_util |
| |
| </span> |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| </ul> |
| </nav> |
| |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../../transforms/" class="md-nav__link"> |
| |
| |
| |
| <span class="md-ellipsis"> |
| transforms |
| |
| </span> |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../../typedef/" class="md-nav__link"> |
| |
| |
| |
| <span class="md-ellipsis"> |
| typedef |
| |
| </span> |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../../types/" class="md-nav__link"> |
| |
| |
| |
| <span class="md-ellipsis"> |
| types |
| |
| </span> |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item md-nav__item--nested"> |
| |
| |
| |
| <input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_8_1_16" > |
| |
| |
| <div class="md-nav__link md-nav__container"> |
| <a href="../../utils/" class="md-nav__link "> |
| |
| |
| |
| <span class="md-ellipsis"> |
| utils |
| |
| </span> |
| |
| |
| </a> |
| |
| |
| <label class="md-nav__link " for="__nav_8_1_16" id="__nav_8_1_16_label" tabindex="0"> |
| <span class="md-nav__icon md-icon"></span> |
| </label> |
| |
| </div> |
| |
| <nav class="md-nav" data-md-level="3" aria-labelledby="__nav_8_1_16_label" aria-expanded="false"> |
| <label class="md-nav__title" for="__nav_8_1_16"> |
| <span class="md-nav__icon md-icon"></span> |
| utils |
| </label> |
| <ul class="md-nav__list" data-md-scrollfix> |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../../utils/bin_packing/" class="md-nav__link"> |
| |
| |
| |
| <span class="md-ellipsis"> |
| bin_packing |
| |
| </span> |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../../utils/concurrent/" class="md-nav__link"> |
| |
| |
| |
| <span class="md-ellipsis"> |
| concurrent |
| |
| </span> |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../../utils/config/" class="md-nav__link"> |
| |
| |
| |
| <span class="md-ellipsis"> |
| config |
| |
| </span> |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../../utils/datetime/" class="md-nav__link"> |
| |
| |
| |
| <span class="md-ellipsis"> |
| datetime |
| |
| </span> |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../../utils/decimal/" class="md-nav__link"> |
| |
| |
| |
| <span class="md-ellipsis"> |
| decimal |
| |
| </span> |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../../utils/deprecated/" class="md-nav__link"> |
| |
| |
| |
| <span class="md-ellipsis"> |
| deprecated |
| |
| </span> |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../../utils/lazydict/" class="md-nav__link"> |
| |
| |
| |
| <span class="md-ellipsis"> |
| lazydict |
| |
| </span> |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../../utils/parsing/" class="md-nav__link"> |
| |
| |
| |
| <span class="md-ellipsis"> |
| parsing |
| |
| </span> |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../../utils/properties/" class="md-nav__link"> |
| |
| |
| |
| <span class="md-ellipsis"> |
| properties |
| |
| </span> |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../../utils/schema_conversion/" class="md-nav__link"> |
| |
| |
| |
| <span class="md-ellipsis"> |
| schema_conversion |
| |
| </span> |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../../utils/singleton/" class="md-nav__link"> |
| |
| |
| |
| <span class="md-ellipsis"> |
| singleton |
| |
| </span> |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../../utils/truncate/" class="md-nav__link"> |
| |
| |
| |
| <span class="md-ellipsis"> |
| truncate |
| |
| </span> |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| </ul> |
| </nav> |
| |
| </li> |
| |
| |
| |
| |
| </ul> |
| </nav> |
| |
| </li> |
| |
| |
| |
| |
| </ul> |
| </nav> |
| |
| </li> |
| |
| |
| |
| </ul> |
| </nav> |
| </div> |
| </div> |
| </div> |
| |
| |
| |
| <div class="md-sidebar md-sidebar--secondary" data-md-component="sidebar" data-md-type="toc" > |
| <div class="md-sidebar__scrollwrap"> |
| <div class="md-sidebar__inner"> |
| |
| |
| <nav class="md-nav md-nav--secondary" aria-label="Table of contents"> |
| |
| |
| |
| |
| <label class="md-nav__title" for="__toc"> |
| <span class="md-nav__icon md-icon"></span> |
| Table of contents |
| </label> |
| <ul class="md-nav__list" data-md-component="toc" data-md-scrollfix> |
| |
| <li class="md-nav__item"> |
| <a href="#pyiceberg.io.pyarrow" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| pyarrow |
| </span> |
| </a> |
| |
| </li> |
| |
| <li class="md-nav__item"> |
| <a href="#pyiceberg.io.pyarrow.ArrowScan" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| ArrowScan |
| </span> |
| </a> |
| |
| <nav class="md-nav" aria-label="ArrowScan"> |
| <ul class="md-nav__list"> |
| |
| <li class="md-nav__item"> |
| <a href="#pyiceberg.io.pyarrow.ArrowScan.to_record_batches" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| to_record_batches |
| </span> |
| </a> |
| |
| </li> |
| |
| <li class="md-nav__item"> |
| <a href="#pyiceberg.io.pyarrow.ArrowScan.to_table" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| to_table |
| </span> |
| </a> |
| |
| </li> |
| |
| </ul> |
| </nav> |
| |
| </li> |
| |
| <li class="md-nav__item"> |
| <a href="#pyiceberg.io.pyarrow.PyArrowFile" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| PyArrowFile |
| </span> |
| </a> |
| |
| <nav class="md-nav" aria-label="PyArrowFile"> |
| <ul class="md-nav__list"> |
| |
| <li class="md-nav__item"> |
| <a href="#pyiceberg.io.pyarrow.PyArrowFile.__len__" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| __len__ |
| </span> |
| </a> |
| |
| </li> |
| |
| <li class="md-nav__item"> |
| <a href="#pyiceberg.io.pyarrow.PyArrowFile.create" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| create |
| </span> |
| </a> |
| |
| </li> |
| |
| <li class="md-nav__item"> |
| <a href="#pyiceberg.io.pyarrow.PyArrowFile.exists" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| exists |
| </span> |
| </a> |
| |
| </li> |
| |
| <li class="md-nav__item"> |
| <a href="#pyiceberg.io.pyarrow.PyArrowFile.open" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| open |
| </span> |
| </a> |
| |
| </li> |
| |
| <li class="md-nav__item"> |
| <a href="#pyiceberg.io.pyarrow.PyArrowFile.to_input_file" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| to_input_file |
| </span> |
| </a> |
| |
| </li> |
| |
| </ul> |
| </nav> |
| |
| </li> |
| |
| <li class="md-nav__item"> |
| <a href="#pyiceberg.io.pyarrow.PyArrowFileIO" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| PyArrowFileIO |
| </span> |
| </a> |
| |
| <nav class="md-nav" aria-label="PyArrowFileIO"> |
| <ul class="md-nav__list"> |
| |
| <li class="md-nav__item"> |
| <a href="#pyiceberg.io.pyarrow.PyArrowFileIO.__getstate__" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| __getstate__ |
| </span> |
| </a> |
| |
| </li> |
| |
| <li class="md-nav__item"> |
| <a href="#pyiceberg.io.pyarrow.PyArrowFileIO.__setstate__" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| __setstate__ |
| </span> |
| </a> |
| |
| </li> |
| |
| <li class="md-nav__item"> |
| <a href="#pyiceberg.io.pyarrow.PyArrowFileIO.delete" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| delete |
| </span> |
| </a> |
| |
| </li> |
| |
| <li class="md-nav__item"> |
| <a href="#pyiceberg.io.pyarrow.PyArrowFileIO.new_input" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| new_input |
| </span> |
| </a> |
| |
| </li> |
| |
| <li class="md-nav__item"> |
| <a href="#pyiceberg.io.pyarrow.PyArrowFileIO.new_output" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| new_output |
| </span> |
| </a> |
| |
| </li> |
| |
| <li class="md-nav__item"> |
| <a href="#pyiceberg.io.pyarrow.PyArrowFileIO.parse_location" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| parse_location |
| </span> |
| </a> |
| |
| </li> |
| |
| </ul> |
| </nav> |
| |
| </li> |
| |
| <li class="md-nav__item"> |
| <a href="#pyiceberg.io.pyarrow.PyArrowSchemaVisitor" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| PyArrowSchemaVisitor |
| </span> |
| </a> |
| |
| <nav class="md-nav" aria-label="PyArrowSchemaVisitor"> |
| <ul class="md-nav__list"> |
| |
| <li class="md-nav__item"> |
| <a href="#pyiceberg.io.pyarrow.PyArrowSchemaVisitor.after_field" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| after_field |
| </span> |
| </a> |
| |
| </li> |
| |
| <li class="md-nav__item"> |
| <a href="#pyiceberg.io.pyarrow.PyArrowSchemaVisitor.after_list_element" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| after_list_element |
| </span> |
| </a> |
| |
| </li> |
| |
| <li class="md-nav__item"> |
| <a href="#pyiceberg.io.pyarrow.PyArrowSchemaVisitor.after_map_key" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| after_map_key |
| </span> |
| </a> |
| |
| </li> |
| |
| <li class="md-nav__item"> |
| <a href="#pyiceberg.io.pyarrow.PyArrowSchemaVisitor.after_map_value" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| after_map_value |
| </span> |
| </a> |
| |
| </li> |
| |
| <li class="md-nav__item"> |
| <a href="#pyiceberg.io.pyarrow.PyArrowSchemaVisitor.before_field" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| before_field |
| </span> |
| </a> |
| |
| </li> |
| |
| <li class="md-nav__item"> |
| <a href="#pyiceberg.io.pyarrow.PyArrowSchemaVisitor.before_list_element" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| before_list_element |
| </span> |
| </a> |
| |
| </li> |
| |
| <li class="md-nav__item"> |
| <a href="#pyiceberg.io.pyarrow.PyArrowSchemaVisitor.before_map_key" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| before_map_key |
| </span> |
| </a> |
| |
| </li> |
| |
| <li class="md-nav__item"> |
| <a href="#pyiceberg.io.pyarrow.PyArrowSchemaVisitor.before_map_value" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| before_map_value |
| </span> |
| </a> |
| |
| </li> |
| |
| <li class="md-nav__item"> |
| <a href="#pyiceberg.io.pyarrow.PyArrowSchemaVisitor.field" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| field |
| </span> |
| </a> |
| |
| </li> |
| |
| <li class="md-nav__item"> |
| <a href="#pyiceberg.io.pyarrow.PyArrowSchemaVisitor.list" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| list |
| </span> |
| </a> |
| |
| </li> |
| |
| <li class="md-nav__item"> |
| <a href="#pyiceberg.io.pyarrow.PyArrowSchemaVisitor.map" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| map |
| </span> |
| </a> |
| |
| </li> |
| |
| <li class="md-nav__item"> |
| <a href="#pyiceberg.io.pyarrow.PyArrowSchemaVisitor.primitive" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| primitive |
| </span> |
| </a> |
| |
| </li> |
| |
| <li class="md-nav__item"> |
| <a href="#pyiceberg.io.pyarrow.PyArrowSchemaVisitor.schema" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| schema |
| </span> |
| </a> |
| |
| </li> |
| |
| <li class="md-nav__item"> |
| <a href="#pyiceberg.io.pyarrow.PyArrowSchemaVisitor.struct" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| struct |
| </span> |
| </a> |
| |
| </li> |
| |
| </ul> |
| </nav> |
| |
| </li> |
| |
| <li class="md-nav__item"> |
| <a href="#pyiceberg.io.pyarrow.UnsupportedPyArrowTypeException" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| UnsupportedPyArrowTypeException |
| </span> |
| </a> |
| |
| </li> |
| |
| <li class="md-nav__item"> |
| <a href="#pyiceberg.io.pyarrow.compute_statistics_plan" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| compute_statistics_plan |
| </span> |
| </a> |
| |
| </li> |
| |
| <li class="md-nav__item"> |
| <a href="#pyiceberg.io.pyarrow.data_file_statistics_from_parquet_metadata" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| data_file_statistics_from_parquet_metadata |
| </span> |
| </a> |
| |
| </li> |
| |
| <li class="md-nav__item"> |
| <a href="#pyiceberg.io.pyarrow.parquet_path_to_id_mapping" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| parquet_path_to_id_mapping |
| </span> |
| </a> |
| |
| </li> |
| |
| <li class="md-nav__item"> |
| <a href="#pyiceberg.io.pyarrow.visit_pyarrow" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| visit_pyarrow |
| </span> |
| </a> |
| |
| </li> |
| |
| </ul> |
| |
| </nav> |
| </div> |
| </div> |
| </div> |
| |
| |
| |
| <div class="md-content" data-md-component="content"> |
| <article class="md-content__inner md-typeset"> |
| |
| |
| |
| |
| |
| |
| |
| |
| <h1>pyarrow</h1> |
| |
| <div class="doc doc-object doc-module"> |
| |
| |
| |
| <a id="pyiceberg.io.pyarrow"></a> |
| <div class="doc doc-contents first"> |
| |
| <p>A FileIO implementation that uses pyarrow.fs to read and write table files.</p> |
| <p>This file contains a FileIO implementation that relies on the filesystem interface provided |
| by PyArrow. It uses PyArrow's <code>from_uri</code> method, which infers the correct filesystem |
| type to use. Theoretically, this allows the supported storage types to grow naturally |
| with the PyArrow library.</p> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <div class="doc doc-children"> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <div class="doc doc-object doc-class"> |
| |
| |
| |
| <h2 id="pyiceberg.io.pyarrow.ArrowScan" class="doc doc-heading"> |
| <code>ArrowScan</code> |
| |
| |
| <a href="#pyiceberg.io.pyarrow.ArrowScan" class="headerlink" title="Permanent link">¶</a></h2> |
| |
| |
| <div class="doc doc-contents "> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <details class="quote"> |
| <summary>Source code in <code>pyiceberg/io/pyarrow.py</code></summary> |
| <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-1621">1621</a></span> |
| <span class="normal"><a href="#__codelineno-0-1622">1622</a></span> |
| <span class="normal"><a href="#__codelineno-0-1623">1623</a></span> |
| <span class="normal"><a href="#__codelineno-0-1624">1624</a></span> |
| <span class="normal"><a href="#__codelineno-0-1625">1625</a></span> |
| <span class="normal"><a href="#__codelineno-0-1626">1626</a></span> |
| <span class="normal"><a href="#__codelineno-0-1627">1627</a></span> |
| <span class="normal"><a href="#__codelineno-0-1628">1628</a></span> |
| <span class="normal"><a href="#__codelineno-0-1629">1629</a></span> |
| <span class="normal"><a href="#__codelineno-0-1630">1630</a></span> |
| <span class="normal"><a href="#__codelineno-0-1631">1631</a></span> |
| <span class="normal"><a href="#__codelineno-0-1632">1632</a></span> |
| <span class="normal"><a href="#__codelineno-0-1633">1633</a></span> |
| <span class="normal"><a href="#__codelineno-0-1634">1634</a></span> |
| <span class="normal"><a href="#__codelineno-0-1635">1635</a></span> |
| <span class="normal"><a href="#__codelineno-0-1636">1636</a></span> |
| <span class="normal"><a href="#__codelineno-0-1637">1637</a></span> |
| <span class="normal"><a href="#__codelineno-0-1638">1638</a></span> |
| <span class="normal"><a href="#__codelineno-0-1639">1639</a></span> |
| <span class="normal"><a href="#__codelineno-0-1640">1640</a></span> |
| <span class="normal"><a href="#__codelineno-0-1641">1641</a></span> |
| <span class="normal"><a href="#__codelineno-0-1642">1642</a></span> |
| <span class="normal"><a href="#__codelineno-0-1643">1643</a></span> |
| <span class="normal"><a href="#__codelineno-0-1644">1644</a></span> |
| <span class="normal"><a href="#__codelineno-0-1645">1645</a></span> |
| <span class="normal"><a href="#__codelineno-0-1646">1646</a></span> |
| <span class="normal"><a href="#__codelineno-0-1647">1647</a></span> |
| <span class="normal"><a href="#__codelineno-0-1648">1648</a></span> |
| <span class="normal"><a href="#__codelineno-0-1649">1649</a></span> |
| <span class="normal"><a href="#__codelineno-0-1650">1650</a></span> |
| <span class="normal"><a href="#__codelineno-0-1651">1651</a></span> |
| <span class="normal"><a href="#__codelineno-0-1652">1652</a></span> |
| <span class="normal"><a href="#__codelineno-0-1653">1653</a></span> |
| <span class="normal"><a href="#__codelineno-0-1654">1654</a></span> |
| <span class="normal"><a href="#__codelineno-0-1655">1655</a></span> |
| <span class="normal"><a href="#__codelineno-0-1656">1656</a></span> |
| <span class="normal"><a href="#__codelineno-0-1657">1657</a></span> |
| <span class="normal"><a href="#__codelineno-0-1658">1658</a></span> |
| <span class="normal"><a href="#__codelineno-0-1659">1659</a></span> |
| <span class="normal"><a href="#__codelineno-0-1660">1660</a></span> |
| <span class="normal"><a href="#__codelineno-0-1661">1661</a></span> |
| <span class="normal"><a href="#__codelineno-0-1662">1662</a></span> |
| <span class="normal"><a href="#__codelineno-0-1663">1663</a></span> |
| <span class="normal"><a href="#__codelineno-0-1664">1664</a></span> |
| <span class="normal"><a href="#__codelineno-0-1665">1665</a></span> |
| <span class="normal"><a href="#__codelineno-0-1666">1666</a></span> |
| <span class="normal"><a href="#__codelineno-0-1667">1667</a></span> |
| <span class="normal"><a href="#__codelineno-0-1668">1668</a></span> |
| <span class="normal"><a href="#__codelineno-0-1669">1669</a></span> |
| <span class="normal"><a href="#__codelineno-0-1670">1670</a></span> |
| <span class="normal"><a href="#__codelineno-0-1671">1671</a></span> |
| <span class="normal"><a href="#__codelineno-0-1672">1672</a></span> |
| <span class="normal"><a href="#__codelineno-0-1673">1673</a></span> |
| <span class="normal"><a href="#__codelineno-0-1674">1674</a></span> |
| <span class="normal"><a href="#__codelineno-0-1675">1675</a></span> |
| <span class="normal"><a href="#__codelineno-0-1676">1676</a></span> |
| <span class="normal"><a href="#__codelineno-0-1677">1677</a></span> |
| <span class="normal"><a href="#__codelineno-0-1678">1678</a></span> |
| <span class="normal"><a href="#__codelineno-0-1679">1679</a></span> |
| <span class="normal"><a href="#__codelineno-0-1680">1680</a></span> |
| <span class="normal"><a href="#__codelineno-0-1681">1681</a></span> |
| <span class="normal"><a href="#__codelineno-0-1682">1682</a></span> |
| <span class="normal"><a href="#__codelineno-0-1683">1683</a></span> |
| <span class="normal"><a href="#__codelineno-0-1684">1684</a></span> |
| <span class="normal"><a href="#__codelineno-0-1685">1685</a></span> |
| <span class="normal"><a href="#__codelineno-0-1686">1686</a></span> |
| <span class="normal"><a href="#__codelineno-0-1687">1687</a></span> |
| <span class="normal"><a href="#__codelineno-0-1688">1688</a></span> |
| <span class="normal"><a href="#__codelineno-0-1689">1689</a></span> |
| <span class="normal"><a href="#__codelineno-0-1690">1690</a></span> |
| <span class="normal"><a href="#__codelineno-0-1691">1691</a></span> |
| <span class="normal"><a href="#__codelineno-0-1692">1692</a></span> |
| <span class="normal"><a href="#__codelineno-0-1693">1693</a></span> |
| <span class="normal"><a href="#__codelineno-0-1694">1694</a></span> |
| <span class="normal"><a href="#__codelineno-0-1695">1695</a></span> |
| <span class="normal"><a href="#__codelineno-0-1696">1696</a></span> |
| <span class="normal"><a href="#__codelineno-0-1697">1697</a></span> |
| <span class="normal"><a href="#__codelineno-0-1698">1698</a></span> |
| <span class="normal"><a href="#__codelineno-0-1699">1699</a></span> |
| <span class="normal"><a href="#__codelineno-0-1700">1700</a></span> |
| <span class="normal"><a href="#__codelineno-0-1701">1701</a></span> |
| <span class="normal"><a href="#__codelineno-0-1702">1702</a></span> |
| <span class="normal"><a href="#__codelineno-0-1703">1703</a></span> |
| <span class="normal"><a href="#__codelineno-0-1704">1704</a></span> |
| <span class="normal"><a href="#__codelineno-0-1705">1705</a></span> |
| <span class="normal"><a href="#__codelineno-0-1706">1706</a></span> |
| <span class="normal"><a href="#__codelineno-0-1707">1707</a></span> |
| <span class="normal"><a href="#__codelineno-0-1708">1708</a></span> |
| <span class="normal"><a href="#__codelineno-0-1709">1709</a></span> |
| <span class="normal"><a href="#__codelineno-0-1710">1710</a></span> |
| <span class="normal"><a href="#__codelineno-0-1711">1711</a></span> |
| <span class="normal"><a href="#__codelineno-0-1712">1712</a></span> |
| <span class="normal"><a href="#__codelineno-0-1713">1713</a></span> |
| <span class="normal"><a href="#__codelineno-0-1714">1714</a></span> |
| <span class="normal"><a href="#__codelineno-0-1715">1715</a></span> |
| <span class="normal"><a href="#__codelineno-0-1716">1716</a></span> |
| <span class="normal"><a href="#__codelineno-0-1717">1717</a></span> |
| <span class="normal"><a href="#__codelineno-0-1718">1718</a></span> |
| <span class="normal"><a href="#__codelineno-0-1719">1719</a></span> |
| <span class="normal"><a href="#__codelineno-0-1720">1720</a></span> |
| <span class="normal"><a href="#__codelineno-0-1721">1721</a></span> |
| <span class="normal"><a href="#__codelineno-0-1722">1722</a></span> |
| <span class="normal"><a href="#__codelineno-0-1723">1723</a></span> |
| <span class="normal"><a href="#__codelineno-0-1724">1724</a></span> |
| <span class="normal"><a href="#__codelineno-0-1725">1725</a></span> |
| <span class="normal"><a href="#__codelineno-0-1726">1726</a></span> |
| <span class="normal"><a href="#__codelineno-0-1727">1727</a></span> |
| <span class="normal"><a href="#__codelineno-0-1728">1728</a></span> |
| <span class="normal"><a href="#__codelineno-0-1729">1729</a></span> |
| <span class="normal"><a href="#__codelineno-0-1730">1730</a></span> |
| <span class="normal"><a href="#__codelineno-0-1731">1731</a></span> |
| <span class="normal"><a href="#__codelineno-0-1732">1732</a></span> |
| <span class="normal"><a href="#__codelineno-0-1733">1733</a></span> |
| <span class="normal"><a href="#__codelineno-0-1734">1734</a></span> |
| <span class="normal"><a href="#__codelineno-0-1735">1735</a></span> |
| <span class="normal"><a href="#__codelineno-0-1736">1736</a></span> |
| <span class="normal"><a href="#__codelineno-0-1737">1737</a></span> |
| <span class="normal"><a href="#__codelineno-0-1738">1738</a></span> |
| <span class="normal"><a href="#__codelineno-0-1739">1739</a></span> |
| <span class="normal"><a href="#__codelineno-0-1740">1740</a></span> |
| <span class="normal"><a href="#__codelineno-0-1741">1741</a></span> |
| <span class="normal"><a href="#__codelineno-0-1742">1742</a></span> |
| <span class="normal"><a href="#__codelineno-0-1743">1743</a></span> |
| <span class="normal"><a href="#__codelineno-0-1744">1744</a></span> |
| <span class="normal"><a href="#__codelineno-0-1745">1745</a></span> |
| <span class="normal"><a href="#__codelineno-0-1746">1746</a></span> |
| <span class="normal"><a href="#__codelineno-0-1747">1747</a></span> |
| <span class="normal"><a href="#__codelineno-0-1748">1748</a></span> |
| <span class="normal"><a href="#__codelineno-0-1749">1749</a></span> |
| <span class="normal"><a href="#__codelineno-0-1750">1750</a></span> |
| <span class="normal"><a href="#__codelineno-0-1751">1751</a></span> |
| <span class="normal"><a href="#__codelineno-0-1752">1752</a></span> |
| <span class="normal"><a href="#__codelineno-0-1753">1753</a></span> |
| <span class="normal"><a href="#__codelineno-0-1754">1754</a></span> |
| <span class="normal"><a href="#__codelineno-0-1755">1755</a></span> |
| <span class="normal"><a href="#__codelineno-0-1756">1756</a></span> |
| <span class="normal"><a href="#__codelineno-0-1757">1757</a></span> |
| <span class="normal"><a href="#__codelineno-0-1758">1758</a></span> |
| <span class="normal"><a href="#__codelineno-0-1759">1759</a></span> |
| <span class="normal"><a href="#__codelineno-0-1760">1760</a></span> |
| <span class="normal"><a href="#__codelineno-0-1761">1761</a></span> |
| <span class="normal"><a href="#__codelineno-0-1762">1762</a></span> |
| <span class="normal"><a href="#__codelineno-0-1763">1763</a></span> |
| <span class="normal"><a href="#__codelineno-0-1764">1764</a></span> |
| <span class="normal"><a href="#__codelineno-0-1765">1765</a></span> |
| <span class="normal"><a href="#__codelineno-0-1766">1766</a></span> |
| <span class="normal"><a href="#__codelineno-0-1767">1767</a></span> |
| <span class="normal"><a href="#__codelineno-0-1768">1768</a></span> |
| <span class="normal"><a href="#__codelineno-0-1769">1769</a></span> |
| <span class="normal"><a href="#__codelineno-0-1770">1770</a></span> |
| <span class="normal"><a href="#__codelineno-0-1771">1771</a></span> |
| <span class="normal"><a href="#__codelineno-0-1772">1772</a></span> |
| <span class="normal"><a href="#__codelineno-0-1773">1773</a></span> |
| <span class="normal"><a href="#__codelineno-0-1774">1774</a></span> |
| <span class="normal"><a href="#__codelineno-0-1775">1775</a></span> |
| <span class="normal"><a href="#__codelineno-0-1776">1776</a></span> |
| <span class="normal"><a href="#__codelineno-0-1777">1777</a></span> |
| <span class="normal"><a href="#__codelineno-0-1778">1778</a></span> |
| <span class="normal"><a href="#__codelineno-0-1779">1779</a></span> |
| <span class="normal"><a href="#__codelineno-0-1780">1780</a></span> |
| <span class="normal"><a href="#__codelineno-0-1781">1781</a></span></pre></div></td><td class="code"><div><pre><span></span><code><a id="__codelineno-0-1621" name="__codelineno-0-1621"></a><span class="k">class</span><span class="w"> </span><span class="nc">ArrowScan</span><span class="p">:</span> |
| <a id="__codelineno-0-1622" name="__codelineno-0-1622"></a> <span class="n">_table_metadata</span><span class="p">:</span> <span class="n">TableMetadata</span> |
| <a id="__codelineno-0-1623" name="__codelineno-0-1623"></a> <span class="n">_io</span><span class="p">:</span> <span class="n">FileIO</span> |
| <a id="__codelineno-0-1624" name="__codelineno-0-1624"></a> <span class="n">_projected_schema</span><span class="p">:</span> <span class="n">Schema</span> |
| <a id="__codelineno-0-1625" name="__codelineno-0-1625"></a> <span class="n">_bound_row_filter</span><span class="p">:</span> <span class="n">BooleanExpression</span> |
| <a id="__codelineno-0-1626" name="__codelineno-0-1626"></a> <span class="n">_case_sensitive</span><span class="p">:</span> <span class="nb">bool</span> |
| <a id="__codelineno-0-1627" name="__codelineno-0-1627"></a> <span class="n">_limit</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">int</span><span class="p">]</span> |
| <a id="__codelineno-0-1628" name="__codelineno-0-1628"></a> <span class="n">_downcast_ns_timestamp_to_us</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">bool</span><span class="p">]</span> |
| <a id="__codelineno-0-1629" name="__codelineno-0-1629"></a><span class="w"> </span><span class="sd">"""Scan the Iceberg Table and create an Arrow construct.</span> |
| <a id="__codelineno-0-1630" name="__codelineno-0-1630"></a> |
| <a id="__codelineno-0-1631" name="__codelineno-0-1631"></a><span class="sd"> Attributes:</span> |
| <a id="__codelineno-0-1632" name="__codelineno-0-1632"></a><span class="sd"> _table_metadata: Current table metadata of the Iceberg table</span> |
| <a id="__codelineno-0-1633" name="__codelineno-0-1633"></a><span class="sd"> _io: PyIceberg FileIO implementation from which to fetch the io properties</span> |
| <a id="__codelineno-0-1634" name="__codelineno-0-1634"></a><span class="sd"> _projected_schema: Iceberg Schema to project onto the data files</span> |
| <a id="__codelineno-0-1635" name="__codelineno-0-1635"></a><span class="sd"> _bound_row_filter: Schema bound row expression to filter the data with</span> |
| <a id="__codelineno-0-1636" name="__codelineno-0-1636"></a><span class="sd"> _case_sensitive: Case sensitivity when looking up column names</span> |
| <a id="__codelineno-0-1637" name="__codelineno-0-1637"></a><span class="sd"> _limit: Limit the number of records.</span> |
| <a id="__codelineno-0-1638" name="__codelineno-0-1638"></a><span class="sd"> """</span> |
| <a id="__codelineno-0-1639" name="__codelineno-0-1639"></a> |
| <a id="__codelineno-0-1640" name="__codelineno-0-1640"></a> <span class="k">def</span><span class="w"> </span><span class="fm">__init__</span><span class="p">(</span> |
| <a id="__codelineno-0-1641" name="__codelineno-0-1641"></a> <span class="bp">self</span><span class="p">,</span> |
| <a id="__codelineno-0-1642" name="__codelineno-0-1642"></a> <span class="n">table_metadata</span><span class="p">:</span> <span class="n">TableMetadata</span><span class="p">,</span> |
| <a id="__codelineno-0-1643" name="__codelineno-0-1643"></a> <span class="n">io</span><span class="p">:</span> <span class="n">FileIO</span><span class="p">,</span> |
| <a id="__codelineno-0-1644" name="__codelineno-0-1644"></a> <span class="n">projected_schema</span><span class="p">:</span> <span class="n">Schema</span><span class="p">,</span> |
| <a id="__codelineno-0-1645" name="__codelineno-0-1645"></a> <span class="n">row_filter</span><span class="p">:</span> <span class="n">BooleanExpression</span><span class="p">,</span> |
| <a id="__codelineno-0-1646" name="__codelineno-0-1646"></a> <span class="n">case_sensitive</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">True</span><span class="p">,</span> |
| <a id="__codelineno-0-1647" name="__codelineno-0-1647"></a> <span class="n">limit</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">int</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> |
| <a id="__codelineno-0-1648" name="__codelineno-0-1648"></a> <span class="p">)</span> <span class="o">-></span> <span class="kc">None</span><span class="p">:</span> |
| <a id="__codelineno-0-1649" name="__codelineno-0-1649"></a> <span class="bp">self</span><span class="o">.</span><span class="n">_table_metadata</span> <span class="o">=</span> <span class="n">table_metadata</span> |
| <a id="__codelineno-0-1650" name="__codelineno-0-1650"></a> <span class="bp">self</span><span class="o">.</span><span class="n">_io</span> <span class="o">=</span> <span class="n">io</span> |
| <a id="__codelineno-0-1651" name="__codelineno-0-1651"></a> <span class="bp">self</span><span class="o">.</span><span class="n">_projected_schema</span> <span class="o">=</span> <span class="n">projected_schema</span> |
| <a id="__codelineno-0-1652" name="__codelineno-0-1652"></a> <span class="bp">self</span><span class="o">.</span><span class="n">_bound_row_filter</span> <span class="o">=</span> <span class="n">bind</span><span class="p">(</span><span class="n">table_metadata</span><span class="o">.</span><span class="n">schema</span><span class="p">(),</span> <span class="n">row_filter</span><span class="p">,</span> <span class="n">case_sensitive</span><span class="o">=</span><span class="n">case_sensitive</span><span class="p">)</span> |
| <a id="__codelineno-0-1653" name="__codelineno-0-1653"></a> <span class="bp">self</span><span class="o">.</span><span class="n">_case_sensitive</span> <span class="o">=</span> <span class="n">case_sensitive</span> |
| <a id="__codelineno-0-1654" name="__codelineno-0-1654"></a> <span class="bp">self</span><span class="o">.</span><span class="n">_limit</span> <span class="o">=</span> <span class="n">limit</span> |
| <a id="__codelineno-0-1655" name="__codelineno-0-1655"></a> <span class="bp">self</span><span class="o">.</span><span class="n">_downcast_ns_timestamp_to_us</span> <span class="o">=</span> <span class="n">Config</span><span class="p">()</span><span class="o">.</span><span class="n">get_bool</span><span class="p">(</span><span class="n">DOWNCAST_NS_TIMESTAMP_TO_US_ON_WRITE</span><span class="p">)</span> |
| <a id="__codelineno-0-1656" name="__codelineno-0-1656"></a> |
| <a id="__codelineno-0-1657" name="__codelineno-0-1657"></a> <span class="nd">@property</span> |
| <a id="__codelineno-0-1658" name="__codelineno-0-1658"></a> <span class="k">def</span><span class="w"> </span><span class="nf">_projected_field_ids</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="n">Set</span><span class="p">[</span><span class="nb">int</span><span class="p">]:</span> |
| <a id="__codelineno-0-1659" name="__codelineno-0-1659"></a><span class="w"> </span><span class="sd">"""Set of field IDs that should be projected from the data files."""</span> |
| <a id="__codelineno-0-1660" name="__codelineno-0-1660"></a> <span class="k">return</span> <span class="p">{</span> |
| <a id="__codelineno-0-1661" name="__codelineno-0-1661"></a> <span class="nb">id</span> |
| <a id="__codelineno-0-1662" name="__codelineno-0-1662"></a> <span class="k">for</span> <span class="nb">id</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">_projected_schema</span><span class="o">.</span><span class="n">field_ids</span> |
| <a id="__codelineno-0-1663" name="__codelineno-0-1663"></a> <span class="k">if</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_projected_schema</span><span class="o">.</span><span class="n">find_type</span><span class="p">(</span><span class="nb">id</span><span class="p">),</span> <span class="p">(</span><span class="n">MapType</span><span class="p">,</span> <span class="n">ListType</span><span class="p">))</span> |
| <a id="__codelineno-0-1664" name="__codelineno-0-1664"></a> <span class="p">}</span><span class="o">.</span><span class="n">union</span><span class="p">(</span><span class="n">extract_field_ids</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_bound_row_filter</span><span class="p">))</span> |
| <a id="__codelineno-0-1665" name="__codelineno-0-1665"></a> |
| <a id="__codelineno-0-1666" name="__codelineno-0-1666"></a> <span class="k">def</span><span class="w"> </span><span class="nf">to_table</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">tasks</span><span class="p">:</span> <span class="n">Iterable</span><span class="p">[</span><span class="n">FileScanTask</span><span class="p">])</span> <span class="o">-></span> <span class="n">pa</span><span class="o">.</span><span class="n">Table</span><span class="p">:</span> |
| <a id="__codelineno-0-1667" name="__codelineno-0-1667"></a><span class="w"> </span><span class="sd">"""Scan the Iceberg table and return a pa.Table.</span> |
| <a id="__codelineno-0-1668" name="__codelineno-0-1668"></a> |
| <a id="__codelineno-0-1669" name="__codelineno-0-1669"></a><span class="sd"> Returns a pa.Table with data from the Iceberg table by resolving the</span> |
| <a id="__codelineno-0-1670" name="__codelineno-0-1670"></a><span class="sd"> right columns that match the current table schema. Only data that</span> |
| <a id="__codelineno-0-1671" name="__codelineno-0-1671"></a><span class="sd"> matches the provided row_filter expression is returned.</span> |
| <a id="__codelineno-0-1672" name="__codelineno-0-1672"></a> |
| <a id="__codelineno-0-1673" name="__codelineno-0-1673"></a><span class="sd"> Args:</span> |
| <a id="__codelineno-0-1674" name="__codelineno-0-1674"></a><span class="sd"> tasks: FileScanTasks representing the data files and delete files to read from.</span> |
| <a id="__codelineno-0-1675" name="__codelineno-0-1675"></a> |
| <a id="__codelineno-0-1676" name="__codelineno-0-1676"></a><span class="sd"> Returns:</span> |
| <a id="__codelineno-0-1677" name="__codelineno-0-1677"></a><span class="sd"> A PyArrow table. Total number of rows will be capped if a limit is specified.</span> |
| <a id="__codelineno-0-1678" name="__codelineno-0-1678"></a> |
| <a id="__codelineno-0-1679" name="__codelineno-0-1679"></a><span class="sd"> Raises:</span> |
| <a id="__codelineno-0-1680" name="__codelineno-0-1680"></a><span class="sd"> ResolveError: When a required field cannot be found in the file</span> |
| <a id="__codelineno-0-1681" name="__codelineno-0-1681"></a><span class="sd"> ValueError: When a field type in the file cannot be projected to the schema type</span> |
| <a id="__codelineno-0-1682" name="__codelineno-0-1682"></a><span class="sd"> """</span> |
| <a id="__codelineno-0-1683" name="__codelineno-0-1683"></a> <span class="n">arrow_schema</span> <span class="o">=</span> <span class="n">schema_to_pyarrow</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_projected_schema</span><span class="p">,</span> <span class="n">include_field_ids</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span> |
| <a id="__codelineno-0-1684" name="__codelineno-0-1684"></a> |
| <a id="__codelineno-0-1685" name="__codelineno-0-1685"></a> <span class="n">batches</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">to_record_batches</span><span class="p">(</span><span class="n">tasks</span><span class="p">)</span> |
| <a id="__codelineno-0-1686" name="__codelineno-0-1686"></a> <span class="k">try</span><span class="p">:</span> |
| <a id="__codelineno-0-1687" name="__codelineno-0-1687"></a> <span class="n">first_batch</span> <span class="o">=</span> <span class="nb">next</span><span class="p">(</span><span class="n">batches</span><span class="p">)</span> |
| <a id="__codelineno-0-1688" name="__codelineno-0-1688"></a> <span class="k">except</span> <span class="ne">StopIteration</span><span class="p">:</span> |
| <a id="__codelineno-0-1689" name="__codelineno-0-1689"></a> <span class="c1"># Empty</span> |
| <a id="__codelineno-0-1690" name="__codelineno-0-1690"></a> <span class="k">return</span> <span class="n">arrow_schema</span><span class="o">.</span><span class="n">empty_table</span><span class="p">()</span> |
| <a id="__codelineno-0-1691" name="__codelineno-0-1691"></a> |
| <a id="__codelineno-0-1692" name="__codelineno-0-1692"></a> <span class="c1"># Note: cannot use pa.Table.from_batches(itertools.chain([first_batch], batches))</span> |
| <a id="__codelineno-0-1693" name="__codelineno-0-1693"></a> <span class="c1"># as different batches can use different schemas (due to large_ types)</span> |
| <a id="__codelineno-0-1694" name="__codelineno-0-1694"></a> <span class="n">result</span> <span class="o">=</span> <span class="n">pa</span><span class="o">.</span><span class="n">concat_tables</span><span class="p">(</span> |
| <a id="__codelineno-0-1695" name="__codelineno-0-1695"></a> <span class="p">(</span><span class="n">pa</span><span class="o">.</span><span class="n">Table</span><span class="o">.</span><span class="n">from_batches</span><span class="p">([</span><span class="n">batch</span><span class="p">])</span> <span class="k">for</span> <span class="n">batch</span> <span class="ow">in</span> <span class="n">itertools</span><span class="o">.</span><span class="n">chain</span><span class="p">([</span><span class="n">first_batch</span><span class="p">],</span> <span class="n">batches</span><span class="p">)),</span> <span class="n">promote_options</span><span class="o">=</span><span class="s2">"permissive"</span> |
| <a id="__codelineno-0-1696" name="__codelineno-0-1696"></a> <span class="p">)</span> |
| <a id="__codelineno-0-1697" name="__codelineno-0-1697"></a> |
| <a id="__codelineno-0-1698" name="__codelineno-0-1698"></a> <span class="k">if</span> <span class="n">property_as_bool</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_io</span><span class="o">.</span><span class="n">properties</span><span class="p">,</span> <span class="n">PYARROW_USE_LARGE_TYPES_ON_READ</span><span class="p">,</span> <span class="kc">False</span><span class="p">):</span> |
| <a id="__codelineno-0-1699" name="__codelineno-0-1699"></a> <span class="n">deprecation_message</span><span class="p">(</span> |
| <a id="__codelineno-0-1700" name="__codelineno-0-1700"></a> <span class="n">deprecated_in</span><span class="o">=</span><span class="s2">"0.10.0"</span><span class="p">,</span> |
| <a id="__codelineno-0-1701" name="__codelineno-0-1701"></a> <span class="n">removed_in</span><span class="o">=</span><span class="s2">"0.11.0"</span><span class="p">,</span> |
| <a id="__codelineno-0-1702" name="__codelineno-0-1702"></a> <span class="n">help_message</span><span class="o">=</span><span class="sa">f</span><span class="s2">"Property `</span><span class="si">{</span><span class="n">PYARROW_USE_LARGE_TYPES_ON_READ</span><span class="si">}</span><span class="s2">` will be removed."</span><span class="p">,</span> |
| <a id="__codelineno-0-1703" name="__codelineno-0-1703"></a> <span class="p">)</span> |
| <a id="__codelineno-0-1704" name="__codelineno-0-1704"></a> <span class="n">result</span> <span class="o">=</span> <span class="n">result</span><span class="o">.</span><span class="n">cast</span><span class="p">(</span><span class="n">arrow_schema</span><span class="p">)</span> |
| <a id="__codelineno-0-1705" name="__codelineno-0-1705"></a> |
| <a id="__codelineno-0-1706" name="__codelineno-0-1706"></a> <span class="k">return</span> <span class="n">result</span> |
| <a id="__codelineno-0-1707" name="__codelineno-0-1707"></a> |
| <a id="__codelineno-0-1708" name="__codelineno-0-1708"></a> <span class="k">def</span><span class="w"> </span><span class="nf">to_record_batches</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">tasks</span><span class="p">:</span> <span class="n">Iterable</span><span class="p">[</span><span class="n">FileScanTask</span><span class="p">])</span> <span class="o">-></span> <span class="n">Iterator</span><span class="p">[</span><span class="n">pa</span><span class="o">.</span><span class="n">RecordBatch</span><span class="p">]:</span> |
| <a id="__codelineno-0-1709" name="__codelineno-0-1709"></a><span class="w"> </span><span class="sd">"""Scan the Iceberg table and return an Iterator[pa.RecordBatch].</span> |
| <a id="__codelineno-0-1710" name="__codelineno-0-1710"></a> |
| <a id="__codelineno-0-1711" name="__codelineno-0-1711"></a><span class="sd"> Returns an Iterator of pa.RecordBatch with data from the Iceberg table</span> |
| <a id="__codelineno-0-1712" name="__codelineno-0-1712"></a><span class="sd"> by resolving the right columns that match the current table schema.</span> |
| <a id="__codelineno-0-1713" name="__codelineno-0-1713"></a><span class="sd"> Only data that matches the provided row_filter expression is returned.</span> |
| <a id="__codelineno-0-1714" name="__codelineno-0-1714"></a> |
| <a id="__codelineno-0-1715" name="__codelineno-0-1715"></a><span class="sd"> Args:</span> |
| <a id="__codelineno-0-1716" name="__codelineno-0-1716"></a><span class="sd"> tasks: FileScanTasks representing the data files and delete files to read from.</span> |
| <a id="__codelineno-0-1717" name="__codelineno-0-1717"></a> |
| <a id="__codelineno-0-1718" name="__codelineno-0-1718"></a><span class="sd"> Returns:</span> |
| <a id="__codelineno-0-1719" name="__codelineno-0-1719"></a><span class="sd"> An Iterator of PyArrow RecordBatches.</span> |
| <a id="__codelineno-0-1720" name="__codelineno-0-1720"></a><span class="sd"> Total number of rows will be capped if a limit is specified.</span> |
| <a id="__codelineno-0-1721" name="__codelineno-0-1721"></a> |
| <a id="__codelineno-0-1722" name="__codelineno-0-1722"></a><span class="sd"> Raises:</span> |
| <a id="__codelineno-0-1723" name="__codelineno-0-1723"></a><span class="sd"> ResolveError: When a required field cannot be found in the file</span> |
| <a id="__codelineno-0-1724" name="__codelineno-0-1724"></a><span class="sd"> ValueError: When a field type in the file cannot be projected to the schema type</span> |
| <a id="__codelineno-0-1725" name="__codelineno-0-1725"></a><span class="sd"> """</span> |
| <a id="__codelineno-0-1726" name="__codelineno-0-1726"></a> <span class="n">deletes_per_file</span> <span class="o">=</span> <span class="n">_read_all_delete_files</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_io</span><span class="p">,</span> <span class="n">tasks</span><span class="p">)</span> |
| <a id="__codelineno-0-1727" name="__codelineno-0-1727"></a> |
| <a id="__codelineno-0-1728" name="__codelineno-0-1728"></a> <span class="n">total_row_count</span> <span class="o">=</span> <span class="mi">0</span> |
| <a id="__codelineno-0-1729" name="__codelineno-0-1729"></a> <span class="n">executor</span> <span class="o">=</span> <span class="n">ExecutorFactory</span><span class="o">.</span><span class="n">get_or_create</span><span class="p">()</span> |
| <a id="__codelineno-0-1730" name="__codelineno-0-1730"></a> |
| <a id="__codelineno-0-1731" name="__codelineno-0-1731"></a> <span class="k">def</span><span class="w"> </span><span class="nf">batches_for_task</span><span class="p">(</span><span class="n">task</span><span class="p">:</span> <span class="n">FileScanTask</span><span class="p">)</span> <span class="o">-></span> <span class="n">List</span><span class="p">[</span><span class="n">pa</span><span class="o">.</span><span class="n">RecordBatch</span><span class="p">]:</span> |
| <a id="__codelineno-0-1732" name="__codelineno-0-1732"></a> <span class="c1"># Materialize the iterator here to ensure execution happens within the executor.</span> |
| <a id="__codelineno-0-1733" name="__codelineno-0-1733"></a> <span class="c1"># Otherwise, the iterator would be lazily consumed later (in the main thread),</span> |
| <a id="__codelineno-0-1734" name="__codelineno-0-1734"></a> <span class="c1"># defeating the purpose of using executor.map.</span> |
| <a id="__codelineno-0-1735" name="__codelineno-0-1735"></a> <span class="k">return</span> <span class="nb">list</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_record_batches_from_scan_tasks_and_deletes</span><span class="p">([</span><span class="n">task</span><span class="p">],</span> <span class="n">deletes_per_file</span><span class="p">))</span> |
| <a id="__codelineno-0-1736" name="__codelineno-0-1736"></a> |
| <a id="__codelineno-0-1737" name="__codelineno-0-1737"></a> <span class="n">limit_reached</span> <span class="o">=</span> <span class="kc">False</span> |
| <a id="__codelineno-0-1738" name="__codelineno-0-1738"></a> <span class="k">for</span> <span class="n">batches</span> <span class="ow">in</span> <span class="n">executor</span><span class="o">.</span><span class="n">map</span><span class="p">(</span><span class="n">batches_for_task</span><span class="p">,</span> <span class="n">tasks</span><span class="p">):</span> |
| <a id="__codelineno-0-1739" name="__codelineno-0-1739"></a> <span class="k">for</span> <span class="n">batch</span> <span class="ow">in</span> <span class="n">batches</span><span class="p">:</span> |
| <a id="__codelineno-0-1740" name="__codelineno-0-1740"></a> <span class="n">current_batch_size</span> <span class="o">=</span> <span class="nb">len</span><span class="p">(</span><span class="n">batch</span><span class="p">)</span> |
| <a id="__codelineno-0-1741" name="__codelineno-0-1741"></a> <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">_limit</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span> <span class="ow">and</span> <span class="n">total_row_count</span> <span class="o">+</span> <span class="n">current_batch_size</span> <span class="o">>=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_limit</span><span class="p">:</span> |
| <a id="__codelineno-0-1742" name="__codelineno-0-1742"></a> <span class="k">yield</span> <span class="n">batch</span><span class="o">.</span><span class="n">slice</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">_limit</span> <span class="o">-</span> <span class="n">total_row_count</span><span class="p">)</span> |
| <a id="__codelineno-0-1743" name="__codelineno-0-1743"></a> |
| <a id="__codelineno-0-1744" name="__codelineno-0-1744"></a> <span class="n">limit_reached</span> <span class="o">=</span> <span class="kc">True</span> |
| <a id="__codelineno-0-1745" name="__codelineno-0-1745"></a> <span class="k">break</span> |
| <a id="__codelineno-0-1746" name="__codelineno-0-1746"></a> <span class="k">else</span><span class="p">:</span> |
| <a id="__codelineno-0-1747" name="__codelineno-0-1747"></a> <span class="k">yield</span> <span class="n">batch</span> |
| <a id="__codelineno-0-1748" name="__codelineno-0-1748"></a> <span class="n">total_row_count</span> <span class="o">+=</span> <span class="n">current_batch_size</span> |
| <a id="__codelineno-0-1749" name="__codelineno-0-1749"></a> |
| <a id="__codelineno-0-1750" name="__codelineno-0-1750"></a> <span class="k">if</span> <span class="n">limit_reached</span><span class="p">:</span> |
| <a id="__codelineno-0-1751" name="__codelineno-0-1751"></a> <span class="c1"># This break will also cancel all running tasks in the executor</span> |
| <a id="__codelineno-0-1752" name="__codelineno-0-1752"></a> <span class="k">break</span> |
| <a id="__codelineno-0-1753" name="__codelineno-0-1753"></a> |
| <a id="__codelineno-0-1754" name="__codelineno-0-1754"></a> <span class="k">def</span><span class="w"> </span><span class="nf">_record_batches_from_scan_tasks_and_deletes</span><span class="p">(</span> |
| <a id="__codelineno-0-1755" name="__codelineno-0-1755"></a> <span class="bp">self</span><span class="p">,</span> <span class="n">tasks</span><span class="p">:</span> <span class="n">Iterable</span><span class="p">[</span><span class="n">FileScanTask</span><span class="p">],</span> <span class="n">deletes_per_file</span><span class="p">:</span> <span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">List</span><span class="p">[</span><span class="n">ChunkedArray</span><span class="p">]]</span> |
| <a id="__codelineno-0-1756" name="__codelineno-0-1756"></a> <span class="p">)</span> <span class="o">-></span> <span class="n">Iterator</span><span class="p">[</span><span class="n">pa</span><span class="o">.</span><span class="n">RecordBatch</span><span class="p">]:</span> |
| <a id="__codelineno-0-1757" name="__codelineno-0-1757"></a> <span class="n">total_row_count</span> <span class="o">=</span> <span class="mi">0</span> |
| <a id="__codelineno-0-1758" name="__codelineno-0-1758"></a> <span class="k">for</span> <span class="n">task</span> <span class="ow">in</span> <span class="n">tasks</span><span class="p">:</span> |
| <a id="__codelineno-0-1759" name="__codelineno-0-1759"></a> <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">_limit</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span> <span class="ow">and</span> <span class="n">total_row_count</span> <span class="o">>=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_limit</span><span class="p">:</span> |
| <a id="__codelineno-0-1760" name="__codelineno-0-1760"></a> <span class="k">break</span> |
| <a id="__codelineno-0-1761" name="__codelineno-0-1761"></a> <span class="n">batches</span> <span class="o">=</span> <span class="n">_task_to_record_batches</span><span class="p">(</span> |
| <a id="__codelineno-0-1762" name="__codelineno-0-1762"></a> <span class="bp">self</span><span class="o">.</span><span class="n">_io</span><span class="p">,</span> |
| <a id="__codelineno-0-1763" name="__codelineno-0-1763"></a> <span class="n">task</span><span class="p">,</span> |
| <a id="__codelineno-0-1764" name="__codelineno-0-1764"></a> <span class="bp">self</span><span class="o">.</span><span class="n">_bound_row_filter</span><span class="p">,</span> |
| <a id="__codelineno-0-1765" name="__codelineno-0-1765"></a> <span class="bp">self</span><span class="o">.</span><span class="n">_projected_schema</span><span class="p">,</span> |
| <a id="__codelineno-0-1766" name="__codelineno-0-1766"></a> <span class="bp">self</span><span class="o">.</span><span class="n">_projected_field_ids</span><span class="p">,</span> |
| <a id="__codelineno-0-1767" name="__codelineno-0-1767"></a> <span class="n">deletes_per_file</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">task</span><span class="o">.</span><span class="n">file</span><span class="o">.</span><span class="n">file_path</span><span class="p">),</span> |
| <a id="__codelineno-0-1768" name="__codelineno-0-1768"></a> <span class="bp">self</span><span class="o">.</span><span class="n">_case_sensitive</span><span class="p">,</span> |
| <a id="__codelineno-0-1769" name="__codelineno-0-1769"></a> <span class="bp">self</span><span class="o">.</span><span class="n">_table_metadata</span><span class="o">.</span><span class="n">name_mapping</span><span class="p">(),</span> |
| <a id="__codelineno-0-1770" name="__codelineno-0-1770"></a> <span class="bp">self</span><span class="o">.</span><span class="n">_table_metadata</span><span class="o">.</span><span class="n">specs</span><span class="p">()</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">task</span><span class="o">.</span><span class="n">file</span><span class="o">.</span><span class="n">spec_id</span><span class="p">),</span> |
| <a id="__codelineno-0-1771" name="__codelineno-0-1771"></a> <span class="bp">self</span><span class="o">.</span><span class="n">_table_metadata</span><span class="o">.</span><span class="n">format_version</span><span class="p">,</span> |
| <a id="__codelineno-0-1772" name="__codelineno-0-1772"></a> <span class="bp">self</span><span class="o">.</span><span class="n">_downcast_ns_timestamp_to_us</span><span class="p">,</span> |
| <a id="__codelineno-0-1773" name="__codelineno-0-1773"></a> <span class="p">)</span> |
| <a id="__codelineno-0-1774" name="__codelineno-0-1774"></a> <span class="k">for</span> <span class="n">batch</span> <span class="ow">in</span> <span class="n">batches</span><span class="p">:</span> |
| <a id="__codelineno-0-1775" name="__codelineno-0-1775"></a> <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">_limit</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span> |
| <a id="__codelineno-0-1776" name="__codelineno-0-1776"></a> <span class="k">if</span> <span class="n">total_row_count</span> <span class="o">>=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_limit</span><span class="p">:</span> |
| <a id="__codelineno-0-1777" name="__codelineno-0-1777"></a> <span class="k">break</span> |
| <a id="__codelineno-0-1778" name="__codelineno-0-1778"></a> <span class="k">elif</span> <span class="n">total_row_count</span> <span class="o">+</span> <span class="nb">len</span><span class="p">(</span><span class="n">batch</span><span class="p">)</span> <span class="o">>=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_limit</span><span class="p">:</span> |
| <a id="__codelineno-0-1779" name="__codelineno-0-1779"></a> <span class="n">batch</span> <span class="o">=</span> <span class="n">batch</span><span class="o">.</span><span class="n">slice</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">_limit</span> <span class="o">-</span> <span class="n">total_row_count</span><span class="p">)</span> |
| <a id="__codelineno-0-1780" name="__codelineno-0-1780"></a> <span class="k">yield</span> <span class="n">batch</span> |
| <a id="__codelineno-0-1781" name="__codelineno-0-1781"></a> <span class="n">total_row_count</span> <span class="o">+=</span> <span class="nb">len</span><span class="p">(</span><span class="n">batch</span><span class="p">)</span> |
| </code></pre></div></td></tr></table></div> |
| </details> |
| |
| |
| |
| <div class="doc doc-children"> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <div class="doc doc-object doc-function"> |
| |
| |
| <h3 id="pyiceberg.io.pyarrow.ArrowScan.to_record_batches" class="doc doc-heading"> |
| <code class="highlight language-python"><span class="n">to_record_batches</span><span class="p">(</span><span class="n">tasks</span><span class="p">)</span></code> |
| |
| <a href="#pyiceberg.io.pyarrow.ArrowScan.to_record_batches" class="headerlink" title="Permanent link">¶</a></h3> |
| |
| |
| <div class="doc doc-contents "> |
| |
| <p>Scan the Iceberg table and return an Iterator[pa.RecordBatch].</p> |
| <p>Returns an Iterator of pa.RecordBatch with data from the Iceberg table |
| by resolving the right columns that match the current table schema. |
| Only data that matches the provided row_filter expression is returned.</p> |
| |
| |
| <p><span class="doc-section-title">Parameters:</span></p> |
| <table> |
| <thead> |
| <tr> |
| <th>Name</th> |
| <th>Type</th> |
| <th>Description</th> |
| <th>Default</th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr class="doc-section-item"> |
| <td> |
| <code>tasks</code> |
| </td> |
| <td> |
| <code><span title="typing.Iterable">Iterable</span>[<a class="autorefs autorefs-internal" title="FileScanTask dataclass (pyiceberg.table.FileScanTask)" href="../../table/#pyiceberg.table.FileScanTask">FileScanTask</a>]</code> |
| </td> |
| <td> |
| <div class="doc-md-description"> |
| <p>FileScanTasks representing the data files and delete files to read from.</p> |
| </div> |
| </td> |
| <td> |
| <em>required</em> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| |
| |
| <p><span class="doc-section-title">Returns:</span></p> |
| <table> |
| <thead> |
| <tr> |
| <th>Type</th> |
| <th>Description</th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr class="doc-section-item"> |
| <td> |
| <code><span title="typing.Iterator">Iterator</span>[<span title="pyarrow.RecordBatch">RecordBatch</span>]</code> |
| </td> |
| <td> |
| <div class="doc-md-description"> |
| <p>An Iterator of PyArrow RecordBatches. Total number of rows will be capped if specified.</p> |
| </div> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| |
| |
| <p><span class="doc-section-title">Raises:</span></p> |
| <table> |
| <thead> |
| <tr> |
| <th>Type</th> |
| <th>Description</th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr class="doc-section-item"> |
| <td> |
| <code><span title="pyiceberg.exceptions.ResolveError">ResolveError</span></code> |
| </td> |
| <td> |
| <div class="doc-md-description"> |
| <p>When a required field cannot be found in the file.</p> |
| </div> |
| </td> |
| </tr> |
| <tr class="doc-section-item"> |
| <td> |
| <code><span title="ValueError">ValueError</span></code> |
| </td> |
| <td> |
| <div class="doc-md-description"> |
| <p>When a field type in the file cannot be projected to the schema type.</p> |
| </div> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| |
| |
| <details class="quote"> |
| <summary>Source code in <code>pyiceberg/io/pyarrow.py</code></summary> |
| <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-1708">1708</a></span> |
| <span class="normal"><a href="#__codelineno-0-1709">1709</a></span> |
| <span class="normal"><a href="#__codelineno-0-1710">1710</a></span> |
| <span class="normal"><a href="#__codelineno-0-1711">1711</a></span> |
| <span class="normal"><a href="#__codelineno-0-1712">1712</a></span> |
| <span class="normal"><a href="#__codelineno-0-1713">1713</a></span> |
| <span class="normal"><a href="#__codelineno-0-1714">1714</a></span> |
| <span class="normal"><a href="#__codelineno-0-1715">1715</a></span> |
| <span class="normal"><a href="#__codelineno-0-1716">1716</a></span> |
| <span class="normal"><a href="#__codelineno-0-1717">1717</a></span> |
| <span class="normal"><a href="#__codelineno-0-1718">1718</a></span> |
| <span class="normal"><a href="#__codelineno-0-1719">1719</a></span> |
| <span class="normal"><a href="#__codelineno-0-1720">1720</a></span> |
| <span class="normal"><a href="#__codelineno-0-1721">1721</a></span> |
| <span class="normal"><a href="#__codelineno-0-1722">1722</a></span> |
| <span class="normal"><a href="#__codelineno-0-1723">1723</a></span> |
| <span class="normal"><a href="#__codelineno-0-1724">1724</a></span> |
| <span class="normal"><a href="#__codelineno-0-1725">1725</a></span> |
| <span class="normal"><a href="#__codelineno-0-1726">1726</a></span> |
| <span class="normal"><a href="#__codelineno-0-1727">1727</a></span> |
| <span class="normal"><a href="#__codelineno-0-1728">1728</a></span> |
| <span class="normal"><a href="#__codelineno-0-1729">1729</a></span> |
| <span class="normal"><a href="#__codelineno-0-1730">1730</a></span> |
| <span class="normal"><a href="#__codelineno-0-1731">1731</a></span> |
| <span class="normal"><a href="#__codelineno-0-1732">1732</a></span> |
| <span class="normal"><a href="#__codelineno-0-1733">1733</a></span> |
| <span class="normal"><a href="#__codelineno-0-1734">1734</a></span> |
| <span class="normal"><a href="#__codelineno-0-1735">1735</a></span> |
| <span class="normal"><a href="#__codelineno-0-1736">1736</a></span> |
| <span class="normal"><a href="#__codelineno-0-1737">1737</a></span> |
| <span class="normal"><a href="#__codelineno-0-1738">1738</a></span> |
| <span class="normal"><a href="#__codelineno-0-1739">1739</a></span> |
| <span class="normal"><a href="#__codelineno-0-1740">1740</a></span> |
| <span class="normal"><a href="#__codelineno-0-1741">1741</a></span> |
| <span class="normal"><a href="#__codelineno-0-1742">1742</a></span> |
| <span class="normal"><a href="#__codelineno-0-1743">1743</a></span> |
| <span class="normal"><a href="#__codelineno-0-1744">1744</a></span> |
| <span class="normal"><a href="#__codelineno-0-1745">1745</a></span> |
| <span class="normal"><a href="#__codelineno-0-1746">1746</a></span> |
| <span class="normal"><a href="#__codelineno-0-1747">1747</a></span> |
| <span class="normal"><a href="#__codelineno-0-1748">1748</a></span> |
| <span class="normal"><a href="#__codelineno-0-1749">1749</a></span> |
| <span class="normal"><a href="#__codelineno-0-1750">1750</a></span> |
| <span class="normal"><a href="#__codelineno-0-1751">1751</a></span> |
| <span class="normal"><a href="#__codelineno-0-1752">1752</a></span></pre></div></td><td class="code"><div><pre><span></span><code><a id="__codelineno-0-1708" name="__codelineno-0-1708"></a><span class="k">def</span><span class="w"> </span><span class="nf">to_record_batches</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">tasks</span><span class="p">:</span> <span class="n">Iterable</span><span class="p">[</span><span class="n">FileScanTask</span><span class="p">])</span> <span class="o">-></span> <span class="n">Iterator</span><span class="p">[</span><span class="n">pa</span><span class="o">.</span><span class="n">RecordBatch</span><span class="p">]:</span> |
| <a id="__codelineno-0-1709" name="__codelineno-0-1709"></a><span class="w"> </span><span class="sd">"""Scan the Iceberg table and return an Iterator[pa.RecordBatch].</span> |
| <a id="__codelineno-0-1710" name="__codelineno-0-1710"></a> |
| <a id="__codelineno-0-1711" name="__codelineno-0-1711"></a><span class="sd"> Returns an Iterator of pa.RecordBatch with data from the Iceberg table</span> |
| <a id="__codelineno-0-1712" name="__codelineno-0-1712"></a><span class="sd"> by resolving the right columns that match the current table schema.</span> |
| <a id="__codelineno-0-1713" name="__codelineno-0-1713"></a><span class="sd"> Only data that matches the provided row_filter expression is returned.</span> |
| <a id="__codelineno-0-1714" name="__codelineno-0-1714"></a> |
| <a id="__codelineno-0-1715" name="__codelineno-0-1715"></a><span class="sd"> Args:</span> |
| <a id="__codelineno-0-1716" name="__codelineno-0-1716"></a><span class="sd"> tasks: FileScanTasks representing the data files and delete files to read from.</span> |
| <a id="__codelineno-0-1717" name="__codelineno-0-1717"></a> |
| <a id="__codelineno-0-1718" name="__codelineno-0-1718"></a><span class="sd"> Returns:</span> |
| <a id="__codelineno-0-1719" name="__codelineno-0-1719"></a><span class="sd"> An Iterator of PyArrow RecordBatches.</span> |
| <a id="__codelineno-0-1720" name="__codelineno-0-1720"></a><span class="sd"> Total number of rows will be capped if specified.</span> |
| <a id="__codelineno-0-1721" name="__codelineno-0-1721"></a> |
| <a id="__codelineno-0-1722" name="__codelineno-0-1722"></a><span class="sd"> Raises:</span> |
| <a id="__codelineno-0-1723" name="__codelineno-0-1723"></a><span class="sd"> ResolveError: When a required field cannot be found in the file</span> |
| <a id="__codelineno-0-1724" name="__codelineno-0-1724"></a><span class="sd"> ValueError: When a field type in the file cannot be projected to the schema type</span> |
| <a id="__codelineno-0-1725" name="__codelineno-0-1725"></a><span class="sd"> """</span> |
| <a id="__codelineno-0-1726" name="__codelineno-0-1726"></a> <span class="n">deletes_per_file</span> <span class="o">=</span> <span class="n">_read_all_delete_files</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_io</span><span class="p">,</span> <span class="n">tasks</span><span class="p">)</span> |
| <a id="__codelineno-0-1727" name="__codelineno-0-1727"></a> |
| <a id="__codelineno-0-1728" name="__codelineno-0-1728"></a> <span class="n">total_row_count</span> <span class="o">=</span> <span class="mi">0</span> |
| <a id="__codelineno-0-1729" name="__codelineno-0-1729"></a> <span class="n">executor</span> <span class="o">=</span> <span class="n">ExecutorFactory</span><span class="o">.</span><span class="n">get_or_create</span><span class="p">()</span> |
| <a id="__codelineno-0-1730" name="__codelineno-0-1730"></a> |
| <a id="__codelineno-0-1731" name="__codelineno-0-1731"></a> <span class="k">def</span><span class="w"> </span><span class="nf">batches_for_task</span><span class="p">(</span><span class="n">task</span><span class="p">:</span> <span class="n">FileScanTask</span><span class="p">)</span> <span class="o">-></span> <span class="n">List</span><span class="p">[</span><span class="n">pa</span><span class="o">.</span><span class="n">RecordBatch</span><span class="p">]:</span> |
| <a id="__codelineno-0-1732" name="__codelineno-0-1732"></a> <span class="c1"># Materialize the iterator here to ensure execution happens within the executor.</span> |
| <a id="__codelineno-0-1733" name="__codelineno-0-1733"></a> <span class="c1"># Otherwise, the iterator would be lazily consumed later (in the main thread),</span> |
| <a id="__codelineno-0-1734" name="__codelineno-0-1734"></a> <span class="c1"># defeating the purpose of using executor.map.</span> |
| <a id="__codelineno-0-1735" name="__codelineno-0-1735"></a> <span class="k">return</span> <span class="nb">list</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_record_batches_from_scan_tasks_and_deletes</span><span class="p">([</span><span class="n">task</span><span class="p">],</span> <span class="n">deletes_per_file</span><span class="p">))</span> |
| <a id="__codelineno-0-1736" name="__codelineno-0-1736"></a> |
| <a id="__codelineno-0-1737" name="__codelineno-0-1737"></a> <span class="n">limit_reached</span> <span class="o">=</span> <span class="kc">False</span> |
| <a id="__codelineno-0-1738" name="__codelineno-0-1738"></a> <span class="k">for</span> <span class="n">batches</span> <span class="ow">in</span> <span class="n">executor</span><span class="o">.</span><span class="n">map</span><span class="p">(</span><span class="n">batches_for_task</span><span class="p">,</span> <span class="n">tasks</span><span class="p">):</span> |
| <a id="__codelineno-0-1739" name="__codelineno-0-1739"></a> <span class="k">for</span> <span class="n">batch</span> <span class="ow">in</span> <span class="n">batches</span><span class="p">:</span> |
| <a id="__codelineno-0-1740" name="__codelineno-0-1740"></a> <span class="n">current_batch_size</span> <span class="o">=</span> <span class="nb">len</span><span class="p">(</span><span class="n">batch</span><span class="p">)</span> |
| <a id="__codelineno-0-1741" name="__codelineno-0-1741"></a> <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">_limit</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span> <span class="ow">and</span> <span class="n">total_row_count</span> <span class="o">+</span> <span class="n">current_batch_size</span> <span class="o">>=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_limit</span><span class="p">:</span> |
| <a id="__codelineno-0-1742" name="__codelineno-0-1742"></a> <span class="k">yield</span> <span class="n">batch</span><span class="o">.</span><span class="n">slice</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">_limit</span> <span class="o">-</span> <span class="n">total_row_count</span><span class="p">)</span> |
| <a id="__codelineno-0-1743" name="__codelineno-0-1743"></a> |
| <a id="__codelineno-0-1744" name="__codelineno-0-1744"></a> <span class="n">limit_reached</span> <span class="o">=</span> <span class="kc">True</span> |
| <a id="__codelineno-0-1745" name="__codelineno-0-1745"></a> <span class="k">break</span> |
| <a id="__codelineno-0-1746" name="__codelineno-0-1746"></a> <span class="k">else</span><span class="p">:</span> |
| <a id="__codelineno-0-1747" name="__codelineno-0-1747"></a> <span class="k">yield</span> <span class="n">batch</span> |
| <a id="__codelineno-0-1748" name="__codelineno-0-1748"></a> <span class="n">total_row_count</span> <span class="o">+=</span> <span class="n">current_batch_size</span> |
| <a id="__codelineno-0-1749" name="__codelineno-0-1749"></a> |
| <a id="__codelineno-0-1750" name="__codelineno-0-1750"></a> <span class="k">if</span> <span class="n">limit_reached</span><span class="p">:</span> |
| <a id="__codelineno-0-1751" name="__codelineno-0-1751"></a> <span class="c1"># This break will also cancel all running tasks in the executor</span> |
| <a id="__codelineno-0-1752" name="__codelineno-0-1752"></a> <span class="k">break</span> |
| </code></pre></div></td></tr></table></div> |
| </details> |
| </div> |
| |
| </div> |
| |
| <div class="doc doc-object doc-function"> |
| |
| |
| <h3 id="pyiceberg.io.pyarrow.ArrowScan.to_table" class="doc doc-heading"> |
| <code class="highlight language-python"><span class="n">to_table</span><span class="p">(</span><span class="n">tasks</span><span class="p">)</span></code> |
| |
| <a href="#pyiceberg.io.pyarrow.ArrowScan.to_table" class="headerlink" title="Permanent link">¶</a></h3> |
| |
| |
| <div class="doc doc-contents "> |
| |
| <p>Scan the Iceberg table and return a pa.Table.</p> |
| <p>Returns a pa.Table with data from the Iceberg table by resolving the |
| right columns that match the current table schema. Only data that |
| matches the provided row_filter expression is returned.</p> |
| |
| |
| <p><span class="doc-section-title">Parameters:</span></p> |
| <table> |
| <thead> |
| <tr> |
| <th>Name</th> |
| <th>Type</th> |
| <th>Description</th> |
| <th>Default</th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr class="doc-section-item"> |
| <td> |
| <code>tasks</code> |
| </td> |
| <td> |
| <code><span title="typing.Iterable">Iterable</span>[<a class="autorefs autorefs-internal" title="FileScanTask dataclass (pyiceberg.table.FileScanTask)" href="../../table/#pyiceberg.table.FileScanTask">FileScanTask</a>]</code> |
| </td> |
| <td> |
| <div class="doc-md-description"> |
| <p>FileScanTasks representing the data files and delete files to read from.</p> |
| </div> |
| </td> |
| <td> |
| <em>required</em> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| |
| |
| <p><span class="doc-section-title">Returns:</span></p> |
| <table> |
| <thead> |
| <tr> |
| <th>Type</th> |
| <th>Description</th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr class="doc-section-item"> |
| <td> |
| <code><span title="pyarrow.Table">Table</span></code> |
| </td> |
| <td> |
| <div class="doc-md-description"> |
| <p>A PyArrow table. Total number of rows will be capped if specified.</p> |
| </div> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| |
| |
| <p><span class="doc-section-title">Raises:</span></p> |
| <table> |
| <thead> |
| <tr> |
| <th>Type</th> |
| <th>Description</th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr class="doc-section-item"> |
| <td> |
| <code><span title="pyiceberg.exceptions.ResolveError">ResolveError</span></code> |
| </td> |
| <td> |
| <div class="doc-md-description"> |
| <p>When a required field cannot be found in the file.</p> |
| </div> |
| </td> |
| </tr> |
| <tr class="doc-section-item"> |
| <td> |
| <code><span title="ValueError">ValueError</span></code> |
| </td> |
| <td> |
| <div class="doc-md-description"> |
| <p>When a field type in the file cannot be projected to the schema type.</p> |
| </div> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| |
| |
| <details class="quote"> |
| <summary>Source code in <code>pyiceberg/io/pyarrow.py</code></summary> |
| <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-1666">1666</a></span> |
| <span class="normal"><a href="#__codelineno-0-1667">1667</a></span> |
| <span class="normal"><a href="#__codelineno-0-1668">1668</a></span> |
| <span class="normal"><a href="#__codelineno-0-1669">1669</a></span> |
| <span class="normal"><a href="#__codelineno-0-1670">1670</a></span> |
| <span class="normal"><a href="#__codelineno-0-1671">1671</a></span> |
| <span class="normal"><a href="#__codelineno-0-1672">1672</a></span> |
| <span class="normal"><a href="#__codelineno-0-1673">1673</a></span> |
| <span class="normal"><a href="#__codelineno-0-1674">1674</a></span> |
| <span class="normal"><a href="#__codelineno-0-1675">1675</a></span> |
| <span class="normal"><a href="#__codelineno-0-1676">1676</a></span> |
| <span class="normal"><a href="#__codelineno-0-1677">1677</a></span> |
| <span class="normal"><a href="#__codelineno-0-1678">1678</a></span> |
| <span class="normal"><a href="#__codelineno-0-1679">1679</a></span> |
| <span class="normal"><a href="#__codelineno-0-1680">1680</a></span> |
| <span class="normal"><a href="#__codelineno-0-1681">1681</a></span> |
| <span class="normal"><a href="#__codelineno-0-1682">1682</a></span> |
| <span class="normal"><a href="#__codelineno-0-1683">1683</a></span> |
| <span class="normal"><a href="#__codelineno-0-1684">1684</a></span> |
| <span class="normal"><a href="#__codelineno-0-1685">1685</a></span> |
| <span class="normal"><a href="#__codelineno-0-1686">1686</a></span> |
| <span class="normal"><a href="#__codelineno-0-1687">1687</a></span> |
| <span class="normal"><a href="#__codelineno-0-1688">1688</a></span> |
| <span class="normal"><a href="#__codelineno-0-1689">1689</a></span> |
| <span class="normal"><a href="#__codelineno-0-1690">1690</a></span> |
| <span class="normal"><a href="#__codelineno-0-1691">1691</a></span> |
| <span class="normal"><a href="#__codelineno-0-1692">1692</a></span> |
| <span class="normal"><a href="#__codelineno-0-1693">1693</a></span> |
| <span class="normal"><a href="#__codelineno-0-1694">1694</a></span> |
| <span class="normal"><a href="#__codelineno-0-1695">1695</a></span> |
| <span class="normal"><a href="#__codelineno-0-1696">1696</a></span> |
| <span class="normal"><a href="#__codelineno-0-1697">1697</a></span> |
| <span class="normal"><a href="#__codelineno-0-1698">1698</a></span> |
| <span class="normal"><a href="#__codelineno-0-1699">1699</a></span> |
| <span class="normal"><a href="#__codelineno-0-1700">1700</a></span> |
| <span class="normal"><a href="#__codelineno-0-1701">1701</a></span> |
| <span class="normal"><a href="#__codelineno-0-1702">1702</a></span> |
| <span class="normal"><a href="#__codelineno-0-1703">1703</a></span> |
| <span class="normal"><a href="#__codelineno-0-1704">1704</a></span> |
| <span class="normal"><a href="#__codelineno-0-1705">1705</a></span> |
| <span class="normal"><a href="#__codelineno-0-1706">1706</a></span></pre></div></td><td class="code"><div><pre><span></span><code><a id="__codelineno-0-1666" name="__codelineno-0-1666"></a><span class="k">def</span><span class="w"> </span><span class="nf">to_table</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">tasks</span><span class="p">:</span> <span class="n">Iterable</span><span class="p">[</span><span class="n">FileScanTask</span><span class="p">])</span> <span class="o">-></span> <span class="n">pa</span><span class="o">.</span><span class="n">Table</span><span class="p">:</span> |
| <a id="__codelineno-0-1667" name="__codelineno-0-1667"></a><span class="w"> </span><span class="sd">"""Scan the Iceberg table and return a pa.Table.</span> |
| <a id="__codelineno-0-1668" name="__codelineno-0-1668"></a> |
| <a id="__codelineno-0-1669" name="__codelineno-0-1669"></a><span class="sd"> Returns a pa.Table with data from the Iceberg table by resolving the</span> |
| <a id="__codelineno-0-1670" name="__codelineno-0-1670"></a><span class="sd"> right columns that match the current table schema. Only data that</span> |
| <a id="__codelineno-0-1671" name="__codelineno-0-1671"></a><span class="sd"> matches the provided row_filter expression is returned.</span> |
| <a id="__codelineno-0-1672" name="__codelineno-0-1672"></a> |
| <a id="__codelineno-0-1673" name="__codelineno-0-1673"></a><span class="sd"> Args:</span> |
| <a id="__codelineno-0-1674" name="__codelineno-0-1674"></a><span class="sd"> tasks: FileScanTasks representing the data files and delete files to read from.</span> |
| <a id="__codelineno-0-1675" name="__codelineno-0-1675"></a> |
| <a id="__codelineno-0-1676" name="__codelineno-0-1676"></a><span class="sd"> Returns:</span> |
| <a id="__codelineno-0-1677" name="__codelineno-0-1677"></a><span class="sd"> A PyArrow table. Total number of rows will be capped if specified.</span> |
| <a id="__codelineno-0-1678" name="__codelineno-0-1678"></a> |
| <a id="__codelineno-0-1679" name="__codelineno-0-1679"></a><span class="sd"> Raises:</span> |
| <a id="__codelineno-0-1680" name="__codelineno-0-1680"></a><span class="sd"> ResolveError: When a required field cannot be found in the file</span> |
| <a id="__codelineno-0-1681" name="__codelineno-0-1681"></a><span class="sd"> ValueError: When a field type in the file cannot be projected to the schema type</span> |
| <a id="__codelineno-0-1682" name="__codelineno-0-1682"></a><span class="sd"> """</span> |
| <a id="__codelineno-0-1683" name="__codelineno-0-1683"></a> <span class="n">arrow_schema</span> <span class="o">=</span> <span class="n">schema_to_pyarrow</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_projected_schema</span><span class="p">,</span> <span class="n">include_field_ids</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span> |
| <a id="__codelineno-0-1684" name="__codelineno-0-1684"></a> |
| <a id="__codelineno-0-1685" name="__codelineno-0-1685"></a> <span class="n">batches</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">to_record_batches</span><span class="p">(</span><span class="n">tasks</span><span class="p">)</span> |
| <a id="__codelineno-0-1686" name="__codelineno-0-1686"></a> <span class="k">try</span><span class="p">:</span> |
| <a id="__codelineno-0-1687" name="__codelineno-0-1687"></a> <span class="n">first_batch</span> <span class="o">=</span> <span class="nb">next</span><span class="p">(</span><span class="n">batches</span><span class="p">)</span> |
| <a id="__codelineno-0-1688" name="__codelineno-0-1688"></a> <span class="k">except</span> <span class="ne">StopIteration</span><span class="p">:</span> |
| <a id="__codelineno-0-1689" name="__codelineno-0-1689"></a> <span class="c1"># Empty</span> |
| <a id="__codelineno-0-1690" name="__codelineno-0-1690"></a> <span class="k">return</span> <span class="n">arrow_schema</span><span class="o">.</span><span class="n">empty_table</span><span class="p">()</span> |
| <a id="__codelineno-0-1691" name="__codelineno-0-1691"></a> |
| <a id="__codelineno-0-1692" name="__codelineno-0-1692"></a> <span class="c1"># Note: cannot use pa.Table.from_batches(itertools.chain([first_batch], batches))</span> |
| <a id="__codelineno-0-1693" name="__codelineno-0-1693"></a> <span class="c1"># as different batches can use different schemas (due to large_ types)</span> |
| <a id="__codelineno-0-1694" name="__codelineno-0-1694"></a> <span class="n">result</span> <span class="o">=</span> <span class="n">pa</span><span class="o">.</span><span class="n">concat_tables</span><span class="p">(</span> |
| <a id="__codelineno-0-1695" name="__codelineno-0-1695"></a> <span class="p">(</span><span class="n">pa</span><span class="o">.</span><span class="n">Table</span><span class="o">.</span><span class="n">from_batches</span><span class="p">([</span><span class="n">batch</span><span class="p">])</span> <span class="k">for</span> <span class="n">batch</span> <span class="ow">in</span> <span class="n">itertools</span><span class="o">.</span><span class="n">chain</span><span class="p">([</span><span class="n">first_batch</span><span class="p">],</span> <span class="n">batches</span><span class="p">)),</span> <span class="n">promote_options</span><span class="o">=</span><span class="s2">"permissive"</span> |
| <a id="__codelineno-0-1696" name="__codelineno-0-1696"></a> <span class="p">)</span> |
| <a id="__codelineno-0-1697" name="__codelineno-0-1697"></a> |
| <a id="__codelineno-0-1698" name="__codelineno-0-1698"></a> <span class="k">if</span> <span class="n">property_as_bool</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_io</span><span class="o">.</span><span class="n">properties</span><span class="p">,</span> <span class="n">PYARROW_USE_LARGE_TYPES_ON_READ</span><span class="p">,</span> <span class="kc">False</span><span class="p">):</span> |
| <a id="__codelineno-0-1699" name="__codelineno-0-1699"></a> <span class="n">deprecation_message</span><span class="p">(</span> |
| <a id="__codelineno-0-1700" name="__codelineno-0-1700"></a> <span class="n">deprecated_in</span><span class="o">=</span><span class="s2">"0.10.0"</span><span class="p">,</span> |
| <a id="__codelineno-0-1701" name="__codelineno-0-1701"></a> <span class="n">removed_in</span><span class="o">=</span><span class="s2">"0.11.0"</span><span class="p">,</span> |
| <a id="__codelineno-0-1702" name="__codelineno-0-1702"></a> <span class="n">help_message</span><span class="o">=</span><span class="sa">f</span><span class="s2">"Property `</span><span class="si">{</span><span class="n">PYARROW_USE_LARGE_TYPES_ON_READ</span><span class="si">}</span><span class="s2">` will be removed."</span><span class="p">,</span> |
| <a id="__codelineno-0-1703" name="__codelineno-0-1703"></a> <span class="p">)</span> |
| <a id="__codelineno-0-1704" name="__codelineno-0-1704"></a> <span class="n">result</span> <span class="o">=</span> <span class="n">result</span><span class="o">.</span><span class="n">cast</span><span class="p">(</span><span class="n">arrow_schema</span><span class="p">)</span> |
| <a id="__codelineno-0-1705" name="__codelineno-0-1705"></a> |
| <a id="__codelineno-0-1706" name="__codelineno-0-1706"></a> <span class="k">return</span> <span class="n">result</span> |
| </code></pre></div></td></tr></table></div> |
| </details> |
| </div> |
| |
| </div> |
| |
| |
| |
| </div> |
| |
| </div> |
| |
| </div> |
| |
| <div class="doc doc-object doc-class"> |
| |
| |
| |
| <h2 id="pyiceberg.io.pyarrow.PyArrowFile" class="doc doc-heading"> |
| <code>PyArrowFile</code> |
| |
| |
| <a href="#pyiceberg.io.pyarrow.PyArrowFile" class="headerlink" title="Permanent link">¶</a></h2> |
| |
| |
| <div class="doc doc-contents "> |
| <p class="doc doc-class-bases"> |
| Bases: <code><a class="autorefs autorefs-internal" title="InputFile (pyiceberg.io.InputFile)" href="../#pyiceberg.io.InputFile">InputFile</a></code>, <code><a class="autorefs autorefs-internal" title="OutputFile (pyiceberg.io.OutputFile)" href="../#pyiceberg.io.OutputFile">OutputFile</a></code></p> |
| |
| |
| <p>A combined InputFile and OutputFile implementation that uses a pyarrow filesystem to generate pyarrow.lib.NativeFile instances.</p> |
| |
| |
| <p><span class="doc-section-title">Parameters:</span></p> |
| <table> |
| <thead> |
| <tr> |
| <th>Name</th> |
| <th>Type</th> |
| <th>Description</th> |
| <th>Default</th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr class="doc-section-item"> |
| <td> |
| <code>location</code> |
| </td> |
| <td> |
| <code><span title="str">str</span></code> |
| </td> |
| <td> |
| <div class="doc-md-description"> |
| <p>A URI or a path to a local file.</p> |
| </div> |
| </td> |
| <td> |
| <em>required</em> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| |
| |
| <p><span class="doc-section-title">Attributes:</span></p> |
| <table> |
| <thead> |
| <tr> |
| <th>Name</th> |
| <th>Type</th> |
| <th>Description</th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr class="doc-section-item"> |
| <td><code><span title="pyiceberg.io.pyarrow.PyArrowFile.location">location</span></code></td> |
| <td> |
| <code><span title="str">str</span></code> |
| </td> |
| <td> |
| <div class="doc-md-description"> |
| <p>The URI or path to a local file for a PyArrowFile instance.</p> |
| </div> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| |
| |
| <p><span class="doc-section-title">Examples:</span></p> |
| <div class="highlight"><pre><span></span><code><a id="__codelineno-0-1" name="__codelineno-0-1" href="#__codelineno-0-1"></a><span class="gp">>>> </span><span class="kn">from</span><span class="w"> </span><span class="nn">pyiceberg.io.pyarrow</span><span class="w"> </span><span class="kn">import</span> <span class="n">PyArrowFile</span> |
| <a id="__codelineno-0-2" name="__codelineno-0-2" href="#__codelineno-0-2"></a><span class="gp">>>> </span><span class="c1"># input_file = PyArrowFile("s3://foo/bar.txt")</span> |
| <a id="__codelineno-0-3" name="__codelineno-0-3" href="#__codelineno-0-3"></a><span class="gp">>>> </span><span class="c1"># Read the contents of the PyArrowFile instance</span> |
| <a id="__codelineno-0-4" name="__codelineno-0-4" href="#__codelineno-0-4"></a><span class="gp">>>> </span><span class="c1"># Make sure that you have permissions to read/write</span> |
| <a id="__codelineno-0-5" name="__codelineno-0-5" href="#__codelineno-0-5"></a><span class="gp">>>> </span><span class="c1"># file_content = input_file.open().read()</span> |
| </code></pre></div> |
| <div class="highlight"><pre><span></span><code><a id="__codelineno-0-1" name="__codelineno-0-1" href="#__codelineno-0-1"></a><span class="gp">>>> </span><span class="c1"># output_file = PyArrowFile("s3://baz/qux.txt")</span> |
| <a id="__codelineno-0-2" name="__codelineno-0-2" href="#__codelineno-0-2"></a><span class="gp">>>> </span><span class="c1"># Write bytes to a file</span> |
| <a id="__codelineno-0-3" name="__codelineno-0-3" href="#__codelineno-0-3"></a><span class="gp">>>> </span><span class="c1"># Make sure that you have permissions to read/write</span> |
| <a id="__codelineno-0-4" name="__codelineno-0-4" href="#__codelineno-0-4"></a><span class="gp">>>> </span><span class="c1"># output_file.create().write(b'foobytes')</span> |
| </code></pre></div> |
| |
| |
| |
| |
| |
| |
| |
| |
| <details class="quote"> |
| <summary>Source code in <code>pyiceberg/io/pyarrow.py</code></summary> |
| <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-256">256</a></span> |
| <span class="normal"><a href="#__codelineno-0-257">257</a></span> |
| <span class="normal"><a href="#__codelineno-0-258">258</a></span> |
| <span class="normal"><a href="#__codelineno-0-259">259</a></span> |
| <span class="normal"><a href="#__codelineno-0-260">260</a></span> |
| <span class="normal"><a href="#__codelineno-0-261">261</a></span> |
| <span class="normal"><a href="#__codelineno-0-262">262</a></span> |
| <span class="normal"><a href="#__codelineno-0-263">263</a></span> |
| <span class="normal"><a href="#__codelineno-0-264">264</a></span> |
| <span class="normal"><a href="#__codelineno-0-265">265</a></span> |
| <span class="normal"><a href="#__codelineno-0-266">266</a></span> |
| <span class="normal"><a href="#__codelineno-0-267">267</a></span> |
| <span class="normal"><a href="#__codelineno-0-268">268</a></span> |
| <span class="normal"><a href="#__codelineno-0-269">269</a></span> |
| <span class="normal"><a href="#__codelineno-0-270">270</a></span> |
| <span class="normal"><a href="#__codelineno-0-271">271</a></span> |
| <span class="normal"><a href="#__codelineno-0-272">272</a></span> |
| <span class="normal"><a href="#__codelineno-0-273">273</a></span> |
| <span class="normal"><a href="#__codelineno-0-274">274</a></span> |
| <span class="normal"><a href="#__codelineno-0-275">275</a></span> |
| <span class="normal"><a href="#__codelineno-0-276">276</a></span> |
| <span class="normal"><a href="#__codelineno-0-277">277</a></span> |
| <span class="normal"><a href="#__codelineno-0-278">278</a></span> |
| <span class="normal"><a href="#__codelineno-0-279">279</a></span> |
| <span class="normal"><a href="#__codelineno-0-280">280</a></span> |
| <span class="normal"><a href="#__codelineno-0-281">281</a></span> |
| <span class="normal"><a href="#__codelineno-0-282">282</a></span> |
| <span class="normal"><a href="#__codelineno-0-283">283</a></span> |
| <span class="normal"><a href="#__codelineno-0-284">284</a></span> |
| <span class="normal"><a href="#__codelineno-0-285">285</a></span> |
| <span class="normal"><a href="#__codelineno-0-286">286</a></span> |
| <span class="normal"><a href="#__codelineno-0-287">287</a></span> |
| <span class="normal"><a href="#__codelineno-0-288">288</a></span> |
| <span class="normal"><a href="#__codelineno-0-289">289</a></span> |
| <span class="normal"><a href="#__codelineno-0-290">290</a></span> |
| <span class="normal"><a href="#__codelineno-0-291">291</a></span> |
| <span class="normal"><a href="#__codelineno-0-292">292</a></span> |
| <span class="normal"><a href="#__codelineno-0-293">293</a></span> |
| <span class="normal"><a href="#__codelineno-0-294">294</a></span> |
| <span class="normal"><a href="#__codelineno-0-295">295</a></span> |
| <span class="normal"><a href="#__codelineno-0-296">296</a></span> |
| <span class="normal"><a href="#__codelineno-0-297">297</a></span> |
| <span class="normal"><a href="#__codelineno-0-298">298</a></span> |
| <span class="normal"><a href="#__codelineno-0-299">299</a></span> |
| <span class="normal"><a href="#__codelineno-0-300">300</a></span> |
| <span class="normal"><a href="#__codelineno-0-301">301</a></span> |
| <span class="normal"><a href="#__codelineno-0-302">302</a></span> |
| <span class="normal"><a href="#__codelineno-0-303">303</a></span> |
| <span class="normal"><a href="#__codelineno-0-304">304</a></span> |
| <span class="normal"><a href="#__codelineno-0-305">305</a></span> |
| <span class="normal"><a href="#__codelineno-0-306">306</a></span> |
| <span class="normal"><a href="#__codelineno-0-307">307</a></span> |
| <span class="normal"><a href="#__codelineno-0-308">308</a></span> |
| <span class="normal"><a href="#__codelineno-0-309">309</a></span> |
| <span class="normal"><a href="#__codelineno-0-310">310</a></span> |
| <span class="normal"><a href="#__codelineno-0-311">311</a></span> |
| <span class="normal"><a href="#__codelineno-0-312">312</a></span> |
| <span class="normal"><a href="#__codelineno-0-313">313</a></span> |
| <span class="normal"><a href="#__codelineno-0-314">314</a></span> |
| <span class="normal"><a href="#__codelineno-0-315">315</a></span> |
| <span class="normal"><a href="#__codelineno-0-316">316</a></span> |
| <span class="normal"><a href="#__codelineno-0-317">317</a></span> |
| <span class="normal"><a href="#__codelineno-0-318">318</a></span> |
| <span class="normal"><a href="#__codelineno-0-319">319</a></span> |
| <span class="normal"><a href="#__codelineno-0-320">320</a></span> |
| <span class="normal"><a href="#__codelineno-0-321">321</a></span> |
| <span class="normal"><a href="#__codelineno-0-322">322</a></span> |
| <span class="normal"><a href="#__codelineno-0-323">323</a></span> |
| <span class="normal"><a href="#__codelineno-0-324">324</a></span> |
| <span class="normal"><a href="#__codelineno-0-325">325</a></span> |
| <span class="normal"><a href="#__codelineno-0-326">326</a></span> |
| <span class="normal"><a href="#__codelineno-0-327">327</a></span> |
| <span class="normal"><a href="#__codelineno-0-328">328</a></span> |
| <span class="normal"><a href="#__codelineno-0-329">329</a></span> |
| <span class="normal"><a href="#__codelineno-0-330">330</a></span> |
| <span class="normal"><a href="#__codelineno-0-331">331</a></span> |
| <span class="normal"><a href="#__codelineno-0-332">332</a></span> |
| <span class="normal"><a href="#__codelineno-0-333">333</a></span> |
| <span class="normal"><a href="#__codelineno-0-334">334</a></span> |
| <span class="normal"><a href="#__codelineno-0-335">335</a></span> |
| <span class="normal"><a href="#__codelineno-0-336">336</a></span> |
| <span class="normal"><a href="#__codelineno-0-337">337</a></span> |
| <span class="normal"><a href="#__codelineno-0-338">338</a></span> |
| <span class="normal"><a href="#__codelineno-0-339">339</a></span> |
| <span class="normal"><a href="#__codelineno-0-340">340</a></span> |
| <span class="normal"><a href="#__codelineno-0-341">341</a></span> |
| <span class="normal"><a href="#__codelineno-0-342">342</a></span> |
| <span class="normal"><a href="#__codelineno-0-343">343</a></span> |
| <span class="normal"><a href="#__codelineno-0-344">344</a></span> |
| <span class="normal"><a href="#__codelineno-0-345">345</a></span> |
| <span class="normal"><a href="#__codelineno-0-346">346</a></span> |
| <span class="normal"><a href="#__codelineno-0-347">347</a></span> |
| <span class="normal"><a href="#__codelineno-0-348">348</a></span> |
| <span class="normal"><a href="#__codelineno-0-349">349</a></span> |
| <span class="normal"><a href="#__codelineno-0-350">350</a></span> |
| <span class="normal"><a href="#__codelineno-0-351">351</a></span> |
| <span class="normal"><a href="#__codelineno-0-352">352</a></span> |
| <span class="normal"><a href="#__codelineno-0-353">353</a></span> |
| <span class="normal"><a href="#__codelineno-0-354">354</a></span> |
| <span class="normal"><a href="#__codelineno-0-355">355</a></span> |
| <span class="normal"><a href="#__codelineno-0-356">356</a></span> |
| <span class="normal"><a href="#__codelineno-0-357">357</a></span> |
| <span class="normal"><a href="#__codelineno-0-358">358</a></span> |
| <span class="normal"><a href="#__codelineno-0-359">359</a></span> |
| <span class="normal"><a href="#__codelineno-0-360">360</a></span> |
| <span class="normal"><a href="#__codelineno-0-361">361</a></span> |
| <span class="normal"><a href="#__codelineno-0-362">362</a></span> |
| <span class="normal"><a href="#__codelineno-0-363">363</a></span> |
| <span class="normal"><a href="#__codelineno-0-364">364</a></span> |
| <span class="normal"><a href="#__codelineno-0-365">365</a></span> |
| <span class="normal"><a href="#__codelineno-0-366">366</a></span> |
| <span class="normal"><a href="#__codelineno-0-367">367</a></span> |
| <span class="normal"><a href="#__codelineno-0-368">368</a></span> |
| <span class="normal"><a href="#__codelineno-0-369">369</a></span> |
| <span class="normal"><a href="#__codelineno-0-370">370</a></span> |
| <span class="normal"><a href="#__codelineno-0-371">371</a></span> |
| <span class="normal"><a href="#__codelineno-0-372">372</a></span> |
| <span class="normal"><a href="#__codelineno-0-373">373</a></span> |
| <span class="normal"><a href="#__codelineno-0-374">374</a></span> |
| <span class="normal"><a href="#__codelineno-0-375">375</a></span> |
| <span class="normal"><a href="#__codelineno-0-376">376</a></span> |
| <span class="normal"><a href="#__codelineno-0-377">377</a></span> |
| <span class="normal"><a href="#__codelineno-0-378">378</a></span> |
| <span class="normal"><a href="#__codelineno-0-379">379</a></span> |
| <span class="normal"><a href="#__codelineno-0-380">380</a></span> |
| <span class="normal"><a href="#__codelineno-0-381">381</a></span> |
| <span class="normal"><a href="#__codelineno-0-382">382</a></span> |
| <span class="normal"><a href="#__codelineno-0-383">383</a></span> |
| <span class="normal"><a href="#__codelineno-0-384">384</a></span> |
| <span class="normal"><a href="#__codelineno-0-385">385</a></span> |
| <span class="normal"><a href="#__codelineno-0-386">386</a></span></pre></div></td><td class="code"><div><pre><span></span><code><a id="__codelineno-0-256" name="__codelineno-0-256"></a><span class="k">class</span><span class="w"> </span><span class="nc">PyArrowFile</span><span class="p">(</span><span class="n">InputFile</span><span class="p">,</span> <span class="n">OutputFile</span><span class="p">):</span> |
| <a id="__codelineno-0-257" name="__codelineno-0-257"></a><span class="w"> </span><span class="sd">"""A combined InputFile and OutputFile implementation that uses a pyarrow filesystem to generate pyarrow.lib.NativeFile instances.</span> |
| <a id="__codelineno-0-258" name="__codelineno-0-258"></a> |
| <a id="__codelineno-0-259" name="__codelineno-0-259"></a><span class="sd"> Args:</span> |
| <a id="__codelineno-0-260" name="__codelineno-0-260"></a><span class="sd"> location (str): A URI or a path to a local file.</span> |
| <a id="__codelineno-0-261" name="__codelineno-0-261"></a> |
| <a id="__codelineno-0-262" name="__codelineno-0-262"></a><span class="sd"> Attributes:</span> |
| <a id="__codelineno-0-263" name="__codelineno-0-263"></a><span class="sd"> location(str): The URI or path to a local file for a PyArrowFile instance.</span> |
| <a id="__codelineno-0-264" name="__codelineno-0-264"></a> |
| <a id="__codelineno-0-265" name="__codelineno-0-265"></a><span class="sd"> Examples:</span> |
| <a id="__codelineno-0-266" name="__codelineno-0-266"></a><span class="sd"> >>> from pyiceberg.io.pyarrow import PyArrowFile</span> |
| <a id="__codelineno-0-267" name="__codelineno-0-267"></a><span class="sd"> >>> # input_file = PyArrowFile("s3://foo/bar.txt")</span> |
| <a id="__codelineno-0-268" name="__codelineno-0-268"></a><span class="sd"> >>> # Read the contents of the PyArrowFile instance</span> |
| <a id="__codelineno-0-269" name="__codelineno-0-269"></a><span class="sd"> >>> # Make sure that you have permissions to read/write</span> |
| <a id="__codelineno-0-270" name="__codelineno-0-270"></a><span class="sd"> >>> # file_content = input_file.open().read()</span> |
| <a id="__codelineno-0-271" name="__codelineno-0-271"></a> |
| <a id="__codelineno-0-272" name="__codelineno-0-272"></a><span class="sd"> >>> # output_file = PyArrowFile("s3://baz/qux.txt")</span> |
| <a id="__codelineno-0-273" name="__codelineno-0-273"></a><span class="sd"> >>> # Write bytes to a file</span> |
| <a id="__codelineno-0-274" name="__codelineno-0-274"></a><span class="sd"> >>> # Make sure that you have permissions to read/write</span> |
| <a id="__codelineno-0-275" name="__codelineno-0-275"></a><span class="sd"> >>> # output_file.create().write(b'foobytes')</span> |
| <a id="__codelineno-0-276" name="__codelineno-0-276"></a><span class="sd"> """</span> |
| <a id="__codelineno-0-277" name="__codelineno-0-277"></a> |
| <a id="__codelineno-0-278" name="__codelineno-0-278"></a> <span class="n">_filesystem</span><span class="p">:</span> <span class="n">FileSystem</span> |
| <a id="__codelineno-0-279" name="__codelineno-0-279"></a> <span class="n">_path</span><span class="p">:</span> <span class="nb">str</span> |
| <a id="__codelineno-0-280" name="__codelineno-0-280"></a> <span class="n">_buffer_size</span><span class="p">:</span> <span class="nb">int</span> |
| <a id="__codelineno-0-281" name="__codelineno-0-281"></a> |
| <a id="__codelineno-0-282" name="__codelineno-0-282"></a> <span class="k">def</span><span class="w"> </span><span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">location</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span> <span class="n">path</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span> <span class="n">fs</span><span class="p">:</span> <span class="n">FileSystem</span><span class="p">,</span> <span class="n">buffer_size</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="n">ONE_MEGABYTE</span><span class="p">):</span> |
| <a id="__codelineno-0-283" name="__codelineno-0-283"></a> <span class="bp">self</span><span class="o">.</span><span class="n">_filesystem</span> <span class="o">=</span> <span class="n">fs</span> |
| <a id="__codelineno-0-284" name="__codelineno-0-284"></a> <span class="bp">self</span><span class="o">.</span><span class="n">_path</span> <span class="o">=</span> <span class="n">path</span> |
| <a id="__codelineno-0-285" name="__codelineno-0-285"></a> <span class="bp">self</span><span class="o">.</span><span class="n">_buffer_size</span> <span class="o">=</span> <span class="n">buffer_size</span> |
| <a id="__codelineno-0-286" name="__codelineno-0-286"></a> <span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="fm">__init__</span><span class="p">(</span><span class="n">location</span><span class="o">=</span><span class="n">location</span><span class="p">)</span> |
| <a id="__codelineno-0-287" name="__codelineno-0-287"></a> |
| <a id="__codelineno-0-288" name="__codelineno-0-288"></a> <span class="k">def</span><span class="w"> </span><span class="nf">_file_info</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="n">FileInfo</span><span class="p">:</span> |
| <a id="__codelineno-0-289" name="__codelineno-0-289"></a><span class="w"> </span><span class="sd">"""Retrieve a pyarrow.fs.FileInfo object for the location.</span> |
| <a id="__codelineno-0-290" name="__codelineno-0-290"></a> |
| <a id="__codelineno-0-291" name="__codelineno-0-291"></a><span class="sd"> Raises:</span> |
| <a id="__codelineno-0-292" name="__codelineno-0-292"></a><span class="sd"> PermissionError: If the file at self.location cannot be accessed due to a permission error such as</span> |
| <a id="__codelineno-0-293" name="__codelineno-0-293"></a><span class="sd"> an AWS error code 15.</span> |
| <a id="__codelineno-0-294" name="__codelineno-0-294"></a><span class="sd"> """</span> |
| <a id="__codelineno-0-295" name="__codelineno-0-295"></a> <span class="k">try</span><span class="p">:</span> |
| <a id="__codelineno-0-296" name="__codelineno-0-296"></a> <span class="n">file_info</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_filesystem</span><span class="o">.</span><span class="n">get_file_info</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_path</span><span class="p">)</span> |
| <a id="__codelineno-0-297" name="__codelineno-0-297"></a> <span class="k">except</span> <span class="ne">OSError</span> <span class="k">as</span> <span class="n">e</span><span class="p">:</span> |
| <a id="__codelineno-0-298" name="__codelineno-0-298"></a> <span class="k">if</span> <span class="n">e</span><span class="o">.</span><span class="n">errno</span> <span class="o">==</span> <span class="mi">13</span> <span class="ow">or</span> <span class="s2">"AWS Error [code 15]"</span> <span class="ow">in</span> <span class="nb">str</span><span class="p">(</span><span class="n">e</span><span class="p">):</span> |
| <a id="__codelineno-0-299" name="__codelineno-0-299"></a> <span class="k">raise</span> <span class="ne">PermissionError</span><span class="p">(</span><span class="sa">f</span><span class="s2">"Cannot get file info, access denied: </span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">location</span><span class="si">}</span><span class="s2">"</span><span class="p">)</span> <span class="kn">from</span><span class="w"> </span><span class="nn">e</span> |
| <a id="__codelineno-0-300" name="__codelineno-0-300"></a> <span class="k">raise</span> <span class="c1"># pragma: no cover - If some other kind of OSError, raise the raw error</span> |
| <a id="__codelineno-0-301" name="__codelineno-0-301"></a> |
| <a id="__codelineno-0-302" name="__codelineno-0-302"></a> <span class="k">if</span> <span class="n">file_info</span><span class="o">.</span><span class="n">type</span> <span class="o">==</span> <span class="n">FileType</span><span class="o">.</span><span class="n">NotFound</span><span class="p">:</span> |
| <a id="__codelineno-0-303" name="__codelineno-0-303"></a> <span class="k">raise</span> <span class="ne">FileNotFoundError</span><span class="p">(</span><span class="sa">f</span><span class="s2">"Cannot get file info, file not found: </span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">location</span><span class="si">}</span><span class="s2">"</span><span class="p">)</span> |
| <a id="__codelineno-0-304" name="__codelineno-0-304"></a> <span class="k">return</span> <span class="n">file_info</span> |
| <a id="__codelineno-0-305" name="__codelineno-0-305"></a> |
| <a id="__codelineno-0-306" name="__codelineno-0-306"></a> <span class="k">def</span><span class="w"> </span><span class="fm">__len__</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="nb">int</span><span class="p">:</span> |
| <a id="__codelineno-0-307" name="__codelineno-0-307"></a><span class="w"> </span><span class="sd">"""Return the total length of the file, in bytes."""</span> |
| <a id="__codelineno-0-308" name="__codelineno-0-308"></a> <span class="n">file_info</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_file_info</span><span class="p">()</span> |
| <a id="__codelineno-0-309" name="__codelineno-0-309"></a> <span class="k">return</span> <span class="n">file_info</span><span class="o">.</span><span class="n">size</span> |
| <a id="__codelineno-0-310" name="__codelineno-0-310"></a> |
| <a id="__codelineno-0-311" name="__codelineno-0-311"></a> <span class="k">def</span><span class="w"> </span><span class="nf">exists</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="nb">bool</span><span class="p">:</span> |
| <a id="__codelineno-0-312" name="__codelineno-0-312"></a><span class="w"> </span><span class="sd">"""Check whether the location exists."""</span> |
| <a id="__codelineno-0-313" name="__codelineno-0-313"></a> <span class="k">try</span><span class="p">:</span> |
| <a id="__codelineno-0-314" name="__codelineno-0-314"></a> <span class="bp">self</span><span class="o">.</span><span class="n">_file_info</span><span class="p">()</span> <span class="c1"># raises FileNotFoundError if it does not exist</span> |
| <a id="__codelineno-0-315" name="__codelineno-0-315"></a> <span class="k">return</span> <span class="kc">True</span> |
| <a id="__codelineno-0-316" name="__codelineno-0-316"></a> <span class="k">except</span> <span class="ne">FileNotFoundError</span><span class="p">:</span> |
| <a id="__codelineno-0-317" name="__codelineno-0-317"></a> <span class="k">return</span> <span class="kc">False</span> |
| <a id="__codelineno-0-318" name="__codelineno-0-318"></a> |
| <a id="__codelineno-0-319" name="__codelineno-0-319"></a> <span class="k">def</span><span class="w"> </span><span class="nf">open</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">seekable</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">True</span><span class="p">)</span> <span class="o">-></span> <span class="n">InputStream</span><span class="p">:</span> |
| <a id="__codelineno-0-320" name="__codelineno-0-320"></a><span class="w"> </span><span class="sd">"""Open the location using a PyArrow FileSystem inferred from the location.</span> |
| <a id="__codelineno-0-321" name="__codelineno-0-321"></a> |
| <a id="__codelineno-0-322" name="__codelineno-0-322"></a><span class="sd"> Args:</span> |
| <a id="__codelineno-0-323" name="__codelineno-0-323"></a><span class="sd"> seekable: If the stream should support seek, or if it is consumed sequentially.</span> |
| <a id="__codelineno-0-324" name="__codelineno-0-324"></a> |
| <a id="__codelineno-0-325" name="__codelineno-0-325"></a><span class="sd"> Returns:</span> |
| <a id="__codelineno-0-326" name="__codelineno-0-326"></a><span class="sd"> pyarrow.lib.NativeFile: A NativeFile instance for the file located at `self.location`.</span> |
| <a id="__codelineno-0-327" name="__codelineno-0-327"></a> |
| <a id="__codelineno-0-328" name="__codelineno-0-328"></a><span class="sd"> Raises:</span> |
| <a id="__codelineno-0-329" name="__codelineno-0-329"></a><span class="sd"> FileNotFoundError: If the file at self.location does not exist.</span> |
| <a id="__codelineno-0-330" name="__codelineno-0-330"></a><span class="sd"> PermissionError: If the file at self.location cannot be accessed due to a permission error such as</span> |
| <a id="__codelineno-0-331" name="__codelineno-0-331"></a><span class="sd"> an AWS error code 15.</span> |
| <a id="__codelineno-0-332" name="__codelineno-0-332"></a><span class="sd"> """</span> |
| <a id="__codelineno-0-333" name="__codelineno-0-333"></a> <span class="k">try</span><span class="p">:</span> |
| <a id="__codelineno-0-334" name="__codelineno-0-334"></a> <span class="k">if</span> <span class="n">seekable</span><span class="p">:</span> |
| <a id="__codelineno-0-335" name="__codelineno-0-335"></a> <span class="n">input_file</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_filesystem</span><span class="o">.</span><span class="n">open_input_file</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_path</span><span class="p">)</span> |
| <a id="__codelineno-0-336" name="__codelineno-0-336"></a> <span class="k">else</span><span class="p">:</span> |
| <a id="__codelineno-0-337" name="__codelineno-0-337"></a> <span class="n">input_file</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_filesystem</span><span class="o">.</span><span class="n">open_input_stream</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_path</span><span class="p">,</span> <span class="n">buffer_size</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">_buffer_size</span><span class="p">)</span> |
| <a id="__codelineno-0-338" name="__codelineno-0-338"></a> <span class="k">except</span> <span class="p">(</span><span class="ne">FileNotFoundError</span><span class="p">,</span> <span class="ne">PermissionError</span><span class="p">):</span> |
| <a id="__codelineno-0-339" name="__codelineno-0-339"></a> <span class="k">raise</span> |
| <a id="__codelineno-0-340" name="__codelineno-0-340"></a> <span class="k">except</span> <span class="ne">OSError</span> <span class="k">as</span> <span class="n">e</span><span class="p">:</span> |
| <a id="__codelineno-0-341" name="__codelineno-0-341"></a> <span class="k">if</span> <span class="n">e</span><span class="o">.</span><span class="n">errno</span> <span class="o">==</span> <span class="mi">2</span> <span class="ow">or</span> <span class="s2">"Path does not exist"</span> <span class="ow">in</span> <span class="nb">str</span><span class="p">(</span><span class="n">e</span><span class="p">):</span> |
| <a id="__codelineno-0-342" name="__codelineno-0-342"></a> <span class="k">raise</span> <span class="ne">FileNotFoundError</span><span class="p">(</span><span class="sa">f</span><span class="s2">"Cannot open file, does not exist: </span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">location</span><span class="si">}</span><span class="s2">"</span><span class="p">)</span> <span class="kn">from</span><span class="w"> </span><span class="nn">e</span> |
| <a id="__codelineno-0-343" name="__codelineno-0-343"></a> <span class="k">elif</span> <span class="n">e</span><span class="o">.</span><span class="n">errno</span> <span class="o">==</span> <span class="mi">13</span> <span class="ow">or</span> <span class="s2">"AWS Error [code 15]"</span> <span class="ow">in</span> <span class="nb">str</span><span class="p">(</span><span class="n">e</span><span class="p">):</span> |
| <a id="__codelineno-0-344" name="__codelineno-0-344"></a> <span class="k">raise</span> <span class="ne">PermissionError</span><span class="p">(</span><span class="sa">f</span><span class="s2">"Cannot open file, access denied: </span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">location</span><span class="si">}</span><span class="s2">"</span><span class="p">)</span> <span class="kn">from</span><span class="w"> </span><span class="nn">e</span> |
| <a id="__codelineno-0-345" name="__codelineno-0-345"></a> <span class="k">raise</span> <span class="c1"># pragma: no cover - If some other kind of OSError, raise the raw error</span> |
| <a id="__codelineno-0-346" name="__codelineno-0-346"></a> <span class="k">return</span> <span class="n">input_file</span> |
| <a id="__codelineno-0-347" name="__codelineno-0-347"></a> |
| <a id="__codelineno-0-348" name="__codelineno-0-348"></a> <span class="k">def</span><span class="w"> </span><span class="nf">create</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">overwrite</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">False</span><span class="p">)</span> <span class="o">-></span> <span class="n">OutputStream</span><span class="p">:</span> |
| <a id="__codelineno-0-349" name="__codelineno-0-349"></a><span class="w"> </span><span class="sd">"""Create a writable pyarrow.lib.NativeFile for this PyArrowFile's location.</span> |
| <a id="__codelineno-0-350" name="__codelineno-0-350"></a> |
| <a id="__codelineno-0-351" name="__codelineno-0-351"></a><span class="sd"> Args:</span> |
| <a id="__codelineno-0-352" name="__codelineno-0-352"></a><span class="sd"> overwrite (bool): Whether to overwrite the file if it already exists.</span> |
| <a id="__codelineno-0-353" name="__codelineno-0-353"></a> |
| <a id="__codelineno-0-354" name="__codelineno-0-354"></a><span class="sd"> Returns:</span> |
| <a id="__codelineno-0-355" name="__codelineno-0-355"></a><span class="sd"> pyarrow.lib.NativeFile: A NativeFile instance for the file located at self.location.</span> |
| <a id="__codelineno-0-356" name="__codelineno-0-356"></a> |
| <a id="__codelineno-0-357" name="__codelineno-0-357"></a><span class="sd"> Raises:</span> |
| <a id="__codelineno-0-358" name="__codelineno-0-358"></a><span class="sd"> FileExistsError: If the file already exists at `self.location` and `overwrite` is False.</span> |
| <a id="__codelineno-0-359" name="__codelineno-0-359"></a> |
| <a id="__codelineno-0-360" name="__codelineno-0-360"></a><span class="sd"> Note:</span> |
| <a id="__codelineno-0-361" name="__codelineno-0-361"></a><span class="sd"> This retrieves a pyarrow NativeFile by opening an output stream. If overwrite is set to False,</span> |
| <a id="__codelineno-0-362" name="__codelineno-0-362"></a><span class="sd"> a check is first performed to verify that the file does not exist. This is not thread-safe and</span> |
| <a id="__codelineno-0-363" name="__codelineno-0-363"></a><span class="sd"> a possibility does exist that the file can be created by a concurrent process after the existence</span> |
| <a id="__codelineno-0-364" name="__codelineno-0-364"></a><span class="sd"> check yet before the output stream is created. In such a case, the default pyarrow behavior will</span> |
| <a id="__codelineno-0-365" name="__codelineno-0-365"></a><span class="sd"> truncate the contents of the existing file when opening the output stream.</span> |
| <a id="__codelineno-0-366" name="__codelineno-0-366"></a><span class="sd"> """</span> |
| <a id="__codelineno-0-367" name="__codelineno-0-367"></a> <span class="k">try</span><span class="p">:</span> |
| <a id="__codelineno-0-368" name="__codelineno-0-368"></a> <span class="k">if</span> <span class="ow">not</span> <span class="n">overwrite</span> <span class="ow">and</span> <span class="bp">self</span><span class="o">.</span><span class="n">exists</span><span class="p">()</span> <span class="ow">is</span> <span class="kc">True</span><span class="p">:</span> |
| <a id="__codelineno-0-369" name="__codelineno-0-369"></a> <span class="k">raise</span> <span class="ne">FileExistsError</span><span class="p">(</span><span class="sa">f</span><span class="s2">"Cannot create file, already exists: </span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">location</span><span class="si">}</span><span class="s2">"</span><span class="p">)</span> |
| <a id="__codelineno-0-370" name="__codelineno-0-370"></a> <span class="n">output_file</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_filesystem</span><span class="o">.</span><span class="n">open_output_stream</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_path</span><span class="p">,</span> <span class="n">buffer_size</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">_buffer_size</span><span class="p">)</span> |
| <a id="__codelineno-0-371" name="__codelineno-0-371"></a> <span class="k">except</span> <span class="ne">PermissionError</span><span class="p">:</span> |
| <a id="__codelineno-0-372" name="__codelineno-0-372"></a> <span class="k">raise</span> |
| <a id="__codelineno-0-373" name="__codelineno-0-373"></a> <span class="k">except</span> <span class="ne">OSError</span> <span class="k">as</span> <span class="n">e</span><span class="p">:</span> |
| <a id="__codelineno-0-374" name="__codelineno-0-374"></a> <span class="k">if</span> <span class="n">e</span><span class="o">.</span><span class="n">errno</span> <span class="o">==</span> <span class="mi">13</span> <span class="ow">or</span> <span class="s2">"AWS Error [code 15]"</span> <span class="ow">in</span> <span class="nb">str</span><span class="p">(</span><span class="n">e</span><span class="p">):</span> |
| <a id="__codelineno-0-375" name="__codelineno-0-375"></a> <span class="k">raise</span> <span class="ne">PermissionError</span><span class="p">(</span><span class="sa">f</span><span class="s2">"Cannot create file, access denied: </span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">location</span><span class="si">}</span><span class="s2">"</span><span class="p">)</span> <span class="kn">from</span><span class="w"> </span><span class="nn">e</span> |
| <a id="__codelineno-0-376" name="__codelineno-0-376"></a> <span class="k">raise</span> <span class="c1"># pragma: no cover - If some other kind of OSError, raise the raw error</span> |
| <a id="__codelineno-0-377" name="__codelineno-0-377"></a> <span class="k">return</span> <span class="n">output_file</span> |
| <a id="__codelineno-0-378" name="__codelineno-0-378"></a> |
| <a id="__codelineno-0-379" name="__codelineno-0-379"></a> <span class="k">def</span><span class="w"> </span><span class="nf">to_input_file</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="n">PyArrowFile</span><span class="p">:</span> |
| <a id="__codelineno-0-380" name="__codelineno-0-380"></a><span class="w"> </span><span class="sd">"""Return a new PyArrowFile for the location of an existing PyArrowFile instance.</span> |
| <a id="__codelineno-0-381" name="__codelineno-0-381"></a> |
| <a id="__codelineno-0-382" name="__codelineno-0-382"></a><span class="sd"> This method is included to abide by the OutputFile abstract base class. Since this implementation uses a single</span> |
| <a id="__codelineno-0-383" name="__codelineno-0-383"></a><span class="sd"> PyArrowFile class (as opposed to separate InputFile and OutputFile implementations), this method effectively returns</span> |
| <a id="__codelineno-0-384" name="__codelineno-0-384"></a><span class="sd"> a copy of the same instance.</span> |
| <a id="__codelineno-0-385" name="__codelineno-0-385"></a><span class="sd"> """</span> |
| <a id="__codelineno-0-386" name="__codelineno-0-386"></a> <span class="k">return</span> <span class="bp">self</span> |
| </code></pre></div></td></tr></table></div> |
| </details> |
| |
| |
| |
| <div class="doc doc-children"> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <div class="doc doc-object doc-function"> |
| |
| |
| <h3 id="pyiceberg.io.pyarrow.PyArrowFile.__len__" class="doc doc-heading"> |
| <code class="highlight language-python"><span class="fm">__len__</span><span class="p">()</span></code> |
| |
| <a href="#pyiceberg.io.pyarrow.PyArrowFile.__len__" class="headerlink" title="Permanent link">¶</a></h3> |
| |
| |
| <div class="doc doc-contents "> |
| |
| <p>Return the total length of the file, in bytes.</p> |
| |
| |
| <details class="quote"> |
| <summary>Source code in <code>pyiceberg/io/pyarrow.py</code></summary> |
| <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-306">306</a></span> |
| <span class="normal"><a href="#__codelineno-0-307">307</a></span> |
| <span class="normal"><a href="#__codelineno-0-308">308</a></span> |
| <span class="normal"><a href="#__codelineno-0-309">309</a></span></pre></div></td><td class="code"><div><pre><span></span><code><a id="__codelineno-0-306" name="__codelineno-0-306"></a><span class="k">def</span><span class="w"> </span><span class="fm">__len__</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="nb">int</span><span class="p">:</span> |
| <a id="__codelineno-0-307" name="__codelineno-0-307"></a><span class="w"> </span><span class="sd">"""Return the total length of the file, in bytes."""</span> |
| <a id="__codelineno-0-308" name="__codelineno-0-308"></a> <span class="n">file_info</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_file_info</span><span class="p">()</span> |
| <a id="__codelineno-0-309" name="__codelineno-0-309"></a> <span class="k">return</span> <span class="n">file_info</span><span class="o">.</span><span class="n">size</span> |
| </code></pre></div></td></tr></table></div> |
| </details> |
| </div> |
| |
| </div> |
| |
| <div class="doc doc-object doc-function"> |
| |
| |
| <h3 id="pyiceberg.io.pyarrow.PyArrowFile.create" class="doc doc-heading"> |
| <code class="highlight language-python"><span class="n">create</span><span class="p">(</span><span class="n">overwrite</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span></code> |
| |
| <a href="#pyiceberg.io.pyarrow.PyArrowFile.create" class="headerlink" title="Permanent link">¶</a></h3> |
| |
| |
| <div class="doc doc-contents "> |
| |
| <p>Create a writable pyarrow.lib.NativeFile for this PyArrowFile's location.</p> |
| |
| |
| <p><span class="doc-section-title">Parameters:</span></p> |
| <table> |
| <thead> |
| <tr> |
| <th>Name</th> |
| <th>Type</th> |
| <th>Description</th> |
| <th>Default</th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr class="doc-section-item"> |
| <td> |
| <code>overwrite</code> |
| </td> |
| <td> |
| <code><span title="bool">bool</span></code> |
| </td> |
| <td> |
| <div class="doc-md-description"> |
| <p>Whether to overwrite the file if it already exists.</p> |
| </div> |
| </td> |
| <td> |
| <code>False</code> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| |
| |
| <p><span class="doc-section-title">Returns:</span></p> |
| <table> |
| <thead> |
| <tr> |
| <th>Type</th> |
| <th>Description</th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr class="doc-section-item"> |
| <td> |
| <code><a class="autorefs autorefs-internal" title="OutputStream (pyiceberg.io.OutputStream)" href="../#pyiceberg.io.OutputStream">OutputStream</a></code> |
| </td> |
| <td> |
| <div class="doc-md-description"> |
| <p>pyarrow.lib.NativeFile: A NativeFile instance for the file located at <code>self.location</code>.</p> |
| </div> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| |
| |
| <p><span class="doc-section-title">Raises:</span></p> |
| <table> |
| <thead> |
| <tr> |
| <th>Type</th> |
| <th>Description</th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr class="doc-section-item"> |
| <td> |
| <code><span title="FileExistsError">FileExistsError</span></code> |
| </td> |
| <td> |
| <div class="doc-md-description"> |
| <p>If the file already exists at <code>self.location</code> and <code>overwrite</code> is False.</p> |
| </div> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| |
| |
| <details class="note" open> |
| <summary>Note</summary> |
| <p>This retrieves a pyarrow NativeFile by opening an output stream. If <code>overwrite</code> is set to False, |
| a check is first performed to verify that the file does not exist. This check-then-create sequence is not |
| thread-safe: a concurrent process can create the file after the existence check but before the output |
| stream is created. In such a case, the default pyarrow behavior will truncate the contents of the |
| existing file when opening the output stream.</p> |
| </details> |
| |
| <details class="quote"> |
| <summary>Source code in <code>pyiceberg/io/pyarrow.py</code></summary> |
| <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-348">348</a></span> |
| <span class="normal"><a href="#__codelineno-0-349">349</a></span> |
| <span class="normal"><a href="#__codelineno-0-350">350</a></span> |
| <span class="normal"><a href="#__codelineno-0-351">351</a></span> |
| <span class="normal"><a href="#__codelineno-0-352">352</a></span> |
| <span class="normal"><a href="#__codelineno-0-353">353</a></span> |
| <span class="normal"><a href="#__codelineno-0-354">354</a></span> |
| <span class="normal"><a href="#__codelineno-0-355">355</a></span> |
| <span class="normal"><a href="#__codelineno-0-356">356</a></span> |
| <span class="normal"><a href="#__codelineno-0-357">357</a></span> |
| <span class="normal"><a href="#__codelineno-0-358">358</a></span> |
| <span class="normal"><a href="#__codelineno-0-359">359</a></span> |
| <span class="normal"><a href="#__codelineno-0-360">360</a></span> |
| <span class="normal"><a href="#__codelineno-0-361">361</a></span> |
| <span class="normal"><a href="#__codelineno-0-362">362</a></span> |
| <span class="normal"><a href="#__codelineno-0-363">363</a></span> |
| <span class="normal"><a href="#__codelineno-0-364">364</a></span> |
| <span class="normal"><a href="#__codelineno-0-365">365</a></span> |
| <span class="normal"><a href="#__codelineno-0-366">366</a></span> |
| <span class="normal"><a href="#__codelineno-0-367">367</a></span> |
| <span class="normal"><a href="#__codelineno-0-368">368</a></span> |
| <span class="normal"><a href="#__codelineno-0-369">369</a></span> |
| <span class="normal"><a href="#__codelineno-0-370">370</a></span> |
| <span class="normal"><a href="#__codelineno-0-371">371</a></span> |
| <span class="normal"><a href="#__codelineno-0-372">372</a></span> |
| <span class="normal"><a href="#__codelineno-0-373">373</a></span> |
| <span class="normal"><a href="#__codelineno-0-374">374</a></span> |
| <span class="normal"><a href="#__codelineno-0-375">375</a></span> |
| <span class="normal"><a href="#__codelineno-0-376">376</a></span> |
| <span class="normal"><a href="#__codelineno-0-377">377</a></span></pre></div></td><td class="code"><div><pre><span></span><code><a id="__codelineno-0-348" name="__codelineno-0-348"></a><span class="k">def</span><span class="w"> </span><span class="nf">create</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">overwrite</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">False</span><span class="p">)</span> <span class="o">-></span> <span class="n">OutputStream</span><span class="p">:</span> |
| <a id="__codelineno-0-349" name="__codelineno-0-349"></a><span class="w"> </span><span class="sd">"""Create a writable pyarrow.lib.NativeFile for this PyArrowFile's location.</span> |
| <a id="__codelineno-0-350" name="__codelineno-0-350"></a> |
| <a id="__codelineno-0-351" name="__codelineno-0-351"></a><span class="sd"> Args:</span> |
| <a id="__codelineno-0-352" name="__codelineno-0-352"></a><span class="sd"> overwrite (bool): Whether to overwrite the file if it already exists.</span> |
| <a id="__codelineno-0-353" name="__codelineno-0-353"></a> |
| <a id="__codelineno-0-354" name="__codelineno-0-354"></a><span class="sd"> Returns:</span> |
| <a id="__codelineno-0-355" name="__codelineno-0-355"></a><span class="sd"> pyarrow.lib.NativeFile: A NativeFile instance for the file located at self.location.</span> |
| <a id="__codelineno-0-356" name="__codelineno-0-356"></a> |
| <a id="__codelineno-0-357" name="__codelineno-0-357"></a><span class="sd"> Raises:</span> |
| <a id="__codelineno-0-358" name="__codelineno-0-358"></a><span class="sd"> FileExistsError: If the file already exists at `self.location` and `overwrite` is False.</span> |
| <a id="__codelineno-0-359" name="__codelineno-0-359"></a> |
| <a id="__codelineno-0-360" name="__codelineno-0-360"></a><span class="sd"> Note:</span> |
| <a id="__codelineno-0-361" name="__codelineno-0-361"></a><span class="sd"> This retrieves a pyarrow NativeFile by opening an output stream. If overwrite is set to False,</span> |
| <a id="__codelineno-0-362" name="__codelineno-0-362"></a><span class="sd"> a check is first performed to verify that the file does not exist. This is not thread-safe and</span> |
| <a id="__codelineno-0-363" name="__codelineno-0-363"></a><span class="sd"> a possibility does exist that the file can be created by a concurrent process after the existence</span> |
| <a id="__codelineno-0-364" name="__codelineno-0-364"></a><span class="sd"> check yet before the output stream is created. In such a case, the default pyarrow behavior will</span> |
| <a id="__codelineno-0-365" name="__codelineno-0-365"></a><span class="sd"> truncate the contents of the existing file when opening the output stream.</span> |
| <a id="__codelineno-0-366" name="__codelineno-0-366"></a><span class="sd"> """</span> |
| <a id="__codelineno-0-367" name="__codelineno-0-367"></a> <span class="k">try</span><span class="p">:</span> |
| <a id="__codelineno-0-368" name="__codelineno-0-368"></a> <span class="k">if</span> <span class="ow">not</span> <span class="n">overwrite</span> <span class="ow">and</span> <span class="bp">self</span><span class="o">.</span><span class="n">exists</span><span class="p">()</span> <span class="ow">is</span> <span class="kc">True</span><span class="p">:</span> |
| <a id="__codelineno-0-369" name="__codelineno-0-369"></a> <span class="k">raise</span> <span class="ne">FileExistsError</span><span class="p">(</span><span class="sa">f</span><span class="s2">"Cannot create file, already exists: </span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">location</span><span class="si">}</span><span class="s2">"</span><span class="p">)</span> |
| <a id="__codelineno-0-370" name="__codelineno-0-370"></a> <span class="n">output_file</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_filesystem</span><span class="o">.</span><span class="n">open_output_stream</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_path</span><span class="p">,</span> <span class="n">buffer_size</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">_buffer_size</span><span class="p">)</span> |
| <a id="__codelineno-0-371" name="__codelineno-0-371"></a> <span class="k">except</span> <span class="ne">PermissionError</span><span class="p">:</span> |
| <a id="__codelineno-0-372" name="__codelineno-0-372"></a> <span class="k">raise</span> |
| <a id="__codelineno-0-373" name="__codelineno-0-373"></a> <span class="k">except</span> <span class="ne">OSError</span> <span class="k">as</span> <span class="n">e</span><span class="p">:</span> |
| <a id="__codelineno-0-374" name="__codelineno-0-374"></a> <span class="k">if</span> <span class="n">e</span><span class="o">.</span><span class="n">errno</span> <span class="o">==</span> <span class="mi">13</span> <span class="ow">or</span> <span class="s2">"AWS Error [code 15]"</span> <span class="ow">in</span> <span class="nb">str</span><span class="p">(</span><span class="n">e</span><span class="p">):</span> |
| <a id="__codelineno-0-375" name="__codelineno-0-375"></a> <span class="k">raise</span> <span class="ne">PermissionError</span><span class="p">(</span><span class="sa">f</span><span class="s2">"Cannot create file, access denied: </span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">location</span><span class="si">}</span><span class="s2">"</span><span class="p">)</span> <span class="kn">from</span><span class="w"> </span><span class="nn">e</span> |
| <a id="__codelineno-0-376" name="__codelineno-0-376"></a> <span class="k">raise</span> <span class="c1"># pragma: no cover - If some other kind of OSError, raise the raw error</span> |
| <a id="__codelineno-0-377" name="__codelineno-0-377"></a> <span class="k">return</span> <span class="n">output_file</span> |
| </code></pre></div></td></tr></table></div> |
| </details> |
| </div> |
| |
| </div> |
| |
| <div class="doc doc-object doc-function"> |
| |
| |
| <h3 id="pyiceberg.io.pyarrow.PyArrowFile.exists" class="doc doc-heading"> |
| <code class="highlight language-python"><span class="n">exists</span><span class="p">()</span></code> |
| |
| <a href="#pyiceberg.io.pyarrow.PyArrowFile.exists" class="headerlink" title="Permanent link">¶</a></h3> |
| |
| |
| <div class="doc doc-contents "> |
| |
| <p>Check whether the location exists.</p> |
| |
| |
| <details class="quote"> |
| <summary>Source code in <code>pyiceberg/io/pyarrow.py</code></summary> |
| <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-311">311</a></span> |
| <span class="normal"><a href="#__codelineno-0-312">312</a></span> |
| <span class="normal"><a href="#__codelineno-0-313">313</a></span> |
| <span class="normal"><a href="#__codelineno-0-314">314</a></span> |
| <span class="normal"><a href="#__codelineno-0-315">315</a></span> |
| <span class="normal"><a href="#__codelineno-0-316">316</a></span> |
| <span class="normal"><a href="#__codelineno-0-317">317</a></span></pre></div></td><td class="code"><div><pre><span></span><code><a id="__codelineno-0-311" name="__codelineno-0-311"></a><span class="k">def</span><span class="w"> </span><span class="nf">exists</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="nb">bool</span><span class="p">:</span> |
| <a id="__codelineno-0-312" name="__codelineno-0-312"></a><span class="w"> </span><span class="sd">"""Check whether the location exists."""</span> |
| <a id="__codelineno-0-313" name="__codelineno-0-313"></a> <span class="k">try</span><span class="p">:</span> |
| <a id="__codelineno-0-314" name="__codelineno-0-314"></a> <span class="bp">self</span><span class="o">.</span><span class="n">_file_info</span><span class="p">()</span> <span class="c1"># raises FileNotFoundError if it does not exist</span> |
| <a id="__codelineno-0-315" name="__codelineno-0-315"></a> <span class="k">return</span> <span class="kc">True</span> |
| <a id="__codelineno-0-316" name="__codelineno-0-316"></a> <span class="k">except</span> <span class="ne">FileNotFoundError</span><span class="p">:</span> |
| <a id="__codelineno-0-317" name="__codelineno-0-317"></a> <span class="k">return</span> <span class="kc">False</span> |
| </code></pre></div></td></tr></table></div> |
| </details> |
| </div> |
| |
| </div> |
| |
| <div class="doc doc-object doc-function"> |
| |
| |
| <h3 id="pyiceberg.io.pyarrow.PyArrowFile.open" class="doc doc-heading"> |
| <code class="highlight language-python"><span class="nb">open</span><span class="p">(</span><span class="n">seekable</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span></code> |
| |
| <a href="#pyiceberg.io.pyarrow.PyArrowFile.open" class="headerlink" title="Permanent link">¶</a></h3> |
| |
| |
| <div class="doc doc-contents "> |
| |
| <p>Open the location using a PyArrow FileSystem inferred from the location.</p> |
| |
| |
| <p><span class="doc-section-title">Parameters:</span></p> |
| <table> |
| <thead> |
| <tr> |
| <th>Name</th> |
| <th>Type</th> |
| <th>Description</th> |
| <th>Default</th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr class="doc-section-item"> |
| <td> |
| <code>seekable</code> |
| </td> |
| <td> |
| <code><span title="bool">bool</span></code> |
| </td> |
| <td> |
| <div class="doc-md-description"> |
| <p>If the stream should support seek, or if it is consumed sequentially.</p> |
| </div> |
| </td> |
| <td> |
| <code>True</code> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| |
| |
| <p><span class="doc-section-title">Returns:</span></p> |
| <table> |
| <thead> |
| <tr> |
| <th>Type</th> |
| <th>Description</th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr class="doc-section-item"> |
| <td> |
| <code><a class="autorefs autorefs-internal" title="InputStream (pyiceberg.io.InputStream)" href="../#pyiceberg.io.InputStream">InputStream</a></code> |
| </td> |
| <td> |
| <div class="doc-md-description"> |
| <p>pyarrow.lib.NativeFile: A NativeFile instance for the file located at <code>self.location</code>.</p> |
| </div> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| |
| |
| <p><span class="doc-section-title">Raises:</span></p> |
| <table> |
| <thead> |
| <tr> |
| <th>Type</th> |
| <th>Description</th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr class="doc-section-item"> |
| <td> |
| <code><span title="FileNotFoundError">FileNotFoundError</span></code> |
| </td> |
| <td> |
| <div class="doc-md-description"> |
| <p>If the file at <code>self.location</code> does not exist.</p> |
| </div> |
| </td> |
| </tr> |
| <tr class="doc-section-item"> |
| <td> |
| <code><span title="PermissionError">PermissionError</span></code> |
| </td> |
| <td> |
| <div class="doc-md-description"> |
| <p>If the file at <code>self.location</code> cannot be accessed due to a permission error such as |
| an AWS error code 15.</p> |
| </div> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| |
| |
| <details class="quote"> |
| <summary>Source code in <code>pyiceberg/io/pyarrow.py</code></summary> |
| <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-319">319</a></span> |
| <span class="normal"><a href="#__codelineno-0-320">320</a></span> |
| <span class="normal"><a href="#__codelineno-0-321">321</a></span> |
| <span class="normal"><a href="#__codelineno-0-322">322</a></span> |
| <span class="normal"><a href="#__codelineno-0-323">323</a></span> |
| <span class="normal"><a href="#__codelineno-0-324">324</a></span> |
| <span class="normal"><a href="#__codelineno-0-325">325</a></span> |
| <span class="normal"><a href="#__codelineno-0-326">326</a></span> |
| <span class="normal"><a href="#__codelineno-0-327">327</a></span> |
| <span class="normal"><a href="#__codelineno-0-328">328</a></span> |
| <span class="normal"><a href="#__codelineno-0-329">329</a></span> |
| <span class="normal"><a href="#__codelineno-0-330">330</a></span> |
| <span class="normal"><a href="#__codelineno-0-331">331</a></span> |
| <span class="normal"><a href="#__codelineno-0-332">332</a></span> |
| <span class="normal"><a href="#__codelineno-0-333">333</a></span> |
| <span class="normal"><a href="#__codelineno-0-334">334</a></span> |
| <span class="normal"><a href="#__codelineno-0-335">335</a></span> |
| <span class="normal"><a href="#__codelineno-0-336">336</a></span> |
| <span class="normal"><a href="#__codelineno-0-337">337</a></span> |
| <span class="normal"><a href="#__codelineno-0-338">338</a></span> |
| <span class="normal"><a href="#__codelineno-0-339">339</a></span> |
| <span class="normal"><a href="#__codelineno-0-340">340</a></span> |
| <span class="normal"><a href="#__codelineno-0-341">341</a></span> |
| <span class="normal"><a href="#__codelineno-0-342">342</a></span> |
| <span class="normal"><a href="#__codelineno-0-343">343</a></span> |
| <span class="normal"><a href="#__codelineno-0-344">344</a></span> |
| <span class="normal"><a href="#__codelineno-0-345">345</a></span> |
| <span class="normal"><a href="#__codelineno-0-346">346</a></span></pre></div></td><td class="code"><div><pre><span></span><code><a id="__codelineno-0-319" name="__codelineno-0-319"></a><span class="k">def</span><span class="w"> </span><span class="nf">open</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">seekable</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">True</span><span class="p">)</span> <span class="o">-></span> <span class="n">InputStream</span><span class="p">:</span> |
| <a id="__codelineno-0-320" name="__codelineno-0-320"></a><span class="w"> </span><span class="sd">"""Open the location using a PyArrow FileSystem inferred from the location.</span> |
| <a id="__codelineno-0-321" name="__codelineno-0-321"></a> |
| <a id="__codelineno-0-322" name="__codelineno-0-322"></a><span class="sd"> Args:</span> |
| <a id="__codelineno-0-323" name="__codelineno-0-323"></a><span class="sd"> seekable: If the stream should support seek, or if it is consumed sequential.</span> |
| <a id="__codelineno-0-324" name="__codelineno-0-324"></a> |
| <a id="__codelineno-0-325" name="__codelineno-0-325"></a><span class="sd"> Returns:</span> |
| <a id="__codelineno-0-326" name="__codelineno-0-326"></a><span class="sd"> pyarrow.lib.NativeFile: A NativeFile instance for the file located at `self.location`.</span> |
| <a id="__codelineno-0-327" name="__codelineno-0-327"></a> |
| <a id="__codelineno-0-328" name="__codelineno-0-328"></a><span class="sd"> Raises:</span> |
| <a id="__codelineno-0-329" name="__codelineno-0-329"></a><span class="sd"> FileNotFoundError: If the file at self.location does not exist.</span> |
| <a id="__codelineno-0-330" name="__codelineno-0-330"></a><span class="sd"> PermissionError: If the file at self.location cannot be accessed due to a permission error such as</span> |
| <a id="__codelineno-0-331" name="__codelineno-0-331"></a><span class="sd"> an AWS error code 15.</span> |
| <a id="__codelineno-0-332" name="__codelineno-0-332"></a><span class="sd"> """</span> |
| <a id="__codelineno-0-333" name="__codelineno-0-333"></a> <span class="k">try</span><span class="p">:</span> |
| <a id="__codelineno-0-334" name="__codelineno-0-334"></a> <span class="k">if</span> <span class="n">seekable</span><span class="p">:</span> |
| <a id="__codelineno-0-335" name="__codelineno-0-335"></a> <span class="n">input_file</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_filesystem</span><span class="o">.</span><span class="n">open_input_file</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_path</span><span class="p">)</span> |
| <a id="__codelineno-0-336" name="__codelineno-0-336"></a> <span class="k">else</span><span class="p">:</span> |
| <a id="__codelineno-0-337" name="__codelineno-0-337"></a> <span class="n">input_file</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_filesystem</span><span class="o">.</span><span class="n">open_input_stream</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_path</span><span class="p">,</span> <span class="n">buffer_size</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">_buffer_size</span><span class="p">)</span> |
| <a id="__codelineno-0-338" name="__codelineno-0-338"></a> <span class="k">except</span> <span class="p">(</span><span class="ne">FileNotFoundError</span><span class="p">,</span> <span class="ne">PermissionError</span><span class="p">):</span> |
| <a id="__codelineno-0-339" name="__codelineno-0-339"></a> <span class="k">raise</span> |
| <a id="__codelineno-0-340" name="__codelineno-0-340"></a> <span class="k">except</span> <span class="ne">OSError</span> <span class="k">as</span> <span class="n">e</span><span class="p">:</span> |
| <a id="__codelineno-0-341" name="__codelineno-0-341"></a> <span class="k">if</span> <span class="n">e</span><span class="o">.</span><span class="n">errno</span> <span class="o">==</span> <span class="mi">2</span> <span class="ow">or</span> <span class="s2">"Path does not exist"</span> <span class="ow">in</span> <span class="nb">str</span><span class="p">(</span><span class="n">e</span><span class="p">):</span> |
| <a id="__codelineno-0-342" name="__codelineno-0-342"></a> <span class="k">raise</span> <span class="ne">FileNotFoundError</span><span class="p">(</span><span class="sa">f</span><span class="s2">"Cannot open file, does not exist: </span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">location</span><span class="si">}</span><span class="s2">"</span><span class="p">)</span> <span class="kn">from</span><span class="w"> </span><span class="nn">e</span> |
| <a id="__codelineno-0-343" name="__codelineno-0-343"></a> <span class="k">elif</span> <span class="n">e</span><span class="o">.</span><span class="n">errno</span> <span class="o">==</span> <span class="mi">13</span> <span class="ow">or</span> <span class="s2">"AWS Error [code 15]"</span> <span class="ow">in</span> <span class="nb">str</span><span class="p">(</span><span class="n">e</span><span class="p">):</span> |
| <a id="__codelineno-0-344" name="__codelineno-0-344"></a> <span class="k">raise</span> <span class="ne">PermissionError</span><span class="p">(</span><span class="sa">f</span><span class="s2">"Cannot open file, access denied: </span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">location</span><span class="si">}</span><span class="s2">"</span><span class="p">)</span> <span class="kn">from</span><span class="w"> </span><span class="nn">e</span> |
| <a id="__codelineno-0-345" name="__codelineno-0-345"></a> <span class="k">raise</span> <span class="c1"># pragma: no cover - If some other kind of OSError, raise the raw error</span> |
| <a id="__codelineno-0-346" name="__codelineno-0-346"></a> <span class="k">return</span> <span class="n">input_file</span> |
| </code></pre></div></td></tr></table></div> |
| </details> |
| </div> |
| |
| </div> |
| |
| <div class="doc doc-object doc-function"> |
| |
| |
| <h3 id="pyiceberg.io.pyarrow.PyArrowFile.to_input_file" class="doc doc-heading"> |
| <code class="highlight language-python"><span class="n">to_input_file</span><span class="p">()</span></code> |
| |
| <a href="#pyiceberg.io.pyarrow.PyArrowFile.to_input_file" class="headerlink" title="Permanent link">¶</a></h3> |
| |
| |
| <div class="doc doc-contents "> |
| |
| <p>Return a new PyArrowFile for the location of an existing PyArrowFile instance.</p> |
| <p>This method is included to abide by the OutputFile abstract base class. Since this implementation uses a single |
| PyArrowFile class (as opposed to separate InputFile and OutputFile implementations), this method effectively returns |
| the same instance (no copy is made).</p> |
| |
| |
| <details class="quote"> |
| <summary>Source code in <code>pyiceberg/io/pyarrow.py</code></summary> |
| <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-379">379</a></span> |
| <span class="normal"><a href="#__codelineno-0-380">380</a></span> |
| <span class="normal"><a href="#__codelineno-0-381">381</a></span> |
| <span class="normal"><a href="#__codelineno-0-382">382</a></span> |
| <span class="normal"><a href="#__codelineno-0-383">383</a></span> |
| <span class="normal"><a href="#__codelineno-0-384">384</a></span> |
| <span class="normal"><a href="#__codelineno-0-385">385</a></span> |
| <span class="normal"><a href="#__codelineno-0-386">386</a></span></pre></div></td><td class="code"><div><pre><span></span><code><a id="__codelineno-0-379" name="__codelineno-0-379"></a><span class="k">def</span><span class="w"> </span><span class="nf">to_input_file</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="n">PyArrowFile</span><span class="p">:</span> |
| <a id="__codelineno-0-380" name="__codelineno-0-380"></a><span class="w"> </span><span class="sd">"""Return a new PyArrowFile for the location of an existing PyArrowFile instance.</span> |
| <a id="__codelineno-0-381" name="__codelineno-0-381"></a> |
| <a id="__codelineno-0-382" name="__codelineno-0-382"></a><span class="sd"> This method is included to abide by the OutputFile abstract base class. Since this implementation uses a single</span> |
| <a id="__codelineno-0-383" name="__codelineno-0-383"></a><span class="sd"> PyArrowFile class (as opposed to separate InputFile and OutputFile implementations), this method effectively returns</span> |
| <a id="__codelineno-0-384" name="__codelineno-0-384"></a><span class="sd"> the same instance (no copy is made).</span> |
| <a id="__codelineno-0-385" name="__codelineno-0-385"></a><span class="sd"> """</span> |
| <a id="__codelineno-0-386" name="__codelineno-0-386"></a> <span class="k">return</span> <span class="bp">self</span> |
| </code></pre></div></td></tr></table></div> |
| </details> |
| </div> |
| |
| </div> |
| |
| |
| |
| </div> |
| |
| </div> |
| |
| </div> |
| |
| <div class="doc doc-object doc-class"> |
| |
| |
| |
| <h2 id="pyiceberg.io.pyarrow.PyArrowFileIO" class="doc doc-heading"> |
| <code>PyArrowFileIO</code> |
| |
| |
| <a href="#pyiceberg.io.pyarrow.PyArrowFileIO" class="headerlink" title="Permanent link">¶</a></h2> |
| |
| |
| <div class="doc doc-contents "> |
| <p class="doc doc-class-bases"> |
| Bases: <code><a class="autorefs autorefs-internal" title="FileIO (pyiceberg.io.FileIO)" href="../#pyiceberg.io.FileIO">FileIO</a></code></p> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <details class="quote"> |
| <summary>Source code in <code>pyiceberg/io/pyarrow.py</code></summary> |
| <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-389">389</a></span> |
| <span class="normal"><a href="#__codelineno-0-390">390</a></span> |
| <span class="normal"><a href="#__codelineno-0-391">391</a></span> |
| <span class="normal"><a href="#__codelineno-0-392">392</a></span> |
| <span class="normal"><a href="#__codelineno-0-393">393</a></span> |
| <span class="normal"><a href="#__codelineno-0-394">394</a></span> |
| <span class="normal"><a href="#__codelineno-0-395">395</a></span> |
| <span class="normal"><a href="#__codelineno-0-396">396</a></span> |
| <span class="normal"><a href="#__codelineno-0-397">397</a></span> |
| <span class="normal"><a href="#__codelineno-0-398">398</a></span> |
| <span class="normal"><a href="#__codelineno-0-399">399</a></span> |
| <span class="normal"><a href="#__codelineno-0-400">400</a></span> |
| <span class="normal"><a href="#__codelineno-0-401">401</a></span> |
| <span class="normal"><a href="#__codelineno-0-402">402</a></span> |
| <span class="normal"><a href="#__codelineno-0-403">403</a></span> |
| <span class="normal"><a href="#__codelineno-0-404">404</a></span> |
| <span class="normal"><a href="#__codelineno-0-405">405</a></span> |
| <span class="normal"><a href="#__codelineno-0-406">406</a></span> |
| <span class="normal"><a href="#__codelineno-0-407">407</a></span> |
| <span class="normal"><a href="#__codelineno-0-408">408</a></span> |
| <span class="normal"><a href="#__codelineno-0-409">409</a></span> |
| <span class="normal"><a href="#__codelineno-0-410">410</a></span> |
| <span class="normal"><a href="#__codelineno-0-411">411</a></span> |
| <span class="normal"><a href="#__codelineno-0-412">412</a></span> |
| <span class="normal"><a href="#__codelineno-0-413">413</a></span> |
| <span class="normal"><a href="#__codelineno-0-414">414</a></span> |
| <span class="normal"><a href="#__codelineno-0-415">415</a></span> |
| <span class="normal"><a href="#__codelineno-0-416">416</a></span> |
| <span class="normal"><a href="#__codelineno-0-417">417</a></span> |
| <span class="normal"><a href="#__codelineno-0-418">418</a></span> |
| <span class="normal"><a href="#__codelineno-0-419">419</a></span> |
| <span class="normal"><a href="#__codelineno-0-420">420</a></span> |
| <span class="normal"><a href="#__codelineno-0-421">421</a></span> |
| <span class="normal"><a href="#__codelineno-0-422">422</a></span> |
| <span class="normal"><a href="#__codelineno-0-423">423</a></span> |
| <span class="normal"><a href="#__codelineno-0-424">424</a></span> |
| <span class="normal"><a href="#__codelineno-0-425">425</a></span> |
| <span class="normal"><a href="#__codelineno-0-426">426</a></span> |
| <span class="normal"><a href="#__codelineno-0-427">427</a></span> |
| <span class="normal"><a href="#__codelineno-0-428">428</a></span> |
| <span class="normal"><a href="#__codelineno-0-429">429</a></span> |
| <span class="normal"><a href="#__codelineno-0-430">430</a></span> |
| <span class="normal"><a href="#__codelineno-0-431">431</a></span> |
| <span class="normal"><a href="#__codelineno-0-432">432</a></span> |
| <span class="normal"><a href="#__codelineno-0-433">433</a></span> |
| <span class="normal"><a href="#__codelineno-0-434">434</a></span> |
| <span class="normal"><a href="#__codelineno-0-435">435</a></span> |
| <span class="normal"><a href="#__codelineno-0-436">436</a></span> |
| <span class="normal"><a href="#__codelineno-0-437">437</a></span> |
| <span class="normal"><a href="#__codelineno-0-438">438</a></span> |
| <span class="normal"><a href="#__codelineno-0-439">439</a></span> |
| <span class="normal"><a href="#__codelineno-0-440">440</a></span> |
| <span class="normal"><a href="#__codelineno-0-441">441</a></span> |
| <span class="normal"><a href="#__codelineno-0-442">442</a></span> |
| <span class="normal"><a href="#__codelineno-0-443">443</a></span> |
| <span class="normal"><a href="#__codelineno-0-444">444</a></span> |
| <span class="normal"><a href="#__codelineno-0-445">445</a></span> |
| <span class="normal"><a href="#__codelineno-0-446">446</a></span> |
| <span class="normal"><a href="#__codelineno-0-447">447</a></span> |
| <span class="normal"><a href="#__codelineno-0-448">448</a></span> |
| <span class="normal"><a href="#__codelineno-0-449">449</a></span> |
| <span class="normal"><a href="#__codelineno-0-450">450</a></span> |
| <span class="normal"><a href="#__codelineno-0-451">451</a></span> |
| <span class="normal"><a href="#__codelineno-0-452">452</a></span> |
| <span class="normal"><a href="#__codelineno-0-453">453</a></span> |
| <span class="normal"><a href="#__codelineno-0-454">454</a></span> |
| <span class="normal"><a href="#__codelineno-0-455">455</a></span> |
| <span class="normal"><a href="#__codelineno-0-456">456</a></span> |
| <span class="normal"><a href="#__codelineno-0-457">457</a></span> |
| <span class="normal"><a href="#__codelineno-0-458">458</a></span> |
| <span class="normal"><a href="#__codelineno-0-459">459</a></span> |
| <span class="normal"><a href="#__codelineno-0-460">460</a></span> |
| <span class="normal"><a href="#__codelineno-0-461">461</a></span> |
| <span class="normal"><a href="#__codelineno-0-462">462</a></span> |
| <span class="normal"><a href="#__codelineno-0-463">463</a></span> |
| <span class="normal"><a href="#__codelineno-0-464">464</a></span> |
| <span class="normal"><a href="#__codelineno-0-465">465</a></span> |
| <span class="normal"><a href="#__codelineno-0-466">466</a></span> |
| <span class="normal"><a href="#__codelineno-0-467">467</a></span> |
| <span class="normal"><a href="#__codelineno-0-468">468</a></span> |
| <span class="normal"><a href="#__codelineno-0-469">469</a></span> |
| <span class="normal"><a href="#__codelineno-0-470">470</a></span> |
| <span class="normal"><a href="#__codelineno-0-471">471</a></span> |
| <span class="normal"><a href="#__codelineno-0-472">472</a></span> |
| <span class="normal"><a href="#__codelineno-0-473">473</a></span> |
| <span class="normal"><a href="#__codelineno-0-474">474</a></span> |
| <span class="normal"><a href="#__codelineno-0-475">475</a></span> |
| <span class="normal"><a href="#__codelineno-0-476">476</a></span> |
| <span class="normal"><a href="#__codelineno-0-477">477</a></span> |
| <span class="normal"><a href="#__codelineno-0-478">478</a></span> |
| <span class="normal"><a href="#__codelineno-0-479">479</a></span> |
| <span class="normal"><a href="#__codelineno-0-480">480</a></span> |
| <span class="normal"><a href="#__codelineno-0-481">481</a></span> |
| <span class="normal"><a href="#__codelineno-0-482">482</a></span> |
| <span class="normal"><a href="#__codelineno-0-483">483</a></span> |
| <span class="normal"><a href="#__codelineno-0-484">484</a></span> |
| <span class="normal"><a href="#__codelineno-0-485">485</a></span> |
| <span class="normal"><a href="#__codelineno-0-486">486</a></span> |
| <span class="normal"><a href="#__codelineno-0-487">487</a></span> |
| <span class="normal"><a href="#__codelineno-0-488">488</a></span> |
| <span class="normal"><a href="#__codelineno-0-489">489</a></span> |
| <span class="normal"><a href="#__codelineno-0-490">490</a></span> |
| <span class="normal"><a href="#__codelineno-0-491">491</a></span> |
| <span class="normal"><a href="#__codelineno-0-492">492</a></span> |
| <span class="normal"><a href="#__codelineno-0-493">493</a></span> |
| <span class="normal"><a href="#__codelineno-0-494">494</a></span> |
| <span class="normal"><a href="#__codelineno-0-495">495</a></span> |
| <span class="normal"><a href="#__codelineno-0-496">496</a></span> |
| <span class="normal"><a href="#__codelineno-0-497">497</a></span> |
| <span class="normal"><a href="#__codelineno-0-498">498</a></span> |
| <span class="normal"><a href="#__codelineno-0-499">499</a></span> |
| <span class="normal"><a href="#__codelineno-0-500">500</a></span> |
| <span class="normal"><a href="#__codelineno-0-501">501</a></span> |
| <span class="normal"><a href="#__codelineno-0-502">502</a></span> |
| <span class="normal"><a href="#__codelineno-0-503">503</a></span> |
| <span class="normal"><a href="#__codelineno-0-504">504</a></span> |
| <span class="normal"><a href="#__codelineno-0-505">505</a></span> |
| <span class="normal"><a href="#__codelineno-0-506">506</a></span> |
| <span class="normal"><a href="#__codelineno-0-507">507</a></span> |
| <span class="normal"><a href="#__codelineno-0-508">508</a></span> |
| <span class="normal"><a href="#__codelineno-0-509">509</a></span> |
| <span class="normal"><a href="#__codelineno-0-510">510</a></span> |
| <span class="normal"><a href="#__codelineno-0-511">511</a></span> |
| <span class="normal"><a href="#__codelineno-0-512">512</a></span> |
| <span class="normal"><a href="#__codelineno-0-513">513</a></span> |
| <span class="normal"><a href="#__codelineno-0-514">514</a></span> |
| <span class="normal"><a href="#__codelineno-0-515">515</a></span> |
| <span class="normal"><a href="#__codelineno-0-516">516</a></span> |
| <span class="normal"><a href="#__codelineno-0-517">517</a></span> |
| <span class="normal"><a href="#__codelineno-0-518">518</a></span> |
| <span class="normal"><a href="#__codelineno-0-519">519</a></span> |
| <span class="normal"><a href="#__codelineno-0-520">520</a></span> |
| <span class="normal"><a href="#__codelineno-0-521">521</a></span> |
| <span class="normal"><a href="#__codelineno-0-522">522</a></span> |
| <span class="normal"><a href="#__codelineno-0-523">523</a></span> |
| <span class="normal"><a href="#__codelineno-0-524">524</a></span> |
| <span class="normal"><a href="#__codelineno-0-525">525</a></span> |
| <span class="normal"><a href="#__codelineno-0-526">526</a></span> |
| <span class="normal"><a href="#__codelineno-0-527">527</a></span> |
| <span class="normal"><a href="#__codelineno-0-528">528</a></span> |
| <span class="normal"><a href="#__codelineno-0-529">529</a></span> |
| <span class="normal"><a href="#__codelineno-0-530">530</a></span> |
| <span class="normal"><a href="#__codelineno-0-531">531</a></span> |
| <span class="normal"><a href="#__codelineno-0-532">532</a></span> |
| <span class="normal"><a href="#__codelineno-0-533">533</a></span> |
| <span class="normal"><a href="#__codelineno-0-534">534</a></span> |
| <span class="normal"><a href="#__codelineno-0-535">535</a></span> |
| <span class="normal"><a href="#__codelineno-0-536">536</a></span> |
| <span class="normal"><a href="#__codelineno-0-537">537</a></span> |
| <span class="normal"><a href="#__codelineno-0-538">538</a></span> |
| <span class="normal"><a href="#__codelineno-0-539">539</a></span> |
| <span class="normal"><a href="#__codelineno-0-540">540</a></span> |
| <span class="normal"><a href="#__codelineno-0-541">541</a></span> |
| <span class="normal"><a href="#__codelineno-0-542">542</a></span> |
| <span class="normal"><a href="#__codelineno-0-543">543</a></span> |
| <span class="normal"><a href="#__codelineno-0-544">544</a></span> |
| <span class="normal"><a href="#__codelineno-0-545">545</a></span> |
| <span class="normal"><a href="#__codelineno-0-546">546</a></span> |
| <span class="normal"><a href="#__codelineno-0-547">547</a></span> |
| <span class="normal"><a href="#__codelineno-0-548">548</a></span> |
| <span class="normal"><a href="#__codelineno-0-549">549</a></span> |
| <span class="normal"><a href="#__codelineno-0-550">550</a></span> |
| <span class="normal"><a href="#__codelineno-0-551">551</a></span> |
| <span class="normal"><a href="#__codelineno-0-552">552</a></span> |
| <span class="normal"><a href="#__codelineno-0-553">553</a></span> |
| <span class="normal"><a href="#__codelineno-0-554">554</a></span> |
| <span class="normal"><a href="#__codelineno-0-555">555</a></span> |
| <span class="normal"><a href="#__codelineno-0-556">556</a></span> |
| <span class="normal"><a href="#__codelineno-0-557">557</a></span> |
| <span class="normal"><a href="#__codelineno-0-558">558</a></span> |
| <span class="normal"><a href="#__codelineno-0-559">559</a></span> |
| <span class="normal"><a href="#__codelineno-0-560">560</a></span> |
| <span class="normal"><a href="#__codelineno-0-561">561</a></span> |
| <span class="normal"><a href="#__codelineno-0-562">562</a></span> |
| <span class="normal"><a href="#__codelineno-0-563">563</a></span> |
| <span class="normal"><a href="#__codelineno-0-564">564</a></span> |
| <span class="normal"><a href="#__codelineno-0-565">565</a></span> |
| <span class="normal"><a href="#__codelineno-0-566">566</a></span> |
| <span class="normal"><a href="#__codelineno-0-567">567</a></span> |
| <span class="normal"><a href="#__codelineno-0-568">568</a></span> |
| <span class="normal"><a href="#__codelineno-0-569">569</a></span> |
| <span class="normal"><a href="#__codelineno-0-570">570</a></span> |
| <span class="normal"><a href="#__codelineno-0-571">571</a></span> |
| <span class="normal"><a href="#__codelineno-0-572">572</a></span> |
| <span class="normal"><a href="#__codelineno-0-573">573</a></span> |
| <span class="normal"><a href="#__codelineno-0-574">574</a></span> |
| <span class="normal"><a href="#__codelineno-0-575">575</a></span> |
| <span class="normal"><a href="#__codelineno-0-576">576</a></span> |
| <span class="normal"><a href="#__codelineno-0-577">577</a></span> |
| <span class="normal"><a href="#__codelineno-0-578">578</a></span> |
| <span class="normal"><a href="#__codelineno-0-579">579</a></span> |
| <span class="normal"><a href="#__codelineno-0-580">580</a></span> |
| <span class="normal"><a href="#__codelineno-0-581">581</a></span> |
| <span class="normal"><a href="#__codelineno-0-582">582</a></span> |
| <span class="normal"><a href="#__codelineno-0-583">583</a></span> |
| <span class="normal"><a href="#__codelineno-0-584">584</a></span> |
| <span class="normal"><a href="#__codelineno-0-585">585</a></span> |
| <span class="normal"><a href="#__codelineno-0-586">586</a></span> |
| <span class="normal"><a href="#__codelineno-0-587">587</a></span> |
| <span class="normal"><a href="#__codelineno-0-588">588</a></span> |
| <span class="normal"><a href="#__codelineno-0-589">589</a></span> |
| <span class="normal"><a href="#__codelineno-0-590">590</a></span> |
| <span class="normal"><a href="#__codelineno-0-591">591</a></span> |
| <span class="normal"><a href="#__codelineno-0-592">592</a></span> |
| <span class="normal"><a href="#__codelineno-0-593">593</a></span> |
| <span class="normal"><a href="#__codelineno-0-594">594</a></span> |
| <span class="normal"><a href="#__codelineno-0-595">595</a></span> |
| <span class="normal"><a href="#__codelineno-0-596">596</a></span> |
| <span class="normal"><a href="#__codelineno-0-597">597</a></span> |
| <span class="normal"><a href="#__codelineno-0-598">598</a></span> |
| <span class="normal"><a href="#__codelineno-0-599">599</a></span> |
| <span class="normal"><a href="#__codelineno-0-600">600</a></span> |
| <span class="normal"><a href="#__codelineno-0-601">601</a></span> |
| <span class="normal"><a href="#__codelineno-0-602">602</a></span> |
| <span class="normal"><a href="#__codelineno-0-603">603</a></span> |
| <span class="normal"><a href="#__codelineno-0-604">604</a></span> |
| <span class="normal"><a href="#__codelineno-0-605">605</a></span> |
| <span class="normal"><a href="#__codelineno-0-606">606</a></span> |
| <span class="normal"><a href="#__codelineno-0-607">607</a></span> |
| <span class="normal"><a href="#__codelineno-0-608">608</a></span> |
| <span class="normal"><a href="#__codelineno-0-609">609</a></span> |
| <span class="normal"><a href="#__codelineno-0-610">610</a></span> |
| <span class="normal"><a href="#__codelineno-0-611">611</a></span> |
| <span class="normal"><a href="#__codelineno-0-612">612</a></span> |
| <span class="normal"><a href="#__codelineno-0-613">613</a></span> |
| <span class="normal"><a href="#__codelineno-0-614">614</a></span> |
| <span class="normal"><a href="#__codelineno-0-615">615</a></span> |
| <span class="normal"><a href="#__codelineno-0-616">616</a></span> |
| <span class="normal"><a href="#__codelineno-0-617">617</a></span> |
| <span class="normal"><a href="#__codelineno-0-618">618</a></span> |
| <span class="normal"><a href="#__codelineno-0-619">619</a></span> |
| <span class="normal"><a href="#__codelineno-0-620">620</a></span> |
| <span class="normal"><a href="#__codelineno-0-621">621</a></span> |
| <span class="normal"><a href="#__codelineno-0-622">622</a></span> |
| <span class="normal"><a href="#__codelineno-0-623">623</a></span> |
| <span class="normal"><a href="#__codelineno-0-624">624</a></span> |
| <span class="normal"><a href="#__codelineno-0-625">625</a></span> |
| <span class="normal"><a href="#__codelineno-0-626">626</a></span> |
| <span class="normal"><a href="#__codelineno-0-627">627</a></span> |
| <span class="normal"><a href="#__codelineno-0-628">628</a></span> |
| <span class="normal"><a href="#__codelineno-0-629">629</a></span> |
| <span class="normal"><a href="#__codelineno-0-630">630</a></span> |
| <span class="normal"><a href="#__codelineno-0-631">631</a></span> |
| <span class="normal"><a href="#__codelineno-0-632">632</a></span> |
| <span class="normal"><a href="#__codelineno-0-633">633</a></span> |
| <span class="normal"><a href="#__codelineno-0-634">634</a></span> |
| <span class="normal"><a href="#__codelineno-0-635">635</a></span> |
| <span class="normal"><a href="#__codelineno-0-636">636</a></span> |
| <span class="normal"><a href="#__codelineno-0-637">637</a></span> |
| <span class="normal"><a href="#__codelineno-0-638">638</a></span> |
| <span class="normal"><a href="#__codelineno-0-639">639</a></span> |
| <span class="normal"><a href="#__codelineno-0-640">640</a></span> |
| <span class="normal"><a href="#__codelineno-0-641">641</a></span> |
| <span class="normal"><a href="#__codelineno-0-642">642</a></span> |
| <span class="normal"><a href="#__codelineno-0-643">643</a></span> |
| <span class="normal"><a href="#__codelineno-0-644">644</a></span> |
| <span class="normal"><a href="#__codelineno-0-645">645</a></span> |
| <span class="normal"><a href="#__codelineno-0-646">646</a></span> |
| <span class="normal"><a href="#__codelineno-0-647">647</a></span> |
| <span class="normal"><a href="#__codelineno-0-648">648</a></span> |
| <span class="normal"><a href="#__codelineno-0-649">649</a></span> |
| <span class="normal"><a href="#__codelineno-0-650">650</a></span> |
| <span class="normal"><a href="#__codelineno-0-651">651</a></span> |
| <span class="normal"><a href="#__codelineno-0-652">652</a></span> |
| <span class="normal"><a href="#__codelineno-0-653">653</a></span> |
| <span class="normal"><a href="#__codelineno-0-654">654</a></span> |
| <span class="normal"><a href="#__codelineno-0-655">655</a></span> |
| <span class="normal"><a href="#__codelineno-0-656">656</a></span> |
| <span class="normal"><a href="#__codelineno-0-657">657</a></span> |
| <span class="normal"><a href="#__codelineno-0-658">658</a></span> |
| <span class="normal"><a href="#__codelineno-0-659">659</a></span> |
| <span class="normal"><a href="#__codelineno-0-660">660</a></span> |
| <span class="normal"><a href="#__codelineno-0-661">661</a></span> |
| <span class="normal"><a href="#__codelineno-0-662">662</a></span> |
| <span class="normal"><a href="#__codelineno-0-663">663</a></span> |
| <span class="normal"><a href="#__codelineno-0-664">664</a></span> |
| <span class="normal"><a href="#__codelineno-0-665">665</a></span> |
| <span class="normal"><a href="#__codelineno-0-666">666</a></span> |
| <span class="normal"><a href="#__codelineno-0-667">667</a></span> |
| <span class="normal"><a href="#__codelineno-0-668">668</a></span> |
| <span class="normal"><a href="#__codelineno-0-669">669</a></span> |
| <span class="normal"><a href="#__codelineno-0-670">670</a></span> |
| <span class="normal"><a href="#__codelineno-0-671">671</a></span> |
| <span class="normal"><a href="#__codelineno-0-672">672</a></span> |
| <span class="normal"><a href="#__codelineno-0-673">673</a></span> |
| <span class="normal"><a href="#__codelineno-0-674">674</a></span> |
| <span class="normal"><a href="#__codelineno-0-675">675</a></span> |
| <span class="normal"><a href="#__codelineno-0-676">676</a></span> |
| <span class="normal"><a href="#__codelineno-0-677">677</a></span> |
| <span class="normal"><a href="#__codelineno-0-678">678</a></span> |
| <span class="normal"><a href="#__codelineno-0-679">679</a></span> |
| <span class="normal"><a href="#__codelineno-0-680">680</a></span> |
| <span class="normal"><a href="#__codelineno-0-681">681</a></span> |
| <span class="normal"><a href="#__codelineno-0-682">682</a></span> |
| <span class="normal"><a href="#__codelineno-0-683">683</a></span> |
| <span class="normal"><a href="#__codelineno-0-684">684</a></span> |
| <span class="normal"><a href="#__codelineno-0-685">685</a></span> |
| <span class="normal"><a href="#__codelineno-0-686">686</a></span> |
| <span class="normal"><a href="#__codelineno-0-687">687</a></span> |
| <span class="normal"><a href="#__codelineno-0-688">688</a></span></pre></div></td><td class="code"><div><pre><span></span><code><a id="__codelineno-0-389" name="__codelineno-0-389"></a><span class="k">class</span><span class="w"> </span><span class="nc">PyArrowFileIO</span><span class="p">(</span><span class="n">FileIO</span><span class="p">):</span> |
| <a id="__codelineno-0-390" name="__codelineno-0-390"></a> <span class="n">fs_by_scheme</span><span class="p">:</span> <span class="n">Callable</span><span class="p">[[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]],</span> <span class="n">FileSystem</span><span class="p">]</span> |
| <a id="__codelineno-0-391" name="__codelineno-0-391"></a> |
| <a id="__codelineno-0-392" name="__codelineno-0-392"></a> <span class="k">def</span><span class="w"> </span><span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">properties</span><span class="p">:</span> <span class="n">Properties</span> <span class="o">=</span> <span class="n">EMPTY_DICT</span><span class="p">):</span> |
| <a id="__codelineno-0-393" name="__codelineno-0-393"></a> <span class="bp">self</span><span class="o">.</span><span class="n">fs_by_scheme</span><span class="p">:</span> <span class="n">Callable</span><span class="p">[[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]],</span> <span class="n">FileSystem</span><span class="p">]</span> <span class="o">=</span> <span class="n">lru_cache</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_initialize_fs</span><span class="p">)</span> |
| <a id="__codelineno-0-394" name="__codelineno-0-394"></a> <span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="fm">__init__</span><span class="p">(</span><span class="n">properties</span><span class="o">=</span><span class="n">properties</span><span class="p">)</span> |
| <a id="__codelineno-0-395" name="__codelineno-0-395"></a> |
| <a id="__codelineno-0-396" name="__codelineno-0-396"></a> <span class="nd">@staticmethod</span> |
| <a id="__codelineno-0-397" name="__codelineno-0-397"></a> <span class="k">def</span><span class="w"> </span><span class="nf">parse_location</span><span class="p">(</span><span class="n">location</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span> <span class="n">properties</span><span class="p">:</span> <span class="n">Properties</span> <span class="o">=</span> <span class="n">EMPTY_DICT</span><span class="p">)</span> <span class="o">-></span> <span class="n">Tuple</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="nb">str</span><span class="p">,</span> <span class="nb">str</span><span class="p">]:</span> |
| <a id="__codelineno-0-398" name="__codelineno-0-398"></a><span class="w"> </span><span class="sd">"""Return (scheme, netloc, path) for the given location.</span> |
| <a id="__codelineno-0-399" name="__codelineno-0-399"></a> |
| <a id="__codelineno-0-400" name="__codelineno-0-400"></a><span class="sd"> Uses DEFAULT_SCHEME and DEFAULT_NETLOC if scheme/netloc are missing.</span> |
| <a id="__codelineno-0-401" name="__codelineno-0-401"></a><span class="sd"> """</span> |
| <a id="__codelineno-0-402" name="__codelineno-0-402"></a> <span class="n">uri</span> <span class="o">=</span> <span class="n">urlparse</span><span class="p">(</span><span class="n">location</span><span class="p">)</span> |
| <a id="__codelineno-0-403" name="__codelineno-0-403"></a> |
| <a id="__codelineno-0-404" name="__codelineno-0-404"></a> <span class="k">if</span> <span class="ow">not</span> <span class="n">uri</span><span class="o">.</span><span class="n">scheme</span><span class="p">:</span> |
| <a id="__codelineno-0-405" name="__codelineno-0-405"></a> <span class="n">default_scheme</span> <span class="o">=</span> <span class="n">properties</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">"DEFAULT_SCHEME"</span><span class="p">,</span> <span class="s2">"file"</span><span class="p">)</span> |
| <a id="__codelineno-0-406" name="__codelineno-0-406"></a> <span class="n">default_netloc</span> <span class="o">=</span> <span class="n">properties</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">"DEFAULT_NETLOC"</span><span class="p">,</span> <span class="s2">""</span><span class="p">)</span> |
| <a id="__codelineno-0-407" name="__codelineno-0-407"></a> <span class="k">return</span> <span class="n">default_scheme</span><span class="p">,</span> <span class="n">default_netloc</span><span class="p">,</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">abspath</span><span class="p">(</span><span class="n">location</span><span class="p">)</span> |
| <a id="__codelineno-0-408" name="__codelineno-0-408"></a> <span class="k">elif</span> <span class="n">uri</span><span class="o">.</span><span class="n">scheme</span> <span class="ow">in</span> <span class="p">(</span><span class="s2">"hdfs"</span><span class="p">,</span> <span class="s2">"viewfs"</span><span class="p">):</span> |
| <a id="__codelineno-0-409" name="__codelineno-0-409"></a> <span class="k">return</span> <span class="n">uri</span><span class="o">.</span><span class="n">scheme</span><span class="p">,</span> <span class="n">uri</span><span class="o">.</span><span class="n">netloc</span><span class="p">,</span> <span class="n">uri</span><span class="o">.</span><span class="n">path</span> |
| <a id="__codelineno-0-410" name="__codelineno-0-410"></a> <span class="k">else</span><span class="p">:</span> |
| <a id="__codelineno-0-411" name="__codelineno-0-411"></a> <span class="k">return</span> <span class="n">uri</span><span class="o">.</span><span class="n">scheme</span><span class="p">,</span> <span class="n">uri</span><span class="o">.</span><span class="n">netloc</span><span class="p">,</span> <span class="sa">f</span><span class="s2">"</span><span class="si">{</span><span class="n">uri</span><span class="o">.</span><span class="n">netloc</span><span class="si">}{</span><span class="n">uri</span><span class="o">.</span><span class="n">path</span><span class="si">}</span><span class="s2">"</span> |
| <a id="__codelineno-0-412" name="__codelineno-0-412"></a> |
| <a id="__codelineno-0-413" name="__codelineno-0-413"></a> <span class="k">def</span><span class="w"> </span><span class="nf">_initialize_fs</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">scheme</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span> <span class="n">netloc</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">)</span> <span class="o">-></span> <span class="n">FileSystem</span><span class="p">:</span> |
| <a id="__codelineno-0-414" name="__codelineno-0-414"></a><span class="w"> </span><span class="sd">"""Initialize FileSystem for different scheme."""</span> |
| <a id="__codelineno-0-415" name="__codelineno-0-415"></a> <span class="k">if</span> <span class="n">scheme</span> <span class="ow">in</span> <span class="p">{</span><span class="s2">"oss"</span><span class="p">}:</span> |
| <a id="__codelineno-0-416" name="__codelineno-0-416"></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_initialize_oss_fs</span><span class="p">()</span> |
| <a id="__codelineno-0-417" name="__codelineno-0-417"></a> |
| <a id="__codelineno-0-418" name="__codelineno-0-418"></a> <span class="k">elif</span> <span class="n">scheme</span> <span class="ow">in</span> <span class="p">{</span><span class="s2">"s3"</span><span class="p">,</span> <span class="s2">"s3a"</span><span class="p">,</span> <span class="s2">"s3n"</span><span class="p">}:</span> |
| <a id="__codelineno-0-419" name="__codelineno-0-419"></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_initialize_s3_fs</span><span class="p">(</span><span class="n">netloc</span><span class="p">)</span> |
| <a id="__codelineno-0-420" name="__codelineno-0-420"></a> |
| <a id="__codelineno-0-421" name="__codelineno-0-421"></a> <span class="k">elif</span> <span class="n">scheme</span> <span class="ow">in</span> <span class="p">{</span><span class="s2">"hdfs"</span><span class="p">,</span> <span class="s2">"viewfs"</span><span class="p">}:</span> |
| <a id="__codelineno-0-422" name="__codelineno-0-422"></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_initialize_hdfs_fs</span><span class="p">(</span><span class="n">scheme</span><span class="p">,</span> <span class="n">netloc</span><span class="p">)</span> |
| <a id="__codelineno-0-423" name="__codelineno-0-423"></a> |
| <a id="__codelineno-0-424" name="__codelineno-0-424"></a> <span class="k">elif</span> <span class="n">scheme</span> <span class="ow">in</span> <span class="p">{</span><span class="s2">"gs"</span><span class="p">,</span> <span class="s2">"gcs"</span><span class="p">}:</span> |
| <a id="__codelineno-0-425" name="__codelineno-0-425"></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_initialize_gcs_fs</span><span class="p">()</span> |
| <a id="__codelineno-0-426" name="__codelineno-0-426"></a> |
| <a id="__codelineno-0-427" name="__codelineno-0-427"></a> <span class="k">elif</span> <span class="n">scheme</span> <span class="ow">in</span> <span class="p">{</span><span class="s2">"abfs"</span><span class="p">,</span> <span class="s2">"abfss"</span><span class="p">,</span> <span class="s2">"wasb"</span><span class="p">,</span> <span class="s2">"wasbs"</span><span class="p">}:</span> |
| <a id="__codelineno-0-428" name="__codelineno-0-428"></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_initialize_azure_fs</span><span class="p">()</span> |
| <a id="__codelineno-0-429" name="__codelineno-0-429"></a> |
| <a id="__codelineno-0-430" name="__codelineno-0-430"></a> <span class="k">elif</span> <span class="n">scheme</span> <span class="ow">in</span> <span class="p">{</span><span class="s2">"file"</span><span class="p">}:</span> |
| <a id="__codelineno-0-431" name="__codelineno-0-431"></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_initialize_local_fs</span><span class="p">()</span> |
| <a id="__codelineno-0-432" name="__codelineno-0-432"></a> |
| <a id="__codelineno-0-433" name="__codelineno-0-433"></a> <span class="k">else</span><span class="p">:</span> |
| <a id="__codelineno-0-434" name="__codelineno-0-434"></a> <span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="sa">f</span><span class="s2">"Unrecognized filesystem type in URI: </span><span class="si">{</span><span class="n">scheme</span><span class="si">}</span><span class="s2">"</span><span class="p">)</span> |
| <a id="__codelineno-0-435" name="__codelineno-0-435"></a> |
| <a id="__codelineno-0-436" name="__codelineno-0-436"></a> <span class="k">def</span><span class="w"> </span><span class="nf">_initialize_oss_fs</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="n">FileSystem</span><span class="p">:</span> |
| <a id="__codelineno-0-437" name="__codelineno-0-437"></a> <span class="kn">from</span><span class="w"> </span><span class="nn">pyarrow.fs</span><span class="w"> </span><span class="kn">import</span> <span class="n">S3FileSystem</span> |
| <a id="__codelineno-0-438" name="__codelineno-0-438"></a> |
| <a id="__codelineno-0-439" name="__codelineno-0-439"></a> <span class="n">client_kwargs</span><span class="p">:</span> <span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Any</span><span class="p">]</span> <span class="o">=</span> <span class="p">{</span> |
| <a id="__codelineno-0-440" name="__codelineno-0-440"></a> <span class="s2">"endpoint_override"</span><span class="p">:</span> <span class="bp">self</span><span class="o">.</span><span class="n">properties</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">S3_ENDPOINT</span><span class="p">),</span> |
| <a id="__codelineno-0-441" name="__codelineno-0-441"></a> <span class="s2">"access_key"</span><span class="p">:</span> <span class="n">get_first_property_value</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">properties</span><span class="p">,</span> <span class="n">S3_ACCESS_KEY_ID</span><span class="p">,</span> <span class="n">AWS_ACCESS_KEY_ID</span><span class="p">),</span> |
| <a id="__codelineno-0-442" name="__codelineno-0-442"></a> <span class="s2">"secret_key"</span><span class="p">:</span> <span class="n">get_first_property_value</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">properties</span><span class="p">,</span> <span class="n">S3_SECRET_ACCESS_KEY</span><span class="p">,</span> <span class="n">AWS_SECRET_ACCESS_KEY</span><span class="p">),</span> |
| <a id="__codelineno-0-443" name="__codelineno-0-443"></a> <span class="s2">"session_token"</span><span class="p">:</span> <span class="n">get_first_property_value</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">properties</span><span class="p">,</span> <span class="n">S3_SESSION_TOKEN</span><span class="p">,</span> <span class="n">AWS_SESSION_TOKEN</span><span class="p">),</span> |
| <a id="__codelineno-0-444" name="__codelineno-0-444"></a> <span class="s2">"region"</span><span class="p">:</span> <span class="n">get_first_property_value</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">properties</span><span class="p">,</span> <span class="n">S3_REGION</span><span class="p">,</span> <span class="n">AWS_REGION</span><span class="p">),</span> |
| <a id="__codelineno-0-445" name="__codelineno-0-445"></a> <span class="s2">"force_virtual_addressing"</span><span class="p">:</span> <span class="n">property_as_bool</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">properties</span><span class="p">,</span> <span class="n">S3_FORCE_VIRTUAL_ADDRESSING</span><span class="p">,</span> <span class="kc">True</span><span class="p">),</span> |
| <a id="__codelineno-0-446" name="__codelineno-0-446"></a> <span class="p">}</span> |
| <a id="__codelineno-0-447" name="__codelineno-0-447"></a> |
| <a id="__codelineno-0-448" name="__codelineno-0-448"></a> <span class="k">if</span> <span class="n">proxy_uri</span> <span class="o">:=</span> <span class="bp">self</span><span class="o">.</span><span class="n">properties</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">S3_PROXY_URI</span><span class="p">):</span> |
| <a id="__codelineno-0-449" name="__codelineno-0-449"></a> <span class="n">client_kwargs</span><span class="p">[</span><span class="s2">"proxy_options"</span><span class="p">]</span> <span class="o">=</span> <span class="n">proxy_uri</span> |
| <a id="__codelineno-0-450" name="__codelineno-0-450"></a> |
| <a id="__codelineno-0-451" name="__codelineno-0-451"></a> <span class="k">if</span> <span class="n">connect_timeout</span> <span class="o">:=</span> <span class="bp">self</span><span class="o">.</span><span class="n">properties</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">S3_CONNECT_TIMEOUT</span><span class="p">):</span> |
| <a id="__codelineno-0-452" name="__codelineno-0-452"></a> <span class="n">client_kwargs</span><span class="p">[</span><span class="s2">"connect_timeout"</span><span class="p">]</span> <span class="o">=</span> <span class="nb">float</span><span class="p">(</span><span class="n">connect_timeout</span><span class="p">)</span> |
| <a id="__codelineno-0-453" name="__codelineno-0-453"></a> |
| <a id="__codelineno-0-454" name="__codelineno-0-454"></a> <span class="k">if</span> <span class="n">request_timeout</span> <span class="o">:=</span> <span class="bp">self</span><span class="o">.</span><span class="n">properties</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">S3_REQUEST_TIMEOUT</span><span class="p">):</span> |
| <a id="__codelineno-0-455" name="__codelineno-0-455"></a> <span class="n">client_kwargs</span><span class="p">[</span><span class="s2">"request_timeout"</span><span class="p">]</span> <span class="o">=</span> <span class="nb">float</span><span class="p">(</span><span class="n">request_timeout</span><span class="p">)</span> |
| <a id="__codelineno-0-456" name="__codelineno-0-456"></a> |
| <a id="__codelineno-0-457" name="__codelineno-0-457"></a> <span class="k">if</span> <span class="n">role_arn</span> <span class="o">:=</span> <span class="n">get_first_property_value</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">properties</span><span class="p">,</span> <span class="n">S3_ROLE_ARN</span><span class="p">,</span> <span class="n">AWS_ROLE_ARN</span><span class="p">):</span> |
| <a id="__codelineno-0-458" name="__codelineno-0-458"></a> <span class="n">client_kwargs</span><span class="p">[</span><span class="s2">"role_arn"</span><span class="p">]</span> <span class="o">=</span> <span class="n">role_arn</span> |
| <a id="__codelineno-0-459" name="__codelineno-0-459"></a> |
| <a id="__codelineno-0-460" name="__codelineno-0-460"></a> <span class="k">if</span> <span class="n">session_name</span> <span class="o">:=</span> <span class="n">get_first_property_value</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">properties</span><span class="p">,</span> <span class="n">S3_ROLE_SESSION_NAME</span><span class="p">,</span> <span class="n">AWS_ROLE_SESSION_NAME</span><span class="p">):</span> |
| <a id="__codelineno-0-461" name="__codelineno-0-461"></a> <span class="n">client_kwargs</span><span class="p">[</span><span class="s2">"session_name"</span><span class="p">]</span> <span class="o">=</span> <span class="n">session_name</span> |
| <a id="__codelineno-0-462" name="__codelineno-0-462"></a> |
| <a id="__codelineno-0-463" name="__codelineno-0-463"></a> <span class="k">if</span> <span class="n">s3_anonymous</span> <span class="o">:=</span> <span class="bp">self</span><span class="o">.</span><span class="n">properties</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">S3_ANONYMOUS</span><span class="p">):</span> |
| <a id="__codelineno-0-464" name="__codelineno-0-464"></a> <span class="n">client_kwargs</span><span class="p">[</span><span class="s2">"anonymous"</span><span class="p">]</span> <span class="o">=</span> <span class="n">strtobool</span><span class="p">(</span><span class="n">s3_anonymous</span><span class="p">)</span> |
| <a id="__codelineno-0-465" name="__codelineno-0-465"></a> |
| <a id="__codelineno-0-466" name="__codelineno-0-466"></a> <span class="k">return</span> <span class="n">S3FileSystem</span><span class="p">(</span><span class="o">**</span><span class="n">client_kwargs</span><span class="p">)</span> |
| <a id="__codelineno-0-467" name="__codelineno-0-467"></a> |
| <a id="__codelineno-0-468" name="__codelineno-0-468"></a> <span class="k">def</span><span class="w"> </span><span class="nf">_initialize_s3_fs</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">netloc</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">])</span> <span class="o">-></span> <span class="n">FileSystem</span><span class="p">:</span> |
| <a id="__codelineno-0-469" name="__codelineno-0-469"></a> <span class="kn">from</span><span class="w"> </span><span class="nn">pyarrow.fs</span><span class="w"> </span><span class="kn">import</span> <span class="n">S3FileSystem</span> |
| <a id="__codelineno-0-470" name="__codelineno-0-470"></a> |
| <a id="__codelineno-0-471" name="__codelineno-0-471"></a> <span class="n">provided_region</span> <span class="o">=</span> <span class="n">get_first_property_value</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">properties</span><span class="p">,</span> <span class="n">S3_REGION</span><span class="p">,</span> <span class="n">AWS_REGION</span><span class="p">)</span> |
| <a id="__codelineno-0-472" name="__codelineno-0-472"></a> |
| <a id="__codelineno-0-473" name="__codelineno-0-473"></a> <span class="c1"># Do this when we don't provide the region at all, or when we explicitly enable it</span> |
| <a id="__codelineno-0-474" name="__codelineno-0-474"></a> <span class="k">if</span> <span class="n">provided_region</span> <span class="ow">is</span> <span class="kc">None</span> <span class="ow">or</span> <span class="n">property_as_bool</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">properties</span><span class="p">,</span> <span class="n">S3_RESOLVE_REGION</span><span class="p">,</span> <span class="kc">False</span><span class="p">)</span> <span class="ow">is</span> <span class="kc">True</span><span class="p">:</span> |
| <a id="__codelineno-0-475" name="__codelineno-0-475"></a> <span class="c1"># Resolve region from netloc(bucket), fallback to user-provided region</span> |
| <a id="__codelineno-0-476" name="__codelineno-0-476"></a> <span class="c1"># Only supported by buckets hosted by S3</span> |
| <a id="__codelineno-0-477" name="__codelineno-0-477"></a> <span class="n">bucket_region</span> <span class="o">=</span> <span class="n">_cached_resolve_s3_region</span><span class="p">(</span><span class="n">bucket</span><span class="o">=</span><span class="n">netloc</span><span class="p">)</span> <span class="ow">or</span> <span class="n">provided_region</span> |
| <a id="__codelineno-0-478" name="__codelineno-0-478"></a> <span class="k">if</span> <span class="n">provided_region</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span> <span class="ow">and</span> <span class="n">bucket_region</span> <span class="o">!=</span> <span class="n">provided_region</span><span class="p">:</span> |
| <a id="__codelineno-0-479" name="__codelineno-0-479"></a> <span class="n">logger</span><span class="o">.</span><span class="n">warning</span><span class="p">(</span> |
| <a id="__codelineno-0-480" name="__codelineno-0-480"></a> <span class="sa">f</span><span class="s2">"PyArrow FileIO overriding S3 bucket region for bucket </span><span class="si">{</span><span class="n">netloc</span><span class="si">}</span><span class="s2">: "</span> |
| <a id="__codelineno-0-481" name="__codelineno-0-481"></a> <span class="sa">f</span><span class="s2">"provided region </span><span class="si">{</span><span class="n">provided_region</span><span class="si">}</span><span class="s2">, actual region </span><span class="si">{</span><span class="n">bucket_region</span><span class="si">}</span><span class="s2">"</span> |
| <a id="__codelineno-0-482" name="__codelineno-0-482"></a> <span class="p">)</span> |
| <a id="__codelineno-0-483" name="__codelineno-0-483"></a> <span class="k">else</span><span class="p">:</span> |
| <a id="__codelineno-0-484" name="__codelineno-0-484"></a> <span class="n">bucket_region</span> <span class="o">=</span> <span class="n">provided_region</span> |
| <a id="__codelineno-0-485" name="__codelineno-0-485"></a> |
| <a id="__codelineno-0-486" name="__codelineno-0-486"></a> <span class="n">client_kwargs</span><span class="p">:</span> <span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Any</span><span class="p">]</span> <span class="o">=</span> <span class="p">{</span> |
| <a id="__codelineno-0-487" name="__codelineno-0-487"></a> <span class="s2">"endpoint_override"</span><span class="p">:</span> <span class="bp">self</span><span class="o">.</span><span class="n">properties</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">S3_ENDPOINT</span><span class="p">),</span> |
| <a id="__codelineno-0-488" name="__codelineno-0-488"></a> <span class="s2">"access_key"</span><span class="p">:</span> <span class="n">get_first_property_value</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">properties</span><span class="p">,</span> <span class="n">S3_ACCESS_KEY_ID</span><span class="p">,</span> <span class="n">AWS_ACCESS_KEY_ID</span><span class="p">),</span> |
| <a id="__codelineno-0-489" name="__codelineno-0-489"></a> <span class="s2">"secret_key"</span><span class="p">:</span> <span class="n">get_first_property_value</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">properties</span><span class="p">,</span> <span class="n">S3_SECRET_ACCESS_KEY</span><span class="p">,</span> <span class="n">AWS_SECRET_ACCESS_KEY</span><span class="p">),</span> |
| <a id="__codelineno-0-490" name="__codelineno-0-490"></a> <span class="s2">"session_token"</span><span class="p">:</span> <span class="n">get_first_property_value</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">properties</span><span class="p">,</span> <span class="n">S3_SESSION_TOKEN</span><span class="p">,</span> <span class="n">AWS_SESSION_TOKEN</span><span class="p">),</span> |
| <a id="__codelineno-0-491" name="__codelineno-0-491"></a> <span class="s2">"region"</span><span class="p">:</span> <span class="n">bucket_region</span><span class="p">,</span> |
| <a id="__codelineno-0-492" name="__codelineno-0-492"></a> <span class="p">}</span> |
| <a id="__codelineno-0-493" name="__codelineno-0-493"></a> |
| <a id="__codelineno-0-494" name="__codelineno-0-494"></a> <span class="k">if</span> <span class="n">proxy_uri</span> <span class="o">:=</span> <span class="bp">self</span><span class="o">.</span><span class="n">properties</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">S3_PROXY_URI</span><span class="p">):</span> |
| <a id="__codelineno-0-495" name="__codelineno-0-495"></a> <span class="n">client_kwargs</span><span class="p">[</span><span class="s2">"proxy_options"</span><span class="p">]</span> <span class="o">=</span> <span class="n">proxy_uri</span> |
| <a id="__codelineno-0-496" name="__codelineno-0-496"></a> |
| <a id="__codelineno-0-497" name="__codelineno-0-497"></a> <span class="k">if</span> <span class="n">connect_timeout</span> <span class="o">:=</span> <span class="bp">self</span><span class="o">.</span><span class="n">properties</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">S3_CONNECT_TIMEOUT</span><span class="p">):</span> |
| <a id="__codelineno-0-498" name="__codelineno-0-498"></a> <span class="n">client_kwargs</span><span class="p">[</span><span class="s2">"connect_timeout"</span><span class="p">]</span> <span class="o">=</span> <span class="nb">float</span><span class="p">(</span><span class="n">connect_timeout</span><span class="p">)</span> |
| <a id="__codelineno-0-499" name="__codelineno-0-499"></a> |
| <a id="__codelineno-0-500" name="__codelineno-0-500"></a> <span class="k">if</span> <span class="n">request_timeout</span> <span class="o">:=</span> <span class="bp">self</span><span class="o">.</span><span class="n">properties</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">S3_REQUEST_TIMEOUT</span><span class="p">):</span> |
| <a id="__codelineno-0-501" name="__codelineno-0-501"></a> <span class="n">client_kwargs</span><span class="p">[</span><span class="s2">"request_timeout"</span><span class="p">]</span> <span class="o">=</span> <span class="nb">float</span><span class="p">(</span><span class="n">request_timeout</span><span class="p">)</span> |
| <a id="__codelineno-0-502" name="__codelineno-0-502"></a> |
| <a id="__codelineno-0-503" name="__codelineno-0-503"></a> <span class="k">if</span> <span class="n">role_arn</span> <span class="o">:=</span> <span class="n">get_first_property_value</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">properties</span><span class="p">,</span> <span class="n">S3_ROLE_ARN</span><span class="p">,</span> <span class="n">AWS_ROLE_ARN</span><span class="p">):</span> |
| <a id="__codelineno-0-504" name="__codelineno-0-504"></a> <span class="n">client_kwargs</span><span class="p">[</span><span class="s2">"role_arn"</span><span class="p">]</span> <span class="o">=</span> <span class="n">role_arn</span> |
| <a id="__codelineno-0-505" name="__codelineno-0-505"></a> |
| <a id="__codelineno-0-506" name="__codelineno-0-506"></a> <span class="k">if</span> <span class="n">session_name</span> <span class="o">:=</span> <span class="n">get_first_property_value</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">properties</span><span class="p">,</span> <span class="n">S3_ROLE_SESSION_NAME</span><span class="p">,</span> <span class="n">AWS_ROLE_SESSION_NAME</span><span class="p">):</span> |
| <a id="__codelineno-0-507" name="__codelineno-0-507"></a> <span class="n">client_kwargs</span><span class="p">[</span><span class="s2">"session_name"</span><span class="p">]</span> <span class="o">=</span> <span class="n">session_name</span> |
| <a id="__codelineno-0-508" name="__codelineno-0-508"></a> |
| <a id="__codelineno-0-509" name="__codelineno-0-509"></a> <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">properties</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">S3_FORCE_VIRTUAL_ADDRESSING</span><span class="p">)</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span> |
| <a id="__codelineno-0-510" name="__codelineno-0-510"></a> <span class="n">client_kwargs</span><span class="p">[</span><span class="s2">"force_virtual_addressing"</span><span class="p">]</span> <span class="o">=</span> <span class="n">property_as_bool</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">properties</span><span class="p">,</span> <span class="n">S3_FORCE_VIRTUAL_ADDRESSING</span><span class="p">,</span> <span class="kc">False</span><span class="p">)</span> |
| <a id="__codelineno-0-511" name="__codelineno-0-511"></a> |
| <a id="__codelineno-0-512" name="__codelineno-0-512"></a> <span class="k">if</span> <span class="p">(</span><span class="n">retry_strategy_impl</span> <span class="o">:=</span> <span class="bp">self</span><span class="o">.</span><span class="n">properties</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">S3_RETRY_STRATEGY_IMPL</span><span class="p">))</span> <span class="ow">and</span> <span class="p">(</span> |
| <a id="__codelineno-0-513" name="__codelineno-0-513"></a> <span class="n">retry_instance</span> <span class="o">:=</span> <span class="n">_import_retry_strategy</span><span class="p">(</span><span class="n">retry_strategy_impl</span><span class="p">)</span> |
| <a id="__codelineno-0-514" name="__codelineno-0-514"></a> <span class="p">):</span> |
| <a id="__codelineno-0-515" name="__codelineno-0-515"></a> <span class="n">client_kwargs</span><span class="p">[</span><span class="s2">"retry_strategy"</span><span class="p">]</span> <span class="o">=</span> <span class="n">retry_instance</span> |
| <a id="__codelineno-0-516" name="__codelineno-0-516"></a> |
| <a id="__codelineno-0-517" name="__codelineno-0-517"></a> <span class="k">if</span> <span class="n">s3_anonymous</span> <span class="o">:=</span> <span class="bp">self</span><span class="o">.</span><span class="n">properties</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">S3_ANONYMOUS</span><span class="p">):</span> |
| <a id="__codelineno-0-518" name="__codelineno-0-518"></a> <span class="n">client_kwargs</span><span class="p">[</span><span class="s2">"anonymous"</span><span class="p">]</span> <span class="o">=</span> <span class="n">strtobool</span><span class="p">(</span><span class="n">s3_anonymous</span><span class="p">)</span> |
| <a id="__codelineno-0-519" name="__codelineno-0-519"></a> |
| <a id="__codelineno-0-520" name="__codelineno-0-520"></a> <span class="k">return</span> <span class="n">S3FileSystem</span><span class="p">(</span><span class="o">**</span><span class="n">client_kwargs</span><span class="p">)</span> |
| <a id="__codelineno-0-521" name="__codelineno-0-521"></a> |
| <a id="__codelineno-0-522" name="__codelineno-0-522"></a> <span class="k">def</span><span class="w"> </span><span class="nf">_initialize_azure_fs</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="n">FileSystem</span><span class="p">:</span> |
| <a id="__codelineno-0-523" name="__codelineno-0-523"></a> <span class="c1"># https://arrow.apache.org/docs/python/generated/pyarrow.fs.AzureFileSystem.html</span> |
| <a id="__codelineno-0-524" name="__codelineno-0-524"></a> <span class="kn">from</span><span class="w"> </span><span class="nn">packaging</span><span class="w"> </span><span class="kn">import</span> <span class="n">version</span> |
| <a id="__codelineno-0-525" name="__codelineno-0-525"></a> |
| <a id="__codelineno-0-526" name="__codelineno-0-526"></a> <span class="n">MIN_PYARROW_VERSION_SUPPORTING_AZURE_FS</span> <span class="o">=</span> <span class="s2">"20.0.0"</span> |
| <a id="__codelineno-0-527" name="__codelineno-0-527"></a> <span class="k">if</span> <span class="n">version</span><span class="o">.</span><span class="n">parse</span><span class="p">(</span><span class="n">pyarrow</span><span class="o">.</span><span class="n">__version__</span><span class="p">)</span> <span class="o">&lt;</span> <span class="n">version</span><span class="o">.</span><span class="n">parse</span><span class="p">(</span><span class="n">MIN_PYARROW_VERSION_SUPPORTING_AZURE_FS</span><span class="p">):</span> |
| <a id="__codelineno-0-528" name="__codelineno-0-528"></a> <span class="k">raise</span> <span class="ne">ImportError</span><span class="p">(</span> |
| <a id="__codelineno-0-529" name="__codelineno-0-529"></a> <span class="sa">f</span><span class="s2">"pyarrow version >= </span><span class="si">{</span><span class="n">MIN_PYARROW_VERSION_SUPPORTING_AZURE_FS</span><span class="si">}</span><span class="s2"> required for AzureFileSystem support, "</span> |
| <a id="__codelineno-0-530" name="__codelineno-0-530"></a> <span class="sa">f</span><span class="s2">"but found version </span><span class="si">{</span><span class="n">pyarrow</span><span class="o">.</span><span class="n">__version__</span><span class="si">}</span><span class="s2">."</span> |
| <a id="__codelineno-0-531" name="__codelineno-0-531"></a> <span class="p">)</span> |
| <a id="__codelineno-0-532" name="__codelineno-0-532"></a> |
| <a id="__codelineno-0-533" name="__codelineno-0-533"></a> <span class="kn">from</span><span class="w"> </span><span class="nn">pyarrow.fs</span><span class="w"> </span><span class="kn">import</span> <span class="n">AzureFileSystem</span> |
| <a id="__codelineno-0-534" name="__codelineno-0-534"></a> |
| <a id="__codelineno-0-535" name="__codelineno-0-535"></a> <span class="n">client_kwargs</span><span class="p">:</span> <span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="p">{}</span> |
| <a id="__codelineno-0-536" name="__codelineno-0-536"></a> |
| <a id="__codelineno-0-537" name="__codelineno-0-537"></a> <span class="k">if</span> <span class="n">account_name</span> <span class="o">:=</span> <span class="bp">self</span><span class="o">.</span><span class="n">properties</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">ADLS_ACCOUNT_NAME</span><span class="p">):</span> |
| <a id="__codelineno-0-538" name="__codelineno-0-538"></a> <span class="n">client_kwargs</span><span class="p">[</span><span class="s2">"account_name"</span><span class="p">]</span> <span class="o">=</span> <span class="n">account_name</span> |
| <a id="__codelineno-0-539" name="__codelineno-0-539"></a> |
| <a id="__codelineno-0-540" name="__codelineno-0-540"></a> <span class="k">if</span> <span class="n">account_key</span> <span class="o">:=</span> <span class="bp">self</span><span class="o">.</span><span class="n">properties</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">ADLS_ACCOUNT_KEY</span><span class="p">):</span> |
| <a id="__codelineno-0-541" name="__codelineno-0-541"></a> <span class="n">client_kwargs</span><span class="p">[</span><span class="s2">"account_key"</span><span class="p">]</span> <span class="o">=</span> <span class="n">account_key</span> |
| <a id="__codelineno-0-542" name="__codelineno-0-542"></a> |
| <a id="__codelineno-0-543" name="__codelineno-0-543"></a> <span class="k">if</span> <span class="n">blob_storage_authority</span> <span class="o">:=</span> <span class="bp">self</span><span class="o">.</span><span class="n">properties</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">ADLS_BLOB_STORAGE_AUTHORITY</span><span class="p">):</span> |
| <a id="__codelineno-0-544" name="__codelineno-0-544"></a> <span class="n">client_kwargs</span><span class="p">[</span><span class="s2">"blob_storage_authority"</span><span class="p">]</span> <span class="o">=</span> <span class="n">blob_storage_authority</span> |
| <a id="__codelineno-0-545" name="__codelineno-0-545"></a> |
| <a id="__codelineno-0-546" name="__codelineno-0-546"></a> <span class="k">if</span> <span class="n">dfs_storage_authority</span> <span class="o">:=</span> <span class="bp">self</span><span class="o">.</span><span class="n">properties</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">ADLS_DFS_STORAGE_AUTHORITY</span><span class="p">):</span> |
| <a id="__codelineno-0-547" name="__codelineno-0-547"></a> <span class="n">client_kwargs</span><span class="p">[</span><span class="s2">"dfs_storage_authority"</span><span class="p">]</span> <span class="o">=</span> <span class="n">dfs_storage_authority</span> |
| <a id="__codelineno-0-548" name="__codelineno-0-548"></a> |
| <a id="__codelineno-0-549" name="__codelineno-0-549"></a> <span class="k">if</span> <span class="n">blob_storage_scheme</span> <span class="o">:=</span> <span class="bp">self</span><span class="o">.</span><span class="n">properties</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">ADLS_BLOB_STORAGE_SCHEME</span><span class="p">):</span> |
| <a id="__codelineno-0-550" name="__codelineno-0-550"></a> <span class="n">client_kwargs</span><span class="p">[</span><span class="s2">"blob_storage_scheme"</span><span class="p">]</span> <span class="o">=</span> <span class="n">blob_storage_scheme</span> |
| <a id="__codelineno-0-551" name="__codelineno-0-551"></a> |
| <a id="__codelineno-0-552" name="__codelineno-0-552"></a> <span class="k">if</span> <span class="n">dfs_storage_scheme</span> <span class="o">:=</span> <span class="bp">self</span><span class="o">.</span><span class="n">properties</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">ADLS_DFS_STORAGE_SCHEME</span><span class="p">):</span> |
| <a id="__codelineno-0-553" name="__codelineno-0-553"></a> <span class="n">client_kwargs</span><span class="p">[</span><span class="s2">"dfs_storage_scheme"</span><span class="p">]</span> <span class="o">=</span> <span class="n">dfs_storage_scheme</span> |
| <a id="__codelineno-0-554" name="__codelineno-0-554"></a> |
| <a id="__codelineno-0-555" name="__codelineno-0-555"></a> <span class="k">if</span> <span class="n">sas_token</span> <span class="o">:=</span> <span class="bp">self</span><span class="o">.</span><span class="n">properties</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">ADLS_SAS_TOKEN</span><span class="p">):</span> |
| <a id="__codelineno-0-556" name="__codelineno-0-556"></a> <span class="n">client_kwargs</span><span class="p">[</span><span class="s2">"sas_token"</span><span class="p">]</span> <span class="o">=</span> <span class="n">sas_token</span> |
| <a id="__codelineno-0-557" name="__codelineno-0-557"></a> |
| <a id="__codelineno-0-558" name="__codelineno-0-558"></a> <span class="k">if</span> <span class="n">client_id</span> <span class="o">:=</span> <span class="bp">self</span><span class="o">.</span><span class="n">properties</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">ADLS_CLIENT_ID</span><span class="p">):</span> |
| <a id="__codelineno-0-559" name="__codelineno-0-559"></a> <span class="n">client_kwargs</span><span class="p">[</span><span class="s2">"client_id"</span><span class="p">]</span> <span class="o">=</span> <span class="n">client_id</span> |
| <a id="__codelineno-0-560" name="__codelineno-0-560"></a> <span class="k">if</span> <span class="n">client_secret</span> <span class="o">:=</span> <span class="bp">self</span><span class="o">.</span><span class="n">properties</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">ADLS_CLIENT_SECRET</span><span class="p">):</span> |
| <a id="__codelineno-0-561" name="__codelineno-0-561"></a> <span class="n">client_kwargs</span><span class="p">[</span><span class="s2">"client_secret"</span><span class="p">]</span> <span class="o">=</span> <span class="n">client_secret</span> |
| <a id="__codelineno-0-562" name="__codelineno-0-562"></a> <span class="k">if</span> <span class="n">tenant_id</span> <span class="o">:=</span> <span class="bp">self</span><span class="o">.</span><span class="n">properties</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">ADLS_TENANT_ID</span><span class="p">):</span> |
| <a id="__codelineno-0-563" name="__codelineno-0-563"></a> <span class="n">client_kwargs</span><span class="p">[</span><span class="s2">"tenant_id"</span><span class="p">]</span> <span class="o">=</span> <span class="n">tenant_id</span> |
| <a id="__codelineno-0-564" name="__codelineno-0-564"></a> |
| <a id="__codelineno-0-565" name="__codelineno-0-565"></a> <span class="c1"># Validate that all three are provided together for ClientSecretCredential</span> |
| <a id="__codelineno-0-566" name="__codelineno-0-566"></a> <span class="n">credential_keys</span> <span class="o">=</span> <span class="p">[</span><span class="s2">"client_id"</span><span class="p">,</span> <span class="s2">"client_secret"</span><span class="p">,</span> <span class="s2">"tenant_id"</span><span class="p">]</span> |
| <a id="__codelineno-0-567" name="__codelineno-0-567"></a> <span class="n">provided_keys</span> <span class="o">=</span> <span class="p">[</span><span class="n">key</span> <span class="k">for</span> <span class="n">key</span> <span class="ow">in</span> <span class="n">credential_keys</span> <span class="k">if</span> <span class="n">key</span> <span class="ow">in</span> <span class="n">client_kwargs</span><span class="p">]</span> |
| <a id="__codelineno-0-568" name="__codelineno-0-568"></a> <span class="k">if</span> <span class="n">provided_keys</span> <span class="ow">and</span> <span class="nb">len</span><span class="p">(</span><span class="n">provided_keys</span><span class="p">)</span> <span class="o">!=</span> <span class="nb">len</span><span class="p">(</span><span class="n">credential_keys</span><span class="p">):</span> |
| <a id="__codelineno-0-569" name="__codelineno-0-569"></a> <span class="n">missing_keys</span> <span class="o">=</span> <span class="p">[</span><span class="n">key</span> <span class="k">for</span> <span class="n">key</span> <span class="ow">in</span> <span class="n">credential_keys</span> <span class="k">if</span> <span class="n">key</span> <span class="ow">not</span> <span class="ow">in</span> <span class="n">client_kwargs</span><span class="p">]</span> |
| <a id="__codelineno-0-570" name="__codelineno-0-570"></a> <span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span> |
| <a id="__codelineno-0-571" name="__codelineno-0-571"></a> <span class="sa">f</span><span class="s2">"client_id, client_secret, and tenant_id must all be provided together "</span> |
| <a id="__codelineno-0-572" name="__codelineno-0-572"></a> <span class="sa">f</span><span class="s2">"to use ClientSecretCredential for Azure authentication. "</span> |
| <a id="__codelineno-0-573" name="__codelineno-0-573"></a> <span class="sa">f</span><span class="s2">"Provided: </span><span class="si">{</span><span class="n">provided_keys</span><span class="si">}</span><span class="s2">, Missing: </span><span class="si">{</span><span class="n">missing_keys</span><span class="si">}</span><span class="s2">"</span> |
| <a id="__codelineno-0-574" name="__codelineno-0-574"></a> <span class="p">)</span> |
| <a id="__codelineno-0-575" name="__codelineno-0-575"></a> |
| <a id="__codelineno-0-576" name="__codelineno-0-576"></a> <span class="k">return</span> <span class="n">AzureFileSystem</span><span class="p">(</span><span class="o">**</span><span class="n">client_kwargs</span><span class="p">)</span> |
| <a id="__codelineno-0-577" name="__codelineno-0-577"></a> |
| <a id="__codelineno-0-578" name="__codelineno-0-578"></a> <span class="k">def</span><span class="w"> </span><span class="nf">_initialize_hdfs_fs</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">scheme</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span> <span class="n">netloc</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">])</span> <span class="o">-></span> <span class="n">FileSystem</span><span class="p">:</span> |
| <a id="__codelineno-0-579" name="__codelineno-0-579"></a> <span class="kn">from</span><span class="w"> </span><span class="nn">pyarrow.fs</span><span class="w"> </span><span class="kn">import</span> <span class="n">HadoopFileSystem</span> |
| <a id="__codelineno-0-580" name="__codelineno-0-580"></a> |
| <a id="__codelineno-0-581" name="__codelineno-0-581"></a> <span class="n">hdfs_kwargs</span><span class="p">:</span> <span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Any</span><span class="p">]</span> <span class="o">=</span> <span class="p">{}</span> |
| <a id="__codelineno-0-582" name="__codelineno-0-582"></a> <span class="k">if</span> <span class="n">netloc</span><span class="p">:</span> |
| <a id="__codelineno-0-583" name="__codelineno-0-583"></a> <span class="k">return</span> <span class="n">HadoopFileSystem</span><span class="o">.</span><span class="n">from_uri</span><span class="p">(</span><span class="sa">f</span><span class="s2">"</span><span class="si">{</span><span class="n">scheme</span><span class="si">}</span><span class="s2">://</span><span class="si">{</span><span class="n">netloc</span><span class="si">}</span><span class="s2">"</span><span class="p">)</span> |
| <a id="__codelineno-0-584" name="__codelineno-0-584"></a> <span class="k">if</span> <span class="n">host</span> <span class="o">:=</span> <span class="bp">self</span><span class="o">.</span><span class="n">properties</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">HDFS_HOST</span><span class="p">):</span> |
| <a id="__codelineno-0-585" name="__codelineno-0-585"></a> <span class="n">hdfs_kwargs</span><span class="p">[</span><span class="s2">"host"</span><span class="p">]</span> <span class="o">=</span> <span class="n">host</span> |
| <a id="__codelineno-0-586" name="__codelineno-0-586"></a> <span class="k">if</span> <span class="n">port</span> <span class="o">:=</span> <span class="bp">self</span><span class="o">.</span><span class="n">properties</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">HDFS_PORT</span><span class="p">):</span> |
| <a id="__codelineno-0-587" name="__codelineno-0-587"></a> <span class="c1"># port should be an integer type</span> |
| <a id="__codelineno-0-588" name="__codelineno-0-588"></a> <span class="n">hdfs_kwargs</span><span class="p">[</span><span class="s2">"port"</span><span class="p">]</span> <span class="o">=</span> <span class="nb">int</span><span class="p">(</span><span class="n">port</span><span class="p">)</span> |
| <a id="__codelineno-0-589" name="__codelineno-0-589"></a> <span class="k">if</span> <span class="n">user</span> <span class="o">:=</span> <span class="bp">self</span><span class="o">.</span><span class="n">properties</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">HDFS_USER</span><span class="p">):</span> |
| <a id="__codelineno-0-590" name="__codelineno-0-590"></a> <span class="n">hdfs_kwargs</span><span class="p">[</span><span class="s2">"user"</span><span class="p">]</span> <span class="o">=</span> <span class="n">user</span> |
| <a id="__codelineno-0-591" name="__codelineno-0-591"></a> <span class="k">if</span> <span class="n">kerb_ticket</span> <span class="o">:=</span> <span class="bp">self</span><span class="o">.</span><span class="n">properties</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">HDFS_KERB_TICKET</span><span class="p">):</span> |
| <a id="__codelineno-0-592" name="__codelineno-0-592"></a> <span class="n">hdfs_kwargs</span><span class="p">[</span><span class="s2">"kerb_ticket"</span><span class="p">]</span> <span class="o">=</span> <span class="n">kerb_ticket</span> |
| <a id="__codelineno-0-593" name="__codelineno-0-593"></a> |
| <a id="__codelineno-0-594" name="__codelineno-0-594"></a> <span class="k">return</span> <span class="n">HadoopFileSystem</span><span class="p">(</span><span class="o">**</span><span class="n">hdfs_kwargs</span><span class="p">)</span> |
| <a id="__codelineno-0-595" name="__codelineno-0-595"></a> |
| <a id="__codelineno-0-596" name="__codelineno-0-596"></a> <span class="k">def</span><span class="w"> </span><span class="nf">_initialize_gcs_fs</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="n">FileSystem</span><span class="p">:</span> |
| <a id="__codelineno-0-597" name="__codelineno-0-597"></a> <span class="kn">from</span><span class="w"> </span><span class="nn">pyarrow.fs</span><span class="w"> </span><span class="kn">import</span> <span class="n">GcsFileSystem</span> |
| <a id="__codelineno-0-598" name="__codelineno-0-598"></a> |
| <a id="__codelineno-0-599" name="__codelineno-0-599"></a> <span class="n">gcs_kwargs</span><span class="p">:</span> <span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Any</span><span class="p">]</span> <span class="o">=</span> <span class="p">{}</span> |
| <a id="__codelineno-0-600" name="__codelineno-0-600"></a> <span class="k">if</span> <span class="n">access_token</span> <span class="o">:=</span> <span class="bp">self</span><span class="o">.</span><span class="n">properties</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">GCS_TOKEN</span><span class="p">):</span> |
| <a id="__codelineno-0-601" name="__codelineno-0-601"></a> <span class="n">gcs_kwargs</span><span class="p">[</span><span class="s2">"access_token"</span><span class="p">]</span> <span class="o">=</span> <span class="n">access_token</span> |
| <a id="__codelineno-0-602" name="__codelineno-0-602"></a> <span class="k">if</span> <span class="n">expiration</span> <span class="o">:=</span> <span class="bp">self</span><span class="o">.</span><span class="n">properties</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">GCS_TOKEN_EXPIRES_AT_MS</span><span class="p">):</span> |
| <a id="__codelineno-0-603" name="__codelineno-0-603"></a> <span class="n">gcs_kwargs</span><span class="p">[</span><span class="s2">"credential_token_expiration"</span><span class="p">]</span> <span class="o">=</span> <span class="n">millis_to_datetime</span><span class="p">(</span><span class="nb">int</span><span class="p">(</span><span class="n">expiration</span><span class="p">))</span> |
| <a id="__codelineno-0-604" name="__codelineno-0-604"></a> <span class="k">if</span> <span class="n">bucket_location</span> <span class="o">:=</span> <span class="bp">self</span><span class="o">.</span><span class="n">properties</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">GCS_DEFAULT_LOCATION</span><span class="p">):</span> |
| <a id="__codelineno-0-605" name="__codelineno-0-605"></a> <span class="n">gcs_kwargs</span><span class="p">[</span><span class="s2">"default_bucket_location"</span><span class="p">]</span> <span class="o">=</span> <span class="n">bucket_location</span> |
| <a id="__codelineno-0-606" name="__codelineno-0-606"></a> <span class="k">if</span> <span class="n">endpoint</span> <span class="o">:=</span> <span class="bp">self</span><span class="o">.</span><span class="n">properties</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">GCS_SERVICE_HOST</span><span class="p">):</span> |
| <a id="__codelineno-0-607" name="__codelineno-0-607"></a> <span class="n">url_parts</span> <span class="o">=</span> <span class="n">urlparse</span><span class="p">(</span><span class="n">endpoint</span><span class="p">)</span> |
| <a id="__codelineno-0-608" name="__codelineno-0-608"></a> <span class="n">gcs_kwargs</span><span class="p">[</span><span class="s2">"scheme"</span><span class="p">]</span> <span class="o">=</span> <span class="n">url_parts</span><span class="o">.</span><span class="n">scheme</span> |
| <a id="__codelineno-0-609" name="__codelineno-0-609"></a> <span class="n">gcs_kwargs</span><span class="p">[</span><span class="s2">"endpoint_override"</span><span class="p">]</span> <span class="o">=</span> <span class="n">url_parts</span><span class="o">.</span><span class="n">netloc</span> |
| <a id="__codelineno-0-610" name="__codelineno-0-610"></a> |
| <a id="__codelineno-0-611" name="__codelineno-0-611"></a> <span class="k">return</span> <span class="n">GcsFileSystem</span><span class="p">(</span><span class="o">**</span><span class="n">gcs_kwargs</span><span class="p">)</span> |
| <a id="__codelineno-0-612" name="__codelineno-0-612"></a> |
| <a id="__codelineno-0-613" name="__codelineno-0-613"></a> <span class="k">def</span><span class="w"> </span><span class="nf">_initialize_local_fs</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="n">FileSystem</span><span class="p">:</span> |
| <a id="__codelineno-0-614" name="__codelineno-0-614"></a> <span class="k">return</span> <span class="n">PyArrowLocalFileSystem</span><span class="p">()</span> |
| <a id="__codelineno-0-615" name="__codelineno-0-615"></a> |
| <a id="__codelineno-0-616" name="__codelineno-0-616"></a> <span class="k">def</span><span class="w"> </span><span class="nf">new_input</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">location</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="n">PyArrowFile</span><span class="p">:</span> |
| <a id="__codelineno-0-617" name="__codelineno-0-617"></a><span class="w"> </span><span class="sd">"""Get a PyArrowFile instance to read bytes from the file at the given location.</span> |
| <a id="__codelineno-0-618" name="__codelineno-0-618"></a> |
| <a id="__codelineno-0-619" name="__codelineno-0-619"></a><span class="sd"> Args:</span> |
| <a id="__codelineno-0-620" name="__codelineno-0-620"></a><span class="sd"> location (str): A URI or a path to a local file.</span> |
| <a id="__codelineno-0-621" name="__codelineno-0-621"></a> |
| <a id="__codelineno-0-622" name="__codelineno-0-622"></a><span class="sd"> Returns:</span> |
| <a id="__codelineno-0-623" name="__codelineno-0-623"></a><span class="sd"> PyArrowFile: A PyArrowFile instance for the given location.</span> |
| <a id="__codelineno-0-624" name="__codelineno-0-624"></a><span class="sd"> """</span> |
| <a id="__codelineno-0-625" name="__codelineno-0-625"></a> <span class="n">scheme</span><span class="p">,</span> <span class="n">netloc</span><span class="p">,</span> <span class="n">path</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">parse_location</span><span class="p">(</span><span class="n">location</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">properties</span><span class="p">)</span> |
| <a id="__codelineno-0-626" name="__codelineno-0-626"></a> <span class="k">return</span> <span class="n">PyArrowFile</span><span class="p">(</span> |
| <a id="__codelineno-0-627" name="__codelineno-0-627"></a> <span class="n">fs</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">fs_by_scheme</span><span class="p">(</span><span class="n">scheme</span><span class="p">,</span> <span class="n">netloc</span><span class="p">),</span> |
| <a id="__codelineno-0-628" name="__codelineno-0-628"></a> <span class="n">location</span><span class="o">=</span><span class="n">location</span><span class="p">,</span> |
| <a id="__codelineno-0-629" name="__codelineno-0-629"></a> <span class="n">path</span><span class="o">=</span><span class="n">path</span><span class="p">,</span> |
| <a id="__codelineno-0-630" name="__codelineno-0-630"></a> <span class="n">buffer_size</span><span class="o">=</span><span class="nb">int</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">properties</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">BUFFER_SIZE</span><span class="p">,</span> <span class="n">ONE_MEGABYTE</span><span class="p">)),</span> |
| <a id="__codelineno-0-631" name="__codelineno-0-631"></a> <span class="p">)</span> |
| <a id="__codelineno-0-632" name="__codelineno-0-632"></a> |
| <a id="__codelineno-0-633" name="__codelineno-0-633"></a> <span class="k">def</span><span class="w"> </span><span class="nf">new_output</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">location</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="n">PyArrowFile</span><span class="p">:</span> |
| <a id="__codelineno-0-634" name="__codelineno-0-634"></a><span class="w"> </span><span class="sd">"""Get a PyArrowFile instance to write bytes to the file at the given location.</span> |
| <a id="__codelineno-0-635" name="__codelineno-0-635"></a> |
| <a id="__codelineno-0-636" name="__codelineno-0-636"></a><span class="sd"> Args:</span> |
| <a id="__codelineno-0-637" name="__codelineno-0-637"></a><span class="sd"> location (str): A URI or a path to a local file.</span> |
| <a id="__codelineno-0-638" name="__codelineno-0-638"></a> |
| <a id="__codelineno-0-639" name="__codelineno-0-639"></a><span class="sd"> Returns:</span> |
| <a id="__codelineno-0-640" name="__codelineno-0-640"></a><span class="sd"> PyArrowFile: A PyArrowFile instance for the given location.</span> |
| <a id="__codelineno-0-641" name="__codelineno-0-641"></a><span class="sd"> """</span> |
| <a id="__codelineno-0-642" name="__codelineno-0-642"></a> <span class="n">scheme</span><span class="p">,</span> <span class="n">netloc</span><span class="p">,</span> <span class="n">path</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">parse_location</span><span class="p">(</span><span class="n">location</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">properties</span><span class="p">)</span> |
| <a id="__codelineno-0-643" name="__codelineno-0-643"></a> <span class="k">return</span> <span class="n">PyArrowFile</span><span class="p">(</span> |
| <a id="__codelineno-0-644" name="__codelineno-0-644"></a> <span class="n">fs</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">fs_by_scheme</span><span class="p">(</span><span class="n">scheme</span><span class="p">,</span> <span class="n">netloc</span><span class="p">),</span> |
| <a id="__codelineno-0-645" name="__codelineno-0-645"></a> <span class="n">location</span><span class="o">=</span><span class="n">location</span><span class="p">,</span> |
| <a id="__codelineno-0-646" name="__codelineno-0-646"></a> <span class="n">path</span><span class="o">=</span><span class="n">path</span><span class="p">,</span> |
| <a id="__codelineno-0-647" name="__codelineno-0-647"></a> <span class="n">buffer_size</span><span class="o">=</span><span class="nb">int</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">properties</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">BUFFER_SIZE</span><span class="p">,</span> <span class="n">ONE_MEGABYTE</span><span class="p">)),</span> |
| <a id="__codelineno-0-648" name="__codelineno-0-648"></a> <span class="p">)</span> |
| <a id="__codelineno-0-649" name="__codelineno-0-649"></a> |
| <a id="__codelineno-0-650" name="__codelineno-0-650"></a> <span class="k">def</span><span class="w"> </span><span class="nf">delete</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">location</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">InputFile</span><span class="p">,</span> <span class="n">OutputFile</span><span class="p">])</span> <span class="o">-></span> <span class="kc">None</span><span class="p">:</span> |
| <a id="__codelineno-0-651" name="__codelineno-0-651"></a><span class="w"> </span><span class="sd">"""Delete the file at the given location.</span> |
| <a id="__codelineno-0-652" name="__codelineno-0-652"></a> |
| <a id="__codelineno-0-653" name="__codelineno-0-653"></a><span class="sd"> Args:</span> |
| <a id="__codelineno-0-654" name="__codelineno-0-654"></a><span class="sd"> location (Union[str, InputFile, OutputFile]): The URI to the file--if an InputFile instance or an OutputFile instance is provided,</span> |
| <a id="__codelineno-0-655" name="__codelineno-0-655"></a><span class="sd"> the location attribute for that instance is used as the location to delete.</span> |
| <a id="__codelineno-0-656" name="__codelineno-0-656"></a> |
| <a id="__codelineno-0-657" name="__codelineno-0-657"></a><span class="sd"> Raises:</span> |
| <a id="__codelineno-0-658" name="__codelineno-0-658"></a><span class="sd"> FileNotFoundError: When the file at the provided location does not exist.</span> |
| <a id="__codelineno-0-659" name="__codelineno-0-659"></a><span class="sd"> PermissionError: If the file at the provided location cannot be accessed due to a permission error such as</span> |
| <a id="__codelineno-0-660" name="__codelineno-0-660"></a><span class="sd"> an AWS error code 15.</span> |
| <a id="__codelineno-0-661" name="__codelineno-0-661"></a><span class="sd"> """</span> |
| <a id="__codelineno-0-662" name="__codelineno-0-662"></a> <span class="n">str_location</span> <span class="o">=</span> <span class="n">location</span><span class="o">.</span><span class="n">location</span> <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">location</span><span class="p">,</span> <span class="p">(</span><span class="n">InputFile</span><span class="p">,</span> <span class="n">OutputFile</span><span class="p">))</span> <span class="k">else</span> <span class="n">location</span> |
| <a id="__codelineno-0-663" name="__codelineno-0-663"></a> <span class="n">scheme</span><span class="p">,</span> <span class="n">netloc</span><span class="p">,</span> <span class="n">path</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">parse_location</span><span class="p">(</span><span class="n">str_location</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">properties</span><span class="p">)</span> |
| <a id="__codelineno-0-664" name="__codelineno-0-664"></a> <span class="n">fs</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">fs_by_scheme</span><span class="p">(</span><span class="n">scheme</span><span class="p">,</span> <span class="n">netloc</span><span class="p">)</span> |
| <a id="__codelineno-0-665" name="__codelineno-0-665"></a> |
| <a id="__codelineno-0-666" name="__codelineno-0-666"></a> <span class="k">try</span><span class="p">:</span> |
| <a id="__codelineno-0-667" name="__codelineno-0-667"></a> <span class="n">fs</span><span class="o">.</span><span class="n">delete_file</span><span class="p">(</span><span class="n">path</span><span class="p">)</span> |
| <a id="__codelineno-0-668" name="__codelineno-0-668"></a> <span class="k">except</span> <span class="ne">FileNotFoundError</span><span class="p">:</span> |
| <a id="__codelineno-0-669" name="__codelineno-0-669"></a> <span class="k">raise</span> |
| <a id="__codelineno-0-670" name="__codelineno-0-670"></a> <span class="k">except</span> <span class="ne">PermissionError</span><span class="p">:</span> |
| <a id="__codelineno-0-671" name="__codelineno-0-671"></a> <span class="k">raise</span> |
| <a id="__codelineno-0-672" name="__codelineno-0-672"></a> <span class="k">except</span> <span class="ne">OSError</span> <span class="k">as</span> <span class="n">e</span><span class="p">:</span> |
| <a id="__codelineno-0-673" name="__codelineno-0-673"></a> <span class="k">if</span> <span class="n">e</span><span class="o">.</span><span class="n">errno</span> <span class="o">==</span> <span class="mi">2</span> <span class="ow">or</span> <span class="s2">"Path does not exist"</span> <span class="ow">in</span> <span class="nb">str</span><span class="p">(</span><span class="n">e</span><span class="p">):</span> |
| <a id="__codelineno-0-674" name="__codelineno-0-674"></a> <span class="k">raise</span> <span class="ne">FileNotFoundError</span><span class="p">(</span><span class="sa">f</span><span class="s2">"Cannot delete file, does not exist: </span><span class="si">{</span><span class="n">location</span><span class="si">}</span><span class="s2">"</span><span class="p">)</span> <span class="kn">from</span><span class="w"> </span><span class="nn">e</span> |
| <a id="__codelineno-0-675" name="__codelineno-0-675"></a> <span class="k">elif</span> <span class="n">e</span><span class="o">.</span><span class="n">errno</span> <span class="o">==</span> <span class="mi">13</span> <span class="ow">or</span> <span class="s2">"AWS Error [code 15]"</span> <span class="ow">in</span> <span class="nb">str</span><span class="p">(</span><span class="n">e</span><span class="p">):</span> |
| <a id="__codelineno-0-676" name="__codelineno-0-676"></a> <span class="k">raise</span> <span class="ne">PermissionError</span><span class="p">(</span><span class="sa">f</span><span class="s2">"Cannot delete file, access denied: </span><span class="si">{</span><span class="n">location</span><span class="si">}</span><span class="s2">"</span><span class="p">)</span> <span class="kn">from</span><span class="w"> </span><span class="nn">e</span> |
| <a id="__codelineno-0-677" name="__codelineno-0-677"></a> <span class="k">raise</span> <span class="c1"># pragma: no cover - If some other kind of OSError, raise the raw error</span> |
| <a id="__codelineno-0-678" name="__codelineno-0-678"></a> |
| <a id="__codelineno-0-679" name="__codelineno-0-679"></a> <span class="k">def</span><span class="w"> </span><span class="nf">__getstate__</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Any</span><span class="p">]:</span> |
| <a id="__codelineno-0-680" name="__codelineno-0-680"></a><span class="w"> </span><span class="sd">"""Create a dictionary of the PyArrowFileIO fields used when pickling."""</span> |
| <a id="__codelineno-0-681" name="__codelineno-0-681"></a> <span class="n">fileio_copy</span> <span class="o">=</span> <span class="n">copy</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="vm">__dict__</span><span class="p">)</span> |
| <a id="__codelineno-0-682" name="__codelineno-0-682"></a> <span class="n">fileio_copy</span><span class="p">[</span><span class="s2">"fs_by_scheme"</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span> |
| <a id="__codelineno-0-683" name="__codelineno-0-683"></a> <span class="k">return</span> <span class="n">fileio_copy</span> |
| <a id="__codelineno-0-684" name="__codelineno-0-684"></a> |
| <a id="__codelineno-0-685" name="__codelineno-0-685"></a> <span class="k">def</span><span class="w"> </span><span class="nf">__setstate__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">state</span><span class="p">:</span> <span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Any</span><span class="p">])</span> <span class="o">-></span> <span class="kc">None</span><span class="p">:</span> |
| <a id="__codelineno-0-686" name="__codelineno-0-686"></a><span class="w"> </span><span class="sd">"""Deserialize the state into a PyArrowFileIO instance."""</span> |
| <a id="__codelineno-0-687" name="__codelineno-0-687"></a> <span class="bp">self</span><span class="o">.</span><span class="vm">__dict__</span> <span class="o">=</span> <span class="n">state</span> |
| <a id="__codelineno-0-688" name="__codelineno-0-688"></a> <span class="bp">self</span><span class="o">.</span><span class="n">fs_by_scheme</span> <span class="o">=</span> <span class="n">lru_cache</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_initialize_fs</span><span class="p">)</span> |
| </code></pre></div></td></tr></table></div> |
| </details> |
| |
| |
| |
| <div class="doc doc-children"> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <div class="doc doc-object doc-function"> |
| |
| |
| <h3 id="pyiceberg.io.pyarrow.PyArrowFileIO.__getstate__" class="doc doc-heading"> |
| <code class="highlight language-python"><span class="n">__getstate__</span><span class="p">()</span></code> |
| |
| <a href="#pyiceberg.io.pyarrow.PyArrowFileIO.__getstate__" class="headerlink" title="Permanent link">¶</a></h3> |
| |
| |
| <div class="doc doc-contents "> |
| |
| <p>Create a dictionary of the PyArrowFileIO fields used when pickling.</p> |
| |
| |
| <details class="quote"> |
| <summary>Source code in <code>pyiceberg/io/pyarrow.py</code></summary> |
| <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-679">679</a></span> |
| <span class="normal"><a href="#__codelineno-0-680">680</a></span> |
| <span class="normal"><a href="#__codelineno-0-681">681</a></span> |
| <span class="normal"><a href="#__codelineno-0-682">682</a></span> |
| <span class="normal"><a href="#__codelineno-0-683">683</a></span></pre></div></td><td class="code"><div><pre><span></span><code><a id="__codelineno-0-679" name="__codelineno-0-679"></a><span class="k">def</span><span class="w"> </span><span class="nf">__getstate__</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Any</span><span class="p">]:</span> |
| <a id="__codelineno-0-680" name="__codelineno-0-680"></a><span class="w"> </span><span class="sd">"""Create a dictionary of the PyArrowFileIO fields used when pickling."""</span> |
| <a id="__codelineno-0-681" name="__codelineno-0-681"></a> <span class="n">fileio_copy</span> <span class="o">=</span> <span class="n">copy</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="vm">__dict__</span><span class="p">)</span> |
| <a id="__codelineno-0-682" name="__codelineno-0-682"></a> <span class="n">fileio_copy</span><span class="p">[</span><span class="s2">"fs_by_scheme"</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span> |
| <a id="__codelineno-0-683" name="__codelineno-0-683"></a> <span class="k">return</span> <span class="n">fileio_copy</span> |
| </code></pre></div></td></tr></table></div> |
| </details> |
| </div> |
| |
| </div> |
| |
| <div class="doc doc-object doc-function"> |
| |
| |
| <h3 id="pyiceberg.io.pyarrow.PyArrowFileIO.__setstate__" class="doc doc-heading"> |
| <code class="highlight language-python"><span class="n">__setstate__</span><span class="p">(</span><span class="n">state</span><span class="p">)</span></code> |
| |
| <a href="#pyiceberg.io.pyarrow.PyArrowFileIO.__setstate__" class="headerlink" title="Permanent link">¶</a></h3> |
| |
| |
| <div class="doc doc-contents "> |
| |
| <p>Deserialize the state into a PyArrowFileIO instance.</p> |
| |
| |
| <details class="quote"> |
| <summary>Source code in <code>pyiceberg/io/pyarrow.py</code></summary> |
| <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-685">685</a></span> |
| <span class="normal"><a href="#__codelineno-0-686">686</a></span> |
| <span class="normal"><a href="#__codelineno-0-687">687</a></span> |
| <span class="normal"><a href="#__codelineno-0-688">688</a></span></pre></div></td><td class="code"><div><pre><span></span><code><a id="__codelineno-0-685" name="__codelineno-0-685"></a><span class="k">def</span><span class="w"> </span><span class="nf">__setstate__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">state</span><span class="p">:</span> <span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Any</span><span class="p">])</span> <span class="o">-></span> <span class="kc">None</span><span class="p">:</span> |
| <a id="__codelineno-0-686" name="__codelineno-0-686"></a><span class="w"> </span><span class="sd">"""Deserialize the state into a PyArrowFileIO instance."""</span> |
| <a id="__codelineno-0-687" name="__codelineno-0-687"></a> <span class="bp">self</span><span class="o">.</span><span class="vm">__dict__</span> <span class="o">=</span> <span class="n">state</span> |
| <a id="__codelineno-0-688" name="__codelineno-0-688"></a> <span class="bp">self</span><span class="o">.</span><span class="n">fs_by_scheme</span> <span class="o">=</span> <span class="n">lru_cache</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_initialize_fs</span><span class="p">)</span> |
| </code></pre></div></td></tr></table></div> |
| </details> |
| </div> |
| |
| </div> |
| |
| <div class="doc doc-object doc-function"> |
| |
| |
| <h3 id="pyiceberg.io.pyarrow.PyArrowFileIO.delete" class="doc doc-heading"> |
| <code class="highlight language-python"><span class="n">delete</span><span class="p">(</span><span class="n">location</span><span class="p">)</span></code> |
| |
| <a href="#pyiceberg.io.pyarrow.PyArrowFileIO.delete" class="headerlink" title="Permanent link">¶</a></h3> |
| |
| |
| <div class="doc doc-contents "> |
| |
| <p>Delete the file at the given location.</p> |
| |
| |
| <p><span class="doc-section-title">Parameters:</span></p> |
| <table> |
| <thead> |
| <tr> |
| <th>Name</th> |
| <th>Type</th> |
| <th>Description</th> |
| <th>Default</th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr class="doc-section-item"> |
| <td> |
| <code>location</code> |
| </td> |
| <td> |
| <code><span title="typing.Union">Union</span>[<span title="str">str</span>, <a class="autorefs autorefs-internal" title="InputFile (pyiceberg.io.InputFile)" href="../#pyiceberg.io.InputFile">InputFile</a>, <a class="autorefs autorefs-internal" title="OutputFile (pyiceberg.io.OutputFile)" href="../#pyiceberg.io.OutputFile">OutputFile</a>]</code> |
| </td> |
| <td> |
| <div class="doc-md-description"> |
| <p>The URI to the file. If an InputFile instance or an OutputFile instance is provided, |
| the location attribute for that instance is used as the location to delete.</p> |
| </div> |
| </td> |
| <td> |
| <em>required</em> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| |
| |
| <p><span class="doc-section-title">Raises:</span></p> |
| <table> |
| <thead> |
| <tr> |
| <th>Type</th> |
| <th>Description</th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr class="doc-section-item"> |
| <td> |
| <code><span title="FileNotFoundError">FileNotFoundError</span></code> |
| </td> |
| <td> |
| <div class="doc-md-description"> |
| <p>When the file at the provided location does not exist.</p> |
| </div> |
| </td> |
| </tr> |
| <tr class="doc-section-item"> |
| <td> |
| <code><span title="PermissionError">PermissionError</span></code> |
| </td> |
| <td> |
| <div class="doc-md-description"> |
| <p>If the file at the provided location cannot be accessed due to a permission error, such as |
| AWS error code 15.</p> |
| </div> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| |
| |
| <details class="quote"> |
| <summary>Source code in <code>pyiceberg/io/pyarrow.py</code></summary> |
| <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-650">650</a></span> |
| <span class="normal"><a href="#__codelineno-0-651">651</a></span> |
| <span class="normal"><a href="#__codelineno-0-652">652</a></span> |
| <span class="normal"><a href="#__codelineno-0-653">653</a></span> |
| <span class="normal"><a href="#__codelineno-0-654">654</a></span> |
| <span class="normal"><a href="#__codelineno-0-655">655</a></span> |
| <span class="normal"><a href="#__codelineno-0-656">656</a></span> |
| <span class="normal"><a href="#__codelineno-0-657">657</a></span> |
| <span class="normal"><a href="#__codelineno-0-658">658</a></span> |
| <span class="normal"><a href="#__codelineno-0-659">659</a></span> |
| <span class="normal"><a href="#__codelineno-0-660">660</a></span> |
| <span class="normal"><a href="#__codelineno-0-661">661</a></span> |
| <span class="normal"><a href="#__codelineno-0-662">662</a></span> |
| <span class="normal"><a href="#__codelineno-0-663">663</a></span> |
| <span class="normal"><a href="#__codelineno-0-664">664</a></span> |
| <span class="normal"><a href="#__codelineno-0-665">665</a></span> |
| <span class="normal"><a href="#__codelineno-0-666">666</a></span> |
| <span class="normal"><a href="#__codelineno-0-667">667</a></span> |
| <span class="normal"><a href="#__codelineno-0-668">668</a></span> |
| <span class="normal"><a href="#__codelineno-0-669">669</a></span> |
| <span class="normal"><a href="#__codelineno-0-670">670</a></span> |
| <span class="normal"><a href="#__codelineno-0-671">671</a></span> |
| <span class="normal"><a href="#__codelineno-0-672">672</a></span> |
| <span class="normal"><a href="#__codelineno-0-673">673</a></span> |
| <span class="normal"><a href="#__codelineno-0-674">674</a></span> |
| <span class="normal"><a href="#__codelineno-0-675">675</a></span> |
| <span class="normal"><a href="#__codelineno-0-676">676</a></span> |
| <span class="normal"><a href="#__codelineno-0-677">677</a></span></pre></div></td><td class="code"><div><pre><span></span><code><a id="__codelineno-0-650" name="__codelineno-0-650"></a><span class="k">def</span><span class="w"> </span><span class="nf">delete</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">location</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">InputFile</span><span class="p">,</span> <span class="n">OutputFile</span><span class="p">])</span> <span class="o">-></span> <span class="kc">None</span><span class="p">:</span> |
| <a id="__codelineno-0-651" name="__codelineno-0-651"></a><span class="w"> </span><span class="sd">"""Delete the file at the given location.</span> |
| <a id="__codelineno-0-652" name="__codelineno-0-652"></a> |
| <a id="__codelineno-0-653" name="__codelineno-0-653"></a><span class="sd"> Args:</span> |
| <a id="__codelineno-0-654" name="__codelineno-0-654"></a><span class="sd"> location (Union[str, InputFile, OutputFile]): The URI to the file--if an InputFile instance or an OutputFile instance is provided,</span> |
| <a id="__codelineno-0-655" name="__codelineno-0-655"></a><span class="sd"> the location attribute for that instance is used as the location to delete.</span> |
| <a id="__codelineno-0-656" name="__codelineno-0-656"></a> |
| <a id="__codelineno-0-657" name="__codelineno-0-657"></a><span class="sd"> Raises:</span> |
| <a id="__codelineno-0-658" name="__codelineno-0-658"></a><span class="sd"> FileNotFoundError: When the file at the provided location does not exist.</span> |
| <a id="__codelineno-0-659" name="__codelineno-0-659"></a><span class="sd"> PermissionError: If the file at the provided location cannot be accessed due to a permission error such as</span> |
| <a id="__codelineno-0-660" name="__codelineno-0-660"></a><span class="sd"> an AWS error code 15.</span> |
| <a id="__codelineno-0-661" name="__codelineno-0-661"></a><span class="sd"> """</span> |
| <a id="__codelineno-0-662" name="__codelineno-0-662"></a> <span class="n">str_location</span> <span class="o">=</span> <span class="n">location</span><span class="o">.</span><span class="n">location</span> <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">location</span><span class="p">,</span> <span class="p">(</span><span class="n">InputFile</span><span class="p">,</span> <span class="n">OutputFile</span><span class="p">))</span> <span class="k">else</span> <span class="n">location</span> |
| <a id="__codelineno-0-663" name="__codelineno-0-663"></a> <span class="n">scheme</span><span class="p">,</span> <span class="n">netloc</span><span class="p">,</span> <span class="n">path</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">parse_location</span><span class="p">(</span><span class="n">str_location</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">properties</span><span class="p">)</span> |
| <a id="__codelineno-0-664" name="__codelineno-0-664"></a> <span class="n">fs</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">fs_by_scheme</span><span class="p">(</span><span class="n">scheme</span><span class="p">,</span> <span class="n">netloc</span><span class="p">)</span> |
| <a id="__codelineno-0-665" name="__codelineno-0-665"></a> |
| <a id="__codelineno-0-666" name="__codelineno-0-666"></a> <span class="k">try</span><span class="p">:</span> |
| <a id="__codelineno-0-667" name="__codelineno-0-667"></a> <span class="n">fs</span><span class="o">.</span><span class="n">delete_file</span><span class="p">(</span><span class="n">path</span><span class="p">)</span> |
| <a id="__codelineno-0-668" name="__codelineno-0-668"></a> <span class="k">except</span> <span class="ne">FileNotFoundError</span><span class="p">:</span> |
| <a id="__codelineno-0-669" name="__codelineno-0-669"></a> <span class="k">raise</span> |
| <a id="__codelineno-0-670" name="__codelineno-0-670"></a> <span class="k">except</span> <span class="ne">PermissionError</span><span class="p">:</span> |
| <a id="__codelineno-0-671" name="__codelineno-0-671"></a> <span class="k">raise</span> |
| <a id="__codelineno-0-672" name="__codelineno-0-672"></a> <span class="k">except</span> <span class="ne">OSError</span> <span class="k">as</span> <span class="n">e</span><span class="p">:</span> |
| <a id="__codelineno-0-673" name="__codelineno-0-673"></a> <span class="k">if</span> <span class="n">e</span><span class="o">.</span><span class="n">errno</span> <span class="o">==</span> <span class="mi">2</span> <span class="ow">or</span> <span class="s2">"Path does not exist"</span> <span class="ow">in</span> <span class="nb">str</span><span class="p">(</span><span class="n">e</span><span class="p">):</span> |
| <a id="__codelineno-0-674" name="__codelineno-0-674"></a> <span class="k">raise</span> <span class="ne">FileNotFoundError</span><span class="p">(</span><span class="sa">f</span><span class="s2">"Cannot delete file, does not exist: </span><span class="si">{</span><span class="n">location</span><span class="si">}</span><span class="s2">"</span><span class="p">)</span> <span class="kn">from</span><span class="w"> </span><span class="nn">e</span> |
| <a id="__codelineno-0-675" name="__codelineno-0-675"></a> <span class="k">elif</span> <span class="n">e</span><span class="o">.</span><span class="n">errno</span> <span class="o">==</span> <span class="mi">13</span> <span class="ow">or</span> <span class="s2">"AWS Error [code 15]"</span> <span class="ow">in</span> <span class="nb">str</span><span class="p">(</span><span class="n">e</span><span class="p">):</span> |
| <a id="__codelineno-0-676" name="__codelineno-0-676"></a> <span class="k">raise</span> <span class="ne">PermissionError</span><span class="p">(</span><span class="sa">f</span><span class="s2">"Cannot delete file, access denied: </span><span class="si">{</span><span class="n">location</span><span class="si">}</span><span class="s2">"</span><span class="p">)</span> <span class="kn">from</span><span class="w"> </span><span class="nn">e</span> |
| <a id="__codelineno-0-677" name="__codelineno-0-677"></a> <span class="k">raise</span> <span class="c1"># pragma: no cover - If some other kind of OSError, raise the raw error</span> |
| </code></pre></div></td></tr></table></div> |
| </details> |
| </div> |
| |
| </div> |
| |
| <div class="doc doc-object doc-function"> |
| |
| |
| <h3 id="pyiceberg.io.pyarrow.PyArrowFileIO.new_input" class="doc doc-heading"> |
| <code class="highlight language-python"><span class="n">new_input</span><span class="p">(</span><span class="n">location</span><span class="p">)</span></code> |
| |
| <a href="#pyiceberg.io.pyarrow.PyArrowFileIO.new_input" class="headerlink" title="Permanent link">¶</a></h3> |
| |
| |
| <div class="doc doc-contents "> |
| |
| <p>Get a PyArrowFile instance to read bytes from the file at the given location.</p> |
| |
| |
| <p><span class="doc-section-title">Parameters:</span></p> |
| <table> |
| <thead> |
| <tr> |
| <th>Name</th> |
| <th>Type</th> |
| <th>Description</th> |
| <th>Default</th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr class="doc-section-item"> |
| <td> |
| <code>location</code> |
| </td> |
| <td> |
| <code><span title="str">str</span></code> |
| </td> |
| <td> |
| <div class="doc-md-description"> |
| <p>A URI or a path to a local file.</p> |
| </div> |
| </td> |
| <td> |
| <em>required</em> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| |
| |
| <p><span class="doc-section-title">Returns:</span></p> |
| <table> |
| <thead> |
| <tr> |
| <th>Name</th> <th>Type</th> |
| <th>Description</th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr class="doc-section-item"> |
| <td><code>PyArrowFile</code></td> <td> |
| <code><a class="autorefs autorefs-internal" title="PyArrowFile (pyiceberg.io.pyarrow.PyArrowFile)" href="#pyiceberg.io.pyarrow.PyArrowFile">PyArrowFile</a></code> |
| </td> |
| <td> |
| <div class="doc-md-description"> |
| <p>A PyArrowFile instance for the given location.</p> |
| </div> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| |
| |
| <details class="quote"> |
| <summary>Source code in <code>pyiceberg/io/pyarrow.py</code></summary> |
| <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-616">616</a></span> |
| <span class="normal"><a href="#__codelineno-0-617">617</a></span> |
| <span class="normal"><a href="#__codelineno-0-618">618</a></span> |
| <span class="normal"><a href="#__codelineno-0-619">619</a></span> |
| <span class="normal"><a href="#__codelineno-0-620">620</a></span> |
| <span class="normal"><a href="#__codelineno-0-621">621</a></span> |
| <span class="normal"><a href="#__codelineno-0-622">622</a></span> |
| <span class="normal"><a href="#__codelineno-0-623">623</a></span> |
| <span class="normal"><a href="#__codelineno-0-624">624</a></span> |
| <span class="normal"><a href="#__codelineno-0-625">625</a></span> |
| <span class="normal"><a href="#__codelineno-0-626">626</a></span> |
| <span class="normal"><a href="#__codelineno-0-627">627</a></span> |
| <span class="normal"><a href="#__codelineno-0-628">628</a></span> |
| <span class="normal"><a href="#__codelineno-0-629">629</a></span> |
| <span class="normal"><a href="#__codelineno-0-630">630</a></span> |
| <span class="normal"><a href="#__codelineno-0-631">631</a></span></pre></div></td><td class="code"><div><pre><span></span><code><a id="__codelineno-0-616" name="__codelineno-0-616"></a><span class="k">def</span><span class="w"> </span><span class="nf">new_input</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">location</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="n">PyArrowFile</span><span class="p">:</span> |
| <a id="__codelineno-0-617" name="__codelineno-0-617"></a><span class="w"> </span><span class="sd">"""Get a PyArrowFile instance to read bytes from the file at the given location.</span> |
| <a id="__codelineno-0-618" name="__codelineno-0-618"></a> |
| <a id="__codelineno-0-619" name="__codelineno-0-619"></a><span class="sd"> Args:</span> |
| <a id="__codelineno-0-620" name="__codelineno-0-620"></a><span class="sd"> location (str): A URI or a path to a local file.</span> |
| <a id="__codelineno-0-621" name="__codelineno-0-621"></a> |
| <a id="__codelineno-0-622" name="__codelineno-0-622"></a><span class="sd"> Returns:</span> |
| <a id="__codelineno-0-623" name="__codelineno-0-623"></a><span class="sd"> PyArrowFile: A PyArrowFile instance for the given location.</span> |
| <a id="__codelineno-0-624" name="__codelineno-0-624"></a><span class="sd"> """</span> |
| <a id="__codelineno-0-625" name="__codelineno-0-625"></a> <span class="n">scheme</span><span class="p">,</span> <span class="n">netloc</span><span class="p">,</span> <span class="n">path</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">parse_location</span><span class="p">(</span><span class="n">location</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">properties</span><span class="p">)</span> |
| <a id="__codelineno-0-626" name="__codelineno-0-626"></a> <span class="k">return</span> <span class="n">PyArrowFile</span><span class="p">(</span> |
| <a id="__codelineno-0-627" name="__codelineno-0-627"></a> <span class="n">fs</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">fs_by_scheme</span><span class="p">(</span><span class="n">scheme</span><span class="p">,</span> <span class="n">netloc</span><span class="p">),</span> |
| <a id="__codelineno-0-628" name="__codelineno-0-628"></a> <span class="n">location</span><span class="o">=</span><span class="n">location</span><span class="p">,</span> |
| <a id="__codelineno-0-629" name="__codelineno-0-629"></a> <span class="n">path</span><span class="o">=</span><span class="n">path</span><span class="p">,</span> |
| <a id="__codelineno-0-630" name="__codelineno-0-630"></a> <span class="n">buffer_size</span><span class="o">=</span><span class="nb">int</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">properties</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">BUFFER_SIZE</span><span class="p">,</span> <span class="n">ONE_MEGABYTE</span><span class="p">)),</span> |
| <a id="__codelineno-0-631" name="__codelineno-0-631"></a> <span class="p">)</span> |
| </code></pre></div></td></tr></table></div> |
| </details> |
| </div> |
| |
| </div> |
| |
| <div class="doc doc-object doc-function"> |
| |
| |
| <h3 id="pyiceberg.io.pyarrow.PyArrowFileIO.new_output" class="doc doc-heading"> |
| <code class="highlight language-python"><span class="n">new_output</span><span class="p">(</span><span class="n">location</span><span class="p">)</span></code> |
| |
| <a href="#pyiceberg.io.pyarrow.PyArrowFileIO.new_output" class="headerlink" title="Permanent link">¶</a></h3> |
| |
| |
| <div class="doc doc-contents "> |
| |
| <p>Get a PyArrowFile instance to write bytes to the file at the given location.</p> |
| |
| |
| <p><span class="doc-section-title">Parameters:</span></p> |
| <table> |
| <thead> |
| <tr> |
| <th>Name</th> |
| <th>Type</th> |
| <th>Description</th> |
| <th>Default</th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr class="doc-section-item"> |
| <td> |
| <code>location</code> |
| </td> |
| <td> |
| <code><span title="str">str</span></code> |
| </td> |
| <td> |
| <div class="doc-md-description"> |
| <p>A URI or a path to a local file.</p> |
| </div> |
| </td> |
| <td> |
| <em>required</em> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| |
| |
| <p><span class="doc-section-title">Returns:</span></p> |
| <table> |
| <thead> |
| <tr> |
| <th>Name</th> <th>Type</th> |
| <th>Description</th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr class="doc-section-item"> |
| <td><code>PyArrowFile</code></td> <td> |
| <code><a class="autorefs autorefs-internal" title="PyArrowFile (pyiceberg.io.pyarrow.PyArrowFile)" href="#pyiceberg.io.pyarrow.PyArrowFile">PyArrowFile</a></code> |
| </td> |
| <td> |
| <div class="doc-md-description"> |
| <p>A PyArrowFile instance for the given location.</p> |
| </div> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| |
| |
| <details class="quote"> |
| <summary>Source code in <code>pyiceberg/io/pyarrow.py</code></summary> |
| <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-633">633</a></span> |
| <span class="normal"><a href="#__codelineno-0-634">634</a></span> |
| <span class="normal"><a href="#__codelineno-0-635">635</a></span> |
| <span class="normal"><a href="#__codelineno-0-636">636</a></span> |
| <span class="normal"><a href="#__codelineno-0-637">637</a></span> |
| <span class="normal"><a href="#__codelineno-0-638">638</a></span> |
| <span class="normal"><a href="#__codelineno-0-639">639</a></span> |
| <span class="normal"><a href="#__codelineno-0-640">640</a></span> |
| <span class="normal"><a href="#__codelineno-0-641">641</a></span> |
| <span class="normal"><a href="#__codelineno-0-642">642</a></span> |
| <span class="normal"><a href="#__codelineno-0-643">643</a></span> |
| <span class="normal"><a href="#__codelineno-0-644">644</a></span> |
| <span class="normal"><a href="#__codelineno-0-645">645</a></span> |
| <span class="normal"><a href="#__codelineno-0-646">646</a></span> |
| <span class="normal"><a href="#__codelineno-0-647">647</a></span> |
| <span class="normal"><a href="#__codelineno-0-648">648</a></span></pre></div></td><td class="code"><div><pre><span></span><code><a id="__codelineno-0-633" name="__codelineno-0-633"></a><span class="k">def</span><span class="w"> </span><span class="nf">new_output</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">location</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="n">PyArrowFile</span><span class="p">:</span> |
| <a id="__codelineno-0-634" name="__codelineno-0-634"></a><span class="w"> </span><span class="sd">"""Get a PyArrowFile instance to write bytes to the file at the given location.</span> |
| <a id="__codelineno-0-635" name="__codelineno-0-635"></a> |
| <a id="__codelineno-0-636" name="__codelineno-0-636"></a><span class="sd"> Args:</span> |
| <a id="__codelineno-0-637" name="__codelineno-0-637"></a><span class="sd"> location (str): A URI or a path to a local file.</span> |
| <a id="__codelineno-0-638" name="__codelineno-0-638"></a> |
| <a id="__codelineno-0-639" name="__codelineno-0-639"></a><span class="sd"> Returns:</span> |
| <a id="__codelineno-0-640" name="__codelineno-0-640"></a><span class="sd"> PyArrowFile: A PyArrowFile instance for the given location.</span> |
| <a id="__codelineno-0-641" name="__codelineno-0-641"></a><span class="sd"> """</span> |
| <a id="__codelineno-0-642" name="__codelineno-0-642"></a> <span class="n">scheme</span><span class="p">,</span> <span class="n">netloc</span><span class="p">,</span> <span class="n">path</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">parse_location</span><span class="p">(</span><span class="n">location</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">properties</span><span class="p">)</span> |
| <a id="__codelineno-0-643" name="__codelineno-0-643"></a> <span class="k">return</span> <span class="n">PyArrowFile</span><span class="p">(</span> |
| <a id="__codelineno-0-644" name="__codelineno-0-644"></a> <span class="n">fs</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">fs_by_scheme</span><span class="p">(</span><span class="n">scheme</span><span class="p">,</span> <span class="n">netloc</span><span class="p">),</span> |
| <a id="__codelineno-0-645" name="__codelineno-0-645"></a> <span class="n">location</span><span class="o">=</span><span class="n">location</span><span class="p">,</span> |
| <a id="__codelineno-0-646" name="__codelineno-0-646"></a> <span class="n">path</span><span class="o">=</span><span class="n">path</span><span class="p">,</span> |
| <a id="__codelineno-0-647" name="__codelineno-0-647"></a> <span class="n">buffer_size</span><span class="o">=</span><span class="nb">int</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">properties</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">BUFFER_SIZE</span><span class="p">,</span> <span class="n">ONE_MEGABYTE</span><span class="p">)),</span> |
| <a id="__codelineno-0-648" name="__codelineno-0-648"></a> <span class="p">)</span> |
| </code></pre></div></td></tr></table></div> |
| </details> |
| </div> |
| |
| </div> |
| |
| <div class="doc doc-object doc-function"> |
| |
| |
| <h3 id="pyiceberg.io.pyarrow.PyArrowFileIO.parse_location" class="doc doc-heading"> |
| <code class="highlight language-python"><span class="n">parse_location</span><span class="p">(</span><span class="n">location</span><span class="p">,</span> <span class="n">properties</span><span class="o">=</span><span class="n">EMPTY_DICT</span><span class="p">)</span></code> |
| |
| <span class="doc doc-labels"> |
| <small class="doc doc-label doc-label-staticmethod"><code>staticmethod</code></small> |
| </span> |
| |
| <a href="#pyiceberg.io.pyarrow.PyArrowFileIO.parse_location" class="headerlink" title="Permanent link">¶</a></h3> |
| |
| |
| <div class="doc doc-contents "> |
| |
| <p>Return (scheme, netloc, path) for the given location.</p> |
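| <p>Uses the <code>DEFAULT_SCHEME</code> and <code>DEFAULT_NETLOC</code> properties (falling back to <code>file</code> and an empty netloc) when the location has no scheme.</p> |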
| |
| |
| <details class="quote"> |
| <summary>Source code in <code>pyiceberg/io/pyarrow.py</code></summary> |
| <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-396">396</a></span> |
| <span class="normal"><a href="#__codelineno-0-397">397</a></span> |
| <span class="normal"><a href="#__codelineno-0-398">398</a></span> |
| <span class="normal"><a href="#__codelineno-0-399">399</a></span> |
| <span class="normal"><a href="#__codelineno-0-400">400</a></span> |
| <span class="normal"><a href="#__codelineno-0-401">401</a></span> |
| <span class="normal"><a href="#__codelineno-0-402">402</a></span> |
| <span class="normal"><a href="#__codelineno-0-403">403</a></span> |
| <span class="normal"><a href="#__codelineno-0-404">404</a></span> |
| <span class="normal"><a href="#__codelineno-0-405">405</a></span> |
| <span class="normal"><a href="#__codelineno-0-406">406</a></span> |
| <span class="normal"><a href="#__codelineno-0-407">407</a></span> |
| <span class="normal"><a href="#__codelineno-0-408">408</a></span> |
| <span class="normal"><a href="#__codelineno-0-409">409</a></span> |
| <span class="normal"><a href="#__codelineno-0-410">410</a></span> |
| <span class="normal"><a href="#__codelineno-0-411">411</a></span></pre></div></td><td class="code"><div><pre><span></span><code><a id="__codelineno-0-396" name="__codelineno-0-396"></a><span class="nd">@staticmethod</span> |
| <a id="__codelineno-0-397" name="__codelineno-0-397"></a><span class="k">def</span><span class="w"> </span><span class="nf">parse_location</span><span class="p">(</span><span class="n">location</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span> <span class="n">properties</span><span class="p">:</span> <span class="n">Properties</span> <span class="o">=</span> <span class="n">EMPTY_DICT</span><span class="p">)</span> <span class="o">-></span> <span class="n">Tuple</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="nb">str</span><span class="p">,</span> <span class="nb">str</span><span class="p">]:</span> |
| <a id="__codelineno-0-398" name="__codelineno-0-398"></a><span class="w"> </span><span class="sd">"""Return (scheme, netloc, path) for the given location.</span> |
| <a id="__codelineno-0-399" name="__codelineno-0-399"></a> |
| <a id="__codelineno-0-400" name="__codelineno-0-400"></a><span class="sd"> Uses DEFAULT_SCHEME and DEFAULT_NETLOC if scheme/netloc are missing.</span> |
| <a id="__codelineno-0-401" name="__codelineno-0-401"></a><span class="sd"> """</span> |
| <a id="__codelineno-0-402" name="__codelineno-0-402"></a> <span class="n">uri</span> <span class="o">=</span> <span class="n">urlparse</span><span class="p">(</span><span class="n">location</span><span class="p">)</span> |
| <a id="__codelineno-0-403" name="__codelineno-0-403"></a> |
| <a id="__codelineno-0-404" name="__codelineno-0-404"></a> <span class="k">if</span> <span class="ow">not</span> <span class="n">uri</span><span class="o">.</span><span class="n">scheme</span><span class="p">:</span> |
| <a id="__codelineno-0-405" name="__codelineno-0-405"></a> <span class="n">default_scheme</span> <span class="o">=</span> <span class="n">properties</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">"DEFAULT_SCHEME"</span><span class="p">,</span> <span class="s2">"file"</span><span class="p">)</span> |
| <a id="__codelineno-0-406" name="__codelineno-0-406"></a> <span class="n">default_netloc</span> <span class="o">=</span> <span class="n">properties</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">"DEFAULT_NETLOC"</span><span class="p">,</span> <span class="s2">""</span><span class="p">)</span> |
| <a id="__codelineno-0-407" name="__codelineno-0-407"></a> <span class="k">return</span> <span class="n">default_scheme</span><span class="p">,</span> <span class="n">default_netloc</span><span class="p">,</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">abspath</span><span class="p">(</span><span class="n">location</span><span class="p">)</span> |
| <a id="__codelineno-0-408" name="__codelineno-0-408"></a> <span class="k">elif</span> <span class="n">uri</span><span class="o">.</span><span class="n">scheme</span> <span class="ow">in</span> <span class="p">(</span><span class="s2">"hdfs"</span><span class="p">,</span> <span class="s2">"viewfs"</span><span class="p">):</span> |
| <a id="__codelineno-0-409" name="__codelineno-0-409"></a> <span class="k">return</span> <span class="n">uri</span><span class="o">.</span><span class="n">scheme</span><span class="p">,</span> <span class="n">uri</span><span class="o">.</span><span class="n">netloc</span><span class="p">,</span> <span class="n">uri</span><span class="o">.</span><span class="n">path</span> |
| <a id="__codelineno-0-410" name="__codelineno-0-410"></a> <span class="k">else</span><span class="p">:</span> |
| <a id="__codelineno-0-411" name="__codelineno-0-411"></a> <span class="k">return</span> <span class="n">uri</span><span class="o">.</span><span class="n">scheme</span><span class="p">,</span> <span class="n">uri</span><span class="o">.</span><span class="n">netloc</span><span class="p">,</span> <span class="sa">f</span><span class="s2">"</span><span class="si">{</span><span class="n">uri</span><span class="o">.</span><span class="n">netloc</span><span class="si">}{</span><span class="n">uri</span><span class="o">.</span><span class="n">path</span><span class="si">}</span><span class="s2">"</span> |
| </code></pre></div></td></tr></table></div> |
| </details> |
| </div> |
| |
| </div> |
| |
| |
| |
| </div> |
| |
| </div> |
| |
| </div> |
| |
| <div class="doc doc-object doc-class"> |
| |
| |
| |
| <h2 id="pyiceberg.io.pyarrow.PyArrowSchemaVisitor" class="doc doc-heading"> |
| <code>PyArrowSchemaVisitor</code> |
| |
| |
| <a href="#pyiceberg.io.pyarrow.PyArrowSchemaVisitor" class="headerlink" title="Permanent link">¶</a></h2> |
| |
| |
| <div class="doc doc-contents "> |
| <p class="doc doc-class-bases"> |
| Bases: <code><span title="typing.Generic">Generic</span>[<span title="pyiceberg.io.pyarrow.T">T</span>]</code>, <code><span title="abc.ABC">ABC</span></code></p> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <details class="quote"> |
| <summary>Source code in <code>pyiceberg/io/pyarrow.py</code></summary> |
| <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-1204">1204</a></span> |
| <span class="normal"><a href="#__codelineno-0-1205">1205</a></span> |
| <span class="normal"><a href="#__codelineno-0-1206">1206</a></span> |
| <span class="normal"><a href="#__codelineno-0-1207">1207</a></span> |
| <span class="normal"><a href="#__codelineno-0-1208">1208</a></span> |
| <span class="normal"><a href="#__codelineno-0-1209">1209</a></span> |
| <span class="normal"><a href="#__codelineno-0-1210">1210</a></span> |
| <span class="normal"><a href="#__codelineno-0-1211">1211</a></span> |
| <span class="normal"><a href="#__codelineno-0-1212">1212</a></span> |
| <span class="normal"><a href="#__codelineno-0-1213">1213</a></span> |
| <span class="normal"><a href="#__codelineno-0-1214">1214</a></span> |
| <span class="normal"><a href="#__codelineno-0-1215">1215</a></span> |
| <span class="normal"><a href="#__codelineno-0-1216">1216</a></span> |
| <span class="normal"><a href="#__codelineno-0-1217">1217</a></span> |
| <span class="normal"><a href="#__codelineno-0-1218">1218</a></span> |
| <span class="normal"><a href="#__codelineno-0-1219">1219</a></span> |
| <span class="normal"><a href="#__codelineno-0-1220">1220</a></span> |
| <span class="normal"><a href="#__codelineno-0-1221">1221</a></span> |
| <span class="normal"><a href="#__codelineno-0-1222">1222</a></span> |
| <span class="normal"><a href="#__codelineno-0-1223">1223</a></span> |
| <span class="normal"><a href="#__codelineno-0-1224">1224</a></span> |
| <span class="normal"><a href="#__codelineno-0-1225">1225</a></span> |
| <span class="normal"><a href="#__codelineno-0-1226">1226</a></span> |
| <span class="normal"><a href="#__codelineno-0-1227">1227</a></span> |
| <span class="normal"><a href="#__codelineno-0-1228">1228</a></span> |
| <span class="normal"><a href="#__codelineno-0-1229">1229</a></span> |
| <span class="normal"><a href="#__codelineno-0-1230">1230</a></span> |
| <span class="normal"><a href="#__codelineno-0-1231">1231</a></span> |
| <span class="normal"><a href="#__codelineno-0-1232">1232</a></span> |
| <span class="normal"><a href="#__codelineno-0-1233">1233</a></span> |
| <span class="normal"><a href="#__codelineno-0-1234">1234</a></span> |
| <span class="normal"><a href="#__codelineno-0-1235">1235</a></span> |
| <span class="normal"><a href="#__codelineno-0-1236">1236</a></span> |
| <span class="normal"><a href="#__codelineno-0-1237">1237</a></span> |
| <span class="normal"><a href="#__codelineno-0-1238">1238</a></span> |
| <span class="normal"><a href="#__codelineno-0-1239">1239</a></span> |
| <span class="normal"><a href="#__codelineno-0-1240">1240</a></span> |
| <span class="normal"><a href="#__codelineno-0-1241">1241</a></span> |
| <span class="normal"><a href="#__codelineno-0-1242">1242</a></span> |
| <span class="normal"><a href="#__codelineno-0-1243">1243</a></span> |
| <span class="normal"><a href="#__codelineno-0-1244">1244</a></span> |
| <span class="normal"><a href="#__codelineno-0-1245">1245</a></span> |
| <span class="normal"><a href="#__codelineno-0-1246">1246</a></span> |
| <span class="normal"><a href="#__codelineno-0-1247">1247</a></span> |
| <span class="normal"><a href="#__codelineno-0-1248">1248</a></span> |
| <span class="normal"><a href="#__codelineno-0-1249">1249</a></span> |
| <span class="normal"><a href="#__codelineno-0-1250">1250</a></span> |
| <span class="normal"><a href="#__codelineno-0-1251">1251</a></span></pre></div></td><td class="code"><div><pre><span></span><code><a id="__codelineno-0-1204" name="__codelineno-0-1204"></a><span class="k">class</span><span class="w"> </span><span class="nc">PyArrowSchemaVisitor</span><span class="p">(</span><span class="n">Generic</span><span class="p">[</span><span class="n">T</span><span class="p">],</span> <span class="n">ABC</span><span class="p">):</span> |
| <a id="__codelineno-0-1205" name="__codelineno-0-1205"></a> <span class="k">def</span><span class="w"> </span><span class="nf">before_field</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">field</span><span class="p">:</span> <span class="n">pa</span><span class="o">.</span><span class="n">Field</span><span class="p">)</span> <span class="o">-></span> <span class="kc">None</span><span class="p">:</span> |
| <a id="__codelineno-0-1206" name="__codelineno-0-1206"></a><span class="w"> </span><span class="sd">"""Override this method to perform an action immediately before visiting a field."""</span> |
| <a id="__codelineno-0-1207" name="__codelineno-0-1207"></a> |
| <a id="__codelineno-0-1208" name="__codelineno-0-1208"></a> <span class="k">def</span><span class="w"> </span><span class="nf">after_field</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">field</span><span class="p">:</span> <span class="n">pa</span><span class="o">.</span><span class="n">Field</span><span class="p">)</span> <span class="o">-></span> <span class="kc">None</span><span class="p">:</span> |
| <a id="__codelineno-0-1209" name="__codelineno-0-1209"></a><span class="w"> </span><span class="sd">"""Override this method to perform an action immediately after visiting a field."""</span> |
| <a id="__codelineno-0-1210" name="__codelineno-0-1210"></a> |
| <a id="__codelineno-0-1211" name="__codelineno-0-1211"></a> <span class="k">def</span><span class="w"> </span><span class="nf">before_list_element</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">element</span><span class="p">:</span> <span class="n">pa</span><span class="o">.</span><span class="n">Field</span><span class="p">)</span> <span class="o">-></span> <span class="kc">None</span><span class="p">:</span> |
| <a id="__codelineno-0-1212" name="__codelineno-0-1212"></a><span class="w"> </span><span class="sd">"""Override this method to perform an action immediately before visiting an element within a ListType."""</span> |
| <a id="__codelineno-0-1213" name="__codelineno-0-1213"></a> |
| <a id="__codelineno-0-1214" name="__codelineno-0-1214"></a> <span class="k">def</span><span class="w"> </span><span class="nf">after_list_element</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">element</span><span class="p">:</span> <span class="n">pa</span><span class="o">.</span><span class="n">Field</span><span class="p">)</span> <span class="o">-></span> <span class="kc">None</span><span class="p">:</span> |
| <a id="__codelineno-0-1215" name="__codelineno-0-1215"></a><span class="w"> </span><span class="sd">"""Override this method to perform an action immediately after visiting an element within a ListType."""</span> |
| <a id="__codelineno-0-1216" name="__codelineno-0-1216"></a> |
| <a id="__codelineno-0-1217" name="__codelineno-0-1217"></a> <span class="k">def</span><span class="w"> </span><span class="nf">before_map_key</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">key</span><span class="p">:</span> <span class="n">pa</span><span class="o">.</span><span class="n">Field</span><span class="p">)</span> <span class="o">-></span> <span class="kc">None</span><span class="p">:</span> |
| <a id="__codelineno-0-1218" name="__codelineno-0-1218"></a><span class="w"> </span><span class="sd">"""Override this method to perform an action immediately before visiting a key within a MapType."""</span> |
| <a id="__codelineno-0-1219" name="__codelineno-0-1219"></a> |
| <a id="__codelineno-0-1220" name="__codelineno-0-1220"></a> <span class="k">def</span><span class="w"> </span><span class="nf">after_map_key</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">key</span><span class="p">:</span> <span class="n">pa</span><span class="o">.</span><span class="n">Field</span><span class="p">)</span> <span class="o">-></span> <span class="kc">None</span><span class="p">:</span> |
| <a id="__codelineno-0-1221" name="__codelineno-0-1221"></a><span class="w"> </span><span class="sd">"""Override this method to perform an action immediately after visiting a key within a MapType."""</span> |
| <a id="__codelineno-0-1222" name="__codelineno-0-1222"></a> |
| <a id="__codelineno-0-1223" name="__codelineno-0-1223"></a> <span class="k">def</span><span class="w"> </span><span class="nf">before_map_value</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="n">pa</span><span class="o">.</span><span class="n">Field</span><span class="p">)</span> <span class="o">-></span> <span class="kc">None</span><span class="p">:</span> |
| <a id="__codelineno-0-1224" name="__codelineno-0-1224"></a><span class="w"> </span><span class="sd">"""Override this method to perform an action immediately before visiting a value within a MapType."""</span> |
| <a id="__codelineno-0-1225" name="__codelineno-0-1225"></a> |
| <a id="__codelineno-0-1226" name="__codelineno-0-1226"></a> <span class="k">def</span><span class="w"> </span><span class="nf">after_map_value</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="n">pa</span><span class="o">.</span><span class="n">Field</span><span class="p">)</span> <span class="o">-></span> <span class="kc">None</span><span class="p">:</span> |
| <a id="__codelineno-0-1227" name="__codelineno-0-1227"></a><span class="w"> </span><span class="sd">"""Override this method to perform an action immediately after visiting a value within a MapType."""</span> |
| <a id="__codelineno-0-1228" name="__codelineno-0-1228"></a> |
| <a id="__codelineno-0-1229" name="__codelineno-0-1229"></a> <span class="nd">@abstractmethod</span> |
| <a id="__codelineno-0-1230" name="__codelineno-0-1230"></a> <span class="k">def</span><span class="w"> </span><span class="nf">schema</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">schema</span><span class="p">:</span> <span class="n">pa</span><span class="o">.</span><span class="n">Schema</span><span class="p">,</span> <span class="n">struct_result</span><span class="p">:</span> <span class="n">T</span><span class="p">)</span> <span class="o">-></span> <span class="n">T</span><span class="p">:</span> |
| <a id="__codelineno-0-1231" name="__codelineno-0-1231"></a><span class="w"> </span><span class="sd">"""Visit a schema."""</span> |
| <a id="__codelineno-0-1232" name="__codelineno-0-1232"></a> |
| <a id="__codelineno-0-1233" name="__codelineno-0-1233"></a> <span class="nd">@abstractmethod</span> |
| <a id="__codelineno-0-1234" name="__codelineno-0-1234"></a> <span class="k">def</span><span class="w"> </span><span class="nf">struct</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">struct</span><span class="p">:</span> <span class="n">pa</span><span class="o">.</span><span class="n">StructType</span><span class="p">,</span> <span class="n">field_results</span><span class="p">:</span> <span class="n">List</span><span class="p">[</span><span class="n">T</span><span class="p">])</span> <span class="o">-></span> <span class="n">T</span><span class="p">:</span> |
| <a id="__codelineno-0-1235" name="__codelineno-0-1235"></a><span class="w"> </span><span class="sd">"""Visit a struct."""</span> |
| <a id="__codelineno-0-1236" name="__codelineno-0-1236"></a> |
| <a id="__codelineno-0-1237" name="__codelineno-0-1237"></a> <span class="nd">@abstractmethod</span> |
| <a id="__codelineno-0-1238" name="__codelineno-0-1238"></a> <span class="k">def</span><span class="w"> </span><span class="nf">field</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">field</span><span class="p">:</span> <span class="n">pa</span><span class="o">.</span><span class="n">Field</span><span class="p">,</span> <span class="n">field_result</span><span class="p">:</span> <span class="n">T</span><span class="p">)</span> <span class="o">-></span> <span class="n">T</span><span class="p">:</span> |
| <a id="__codelineno-0-1239" name="__codelineno-0-1239"></a><span class="w"> </span><span class="sd">"""Visit a field."""</span> |
| <a id="__codelineno-0-1240" name="__codelineno-0-1240"></a> |
| <a id="__codelineno-0-1241" name="__codelineno-0-1241"></a> <span class="nd">@abstractmethod</span> |
| <a id="__codelineno-0-1242" name="__codelineno-0-1242"></a> <span class="k">def</span><span class="w"> </span><span class="nf">list</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">list_type</span><span class="p">:</span> <span class="n">pa</span><span class="o">.</span><span class="n">ListType</span><span class="p">,</span> <span class="n">element_result</span><span class="p">:</span> <span class="n">T</span><span class="p">)</span> <span class="o">-></span> <span class="n">T</span><span class="p">:</span> |
| <a id="__codelineno-0-1243" name="__codelineno-0-1243"></a><span class="w"> </span><span class="sd">"""Visit a list."""</span> |
| <a id="__codelineno-0-1244" name="__codelineno-0-1244"></a> |
| <a id="__codelineno-0-1245" name="__codelineno-0-1245"></a> <span class="nd">@abstractmethod</span> |
| <a id="__codelineno-0-1246" name="__codelineno-0-1246"></a> <span class="k">def</span><span class="w"> </span><span class="nf">map</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">map_type</span><span class="p">:</span> <span class="n">pa</span><span class="o">.</span><span class="n">MapType</span><span class="p">,</span> <span class="n">key_result</span><span class="p">:</span> <span class="n">T</span><span class="p">,</span> <span class="n">value_result</span><span class="p">:</span> <span class="n">T</span><span class="p">)</span> <span class="o">-></span> <span class="n">T</span><span class="p">:</span> |
| <a id="__codelineno-0-1247" name="__codelineno-0-1247"></a><span class="w"> </span><span class="sd">"""Visit a map."""</span> |
| <a id="__codelineno-0-1248" name="__codelineno-0-1248"></a> |
| <a id="__codelineno-0-1249" name="__codelineno-0-1249"></a> <span class="nd">@abstractmethod</span> |
| <a id="__codelineno-0-1250" name="__codelineno-0-1250"></a> <span class="k">def</span><span class="w"> </span><span class="nf">primitive</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">primitive</span><span class="p">:</span> <span class="n">pa</span><span class="o">.</span><span class="n">DataType</span><span class="p">)</span> <span class="o">-></span> <span class="n">T</span><span class="p">:</span> |
| <a id="__codelineno-0-1251" name="__codelineno-0-1251"></a><span class="w"> </span><span class="sd">"""Visit a primitive type."""</span> |
| </code></pre></div></td></tr></table></div> |
| </details> |
| |
| |
| |
| <div class="doc doc-children"> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <div class="doc doc-object doc-function"> |
| |
| |
| <h3 id="pyiceberg.io.pyarrow.PyArrowSchemaVisitor.after_field" class="doc doc-heading"> |
| <code class="highlight language-python"><span class="n">after_field</span><span class="p">(</span><span class="n">field</span><span class="p">)</span></code> |
| |
| <a href="#pyiceberg.io.pyarrow.PyArrowSchemaVisitor.after_field" class="headerlink" title="Permanent link">¶</a></h3> |
| |
| |
| <div class="doc doc-contents "> |
| |
| <p>Override this method to perform an action immediately after visiting a field.</p> |
| |
| |
| <details class="quote"> |
| <summary>Source code in <code>pyiceberg/io/pyarrow.py</code></summary> |
| <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-1208">1208</a></span> |
| <span class="normal"><a href="#__codelineno-0-1209">1209</a></span></pre></div></td><td class="code"><div><pre><span></span><code><a id="__codelineno-0-1208" name="__codelineno-0-1208"></a><span class="k">def</span><span class="w"> </span><span class="nf">after_field</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">field</span><span class="p">:</span> <span class="n">pa</span><span class="o">.</span><span class="n">Field</span><span class="p">)</span> <span class="o">-></span> <span class="kc">None</span><span class="p">:</span> |
| <a id="__codelineno-0-1209" name="__codelineno-0-1209"></a><span class="w"> </span><span class="sd">"""Override this method to perform an action immediately after visiting a field."""</span> |
| </code></pre></div></td></tr></table></div> |
| </details> |
| </div> |
| |
| </div> |
| |
| <div class="doc doc-object doc-function"> |
| |
| |
| <h3 id="pyiceberg.io.pyarrow.PyArrowSchemaVisitor.after_list_element" class="doc doc-heading"> |
| <code class="highlight language-python"><span class="n">after_list_element</span><span class="p">(</span><span class="n">element</span><span class="p">)</span></code> |
| |
| <a href="#pyiceberg.io.pyarrow.PyArrowSchemaVisitor.after_list_element" class="headerlink" title="Permanent link">¶</a></h3> |
| |
| |
| <div class="doc doc-contents "> |
| |
| <p>Override this method to perform an action immediately after visiting an element within a ListType.</p> |
| |
| |
| <details class="quote"> |
| <summary>Source code in <code>pyiceberg/io/pyarrow.py</code></summary> |
| <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-1214">1214</a></span> |
| <span class="normal"><a href="#__codelineno-0-1215">1215</a></span></pre></div></td><td class="code"><div><pre><span></span><code><a id="__codelineno-0-1214" name="__codelineno-0-1214"></a><span class="k">def</span><span class="w"> </span><span class="nf">after_list_element</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">element</span><span class="p">:</span> <span class="n">pa</span><span class="o">.</span><span class="n">Field</span><span class="p">)</span> <span class="o">-></span> <span class="kc">None</span><span class="p">:</span> |
| <a id="__codelineno-0-1215" name="__codelineno-0-1215"></a><span class="w"> </span><span class="sd">"""Override this method to perform an action immediately after visiting an element within a ListType."""</span> |
| </code></pre></div></td></tr></table></div> |
| </details> |
| </div> |
| |
| </div> |
| |
| <div class="doc doc-object doc-function"> |
| |
| |
| <h3 id="pyiceberg.io.pyarrow.PyArrowSchemaVisitor.after_map_key" class="doc doc-heading"> |
| <code class="highlight language-python"><span class="n">after_map_key</span><span class="p">(</span><span class="n">key</span><span class="p">)</span></code> |
| |
| <a href="#pyiceberg.io.pyarrow.PyArrowSchemaVisitor.after_map_key" class="headerlink" title="Permanent link">¶</a></h3> |
| |
| |
| <div class="doc doc-contents "> |
| |
| <p>Override this method to perform an action immediately after visiting a key within a MapType.</p> |
| |
| |
| <details class="quote"> |
| <summary>Source code in <code>pyiceberg/io/pyarrow.py</code></summary> |
| <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-1220">1220</a></span> |
| <span class="normal"><a href="#__codelineno-0-1221">1221</a></span></pre></div></td><td class="code"><div><pre><span></span><code><a id="__codelineno-0-1220" name="__codelineno-0-1220"></a><span class="k">def</span><span class="w"> </span><span class="nf">after_map_key</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">key</span><span class="p">:</span> <span class="n">pa</span><span class="o">.</span><span class="n">Field</span><span class="p">)</span> <span class="o">-></span> <span class="kc">None</span><span class="p">:</span> |
| <a id="__codelineno-0-1221" name="__codelineno-0-1221"></a><span class="w"> </span><span class="sd">"""Override this method to perform an action immediately after visiting a key within a MapType."""</span> |
| </code></pre></div></td></tr></table></div> |
| </details> |
| </div> |
| |
| </div> |
| |
| <div class="doc doc-object doc-function"> |
| |
| |
| <h3 id="pyiceberg.io.pyarrow.PyArrowSchemaVisitor.after_map_value" class="doc doc-heading"> |
| <code class="highlight language-python"><span class="n">after_map_value</span><span class="p">(</span><span class="n">value</span><span class="p">)</span></code> |
| |
| <a href="#pyiceberg.io.pyarrow.PyArrowSchemaVisitor.after_map_value" class="headerlink" title="Permanent link">¶</a></h3> |
| |
| |
| <div class="doc doc-contents "> |
| |
| <p>Override this method to perform an action immediately after visiting a value within a MapType.</p> |
| |
| |
| <details class="quote"> |
| <summary>Source code in <code>pyiceberg/io/pyarrow.py</code></summary> |
| <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-1226">1226</a></span> |
| <span class="normal"><a href="#__codelineno-0-1227">1227</a></span></pre></div></td><td class="code"><div><pre><span></span><code><a id="__codelineno-0-1226" name="__codelineno-0-1226"></a><span class="k">def</span><span class="w"> </span><span class="nf">after_map_value</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="n">pa</span><span class="o">.</span><span class="n">Field</span><span class="p">)</span> <span class="o">-></span> <span class="kc">None</span><span class="p">:</span> |
| <a id="__codelineno-0-1227" name="__codelineno-0-1227"></a><span class="w"> </span><span class="sd">"""Override this method to perform an action immediately after visiting a value within a MapType."""</span> |
| </code></pre></div></td></tr></table></div> |
| </details> |
| </div> |
| |
| </div> |
| |
| <div class="doc doc-object doc-function"> |
| |
| |
| <h3 id="pyiceberg.io.pyarrow.PyArrowSchemaVisitor.before_field" class="doc doc-heading"> |
| <code class="highlight language-python"><span class="n">before_field</span><span class="p">(</span><span class="n">field</span><span class="p">)</span></code> |
| |
| <a href="#pyiceberg.io.pyarrow.PyArrowSchemaVisitor.before_field" class="headerlink" title="Permanent link">¶</a></h3> |
| |
| |
| <div class="doc doc-contents "> |
| |
| <p>Override this method to perform an action immediately before visiting a field.</p> |
| |
| |
| <details class="quote"> |
| <summary>Source code in <code>pyiceberg/io/pyarrow.py</code></summary> |
| <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-1205">1205</a></span> |
| <span class="normal"><a href="#__codelineno-0-1206">1206</a></span></pre></div></td><td class="code"><div><pre><span></span><code><a id="__codelineno-0-1205" name="__codelineno-0-1205"></a><span class="k">def</span><span class="w"> </span><span class="nf">before_field</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">field</span><span class="p">:</span> <span class="n">pa</span><span class="o">.</span><span class="n">Field</span><span class="p">)</span> <span class="o">-></span> <span class="kc">None</span><span class="p">:</span> |
| <a id="__codelineno-0-1206" name="__codelineno-0-1206"></a><span class="w"> </span><span class="sd">"""Override this method to perform an action immediately before visiting a field."""</span> |
| </code></pre></div></td></tr></table></div> |
| </details> |
| </div> |
| |
| </div> |
| |
| <div class="doc doc-object doc-function"> |
| |
| |
| <h3 id="pyiceberg.io.pyarrow.PyArrowSchemaVisitor.before_list_element" class="doc doc-heading"> |
| <code class="highlight language-python"><span class="n">before_list_element</span><span class="p">(</span><span class="n">element</span><span class="p">)</span></code> |
| |
| <a href="#pyiceberg.io.pyarrow.PyArrowSchemaVisitor.before_list_element" class="headerlink" title="Permanent link">¶</a></h3> |
| |
| |
| <div class="doc doc-contents "> |
| |
| <p>Override this method to perform an action immediately before visiting an element within a ListType.</p> |
| |
| |
| <details class="quote"> |
| <summary>Source code in <code>pyiceberg/io/pyarrow.py</code></summary> |
| <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-1211">1211</a></span> |
| <span class="normal"><a href="#__codelineno-0-1212">1212</a></span></pre></div></td><td class="code"><div><pre><span></span><code><a id="__codelineno-0-1211" name="__codelineno-0-1211"></a><span class="k">def</span><span class="w"> </span><span class="nf">before_list_element</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">element</span><span class="p">:</span> <span class="n">pa</span><span class="o">.</span><span class="n">Field</span><span class="p">)</span> <span class="o">-></span> <span class="kc">None</span><span class="p">:</span> |
| <a id="__codelineno-0-1212" name="__codelineno-0-1212"></a><span class="w"> </span><span class="sd">"""Override this method to perform an action immediately before visiting an element within a ListType."""</span> |
| </code></pre></div></td></tr></table></div> |
| </details> |
| </div> |
| |
| </div> |
| |
| <div class="doc doc-object doc-function"> |
| |
| |
| <h3 id="pyiceberg.io.pyarrow.PyArrowSchemaVisitor.before_map_key" class="doc doc-heading"> |
| <code class="highlight language-python"><span class="n">before_map_key</span><span class="p">(</span><span class="n">key</span><span class="p">)</span></code> |
| |
| <a href="#pyiceberg.io.pyarrow.PyArrowSchemaVisitor.before_map_key" class="headerlink" title="Permanent link">¶</a></h3> |
| |
| |
| <div class="doc doc-contents "> |
| |
| <p>Override this method to perform an action immediately before visiting a key within a MapType.</p> |
| |
| |
| <details class="quote"> |
| <summary>Source code in <code>pyiceberg/io/pyarrow.py</code></summary> |
| <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-1217">1217</a></span> |
| <span class="normal"><a href="#__codelineno-0-1218">1218</a></span></pre></div></td><td class="code"><div><pre><span></span><code><a id="__codelineno-0-1217" name="__codelineno-0-1217"></a><span class="k">def</span><span class="w"> </span><span class="nf">before_map_key</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">key</span><span class="p">:</span> <span class="n">pa</span><span class="o">.</span><span class="n">Field</span><span class="p">)</span> <span class="o">-></span> <span class="kc">None</span><span class="p">:</span> |
| <a id="__codelineno-0-1218" name="__codelineno-0-1218"></a><span class="w"> </span><span class="sd">"""Override this method to perform an action immediately before visiting a key within a MapType."""</span> |
| </code></pre></div></td></tr></table></div> |
| </details> |
| </div> |
| |
| </div> |
| |
| <div class="doc doc-object doc-function"> |
| |
| |
| <h3 id="pyiceberg.io.pyarrow.PyArrowSchemaVisitor.before_map_value" class="doc doc-heading"> |
| <code class="highlight language-python"><span class="n">before_map_value</span><span class="p">(</span><span class="n">value</span><span class="p">)</span></code> |
| |
| <a href="#pyiceberg.io.pyarrow.PyArrowSchemaVisitor.before_map_value" class="headerlink" title="Permanent link">¶</a></h3> |
| |
| |
| <div class="doc doc-contents "> |
| |
| <p>Override this method to perform an action immediately before visiting a value within a MapType.</p> |
| |
| |
| <details class="quote"> |
| <summary>Source code in <code>pyiceberg/io/pyarrow.py</code></summary> |
| <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-1223">1223</a></span> |
| <span class="normal"><a href="#__codelineno-0-1224">1224</a></span></pre></div></td><td class="code"><div><pre><span></span><code><a id="__codelineno-0-1223" name="__codelineno-0-1223"></a><span class="k">def</span><span class="w"> </span><span class="nf">before_map_value</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="n">pa</span><span class="o">.</span><span class="n">Field</span><span class="p">)</span> <span class="o">-></span> <span class="kc">None</span><span class="p">:</span> |
| <a id="__codelineno-0-1224" name="__codelineno-0-1224"></a><span class="w"> </span><span class="sd">"""Override this method to perform an action immediately before visiting a value within a MapType."""</span> |
| </code></pre></div></td></tr></table></div> |
| </details> |
| </div> |
| |
| </div> |
| |
| <div class="doc doc-object doc-function"> |
| |
| |
| <h3 id="pyiceberg.io.pyarrow.PyArrowSchemaVisitor.field" class="doc doc-heading"> |
| <code class="highlight language-python"><span class="n">field</span><span class="p">(</span><span class="n">field</span><span class="p">,</span> <span class="n">field_result</span><span class="p">)</span></code> |
| |
| <span class="doc doc-labels"> |
| <small class="doc doc-label doc-label-abstractmethod"><code>abstractmethod</code></small> |
| </span> |
| |
| <a href="#pyiceberg.io.pyarrow.PyArrowSchemaVisitor.field" class="headerlink" title="Permanent link">¶</a></h3> |
| |
| |
| <div class="doc doc-contents "> |
| |
| <p>Visit a field.</p> |
| |
| |
| <details class="quote"> |
| <summary>Source code in <code>pyiceberg/io/pyarrow.py</code></summary> |
| <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-1237">1237</a></span> |
| <span class="normal"><a href="#__codelineno-0-1238">1238</a></span> |
| <span class="normal"><a href="#__codelineno-0-1239">1239</a></span></pre></div></td><td class="code"><div><pre><span></span><code><a id="__codelineno-0-1237" name="__codelineno-0-1237"></a><span class="nd">@abstractmethod</span> |
| <a id="__codelineno-0-1238" name="__codelineno-0-1238"></a><span class="k">def</span><span class="w"> </span><span class="nf">field</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">field</span><span class="p">:</span> <span class="n">pa</span><span class="o">.</span><span class="n">Field</span><span class="p">,</span> <span class="n">field_result</span><span class="p">:</span> <span class="n">T</span><span class="p">)</span> <span class="o">-></span> <span class="n">T</span><span class="p">:</span> |
| <a id="__codelineno-0-1239" name="__codelineno-0-1239"></a><span class="w"> </span><span class="sd">"""Visit a field."""</span> |
| </code></pre></div></td></tr></table></div> |
| </details> |
| </div> |
| |
| </div> |
| |
| <div class="doc doc-object doc-function"> |
| |
| |
| <h3 id="pyiceberg.io.pyarrow.PyArrowSchemaVisitor.list" class="doc doc-heading"> |
| <code class="highlight language-python"><span class="nb">list</span><span class="p">(</span><span class="n">list_type</span><span class="p">,</span> <span class="n">element_result</span><span class="p">)</span></code> |
| |
| <span class="doc doc-labels"> |
| <small class="doc doc-label doc-label-abstractmethod"><code>abstractmethod</code></small> |
| </span> |
| |
| <a href="#pyiceberg.io.pyarrow.PyArrowSchemaVisitor.list" class="headerlink" title="Permanent link">¶</a></h3> |
| |
| |
| <div class="doc doc-contents "> |
| |
| <p>Visit a list.</p> |
| |
| |
| <details class="quote"> |
| <summary>Source code in <code>pyiceberg/io/pyarrow.py</code></summary> |
| <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-1241">1241</a></span> |
| <span class="normal"><a href="#__codelineno-0-1242">1242</a></span> |
| <span class="normal"><a href="#__codelineno-0-1243">1243</a></span></pre></div></td><td class="code"><div><pre><span></span><code><a id="__codelineno-0-1241" name="__codelineno-0-1241"></a><span class="nd">@abstractmethod</span> |
| <a id="__codelineno-0-1242" name="__codelineno-0-1242"></a><span class="k">def</span><span class="w"> </span><span class="nf">list</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">list_type</span><span class="p">:</span> <span class="n">pa</span><span class="o">.</span><span class="n">ListType</span><span class="p">,</span> <span class="n">element_result</span><span class="p">:</span> <span class="n">T</span><span class="p">)</span> <span class="o">-></span> <span class="n">T</span><span class="p">:</span> |
| <a id="__codelineno-0-1243" name="__codelineno-0-1243"></a><span class="w"> </span><span class="sd">"""Visit a list."""</span> |
| </code></pre></div></td></tr></table></div> |
| </details> |
| </div> |
| |
| </div> |
| |
| <div class="doc doc-object doc-function"> |
| |
| |
| <h3 id="pyiceberg.io.pyarrow.PyArrowSchemaVisitor.map" class="doc doc-heading"> |
| <code class="highlight language-python"><span class="nb">map</span><span class="p">(</span><span class="n">map_type</span><span class="p">,</span> <span class="n">key_result</span><span class="p">,</span> <span class="n">value_result</span><span class="p">)</span></code> |
| |
| <span class="doc doc-labels"> |
| <small class="doc doc-label doc-label-abstractmethod"><code>abstractmethod</code></small> |
| </span> |
| |
| <a href="#pyiceberg.io.pyarrow.PyArrowSchemaVisitor.map" class="headerlink" title="Permanent link">¶</a></h3> |
| |
| |
| <div class="doc doc-contents "> |
| |
| <p>Visit a map.</p> |
| |
| |
| <details class="quote"> |
| <summary>Source code in <code>pyiceberg/io/pyarrow.py</code></summary> |
| <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-1245">1245</a></span> |
| <span class="normal"><a href="#__codelineno-0-1246">1246</a></span> |
| <span class="normal"><a href="#__codelineno-0-1247">1247</a></span></pre></div></td><td class="code"><div><pre><span></span><code><a id="__codelineno-0-1245" name="__codelineno-0-1245"></a><span class="nd">@abstractmethod</span> |
| <a id="__codelineno-0-1246" name="__codelineno-0-1246"></a><span class="k">def</span><span class="w"> </span><span class="nf">map</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">map_type</span><span class="p">:</span> <span class="n">pa</span><span class="o">.</span><span class="n">MapType</span><span class="p">,</span> <span class="n">key_result</span><span class="p">:</span> <span class="n">T</span><span class="p">,</span> <span class="n">value_result</span><span class="p">:</span> <span class="n">T</span><span class="p">)</span> <span class="o">-></span> <span class="n">T</span><span class="p">:</span> |
| <a id="__codelineno-0-1247" name="__codelineno-0-1247"></a><span class="w"> </span><span class="sd">"""Visit a map."""</span> |
| </code></pre></div></td></tr></table></div> |
| </details> |
| </div> |
| |
| </div> |
| |
| <div class="doc doc-object doc-function"> |
| |
| |
| <h3 id="pyiceberg.io.pyarrow.PyArrowSchemaVisitor.primitive" class="doc doc-heading"> |
| <code class="highlight language-python"><span class="n">primitive</span><span class="p">(</span><span class="n">primitive</span><span class="p">)</span></code> |
| |
| <span class="doc doc-labels"> |
| <small class="doc doc-label doc-label-abstractmethod"><code>abstractmethod</code></small> |
| </span> |
| |
| <a href="#pyiceberg.io.pyarrow.PyArrowSchemaVisitor.primitive" class="headerlink" title="Permanent link">¶</a></h3> |
| |
| |
| <div class="doc doc-contents "> |
| |
| <p>Visit a primitive type.</p> |
| |
| |
| <details class="quote"> |
| <summary>Source code in <code>pyiceberg/io/pyarrow.py</code></summary> |
| <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-1249">1249</a></span> |
| <span class="normal"><a href="#__codelineno-0-1250">1250</a></span> |
| <span class="normal"><a href="#__codelineno-0-1251">1251</a></span></pre></div></td><td class="code"><div><pre><span></span><code><a id="__codelineno-0-1249" name="__codelineno-0-1249"></a><span class="nd">@abstractmethod</span> |
| <a id="__codelineno-0-1250" name="__codelineno-0-1250"></a><span class="k">def</span><span class="w"> </span><span class="nf">primitive</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">primitive</span><span class="p">:</span> <span class="n">pa</span><span class="o">.</span><span class="n">DataType</span><span class="p">)</span> <span class="o">-></span> <span class="n">T</span><span class="p">:</span> |
| <a id="__codelineno-0-1251" name="__codelineno-0-1251"></a><span class="w"> </span><span class="sd">"""Visit a primitive type."""</span> |
| </code></pre></div></td></tr></table></div> |
| </details> |
| </div> |
| |
| </div> |
| |
| <div class="doc doc-object doc-function"> |
| |
| |
| <h3 id="pyiceberg.io.pyarrow.PyArrowSchemaVisitor.schema" class="doc doc-heading"> |
| <code class="highlight language-python"><span class="n">schema</span><span class="p">(</span><span class="n">schema</span><span class="p">,</span> <span class="n">struct_result</span><span class="p">)</span></code> |
| |
| <span class="doc doc-labels"> |
| <small class="doc doc-label doc-label-abstractmethod"><code>abstractmethod</code></small> |
| </span> |
| |
| <a href="#pyiceberg.io.pyarrow.PyArrowSchemaVisitor.schema" class="headerlink" title="Permanent link">¶</a></h3> |
| |
| |
| <div class="doc doc-contents "> |
| |
| <p>Visit a schema.</p> |
| |
| |
| <details class="quote"> |
| <summary>Source code in <code>pyiceberg/io/pyarrow.py</code></summary> |
| <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-1229">1229</a></span> |
| <span class="normal"><a href="#__codelineno-0-1230">1230</a></span> |
| <span class="normal"><a href="#__codelineno-0-1231">1231</a></span></pre></div></td><td class="code"><div><pre><span></span><code><a id="__codelineno-0-1229" name="__codelineno-0-1229"></a><span class="nd">@abstractmethod</span> |
| <a id="__codelineno-0-1230" name="__codelineno-0-1230"></a><span class="k">def</span><span class="w"> </span><span class="nf">schema</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">schema</span><span class="p">:</span> <span class="n">pa</span><span class="o">.</span><span class="n">Schema</span><span class="p">,</span> <span class="n">struct_result</span><span class="p">:</span> <span class="n">T</span><span class="p">)</span> <span class="o">-></span> <span class="n">T</span><span class="p">:</span> |
| <a id="__codelineno-0-1231" name="__codelineno-0-1231"></a><span class="w"> </span><span class="sd">"""Visit a schema."""</span> |
| </code></pre></div></td></tr></table></div> |
| </details> |
| </div> |
| |
| </div> |
| |
| <div class="doc doc-object doc-function"> |
| |
| |
| <h3 id="pyiceberg.io.pyarrow.PyArrowSchemaVisitor.struct" class="doc doc-heading"> |
| <code class="highlight language-python"><span class="n">struct</span><span class="p">(</span><span class="n">struct</span><span class="p">,</span> <span class="n">field_results</span><span class="p">)</span></code> |
| |
| <span class="doc doc-labels"> |
| <small class="doc doc-label doc-label-abstractmethod"><code>abstractmethod</code></small> |
| </span> |
| |
| <a href="#pyiceberg.io.pyarrow.PyArrowSchemaVisitor.struct" class="headerlink" title="Permanent link">¶</a></h3> |
| |
| |
| <div class="doc doc-contents "> |
| |
| <p>Visit a struct.</p> |
| |
| |
| <details class="quote"> |
| <summary>Source code in <code>pyiceberg/io/pyarrow.py</code></summary> |
| <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-1233">1233</a></span> |
| <span class="normal"><a href="#__codelineno-0-1234">1234</a></span> |
| <span class="normal"><a href="#__codelineno-0-1235">1235</a></span></pre></div></td><td class="code"><div><pre><span></span><code><a id="__codelineno-0-1233" name="__codelineno-0-1233"></a><span class="nd">@abstractmethod</span> |
| <a id="__codelineno-0-1234" name="__codelineno-0-1234"></a><span class="k">def</span><span class="w"> </span><span class="nf">struct</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">struct</span><span class="p">:</span> <span class="n">pa</span><span class="o">.</span><span class="n">StructType</span><span class="p">,</span> <span class="n">field_results</span><span class="p">:</span> <span class="n">List</span><span class="p">[</span><span class="n">T</span><span class="p">])</span> <span class="o">-></span> <span class="n">T</span><span class="p">:</span> |
| <a id="__codelineno-0-1235" name="__codelineno-0-1235"></a><span class="w"> </span><span class="sd">"""Visit a struct."""</span> |
| </code></pre></div></td></tr></table></div> |
| </details> |
| </div> |
| |
| </div> |
| |
| |
| |
| </div> |
| |
| </div> |
| |
| </div> |
| |
| <div class="doc doc-object doc-class"> |
| |
| |
| |
| <h2 id="pyiceberg.io.pyarrow.UnsupportedPyArrowTypeException" class="doc doc-heading"> |
| <code>UnsupportedPyArrowTypeException</code> |
| |
| |
| <a href="#pyiceberg.io.pyarrow.UnsupportedPyArrowTypeException" class="headerlink" title="Permanent link">¶</a></h2> |
| |
| |
| <div class="doc doc-contents "> |
| <p class="doc doc-class-bases"> |
| Bases: <code><span title="Exception">Exception</span></code></p> |
| |
| |
| <p>Cannot convert PyArrow type to corresponding Iceberg type.</p> |
| |
| |
| |
| |
| |
| |
| |
| |
| <details class="quote"> |
| <summary>Source code in <code>pyiceberg/io/pyarrow.py</code></summary> |
| <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-241">241</a></span> |
| <span class="normal"><a href="#__codelineno-0-242">242</a></span> |
| <span class="normal"><a href="#__codelineno-0-243">243</a></span> |
| <span class="normal"><a href="#__codelineno-0-244">244</a></span> |
| <span class="normal"><a href="#__codelineno-0-245">245</a></span> |
| <span class="normal"><a href="#__codelineno-0-246">246</a></span></pre></div></td><td class="code"><div><pre><span></span><code><a id="__codelineno-0-241" name="__codelineno-0-241"></a><span class="k">class</span><span class="w"> </span><span class="nc">UnsupportedPyArrowTypeException</span><span class="p">(</span><span class="ne">Exception</span><span class="p">):</span> |
| <a id="__codelineno-0-242" name="__codelineno-0-242"></a><span class="w"> </span><span class="sd">"""Cannot convert PyArrow type to corresponding Iceberg type."""</span> |
| <a id="__codelineno-0-243" name="__codelineno-0-243"></a> |
| <a id="__codelineno-0-244" name="__codelineno-0-244"></a> <span class="k">def</span><span class="w"> </span><span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">field</span><span class="p">:</span> <span class="n">pa</span><span class="o">.</span><span class="n">Field</span><span class="p">,</span> <span class="o">*</span><span class="n">args</span><span class="p">:</span> <span class="n">Any</span><span class="p">):</span> |
| <a id="__codelineno-0-245" name="__codelineno-0-245"></a> <span class="bp">self</span><span class="o">.</span><span class="n">field</span> <span class="o">=</span> <span class="n">field</span> |
| <a id="__codelineno-0-246" name="__codelineno-0-246"></a> <span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="fm">__init__</span><span class="p">(</span><span class="o">*</span><span class="n">args</span><span class="p">)</span> |
| </code></pre></div></td></tr></table></div> |
| </details> |
| |
| |
| |
| <div class="doc doc-children"> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| </div> |
| |
| </div> |
| |
| </div> |
| |
| |
| <div class="doc doc-object doc-function"> |
| |
| |
| <h2 id="pyiceberg.io.pyarrow.compute_statistics_plan" class="doc doc-heading"> |
| <code class="highlight language-python"><span class="n">compute_statistics_plan</span><span class="p">(</span><span class="n">schema</span><span class="p">,</span> <span class="n">table_properties</span><span class="p">)</span></code> |
| |
| <a href="#pyiceberg.io.pyarrow.compute_statistics_plan" class="headerlink" title="Permanent link">¶</a></h2> |
| |
| |
| <div class="doc doc-contents "> |
| |
| <p>Compute the statistics plan for all columns.</p> |
| <p>The resulting list is assumed to have the same length and same order as the columns in the pyarrow table. |
| This allows the list to map from the column index to the Iceberg column ID. |
| For each element, the desired metrics collection that was provided by the user in the configuration |
| is computed and then adjusted according to the data type of the column. For nested columns, the minimum |
| and maximum values are not computed, and truncation is only applied to text or binary strings.</p> |
| |
| |
| <p><span class="doc-section-title">Parameters:</span></p> |
| <table> |
| <thead> |
| <tr> |
| <th>Name</th> |
| <th>Type</th> |
| <th>Description</th> |
| <th>Default</th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr class="doc-section-item"> |
| <td> |
| <code>table_properties</code> |
| </td> |
| <td> |
| <code><span title="typing.Dict">Dict</span>[<span title="str">str</span>, <span title="str">str</span>]</code> |
| </td> |
| <td> |
| <div class="doc-md-description"> |
| <p>The Iceberg table metadata properties. |
| They are required to compute the mapping of column position to Iceberg schema type ID. They are also |
| used to set the mode for column metrics collection.</p> |
| </div> |
| </td> |
| <td> |
| <em>required</em> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| |
| |
| <details class="quote"> |
| <summary>Source code in <code>pyiceberg/io/pyarrow.py</code></summary> |
| <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-2260">2260</a></span> |
| <span class="normal"><a href="#__codelineno-0-2261">2261</a></span> |
| <span class="normal"><a href="#__codelineno-0-2262">2262</a></span> |
| <span class="normal"><a href="#__codelineno-0-2263">2263</a></span> |
| <span class="normal"><a href="#__codelineno-0-2264">2264</a></span> |
| <span class="normal"><a href="#__codelineno-0-2265">2265</a></span> |
| <span class="normal"><a href="#__codelineno-0-2266">2266</a></span> |
| <span class="normal"><a href="#__codelineno-0-2267">2267</a></span> |
| <span class="normal"><a href="#__codelineno-0-2268">2268</a></span> |
| <span class="normal"><a href="#__codelineno-0-2269">2269</a></span> |
| <span class="normal"><a href="#__codelineno-0-2270">2270</a></span> |
| <span class="normal"><a href="#__codelineno-0-2271">2271</a></span> |
| <span class="normal"><a href="#__codelineno-0-2272">2272</a></span> |
| <span class="normal"><a href="#__codelineno-0-2273">2273</a></span> |
| <span class="normal"><a href="#__codelineno-0-2274">2274</a></span> |
| <span class="normal"><a href="#__codelineno-0-2275">2275</a></span> |
| <span class="normal"><a href="#__codelineno-0-2276">2276</a></span> |
| <span class="normal"><a href="#__codelineno-0-2277">2277</a></span> |
| <span class="normal"><a href="#__codelineno-0-2278">2278</a></span> |
| <span class="normal"><a href="#__codelineno-0-2279">2279</a></span> |
| <span class="normal"><a href="#__codelineno-0-2280">2280</a></span> |
| <span class="normal"><a href="#__codelineno-0-2281">2281</a></span> |
| <span class="normal"><a href="#__codelineno-0-2282">2282</a></span></pre></div></td><td class="code"><div><pre><span></span><code><a id="__codelineno-0-2260" name="__codelineno-0-2260"></a><span class="k">def</span><span class="w"> </span><span class="nf">compute_statistics_plan</span><span class="p">(</span> |
| <a id="__codelineno-0-2261" name="__codelineno-0-2261"></a> <span class="n">schema</span><span class="p">:</span> <span class="n">Schema</span><span class="p">,</span> |
| <a id="__codelineno-0-2262" name="__codelineno-0-2262"></a> <span class="n">table_properties</span><span class="p">:</span> <span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="nb">str</span><span class="p">],</span> |
| <a id="__codelineno-0-2263" name="__codelineno-0-2263"></a><span class="p">)</span> <span class="o">-></span> <span class="n">Dict</span><span class="p">[</span><span class="nb">int</span><span class="p">,</span> <span class="n">StatisticsCollector</span><span class="p">]:</span> |
| <a id="__codelineno-0-2264" name="__codelineno-0-2264"></a><span class="w"> </span><span class="sd">"""</span> |
| <a id="__codelineno-0-2265" name="__codelineno-0-2265"></a><span class="sd"> Compute the statistics plan for all columns.</span> |
| <a id="__codelineno-0-2266" name="__codelineno-0-2266"></a> |
| <a id="__codelineno-0-2267" name="__codelineno-0-2267"></a><span class="sd"> The resulting list is assumed to have the same length and same order as the columns in the pyarrow table.</span> |
| <a id="__codelineno-0-2268" name="__codelineno-0-2268"></a><span class="sd"> This allows the list to map from the column index to the Iceberg column ID.</span> |
| <a id="__codelineno-0-2269" name="__codelineno-0-2269"></a><span class="sd"> For each element, the desired metrics collection that was provided by the user in the configuration</span> |
| <a id="__codelineno-0-2270" name="__codelineno-0-2270"></a><span class="sd"> is computed and then adjusted according to the data type of the column. For nested columns the minimum</span> |
| <a id="__codelineno-0-2271" name="__codelineno-0-2271"></a><span class="sd"> and maximum values are not computed. And truncation is only applied to text of binary strings.</span> |
| <a id="__codelineno-0-2272" name="__codelineno-0-2272"></a> |
| <a id="__codelineno-0-2273" name="__codelineno-0-2273"></a><span class="sd"> Args:</span> |
| <a id="__codelineno-0-2274" name="__codelineno-0-2274"></a><span class="sd"> table_properties (from pyiceberg.table.metadata.TableMetadata): The Iceberg table metadata properties.</span> |
| <a id="__codelineno-0-2275" name="__codelineno-0-2275"></a><span class="sd"> They are required to compute the mapping of column position to iceberg schema type id. It's also</span> |
| <a id="__codelineno-0-2276" name="__codelineno-0-2276"></a><span class="sd"> used to set the mode for column metrics collection</span> |
| <a id="__codelineno-0-2277" name="__codelineno-0-2277"></a><span class="sd"> """</span> |
| <a id="__codelineno-0-2278" name="__codelineno-0-2278"></a> <span class="n">stats_cols</span> <span class="o">=</span> <span class="n">pre_order_visit</span><span class="p">(</span><span class="n">schema</span><span class="p">,</span> <span class="n">PyArrowStatisticsCollector</span><span class="p">(</span><span class="n">schema</span><span class="p">,</span> <span class="n">table_properties</span><span class="p">))</span> |
| <a id="__codelineno-0-2279" name="__codelineno-0-2279"></a> <span class="n">result</span><span class="p">:</span> <span class="n">Dict</span><span class="p">[</span><span class="nb">int</span><span class="p">,</span> <span class="n">StatisticsCollector</span><span class="p">]</span> <span class="o">=</span> <span class="p">{}</span> |
| <a id="__codelineno-0-2280" name="__codelineno-0-2280"></a> <span class="k">for</span> <span class="n">stats_col</span> <span class="ow">in</span> <span class="n">stats_cols</span><span class="p">:</span> |
| <a id="__codelineno-0-2281" name="__codelineno-0-2281"></a> <span class="n">result</span><span class="p">[</span><span class="n">stats_col</span><span class="o">.</span><span class="n">field_id</span><span class="p">]</span> <span class="o">=</span> <span class="n">stats_col</span> |
| <a id="__codelineno-0-2282" name="__codelineno-0-2282"></a> <span class="k">return</span> <span class="n">result</span> |
| </code></pre></div></td></tr></table></div> |
| </details> |
| </div> |
| |
| </div> |
| |
| <div class="doc doc-object doc-function"> |
| |
| |
| <h2 id="pyiceberg.io.pyarrow.data_file_statistics_from_parquet_metadata" class="doc doc-heading"> |
| <code class="highlight language-python"><span class="n">data_file_statistics_from_parquet_metadata</span><span class="p">(</span><span class="n">parquet_metadata</span><span class="p">,</span> <span class="n">stats_columns</span><span class="p">,</span> <span class="n">parquet_column_mapping</span><span class="p">)</span></code> |
| |
| <a href="#pyiceberg.io.pyarrow.data_file_statistics_from_parquet_metadata" class="headerlink" title="Permanent link">¶</a></h2> |
| |
| |
| <div class="doc doc-contents "> |
| |
| <p>Compute and return DataFileStatistics that includes the following.</p> |
| <ul> |
| <li>record_count</li> |
| <li>column_sizes</li> |
| <li>value_counts</li> |
| <li>null_value_counts</li> |
| <li>nan_value_counts</li> |
| <li>column_aggregates</li> |
| <li>split_offsets</li> |
| </ul> |
| |
| |
| <p><span class="doc-section-title">Parameters:</span></p> |
| <table> |
| <thead> |
| <tr> |
| <th>Name</th> |
| <th>Type</th> |
| <th>Description</th> |
| <th>Default</th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr class="doc-section-item"> |
| <td> |
| <code>parquet_metadata</code> |
| </td> |
| <td> |
| <code><span title="pyarrow.parquet.FileMetaData">FileMetaData</span></code> |
| </td> |
| <td> |
| <div class="doc-md-description"> |
| <p>A pyarrow metadata object.</p> |
| </div> |
| </td> |
| <td> |
| <em>required</em> |
| </td> |
| </tr> |
| <tr class="doc-section-item"> |
| <td> |
| <code>stats_columns</code> |
| </td> |
| <td> |
| <code><span title="typing.Dict">Dict</span>[<span title="int">int</span>, <span title="pyiceberg.io.pyarrow.StatisticsCollector">StatisticsCollector</span>]</code> |
| </td> |
| <td> |
| <div class="doc-md-description"> |
| <p>The statistics gathering plan. It is required to |
| set the mode for column metrics collection.</p> |
| </div> |
| </td> |
| <td> |
| <em>required</em> |
| </td> |
| </tr> |
| <tr class="doc-section-item"> |
| <td> |
| <code>parquet_column_mapping</code> |
| </td> |
| <td> |
| <code><span title="typing.Dict">Dict</span>[<span title="str">str</span>, <span title="int">int</span>]</code> |
| </td> |
| <td> |
| <div class="doc-md-description"> |
| <p>The mapping of the Parquet column name to the field ID.</p> |
| </div> |
| </td> |
| <td> |
| <em>required</em> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| |
| |
| <details class="quote"> |
| <summary>Source code in <code>pyiceberg/io/pyarrow.py</code></summary> |
| <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-2428">2428</a></span> |
| <span class="normal"><a href="#__codelineno-0-2429">2429</a></span> |
| <span class="normal"><a href="#__codelineno-0-2430">2430</a></span> |
| <span class="normal"><a href="#__codelineno-0-2431">2431</a></span> |
| <span class="normal"><a href="#__codelineno-0-2432">2432</a></span> |
| <span class="normal"><a href="#__codelineno-0-2433">2433</a></span> |
| <span class="normal"><a href="#__codelineno-0-2434">2434</a></span> |
| <span class="normal"><a href="#__codelineno-0-2435">2435</a></span> |
| <span class="normal"><a href="#__codelineno-0-2436">2436</a></span> |
| <span class="normal"><a href="#__codelineno-0-2437">2437</a></span> |
| <span class="normal"><a href="#__codelineno-0-2438">2438</a></span> |
| <span class="normal"><a href="#__codelineno-0-2439">2439</a></span> |
| <span class="normal"><a href="#__codelineno-0-2440">2440</a></span> |
| <span class="normal"><a href="#__codelineno-0-2441">2441</a></span> |
| <span class="normal"><a href="#__codelineno-0-2442">2442</a></span> |
| <span class="normal"><a href="#__codelineno-0-2443">2443</a></span> |
| <span class="normal"><a href="#__codelineno-0-2444">2444</a></span> |
| <span class="normal"><a href="#__codelineno-0-2445">2445</a></span> |
| <span class="normal"><a href="#__codelineno-0-2446">2446</a></span> |
| <span class="normal"><a href="#__codelineno-0-2447">2447</a></span> |
| <span class="normal"><a href="#__codelineno-0-2448">2448</a></span> |
| <span class="normal"><a href="#__codelineno-0-2449">2449</a></span> |
| <span class="normal"><a href="#__codelineno-0-2450">2450</a></span> |
| <span class="normal"><a href="#__codelineno-0-2451">2451</a></span> |
| <span class="normal"><a href="#__codelineno-0-2452">2452</a></span> |
| <span class="normal"><a href="#__codelineno-0-2453">2453</a></span> |
| <span class="normal"><a href="#__codelineno-0-2454">2454</a></span> |
| <span class="normal"><a href="#__codelineno-0-2455">2455</a></span> |
| <span class="normal"><a href="#__codelineno-0-2456">2456</a></span> |
| <span class="normal"><a href="#__codelineno-0-2457">2457</a></span> |
| <span class="normal"><a href="#__codelineno-0-2458">2458</a></span> |
| <span class="normal"><a href="#__codelineno-0-2459">2459</a></span> |
| <span class="normal"><a href="#__codelineno-0-2460">2460</a></span> |
| <span class="normal"><a href="#__codelineno-0-2461">2461</a></span> |
| <span class="normal"><a href="#__codelineno-0-2462">2462</a></span> |
| <span class="normal"><a href="#__codelineno-0-2463">2463</a></span> |
| <span class="normal"><a href="#__codelineno-0-2464">2464</a></span> |
| <span class="normal"><a href="#__codelineno-0-2465">2465</a></span> |
| <span class="normal"><a href="#__codelineno-0-2466">2466</a></span> |
| <span class="normal"><a href="#__codelineno-0-2467">2467</a></span> |
| <span class="normal"><a href="#__codelineno-0-2468">2468</a></span> |
| <span class="normal"><a href="#__codelineno-0-2469">2469</a></span> |
| <span class="normal"><a href="#__codelineno-0-2470">2470</a></span> |
| <span class="normal"><a href="#__codelineno-0-2471">2471</a></span> |
| <span class="normal"><a href="#__codelineno-0-2472">2472</a></span> |
| <span class="normal"><a href="#__codelineno-0-2473">2473</a></span> |
| <span class="normal"><a href="#__codelineno-0-2474">2474</a></span> |
| <span class="normal"><a href="#__codelineno-0-2475">2475</a></span> |
| <span class="normal"><a href="#__codelineno-0-2476">2476</a></span> |
| <span class="normal"><a href="#__codelineno-0-2477">2477</a></span> |
| <span class="normal"><a href="#__codelineno-0-2478">2478</a></span> |
| <span class="normal"><a href="#__codelineno-0-2479">2479</a></span> |
| <span class="normal"><a href="#__codelineno-0-2480">2480</a></span> |
| <span class="normal"><a href="#__codelineno-0-2481">2481</a></span> |
| <span class="normal"><a href="#__codelineno-0-2482">2482</a></span> |
| <span class="normal"><a href="#__codelineno-0-2483">2483</a></span> |
| <span class="normal"><a href="#__codelineno-0-2484">2484</a></span> |
| <span class="normal"><a href="#__codelineno-0-2485">2485</a></span> |
| <span class="normal"><a href="#__codelineno-0-2486">2486</a></span> |
| <span class="normal"><a href="#__codelineno-0-2487">2487</a></span> |
| <span class="normal"><a href="#__codelineno-0-2488">2488</a></span> |
| <span class="normal"><a href="#__codelineno-0-2489">2489</a></span> |
| <span class="normal"><a href="#__codelineno-0-2490">2490</a></span> |
| <span class="normal"><a href="#__codelineno-0-2491">2491</a></span> |
| <span class="normal"><a href="#__codelineno-0-2492">2492</a></span> |
| <span class="normal"><a href="#__codelineno-0-2493">2493</a></span> |
| <span class="normal"><a href="#__codelineno-0-2494">2494</a></span> |
| <span class="normal"><a href="#__codelineno-0-2495">2495</a></span> |
| <span class="normal"><a href="#__codelineno-0-2496">2496</a></span> |
| <span class="normal"><a href="#__codelineno-0-2497">2497</a></span> |
| <span class="normal"><a href="#__codelineno-0-2498">2498</a></span> |
| <span class="normal"><a href="#__codelineno-0-2499">2499</a></span> |
| <span class="normal"><a href="#__codelineno-0-2500">2500</a></span> |
| <span class="normal"><a href="#__codelineno-0-2501">2501</a></span> |
| <span class="normal"><a href="#__codelineno-0-2502">2502</a></span> |
| <span class="normal"><a href="#__codelineno-0-2503">2503</a></span> |
| <span class="normal"><a href="#__codelineno-0-2504">2504</a></span> |
| <span class="normal"><a href="#__codelineno-0-2505">2505</a></span> |
| <span class="normal"><a href="#__codelineno-0-2506">2506</a></span> |
| <span class="normal"><a href="#__codelineno-0-2507">2507</a></span> |
| <span class="normal"><a href="#__codelineno-0-2508">2508</a></span> |
| <span class="normal"><a href="#__codelineno-0-2509">2509</a></span> |
| <span class="normal"><a href="#__codelineno-0-2510">2510</a></span> |
| <span class="normal"><a href="#__codelineno-0-2511">2511</a></span> |
| <span class="normal"><a href="#__codelineno-0-2512">2512</a></span> |
| <span class="normal"><a href="#__codelineno-0-2513">2513</a></span> |
| <span class="normal"><a href="#__codelineno-0-2514">2514</a></span> |
| <span class="normal"><a href="#__codelineno-0-2515">2515</a></span> |
| <span class="normal"><a href="#__codelineno-0-2516">2516</a></span> |
| <span class="normal"><a href="#__codelineno-0-2517">2517</a></span> |
| <span class="normal"><a href="#__codelineno-0-2518">2518</a></span> |
| <span class="normal"><a href="#__codelineno-0-2519">2519</a></span> |
| <span class="normal"><a href="#__codelineno-0-2520">2520</a></span> |
| <span class="normal"><a href="#__codelineno-0-2521">2521</a></span> |
| <span class="normal"><a href="#__codelineno-0-2522">2522</a></span> |
| <span class="normal"><a href="#__codelineno-0-2523">2523</a></span> |
| <span class="normal"><a href="#__codelineno-0-2524">2524</a></span> |
| <span class="normal"><a href="#__codelineno-0-2525">2525</a></span> |
| <span class="normal"><a href="#__codelineno-0-2526">2526</a></span> |
| <span class="normal"><a href="#__codelineno-0-2527">2527</a></span> |
| <span class="normal"><a href="#__codelineno-0-2528">2528</a></span> |
| <span class="normal"><a href="#__codelineno-0-2529">2529</a></span> |
| <span class="normal"><a href="#__codelineno-0-2530">2530</a></span> |
| <span class="normal"><a href="#__codelineno-0-2531">2531</a></span> |
| <span class="normal"><a href="#__codelineno-0-2532">2532</a></span> |
| <span class="normal"><a href="#__codelineno-0-2533">2533</a></span> |
| <span class="normal"><a href="#__codelineno-0-2534">2534</a></span> |
| <span class="normal"><a href="#__codelineno-0-2535">2535</a></span> |
| <span class="normal"><a href="#__codelineno-0-2536">2536</a></span> |
| <span class="normal"><a href="#__codelineno-0-2537">2537</a></span> |
| <span class="normal"><a href="#__codelineno-0-2538">2538</a></span> |
| <span class="normal"><a href="#__codelineno-0-2539">2539</a></span> |
| <span class="normal"><a href="#__codelineno-0-2540">2540</a></span></pre></div></td><td class="code"><div><pre><span></span><code><a id="__codelineno-0-2428" name="__codelineno-0-2428"></a><span class="k">def</span><span class="w"> </span><span class="nf">data_file_statistics_from_parquet_metadata</span><span class="p">(</span> |
| <a id="__codelineno-0-2429" name="__codelineno-0-2429"></a> <span class="n">parquet_metadata</span><span class="p">:</span> <span class="n">pq</span><span class="o">.</span><span class="n">FileMetaData</span><span class="p">,</span> |
| <a id="__codelineno-0-2430" name="__codelineno-0-2430"></a> <span class="n">stats_columns</span><span class="p">:</span> <span class="n">Dict</span><span class="p">[</span><span class="nb">int</span><span class="p">,</span> <span class="n">StatisticsCollector</span><span class="p">],</span> |
| <a id="__codelineno-0-2431" name="__codelineno-0-2431"></a> <span class="n">parquet_column_mapping</span><span class="p">:</span> <span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="nb">int</span><span class="p">],</span> |
| <a id="__codelineno-0-2432" name="__codelineno-0-2432"></a><span class="p">)</span> <span class="o">-></span> <span class="n">DataFileStatistics</span><span class="p">:</span> |
| <a id="__codelineno-0-2433" name="__codelineno-0-2433"></a><span class="w"> </span><span class="sd">"""</span> |
| <a id="__codelineno-0-2434" name="__codelineno-0-2434"></a><span class="sd"> Compute and return DataFileStatistics that includes the following.</span> |
| <a id="__codelineno-0-2435" name="__codelineno-0-2435"></a> |
| <a id="__codelineno-0-2436" name="__codelineno-0-2436"></a><span class="sd"> - record_count</span> |
| <a id="__codelineno-0-2437" name="__codelineno-0-2437"></a><span class="sd"> - column_sizes</span> |
| <a id="__codelineno-0-2438" name="__codelineno-0-2438"></a><span class="sd"> - value_counts</span> |
| <a id="__codelineno-0-2439" name="__codelineno-0-2439"></a><span class="sd"> - null_value_counts</span> |
| <a id="__codelineno-0-2440" name="__codelineno-0-2440"></a><span class="sd"> - nan_value_counts</span> |
| <a id="__codelineno-0-2441" name="__codelineno-0-2441"></a><span class="sd"> - column_aggregates</span> |
| <a id="__codelineno-0-2442" name="__codelineno-0-2442"></a><span class="sd"> - split_offsets</span> |
| <a id="__codelineno-0-2443" name="__codelineno-0-2443"></a> |
| <a id="__codelineno-0-2444" name="__codelineno-0-2444"></a><span class="sd"> Args:</span> |
| <a id="__codelineno-0-2445" name="__codelineno-0-2445"></a><span class="sd"> parquet_metadata (pyarrow.parquet.FileMetaData): A pyarrow metadata object.</span> |
| <a id="__codelineno-0-2446" name="__codelineno-0-2446"></a><span class="sd"> stats_columns (Dict[int, StatisticsCollector]): The statistics gathering plan. It is required to</span> |
| <a id="__codelineno-0-2447" name="__codelineno-0-2447"></a><span class="sd"> set the mode for column metrics collection</span> |
| <a id="__codelineno-0-2448" name="__codelineno-0-2448"></a><span class="sd"> parquet_column_mapping (Dict[str, int]): The mapping of the parquet file name to the field ID</span> |
| <a id="__codelineno-0-2449" name="__codelineno-0-2449"></a><span class="sd"> """</span> |
| <a id="__codelineno-0-2450" name="__codelineno-0-2450"></a> <span class="n">column_sizes</span><span class="p">:</span> <span class="n">Dict</span><span class="p">[</span><span class="nb">int</span><span class="p">,</span> <span class="nb">int</span><span class="p">]</span> <span class="o">=</span> <span class="p">{}</span> |
| <a id="__codelineno-0-2451" name="__codelineno-0-2451"></a> <span class="n">value_counts</span><span class="p">:</span> <span class="n">Dict</span><span class="p">[</span><span class="nb">int</span><span class="p">,</span> <span class="nb">int</span><span class="p">]</span> <span class="o">=</span> <span class="p">{}</span> |
| <a id="__codelineno-0-2452" name="__codelineno-0-2452"></a> <span class="n">split_offsets</span><span class="p">:</span> <span class="n">List</span><span class="p">[</span><span class="nb">int</span><span class="p">]</span> <span class="o">=</span> <span class="p">[]</span> |
| <a id="__codelineno-0-2453" name="__codelineno-0-2453"></a> |
| <a id="__codelineno-0-2454" name="__codelineno-0-2454"></a> <span class="n">null_value_counts</span><span class="p">:</span> <span class="n">Dict</span><span class="p">[</span><span class="nb">int</span><span class="p">,</span> <span class="nb">int</span><span class="p">]</span> <span class="o">=</span> <span class="p">{}</span> |
| <a id="__codelineno-0-2455" name="__codelineno-0-2455"></a> <span class="n">nan_value_counts</span><span class="p">:</span> <span class="n">Dict</span><span class="p">[</span><span class="nb">int</span><span class="p">,</span> <span class="nb">int</span><span class="p">]</span> <span class="o">=</span> <span class="p">{}</span> |
| <a id="__codelineno-0-2456" name="__codelineno-0-2456"></a> |
| <a id="__codelineno-0-2457" name="__codelineno-0-2457"></a> <span class="n">col_aggs</span> <span class="o">=</span> <span class="p">{}</span> |
| <a id="__codelineno-0-2458" name="__codelineno-0-2458"></a> |
| <a id="__codelineno-0-2459" name="__codelineno-0-2459"></a> <span class="n">invalidate_col</span><span class="p">:</span> <span class="n">Set</span><span class="p">[</span><span class="nb">int</span><span class="p">]</span> <span class="o">=</span> <span class="nb">set</span><span class="p">()</span> |
| <a id="__codelineno-0-2460" name="__codelineno-0-2460"></a> <span class="k">for</span> <span class="n">r</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">parquet_metadata</span><span class="o">.</span><span class="n">num_row_groups</span><span class="p">):</span> |
| <a id="__codelineno-0-2461" name="__codelineno-0-2461"></a> <span class="c1"># References:</span> |
| <a id="__codelineno-0-2462" name="__codelineno-0-2462"></a> <span class="c1"># https://github.com/apache/iceberg/blob/fc381a81a1fdb8f51a0637ca27cd30673bd7aad3/parquet/src/main/java/org/apache/iceberg/parquet/ParquetUtil.java#L232</span> |
| <a id="__codelineno-0-2463" name="__codelineno-0-2463"></a> <span class="c1"># https://github.com/apache/parquet-mr/blob/ac29db4611f86a07cc6877b416aa4b183e09b353/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/metadata/ColumnChunkMetaData.java#L184</span> |
| <a id="__codelineno-0-2464" name="__codelineno-0-2464"></a> |
| <a id="__codelineno-0-2465" name="__codelineno-0-2465"></a> <span class="n">row_group</span> <span class="o">=</span> <span class="n">parquet_metadata</span><span class="o">.</span><span class="n">row_group</span><span class="p">(</span><span class="n">r</span><span class="p">)</span> |
| <a id="__codelineno-0-2466" name="__codelineno-0-2466"></a> |
| <a id="__codelineno-0-2467" name="__codelineno-0-2467"></a> <span class="n">data_offset</span> <span class="o">=</span> <span class="n">row_group</span><span class="o">.</span><span class="n">column</span><span class="p">(</span><span class="mi">0</span><span class="p">)</span><span class="o">.</span><span class="n">data_page_offset</span> |
| <a id="__codelineno-0-2468" name="__codelineno-0-2468"></a> <span class="n">dictionary_offset</span> <span class="o">=</span> <span class="n">row_group</span><span class="o">.</span><span class="n">column</span><span class="p">(</span><span class="mi">0</span><span class="p">)</span><span class="o">.</span><span class="n">dictionary_page_offset</span> |
| <a id="__codelineno-0-2469" name="__codelineno-0-2469"></a> |
| <a id="__codelineno-0-2470" name="__codelineno-0-2470"></a> <span class="k">if</span> <span class="n">row_group</span><span class="o">.</span><span class="n">column</span><span class="p">(</span><span class="mi">0</span><span class="p">)</span><span class="o">.</span><span class="n">has_dictionary_page</span> <span class="ow">and</span> <span class="n">dictionary_offset</span> <span class="o"><</span> <span class="n">data_offset</span><span class="p">:</span> |
| <a id="__codelineno-0-2471" name="__codelineno-0-2471"></a> <span class="n">split_offsets</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">dictionary_offset</span><span class="p">)</span> |
| <a id="__codelineno-0-2472" name="__codelineno-0-2472"></a> <span class="k">else</span><span class="p">:</span> |
| <a id="__codelineno-0-2473" name="__codelineno-0-2473"></a> <span class="n">split_offsets</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">data_offset</span><span class="p">)</span> |
| <a id="__codelineno-0-2474" name="__codelineno-0-2474"></a> |
| <a id="__codelineno-0-2475" name="__codelineno-0-2475"></a> <span class="k">for</span> <span class="n">pos</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">parquet_metadata</span><span class="o">.</span><span class="n">num_columns</span><span class="p">):</span> |
| <a id="__codelineno-0-2476" name="__codelineno-0-2476"></a> <span class="n">column</span> <span class="o">=</span> <span class="n">row_group</span><span class="o">.</span><span class="n">column</span><span class="p">(</span><span class="n">pos</span><span class="p">)</span> |
| <a id="__codelineno-0-2477" name="__codelineno-0-2477"></a> <span class="n">field_id</span> <span class="o">=</span> <span class="n">parquet_column_mapping</span><span class="p">[</span><span class="n">column</span><span class="o">.</span><span class="n">path_in_schema</span><span class="p">]</span> |
| <a id="__codelineno-0-2478" name="__codelineno-0-2478"></a> |
| <a id="__codelineno-0-2479" name="__codelineno-0-2479"></a> <span class="n">stats_col</span> <span class="o">=</span> <span class="n">stats_columns</span><span class="p">[</span><span class="n">field_id</span><span class="p">]</span> |
| <a id="__codelineno-0-2480" name="__codelineno-0-2480"></a> |
| <a id="__codelineno-0-2481" name="__codelineno-0-2481"></a> <span class="n">column_sizes</span><span class="o">.</span><span class="n">setdefault</span><span class="p">(</span><span class="n">field_id</span><span class="p">,</span> <span class="mi">0</span><span class="p">)</span> |
| <a id="__codelineno-0-2482" name="__codelineno-0-2482"></a> <span class="n">column_sizes</span><span class="p">[</span><span class="n">field_id</span><span class="p">]</span> <span class="o">+=</span> <span class="n">column</span><span class="o">.</span><span class="n">total_compressed_size</span> |
| <a id="__codelineno-0-2483" name="__codelineno-0-2483"></a> |
| <a id="__codelineno-0-2484" name="__codelineno-0-2484"></a> <span class="k">if</span> <span class="n">stats_col</span><span class="o">.</span><span class="n">mode</span> <span class="o">==</span> <span class="n">MetricsMode</span><span class="p">(</span><span class="n">MetricModeTypes</span><span class="o">.</span><span class="n">NONE</span><span class="p">):</span> |
| <a id="__codelineno-0-2485" name="__codelineno-0-2485"></a> <span class="k">continue</span> |
| <a id="__codelineno-0-2486" name="__codelineno-0-2486"></a> |
| <a id="__codelineno-0-2487" name="__codelineno-0-2487"></a> <span class="n">value_counts</span><span class="p">[</span><span class="n">field_id</span><span class="p">]</span> <span class="o">=</span> <span class="n">value_counts</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">field_id</span><span class="p">,</span> <span class="mi">0</span><span class="p">)</span> <span class="o">+</span> <span class="n">column</span><span class="o">.</span><span class="n">num_values</span> |
| <a id="__codelineno-0-2488" name="__codelineno-0-2488"></a> |
| <a id="__codelineno-0-2489" name="__codelineno-0-2489"></a> <span class="k">if</span> <span class="n">column</span><span class="o">.</span><span class="n">is_stats_set</span><span class="p">:</span> |
| <a id="__codelineno-0-2490" name="__codelineno-0-2490"></a> <span class="k">try</span><span class="p">:</span> |
| <a id="__codelineno-0-2491" name="__codelineno-0-2491"></a> <span class="n">statistics</span> <span class="o">=</span> <span class="n">column</span><span class="o">.</span><span class="n">statistics</span> |
| <a id="__codelineno-0-2492" name="__codelineno-0-2492"></a> |
| <a id="__codelineno-0-2493" name="__codelineno-0-2493"></a> <span class="k">if</span> <span class="n">statistics</span><span class="o">.</span><span class="n">has_null_count</span><span class="p">:</span> |
| <a id="__codelineno-0-2494" name="__codelineno-0-2494"></a> <span class="n">null_value_counts</span><span class="p">[</span><span class="n">field_id</span><span class="p">]</span> <span class="o">=</span> <span class="n">null_value_counts</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">field_id</span><span class="p">,</span> <span class="mi">0</span><span class="p">)</span> <span class="o">+</span> <span class="n">statistics</span><span class="o">.</span><span class="n">null_count</span> |
| <a id="__codelineno-0-2495" name="__codelineno-0-2495"></a> |
| <a id="__codelineno-0-2496" name="__codelineno-0-2496"></a> <span class="k">if</span> <span class="n">stats_col</span><span class="o">.</span><span class="n">mode</span> <span class="o">==</span> <span class="n">MetricsMode</span><span class="p">(</span><span class="n">MetricModeTypes</span><span class="o">.</span><span class="n">COUNTS</span><span class="p">):</span> |
| <a id="__codelineno-0-2497" name="__codelineno-0-2497"></a> <span class="k">continue</span> |
| <a id="__codelineno-0-2498" name="__codelineno-0-2498"></a> |
| <a id="__codelineno-0-2499" name="__codelineno-0-2499"></a> <span class="k">if</span> <span class="n">field_id</span> <span class="ow">not</span> <span class="ow">in</span> <span class="n">col_aggs</span><span class="p">:</span> |
| <a id="__codelineno-0-2500" name="__codelineno-0-2500"></a> <span class="k">try</span><span class="p">:</span> |
| <a id="__codelineno-0-2501" name="__codelineno-0-2501"></a> <span class="n">col_aggs</span><span class="p">[</span><span class="n">field_id</span><span class="p">]</span> <span class="o">=</span> <span class="n">StatsAggregator</span><span class="p">(</span> |
| <a id="__codelineno-0-2502" name="__codelineno-0-2502"></a> <span class="n">stats_col</span><span class="o">.</span><span class="n">iceberg_type</span><span class="p">,</span> <span class="n">statistics</span><span class="o">.</span><span class="n">physical_type</span><span class="p">,</span> <span class="n">stats_col</span><span class="o">.</span><span class="n">mode</span><span class="o">.</span><span class="n">length</span> |
| <a id="__codelineno-0-2503" name="__codelineno-0-2503"></a> <span class="p">)</span> |
| <a id="__codelineno-0-2504" name="__codelineno-0-2504"></a> <span class="k">except</span> <span class="ne">ValueError</span> <span class="k">as</span> <span class="n">e</span><span class="p">:</span> |
| <a id="__codelineno-0-2505" name="__codelineno-0-2505"></a> <span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="sa">f</span><span class="s2">"</span><span class="si">{</span><span class="n">e</span><span class="si">}</span><span class="s2"> for column '</span><span class="si">{</span><span class="n">stats_col</span><span class="o">.</span><span class="n">column_name</span><span class="si">}</span><span class="s2">'"</span><span class="p">)</span> <span class="kn">from</span><span class="w"> </span><span class="nn">e</span> |
| <a id="__codelineno-0-2506" name="__codelineno-0-2506"></a> |
| <a id="__codelineno-0-2507" name="__codelineno-0-2507"></a> <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">stats_col</span><span class="o">.</span><span class="n">iceberg_type</span><span class="p">,</span> <span class="n">DecimalType</span><span class="p">)</span> <span class="ow">and</span> <span class="n">statistics</span><span class="o">.</span><span class="n">physical_type</span> <span class="o">!=</span> <span class="s2">"FIXED_LEN_BYTE_ARRAY"</span><span class="p">:</span> |
| <a id="__codelineno-0-2508" name="__codelineno-0-2508"></a> <span class="n">scale</span> <span class="o">=</span> <span class="n">stats_col</span><span class="o">.</span><span class="n">iceberg_type</span><span class="o">.</span><span class="n">scale</span> |
| <a id="__codelineno-0-2509" name="__codelineno-0-2509"></a> <span class="n">col_aggs</span><span class="p">[</span><span class="n">field_id</span><span class="p">]</span><span class="o">.</span><span class="n">update_min</span><span class="p">(</span> |
| <a id="__codelineno-0-2510" name="__codelineno-0-2510"></a> <span class="n">unscaled_to_decimal</span><span class="p">(</span><span class="n">statistics</span><span class="o">.</span><span class="n">min_raw</span><span class="p">,</span> <span class="n">scale</span><span class="p">)</span> |
| <a id="__codelineno-0-2511" name="__codelineno-0-2511"></a> <span class="p">)</span> <span class="k">if</span> <span class="n">statistics</span><span class="o">.</span><span class="n">min_raw</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span> <span class="k">else</span> <span class="kc">None</span> |
| <a id="__codelineno-0-2512" name="__codelineno-0-2512"></a> <span class="n">col_aggs</span><span class="p">[</span><span class="n">field_id</span><span class="p">]</span><span class="o">.</span><span class="n">update_max</span><span class="p">(</span> |
| <a id="__codelineno-0-2513" name="__codelineno-0-2513"></a> <span class="n">unscaled_to_decimal</span><span class="p">(</span><span class="n">statistics</span><span class="o">.</span><span class="n">max_raw</span><span class="p">,</span> <span class="n">scale</span><span class="p">)</span> |
| <a id="__codelineno-0-2514" name="__codelineno-0-2514"></a> <span class="p">)</span> <span class="k">if</span> <span class="n">statistics</span><span class="o">.</span><span class="n">max_raw</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span> <span class="k">else</span> <span class="kc">None</span> |
| <a id="__codelineno-0-2515" name="__codelineno-0-2515"></a> <span class="k">else</span><span class="p">:</span> |
| <a id="__codelineno-0-2516" name="__codelineno-0-2516"></a> <span class="n">col_aggs</span><span class="p">[</span><span class="n">field_id</span><span class="p">]</span><span class="o">.</span><span class="n">update_min</span><span class="p">(</span><span class="n">statistics</span><span class="o">.</span><span class="n">min</span><span class="p">)</span> |
| <a id="__codelineno-0-2517" name="__codelineno-0-2517"></a> <span class="n">col_aggs</span><span class="p">[</span><span class="n">field_id</span><span class="p">]</span><span class="o">.</span><span class="n">update_max</span><span class="p">(</span><span class="n">statistics</span><span class="o">.</span><span class="n">max</span><span class="p">)</span> |
| <a id="__codelineno-0-2518" name="__codelineno-0-2518"></a> |
| <a id="__codelineno-0-2519" name="__codelineno-0-2519"></a> <span class="k">except</span> <span class="n">pyarrow</span><span class="o">.</span><span class="n">lib</span><span class="o">.</span><span class="n">ArrowNotImplementedError</span> <span class="k">as</span> <span class="n">e</span><span class="p">:</span> |
| <a id="__codelineno-0-2520" name="__codelineno-0-2520"></a> <span class="n">invalidate_col</span><span class="o">.</span><span class="n">add</span><span class="p">(</span><span class="n">field_id</span><span class="p">)</span> |
| <a id="__codelineno-0-2521" name="__codelineno-0-2521"></a> <span class="n">logger</span><span class="o">.</span><span class="n">warning</span><span class="p">(</span><span class="n">e</span><span class="p">)</span> |
| <a id="__codelineno-0-2522" name="__codelineno-0-2522"></a> <span class="k">else</span><span class="p">:</span> |
| <a id="__codelineno-0-2523" name="__codelineno-0-2523"></a> <span class="n">invalidate_col</span><span class="o">.</span><span class="n">add</span><span class="p">(</span><span class="n">field_id</span><span class="p">)</span> |
| <a id="__codelineno-0-2524" name="__codelineno-0-2524"></a> <span class="n">logger</span><span class="o">.</span><span class="n">warning</span><span class="p">(</span><span class="s2">"PyArrow statistics missing for column </span><span class="si">%d</span><span class="s2"> when writing file"</span><span class="p">,</span> <span class="n">pos</span><span class="p">)</span> |
| <a id="__codelineno-0-2525" name="__codelineno-0-2525"></a> |
| <a id="__codelineno-0-2526" name="__codelineno-0-2526"></a> <span class="n">split_offsets</span><span class="o">.</span><span class="n">sort</span><span class="p">()</span> |
| <a id="__codelineno-0-2527" name="__codelineno-0-2527"></a> |
| <a id="__codelineno-0-2528" name="__codelineno-0-2528"></a> <span class="k">for</span> <span class="n">field_id</span> <span class="ow">in</span> <span class="n">invalidate_col</span><span class="p">:</span> |
| <a id="__codelineno-0-2529" name="__codelineno-0-2529"></a> <span class="n">col_aggs</span><span class="o">.</span><span class="n">pop</span><span class="p">(</span><span class="n">field_id</span><span class="p">,</span> <span class="kc">None</span><span class="p">)</span> |
| <a id="__codelineno-0-2530" name="__codelineno-0-2530"></a> <span class="n">null_value_counts</span><span class="o">.</span><span class="n">pop</span><span class="p">(</span><span class="n">field_id</span><span class="p">,</span> <span class="kc">None</span><span class="p">)</span> |
| <a id="__codelineno-0-2531" name="__codelineno-0-2531"></a> |
| <a id="__codelineno-0-2532" name="__codelineno-0-2532"></a> <span class="k">return</span> <span class="n">DataFileStatistics</span><span class="p">(</span> |
| <a id="__codelineno-0-2533" name="__codelineno-0-2533"></a> <span class="n">record_count</span><span class="o">=</span><span class="n">parquet_metadata</span><span class="o">.</span><span class="n">num_rows</span><span class="p">,</span> |
| <a id="__codelineno-0-2534" name="__codelineno-0-2534"></a> <span class="n">column_sizes</span><span class="o">=</span><span class="n">column_sizes</span><span class="p">,</span> |
| <a id="__codelineno-0-2535" name="__codelineno-0-2535"></a> <span class="n">value_counts</span><span class="o">=</span><span class="n">value_counts</span><span class="p">,</span> |
| <a id="__codelineno-0-2536" name="__codelineno-0-2536"></a> <span class="n">null_value_counts</span><span class="o">=</span><span class="n">null_value_counts</span><span class="p">,</span> |
| <a id="__codelineno-0-2537" name="__codelineno-0-2537"></a> <span class="n">nan_value_counts</span><span class="o">=</span><span class="n">nan_value_counts</span><span class="p">,</span> |
| <a id="__codelineno-0-2538" name="__codelineno-0-2538"></a> <span class="n">column_aggregates</span><span class="o">=</span><span class="n">col_aggs</span><span class="p">,</span> |
| <a id="__codelineno-0-2539" name="__codelineno-0-2539"></a> <span class="n">split_offsets</span><span class="o">=</span><span class="n">split_offsets</span><span class="p">,</span> |
| <a id="__codelineno-0-2540" name="__codelineno-0-2540"></a> <span class="p">)</span> |
| </code></pre></div></td></tr></table></div> |
| </details> |
| </div> |
| |
| </div> |
| |
| <div class="doc doc-object doc-function"> |
| |
| |
| <h2 id="pyiceberg.io.pyarrow.parquet_path_to_id_mapping" class="doc doc-heading"> |
| <code class="highlight language-python"><span class="n">parquet_path_to_id_mapping</span><span class="p">(</span><span class="n">schema</span><span class="p">)</span></code> |
| |
| <a href="#pyiceberg.io.pyarrow.parquet_path_to_id_mapping" class="headerlink" title="Permanent link">¶</a></h2> |
| |
| |
| <div class="doc doc-contents "> |
| |
| <p>Compute the mapping of parquet column path to Iceberg ID.</p> |
| <p>For each column, the parquet file metadata has a path_in_schema attribute that follows |
| a specific naming scheme for nested columns. This function computes a mapping of |
| the full paths to the corresponding Iceberg IDs.</p> |
| |
| |
| <p><span class="doc-section-title">Parameters:</span></p> |
| <table> |
| <thead> |
| <tr> |
| <th>Name</th> |
| <th>Type</th> |
| <th>Description</th> |
| <th>Default</th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr class="doc-section-item"> |
| <td> |
| <code>schema</code> |
| </td> |
| <td> |
| <code><a class="autorefs autorefs-internal" title="Schema (pyiceberg.schema.Schema)" href="../../schema/#pyiceberg.schema.Schema">Schema</a></code> |
| </td> |
| <td> |
| <div class="doc-md-description"> |
| <p>The current table schema.</p> |
| </div> |
| </td> |
| <td> |
| <em>required</em> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| |
| |
| <details class="quote"> |
| <summary>Source code in <code>pyiceberg/io/pyarrow.py</code></summary> |
| <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-2338">2338</a></span> |
| <span class="normal"><a href="#__codelineno-0-2339">2339</a></span> |
| <span class="normal"><a href="#__codelineno-0-2340">2340</a></span> |
| <span class="normal"><a href="#__codelineno-0-2341">2341</a></span> |
| <span class="normal"><a href="#__codelineno-0-2342">2342</a></span> |
| <span class="normal"><a href="#__codelineno-0-2343">2343</a></span> |
| <span class="normal"><a href="#__codelineno-0-2344">2344</a></span> |
| <span class="normal"><a href="#__codelineno-0-2345">2345</a></span> |
| <span class="normal"><a href="#__codelineno-0-2346">2346</a></span> |
| <span class="normal"><a href="#__codelineno-0-2347">2347</a></span> |
| <span class="normal"><a href="#__codelineno-0-2348">2348</a></span> |
| <span class="normal"><a href="#__codelineno-0-2349">2349</a></span> |
| <span class="normal"><a href="#__codelineno-0-2350">2350</a></span> |
| <span class="normal"><a href="#__codelineno-0-2351">2351</a></span> |
| <span class="normal"><a href="#__codelineno-0-2352">2352</a></span> |
| <span class="normal"><a href="#__codelineno-0-2353">2353</a></span> |
| <span class="normal"><a href="#__codelineno-0-2354">2354</a></span></pre></div></td><td class="code"><div><pre><span></span><code><a id="__codelineno-0-2338" name="__codelineno-0-2338"></a><span class="k">def</span><span class="w"> </span><span class="nf">parquet_path_to_id_mapping</span><span class="p">(</span> |
| <a id="__codelineno-0-2339" name="__codelineno-0-2339"></a> <span class="n">schema</span><span class="p">:</span> <span class="n">Schema</span><span class="p">,</span> |
| <a id="__codelineno-0-2340" name="__codelineno-0-2340"></a><span class="p">)</span> <span class="o">-></span> <span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="nb">int</span><span class="p">]:</span> |
| <a id="__codelineno-0-2341" name="__codelineno-0-2341"></a><span class="w"> </span><span class="sd">"""</span> |
| <a id="__codelineno-0-2342" name="__codelineno-0-2342"></a><span class="sd"> Compute the mapping of parquet column path to Iceberg ID.</span> |
| <a id="__codelineno-0-2343" name="__codelineno-0-2343"></a> |
| <a id="__codelineno-0-2344" name="__codelineno-0-2344"></a><span class="sd"> For each column, the parquet file metadata has a path_in_schema attribute that follows</span> |
| <a id="__codelineno-0-2345" name="__codelineno-0-2345"></a><span class="sd"> a specific naming scheme for nested columns. This function computes a mapping of</span> |
| <a id="__codelineno-0-2346" name="__codelineno-0-2346"></a><span class="sd"> the full paths to the corresponding Iceberg IDs.</span> |
| <a id="__codelineno-0-2347" name="__codelineno-0-2347"></a> |
| <a id="__codelineno-0-2348" name="__codelineno-0-2348"></a><span class="sd"> Args:</span> |
| <a id="__codelineno-0-2349" name="__codelineno-0-2349"></a><span class="sd"> schema (pyiceberg.schema.Schema): The current table schema.</span> |
| <a id="__codelineno-0-2350" name="__codelineno-0-2350"></a><span class="sd"> """</span> |
| <a id="__codelineno-0-2351" name="__codelineno-0-2351"></a> <span class="n">result</span><span class="p">:</span> <span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="nb">int</span><span class="p">]</span> <span class="o">=</span> <span class="p">{}</span> |
| <a id="__codelineno-0-2352" name="__codelineno-0-2352"></a> <span class="k">for</span> <span class="n">pair</span> <span class="ow">in</span> <span class="n">pre_order_visit</span><span class="p">(</span><span class="n">schema</span><span class="p">,</span> <span class="n">ID2ParquetPathVisitor</span><span class="p">()):</span> |
| <a id="__codelineno-0-2353" name="__codelineno-0-2353"></a> <span class="n">result</span><span class="p">[</span><span class="n">pair</span><span class="o">.</span><span class="n">parquet_path</span><span class="p">]</span> <span class="o">=</span> <span class="n">pair</span><span class="o">.</span><span class="n">field_id</span> |
| <a id="__codelineno-0-2354" name="__codelineno-0-2354"></a> <span class="k">return</span> <span class="n">result</span> |
| </code></pre></div></td></tr></table></div> |
| </details> |
| </div> |
| |
| </div> |
| |
| <div class="doc doc-object doc-function"> |
| |
| |
| <h2 id="pyiceberg.io.pyarrow.visit_pyarrow" class="doc doc-heading"> |
| <code class="highlight language-python"><span class="n">visit_pyarrow</span><span class="p">(</span><span class="n">obj</span><span class="p">,</span> <span class="n">visitor</span><span class="p">)</span></code> |
| |
| <a href="#pyiceberg.io.pyarrow.visit_pyarrow" class="headerlink" title="Permanent link">¶</a></h2> |
| |
| |
| <div class="doc doc-contents "> |
| |
| <p>Apply a pyarrow schema visitor to any point within a schema.</p> |
| <p>The function traverses the schema in post-order fashion.</p> |
| |
| |
| <p><span class="doc-section-title">Parameters:</span></p> |
| <table> |
| <thead> |
| <tr> |
| <th>Name</th> |
| <th>Type</th> |
| <th>Description</th> |
| <th>Default</th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr class="doc-section-item"> |
| <td> |
| <code>obj</code> |
| </td> |
| <td> |
| <code><span title="typing.Union">Union</span>[<span title="pyarrow.DataType">DataType</span>, <span title="pyarrow.Schema">Schema</span>]</code> |
| </td> |
| <td> |
| <div class="doc-md-description"> |
| <p>An instance of a pyarrow Schema or a pyarrow DataType.</p> |
| </div> |
| </td> |
| <td> |
| <em>required</em> |
| </td> |
| </tr> |
| <tr class="doc-section-item"> |
| <td> |
| <code>visitor</code> |
| </td> |
| <td> |
| <code><a class="autorefs autorefs-internal" title="PyArrowSchemaVisitor (pyiceberg.io.pyarrow.PyArrowSchemaVisitor)" href="#pyiceberg.io.pyarrow.PyArrowSchemaVisitor">PyArrowSchemaVisitor</a>[<span title="pyiceberg.io.pyarrow.T">T</span>]</code> |
| </td> |
| <td> |
| <div class="doc-md-description"> |
| <p>An instance of an implementation of the generic PyArrowSchemaVisitor base class.</p> |
| </div> |
| </td> |
| <td> |
| <em>required</em> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| |
| |
| <p><span class="doc-section-title">Raises:</span></p> |
| <table> |
| <thead> |
| <tr> |
| <th>Type</th> |
| <th>Description</th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr class="doc-section-item"> |
| <td> |
| <code><span title="NotImplementedError">NotImplementedError</span></code> |
| </td> |
| <td> |
| <div class="doc-md-description"> |
| <p>If attempting to visit an unrecognized object type.</p> |
| </div> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| |
| |
| <details class="quote"> |
| <summary>Source code in <code>pyiceberg/io/pyarrow.py</code></summary> |
| <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-1122">1122</a></span> |
| <span class="normal"><a href="#__codelineno-0-1123">1123</a></span> |
| <span class="normal"><a href="#__codelineno-0-1124">1124</a></span> |
| <span class="normal"><a href="#__codelineno-0-1125">1125</a></span> |
| <span class="normal"><a href="#__codelineno-0-1126">1126</a></span> |
| <span class="normal"><a href="#__codelineno-0-1127">1127</a></span> |
| <span class="normal"><a href="#__codelineno-0-1128">1128</a></span> |
| <span class="normal"><a href="#__codelineno-0-1129">1129</a></span> |
| <span class="normal"><a href="#__codelineno-0-1130">1130</a></span> |
| <span class="normal"><a href="#__codelineno-0-1131">1131</a></span> |
| <span class="normal"><a href="#__codelineno-0-1132">1132</a></span> |
| <span class="normal"><a href="#__codelineno-0-1133">1133</a></span> |
| <span class="normal"><a href="#__codelineno-0-1134">1134</a></span> |
| <span class="normal"><a href="#__codelineno-0-1135">1135</a></span></pre></div></td><td class="code"><div><pre><span></span><code><a id="__codelineno-0-1122" name="__codelineno-0-1122"></a><span class="nd">@singledispatch</span> |
| <a id="__codelineno-0-1123" name="__codelineno-0-1123"></a><span class="k">def</span><span class="w"> </span><span class="nf">visit_pyarrow</span><span class="p">(</span><span class="n">obj</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="n">pa</span><span class="o">.</span><span class="n">DataType</span><span class="p">,</span> <span class="n">pa</span><span class="o">.</span><span class="n">Schema</span><span class="p">],</span> <span class="n">visitor</span><span class="p">:</span> <span class="n">PyArrowSchemaVisitor</span><span class="p">[</span><span class="n">T</span><span class="p">])</span> <span class="o">-></span> <span class="n">T</span><span class="p">:</span> |
| <a id="__codelineno-0-1124" name="__codelineno-0-1124"></a><span class="w"> </span><span class="sd">"""Apply a pyarrow schema visitor to any point within a schema.</span> |
| <a id="__codelineno-0-1125" name="__codelineno-0-1125"></a> |
| <a id="__codelineno-0-1126" name="__codelineno-0-1126"></a><span class="sd"> The function traverses the schema in post-order fashion.</span> |
| <a id="__codelineno-0-1127" name="__codelineno-0-1127"></a> |
| <a id="__codelineno-0-1128" name="__codelineno-0-1128"></a><span class="sd"> Args:</span> |
| <a id="__codelineno-0-1129" name="__codelineno-0-1129"></a><span class="sd"> obj (Union[pa.DataType, pa.Schema]): An instance of a Schema or an IcebergType.</span> |
| <a id="__codelineno-0-1130" name="__codelineno-0-1130"></a><span class="sd"> visitor (PyArrowSchemaVisitor[T]): An instance of an implementation of the generic PyarrowSchemaVisitor base class.</span> |
| <a id="__codelineno-0-1131" name="__codelineno-0-1131"></a> |
| <a id="__codelineno-0-1132" name="__codelineno-0-1132"></a><span class="sd"> Raises:</span> |
| <a id="__codelineno-0-1133" name="__codelineno-0-1133"></a><span class="sd"> NotImplementedError: If attempting to visit an unrecognized object type.</span> |
| <a id="__codelineno-0-1134" name="__codelineno-0-1134"></a><span class="sd"> """</span> |
| <a id="__codelineno-0-1135" name="__codelineno-0-1135"></a> <span class="k">raise</span> <span class="ne">NotImplementedError</span><span class="p">(</span><span class="sa">f</span><span class="s2">"Cannot visit non-type: </span><span class="si">{</span><span class="n">obj</span><span class="si">}</span><span class="s2">"</span><span class="p">)</span> |
| </code></pre></div></td></tr></table></div> |
| </details> |
| </div> |
| |
| </div> |
| |
| |
| |
| </div> |
| |
| </div> |
| |
| </div> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| </article> |
| </div> |
| |
| |
| <script>var target=document.getElementById(location.hash.slice(1));target&&target.name&&(target.checked=target.name.startsWith("__tabbed_"))</script> |
| </div> |
| |
| <button type="button" class="md-top md-icon" data-md-component="top" hidden> |
| |
| <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M13 20h-2V8l-5.5 5.5-1.42-1.42L12 4.16l7.92 7.92-1.42 1.42L13 8z"/></svg> |
| Back to top |
| </button> |
| |
| </main> |
| |
| <footer class="md-footer"> |
| |
| <div class="md-footer-meta md-typeset"> |
| <div class="md-footer-meta__inner md-grid"> |
| <div class="md-copyright"> |
| |
| |
| Made with |
| <a href="https://squidfunk.github.io/mkdocs-material/" target="_blank" rel="noopener"> |
| Material for MkDocs |
| </a> |
| |
| </div> |
| |
| </div> |
| </div> |
| </footer> |
| |
| </div> |
| <div class="md-dialog" data-md-component="dialog"> |
| <div class="md-dialog__inner md-typeset"></div> |
| </div> |
| |
| |
| |
| |
| <script id="__config" type="application/json">{"base": "../../../..", "features": ["navigation.top", "navigation.tracking", "navigation.tabs", "navigation.tabs.sticky", "content.code.copy"], "search": "../../../../assets/javascripts/workers/search.973d3a69.min.js", "tags": null, "translations": {"clipboard.copied": "Copied to clipboard", "clipboard.copy": "Copy to clipboard", "search.result.more.one": "1 more on this page", "search.result.more.other": "# more on this page", "search.result.none": "No matching documents", "search.result.one": "1 matching document", "search.result.other": "# matching documents", "search.result.placeholder": "Type to start searching", "search.result.term.missing": "Missing", "select.version": "Select version"}, "version": null}</script> |
| |
| |
| <script src="../../../../assets/javascripts/bundle.f55a23d4.min.js"></script> |
| |
| |
| </body> |
| </html> |