| |
| <!doctype html> |
| <html lang="en" class="no-js"> |
| <head> |
| |
| <meta charset="utf-8"> |
| <meta name="viewport" content="width=device-width,initial-scale=1"> |
| |
| |
| |
| <link rel="canonical" href="https://py.iceberg.apache.org/reference/pyiceberg/io/pyarrow/"> |
| |
| |
| <link rel="prev" href="../fsspec/"> |
| |
| |
| <link rel="next" href="../../manifest/"> |
| |
| |
| <link rel="icon" href="../../../../assets/images/iceberg-logo-icon.png"> |
| <meta name="generator" content="mkdocs-1.6.1, mkdocs-material-9.6.5"> |
| |
| |
| |
| <title>pyarrow - PyIceberg</title> |
| |
| |
| |
| <link rel="stylesheet" href="../../../../assets/stylesheets/main.8608ea7d.min.css"> |
| |
| |
| <link rel="stylesheet" href="../../../../assets/stylesheets/palette.06af60db.min.css"> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin> |
| <link rel="stylesheet" href="https://fonts.googleapis.com/css?family=Lato:300,300i,400,400i,700,700i%7CRoboto+Mono:400,400i,700,700i&display=fallback"> |
| <style>:root{--md-text-font:"Lato";--md-code-font:"Roboto Mono"}</style> |
| |
| |
| |
| <link rel="stylesheet" href="../../../../assets/_mkdocstrings.css"> |
| |
| <!-- Storage helpers used by later scripts: __md_scope is a URL resolved four levels above this page, __md_hash folds a string into a number with shift-and-subtract, __md_get parses the JSON stored under the scope pathname plus "." plus the key, and __md_set writes such a value while ignoring any error thrown by the write. --> |
| <script>__md_scope=new URL("../../../..",location);__md_hash=text=>[...text].reduce((hash,ch)=>(hash<<5)-hash+ch.charCodeAt(0),0);__md_get=(key,storage=localStorage,scope=__md_scope)=>JSON.parse(storage.getItem(scope.pathname+"."+key));__md_set=(key,value,storage=localStorage,scope=__md_scope)=>{try{storage.setItem(scope.pathname+"."+key,JSON.stringify(value))}catch(err){}}</script> |
| |
| |
| |
| |
| |
| |
| </head> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <body dir="ltr" data-md-color-scheme="default" data-md-color-primary="indigo" data-md-color-accent="indigo"> |
| |
| |
| <!-- Checkbox toggles with ids __drawer and __search; labels in this page reference them through their for attribute. --> |
| <input class="md-toggle" data-md-toggle="drawer" type="checkbox" id="__drawer" autocomplete="off"> |
| <input class="md-toggle" data-md-toggle="search" type="checkbox" id="__search" autocomplete="off"> |
| <label class="md-overlay" for="__drawer"></label> |
| <!-- Skip link: jumps to the #pyiceberg.io.pyarrow anchor on this page. --> |
| <div data-md-component="skip"> |
| |
| |
| <a href="#pyiceberg.io.pyarrow" class="md-skip"> |
| Skip to content |
| </a> |
| |
| </div> |
| <!-- Announcement container; it has no content on this page. --> |
| <div data-md-component="announce"> |
| |
| </div> |
| |
| |
| |
| |
| |
| |
| <header class="md-header md-header--shadow md-header--lifted" data-md-component="header"> |
| <nav class="md-header__inner md-grid" aria-label="Header"> |
| <a href="../../../.." title="PyIceberg" class="md-header__button md-logo" aria-label="PyIceberg" data-md-component="logo"> |
| |
| <img src="../../../../assets/images/iceberg-logo-icon.png" alt="logo"> |
| |
| </a> |
| <label class="md-header__button md-icon" for="__drawer"> |
| |
| <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M3 6h18v2H3zm0 5h18v2H3zm0 5h18v2H3z"/></svg> |
| </label> |
| <div class="md-header__title" data-md-component="header-title"> |
| <div class="md-header__ellipsis"> |
| <div class="md-header__topic"> |
| <span class="md-ellipsis"> |
| PyIceberg |
| </span> |
| </div> |
| <div class="md-header__topic" data-md-component="header-topic"> |
| <span class="md-ellipsis"> |
| |
| pyarrow |
| |
| </span> |
| </div> |
| </div> |
| </div> |
| |
| |
| <!-- Palette switcher: two radio inputs sharing the name __palette, one for the "default" scheme and one for "slate". Each input is followed by a label whose for attribute targets the OTHER input, so the label next to a scheme selects the opposite one. Both labels carry the hidden attribute in this markup; presumably a script reveals the relevant one (not visible in this chunk, confirm). --> |
| <form class="md-header__option" data-md-component="palette"> |
| |
| |
| |
| |
| <!-- Light ("default") scheme; its label points at __palette_1. --> |
| <input class="md-option" data-md-color-media="" data-md-color-scheme="default" data-md-color-primary="indigo" data-md-color-accent="indigo" aria-label="Switch to dark mode" type="radio" name="__palette" id="__palette_0"> |
| |
| <label class="md-header__button md-icon" title="Switch to dark mode" for="__palette_1" hidden> |
| <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M12 8a4 4 0 0 0-4 4 4 4 0 0 0 4 4 4 4 0 0 0 4-4 4 4 0 0 0-4-4m0 10a6 6 0 0 1-6-6 6 6 0 0 1 6-6 6 6 0 0 1 6 6 6 6 0 0 1-6 6m8-9.31V4h-4.69L12 .69 8.69 4H4v4.69L.69 12 4 15.31V20h4.69L12 23.31 15.31 20H20v-4.69L23.31 12z"/></svg> |
| </label> |
| |
| |
| |
| |
| |
| <!-- Dark ("slate") scheme; its label points back at __palette_0. --> |
| <input class="md-option" data-md-color-media="" data-md-color-scheme="slate" data-md-color-primary="indigo" data-md-color-accent="indigo" aria-label="Switch to light mode" type="radio" name="__palette" id="__palette_1"> |
| |
| <label class="md-header__button md-icon" title="Switch to light mode" for="__palette_0" hidden> |
| <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M12 18c-.89 0-1.74-.2-2.5-.55C11.56 16.5 13 14.42 13 12s-1.44-4.5-3.5-5.45C10.26 6.2 11.11 6 12 6a6 6 0 0 1 6 6 6 6 0 0 1-6 6m8-9.31V4h-4.69L12 .69 8.69 4H4v4.69L.69 12 4 15.31V20h4.69L12 23.31 15.31 20H20v-4.69L23.31 12z"/></svg> |
| </label> |
| |
| |
| </form> |
| |
| |
| |
| <!-- Applies the palette saved under the "__palette" key to the body as data-md-color-* attributes. When the saved media is "(prefers-color-scheme)", the values are taken from the palette input whose data-md-color-media matches the current light/dark preference; if this page has no such input, the saved values are applied unchanged instead of failing on a null element. --> |
| <script>var palette=__md_get("__palette");if(palette&&palette.color){if("(prefers-color-scheme)"===palette.color.media){var media=matchMedia("(prefers-color-scheme: light)"),input=document.querySelector(media.matches?"[data-md-color-media='(prefers-color-scheme: light)']":"[data-md-color-media='(prefers-color-scheme: dark)']");input&&(palette.color.media=input.getAttribute("data-md-color-media"),palette.color.scheme=input.getAttribute("data-md-color-scheme"),palette.color.primary=input.getAttribute("data-md-color-primary"),palette.color.accent=input.getAttribute("data-md-color-accent"))}for(var[key,value]of Object.entries(palette.color))document.body.setAttribute("data-md-color-"+key,value)}</script> |
| |
| |
| |
| <label class="md-header__button md-icon" for="__search"> |
| |
| <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M9.5 3A6.5 6.5 0 0 1 16 9.5c0 1.61-.59 3.09-1.56 4.23l.27.27h.79l5 5-1.5 1.5-5-5v-.79l-.27-.27A6.52 6.52 0 0 1 9.5 16 6.5 6.5 0 0 1 3 9.5 6.5 6.5 0 0 1 9.5 3m0 2C7 5 5 7 5 9.5S7 14 9.5 14 14 12 14 9.5 12 5 9.5 5"/></svg> |
| </label> |
| <!-- Search dialog: overlay label, query form with a reset button, and the result list. The dialog role carries aria-label="Search" so assistive technology announces a name for it. --> |
| <div class="md-search" data-md-component="search" role="dialog" aria-label="Search"> |
| <label class="md-search__overlay" for="__search"></label> |
| <div class="md-search__inner" role="search"> |
| <form class="md-search__form" name="search"> |
| <input type="text" class="md-search__input" name="query" aria-label="Search" placeholder="Search" autocapitalize="off" autocorrect="off" autocomplete="off" spellcheck="false" data-md-component="search-query" required> |
| <label class="md-search__icon md-icon" for="__search"> |
| |
| <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M9.5 3A6.5 6.5 0 0 1 16 9.5c0 1.61-.59 3.09-1.56 4.23l.27.27h.79l5 5-1.5 1.5-5-5v-.79l-.27-.27A6.52 6.52 0 0 1 9.5 16 6.5 6.5 0 0 1 3 9.5 6.5 6.5 0 0 1 9.5 3m0 2C7 5 5 7 5 9.5S7 14 9.5 14 14 12 14 9.5 12 5 9.5 5"/></svg> |
| |
| <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M20 11v2H8l5.5 5.5-1.42 1.42L4.16 12l7.92-7.92L13.5 5.5 8 11z"/></svg> |
| </label> |
| <nav class="md-search__options" aria-label="Search"> |
| |
| <button type="reset" class="md-search__icon md-icon" title="Clear" aria-label="Clear" tabindex="-1"> |
| |
| <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M19 6.41 17.59 5 12 10.59 6.41 5 5 6.41 10.59 12 5 17.59 6.41 19 12 13.41 17.59 19 19 17.59 13.41 12z"/></svg> |
| </button> |
| </nav> |
| |
| </form> |
| <div class="md-search__output"> |
| <div class="md-search__scrollwrap" tabindex="0" data-md-scrollfix> |
| <div class="md-search-result" data-md-component="search-result"> |
| <div class="md-search-result__meta"> |
| Initializing search |
| </div> |
| <ol class="md-search-result__list" role="presentation"></ol> |
| </div> |
| </div> |
| </div> |
| </div> |
| </div> |
| |
| |
| <div class="md-header__source"> |
| <a href="https://github.com/apache/iceberg-python" title="Go to repository" class="md-source" data-md-component="source"> |
| <div class="md-source__icon md-icon"> |
| |
| <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512"><!--! Font Awesome Free 6.7.2 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License) Copyright 2024 Fonticons, Inc.--><path d="M439.55 236.05 244 40.45a28.87 28.87 0 0 0-40.81 0l-40.66 40.63 51.52 51.52c27.06-9.14 52.68 16.77 43.39 43.68l49.66 49.66c34.23-11.8 61.18 31 35.47 56.69-26.49 26.49-70.21-2.87-56-37.34L240.22 199v121.85c25.3 12.54 22.26 41.85 9.08 55a34.34 34.34 0 0 1-48.55 0c-17.57-17.6-11.07-46.91 11.25-56v-123c-20.8-8.51-24.6-30.74-18.64-45L142.57 101 8.45 235.14a28.86 28.86 0 0 0 0 40.81l195.61 195.6a28.86 28.86 0 0 0 40.8 0l194.69-194.69a28.86 28.86 0 0 0 0-40.81"/></svg> |
| </div> |
| <div class="md-source__repository"> |
| apache/iceberg-python |
| </div> |
| </a> |
| </div> |
| |
| </nav> |
| |
| |
| |
| <nav class="md-tabs" aria-label="Tabs" data-md-component="tabs"> |
| <div class="md-grid"> |
| <ul class="md-tabs__list"> |
| |
| |
| |
| |
| |
| <li class="md-tabs__item"> |
| <a href="../../../.." class="md-tabs__link"> |
| |
| |
| |
| |
| Getting started |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-tabs__item"> |
| <a href="../../../../configuration/" class="md-tabs__link"> |
| |
| |
| |
| |
| Configuration |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-tabs__item"> |
| <a href="../../../../cli/" class="md-tabs__link"> |
| |
| |
| |
| |
| CLI |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-tabs__item"> |
| <a href="../../../../api/" class="md-tabs__link"> |
| |
| |
| |
| |
| API |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-tabs__item"> |
| <a href="../../../../contributing/" class="md-tabs__link"> |
| |
| |
| |
| |
| Contributing |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-tabs__item"> |
| <a href="../../../../community/" class="md-tabs__link"> |
| |
| |
| |
| |
| Community |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-tabs__item"> |
| <a href="../../../../verify-release/" class="md-tabs__link"> |
| |
| |
| Releases |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-tabs__item md-tabs__item--active"> |
| <a href="../../" class="md-tabs__link"> |
| |
| |
| Code Reference |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| </ul> |
| </div> |
| </nav> |
| |
| |
| </header> |
| |
| <div class="md-container" data-md-component="container"> |
| |
| |
| |
| |
| <main class="md-main" data-md-component="main"> |
| <div class="md-main__inner md-grid"> |
| |
| |
| |
| <div class="md-sidebar md-sidebar--primary" data-md-component="sidebar" data-md-type="navigation" > |
| <div class="md-sidebar__scrollwrap"> |
| <div class="md-sidebar__inner"> |
| |
| |
| |
| |
| |
| |
| <nav class="md-nav md-nav--primary md-nav--lifted" aria-label="Navigation" data-md-level="0"> |
| <label class="md-nav__title" for="__drawer"> |
| <a href="../../../.." title="PyIceberg" class="md-nav__button md-logo" aria-label="PyIceberg" data-md-component="logo"> |
| |
| <img src="../../../../assets/images/iceberg-logo-icon.png" alt="logo"> |
| |
| </a> |
| PyIceberg |
| </label> |
| |
| <div class="md-nav__source"> |
| <a href="https://github.com/apache/iceberg-python" title="Go to repository" class="md-source" data-md-component="source"> |
| <div class="md-source__icon md-icon"> |
| |
| <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512"><!--! Font Awesome Free 6.7.2 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License) Copyright 2024 Fonticons, Inc.--><path d="M439.55 236.05 244 40.45a28.87 28.87 0 0 0-40.81 0l-40.66 40.63 51.52 51.52c27.06-9.14 52.68 16.77 43.39 43.68l49.66 49.66c34.23-11.8 61.18 31 35.47 56.69-26.49 26.49-70.21-2.87-56-37.34L240.22 199v121.85c25.3 12.54 22.26 41.85 9.08 55a34.34 34.34 0 0 1-48.55 0c-17.57-17.6-11.07-46.91 11.25-56v-123c-20.8-8.51-24.6-30.74-18.64-45L142.57 101 8.45 235.14a28.86 28.86 0 0 0 0 40.81l195.61 195.6a28.86 28.86 0 0 0 40.8 0l194.69-194.69a28.86 28.86 0 0 0 0-40.81"/></svg> |
| </div> |
| <div class="md-source__repository"> |
| apache/iceberg-python |
| </div> |
| </a> |
| </div> |
| |
| <ul class="md-nav__list" data-md-scrollfix> |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../../../.." class="md-nav__link"> |
| |
| |
| <span class="md-ellipsis"> |
| Getting started |
| |
| </span> |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../../../../configuration/" class="md-nav__link"> |
| |
| |
| <span class="md-ellipsis"> |
| Configuration |
| |
| </span> |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../../../../cli/" class="md-nav__link"> |
| |
| |
| <span class="md-ellipsis"> |
| CLI |
| |
| </span> |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../../../../api/" class="md-nav__link"> |
| |
| |
| <span class="md-ellipsis"> |
| API |
| |
| </span> |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../../../../contributing/" class="md-nav__link"> |
| |
| |
| <span class="md-ellipsis"> |
| Contributing |
| |
| </span> |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../../../../community/" class="md-nav__link"> |
| |
| |
| <span class="md-ellipsis"> |
| Community |
| |
| </span> |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item md-nav__item--nested"> |
| |
| |
| |
| <input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_7" > |
| |
| |
| <label class="md-nav__link" for="__nav_7" id="__nav_7_label" tabindex="0"> |
| |
| |
| <span class="md-ellipsis"> |
| Releases |
| |
| </span> |
| |
| |
| <span class="md-nav__icon md-icon"></span> |
| </label> |
| |
| <nav class="md-nav" data-md-level="1" aria-labelledby="__nav_7_label" aria-expanded="false"> |
| <label class="md-nav__title" for="__nav_7"> |
| <span class="md-nav__icon md-icon"></span> |
| Releases |
| </label> |
| <ul class="md-nav__list" data-md-scrollfix> |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../../../../verify-release/" class="md-nav__link"> |
| |
| |
| <span class="md-ellipsis"> |
| Verify a release |
| |
| </span> |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../../../../how-to-release/" class="md-nav__link"> |
| |
| |
| <span class="md-ellipsis"> |
| How to release |
| |
| </span> |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="https://github.com/apache/iceberg-python/releases" class="md-nav__link"> |
| |
| |
| <span class="md-ellipsis"> |
| Release Notes |
| |
| </span> |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../../../../nightly-build/" class="md-nav__link"> |
| |
| |
| <span class="md-ellipsis"> |
| Nightly Build |
| |
| </span> |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| </ul> |
| </nav> |
| |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item md-nav__item--active md-nav__item--section md-nav__item--nested"> |
| |
| |
| |
| <input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_8" checked> |
| |
| |
| <!-- Label for the "Code Reference" group toggle (__nav_8). No tabindex attribute is set: an empty tabindex value is not a valid integer, and omitting it leaves the label out of the tab order just the same. --> |
| <label class="md-nav__link" for="__nav_8" id="__nav_8_label"> |
| |
| |
| <span class="md-ellipsis"> |
| Code Reference |
| |
| </span> |
| |
| |
| <span class="md-nav__icon md-icon"></span> |
| </label> |
| |
| <nav class="md-nav" data-md-level="1" aria-labelledby="__nav_8_label" aria-expanded="true"> |
| <label class="md-nav__title" for="__nav_8"> |
| <span class="md-nav__icon md-icon"></span> |
| Code Reference |
| </label> |
| <ul class="md-nav__list" data-md-scrollfix> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item md-nav__item--active md-nav__item--nested"> |
| |
| |
| |
| <input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_8_1" checked> |
| |
| |
| |
| <div class="md-nav__link md-nav__container"> |
| <a href="../../" class="md-nav__link "> |
| |
| |
| <span class="md-ellipsis"> |
| pyiceberg |
| |
| </span> |
| |
| |
| </a> |
| |
| |
| <label class="md-nav__link " for="__nav_8_1" id="__nav_8_1_label" tabindex="0"> |
| <span class="md-nav__icon md-icon"></span> |
| </label> |
| |
| </div> |
| |
| <nav class="md-nav" data-md-level="2" aria-labelledby="__nav_8_1_label" aria-expanded="true"> |
| <label class="md-nav__title" for="__nav_8_1"> |
| <span class="md-nav__icon md-icon"></span> |
| pyiceberg |
| </label> |
| <ul class="md-nav__list" data-md-scrollfix> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item md-nav__item--nested"> |
| |
| |
| |
| <input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_8_1_1" > |
| |
| |
| |
| <div class="md-nav__link md-nav__container"> |
| <a href="../../avro/" class="md-nav__link "> |
| |
| |
| <span class="md-ellipsis"> |
| avro |
| |
| </span> |
| |
| |
| </a> |
| |
| |
| <label class="md-nav__link " for="__nav_8_1_1" id="__nav_8_1_1_label" tabindex="0"> |
| <span class="md-nav__icon md-icon"></span> |
| </label> |
| |
| </div> |
| |
| <nav class="md-nav" data-md-level="3" aria-labelledby="__nav_8_1_1_label" aria-expanded="false"> |
| <label class="md-nav__title" for="__nav_8_1_1"> |
| <span class="md-nav__icon md-icon"></span> |
| avro |
| </label> |
| <ul class="md-nav__list" data-md-scrollfix> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item md-nav__item--nested"> |
| |
| |
| |
| <input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_8_1_1_1" > |
| |
| |
| |
| <div class="md-nav__link md-nav__container"> |
| <a href="../../avro/codecs/" class="md-nav__link "> |
| |
| |
| <span class="md-ellipsis"> |
| codecs |
| |
| </span> |
| |
| |
| </a> |
| |
| |
| <label class="md-nav__link " for="__nav_8_1_1_1" id="__nav_8_1_1_1_label" tabindex="0"> |
| <span class="md-nav__icon md-icon"></span> |
| </label> |
| |
| </div> |
| |
| <nav class="md-nav" data-md-level="4" aria-labelledby="__nav_8_1_1_1_label" aria-expanded="false"> |
| <label class="md-nav__title" for="__nav_8_1_1_1"> |
| <span class="md-nav__icon md-icon"></span> |
| codecs |
| </label> |
| <ul class="md-nav__list" data-md-scrollfix> |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../../avro/codecs/bzip2/" class="md-nav__link"> |
| |
| |
| <span class="md-ellipsis"> |
| bzip2 |
| |
| </span> |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../../avro/codecs/codec/" class="md-nav__link"> |
| |
| |
| <span class="md-ellipsis"> |
| codec |
| |
| </span> |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../../avro/codecs/deflate/" class="md-nav__link"> |
| |
| |
| <span class="md-ellipsis"> |
| deflate |
| |
| </span> |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../../avro/codecs/snappy_codec/" class="md-nav__link"> |
| |
| |
| <span class="md-ellipsis"> |
| snappy_codec |
| |
| </span> |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../../avro/codecs/zstandard_codec/" class="md-nav__link"> |
| |
| |
| <span class="md-ellipsis"> |
| zstandard_codec |
| |
| </span> |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| </ul> |
| </nav> |
| |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../../avro/decoder/" class="md-nav__link"> |
| |
| |
| <span class="md-ellipsis"> |
| decoder |
| |
| </span> |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../../avro/encoder/" class="md-nav__link"> |
| |
| |
| <span class="md-ellipsis"> |
| encoder |
| |
| </span> |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../../avro/file/" class="md-nav__link"> |
| |
| |
| <span class="md-ellipsis"> |
| file |
| |
| </span> |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../../avro/reader/" class="md-nav__link"> |
| |
| |
| <span class="md-ellipsis"> |
| reader |
| |
| </span> |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../../avro/resolver/" class="md-nav__link"> |
| |
| |
| <span class="md-ellipsis"> |
| resolver |
| |
| </span> |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../../avro/writer/" class="md-nav__link"> |
| |
| |
| <span class="md-ellipsis"> |
| writer |
| |
| </span> |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| </ul> |
| </nav> |
| |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item md-nav__item--nested"> |
| |
| |
| |
| <input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_8_1_2" > |
| |
| |
| |
| <div class="md-nav__link md-nav__container"> |
| <a href="../../catalog/" class="md-nav__link "> |
| |
| |
| <span class="md-ellipsis"> |
| catalog |
| |
| </span> |
| |
| |
| </a> |
| |
| |
| <label class="md-nav__link " for="__nav_8_1_2" id="__nav_8_1_2_label" tabindex="0"> |
| <span class="md-nav__icon md-icon"></span> |
| </label> |
| |
| </div> |
| |
| <nav class="md-nav" data-md-level="3" aria-labelledby="__nav_8_1_2_label" aria-expanded="false"> |
| <label class="md-nav__title" for="__nav_8_1_2"> |
| <span class="md-nav__icon md-icon"></span> |
| catalog |
| </label> |
| <ul class="md-nav__list" data-md-scrollfix> |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../../catalog/dynamodb/" class="md-nav__link"> |
| |
| |
| <span class="md-ellipsis"> |
| dynamodb |
| |
| </span> |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../../catalog/glue/" class="md-nav__link"> |
| |
| |
| <span class="md-ellipsis"> |
| glue |
| |
| </span> |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../../catalog/hive/" class="md-nav__link"> |
| |
| |
| <span class="md-ellipsis"> |
| hive |
| |
| </span> |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../../catalog/memory/" class="md-nav__link"> |
| |
| |
| <span class="md-ellipsis"> |
| memory |
| |
| </span> |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../../catalog/noop/" class="md-nav__link"> |
| |
| |
| <span class="md-ellipsis"> |
| noop |
| |
| </span> |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../../catalog/rest/" class="md-nav__link"> |
| |
| |
| <span class="md-ellipsis"> |
| rest |
| |
| </span> |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../../catalog/sql/" class="md-nav__link"> |
| |
| |
| <span class="md-ellipsis"> |
| sql |
| |
| </span> |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| </ul> |
| </nav> |
| |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item md-nav__item--nested"> |
| |
| |
| |
| <input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_8_1_3" > |
| |
| |
| |
| <div class="md-nav__link md-nav__container"> |
| <a href="../../cli/" class="md-nav__link "> |
| |
| |
| <span class="md-ellipsis"> |
| cli |
| |
| </span> |
| |
| |
| </a> |
| |
| |
| <label class="md-nav__link " for="__nav_8_1_3" id="__nav_8_1_3_label" tabindex="0"> |
| <span class="md-nav__icon md-icon"></span> |
| </label> |
| |
| </div> |
| |
| <nav class="md-nav" data-md-level="3" aria-labelledby="__nav_8_1_3_label" aria-expanded="false"> |
| <label class="md-nav__title" for="__nav_8_1_3"> |
| <span class="md-nav__icon md-icon"></span> |
| cli |
| </label> |
| <ul class="md-nav__list" data-md-scrollfix> |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../../cli/console/" class="md-nav__link"> |
| |
| |
| <span class="md-ellipsis"> |
| console |
| |
| </span> |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../../cli/output/" class="md-nav__link"> |
| |
| |
| <span class="md-ellipsis"> |
| output |
| |
| </span> |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| </ul> |
| </nav> |
| |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../../conversions/" class="md-nav__link"> |
| |
| |
| <span class="md-ellipsis"> |
| conversions |
| |
| </span> |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../../exceptions/" class="md-nav__link"> |
| |
| |
| <span class="md-ellipsis"> |
| exceptions |
| |
| </span> |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item md-nav__item--nested"> |
| |
| |
| |
| <input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_8_1_6" > |
| |
| |
| |
| <div class="md-nav__link md-nav__container"> |
| <a href="../../expressions/" class="md-nav__link "> |
| |
| |
| <span class="md-ellipsis"> |
| expressions |
| |
| </span> |
| |
| |
| </a> |
| |
| |
| <label class="md-nav__link " for="__nav_8_1_6" id="__nav_8_1_6_label" tabindex="0"> |
| <span class="md-nav__icon md-icon"></span> |
| </label> |
| |
| </div> |
| |
| <nav class="md-nav" data-md-level="3" aria-labelledby="__nav_8_1_6_label" aria-expanded="false"> |
| <label class="md-nav__title" for="__nav_8_1_6"> |
| <span class="md-nav__icon md-icon"></span> |
| expressions |
| </label> |
| <ul class="md-nav__list" data-md-scrollfix> |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../../expressions/literals/" class="md-nav__link"> |
| |
| |
| <span class="md-ellipsis"> |
| literals |
| |
| </span> |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../../expressions/parser/" class="md-nav__link"> |
| |
| |
| <span class="md-ellipsis"> |
| parser |
| |
| </span> |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../../expressions/visitors/" class="md-nav__link"> |
| |
| |
| <span class="md-ellipsis"> |
| visitors |
| |
| </span> |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| </ul> |
| </nav> |
| |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item md-nav__item--active md-nav__item--nested"> |
| |
| |
| |
| <input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_8_1_7" checked> |
| |
| |
| |
| <div class="md-nav__link md-nav__container"> |
| <a href="../" class="md-nav__link "> |
| |
| |
| <span class="md-ellipsis"> |
| io |
| |
| </span> |
| |
| |
| </a> |
| |
| |
| <label class="md-nav__link " for="__nav_8_1_7" id="__nav_8_1_7_label" tabindex="0"> |
| <span class="md-nav__icon md-icon"></span> |
| </label> |
| |
| </div> |
| |
| <nav class="md-nav" data-md-level="3" aria-labelledby="__nav_8_1_7_label" aria-expanded="true"> |
| <label class="md-nav__title" for="__nav_8_1_7"> |
| <span class="md-nav__icon md-icon"></span> |
| io |
| </label> |
| <ul class="md-nav__list" data-md-scrollfix> |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../fsspec/" class="md-nav__link"> |
| |
| |
| <span class="md-ellipsis"> |
| fsspec |
| |
| </span> |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item md-nav__item--active"> |
| |
| <input class="md-nav__toggle md-toggle" type="checkbox" id="__toc"> |
| |
| |
| |
| <label class="md-nav__link md-nav__link--active" for="__toc"> |
| |
| |
| <span class="md-ellipsis"> |
| pyarrow |
| |
| </span> |
| |
| |
| <span class="md-nav__icon md-icon"></span> |
| </label> |
| |
| <a href="./" class="md-nav__link md-nav__link--active"> |
| |
| |
| <span class="md-ellipsis"> |
| pyarrow |
| |
| </span> |
| |
| |
| </a> |
| |
| |
| |
| <nav class="md-nav md-nav--secondary" aria-label="Table of contents"> |
| |
| |
| |
| |
| <label class="md-nav__title" for="__toc"> |
| <span class="md-nav__icon md-icon"></span> |
| Table of contents |
| </label> |
| <ul class="md-nav__list" data-md-component="toc" data-md-scrollfix> |
| |
| <li class="md-nav__item"> |
| <a href="#pyiceberg.io.pyarrow" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| pyarrow |
| </span> |
| </a> |
| |
| </li> |
| |
| <li class="md-nav__item"> |
| <a href="#pyiceberg.io.pyarrow.ArrowScan" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| ArrowScan |
| </span> |
| </a> |
| |
| <nav class="md-nav" aria-label="ArrowScan"> |
| <ul class="md-nav__list"> |
| |
| <li class="md-nav__item"> |
| <a href="#pyiceberg.io.pyarrow.ArrowScan._limit" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| _limit |
| </span> |
| </a> |
| |
| </li> |
| |
| <li class="md-nav__item"> |
| <a href="#pyiceberg.io.pyarrow.ArrowScan._projected_field_ids" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| _projected_field_ids |
| </span> |
| </a> |
| |
| </li> |
| |
| <li class="md-nav__item"> |
| <a href="#pyiceberg.io.pyarrow.ArrowScan._use_large_types" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| _use_large_types |
| </span> |
| </a> |
| |
| </li> |
| |
| <li class="md-nav__item"> |
| <a href="#pyiceberg.io.pyarrow.ArrowScan.to_record_batches" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| to_record_batches |
| </span> |
| </a> |
| |
| </li> |
| |
| <li class="md-nav__item"> |
| <a href="#pyiceberg.io.pyarrow.ArrowScan.to_table" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| to_table |
| </span> |
| </a> |
| |
| </li> |
| |
| </ul> |
| </nav> |
| |
| </li> |
| |
| <li class="md-nav__item"> |
| <a href="#pyiceberg.io.pyarrow.PyArrowFile" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| PyArrowFile |
| </span> |
| </a> |
| |
| <nav class="md-nav" aria-label="PyArrowFile"> |
| <ul class="md-nav__list"> |
| |
| <li class="md-nav__item"> |
| <a href="#pyiceberg.io.pyarrow.PyArrowFile.__len__" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| __len__ |
| </span> |
| </a> |
| |
| </li> |
| |
| <li class="md-nav__item"> |
| <a href="#pyiceberg.io.pyarrow.PyArrowFile._file_info" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| _file_info |
| </span> |
| </a> |
| |
| </li> |
| |
| <li class="md-nav__item"> |
| <a href="#pyiceberg.io.pyarrow.PyArrowFile.create" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| create |
| </span> |
| </a> |
| |
| </li> |
| |
| <li class="md-nav__item"> |
| <a href="#pyiceberg.io.pyarrow.PyArrowFile.exists" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| exists |
| </span> |
| </a> |
| |
| </li> |
| |
| <li class="md-nav__item"> |
| <a href="#pyiceberg.io.pyarrow.PyArrowFile.open" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| open |
| </span> |
| </a> |
| |
| </li> |
| |
| <li class="md-nav__item"> |
| <a href="#pyiceberg.io.pyarrow.PyArrowFile.to_input_file" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| to_input_file |
| </span> |
| </a> |
| |
| </li> |
| |
| </ul> |
| </nav> |
| |
| </li> |
| |
| <li class="md-nav__item"> |
| <a href="#pyiceberg.io.pyarrow.PyArrowFileIO" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| PyArrowFileIO |
| </span> |
| </a> |
| |
| <nav class="md-nav" aria-label="PyArrowFileIO"> |
| <ul class="md-nav__list"> |
| |
| <li class="md-nav__item"> |
| <a href="#pyiceberg.io.pyarrow.PyArrowFileIO.__getstate__" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| __getstate__ |
| </span> |
| </a> |
| |
| </li> |
| |
| <li class="md-nav__item"> |
| <a href="#pyiceberg.io.pyarrow.PyArrowFileIO.__setstate__" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| __setstate__ |
| </span> |
| </a> |
| |
| </li> |
| |
| <li class="md-nav__item"> |
| <a href="#pyiceberg.io.pyarrow.PyArrowFileIO._initialize_fs" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| _initialize_fs |
| </span> |
| </a> |
| |
| </li> |
| |
| <li class="md-nav__item"> |
| <a href="#pyiceberg.io.pyarrow.PyArrowFileIO.delete" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| delete |
| </span> |
| </a> |
| |
| </li> |
| |
| <li class="md-nav__item"> |
| <a href="#pyiceberg.io.pyarrow.PyArrowFileIO.new_input" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| new_input |
| </span> |
| </a> |
| |
| </li> |
| |
| <li class="md-nav__item"> |
| <a href="#pyiceberg.io.pyarrow.PyArrowFileIO.new_output" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| new_output |
| </span> |
| </a> |
| |
| </li> |
| |
| <li class="md-nav__item"> |
| <a href="#pyiceberg.io.pyarrow.PyArrowFileIO.parse_location" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| parse_location |
| </span> |
| </a> |
| |
| </li> |
| |
| </ul> |
| </nav> |
| |
| </li> |
| |
| <li class="md-nav__item"> |
| <a href="#pyiceberg.io.pyarrow.PyArrowSchemaVisitor" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| PyArrowSchemaVisitor |
| </span> |
| </a> |
| |
| <nav class="md-nav" aria-label="PyArrowSchemaVisitor"> |
| <ul class="md-nav__list"> |
| |
| <li class="md-nav__item"> |
| <a href="#pyiceberg.io.pyarrow.PyArrowSchemaVisitor.after_field" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| after_field |
| </span> |
| </a> |
| |
| </li> |
| |
| <li class="md-nav__item"> |
| <a href="#pyiceberg.io.pyarrow.PyArrowSchemaVisitor.after_list_element" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| after_list_element |
| </span> |
| </a> |
| |
| </li> |
| |
| <li class="md-nav__item"> |
| <a href="#pyiceberg.io.pyarrow.PyArrowSchemaVisitor.after_map_key" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| after_map_key |
| </span> |
| </a> |
| |
| </li> |
| |
| <li class="md-nav__item"> |
| <a href="#pyiceberg.io.pyarrow.PyArrowSchemaVisitor.after_map_value" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| after_map_value |
| </span> |
| </a> |
| |
| </li> |
| |
| <li class="md-nav__item"> |
| <a href="#pyiceberg.io.pyarrow.PyArrowSchemaVisitor.before_field" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| before_field |
| </span> |
| </a> |
| |
| </li> |
| |
| <li class="md-nav__item"> |
| <a href="#pyiceberg.io.pyarrow.PyArrowSchemaVisitor.before_list_element" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| before_list_element |
| </span> |
| </a> |
| |
| </li> |
| |
| <li class="md-nav__item"> |
| <a href="#pyiceberg.io.pyarrow.PyArrowSchemaVisitor.before_map_key" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| before_map_key |
| </span> |
| </a> |
| |
| </li> |
| |
| <li class="md-nav__item"> |
| <a href="#pyiceberg.io.pyarrow.PyArrowSchemaVisitor.before_map_value" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| before_map_value |
| </span> |
| </a> |
| |
| </li> |
| |
| <li class="md-nav__item"> |
| <a href="#pyiceberg.io.pyarrow.PyArrowSchemaVisitor.field" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| field |
| </span> |
| </a> |
| |
| </li> |
| |
| <li class="md-nav__item"> |
| <a href="#pyiceberg.io.pyarrow.PyArrowSchemaVisitor.list" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| list |
| </span> |
| </a> |
| |
| </li> |
| |
| <li class="md-nav__item"> |
| <a href="#pyiceberg.io.pyarrow.PyArrowSchemaVisitor.map" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| map |
| </span> |
| </a> |
| |
| </li> |
| |
| <li class="md-nav__item"> |
| <a href="#pyiceberg.io.pyarrow.PyArrowSchemaVisitor.primitive" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| primitive |
| </span> |
| </a> |
| |
| </li> |
| |
| <li class="md-nav__item"> |
| <a href="#pyiceberg.io.pyarrow.PyArrowSchemaVisitor.schema" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| schema |
| </span> |
| </a> |
| |
| </li> |
| |
| <li class="md-nav__item"> |
| <a href="#pyiceberg.io.pyarrow.PyArrowSchemaVisitor.struct" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| struct |
| </span> |
| </a> |
| |
| </li> |
| |
| </ul> |
| </nav> |
| |
| </li> |
| |
| <li class="md-nav__item"> |
| <a href="#pyiceberg.io.pyarrow.UnsupportedPyArrowTypeException" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| UnsupportedPyArrowTypeException |
| </span> |
| </a> |
| |
| </li> |
| |
| <li class="md-nav__item"> |
| <a href="#pyiceberg.io.pyarrow._ConvertToIceberg" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| _ConvertToIceberg |
| </span> |
| </a> |
| |
| </li> |
| |
| <li class="md-nav__item"> |
| <a href="#pyiceberg.io.pyarrow._ConvertToIcebergWithoutIDs" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| _ConvertToIcebergWithoutIDs |
| </span> |
| </a> |
| |
| </li> |
| |
| <li class="md-nav__item"> |
| <a href="#pyiceberg.io.pyarrow._NullNaNUnmentionedTermsCollector" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| _NullNaNUnmentionedTermsCollector |
| </span> |
| </a> |
| |
| <nav class="md-nav" aria-label="_NullNaNUnmentionedTermsCollector"> |
| <ul class="md-nav__list"> |
| |
| <li class="md-nav__item"> |
| <a href="#pyiceberg.io.pyarrow._NullNaNUnmentionedTermsCollector._handle_explicit_is_nan_or_not" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| _handle_explicit_is_nan_or_not |
| </span> |
| </a> |
| |
| </li> |
| |
| <li class="md-nav__item"> |
| <a href="#pyiceberg.io.pyarrow._NullNaNUnmentionedTermsCollector._handle_explicit_is_null_or_not" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| _handle_explicit_is_null_or_not |
| </span> |
| </a> |
| |
| </li> |
| |
| <li class="md-nav__item"> |
| <a href="#pyiceberg.io.pyarrow._NullNaNUnmentionedTermsCollector._handle_nan_unmentioned" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| _handle_nan_unmentioned |
| </span> |
| </a> |
| |
| </li> |
| |
| <li class="md-nav__item"> |
| <a href="#pyiceberg.io.pyarrow._NullNaNUnmentionedTermsCollector._handle_null_unmentioned" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| _handle_null_unmentioned |
| </span> |
| </a> |
| |
| </li> |
| |
| <li class="md-nav__item"> |
| <a href="#pyiceberg.io.pyarrow._NullNaNUnmentionedTermsCollector.collect" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| collect |
| </span> |
| </a> |
| |
| </li> |
| |
| </ul> |
| </nav> |
| |
| </li> |
| |
| <li class="md-nav__item"> |
| <a href="#pyiceberg.io.pyarrow._check_pyarrow_schema_compatible" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| _check_pyarrow_schema_compatible |
| </span> |
| </a> |
| |
| </li> |
| |
| <li class="md-nav__item"> |
| <a href="#pyiceberg.io.pyarrow._dataframe_to_data_files" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| _dataframe_to_data_files |
| </span> |
| </a> |
| |
| </li> |
| |
| <li class="md-nav__item"> |
| <a href="#pyiceberg.io.pyarrow._determine_partitions" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| _determine_partitions |
| </span> |
| </a> |
| |
| </li> |
| |
| <li class="md-nav__item"> |
| <a href="#pyiceberg.io.pyarrow._expression_to_complementary_pyarrow" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| _expression_to_complementary_pyarrow |
| </span> |
| </a> |
| |
| </li> |
| |
| <li class="md-nav__item"> |
| <a href="#pyiceberg.io.pyarrow._get_column_projection_values" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| _get_column_projection_values |
| </span> |
| </a> |
| |
| </li> |
| |
| <li class="md-nav__item"> |
| <a href="#pyiceberg.io.pyarrow.compute_statistics_plan" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| compute_statistics_plan |
| </span> |
| </a> |
| |
| </li> |
| |
| <li class="md-nav__item"> |
| <a href="#pyiceberg.io.pyarrow.data_file_statistics_from_parquet_metadata" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| data_file_statistics_from_parquet_metadata |
| </span> |
| </a> |
| |
| </li> |
| |
| <li class="md-nav__item"> |
| <a href="#pyiceberg.io.pyarrow.parquet_path_to_id_mapping" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| parquet_path_to_id_mapping |
| </span> |
| </a> |
| |
| </li> |
| |
| <li class="md-nav__item"> |
| <a href="#pyiceberg.io.pyarrow.visit_pyarrow" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| visit_pyarrow |
| </span> |
| </a> |
| |
| </li> |
| |
| </ul> |
| |
| </nav> |
| |
| </li> |
| |
| |
| |
| |
| </ul> |
| </nav> |
| |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../../manifest/" class="md-nav__link"> |
| |
| |
| <span class="md-ellipsis"> |
| manifest |
| |
| </span> |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../../partitioning/" class="md-nav__link"> |
| |
| |
| <span class="md-ellipsis"> |
| partitioning |
| |
| </span> |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../../schema/" class="md-nav__link"> |
| |
| |
| <span class="md-ellipsis"> |
| schema |
| |
| </span> |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../../serializers/" class="md-nav__link"> |
| |
| |
| <span class="md-ellipsis"> |
| serializers |
| |
| </span> |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item md-nav__item--nested"> |
| |
| |
| |
| <input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_8_1_12" > |
| |
| |
| |
| <div class="md-nav__link md-nav__container"> |
| <a href="../../table/" class="md-nav__link "> |
| |
| |
| <span class="md-ellipsis"> |
| table |
| |
| </span> |
| |
| |
| </a> |
| |
| |
| <label class="md-nav__link " for="__nav_8_1_12" id="__nav_8_1_12_label" tabindex="0"> |
| <span class="md-nav__icon md-icon"></span> |
| </label> |
| |
| </div> |
| |
| <nav class="md-nav" data-md-level="3" aria-labelledby="__nav_8_1_12_label" aria-expanded="false"> |
| <label class="md-nav__title" for="__nav_8_1_12"> |
| <span class="md-nav__icon md-icon"></span> |
| table |
| </label> |
| <ul class="md-nav__list" data-md-scrollfix> |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../../table/inspect/" class="md-nav__link"> |
| |
| |
| <span class="md-ellipsis"> |
| inspect |
| |
| </span> |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../../table/locations/" class="md-nav__link"> |
| |
| |
| <span class="md-ellipsis"> |
| locations |
| |
| </span> |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../../table/metadata/" class="md-nav__link"> |
| |
| |
| <span class="md-ellipsis"> |
| metadata |
| |
| </span> |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../../table/name_mapping/" class="md-nav__link"> |
| |
| |
| <span class="md-ellipsis"> |
| name_mapping |
| |
| </span> |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../../table/refs/" class="md-nav__link"> |
| |
| |
| <span class="md-ellipsis"> |
| refs |
| |
| </span> |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../../table/snapshots/" class="md-nav__link"> |
| |
| |
| <span class="md-ellipsis"> |
| snapshots |
| |
| </span> |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../../table/sorting/" class="md-nav__link"> |
| |
| |
| <span class="md-ellipsis"> |
| sorting |
| |
| </span> |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../../table/statistics/" class="md-nav__link"> |
| |
| |
| <span class="md-ellipsis"> |
| statistics |
| |
| </span> |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item md-nav__item--nested"> |
| |
| |
| |
| <input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_8_1_12_9" > |
| |
| |
| |
| <div class="md-nav__link md-nav__container"> |
| <a href="../../table/update/" class="md-nav__link "> |
| |
| |
| <span class="md-ellipsis"> |
| update |
| |
| </span> |
| |
| |
| </a> |
| |
| |
| <label class="md-nav__link " for="__nav_8_1_12_9" id="__nav_8_1_12_9_label" tabindex="0"> |
| <span class="md-nav__icon md-icon"></span> |
| </label> |
| |
| </div> |
| |
| <nav class="md-nav" data-md-level="4" aria-labelledby="__nav_8_1_12_9_label" aria-expanded="false"> |
| <label class="md-nav__title" for="__nav_8_1_12_9"> |
| <span class="md-nav__icon md-icon"></span> |
| update |
| </label> |
| <ul class="md-nav__list" data-md-scrollfix> |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../../table/update/schema/" class="md-nav__link"> |
| |
| |
| <span class="md-ellipsis"> |
| schema |
| |
| </span> |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../../table/update/snapshot/" class="md-nav__link"> |
| |
| |
| <span class="md-ellipsis"> |
| snapshot |
| |
| </span> |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../../table/update/spec/" class="md-nav__link"> |
| |
| |
| <span class="md-ellipsis"> |
| spec |
| |
| </span> |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../../table/update/statistics/" class="md-nav__link"> |
| |
| |
| <span class="md-ellipsis"> |
| statistics |
| |
| </span> |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| </ul> |
| </nav> |
| |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../../table/upsert_util/" class="md-nav__link"> |
| |
| |
| <span class="md-ellipsis"> |
| upsert_util |
| |
| </span> |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| </ul> |
| </nav> |
| |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../../transforms/" class="md-nav__link"> |
| |
| |
| <span class="md-ellipsis"> |
| transforms |
| |
| </span> |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../../typedef/" class="md-nav__link"> |
| |
| |
| <span class="md-ellipsis"> |
| typedef |
| |
| </span> |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../../types/" class="md-nav__link"> |
| |
| |
| <span class="md-ellipsis"> |
| types |
| |
| </span> |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item md-nav__item--nested"> |
| |
| |
| |
| <input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_8_1_16" > |
| |
| |
| |
| <div class="md-nav__link md-nav__container"> |
| <a href="../../utils/" class="md-nav__link "> |
| |
| |
| <span class="md-ellipsis"> |
| utils |
| |
| </span> |
| |
| |
| </a> |
| |
| |
| <label class="md-nav__link " for="__nav_8_1_16" id="__nav_8_1_16_label" tabindex="0"> |
| <span class="md-nav__icon md-icon"></span> |
| </label> |
| |
| </div> |
| |
| <nav class="md-nav" data-md-level="3" aria-labelledby="__nav_8_1_16_label" aria-expanded="false"> |
| <label class="md-nav__title" for="__nav_8_1_16"> |
| <span class="md-nav__icon md-icon"></span> |
| utils |
| </label> |
| <ul class="md-nav__list" data-md-scrollfix> |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../../utils/bin_packing/" class="md-nav__link"> |
| |
| |
| <span class="md-ellipsis"> |
| bin_packing |
| |
| </span> |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../../utils/concurrent/" class="md-nav__link"> |
| |
| |
| <span class="md-ellipsis"> |
| concurrent |
| |
| </span> |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../../utils/config/" class="md-nav__link"> |
| |
| |
| <span class="md-ellipsis"> |
| config |
| |
| </span> |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../../utils/datetime/" class="md-nav__link"> |
| |
| |
| <span class="md-ellipsis"> |
| datetime |
| |
| </span> |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../../utils/decimal/" class="md-nav__link"> |
| |
| |
| <span class="md-ellipsis"> |
| decimal |
| |
| </span> |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../../utils/deprecated/" class="md-nav__link"> |
| |
| |
| <span class="md-ellipsis"> |
| deprecated |
| |
| </span> |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../../utils/lazydict/" class="md-nav__link"> |
| |
| |
| <span class="md-ellipsis"> |
| lazydict |
| |
| </span> |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../../utils/parsing/" class="md-nav__link"> |
| |
| |
| <span class="md-ellipsis"> |
| parsing |
| |
| </span> |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../../utils/properties/" class="md-nav__link"> |
| |
| |
| <span class="md-ellipsis"> |
| properties |
| |
| </span> |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../../utils/schema_conversion/" class="md-nav__link"> |
| |
| |
| <span class="md-ellipsis"> |
| schema_conversion |
| |
| </span> |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../../utils/singleton/" class="md-nav__link"> |
| |
| |
| <span class="md-ellipsis"> |
| singleton |
| |
| </span> |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../../utils/truncate/" class="md-nav__link"> |
| |
| |
| <span class="md-ellipsis"> |
| truncate |
| |
| </span> |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| </ul> |
| </nav> |
| |
| </li> |
| |
| |
| |
| |
| </ul> |
| </nav> |
| |
| </li> |
| |
| |
| |
| |
| </ul> |
| </nav> |
| |
| </li> |
| |
| |
| |
| </ul> |
| </nav> |
| </div> |
| </div> |
| </div> |
| |
| |
| |
| <div class="md-sidebar md-sidebar--secondary" data-md-component="sidebar" data-md-type="toc" > |
| <div class="md-sidebar__scrollwrap"> |
| <div class="md-sidebar__inner"> |
| |
| |
| <nav class="md-nav md-nav--secondary" aria-label="Table of contents"> |
| |
| |
| |
| |
| <label class="md-nav__title" for="__toc"> |
| <span class="md-nav__icon md-icon"></span> |
| Table of contents |
| </label> |
| <ul class="md-nav__list" data-md-component="toc" data-md-scrollfix> |
| |
| <li class="md-nav__item"> |
| <a href="#pyiceberg.io.pyarrow" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| pyarrow |
| </span> |
| </a> |
| |
| </li> |
| |
| <li class="md-nav__item"> |
| <a href="#pyiceberg.io.pyarrow.ArrowScan" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| ArrowScan |
| </span> |
| </a> |
| |
| <nav class="md-nav" aria-label="ArrowScan"> |
| <ul class="md-nav__list"> |
| |
| <li class="md-nav__item"> |
| <a href="#pyiceberg.io.pyarrow.ArrowScan._limit" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| _limit |
| </span> |
| </a> |
| |
| </li> |
| |
| <li class="md-nav__item"> |
| <a href="#pyiceberg.io.pyarrow.ArrowScan._projected_field_ids" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| _projected_field_ids |
| </span> |
| </a> |
| |
| </li> |
| |
| <li class="md-nav__item"> |
| <a href="#pyiceberg.io.pyarrow.ArrowScan._use_large_types" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| _use_large_types |
| </span> |
| </a> |
| |
| </li> |
| |
| <li class="md-nav__item"> |
| <a href="#pyiceberg.io.pyarrow.ArrowScan.to_record_batches" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| to_record_batches |
| </span> |
| </a> |
| |
| </li> |
| |
| <li class="md-nav__item"> |
| <a href="#pyiceberg.io.pyarrow.ArrowScan.to_table" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| to_table |
| </span> |
| </a> |
| |
| </li> |
| |
| </ul> |
| </nav> |
| |
| </li> |
| |
| <li class="md-nav__item"> |
| <a href="#pyiceberg.io.pyarrow.PyArrowFile" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| PyArrowFile |
| </span> |
| </a> |
| |
| <nav class="md-nav" aria-label="PyArrowFile"> |
| <ul class="md-nav__list"> |
| |
| <li class="md-nav__item"> |
| <a href="#pyiceberg.io.pyarrow.PyArrowFile.__len__" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| __len__ |
| </span> |
| </a> |
| |
| </li> |
| |
| <li class="md-nav__item"> |
| <a href="#pyiceberg.io.pyarrow.PyArrowFile._file_info" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| _file_info |
| </span> |
| </a> |
| |
| </li> |
| |
| <li class="md-nav__item"> |
| <a href="#pyiceberg.io.pyarrow.PyArrowFile.create" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| create |
| </span> |
| </a> |
| |
| </li> |
| |
| <li class="md-nav__item"> |
| <a href="#pyiceberg.io.pyarrow.PyArrowFile.exists" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| exists |
| </span> |
| </a> |
| |
| </li> |
| |
| <li class="md-nav__item"> |
| <a href="#pyiceberg.io.pyarrow.PyArrowFile.open" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| open |
| </span> |
| </a> |
| |
| </li> |
| |
| <li class="md-nav__item"> |
| <a href="#pyiceberg.io.pyarrow.PyArrowFile.to_input_file" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| to_input_file |
| </span> |
| </a> |
| |
| </li> |
| |
| </ul> |
| </nav> |
| |
| </li> |
| |
| <li class="md-nav__item"> |
| <a href="#pyiceberg.io.pyarrow.PyArrowFileIO" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| PyArrowFileIO |
| </span> |
| </a> |
| |
| <nav class="md-nav" aria-label="PyArrowFileIO"> |
| <ul class="md-nav__list"> |
| |
| <li class="md-nav__item"> |
| <a href="#pyiceberg.io.pyarrow.PyArrowFileIO.__getstate__" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| __getstate__ |
| </span> |
| </a> |
| |
| </li> |
| |
| <li class="md-nav__item"> |
| <a href="#pyiceberg.io.pyarrow.PyArrowFileIO.__setstate__" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| __setstate__ |
| </span> |
| </a> |
| |
| </li> |
| |
| <li class="md-nav__item"> |
| <a href="#pyiceberg.io.pyarrow.PyArrowFileIO._initialize_fs" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| _initialize_fs |
| </span> |
| </a> |
| |
| </li> |
| |
| <li class="md-nav__item"> |
| <a href="#pyiceberg.io.pyarrow.PyArrowFileIO.delete" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| delete |
| </span> |
| </a> |
| |
| </li> |
| |
| <li class="md-nav__item"> |
| <a href="#pyiceberg.io.pyarrow.PyArrowFileIO.new_input" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| new_input |
| </span> |
| </a> |
| |
| </li> |
| |
| <li class="md-nav__item"> |
| <a href="#pyiceberg.io.pyarrow.PyArrowFileIO.new_output" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| new_output |
| </span> |
| </a> |
| |
| </li> |
| |
| <li class="md-nav__item"> |
| <a href="#pyiceberg.io.pyarrow.PyArrowFileIO.parse_location" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| parse_location |
| </span> |
| </a> |
| |
| </li> |
| |
| </ul> |
| </nav> |
| |
| </li> |
| |
| <li class="md-nav__item"> |
| <a href="#pyiceberg.io.pyarrow.PyArrowSchemaVisitor" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| PyArrowSchemaVisitor |
| </span> |
| </a> |
| |
| <nav class="md-nav" aria-label="PyArrowSchemaVisitor"> |
| <ul class="md-nav__list"> |
| |
| <li class="md-nav__item"> |
| <a href="#pyiceberg.io.pyarrow.PyArrowSchemaVisitor.after_field" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| after_field |
| </span> |
| </a> |
| |
| </li> |
| |
| <li class="md-nav__item"> |
| <a href="#pyiceberg.io.pyarrow.PyArrowSchemaVisitor.after_list_element" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| after_list_element |
| </span> |
| </a> |
| |
| </li> |
| |
| <li class="md-nav__item"> |
| <a href="#pyiceberg.io.pyarrow.PyArrowSchemaVisitor.after_map_key" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| after_map_key |
| </span> |
| </a> |
| |
| </li> |
| |
| <li class="md-nav__item"> |
| <a href="#pyiceberg.io.pyarrow.PyArrowSchemaVisitor.after_map_value" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| after_map_value |
| </span> |
| </a> |
| |
| </li> |
| |
| <li class="md-nav__item"> |
| <a href="#pyiceberg.io.pyarrow.PyArrowSchemaVisitor.before_field" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| before_field |
| </span> |
| </a> |
| |
| </li> |
| |
| <li class="md-nav__item"> |
| <a href="#pyiceberg.io.pyarrow.PyArrowSchemaVisitor.before_list_element" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| before_list_element |
| </span> |
| </a> |
| |
| </li> |
| |
| <li class="md-nav__item"> |
| <a href="#pyiceberg.io.pyarrow.PyArrowSchemaVisitor.before_map_key" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| before_map_key |
| </span> |
| </a> |
| |
| </li> |
| |
| <li class="md-nav__item"> |
| <a href="#pyiceberg.io.pyarrow.PyArrowSchemaVisitor.before_map_value" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| before_map_value |
| </span> |
| </a> |
| |
| </li> |
| |
| <li class="md-nav__item"> |
| <a href="#pyiceberg.io.pyarrow.PyArrowSchemaVisitor.field" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| field |
| </span> |
| </a> |
| |
| </li> |
| |
| <li class="md-nav__item"> |
| <a href="#pyiceberg.io.pyarrow.PyArrowSchemaVisitor.list" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| list |
| </span> |
| </a> |
| |
| </li> |
| |
| <li class="md-nav__item"> |
| <a href="#pyiceberg.io.pyarrow.PyArrowSchemaVisitor.map" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| map |
| </span> |
| </a> |
| |
| </li> |
| |
| <li class="md-nav__item"> |
| <a href="#pyiceberg.io.pyarrow.PyArrowSchemaVisitor.primitive" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| primitive |
| </span> |
| </a> |
| |
| </li> |
| |
| <li class="md-nav__item"> |
| <a href="#pyiceberg.io.pyarrow.PyArrowSchemaVisitor.schema" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| schema |
| </span> |
| </a> |
| |
| </li> |
| |
| <li class="md-nav__item"> |
| <a href="#pyiceberg.io.pyarrow.PyArrowSchemaVisitor.struct" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| struct |
| </span> |
| </a> |
| |
| </li> |
| |
| </ul> |
| </nav> |
| |
| </li> |
| |
| <li class="md-nav__item"> |
| <a href="#pyiceberg.io.pyarrow.UnsupportedPyArrowTypeException" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| UnsupportedPyArrowTypeException |
| </span> |
| </a> |
| |
| </li> |
| |
| <li class="md-nav__item"> |
| <a href="#pyiceberg.io.pyarrow._ConvertToIceberg" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| _ConvertToIceberg |
| </span> |
| </a> |
| |
| </li> |
| |
| <li class="md-nav__item"> |
| <a href="#pyiceberg.io.pyarrow._ConvertToIcebergWithoutIDs" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| _ConvertToIcebergWithoutIDs |
| </span> |
| </a> |
| |
| </li> |
| |
| <li class="md-nav__item"> |
| <a href="#pyiceberg.io.pyarrow._NullNaNUnmentionedTermsCollector" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| _NullNaNUnmentionedTermsCollector |
| </span> |
| </a> |
| |
| <nav class="md-nav" aria-label="_NullNaNUnmentionedTermsCollector"> |
| <ul class="md-nav__list"> |
| |
| <li class="md-nav__item"> |
| <a href="#pyiceberg.io.pyarrow._NullNaNUnmentionedTermsCollector._handle_explicit_is_nan_or_not" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| _handle_explicit_is_nan_or_not |
| </span> |
| </a> |
| |
| </li> |
| |
| <li class="md-nav__item"> |
| <a href="#pyiceberg.io.pyarrow._NullNaNUnmentionedTermsCollector._handle_explicit_is_null_or_not" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| _handle_explicit_is_null_or_not |
| </span> |
| </a> |
| |
| </li> |
| |
| <li class="md-nav__item"> |
| <a href="#pyiceberg.io.pyarrow._NullNaNUnmentionedTermsCollector._handle_nan_unmentioned" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| _handle_nan_unmentioned |
| </span> |
| </a> |
| |
| </li> |
| |
| <li class="md-nav__item"> |
| <a href="#pyiceberg.io.pyarrow._NullNaNUnmentionedTermsCollector._handle_null_unmentioned" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| _handle_null_unmentioned |
| </span> |
| </a> |
| |
| </li> |
| |
| <li class="md-nav__item"> |
| <a href="#pyiceberg.io.pyarrow._NullNaNUnmentionedTermsCollector.collect" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| collect |
| </span> |
| </a> |
| |
| </li> |
| |
| </ul> |
| </nav> |
| |
| </li> |
| |
| <li class="md-nav__item"> |
| <a href="#pyiceberg.io.pyarrow._check_pyarrow_schema_compatible" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| _check_pyarrow_schema_compatible |
| </span> |
| </a> |
| |
| </li> |
| |
| <li class="md-nav__item"> |
| <a href="#pyiceberg.io.pyarrow._dataframe_to_data_files" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| _dataframe_to_data_files |
| </span> |
| </a> |
| |
| </li> |
| |
| <li class="md-nav__item"> |
| <a href="#pyiceberg.io.pyarrow._determine_partitions" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| _determine_partitions |
| </span> |
| </a> |
| |
| </li> |
| |
| <li class="md-nav__item"> |
| <a href="#pyiceberg.io.pyarrow._expression_to_complementary_pyarrow" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| _expression_to_complementary_pyarrow |
| </span> |
| </a> |
| |
| </li> |
| |
| <li class="md-nav__item"> |
| <a href="#pyiceberg.io.pyarrow._get_column_projection_values" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| _get_column_projection_values |
| </span> |
| </a> |
| |
| </li> |
| |
| <li class="md-nav__item"> |
| <a href="#pyiceberg.io.pyarrow.compute_statistics_plan" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| compute_statistics_plan |
| </span> |
| </a> |
| |
| </li> |
| |
| <li class="md-nav__item"> |
| <a href="#pyiceberg.io.pyarrow.data_file_statistics_from_parquet_metadata" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| data_file_statistics_from_parquet_metadata |
| </span> |
| </a> |
| |
| </li> |
| |
| <li class="md-nav__item"> |
| <a href="#pyiceberg.io.pyarrow.parquet_path_to_id_mapping" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| parquet_path_to_id_mapping |
| </span> |
| </a> |
| |
| </li> |
| |
| <li class="md-nav__item"> |
| <a href="#pyiceberg.io.pyarrow.visit_pyarrow" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| visit_pyarrow |
| </span> |
| </a> |
| |
| </li> |
| |
| </ul> |
| |
| </nav> |
| </div> |
| </div> |
| </div> |
| |
| |
| |
| <div class="md-content" data-md-component="content"> |
| <article class="md-content__inner md-typeset"> |
| |
| |
| |
| |
| |
| |
| |
| |
| <h1>pyarrow</h1> |
| |
| <div class="doc doc-object doc-module"> |
| |
| |
| |
| <a id="pyiceberg.io.pyarrow"></a> |
| <div class="doc doc-contents first"> |
| |
| <p>FileIO implementation that uses pyarrow.fs for reading and writing table files.</p> |
| <p>This file contains a FileIO implementation built on the filesystem interface provided
by PyArrow. It relies on PyArrow's <code>from_uri</code> method, which infers the correct filesystem
type to use. In theory, this lets the set of supported storage types grow naturally
with the pyarrow library.</p> |
| |
| |
| |
| |
| |
| |
| |
| |
| <div class="doc doc-children"> |
| |
| |
| |
| |
| |
| |
| |
| |
| <div class="doc doc-object doc-class"> |
| |
| |
| |
| <h2 id="pyiceberg.io.pyarrow.ArrowScan" class="doc doc-heading"> |
| <code>ArrowScan</code> |
| |
| |
| <a href="#pyiceberg.io.pyarrow.ArrowScan" class="headerlink" title="Permanent link">¶</a></h2> |
| |
| |
| <div class="doc doc-contents "> |
| |
| |
| |
| |
| |
| |
| |
| <details class="quote"> |
| <summary>Source code in <code>pyiceberg/io/pyarrow.py</code></summary> |
| <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-1484">1484</a></span> |
| <span class="normal"><a href="#__codelineno-0-1485">1485</a></span> |
| <span class="normal"><a href="#__codelineno-0-1486">1486</a></span> |
| <span class="normal"><a href="#__codelineno-0-1487">1487</a></span> |
| <span class="normal"><a href="#__codelineno-0-1488">1488</a></span> |
| <span class="normal"><a href="#__codelineno-0-1489">1489</a></span> |
| <span class="normal"><a href="#__codelineno-0-1490">1490</a></span> |
| <span class="normal"><a href="#__codelineno-0-1491">1491</a></span> |
| <span class="normal"><a href="#__codelineno-0-1492">1492</a></span> |
| <span class="normal"><a href="#__codelineno-0-1493">1493</a></span> |
| <span class="normal"><a href="#__codelineno-0-1494">1494</a></span> |
| <span class="normal"><a href="#__codelineno-0-1495">1495</a></span> |
| <span class="normal"><a href="#__codelineno-0-1496">1496</a></span> |
| <span class="normal"><a href="#__codelineno-0-1497">1497</a></span> |
| <span class="normal"><a href="#__codelineno-0-1498">1498</a></span> |
| <span class="normal"><a href="#__codelineno-0-1499">1499</a></span> |
| <span class="normal"><a href="#__codelineno-0-1500">1500</a></span> |
| <span class="normal"><a href="#__codelineno-0-1501">1501</a></span> |
| <span class="normal"><a href="#__codelineno-0-1502">1502</a></span> |
| <span class="normal"><a href="#__codelineno-0-1503">1503</a></span> |
| <span class="normal"><a href="#__codelineno-0-1504">1504</a></span> |
| <span class="normal"><a href="#__codelineno-0-1505">1505</a></span> |
| <span class="normal"><a href="#__codelineno-0-1506">1506</a></span> |
| <span class="normal"><a href="#__codelineno-0-1507">1507</a></span> |
| <span class="normal"><a href="#__codelineno-0-1508">1508</a></span> |
| <span class="normal"><a href="#__codelineno-0-1509">1509</a></span> |
| <span class="normal"><a href="#__codelineno-0-1510">1510</a></span> |
| <span class="normal"><a href="#__codelineno-0-1511">1511</a></span> |
| <span class="normal"><a href="#__codelineno-0-1512">1512</a></span> |
| <span class="normal"><a href="#__codelineno-0-1513">1513</a></span> |
| <span class="normal"><a href="#__codelineno-0-1514">1514</a></span> |
| <span class="normal"><a href="#__codelineno-0-1515">1515</a></span> |
| <span class="normal"><a href="#__codelineno-0-1516">1516</a></span> |
| <span class="normal"><a href="#__codelineno-0-1517">1517</a></span> |
| <span class="normal"><a href="#__codelineno-0-1518">1518</a></span> |
| <span class="normal"><a href="#__codelineno-0-1519">1519</a></span> |
| <span class="normal"><a href="#__codelineno-0-1520">1520</a></span> |
| <span class="normal"><a href="#__codelineno-0-1521">1521</a></span> |
| <span class="normal"><a href="#__codelineno-0-1522">1522</a></span> |
| <span class="normal"><a href="#__codelineno-0-1523">1523</a></span> |
| <span class="normal"><a href="#__codelineno-0-1524">1524</a></span> |
| <span class="normal"><a href="#__codelineno-0-1525">1525</a></span> |
| <span class="normal"><a href="#__codelineno-0-1526">1526</a></span> |
| <span class="normal"><a href="#__codelineno-0-1527">1527</a></span> |
| <span class="normal"><a href="#__codelineno-0-1528">1528</a></span> |
| <span class="normal"><a href="#__codelineno-0-1529">1529</a></span> |
| <span class="normal"><a href="#__codelineno-0-1530">1530</a></span> |
| <span class="normal"><a href="#__codelineno-0-1531">1531</a></span> |
| <span class="normal"><a href="#__codelineno-0-1532">1532</a></span> |
| <span class="normal"><a href="#__codelineno-0-1533">1533</a></span> |
| <span class="normal"><a href="#__codelineno-0-1534">1534</a></span> |
| <span class="normal"><a href="#__codelineno-0-1535">1535</a></span> |
| <span class="normal"><a href="#__codelineno-0-1536">1536</a></span> |
| <span class="normal"><a href="#__codelineno-0-1537">1537</a></span> |
| <span class="normal"><a href="#__codelineno-0-1538">1538</a></span> |
| <span class="normal"><a href="#__codelineno-0-1539">1539</a></span> |
| <span class="normal"><a href="#__codelineno-0-1540">1540</a></span> |
| <span class="normal"><a href="#__codelineno-0-1541">1541</a></span> |
| <span class="normal"><a href="#__codelineno-0-1542">1542</a></span> |
| <span class="normal"><a href="#__codelineno-0-1543">1543</a></span> |
| <span class="normal"><a href="#__codelineno-0-1544">1544</a></span> |
| <span class="normal"><a href="#__codelineno-0-1545">1545</a></span> |
| <span class="normal"><a href="#__codelineno-0-1546">1546</a></span> |
| <span class="normal"><a href="#__codelineno-0-1547">1547</a></span> |
| <span class="normal"><a href="#__codelineno-0-1548">1548</a></span> |
| <span class="normal"><a href="#__codelineno-0-1549">1549</a></span> |
| <span class="normal"><a href="#__codelineno-0-1550">1550</a></span> |
| <span class="normal"><a href="#__codelineno-0-1551">1551</a></span> |
| <span class="normal"><a href="#__codelineno-0-1552">1552</a></span> |
| <span class="normal"><a href="#__codelineno-0-1553">1553</a></span> |
| <span class="normal"><a href="#__codelineno-0-1554">1554</a></span> |
| <span class="normal"><a href="#__codelineno-0-1555">1555</a></span> |
| <span class="normal"><a href="#__codelineno-0-1556">1556</a></span> |
| <span class="normal"><a href="#__codelineno-0-1557">1557</a></span> |
| <span class="normal"><a href="#__codelineno-0-1558">1558</a></span> |
| <span class="normal"><a href="#__codelineno-0-1559">1559</a></span> |
| <span class="normal"><a href="#__codelineno-0-1560">1560</a></span> |
| <span class="normal"><a href="#__codelineno-0-1561">1561</a></span> |
| <span class="normal"><a href="#__codelineno-0-1562">1562</a></span> |
| <span class="normal"><a href="#__codelineno-0-1563">1563</a></span> |
| <span class="normal"><a href="#__codelineno-0-1564">1564</a></span> |
| <span class="normal"><a href="#__codelineno-0-1565">1565</a></span> |
| <span class="normal"><a href="#__codelineno-0-1566">1566</a></span> |
| <span class="normal"><a href="#__codelineno-0-1567">1567</a></span> |
| <span class="normal"><a href="#__codelineno-0-1568">1568</a></span> |
| <span class="normal"><a href="#__codelineno-0-1569">1569</a></span> |
| <span class="normal"><a href="#__codelineno-0-1570">1570</a></span> |
| <span class="normal"><a href="#__codelineno-0-1571">1571</a></span> |
| <span class="normal"><a href="#__codelineno-0-1572">1572</a></span> |
| <span class="normal"><a href="#__codelineno-0-1573">1573</a></span> |
| <span class="normal"><a href="#__codelineno-0-1574">1574</a></span> |
| <span class="normal"><a href="#__codelineno-0-1575">1575</a></span> |
| <span class="normal"><a href="#__codelineno-0-1576">1576</a></span> |
| <span class="normal"><a href="#__codelineno-0-1577">1577</a></span> |
| <span class="normal"><a href="#__codelineno-0-1578">1578</a></span> |
| <span class="normal"><a href="#__codelineno-0-1579">1579</a></span> |
| <span class="normal"><a href="#__codelineno-0-1580">1580</a></span> |
| <span class="normal"><a href="#__codelineno-0-1581">1581</a></span> |
| <span class="normal"><a href="#__codelineno-0-1582">1582</a></span> |
| <span class="normal"><a href="#__codelineno-0-1583">1583</a></span> |
| <span class="normal"><a href="#__codelineno-0-1584">1584</a></span> |
| <span class="normal"><a href="#__codelineno-0-1585">1585</a></span> |
| <span class="normal"><a href="#__codelineno-0-1586">1586</a></span> |
| <span class="normal"><a href="#__codelineno-0-1587">1587</a></span> |
| <span class="normal"><a href="#__codelineno-0-1588">1588</a></span> |
| <span class="normal"><a href="#__codelineno-0-1589">1589</a></span> |
| <span class="normal"><a href="#__codelineno-0-1590">1590</a></span> |
| <span class="normal"><a href="#__codelineno-0-1591">1591</a></span> |
| <span class="normal"><a href="#__codelineno-0-1592">1592</a></span> |
| <span class="normal"><a href="#__codelineno-0-1593">1593</a></span> |
| <span class="normal"><a href="#__codelineno-0-1594">1594</a></span> |
| <span class="normal"><a href="#__codelineno-0-1595">1595</a></span> |
| <span class="normal"><a href="#__codelineno-0-1596">1596</a></span> |
| <span class="normal"><a href="#__codelineno-0-1597">1597</a></span> |
| <span class="normal"><a href="#__codelineno-0-1598">1598</a></span> |
| <span class="normal"><a href="#__codelineno-0-1599">1599</a></span> |
| <span class="normal"><a href="#__codelineno-0-1600">1600</a></span> |
| <span class="normal"><a href="#__codelineno-0-1601">1601</a></span> |
| <span class="normal"><a href="#__codelineno-0-1602">1602</a></span> |
| <span class="normal"><a href="#__codelineno-0-1603">1603</a></span> |
| <span class="normal"><a href="#__codelineno-0-1604">1604</a></span> |
| <span class="normal"><a href="#__codelineno-0-1605">1605</a></span> |
| <span class="normal"><a href="#__codelineno-0-1606">1606</a></span> |
| <span class="normal"><a href="#__codelineno-0-1607">1607</a></span> |
| <span class="normal"><a href="#__codelineno-0-1608">1608</a></span> |
| <span class="normal"><a href="#__codelineno-0-1609">1609</a></span> |
| <span class="normal"><a href="#__codelineno-0-1610">1610</a></span> |
| <span class="normal"><a href="#__codelineno-0-1611">1611</a></span> |
| <span class="normal"><a href="#__codelineno-0-1612">1612</a></span> |
| <span class="normal"><a href="#__codelineno-0-1613">1613</a></span> |
| <span class="normal"><a href="#__codelineno-0-1614">1614</a></span> |
| <span class="normal"><a href="#__codelineno-0-1615">1615</a></span> |
| <span class="normal"><a href="#__codelineno-0-1616">1616</a></span> |
| <span class="normal"><a href="#__codelineno-0-1617">1617</a></span> |
| <span class="normal"><a href="#__codelineno-0-1618">1618</a></span> |
| <span class="normal"><a href="#__codelineno-0-1619">1619</a></span> |
| <span class="normal"><a href="#__codelineno-0-1620">1620</a></span> |
| <span class="normal"><a href="#__codelineno-0-1621">1621</a></span> |
| <span class="normal"><a href="#__codelineno-0-1622">1622</a></span> |
| <span class="normal"><a href="#__codelineno-0-1623">1623</a></span> |
| <span class="normal"><a href="#__codelineno-0-1624">1624</a></span> |
| <span class="normal"><a href="#__codelineno-0-1625">1625</a></span> |
| <span class="normal"><a href="#__codelineno-0-1626">1626</a></span> |
| <span class="normal"><a href="#__codelineno-0-1627">1627</a></span> |
| <span class="normal"><a href="#__codelineno-0-1628">1628</a></span> |
| <span class="normal"><a href="#__codelineno-0-1629">1629</a></span> |
| <span class="normal"><a href="#__codelineno-0-1630">1630</a></span> |
| <span class="normal"><a href="#__codelineno-0-1631">1631</a></span> |
| <span class="normal"><a href="#__codelineno-0-1632">1632</a></span> |
| <span class="normal"><a href="#__codelineno-0-1633">1633</a></span> |
| <span class="normal"><a href="#__codelineno-0-1634">1634</a></span> |
| <span class="normal"><a href="#__codelineno-0-1635">1635</a></span> |
| <span class="normal"><a href="#__codelineno-0-1636">1636</a></span> |
| <span class="normal"><a href="#__codelineno-0-1637">1637</a></span> |
| <span class="normal"><a href="#__codelineno-0-1638">1638</a></span> |
| <span class="normal"><a href="#__codelineno-0-1639">1639</a></span> |
| <span class="normal"><a href="#__codelineno-0-1640">1640</a></span> |
| <span class="normal"><a href="#__codelineno-0-1641">1641</a></span> |
| <span class="normal"><a href="#__codelineno-0-1642">1642</a></span> |
| <span class="normal"><a href="#__codelineno-0-1643">1643</a></span> |
| <span class="normal"><a href="#__codelineno-0-1644">1644</a></span></pre></div></td><td class="code"><div><pre><span></span><code><a id="__codelineno-0-1484" name="__codelineno-0-1484"></a><span class="k">class</span><span class="w"> </span><span class="nc">ArrowScan</span><span class="p">:</span> |
| <a id="__codelineno-0-1485" name="__codelineno-0-1485"></a> <span class="n">_table_metadata</span><span class="p">:</span> <span class="n">TableMetadata</span> |
| <a id="__codelineno-0-1486" name="__codelineno-0-1486"></a> <span class="n">_io</span><span class="p">:</span> <span class="n">FileIO</span> |
| <a id="__codelineno-0-1487" name="__codelineno-0-1487"></a> <span class="n">_projected_schema</span><span class="p">:</span> <span class="n">Schema</span> |
| <a id="__codelineno-0-1488" name="__codelineno-0-1488"></a> <span class="n">_bound_row_filter</span><span class="p">:</span> <span class="n">BooleanExpression</span> |
| <a id="__codelineno-0-1489" name="__codelineno-0-1489"></a> <span class="n">_case_sensitive</span><span class="p">:</span> <span class="nb">bool</span> |
| <a id="__codelineno-0-1490" name="__codelineno-0-1490"></a> <span class="n">_limit</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">int</span><span class="p">]</span> |
| <a id="__codelineno-0-1491" name="__codelineno-0-1491"></a><span class="w"> </span><span class="sd">"""Scan the Iceberg Table and create an Arrow construct.</span> |
| <a id="__codelineno-0-1492" name="__codelineno-0-1492"></a> |
| <a id="__codelineno-0-1493" name="__codelineno-0-1493"></a><span class="sd"> Attributes:</span> |
| <a id="__codelineno-0-1494" name="__codelineno-0-1494"></a><span class="sd"> _table_metadata: Current table metadata of the Iceberg table</span> |
| <a id="__codelineno-0-1495" name="__codelineno-0-1495"></a><span class="sd"> _io: PyIceberg FileIO implementation from which to fetch the io properties</span> |
| <a id="__codelineno-0-1496" name="__codelineno-0-1496"></a><span class="sd"> _projected_schema: Iceberg Schema to project onto the data files</span> |
| <a id="__codelineno-0-1497" name="__codelineno-0-1497"></a><span class="sd"> _bound_row_filter: Schema bound row expression to filter the data with</span> |
| <a id="__codelineno-0-1498" name="__codelineno-0-1498"></a><span class="sd"> _case_sensitive: Case sensitivity when looking up column names</span> |
| <a id="__codelineno-0-1499" name="__codelineno-0-1499"></a><span class="sd"> _limit: Limit the number of records.</span> |
| <a id="__codelineno-0-1500" name="__codelineno-0-1500"></a><span class="sd"> """</span> |
| <a id="__codelineno-0-1501" name="__codelineno-0-1501"></a> |
| <a id="__codelineno-0-1502" name="__codelineno-0-1502"></a> <span class="k">def</span><span class="w"> </span><span class="fm">__init__</span><span class="p">(</span> |
| <a id="__codelineno-0-1503" name="__codelineno-0-1503"></a> <span class="bp">self</span><span class="p">,</span> |
| <a id="__codelineno-0-1504" name="__codelineno-0-1504"></a> <span class="n">table_metadata</span><span class="p">:</span> <span class="n">TableMetadata</span><span class="p">,</span> |
| <a id="__codelineno-0-1505" name="__codelineno-0-1505"></a> <span class="n">io</span><span class="p">:</span> <span class="n">FileIO</span><span class="p">,</span> |
| <a id="__codelineno-0-1506" name="__codelineno-0-1506"></a> <span class="n">projected_schema</span><span class="p">:</span> <span class="n">Schema</span><span class="p">,</span> |
| <a id="__codelineno-0-1507" name="__codelineno-0-1507"></a> <span class="n">row_filter</span><span class="p">:</span> <span class="n">BooleanExpression</span><span class="p">,</span> |
| <a id="__codelineno-0-1508" name="__codelineno-0-1508"></a> <span class="n">case_sensitive</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">True</span><span class="p">,</span> |
| <a id="__codelineno-0-1509" name="__codelineno-0-1509"></a> <span class="n">limit</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">int</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> |
| <a id="__codelineno-0-1510" name="__codelineno-0-1510"></a> <span class="p">)</span> <span class="o">-></span> <span class="kc">None</span><span class="p">:</span> |
| <a id="__codelineno-0-1511" name="__codelineno-0-1511"></a> <span class="bp">self</span><span class="o">.</span><span class="n">_table_metadata</span> <span class="o">=</span> <span class="n">table_metadata</span> |
| <a id="__codelineno-0-1512" name="__codelineno-0-1512"></a> <span class="bp">self</span><span class="o">.</span><span class="n">_io</span> <span class="o">=</span> <span class="n">io</span> |
| <a id="__codelineno-0-1513" name="__codelineno-0-1513"></a> <span class="bp">self</span><span class="o">.</span><span class="n">_projected_schema</span> <span class="o">=</span> <span class="n">projected_schema</span> |
| <a id="__codelineno-0-1514" name="__codelineno-0-1514"></a> <span class="bp">self</span><span class="o">.</span><span class="n">_bound_row_filter</span> <span class="o">=</span> <span class="n">bind</span><span class="p">(</span><span class="n">table_metadata</span><span class="o">.</span><span class="n">schema</span><span class="p">(),</span> <span class="n">row_filter</span><span class="p">,</span> <span class="n">case_sensitive</span><span class="o">=</span><span class="n">case_sensitive</span><span class="p">)</span> |
| <a id="__codelineno-0-1515" name="__codelineno-0-1515"></a> <span class="bp">self</span><span class="o">.</span><span class="n">_case_sensitive</span> <span class="o">=</span> <span class="n">case_sensitive</span> |
| <a id="__codelineno-0-1516" name="__codelineno-0-1516"></a> <span class="bp">self</span><span class="o">.</span><span class="n">_limit</span> <span class="o">=</span> <span class="n">limit</span> |
| <a id="__codelineno-0-1517" name="__codelineno-0-1517"></a> |
| <a id="__codelineno-0-1518" name="__codelineno-0-1518"></a> <span class="nd">@property</span> |
| <a id="__codelineno-0-1519" name="__codelineno-0-1519"></a> <span class="k">def</span><span class="w"> </span><span class="nf">_use_large_types</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="nb">bool</span><span class="p">:</span> |
| <a id="__codelineno-0-1520" name="__codelineno-0-1520"></a><span class="w"> </span><span class="sd">"""Whether to represent data as large arrow types.</span> |
| <a id="__codelineno-0-1521" name="__codelineno-0-1521"></a> |
| <a id="__codelineno-0-1522" name="__codelineno-0-1522"></a><span class="sd"> Defaults to True.</span> |
| <a id="__codelineno-0-1523" name="__codelineno-0-1523"></a><span class="sd"> """</span> |
| <a id="__codelineno-0-1524" name="__codelineno-0-1524"></a> <span class="k">return</span> <span class="n">property_as_bool</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_io</span><span class="o">.</span><span class="n">properties</span><span class="p">,</span> <span class="n">PYARROW_USE_LARGE_TYPES_ON_READ</span><span class="p">,</span> <span class="kc">True</span><span class="p">)</span> |
| <a id="__codelineno-0-1525" name="__codelineno-0-1525"></a> |
| <a id="__codelineno-0-1526" name="__codelineno-0-1526"></a> <span class="nd">@property</span> |
| <a id="__codelineno-0-1527" name="__codelineno-0-1527"></a> <span class="k">def</span><span class="w"> </span><span class="nf">_projected_field_ids</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="n">Set</span><span class="p">[</span><span class="nb">int</span><span class="p">]:</span> |
| <a id="__codelineno-0-1528" name="__codelineno-0-1528"></a><span class="w"> </span><span class="sd">"""Set of field IDs that should be projected from the data files."""</span> |
| <a id="__codelineno-0-1529" name="__codelineno-0-1529"></a> <span class="k">return</span> <span class="p">{</span> |
| <a id="__codelineno-0-1530" name="__codelineno-0-1530"></a> <span class="nb">id</span> |
| <a id="__codelineno-0-1531" name="__codelineno-0-1531"></a> <span class="k">for</span> <span class="nb">id</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">_projected_schema</span><span class="o">.</span><span class="n">field_ids</span> |
| <a id="__codelineno-0-1532" name="__codelineno-0-1532"></a> <span class="k">if</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_projected_schema</span><span class="o">.</span><span class="n">find_type</span><span class="p">(</span><span class="nb">id</span><span class="p">),</span> <span class="p">(</span><span class="n">MapType</span><span class="p">,</span> <span class="n">ListType</span><span class="p">))</span> |
| <a id="__codelineno-0-1533" name="__codelineno-0-1533"></a> <span class="p">}</span><span class="o">.</span><span class="n">union</span><span class="p">(</span><span class="n">extract_field_ids</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_bound_row_filter</span><span class="p">))</span> |
| <a id="__codelineno-0-1534" name="__codelineno-0-1534"></a> |
| <a id="__codelineno-0-1535" name="__codelineno-0-1535"></a> <span class="k">def</span><span class="w"> </span><span class="nf">to_table</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">tasks</span><span class="p">:</span> <span class="n">Iterable</span><span class="p">[</span><span class="n">FileScanTask</span><span class="p">])</span> <span class="o">-></span> <span class="n">pa</span><span class="o">.</span><span class="n">Table</span><span class="p">:</span> |
| <a id="__codelineno-0-1536" name="__codelineno-0-1536"></a><span class="w"> </span><span class="sd">"""Scan the Iceberg table and return a pa.Table.</span> |
| <a id="__codelineno-0-1537" name="__codelineno-0-1537"></a> |
| <a id="__codelineno-0-1538" name="__codelineno-0-1538"></a><span class="sd"> Returns a pa.Table with data from the Iceberg table by resolving the</span> |
| <a id="__codelineno-0-1539" name="__codelineno-0-1539"></a><span class="sd"> right columns that match the current table schema. Only data that</span> |
| <a id="__codelineno-0-1540" name="__codelineno-0-1540"></a><span class="sd"> matches the provided row_filter expression is returned.</span> |
| <a id="__codelineno-0-1541" name="__codelineno-0-1541"></a> |
| <a id="__codelineno-0-1542" name="__codelineno-0-1542"></a><span class="sd"> Args:</span> |
| <a id="__codelineno-0-1543" name="__codelineno-0-1543"></a><span class="sd"> tasks: FileScanTasks representing the data files and delete files to read from.</span> |
| <a id="__codelineno-0-1544" name="__codelineno-0-1544"></a> |
| <a id="__codelineno-0-1545" name="__codelineno-0-1545"></a><span class="sd"> Returns:</span> |
| <a id="__codelineno-0-1546" name="__codelineno-0-1546"></a><span class="sd"> A PyArrow table. Total number of rows will be capped if specified.</span> |
| <a id="__codelineno-0-1547" name="__codelineno-0-1547"></a> |
| <a id="__codelineno-0-1548" name="__codelineno-0-1548"></a><span class="sd"> Raises:</span> |
| <a id="__codelineno-0-1549" name="__codelineno-0-1549"></a><span class="sd"> ResolveError: When a required field cannot be found in the file</span> |
| <a id="__codelineno-0-1550" name="__codelineno-0-1550"></a><span class="sd"> ValueError: When a field type in the file cannot be projected to the schema type</span> |
| <a id="__codelineno-0-1551" name="__codelineno-0-1551"></a><span class="sd"> """</span> |
| <a id="__codelineno-0-1552" name="__codelineno-0-1552"></a> <span class="n">deletes_per_file</span> <span class="o">=</span> <span class="n">_read_all_delete_files</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_io</span><span class="p">,</span> <span class="n">tasks</span><span class="p">)</span> |
| <a id="__codelineno-0-1553" name="__codelineno-0-1553"></a> <span class="n">executor</span> <span class="o">=</span> <span class="n">ExecutorFactory</span><span class="o">.</span><span class="n">get_or_create</span><span class="p">()</span> |
| <a id="__codelineno-0-1554" name="__codelineno-0-1554"></a> |
| <a id="__codelineno-0-1555" name="__codelineno-0-1555"></a> <span class="k">def</span><span class="w"> </span><span class="nf">_table_from_scan_task</span><span class="p">(</span><span class="n">task</span><span class="p">:</span> <span class="n">FileScanTask</span><span class="p">)</span> <span class="o">-></span> <span class="n">pa</span><span class="o">.</span><span class="n">Table</span><span class="p">:</span> |
| <a id="__codelineno-0-1556" name="__codelineno-0-1556"></a> <span class="n">batches</span> <span class="o">=</span> <span class="nb">list</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_record_batches_from_scan_tasks_and_deletes</span><span class="p">([</span><span class="n">task</span><span class="p">],</span> <span class="n">deletes_per_file</span><span class="p">))</span> |
| <a id="__codelineno-0-1557" name="__codelineno-0-1557"></a> <span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">batches</span><span class="p">)</span> <span class="o">></span> <span class="mi">0</span><span class="p">:</span> |
| <a id="__codelineno-0-1558" name="__codelineno-0-1558"></a> <span class="k">return</span> <span class="n">pa</span><span class="o">.</span><span class="n">Table</span><span class="o">.</span><span class="n">from_batches</span><span class="p">(</span><span class="n">batches</span><span class="p">)</span> |
| <a id="__codelineno-0-1559" name="__codelineno-0-1559"></a> <span class="k">else</span><span class="p">:</span> |
| <a id="__codelineno-0-1560" name="__codelineno-0-1560"></a> <span class="k">return</span> <span class="kc">None</span> |
| <a id="__codelineno-0-1561" name="__codelineno-0-1561"></a> |
| <a id="__codelineno-0-1562" name="__codelineno-0-1562"></a> <span class="n">futures</span> <span class="o">=</span> <span class="p">[</span> |
| <a id="__codelineno-0-1563" name="__codelineno-0-1563"></a> <span class="n">executor</span><span class="o">.</span><span class="n">submit</span><span class="p">(</span> |
| <a id="__codelineno-0-1564" name="__codelineno-0-1564"></a> <span class="n">_table_from_scan_task</span><span class="p">,</span> |
| <a id="__codelineno-0-1565" name="__codelineno-0-1565"></a> <span class="n">task</span><span class="p">,</span> |
| <a id="__codelineno-0-1566" name="__codelineno-0-1566"></a> <span class="p">)</span> |
| <a id="__codelineno-0-1567" name="__codelineno-0-1567"></a> <span class="k">for</span> <span class="n">task</span> <span class="ow">in</span> <span class="n">tasks</span> |
| <a id="__codelineno-0-1568" name="__codelineno-0-1568"></a> <span class="p">]</span> |
| <a id="__codelineno-0-1569" name="__codelineno-0-1569"></a> <span class="n">total_row_count</span> <span class="o">=</span> <span class="mi">0</span> |
| <a id="__codelineno-0-1570" name="__codelineno-0-1570"></a> <span class="c1"># for consistent ordering, we need to maintain future order</span> |
| <a id="__codelineno-0-1571" name="__codelineno-0-1571"></a> <span class="n">futures_index</span> <span class="o">=</span> <span class="p">{</span><span class="n">f</span><span class="p">:</span> <span class="n">i</span> <span class="k">for</span> <span class="n">i</span><span class="p">,</span> <span class="n">f</span> <span class="ow">in</span> <span class="nb">enumerate</span><span class="p">(</span><span class="n">futures</span><span class="p">)}</span> |
| <a id="__codelineno-0-1572" name="__codelineno-0-1572"></a> <span class="n">completed_futures</span><span class="p">:</span> <span class="n">SortedList</span><span class="p">[</span><span class="n">Future</span><span class="p">[</span><span class="n">pa</span><span class="o">.</span><span class="n">Table</span><span class="p">]]</span> <span class="o">=</span> <span class="n">SortedList</span><span class="p">(</span><span class="n">iterable</span><span class="o">=</span><span class="p">[],</span> <span class="n">key</span><span class="o">=</span><span class="k">lambda</span> <span class="n">f</span><span class="p">:</span> <span class="n">futures_index</span><span class="p">[</span><span class="n">f</span><span class="p">])</span> |
| <a id="__codelineno-0-1573" name="__codelineno-0-1573"></a> <span class="k">for</span> <span class="n">future</span> <span class="ow">in</span> <span class="n">concurrent</span><span class="o">.</span><span class="n">futures</span><span class="o">.</span><span class="n">as_completed</span><span class="p">(</span><span class="n">futures</span><span class="p">):</span> |
| <a id="__codelineno-0-1574" name="__codelineno-0-1574"></a> <span class="n">completed_futures</span><span class="o">.</span><span class="n">add</span><span class="p">(</span><span class="n">future</span><span class="p">)</span> |
| <a id="__codelineno-0-1575" name="__codelineno-0-1575"></a> <span class="k">if</span> <span class="n">table_result</span> <span class="o">:=</span> <span class="n">future</span><span class="o">.</span><span class="n">result</span><span class="p">():</span> |
| <a id="__codelineno-0-1576" name="__codelineno-0-1576"></a> <span class="n">total_row_count</span> <span class="o">+=</span> <span class="nb">len</span><span class="p">(</span><span class="n">table_result</span><span class="p">)</span> |
| <a id="__codelineno-0-1577" name="__codelineno-0-1577"></a> <span class="c1"># stop early if limit is satisfied</span> |
| <a id="__codelineno-0-1578" name="__codelineno-0-1578"></a> <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">_limit</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span> <span class="ow">and</span> <span class="n">total_row_count</span> <span class="o">>=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_limit</span><span class="p">:</span> |
| <a id="__codelineno-0-1579" name="__codelineno-0-1579"></a> <span class="k">break</span> |
| <a id="__codelineno-0-1580" name="__codelineno-0-1580"></a> |
| <a id="__codelineno-0-1581" name="__codelineno-0-1581"></a> <span class="c1"># by now, we've either completed all tasks or satisfied the limit</span> |
| <a id="__codelineno-0-1582" name="__codelineno-0-1582"></a> <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">_limit</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span> |
| <a id="__codelineno-0-1583" name="__codelineno-0-1583"></a> <span class="n">_</span> <span class="o">=</span> <span class="p">[</span><span class="n">f</span><span class="o">.</span><span class="n">cancel</span><span class="p">()</span> <span class="k">for</span> <span class="n">f</span> <span class="ow">in</span> <span class="n">futures</span> <span class="k">if</span> <span class="ow">not</span> <span class="n">f</span><span class="o">.</span><span class="n">done</span><span class="p">()]</span> |
| <a id="__codelineno-0-1584" name="__codelineno-0-1584"></a> |
| <a id="__codelineno-0-1585" name="__codelineno-0-1585"></a> <span class="n">tables</span> <span class="o">=</span> <span class="p">[</span><span class="n">f</span><span class="o">.</span><span class="n">result</span><span class="p">()</span> <span class="k">for</span> <span class="n">f</span> <span class="ow">in</span> <span class="n">completed_futures</span> <span class="k">if</span> <span class="n">f</span><span class="o">.</span><span class="n">result</span><span class="p">()]</span> |
| <a id="__codelineno-0-1586" name="__codelineno-0-1586"></a> |
| <a id="__codelineno-0-1587" name="__codelineno-0-1587"></a> <span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">tables</span><span class="p">)</span> <span class="o"><</span> <span class="mi">1</span><span class="p">:</span> |
| <a id="__codelineno-0-1588" name="__codelineno-0-1588"></a> <span class="k">return</span> <span class="n">pa</span><span class="o">.</span><span class="n">Table</span><span class="o">.</span><span class="n">from_batches</span><span class="p">([],</span> <span class="n">schema</span><span class="o">=</span><span class="n">schema_to_pyarrow</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_projected_schema</span><span class="p">,</span> <span class="n">include_field_ids</span><span class="o">=</span><span class="kc">False</span><span class="p">))</span> |
| <a id="__codelineno-0-1589" name="__codelineno-0-1589"></a> |
| <a id="__codelineno-0-1590" name="__codelineno-0-1590"></a> <span class="n">result</span> <span class="o">=</span> <span class="n">pa</span><span class="o">.</span><span class="n">concat_tables</span><span class="p">(</span><span class="n">tables</span><span class="p">,</span> <span class="n">promote_options</span><span class="o">=</span><span class="s2">"permissive"</span><span class="p">)</span> |
| <a id="__codelineno-0-1591" name="__codelineno-0-1591"></a> |
| <a id="__codelineno-0-1592" name="__codelineno-0-1592"></a> <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">_limit</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span> |
| <a id="__codelineno-0-1593" name="__codelineno-0-1593"></a> <span class="k">return</span> <span class="n">result</span><span class="o">.</span><span class="n">slice</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">_limit</span><span class="p">)</span> |
| <a id="__codelineno-0-1594" name="__codelineno-0-1594"></a> |
| <a id="__codelineno-0-1595" name="__codelineno-0-1595"></a> <span class="k">return</span> <span class="n">result</span> |
| <a id="__codelineno-0-1596" name="__codelineno-0-1596"></a> |
| <a id="__codelineno-0-1597" name="__codelineno-0-1597"></a> <span class="k">def</span><span class="w"> </span><span class="nf">to_record_batches</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">tasks</span><span class="p">:</span> <span class="n">Iterable</span><span class="p">[</span><span class="n">FileScanTask</span><span class="p">])</span> <span class="o">-></span> <span class="n">Iterator</span><span class="p">[</span><span class="n">pa</span><span class="o">.</span><span class="n">RecordBatch</span><span class="p">]:</span> |
| <a id="__codelineno-0-1598" name="__codelineno-0-1598"></a><span class="w"> </span><span class="sd">"""Scan the Iceberg table and return an Iterator[pa.RecordBatch].</span> |
| <a id="__codelineno-0-1599" name="__codelineno-0-1599"></a> |
| <a id="__codelineno-0-1600" name="__codelineno-0-1600"></a><span class="sd"> Returns an Iterator of pa.RecordBatch with data from the Iceberg table</span> |
| <a id="__codelineno-0-1601" name="__codelineno-0-1601"></a><span class="sd"> by resolving the right columns that match the current table schema.</span> |
| <a id="__codelineno-0-1602" name="__codelineno-0-1602"></a><span class="sd"> Only data that matches the provided row_filter expression is returned.</span> |
| <a id="__codelineno-0-1603" name="__codelineno-0-1603"></a> |
| <a id="__codelineno-0-1604" name="__codelineno-0-1604"></a><span class="sd"> Args:</span> |
| <a id="__codelineno-0-1605" name="__codelineno-0-1605"></a><span class="sd"> tasks: FileScanTasks representing the data files and delete files to read from.</span> |
| <a id="__codelineno-0-1606" name="__codelineno-0-1606"></a> |
| <a id="__codelineno-0-1607" name="__codelineno-0-1607"></a><span class="sd"> Returns:</span> |
| <a id="__codelineno-0-1608" name="__codelineno-0-1608"></a><span class="sd"> An Iterator of PyArrow RecordBatches.</span> |
| <a id="__codelineno-0-1609" name="__codelineno-0-1609"></a><span class="sd"> Total number of rows will be capped if specified.</span> |
| <a id="__codelineno-0-1610" name="__codelineno-0-1610"></a> |
| <a id="__codelineno-0-1611" name="__codelineno-0-1611"></a><span class="sd"> Raises:</span> |
| <a id="__codelineno-0-1612" name="__codelineno-0-1612"></a><span class="sd"> ResolveError: When a required field cannot be found in the file</span> |
| <a id="__codelineno-0-1613" name="__codelineno-0-1613"></a><span class="sd"> ValueError: When a field type in the file cannot be projected to the schema type</span> |
| <a id="__codelineno-0-1614" name="__codelineno-0-1614"></a><span class="sd"> """</span> |
| <a id="__codelineno-0-1615" name="__codelineno-0-1615"></a> <span class="n">deletes_per_file</span> <span class="o">=</span> <span class="n">_read_all_delete_files</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_io</span><span class="p">,</span> <span class="n">tasks</span><span class="p">)</span> |
| <a id="__codelineno-0-1616" name="__codelineno-0-1616"></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_record_batches_from_scan_tasks_and_deletes</span><span class="p">(</span><span class="n">tasks</span><span class="p">,</span> <span class="n">deletes_per_file</span><span class="p">)</span> |
| <a id="__codelineno-0-1617" name="__codelineno-0-1617"></a> |
| <a id="__codelineno-0-1618" name="__codelineno-0-1618"></a> <span class="k">def</span><span class="w"> </span><span class="nf">_record_batches_from_scan_tasks_and_deletes</span><span class="p">(</span> |
| <a id="__codelineno-0-1619" name="__codelineno-0-1619"></a> <span class="bp">self</span><span class="p">,</span> <span class="n">tasks</span><span class="p">:</span> <span class="n">Iterable</span><span class="p">[</span><span class="n">FileScanTask</span><span class="p">],</span> <span class="n">deletes_per_file</span><span class="p">:</span> <span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">List</span><span class="p">[</span><span class="n">ChunkedArray</span><span class="p">]]</span> |
| <a id="__codelineno-0-1620" name="__codelineno-0-1620"></a> <span class="p">)</span> <span class="o">-></span> <span class="n">Iterator</span><span class="p">[</span><span class="n">pa</span><span class="o">.</span><span class="n">RecordBatch</span><span class="p">]:</span> |
| <a id="__codelineno-0-1621" name="__codelineno-0-1621"></a> <span class="n">total_row_count</span> <span class="o">=</span> <span class="mi">0</span> |
| <a id="__codelineno-0-1622" name="__codelineno-0-1622"></a> <span class="k">for</span> <span class="n">task</span> <span class="ow">in</span> <span class="n">tasks</span><span class="p">:</span> |
| <a id="__codelineno-0-1623" name="__codelineno-0-1623"></a> <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">_limit</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span> <span class="ow">and</span> <span class="n">total_row_count</span> <span class="o">>=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_limit</span><span class="p">:</span> |
| <a id="__codelineno-0-1624" name="__codelineno-0-1624"></a> <span class="k">break</span> |
| <a id="__codelineno-0-1625" name="__codelineno-0-1625"></a> <span class="n">batches</span> <span class="o">=</span> <span class="n">_task_to_record_batches</span><span class="p">(</span> |
| <a id="__codelineno-0-1626" name="__codelineno-0-1626"></a> <span class="n">_fs_from_file_path</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_io</span><span class="p">,</span> <span class="n">task</span><span class="o">.</span><span class="n">file</span><span class="o">.</span><span class="n">file_path</span><span class="p">),</span> |
| <a id="__codelineno-0-1627" name="__codelineno-0-1627"></a> <span class="n">task</span><span class="p">,</span> |
| <a id="__codelineno-0-1628" name="__codelineno-0-1628"></a> <span class="bp">self</span><span class="o">.</span><span class="n">_bound_row_filter</span><span class="p">,</span> |
| <a id="__codelineno-0-1629" name="__codelineno-0-1629"></a> <span class="bp">self</span><span class="o">.</span><span class="n">_projected_schema</span><span class="p">,</span> |
| <a id="__codelineno-0-1630" name="__codelineno-0-1630"></a> <span class="bp">self</span><span class="o">.</span><span class="n">_projected_field_ids</span><span class="p">,</span> |
| <a id="__codelineno-0-1631" name="__codelineno-0-1631"></a> <span class="n">deletes_per_file</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">task</span><span class="o">.</span><span class="n">file</span><span class="o">.</span><span class="n">file_path</span><span class="p">),</span> |
| <a id="__codelineno-0-1632" name="__codelineno-0-1632"></a> <span class="bp">self</span><span class="o">.</span><span class="n">_case_sensitive</span><span class="p">,</span> |
| <a id="__codelineno-0-1633" name="__codelineno-0-1633"></a> <span class="bp">self</span><span class="o">.</span><span class="n">_table_metadata</span><span class="o">.</span><span class="n">name_mapping</span><span class="p">(),</span> |
| <a id="__codelineno-0-1634" name="__codelineno-0-1634"></a> <span class="bp">self</span><span class="o">.</span><span class="n">_use_large_types</span><span class="p">,</span> |
| <a id="__codelineno-0-1635" name="__codelineno-0-1635"></a> <span class="bp">self</span><span class="o">.</span><span class="n">_table_metadata</span><span class="o">.</span><span class="n">spec</span><span class="p">(),</span> |
| <a id="__codelineno-0-1636" name="__codelineno-0-1636"></a> <span class="p">)</span> |
| <a id="__codelineno-0-1637" name="__codelineno-0-1637"></a> <span class="k">for</span> <span class="n">batch</span> <span class="ow">in</span> <span class="n">batches</span><span class="p">:</span> |
| <a id="__codelineno-0-1638" name="__codelineno-0-1638"></a> <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">_limit</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span> |
| <a id="__codelineno-0-1639" name="__codelineno-0-1639"></a> <span class="k">if</span> <span class="n">total_row_count</span> <span class="o">>=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_limit</span><span class="p">:</span> |
| <a id="__codelineno-0-1640" name="__codelineno-0-1640"></a> <span class="k">break</span> |
| <a id="__codelineno-0-1641" name="__codelineno-0-1641"></a> <span class="k">elif</span> <span class="n">total_row_count</span> <span class="o">+</span> <span class="nb">len</span><span class="p">(</span><span class="n">batch</span><span class="p">)</span> <span class="o">>=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_limit</span><span class="p">:</span> |
| <a id="__codelineno-0-1642" name="__codelineno-0-1642"></a> <span class="n">batch</span> <span class="o">=</span> <span class="n">batch</span><span class="o">.</span><span class="n">slice</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">_limit</span> <span class="o">-</span> <span class="n">total_row_count</span><span class="p">)</span> |
| <a id="__codelineno-0-1643" name="__codelineno-0-1643"></a> <span class="k">yield</span> <span class="n">batch</span> |
| <a id="__codelineno-0-1644" name="__codelineno-0-1644"></a> <span class="n">total_row_count</span> <span class="o">+=</span> <span class="nb">len</span><span class="p">(</span><span class="n">batch</span><span class="p">)</span> |
| </code></pre></div></td></tr></table></div> |
| </details> |
| |
| |
| |
| <div class="doc doc-children"> |
| |
| |
| |
| |
| |
| |
| |
| <div class="doc doc-object doc-attribute"> |
| |
| |
| |
| <h3 id="pyiceberg.io.pyarrow.ArrowScan._limit" class="doc doc-heading"> |
| <code class="highlight language-python"><span class="n">_limit</span> <span class="o">=</span> <span class="n">limit</span></code> |
| |
| <span class="doc doc-labels"> |
| <small class="doc doc-label doc-label-instance-attribute"><code>instance-attribute</code></small> |
| </span> |
| |
| <a href="#pyiceberg.io.pyarrow.ArrowScan._limit" class="headerlink" title="Permanent link">¶</a></h3> |
| |
| |
| <div class="doc doc-contents "> |
| |
| <p>Scan the Iceberg Table and create an Arrow construct.</p> |
| |
| |
| <p><span class="doc-section-title">Attributes:</span></p> |
| <table> |
| <thead> |
| <tr> |
| <th>Name</th> |
| <th>Type</th> |
| <th>Description</th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr class="doc-section-item"> |
| <td><code><span title="pyiceberg.io.pyarrow.ArrowScan._table_metadata">_table_metadata</span></code></td> |
| <td> |
| </td> |
| <td> |
| <div class="doc-md-description"> |
| <p>Current table metadata of the Iceberg table</p> |
| </div> |
| </td> |
| </tr> |
| <tr class="doc-section-item"> |
| <td><code><span title="pyiceberg.io.pyarrow.ArrowScan._io">_io</span></code></td> |
| <td> |
| </td> |
| <td> |
| <div class="doc-md-description"> |
| <p>PyIceberg FileIO implementation from which to fetch the io properties</p> |
| </div> |
| </td> |
| </tr> |
| <tr class="doc-section-item"> |
| <td><code><span title="pyiceberg.io.pyarrow.ArrowScan._projected_schema">_projected_schema</span></code></td> |
| <td> |
| </td> |
| <td> |
| <div class="doc-md-description"> |
| <p>Iceberg Schema to project onto the data files</p> |
| </div> |
| </td> |
| </tr> |
| <tr class="doc-section-item"> |
| <td><code><span title="pyiceberg.io.pyarrow.ArrowScan._bound_row_filter">_bound_row_filter</span></code></td> |
| <td> |
| </td> |
| <td> |
| <div class="doc-md-description"> |
| <p>Schema bound row expression to filter the data with</p> |
| </div> |
| </td> |
| </tr> |
| <tr class="doc-section-item"> |
| <td><code><span title="pyiceberg.io.pyarrow.ArrowScan._case_sensitive">_case_sensitive</span></code></td> |
| <td> |
| </td> |
| <td> |
| <div class="doc-md-description"> |
| <p>Case sensitivity when looking up column names</p> |
| </div> |
| </td> |
| </tr> |
| <tr class="doc-section-item"> |
| <td><code><span title="pyiceberg.io.pyarrow.ArrowScan._limit">_limit</span></code></td> |
| <td> |
| </td> |
| <td> |
| <div class="doc-md-description"> |
| <p>Limit the number of records.</p> |
| </div> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </div> |
| |
| </div> |
| |
| <div class="doc doc-object doc-attribute"> |
| |
| |
| |
| <h3 id="pyiceberg.io.pyarrow.ArrowScan._projected_field_ids" class="doc doc-heading"> |
| <code class="highlight language-python"><span class="n">_projected_field_ids</span></code> |
| |
| <span class="doc doc-labels"> |
| <small class="doc doc-label doc-label-property"><code>property</code></small> |
| </span> |
| |
| <a href="#pyiceberg.io.pyarrow.ArrowScan._projected_field_ids" class="headerlink" title="Permanent link">¶</a></h3> |
| |
| |
| <div class="doc doc-contents "> |
| |
| <p>Set of field IDs that should be projected from the data files.</p> |
| </div> |
| |
| </div> |
| |
| <div class="doc doc-object doc-attribute"> |
| |
| |
| |
| <h3 id="pyiceberg.io.pyarrow.ArrowScan._use_large_types" class="doc doc-heading"> |
| <code class="highlight language-python"><span class="n">_use_large_types</span></code> |
| |
| <span class="doc doc-labels"> |
| <small class="doc doc-label doc-label-property"><code>property</code></small> |
| </span> |
| |
| <a href="#pyiceberg.io.pyarrow.ArrowScan._use_large_types" class="headerlink" title="Permanent link">¶</a></h3> |
| |
| |
| <div class="doc doc-contents "> |
| |
| <p>Whether to represent data as large arrow types.</p> |
| <p>Defaults to True.</p> |
| </div> |
| |
| </div> |
| |
| |
| |
| <div class="doc doc-object doc-function"> |
| |
| |
| <h3 id="pyiceberg.io.pyarrow.ArrowScan.to_record_batches" class="doc doc-heading"> |
| <code class="highlight language-python"><span class="n">to_record_batches</span><span class="p">(</span><span class="n">tasks</span><span class="p">)</span></code> |
| |
| <a href="#pyiceberg.io.pyarrow.ArrowScan.to_record_batches" class="headerlink" title="Permanent link">¶</a></h3> |
| |
| |
| <div class="doc doc-contents "> |
| |
| <p>Scan the Iceberg table and return an Iterator[pa.RecordBatch].</p> |
| <p>Returns an Iterator of pa.RecordBatch with data from the Iceberg table |
| by resolving the right columns that match the current table schema. |
| Only data that matches the provided row_filter expression is returned.</p> |
| |
| |
| <p><span class="doc-section-title">Parameters:</span></p> |
| <table> |
| <thead> |
| <tr> |
| <th>Name</th> |
| <th>Type</th> |
| <th>Description</th> |
| <th>Default</th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr class="doc-section-item"> |
| <td> |
| <code>tasks</code> |
| </td> |
| <td> |
| <code><span title="typing.Iterable">Iterable</span>[<a class="autorefs autorefs-internal" title="pyiceberg.table.FileScanTask" href="../../table/#pyiceberg.table.FileScanTask">FileScanTask</a>]</code> |
| </td> |
| <td> |
| <div class="doc-md-description"> |
| <p>FileScanTasks representing the data files and delete files to read from.</p> |
| </div> |
| </td> |
| <td> |
| <em>required</em> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| |
| |
| <p><span class="doc-section-title">Returns:</span></p> |
| <table> |
| <thead> |
| <tr> |
| <th>Type</th> |
| <th>Description</th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr class="doc-section-item"> |
| <td> |
| <code><span title="typing.Iterator">Iterator</span>[<span title="pyarrow.RecordBatch">RecordBatch</span>]</code> |
| </td> |
| <td> |
| <div class="doc-md-description"> |
| <p>An Iterator of PyArrow RecordBatches. Total number of rows will be capped if specified.</p> |
| </div> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| |
| |
| <p><span class="doc-section-title">Raises:</span></p> |
| <table> |
| <thead> |
| <tr> |
| <th>Type</th> |
| <th>Description</th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr class="doc-section-item"> |
| <td> |
| <code><span title="pyiceberg.exceptions.ResolveError">ResolveError</span></code> |
| </td> |
| <td> |
| <div class="doc-md-description"> |
| <p>When a required field cannot be found in the file</p> |
| </div> |
| </td> |
| </tr> |
| <tr class="doc-section-item"> |
| <td> |
| <code><span title="ValueError">ValueError</span></code> |
| </td> |
| <td> |
| <div class="doc-md-description"> |
| <p>When a field type in the file cannot be projected to the schema type</p> |
| </div> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| |
| <details class="quote"> |
| <summary>Source code in <code>pyiceberg/io/pyarrow.py</code></summary> |
| <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-1597">1597</a></span> |
| <span class="normal"><a href="#__codelineno-0-1598">1598</a></span> |
| <span class="normal"><a href="#__codelineno-0-1599">1599</a></span> |
| <span class="normal"><a href="#__codelineno-0-1600">1600</a></span> |
| <span class="normal"><a href="#__codelineno-0-1601">1601</a></span> |
| <span class="normal"><a href="#__codelineno-0-1602">1602</a></span> |
| <span class="normal"><a href="#__codelineno-0-1603">1603</a></span> |
| <span class="normal"><a href="#__codelineno-0-1604">1604</a></span> |
| <span class="normal"><a href="#__codelineno-0-1605">1605</a></span> |
| <span class="normal"><a href="#__codelineno-0-1606">1606</a></span> |
| <span class="normal"><a href="#__codelineno-0-1607">1607</a></span> |
| <span class="normal"><a href="#__codelineno-0-1608">1608</a></span> |
| <span class="normal"><a href="#__codelineno-0-1609">1609</a></span> |
| <span class="normal"><a href="#__codelineno-0-1610">1610</a></span> |
| <span class="normal"><a href="#__codelineno-0-1611">1611</a></span> |
| <span class="normal"><a href="#__codelineno-0-1612">1612</a></span> |
| <span class="normal"><a href="#__codelineno-0-1613">1613</a></span> |
| <span class="normal"><a href="#__codelineno-0-1614">1614</a></span> |
| <span class="normal"><a href="#__codelineno-0-1615">1615</a></span> |
| <span class="normal"><a href="#__codelineno-0-1616">1616</a></span></pre></div></td><td class="code"><div><pre><span></span><code><a id="__codelineno-0-1597" name="__codelineno-0-1597"></a><span class="k">def</span><span class="w"> </span><span class="nf">to_record_batches</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">tasks</span><span class="p">:</span> <span class="n">Iterable</span><span class="p">[</span><span class="n">FileScanTask</span><span class="p">])</span> <span class="o">-></span> <span class="n">Iterator</span><span class="p">[</span><span class="n">pa</span><span class="o">.</span><span class="n">RecordBatch</span><span class="p">]:</span> |
| <a id="__codelineno-0-1598" name="__codelineno-0-1598"></a><span class="w"> </span><span class="sd">"""Scan the Iceberg table and return an Iterator[pa.RecordBatch].</span> |
| <a id="__codelineno-0-1599" name="__codelineno-0-1599"></a> |
| <a id="__codelineno-0-1600" name="__codelineno-0-1600"></a><span class="sd"> Returns an Iterator of pa.RecordBatch with data from the Iceberg table</span> |
| <a id="__codelineno-0-1601" name="__codelineno-0-1601"></a><span class="sd"> by resolving the right columns that match the current table schema.</span> |
| <a id="__codelineno-0-1602" name="__codelineno-0-1602"></a><span class="sd"> Only data that matches the provided row_filter expression is returned.</span> |
| <a id="__codelineno-0-1603" name="__codelineno-0-1603"></a> |
| <a id="__codelineno-0-1604" name="__codelineno-0-1604"></a><span class="sd"> Args:</span> |
| <a id="__codelineno-0-1605" name="__codelineno-0-1605"></a><span class="sd"> tasks: FileScanTasks representing the data files and delete files to read from.</span> |
| <a id="__codelineno-0-1606" name="__codelineno-0-1606"></a> |
| <a id="__codelineno-0-1607" name="__codelineno-0-1607"></a><span class="sd"> Returns:</span> |
| <a id="__codelineno-0-1608" name="__codelineno-0-1608"></a><span class="sd"> An Iterator of PyArrow RecordBatches.</span> |
| <a id="__codelineno-0-1609" name="__codelineno-0-1609"></a><span class="sd"> Total number of rows will be capped if specified.</span> |
| <a id="__codelineno-0-1610" name="__codelineno-0-1610"></a> |
| <a id="__codelineno-0-1611" name="__codelineno-0-1611"></a><span class="sd"> Raises:</span> |
| <a id="__codelineno-0-1612" name="__codelineno-0-1612"></a><span class="sd"> ResolveError: When a required field cannot be found in the file</span> |
| <a id="__codelineno-0-1613" name="__codelineno-0-1613"></a><span class="sd"> ValueError: When a field type in the file cannot be projected to the schema type</span> |
| <a id="__codelineno-0-1614" name="__codelineno-0-1614"></a><span class="sd"> """</span> |
| <a id="__codelineno-0-1615" name="__codelineno-0-1615"></a> <span class="n">deletes_per_file</span> <span class="o">=</span> <span class="n">_read_all_delete_files</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_io</span><span class="p">,</span> <span class="n">tasks</span><span class="p">)</span> |
| <a id="__codelineno-0-1616" name="__codelineno-0-1616"></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_record_batches_from_scan_tasks_and_deletes</span><span class="p">(</span><span class="n">tasks</span><span class="p">,</span> <span class="n">deletes_per_file</span><span class="p">)</span> |
| </code></pre></div></td></tr></table></div> |
| </details> |
| </div> |
| |
| </div> |
| |
| <div class="doc doc-object doc-function"> |
| |
| |
| <h3 id="pyiceberg.io.pyarrow.ArrowScan.to_table" class="doc doc-heading"> |
| <code class="highlight language-python"><span class="n">to_table</span><span class="p">(</span><span class="n">tasks</span><span class="p">)</span></code> |
| |
| <a href="#pyiceberg.io.pyarrow.ArrowScan.to_table" class="headerlink" title="Permanent link">¶</a></h3> |
| |
| |
| <div class="doc doc-contents "> |
| |
| <p>Scan the Iceberg table and return a pa.Table.</p> |
| <p>Returns a pa.Table with data from the Iceberg table by resolving the |
| right columns that match the current table schema. Only data that |
| matches the provided row_filter expression is returned.</p> |
| |
| |
| <p><span class="doc-section-title">Parameters:</span></p> |
| <table> |
| <thead> |
| <tr> |
| <th>Name</th> |
| <th>Type</th> |
| <th>Description</th> |
| <th>Default</th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr class="doc-section-item"> |
| <td> |
| <code>tasks</code> |
| </td> |
| <td> |
| <code><span title="typing.Iterable">Iterable</span>[<a class="autorefs autorefs-internal" title="pyiceberg.table.FileScanTask" href="../../table/#pyiceberg.table.FileScanTask">FileScanTask</a>]</code> |
| </td> |
| <td> |
| <div class="doc-md-description"> |
| <p>FileScanTasks representing the data files and delete files to read from.</p> |
| </div> |
| </td> |
| <td> |
| <em>required</em> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| |
| |
| <p><span class="doc-section-title">Returns:</span></p> |
| <table> |
| <thead> |
| <tr> |
| <th>Type</th> |
| <th>Description</th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr class="doc-section-item"> |
| <td> |
| <code><span title="pyarrow.Table">Table</span></code> |
| </td> |
| <td> |
| <div class="doc-md-description"> |
| <p>A PyArrow table. Total number of rows will be capped if specified.</p> |
| </div> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| |
| |
| <p><span class="doc-section-title">Raises:</span></p> |
| <table> |
| <thead> |
| <tr> |
| <th>Type</th> |
| <th>Description</th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr class="doc-section-item"> |
| <td> |
| <code><span title="pyiceberg.exceptions.ResolveError">ResolveError</span></code> |
| </td> |
| <td> |
| <div class="doc-md-description"> |
| <p>When a required field cannot be found in the file</p> |
| </div> |
| </td> |
| </tr> |
| <tr class="doc-section-item"> |
| <td> |
| <code><span title="ValueError">ValueError</span></code> |
| </td> |
| <td> |
| <div class="doc-md-description"> |
| <p>When a field type in the file cannot be projected to the schema type</p> |
| </div> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| |
| <details class="quote"> |
| <summary>Source code in <code>pyiceberg/io/pyarrow.py</code></summary> |
| <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-1535">1535</a></span> |
| <span class="normal"><a href="#__codelineno-0-1536">1536</a></span> |
| <span class="normal"><a href="#__codelineno-0-1537">1537</a></span> |
| <span class="normal"><a href="#__codelineno-0-1538">1538</a></span> |
| <span class="normal"><a href="#__codelineno-0-1539">1539</a></span> |
| <span class="normal"><a href="#__codelineno-0-1540">1540</a></span> |
| <span class="normal"><a href="#__codelineno-0-1541">1541</a></span> |
| <span class="normal"><a href="#__codelineno-0-1542">1542</a></span> |
| <span class="normal"><a href="#__codelineno-0-1543">1543</a></span> |
| <span class="normal"><a href="#__codelineno-0-1544">1544</a></span> |
| <span class="normal"><a href="#__codelineno-0-1545">1545</a></span> |
| <span class="normal"><a href="#__codelineno-0-1546">1546</a></span> |
| <span class="normal"><a href="#__codelineno-0-1547">1547</a></span> |
| <span class="normal"><a href="#__codelineno-0-1548">1548</a></span> |
| <span class="normal"><a href="#__codelineno-0-1549">1549</a></span> |
| <span class="normal"><a href="#__codelineno-0-1550">1550</a></span> |
| <span class="normal"><a href="#__codelineno-0-1551">1551</a></span> |
| <span class="normal"><a href="#__codelineno-0-1552">1552</a></span> |
| <span class="normal"><a href="#__codelineno-0-1553">1553</a></span> |
| <span class="normal"><a href="#__codelineno-0-1554">1554</a></span> |
| <span class="normal"><a href="#__codelineno-0-1555">1555</a></span> |
| <span class="normal"><a href="#__codelineno-0-1556">1556</a></span> |
| <span class="normal"><a href="#__codelineno-0-1557">1557</a></span> |
| <span class="normal"><a href="#__codelineno-0-1558">1558</a></span> |
| <span class="normal"><a href="#__codelineno-0-1559">1559</a></span> |
| <span class="normal"><a href="#__codelineno-0-1560">1560</a></span> |
| <span class="normal"><a href="#__codelineno-0-1561">1561</a></span> |
| <span class="normal"><a href="#__codelineno-0-1562">1562</a></span> |
| <span class="normal"><a href="#__codelineno-0-1563">1563</a></span> |
| <span class="normal"><a href="#__codelineno-0-1564">1564</a></span> |
| <span class="normal"><a href="#__codelineno-0-1565">1565</a></span> |
| <span class="normal"><a href="#__codelineno-0-1566">1566</a></span> |
| <span class="normal"><a href="#__codelineno-0-1567">1567</a></span> |
| <span class="normal"><a href="#__codelineno-0-1568">1568</a></span> |
| <span class="normal"><a href="#__codelineno-0-1569">1569</a></span> |
| <span class="normal"><a href="#__codelineno-0-1570">1570</a></span> |
| <span class="normal"><a href="#__codelineno-0-1571">1571</a></span> |
| <span class="normal"><a href="#__codelineno-0-1572">1572</a></span> |
| <span class="normal"><a href="#__codelineno-0-1573">1573</a></span> |
| <span class="normal"><a href="#__codelineno-0-1574">1574</a></span> |
| <span class="normal"><a href="#__codelineno-0-1575">1575</a></span> |
| <span class="normal"><a href="#__codelineno-0-1576">1576</a></span> |
| <span class="normal"><a href="#__codelineno-0-1577">1577</a></span> |
| <span class="normal"><a href="#__codelineno-0-1578">1578</a></span> |
| <span class="normal"><a href="#__codelineno-0-1579">1579</a></span> |
| <span class="normal"><a href="#__codelineno-0-1580">1580</a></span> |
| <span class="normal"><a href="#__codelineno-0-1581">1581</a></span> |
| <span class="normal"><a href="#__codelineno-0-1582">1582</a></span> |
| <span class="normal"><a href="#__codelineno-0-1583">1583</a></span> |
| <span class="normal"><a href="#__codelineno-0-1584">1584</a></span> |
| <span class="normal"><a href="#__codelineno-0-1585">1585</a></span> |
| <span class="normal"><a href="#__codelineno-0-1586">1586</a></span> |
| <span class="normal"><a href="#__codelineno-0-1587">1587</a></span> |
| <span class="normal"><a href="#__codelineno-0-1588">1588</a></span> |
| <span class="normal"><a href="#__codelineno-0-1589">1589</a></span> |
| <span class="normal"><a href="#__codelineno-0-1590">1590</a></span> |
| <span class="normal"><a href="#__codelineno-0-1591">1591</a></span> |
| <span class="normal"><a href="#__codelineno-0-1592">1592</a></span> |
| <span class="normal"><a href="#__codelineno-0-1593">1593</a></span> |
| <span class="normal"><a href="#__codelineno-0-1594">1594</a></span> |
| <span class="normal"><a href="#__codelineno-0-1595">1595</a></span></pre></div></td><td class="code"><div><pre><span></span><code><a id="__codelineno-0-1535" name="__codelineno-0-1535"></a><span class="k">def</span><span class="w"> </span><span class="nf">to_table</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">tasks</span><span class="p">:</span> <span class="n">Iterable</span><span class="p">[</span><span class="n">FileScanTask</span><span class="p">])</span> <span class="o">-></span> <span class="n">pa</span><span class="o">.</span><span class="n">Table</span><span class="p">:</span> |
| <a id="__codelineno-0-1536" name="__codelineno-0-1536"></a><span class="w"> </span><span class="sd">"""Scan the Iceberg table and return a pa.Table.</span> |
| <a id="__codelineno-0-1537" name="__codelineno-0-1537"></a> |
| <a id="__codelineno-0-1538" name="__codelineno-0-1538"></a><span class="sd"> Returns a pa.Table with data from the Iceberg table by resolving the</span> |
| <a id="__codelineno-0-1539" name="__codelineno-0-1539"></a><span class="sd"> right columns that match the current table schema. Only data that</span> |
| <a id="__codelineno-0-1540" name="__codelineno-0-1540"></a><span class="sd"> matches the provided row_filter expression is returned.</span> |
| <a id="__codelineno-0-1541" name="__codelineno-0-1541"></a> |
| <a id="__codelineno-0-1542" name="__codelineno-0-1542"></a><span class="sd"> Args:</span> |
| <a id="__codelineno-0-1543" name="__codelineno-0-1543"></a><span class="sd"> tasks: FileScanTasks representing the data files and delete files to read from.</span> |
| <a id="__codelineno-0-1544" name="__codelineno-0-1544"></a> |
| <a id="__codelineno-0-1545" name="__codelineno-0-1545"></a><span class="sd"> Returns:</span> |
| <a id="__codelineno-0-1546" name="__codelineno-0-1546"></a><span class="sd"> A PyArrow table. The total number of rows is capped at the scan limit, if one is specified.</span> |
| <a id="__codelineno-0-1547" name="__codelineno-0-1547"></a> |
| <a id="__codelineno-0-1548" name="__codelineno-0-1548"></a><span class="sd"> Raises:</span> |
| <a id="__codelineno-0-1549" name="__codelineno-0-1549"></a><span class="sd"> ResolveError: When a required field cannot be found in the file</span> |
| <a id="__codelineno-0-1550" name="__codelineno-0-1550"></a><span class="sd"> ValueError: When a field type in the file cannot be projected to the schema type</span> |
| <a id="__codelineno-0-1551" name="__codelineno-0-1551"></a><span class="sd"> """</span> |
| <a id="__codelineno-0-1552" name="__codelineno-0-1552"></a> <span class="n">deletes_per_file</span> <span class="o">=</span> <span class="n">_read_all_delete_files</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_io</span><span class="p">,</span> <span class="n">tasks</span><span class="p">)</span> |
| <a id="__codelineno-0-1553" name="__codelineno-0-1553"></a> <span class="n">executor</span> <span class="o">=</span> <span class="n">ExecutorFactory</span><span class="o">.</span><span class="n">get_or_create</span><span class="p">()</span> |
| <a id="__codelineno-0-1554" name="__codelineno-0-1554"></a> |
| <a id="__codelineno-0-1555" name="__codelineno-0-1555"></a> <span class="k">def</span><span class="w"> </span><span class="nf">_table_from_scan_task</span><span class="p">(</span><span class="n">task</span><span class="p">:</span> <span class="n">FileScanTask</span><span class="p">)</span> <span class="o">-></span> <span class="n">pa</span><span class="o">.</span><span class="n">Table</span><span class="p">:</span> |
| <a id="__codelineno-0-1556" name="__codelineno-0-1556"></a> <span class="n">batches</span> <span class="o">=</span> <span class="nb">list</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_record_batches_from_scan_tasks_and_deletes</span><span class="p">([</span><span class="n">task</span><span class="p">],</span> <span class="n">deletes_per_file</span><span class="p">))</span> |
| <a id="__codelineno-0-1557" name="__codelineno-0-1557"></a> <span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">batches</span><span class="p">)</span> <span class="o">></span> <span class="mi">0</span><span class="p">:</span> |
| <a id="__codelineno-0-1558" name="__codelineno-0-1558"></a> <span class="k">return</span> <span class="n">pa</span><span class="o">.</span><span class="n">Table</span><span class="o">.</span><span class="n">from_batches</span><span class="p">(</span><span class="n">batches</span><span class="p">)</span> |
| <a id="__codelineno-0-1559" name="__codelineno-0-1559"></a> <span class="k">else</span><span class="p">:</span> |
| <a id="__codelineno-0-1560" name="__codelineno-0-1560"></a> <span class="k">return</span> <span class="kc">None</span> |
| <a id="__codelineno-0-1561" name="__codelineno-0-1561"></a> |
| <a id="__codelineno-0-1562" name="__codelineno-0-1562"></a> <span class="n">futures</span> <span class="o">=</span> <span class="p">[</span> |
| <a id="__codelineno-0-1563" name="__codelineno-0-1563"></a> <span class="n">executor</span><span class="o">.</span><span class="n">submit</span><span class="p">(</span> |
| <a id="__codelineno-0-1564" name="__codelineno-0-1564"></a> <span class="n">_table_from_scan_task</span><span class="p">,</span> |
| <a id="__codelineno-0-1565" name="__codelineno-0-1565"></a> <span class="n">task</span><span class="p">,</span> |
| <a id="__codelineno-0-1566" name="__codelineno-0-1566"></a> <span class="p">)</span> |
| <a id="__codelineno-0-1567" name="__codelineno-0-1567"></a> <span class="k">for</span> <span class="n">task</span> <span class="ow">in</span> <span class="n">tasks</span> |
| <a id="__codelineno-0-1568" name="__codelineno-0-1568"></a> <span class="p">]</span> |
| <a id="__codelineno-0-1569" name="__codelineno-0-1569"></a> <span class="n">total_row_count</span> <span class="o">=</span> <span class="mi">0</span> |
| <a id="__codelineno-0-1570" name="__codelineno-0-1570"></a> <span class="c1"># for consistent ordering, we need to maintain future order</span> |
| <a id="__codelineno-0-1571" name="__codelineno-0-1571"></a> <span class="n">futures_index</span> <span class="o">=</span> <span class="p">{</span><span class="n">f</span><span class="p">:</span> <span class="n">i</span> <span class="k">for</span> <span class="n">i</span><span class="p">,</span> <span class="n">f</span> <span class="ow">in</span> <span class="nb">enumerate</span><span class="p">(</span><span class="n">futures</span><span class="p">)}</span> |
| <a id="__codelineno-0-1572" name="__codelineno-0-1572"></a> <span class="n">completed_futures</span><span class="p">:</span> <span class="n">SortedList</span><span class="p">[</span><span class="n">Future</span><span class="p">[</span><span class="n">pa</span><span class="o">.</span><span class="n">Table</span><span class="p">]]</span> <span class="o">=</span> <span class="n">SortedList</span><span class="p">(</span><span class="n">iterable</span><span class="o">=</span><span class="p">[],</span> <span class="n">key</span><span class="o">=</span><span class="k">lambda</span> <span class="n">f</span><span class="p">:</span> <span class="n">futures_index</span><span class="p">[</span><span class="n">f</span><span class="p">])</span> |
| <a id="__codelineno-0-1573" name="__codelineno-0-1573"></a> <span class="k">for</span> <span class="n">future</span> <span class="ow">in</span> <span class="n">concurrent</span><span class="o">.</span><span class="n">futures</span><span class="o">.</span><span class="n">as_completed</span><span class="p">(</span><span class="n">futures</span><span class="p">):</span> |
| <a id="__codelineno-0-1574" name="__codelineno-0-1574"></a> <span class="n">completed_futures</span><span class="o">.</span><span class="n">add</span><span class="p">(</span><span class="n">future</span><span class="p">)</span> |
| <a id="__codelineno-0-1575" name="__codelineno-0-1575"></a> <span class="k">if</span> <span class="n">table_result</span> <span class="o">:=</span> <span class="n">future</span><span class="o">.</span><span class="n">result</span><span class="p">():</span> |
| <a id="__codelineno-0-1576" name="__codelineno-0-1576"></a> <span class="n">total_row_count</span> <span class="o">+=</span> <span class="nb">len</span><span class="p">(</span><span class="n">table_result</span><span class="p">)</span> |
| <a id="__codelineno-0-1577" name="__codelineno-0-1577"></a> <span class="c1"># stop early if limit is satisfied</span> |
| <a id="__codelineno-0-1578" name="__codelineno-0-1578"></a> <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">_limit</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span> <span class="ow">and</span> <span class="n">total_row_count</span> <span class="o">>=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_limit</span><span class="p">:</span> |
| <a id="__codelineno-0-1579" name="__codelineno-0-1579"></a> <span class="k">break</span> |
| <a id="__codelineno-0-1580" name="__codelineno-0-1580"></a> |
| <a id="__codelineno-0-1581" name="__codelineno-0-1581"></a> <span class="c1"># by now, we've either completed all tasks or satisfied the limit</span> |
| <a id="__codelineno-0-1582" name="__codelineno-0-1582"></a> <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">_limit</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span> |
| <a id="__codelineno-0-1583" name="__codelineno-0-1583"></a> <span class="n">_</span> <span class="o">=</span> <span class="p">[</span><span class="n">f</span><span class="o">.</span><span class="n">cancel</span><span class="p">()</span> <span class="k">for</span> <span class="n">f</span> <span class="ow">in</span> <span class="n">futures</span> <span class="k">if</span> <span class="ow">not</span> <span class="n">f</span><span class="o">.</span><span class="n">done</span><span class="p">()]</span> |
| <a id="__codelineno-0-1584" name="__codelineno-0-1584"></a> |
| <a id="__codelineno-0-1585" name="__codelineno-0-1585"></a> <span class="n">tables</span> <span class="o">=</span> <span class="p">[</span><span class="n">f</span><span class="o">.</span><span class="n">result</span><span class="p">()</span> <span class="k">for</span> <span class="n">f</span> <span class="ow">in</span> <span class="n">completed_futures</span> <span class="k">if</span> <span class="n">f</span><span class="o">.</span><span class="n">result</span><span class="p">()]</span> |
| <a id="__codelineno-0-1586" name="__codelineno-0-1586"></a> |
| <a id="__codelineno-0-1587" name="__codelineno-0-1587"></a> <span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">tables</span><span class="p">)</span> <span class="o"><</span> <span class="mi">1</span><span class="p">:</span> |
| <a id="__codelineno-0-1588" name="__codelineno-0-1588"></a> <span class="k">return</span> <span class="n">pa</span><span class="o">.</span><span class="n">Table</span><span class="o">.</span><span class="n">from_batches</span><span class="p">([],</span> <span class="n">schema</span><span class="o">=</span><span class="n">schema_to_pyarrow</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_projected_schema</span><span class="p">,</span> <span class="n">include_field_ids</span><span class="o">=</span><span class="kc">False</span><span class="p">))</span> |
| <a id="__codelineno-0-1589" name="__codelineno-0-1589"></a> |
| <a id="__codelineno-0-1590" name="__codelineno-0-1590"></a> <span class="n">result</span> <span class="o">=</span> <span class="n">pa</span><span class="o">.</span><span class="n">concat_tables</span><span class="p">(</span><span class="n">tables</span><span class="p">,</span> <span class="n">promote_options</span><span class="o">=</span><span class="s2">"permissive"</span><span class="p">)</span> |
| <a id="__codelineno-0-1591" name="__codelineno-0-1591"></a> |
| <a id="__codelineno-0-1592" name="__codelineno-0-1592"></a> <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">_limit</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span> |
| <a id="__codelineno-0-1593" name="__codelineno-0-1593"></a> <span class="k">return</span> <span class="n">result</span><span class="o">.</span><span class="n">slice</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">_limit</span><span class="p">)</span> |
| <a id="__codelineno-0-1594" name="__codelineno-0-1594"></a> |
| <a id="__codelineno-0-1595" name="__codelineno-0-1595"></a> <span class="k">return</span> <span class="n">result</span> |
| </code></pre></div></td></tr></table></div> |
| </details> |
| </div> |
| |
| </div> |
| |
| |
| |
| </div> |
| |
| </div> |
| |
| </div> |
| |
| <div class="doc doc-object doc-class"> |
| |
| |
| |
| <h2 id="pyiceberg.io.pyarrow.PyArrowFile" class="doc doc-heading"> |
| <code>PyArrowFile</code> |
| |
| |
| <a href="#pyiceberg.io.pyarrow.PyArrowFile" class="headerlink" title="Permanent link">¶</a></h2> |
| |
| |
| <div class="doc doc-contents "> |
| <p class="doc doc-class-bases"> |
| Bases: <code><a class="autorefs autorefs-internal" title="pyiceberg.io.InputFile" href="../#pyiceberg.io.InputFile">InputFile</a></code>, <code><a class="autorefs autorefs-internal" title="pyiceberg.io.OutputFile" href="../#pyiceberg.io.OutputFile">OutputFile</a></code></p> |
| |
| |
| <p>A combined InputFile and OutputFile implementation that uses a pyarrow filesystem to generate pyarrow.lib.NativeFile instances.</p> |
| |
| |
| <p><span class="doc-section-title">Parameters:</span></p> |
| <table> |
| <thead> |
| <tr> |
| <th>Name</th> |
| <th>Type</th> |
| <th>Description</th> |
| <th>Default</th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr class="doc-section-item"> |
| <td> |
| <code>location</code> |
| </td> |
| <td> |
| <code><span title="str">str</span></code> |
| </td> |
| <td> |
| <div class="doc-md-description"> |
| <p>A URI or a path to a local file.</p> |
| </div> |
| </td> |
| <td> |
| <em>required</em> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| |
| |
| <p><span class="doc-section-title">Attributes:</span></p> |
| <table> |
| <thead> |
| <tr> |
| <th>Name</th> |
| <th>Type</th> |
| <th>Description</th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr class="doc-section-item"> |
| <td><code><span title="pyiceberg.io.pyarrow.PyArrowFile.location">location</span></code></td> |
| <td> |
| <code><span title="str">str</span></code> |
| </td> |
| <td> |
| <div class="doc-md-description"> |
| <p>The URI or path to a local file for a PyArrowFile instance.</p> |
| </div> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| |
| |
| <p><span class="doc-section-title">Examples:</span></p> |
| <div class="highlight"><pre><span></span><code><a id="__codelineno-0-1" name="__codelineno-0-1" href="#__codelineno-0-1"></a><span class="gp">>>> </span><span class="kn">from</span><span class="w"> </span><span class="nn">pyiceberg.io.pyarrow</span><span class="w"> </span><span class="kn">import</span> <span class="n">PyArrowFile</span> |
| <a id="__codelineno-0-2" name="__codelineno-0-2" href="#__codelineno-0-2"></a><span class="gp">>>> </span><span class="c1"># input_file = PyArrowFile("s3://foo/bar.txt")</span> |
| <a id="__codelineno-0-3" name="__codelineno-0-3" href="#__codelineno-0-3"></a><span class="gp">>>> </span><span class="c1"># Read the contents of the PyArrowFile instance</span> |
| <a id="__codelineno-0-4" name="__codelineno-0-4" href="#__codelineno-0-4"></a><span class="gp">>>> </span><span class="c1"># Make sure that you have permissions to read/write</span> |
| <a id="__codelineno-0-5" name="__codelineno-0-5" href="#__codelineno-0-5"></a><span class="gp">>>> </span><span class="c1"># file_content = input_file.open().read()</span> |
| </code></pre></div> |
| <div class="highlight"><pre><span></span><code><a id="__codelineno-0-1" name="__codelineno-0-1" href="#__codelineno-0-1"></a><span class="gp">>>> </span><span class="c1"># output_file = PyArrowFile("s3://baz/qux.txt")</span> |
| <a id="__codelineno-0-2" name="__codelineno-0-2" href="#__codelineno-0-2"></a><span class="gp">>>> </span><span class="c1"># Write bytes to a file</span> |
| <a id="__codelineno-0-3" name="__codelineno-0-3" href="#__codelineno-0-3"></a><span class="gp">>>> </span><span class="c1"># Make sure that you have permissions to read/write</span> |
| <a id="__codelineno-0-4" name="__codelineno-0-4" href="#__codelineno-0-4"></a><span class="gp">>>> </span><span class="c1"># output_file.create().write(b'foobytes')</span> |
| </code></pre></div> |
| |
| |
| |
| |
| |
| |
| <details class="quote"> |
| <summary>Source code in <code>pyiceberg/io/pyarrow.py</code></summary> |
| <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-224">224</a></span> |
| <span class="normal"><a href="#__codelineno-0-225">225</a></span> |
| <span class="normal"><a href="#__codelineno-0-226">226</a></span> |
| <span class="normal"><a href="#__codelineno-0-227">227</a></span> |
| <span class="normal"><a href="#__codelineno-0-228">228</a></span> |
| <span class="normal"><a href="#__codelineno-0-229">229</a></span> |
| <span class="normal"><a href="#__codelineno-0-230">230</a></span> |
| <span class="normal"><a href="#__codelineno-0-231">231</a></span> |
| <span class="normal"><a href="#__codelineno-0-232">232</a></span> |
| <span class="normal"><a href="#__codelineno-0-233">233</a></span> |
| <span class="normal"><a href="#__codelineno-0-234">234</a></span> |
| <span class="normal"><a href="#__codelineno-0-235">235</a></span> |
| <span class="normal"><a href="#__codelineno-0-236">236</a></span> |
| <span class="normal"><a href="#__codelineno-0-237">237</a></span> |
| <span class="normal"><a href="#__codelineno-0-238">238</a></span> |
| <span class="normal"><a href="#__codelineno-0-239">239</a></span> |
| <span class="normal"><a href="#__codelineno-0-240">240</a></span> |
| <span class="normal"><a href="#__codelineno-0-241">241</a></span> |
| <span class="normal"><a href="#__codelineno-0-242">242</a></span> |
| <span class="normal"><a href="#__codelineno-0-243">243</a></span> |
| <span class="normal"><a href="#__codelineno-0-244">244</a></span> |
| <span class="normal"><a href="#__codelineno-0-245">245</a></span> |
| <span class="normal"><a href="#__codelineno-0-246">246</a></span> |
| <span class="normal"><a href="#__codelineno-0-247">247</a></span> |
| <span class="normal"><a href="#__codelineno-0-248">248</a></span> |
| <span class="normal"><a href="#__codelineno-0-249">249</a></span> |
| <span class="normal"><a href="#__codelineno-0-250">250</a></span> |
| <span class="normal"><a href="#__codelineno-0-251">251</a></span> |
| <span class="normal"><a href="#__codelineno-0-252">252</a></span> |
| <span class="normal"><a href="#__codelineno-0-253">253</a></span> |
| <span class="normal"><a href="#__codelineno-0-254">254</a></span> |
| <span class="normal"><a href="#__codelineno-0-255">255</a></span> |
| <span class="normal"><a href="#__codelineno-0-256">256</a></span> |
| <span class="normal"><a href="#__codelineno-0-257">257</a></span> |
| <span class="normal"><a href="#__codelineno-0-258">258</a></span> |
| <span class="normal"><a href="#__codelineno-0-259">259</a></span> |
| <span class="normal"><a href="#__codelineno-0-260">260</a></span> |
| <span class="normal"><a href="#__codelineno-0-261">261</a></span> |
| <span class="normal"><a href="#__codelineno-0-262">262</a></span> |
| <span class="normal"><a href="#__codelineno-0-263">263</a></span> |
| <span class="normal"><a href="#__codelineno-0-264">264</a></span> |
| <span class="normal"><a href="#__codelineno-0-265">265</a></span> |
| <span class="normal"><a href="#__codelineno-0-266">266</a></span> |
| <span class="normal"><a href="#__codelineno-0-267">267</a></span> |
| <span class="normal"><a href="#__codelineno-0-268">268</a></span> |
| <span class="normal"><a href="#__codelineno-0-269">269</a></span> |
| <span class="normal"><a href="#__codelineno-0-270">270</a></span> |
| <span class="normal"><a href="#__codelineno-0-271">271</a></span> |
| <span class="normal"><a href="#__codelineno-0-272">272</a></span> |
| <span class="normal"><a href="#__codelineno-0-273">273</a></span> |
| <span class="normal"><a href="#__codelineno-0-274">274</a></span> |
| <span class="normal"><a href="#__codelineno-0-275">275</a></span> |
| <span class="normal"><a href="#__codelineno-0-276">276</a></span> |
| <span class="normal"><a href="#__codelineno-0-277">277</a></span> |
| <span class="normal"><a href="#__codelineno-0-278">278</a></span> |
| <span class="normal"><a href="#__codelineno-0-279">279</a></span> |
| <span class="normal"><a href="#__codelineno-0-280">280</a></span> |
| <span class="normal"><a href="#__codelineno-0-281">281</a></span> |
| <span class="normal"><a href="#__codelineno-0-282">282</a></span> |
| <span class="normal"><a href="#__codelineno-0-283">283</a></span> |
| <span class="normal"><a href="#__codelineno-0-284">284</a></span> |
| <span class="normal"><a href="#__codelineno-0-285">285</a></span> |
| <span class="normal"><a href="#__codelineno-0-286">286</a></span> |
| <span class="normal"><a href="#__codelineno-0-287">287</a></span> |
| <span class="normal"><a href="#__codelineno-0-288">288</a></span> |
| <span class="normal"><a href="#__codelineno-0-289">289</a></span> |
| <span class="normal"><a href="#__codelineno-0-290">290</a></span> |
| <span class="normal"><a href="#__codelineno-0-291">291</a></span> |
| <span class="normal"><a href="#__codelineno-0-292">292</a></span> |
| <span class="normal"><a href="#__codelineno-0-293">293</a></span> |
| <span class="normal"><a href="#__codelineno-0-294">294</a></span> |
| <span class="normal"><a href="#__codelineno-0-295">295</a></span> |
| <span class="normal"><a href="#__codelineno-0-296">296</a></span> |
| <span class="normal"><a href="#__codelineno-0-297">297</a></span> |
| <span class="normal"><a href="#__codelineno-0-298">298</a></span> |
| <span class="normal"><a href="#__codelineno-0-299">299</a></span> |
| <span class="normal"><a href="#__codelineno-0-300">300</a></span> |
| <span class="normal"><a href="#__codelineno-0-301">301</a></span> |
| <span class="normal"><a href="#__codelineno-0-302">302</a></span> |
| <span class="normal"><a href="#__codelineno-0-303">303</a></span> |
| <span class="normal"><a href="#__codelineno-0-304">304</a></span> |
| <span class="normal"><a href="#__codelineno-0-305">305</a></span> |
| <span class="normal"><a href="#__codelineno-0-306">306</a></span> |
| <span class="normal"><a href="#__codelineno-0-307">307</a></span> |
| <span class="normal"><a href="#__codelineno-0-308">308</a></span> |
| <span class="normal"><a href="#__codelineno-0-309">309</a></span> |
| <span class="normal"><a href="#__codelineno-0-310">310</a></span> |
| <span class="normal"><a href="#__codelineno-0-311">311</a></span> |
| <span class="normal"><a href="#__codelineno-0-312">312</a></span> |
| <span class="normal"><a href="#__codelineno-0-313">313</a></span> |
| <span class="normal"><a href="#__codelineno-0-314">314</a></span> |
| <span class="normal"><a href="#__codelineno-0-315">315</a></span> |
| <span class="normal"><a href="#__codelineno-0-316">316</a></span> |
| <span class="normal"><a href="#__codelineno-0-317">317</a></span> |
| <span class="normal"><a href="#__codelineno-0-318">318</a></span> |
| <span class="normal"><a href="#__codelineno-0-319">319</a></span> |
| <span class="normal"><a href="#__codelineno-0-320">320</a></span> |
| <span class="normal"><a href="#__codelineno-0-321">321</a></span> |
| <span class="normal"><a href="#__codelineno-0-322">322</a></span> |
| <span class="normal"><a href="#__codelineno-0-323">323</a></span> |
| <span class="normal"><a href="#__codelineno-0-324">324</a></span> |
| <span class="normal"><a href="#__codelineno-0-325">325</a></span> |
| <span class="normal"><a href="#__codelineno-0-326">326</a></span> |
| <span class="normal"><a href="#__codelineno-0-327">327</a></span> |
| <span class="normal"><a href="#__codelineno-0-328">328</a></span> |
| <span class="normal"><a href="#__codelineno-0-329">329</a></span> |
| <span class="normal"><a href="#__codelineno-0-330">330</a></span> |
| <span class="normal"><a href="#__codelineno-0-331">331</a></span> |
| <span class="normal"><a href="#__codelineno-0-332">332</a></span> |
| <span class="normal"><a href="#__codelineno-0-333">333</a></span> |
| <span class="normal"><a href="#__codelineno-0-334">334</a></span> |
| <span class="normal"><a href="#__codelineno-0-335">335</a></span> |
| <span class="normal"><a href="#__codelineno-0-336">336</a></span> |
| <span class="normal"><a href="#__codelineno-0-337">337</a></span> |
| <span class="normal"><a href="#__codelineno-0-338">338</a></span> |
| <span class="normal"><a href="#__codelineno-0-339">339</a></span> |
| <span class="normal"><a href="#__codelineno-0-340">340</a></span> |
| <span class="normal"><a href="#__codelineno-0-341">341</a></span> |
| <span class="normal"><a href="#__codelineno-0-342">342</a></span> |
| <span class="normal"><a href="#__codelineno-0-343">343</a></span> |
| <span class="normal"><a href="#__codelineno-0-344">344</a></span> |
| <span class="normal"><a href="#__codelineno-0-345">345</a></span> |
| <span class="normal"><a href="#__codelineno-0-346">346</a></span> |
| <span class="normal"><a href="#__codelineno-0-347">347</a></span> |
| <span class="normal"><a href="#__codelineno-0-348">348</a></span> |
| <span class="normal"><a href="#__codelineno-0-349">349</a></span> |
| <span class="normal"><a href="#__codelineno-0-350">350</a></span> |
| <span class="normal"><a href="#__codelineno-0-351">351</a></span> |
| <span class="normal"><a href="#__codelineno-0-352">352</a></span> |
| <span class="normal"><a href="#__codelineno-0-353">353</a></span> |
| <span class="normal"><a href="#__codelineno-0-354">354</a></span> |
| <span class="normal"><a href="#__codelineno-0-355">355</a></span> |
| <span class="normal"><a href="#__codelineno-0-356">356</a></span></pre></div></td><td class="code"><div><pre><span></span><code><a id="__codelineno-0-224" name="__codelineno-0-224"></a><span class="k">class</span><span class="w"> </span><span class="nc">PyArrowFile</span><span class="p">(</span><span class="n">InputFile</span><span class="p">,</span> <span class="n">OutputFile</span><span class="p">):</span> |
| <a id="__codelineno-0-225" name="__codelineno-0-225"></a><span class="w"> </span><span class="sd">"""A combined InputFile and OutputFile implementation that uses a pyarrow filesystem to generate pyarrow.lib.NativeFile instances.</span> |
| <a id="__codelineno-0-226" name="__codelineno-0-226"></a> |
| <a id="__codelineno-0-227" name="__codelineno-0-227"></a><span class="sd"> Args:</span> |
| <a id="__codelineno-0-228" name="__codelineno-0-228"></a><span class="sd"> location (str): A URI or a path to a local file.</span> |
| <a id="__codelineno-0-229" name="__codelineno-0-229"></a> |
| <a id="__codelineno-0-230" name="__codelineno-0-230"></a><span class="sd"> Attributes:</span> |
| <a id="__codelineno-0-231" name="__codelineno-0-231"></a><span class="sd"> location(str): The URI or path to a local file for a PyArrowFile instance.</span> |
| <a id="__codelineno-0-232" name="__codelineno-0-232"></a> |
| <a id="__codelineno-0-233" name="__codelineno-0-233"></a><span class="sd"> Examples:</span> |
| <a id="__codelineno-0-234" name="__codelineno-0-234"></a><span class="sd"> >>> from pyiceberg.io.pyarrow import PyArrowFile</span> |
| <a id="__codelineno-0-235" name="__codelineno-0-235"></a><span class="sd"> >>> # input_file = PyArrowFile("s3://foo/bar.txt")</span> |
| <a id="__codelineno-0-236" name="__codelineno-0-236"></a><span class="sd"> >>> # Read the contents of the PyArrowFile instance</span> |
| <a id="__codelineno-0-237" name="__codelineno-0-237"></a><span class="sd"> >>> # Make sure that you have permissions to read/write</span> |
| <a id="__codelineno-0-238" name="__codelineno-0-238"></a><span class="sd"> >>> # file_content = input_file.open().read()</span> |
| <a id="__codelineno-0-239" name="__codelineno-0-239"></a> |
| <a id="__codelineno-0-240" name="__codelineno-0-240"></a><span class="sd"> >>> # output_file = PyArrowFile("s3://baz/qux.txt")</span> |
| <a id="__codelineno-0-241" name="__codelineno-0-241"></a><span class="sd"> >>> # Write bytes to a file</span> |
| <a id="__codelineno-0-242" name="__codelineno-0-242"></a><span class="sd"> >>> # Make sure that you have permissions to read/write</span> |
| <a id="__codelineno-0-243" name="__codelineno-0-243"></a><span class="sd"> >>> # output_file.create().write(b'foobytes')</span> |
| <a id="__codelineno-0-244" name="__codelineno-0-244"></a><span class="sd"> """</span> |
| <a id="__codelineno-0-245" name="__codelineno-0-245"></a> |
| <a id="__codelineno-0-246" name="__codelineno-0-246"></a> <span class="n">_filesystem</span><span class="p">:</span> <span class="n">FileSystem</span> |
| <a id="__codelineno-0-247" name="__codelineno-0-247"></a> <span class="n">_path</span><span class="p">:</span> <span class="nb">str</span> |
| <a id="__codelineno-0-248" name="__codelineno-0-248"></a> <span class="n">_buffer_size</span><span class="p">:</span> <span class="nb">int</span> |
| <a id="__codelineno-0-249" name="__codelineno-0-249"></a> |
| <a id="__codelineno-0-250" name="__codelineno-0-250"></a> <span class="k">def</span><span class="w"> </span><span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">location</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span> <span class="n">path</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span> <span class="n">fs</span><span class="p">:</span> <span class="n">FileSystem</span><span class="p">,</span> <span class="n">buffer_size</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="n">ONE_MEGABYTE</span><span class="p">):</span> |
| <a id="__codelineno-0-251" name="__codelineno-0-251"></a> <span class="bp">self</span><span class="o">.</span><span class="n">_filesystem</span> <span class="o">=</span> <span class="n">fs</span> |
| <a id="__codelineno-0-252" name="__codelineno-0-252"></a> <span class="bp">self</span><span class="o">.</span><span class="n">_path</span> <span class="o">=</span> <span class="n">path</span> |
| <a id="__codelineno-0-253" name="__codelineno-0-253"></a> <span class="bp">self</span><span class="o">.</span><span class="n">_buffer_size</span> <span class="o">=</span> <span class="n">buffer_size</span> |
| <a id="__codelineno-0-254" name="__codelineno-0-254"></a> <span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="fm">__init__</span><span class="p">(</span><span class="n">location</span><span class="o">=</span><span class="n">location</span><span class="p">)</span> |
| <a id="__codelineno-0-255" name="__codelineno-0-255"></a> |
| <a id="__codelineno-0-256" name="__codelineno-0-256"></a> <span class="k">def</span><span class="w"> </span><span class="nf">_file_info</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="n">FileInfo</span><span class="p">:</span> |
| <a id="__codelineno-0-257" name="__codelineno-0-257"></a><span class="w"> </span><span class="sd">"""Retrieve a pyarrow.fs.FileInfo object for the location.</span> |
| <a id="__codelineno-0-258" name="__codelineno-0-258"></a> |
| <a id="__codelineno-0-259" name="__codelineno-0-259"></a><span class="sd"> Raises:</span> |
| <a id="__codelineno-0-260" name="__codelineno-0-260"></a><span class="sd"> PermissionError: If the file at self.location cannot be accessed due to a permission error such as</span> |
| <a id="__codelineno-0-261" name="__codelineno-0-261"></a><span class="sd"> an AWS error code 15.</span> |
| <a id="__codelineno-0-262" name="__codelineno-0-262"></a><span class="sd"> """</span> |
| <a id="__codelineno-0-263" name="__codelineno-0-263"></a> <span class="k">try</span><span class="p">:</span> |
| <a id="__codelineno-0-264" name="__codelineno-0-264"></a> <span class="n">file_info</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_filesystem</span><span class="o">.</span><span class="n">get_file_info</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_path</span><span class="p">)</span> |
| <a id="__codelineno-0-265" name="__codelineno-0-265"></a> <span class="k">except</span> <span class="ne">OSError</span> <span class="k">as</span> <span class="n">e</span><span class="p">:</span> |
| <a id="__codelineno-0-266" name="__codelineno-0-266"></a> <span class="k">if</span> <span class="n">e</span><span class="o">.</span><span class="n">errno</span> <span class="o">==</span> <span class="mi">13</span> <span class="ow">or</span> <span class="s2">"AWS Error [code 15]"</span> <span class="ow">in</span> <span class="nb">str</span><span class="p">(</span><span class="n">e</span><span class="p">):</span> |
| <a id="__codelineno-0-267" name="__codelineno-0-267"></a> <span class="k">raise</span> <span class="ne">PermissionError</span><span class="p">(</span><span class="sa">f</span><span class="s2">"Cannot get file info, access denied: </span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">location</span><span class="si">}</span><span class="s2">"</span><span class="p">)</span> <span class="kn">from</span><span class="w"> </span><span class="nn">e</span> |
| <a id="__codelineno-0-268" name="__codelineno-0-268"></a> <span class="k">raise</span> <span class="c1"># pragma: no cover - If some other kind of OSError, raise the raw error</span> |
| <a id="__codelineno-0-269" name="__codelineno-0-269"></a> |
| <a id="__codelineno-0-270" name="__codelineno-0-270"></a> <span class="k">if</span> <span class="n">file_info</span><span class="o">.</span><span class="n">type</span> <span class="o">==</span> <span class="n">FileType</span><span class="o">.</span><span class="n">NotFound</span><span class="p">:</span> |
| <a id="__codelineno-0-271" name="__codelineno-0-271"></a> <span class="k">raise</span> <span class="ne">FileNotFoundError</span><span class="p">(</span><span class="sa">f</span><span class="s2">"Cannot get file info, file not found: </span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">location</span><span class="si">}</span><span class="s2">"</span><span class="p">)</span> |
| <a id="__codelineno-0-272" name="__codelineno-0-272"></a> <span class="k">return</span> <span class="n">file_info</span> |
| <a id="__codelineno-0-273" name="__codelineno-0-273"></a> |
| <a id="__codelineno-0-274" name="__codelineno-0-274"></a> <span class="k">def</span><span class="w"> </span><span class="fm">__len__</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="nb">int</span><span class="p">:</span> |
| <a id="__codelineno-0-275" name="__codelineno-0-275"></a><span class="w"> </span><span class="sd">"""Return the total length of the file, in bytes."""</span> |
| <a id="__codelineno-0-276" name="__codelineno-0-276"></a> <span class="n">file_info</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_file_info</span><span class="p">()</span> |
| <a id="__codelineno-0-277" name="__codelineno-0-277"></a> <span class="k">return</span> <span class="n">file_info</span><span class="o">.</span><span class="n">size</span> |
| <a id="__codelineno-0-278" name="__codelineno-0-278"></a> |
| <a id="__codelineno-0-279" name="__codelineno-0-279"></a> <span class="k">def</span><span class="w"> </span><span class="nf">exists</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="nb">bool</span><span class="p">:</span> |
| <a id="__codelineno-0-280" name="__codelineno-0-280"></a><span class="w"> </span><span class="sd">"""Check whether the location exists."""</span> |
| <a id="__codelineno-0-281" name="__codelineno-0-281"></a> <span class="k">try</span><span class="p">:</span> |
| <a id="__codelineno-0-282" name="__codelineno-0-282"></a> <span class="bp">self</span><span class="o">.</span><span class="n">_file_info</span><span class="p">()</span> <span class="c1"># raises FileNotFoundError if it does not exist</span> |
| <a id="__codelineno-0-283" name="__codelineno-0-283"></a> <span class="k">return</span> <span class="kc">True</span> |
| <a id="__codelineno-0-284" name="__codelineno-0-284"></a> <span class="k">except</span> <span class="ne">FileNotFoundError</span><span class="p">:</span> |
| <a id="__codelineno-0-285" name="__codelineno-0-285"></a> <span class="k">return</span> <span class="kc">False</span> |
| <a id="__codelineno-0-286" name="__codelineno-0-286"></a> |
| <a id="__codelineno-0-287" name="__codelineno-0-287"></a> <span class="k">def</span><span class="w"> </span><span class="nf">open</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">seekable</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">True</span><span class="p">)</span> <span class="o">-></span> <span class="n">InputStream</span><span class="p">:</span> |
| <a id="__codelineno-0-288" name="__codelineno-0-288"></a><span class="w"> </span><span class="sd">"""Open the location using a PyArrow FileSystem inferred from the location.</span> |
| <a id="__codelineno-0-289" name="__codelineno-0-289"></a> |
| <a id="__codelineno-0-290" name="__codelineno-0-290"></a><span class="sd"> Args:</span> |
| <a id="__codelineno-0-291" name="__codelineno-0-291"></a><span class="sd"> seekable: If the stream should support seek, or if it is consumed sequential.</span> |
| <a id="__codelineno-0-292" name="__codelineno-0-292"></a> |
| <a id="__codelineno-0-293" name="__codelineno-0-293"></a><span class="sd"> Returns:</span> |
| <a id="__codelineno-0-294" name="__codelineno-0-294"></a><span class="sd"> pyarrow.lib.NativeFile: A NativeFile instance for the file located at `self.location`.</span> |
| <a id="__codelineno-0-295" name="__codelineno-0-295"></a> |
| <a id="__codelineno-0-296" name="__codelineno-0-296"></a><span class="sd"> Raises:</span> |
| <a id="__codelineno-0-297" name="__codelineno-0-297"></a><span class="sd"> FileNotFoundError: If the file at self.location does not exist.</span> |
| <a id="__codelineno-0-298" name="__codelineno-0-298"></a><span class="sd"> PermissionError: If the file at self.location cannot be accessed due to a permission error such as</span> |
| <a id="__codelineno-0-299" name="__codelineno-0-299"></a><span class="sd"> an AWS error code 15.</span> |
| <a id="__codelineno-0-300" name="__codelineno-0-300"></a><span class="sd"> """</span> |
| <a id="__codelineno-0-301" name="__codelineno-0-301"></a> <span class="k">try</span><span class="p">:</span> |
| <a id="__codelineno-0-302" name="__codelineno-0-302"></a> <span class="k">if</span> <span class="n">seekable</span><span class="p">:</span> |
| <a id="__codelineno-0-303" name="__codelineno-0-303"></a> <span class="n">input_file</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_filesystem</span><span class="o">.</span><span class="n">open_input_file</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_path</span><span class="p">)</span> |
| <a id="__codelineno-0-304" name="__codelineno-0-304"></a> <span class="k">else</span><span class="p">:</span> |
| <a id="__codelineno-0-305" name="__codelineno-0-305"></a> <span class="n">input_file</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_filesystem</span><span class="o">.</span><span class="n">open_input_stream</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_path</span><span class="p">,</span> <span class="n">buffer_size</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">_buffer_size</span><span class="p">)</span> |
| <a id="__codelineno-0-306" name="__codelineno-0-306"></a> <span class="k">except</span> <span class="ne">FileNotFoundError</span><span class="p">:</span> |
| <a id="__codelineno-0-307" name="__codelineno-0-307"></a> <span class="k">raise</span> |
| <a id="__codelineno-0-308" name="__codelineno-0-308"></a> <span class="k">except</span> <span class="ne">PermissionError</span><span class="p">:</span> |
| <a id="__codelineno-0-309" name="__codelineno-0-309"></a> <span class="k">raise</span> |
| <a id="__codelineno-0-310" name="__codelineno-0-310"></a> <span class="k">except</span> <span class="ne">OSError</span> <span class="k">as</span> <span class="n">e</span><span class="p">:</span> |
| <a id="__codelineno-0-311" name="__codelineno-0-311"></a> <span class="k">if</span> <span class="n">e</span><span class="o">.</span><span class="n">errno</span> <span class="o">==</span> <span class="mi">2</span> <span class="ow">or</span> <span class="s2">"Path does not exist"</span> <span class="ow">in</span> <span class="nb">str</span><span class="p">(</span><span class="n">e</span><span class="p">):</span> |
| <a id="__codelineno-0-312" name="__codelineno-0-312"></a> <span class="k">raise</span> <span class="ne">FileNotFoundError</span><span class="p">(</span><span class="sa">f</span><span class="s2">"Cannot open file, does not exist: </span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">location</span><span class="si">}</span><span class="s2">"</span><span class="p">)</span> <span class="kn">from</span><span class="w"> </span><span class="nn">e</span> |
| <a id="__codelineno-0-313" name="__codelineno-0-313"></a> <span class="k">elif</span> <span class="n">e</span><span class="o">.</span><span class="n">errno</span> <span class="o">==</span> <span class="mi">13</span> <span class="ow">or</span> <span class="s2">"AWS Error [code 15]"</span> <span class="ow">in</span> <span class="nb">str</span><span class="p">(</span><span class="n">e</span><span class="p">):</span> |
| <a id="__codelineno-0-314" name="__codelineno-0-314"></a> <span class="k">raise</span> <span class="ne">PermissionError</span><span class="p">(</span><span class="sa">f</span><span class="s2">"Cannot open file, access denied: </span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">location</span><span class="si">}</span><span class="s2">"</span><span class="p">)</span> <span class="kn">from</span><span class="w"> </span><span class="nn">e</span> |
| <a id="__codelineno-0-315" name="__codelineno-0-315"></a> <span class="k">raise</span> <span class="c1"># pragma: no cover - If some other kind of OSError, raise the raw error</span> |
| <a id="__codelineno-0-316" name="__codelineno-0-316"></a> <span class="k">return</span> <span class="n">input_file</span> |
| <a id="__codelineno-0-317" name="__codelineno-0-317"></a> |
| <a id="__codelineno-0-318" name="__codelineno-0-318"></a> <span class="k">def</span><span class="w"> </span><span class="nf">create</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">overwrite</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">False</span><span class="p">)</span> <span class="o">-></span> <span class="n">OutputStream</span><span class="p">:</span> |
| <a id="__codelineno-0-319" name="__codelineno-0-319"></a><span class="w"> </span><span class="sd">"""Create a writable pyarrow.lib.NativeFile for this PyArrowFile's location.</span> |
| <a id="__codelineno-0-320" name="__codelineno-0-320"></a> |
| <a id="__codelineno-0-321" name="__codelineno-0-321"></a><span class="sd"> Args:</span> |
| <a id="__codelineno-0-322" name="__codelineno-0-322"></a><span class="sd"> overwrite (bool): Whether to overwrite the file if it already exists.</span> |
| <a id="__codelineno-0-323" name="__codelineno-0-323"></a> |
| <a id="__codelineno-0-324" name="__codelineno-0-324"></a><span class="sd"> Returns:</span> |
| <a id="__codelineno-0-325" name="__codelineno-0-325"></a><span class="sd"> pyarrow.lib.NativeFile: A NativeFile instance for the file located at self.location.</span> |
| <a id="__codelineno-0-326" name="__codelineno-0-326"></a> |
| <a id="__codelineno-0-327" name="__codelineno-0-327"></a><span class="sd"> Raises:</span> |
| <a id="__codelineno-0-328" name="__codelineno-0-328"></a><span class="sd"> FileExistsError: If the file already exists at `self.location` and `overwrite` is False.</span> |
| <a id="__codelineno-0-329" name="__codelineno-0-329"></a> |
| <a id="__codelineno-0-330" name="__codelineno-0-330"></a><span class="sd"> Note:</span> |
| <a id="__codelineno-0-331" name="__codelineno-0-331"></a><span class="sd"> This retrieves a pyarrow NativeFile by opening an output stream. If overwrite is set to False,</span> |
| <a id="__codelineno-0-332" name="__codelineno-0-332"></a><span class="sd"> a check is first performed to verify that the file does not exist. This is not thread-safe and</span> |
| <a id="__codelineno-0-333" name="__codelineno-0-333"></a><span class="sd"> a possibility does exist that the file can be created by a concurrent process after the existence</span> |
| <a id="__codelineno-0-334" name="__codelineno-0-334"></a><span class="sd"> check yet before the output stream is created. In such a case, the default pyarrow behavior will</span> |
| <a id="__codelineno-0-335" name="__codelineno-0-335"></a><span class="sd"> truncate the contents of the existing file when opening the output stream.</span> |
| <a id="__codelineno-0-336" name="__codelineno-0-336"></a><span class="sd"> """</span> |
| <a id="__codelineno-0-337" name="__codelineno-0-337"></a> <span class="k">try</span><span class="p">:</span> |
| <a id="__codelineno-0-338" name="__codelineno-0-338"></a> <span class="k">if</span> <span class="ow">not</span> <span class="n">overwrite</span> <span class="ow">and</span> <span class="bp">self</span><span class="o">.</span><span class="n">exists</span><span class="p">()</span> <span class="ow">is</span> <span class="kc">True</span><span class="p">:</span> |
| <a id="__codelineno-0-339" name="__codelineno-0-339"></a> <span class="k">raise</span> <span class="ne">FileExistsError</span><span class="p">(</span><span class="sa">f</span><span class="s2">"Cannot create file, already exists: </span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">location</span><span class="si">}</span><span class="s2">"</span><span class="p">)</span> |
| <a id="__codelineno-0-340" name="__codelineno-0-340"></a> <span class="n">output_file</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_filesystem</span><span class="o">.</span><span class="n">open_output_stream</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_path</span><span class="p">,</span> <span class="n">buffer_size</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">_buffer_size</span><span class="p">)</span> |
| <a id="__codelineno-0-341" name="__codelineno-0-341"></a> <span class="k">except</span> <span class="ne">PermissionError</span><span class="p">:</span> |
| <a id="__codelineno-0-342" name="__codelineno-0-342"></a> <span class="k">raise</span> |
| <a id="__codelineno-0-343" name="__codelineno-0-343"></a> <span class="k">except</span> <span class="ne">OSError</span> <span class="k">as</span> <span class="n">e</span><span class="p">:</span> |
| <a id="__codelineno-0-344" name="__codelineno-0-344"></a> <span class="k">if</span> <span class="n">e</span><span class="o">.</span><span class="n">errno</span> <span class="o">==</span> <span class="mi">13</span> <span class="ow">or</span> <span class="s2">"AWS Error [code 15]"</span> <span class="ow">in</span> <span class="nb">str</span><span class="p">(</span><span class="n">e</span><span class="p">):</span> |
| <a id="__codelineno-0-345" name="__codelineno-0-345"></a> <span class="k">raise</span> <span class="ne">PermissionError</span><span class="p">(</span><span class="sa">f</span><span class="s2">"Cannot create file, access denied: </span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">location</span><span class="si">}</span><span class="s2">"</span><span class="p">)</span> <span class="kn">from</span><span class="w"> </span><span class="nn">e</span> |
| <a id="__codelineno-0-346" name="__codelineno-0-346"></a> <span class="k">raise</span> <span class="c1"># pragma: no cover - If some other kind of OSError, raise the raw error</span> |
| <a id="__codelineno-0-347" name="__codelineno-0-347"></a> <span class="k">return</span> <span class="n">output_file</span> |
| <a id="__codelineno-0-348" name="__codelineno-0-348"></a> |
| <a id="__codelineno-0-349" name="__codelineno-0-349"></a> <span class="k">def</span><span class="w"> </span><span class="nf">to_input_file</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="n">PyArrowFile</span><span class="p">:</span> |
| <a id="__codelineno-0-350" name="__codelineno-0-350"></a><span class="w"> </span><span class="sd">"""Return a new PyArrowFile for the location of an existing PyArrowFile instance.</span> |
| <a id="__codelineno-0-351" name="__codelineno-0-351"></a> |
| <a id="__codelineno-0-352" name="__codelineno-0-352"></a><span class="sd"> This method is included to abide by the OutputFile abstract base class. Since this implementation uses a single</span> |
| <a id="__codelineno-0-353" name="__codelineno-0-353"></a><span class="sd"> PyArrowFile class (as opposed to separate InputFile and OutputFile implementations), this method effectively returns</span> |
| <a id="__codelineno-0-354" name="__codelineno-0-354"></a><span class="sd"> a copy of the same instance.</span> |
| <a id="__codelineno-0-355" name="__codelineno-0-355"></a><span class="sd"> """</span> |
| <a id="__codelineno-0-356" name="__codelineno-0-356"></a> <span class="k">return</span> <span class="bp">self</span> |
| </code></pre></div></td></tr></table></div> |
| </details> |
| |
| |
| |
| <div class="doc doc-children"> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <div class="doc doc-object doc-function"> |
| |
| |
| <h3 id="pyiceberg.io.pyarrow.PyArrowFile.__len__" class="doc doc-heading"> |
| <code class="highlight language-python"><span class="fm">__len__</span><span class="p">()</span></code> |
| |
| <a href="#pyiceberg.io.pyarrow.PyArrowFile.__len__" class="headerlink" title="Permanent link">¶</a></h3> |
| |
| |
| <div class="doc doc-contents "> |
| |
| <p>Return the total length of the file, in bytes.</p> |
| |
| <details class="quote"> |
| <summary>Source code in <code>pyiceberg/io/pyarrow.py</code></summary> |
| <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-274">274</a></span> |
| <span class="normal"><a href="#__codelineno-0-275">275</a></span> |
| <span class="normal"><a href="#__codelineno-0-276">276</a></span> |
| <span class="normal"><a href="#__codelineno-0-277">277</a></span></pre></div></td><td class="code"><div><pre><span></span><code><a id="__codelineno-0-274" name="__codelineno-0-274"></a><span class="k">def</span><span class="w"> </span><span class="fm">__len__</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="nb">int</span><span class="p">:</span> |
| <a id="__codelineno-0-275" name="__codelineno-0-275"></a><span class="w"> </span><span class="sd">"""Return the total length of the file, in bytes."""</span> |
| <a id="__codelineno-0-276" name="__codelineno-0-276"></a> <span class="n">file_info</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_file_info</span><span class="p">()</span> |
| <a id="__codelineno-0-277" name="__codelineno-0-277"></a> <span class="k">return</span> <span class="n">file_info</span><span class="o">.</span><span class="n">size</span> |
| </code></pre></div></td></tr></table></div> |
| </details> |
| </div> |
| |
| </div> |
| |
| <div class="doc doc-object doc-function"> |
| |
| |
| <h3 id="pyiceberg.io.pyarrow.PyArrowFile._file_info" class="doc doc-heading"> |
| <code class="highlight language-python"><span class="n">_file_info</span><span class="p">()</span></code> |
| |
| <a href="#pyiceberg.io.pyarrow.PyArrowFile._file_info" class="headerlink" title="Permanent link">¶</a></h3> |
| |
| |
| <div class="doc doc-contents "> |
| |
| <p>Retrieve a pyarrow.fs.FileInfo object for the location.</p> |
| |
| |
| <p><span class="doc-section-title">Raises:</span></p> |
| <table> |
| <thead> |
| <tr> |
| <th>Type</th> |
| <th>Description</th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr class="doc-section-item"> |
| <td> |
| <code><span title="PermissionError">PermissionError</span></code> |
| </td> |
| <td> |
| <div class="doc-md-description"> |
| <p>If the file at <code>self.location</code> cannot be accessed due to a permission error, |
| such as OS errno 13 or AWS error code 15.</p> |
| </div> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| |
| <details class="quote"> |
| <summary>Source code in <code>pyiceberg/io/pyarrow.py</code></summary> |
| <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-256">256</a></span> |
| <span class="normal"><a href="#__codelineno-0-257">257</a></span> |
| <span class="normal"><a href="#__codelineno-0-258">258</a></span> |
| <span class="normal"><a href="#__codelineno-0-259">259</a></span> |
| <span class="normal"><a href="#__codelineno-0-260">260</a></span> |
| <span class="normal"><a href="#__codelineno-0-261">261</a></span> |
| <span class="normal"><a href="#__codelineno-0-262">262</a></span> |
| <span class="normal"><a href="#__codelineno-0-263">263</a></span> |
| <span class="normal"><a href="#__codelineno-0-264">264</a></span> |
| <span class="normal"><a href="#__codelineno-0-265">265</a></span> |
| <span class="normal"><a href="#__codelineno-0-266">266</a></span> |
| <span class="normal"><a href="#__codelineno-0-267">267</a></span> |
| <span class="normal"><a href="#__codelineno-0-268">268</a></span> |
| <span class="normal"><a href="#__codelineno-0-269">269</a></span> |
| <span class="normal"><a href="#__codelineno-0-270">270</a></span> |
| <span class="normal"><a href="#__codelineno-0-271">271</a></span> |
| <span class="normal"><a href="#__codelineno-0-272">272</a></span></pre></div></td><td class="code"><div><pre><span></span><code><a id="__codelineno-0-256" name="__codelineno-0-256"></a><span class="k">def</span><span class="w"> </span><span class="nf">_file_info</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="n">FileInfo</span><span class="p">:</span> |
| <a id="__codelineno-0-257" name="__codelineno-0-257"></a><span class="w"> </span><span class="sd">"""Retrieve a pyarrow.fs.FileInfo object for the location.</span> |
| <a id="__codelineno-0-258" name="__codelineno-0-258"></a> |
| <a id="__codelineno-0-259" name="__codelineno-0-259"></a><span class="sd"> Raises:</span> |
| <a id="__codelineno-0-260" name="__codelineno-0-260"></a><span class="sd"> PermissionError: If the file at self.location cannot be accessed due to a permission error such as</span> |
| <a id="__codelineno-0-261" name="__codelineno-0-261"></a><span class="sd"> an AWS error code 15.</span> |
| <a id="__codelineno-0-262" name="__codelineno-0-262"></a><span class="sd"> """</span> |
| <a id="__codelineno-0-263" name="__codelineno-0-263"></a> <span class="k">try</span><span class="p">:</span> |
| <a id="__codelineno-0-264" name="__codelineno-0-264"></a> <span class="n">file_info</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_filesystem</span><span class="o">.</span><span class="n">get_file_info</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_path</span><span class="p">)</span> |
| <a id="__codelineno-0-265" name="__codelineno-0-265"></a> <span class="k">except</span> <span class="ne">OSError</span> <span class="k">as</span> <span class="n">e</span><span class="p">:</span> |
| <a id="__codelineno-0-266" name="__codelineno-0-266"></a> <span class="k">if</span> <span class="n">e</span><span class="o">.</span><span class="n">errno</span> <span class="o">==</span> <span class="mi">13</span> <span class="ow">or</span> <span class="s2">"AWS Error [code 15]"</span> <span class="ow">in</span> <span class="nb">str</span><span class="p">(</span><span class="n">e</span><span class="p">):</span> |
| <a id="__codelineno-0-267" name="__codelineno-0-267"></a> <span class="k">raise</span> <span class="ne">PermissionError</span><span class="p">(</span><span class="sa">f</span><span class="s2">"Cannot get file info, access denied: </span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">location</span><span class="si">}</span><span class="s2">"</span><span class="p">)</span> <span class="kn">from</span><span class="w"> </span><span class="nn">e</span> |
| <a id="__codelineno-0-268" name="__codelineno-0-268"></a> <span class="k">raise</span> <span class="c1"># pragma: no cover - If some other kind of OSError, raise the raw error</span> |
| <a id="__codelineno-0-269" name="__codelineno-0-269"></a> |
| <a id="__codelineno-0-270" name="__codelineno-0-270"></a> <span class="k">if</span> <span class="n">file_info</span><span class="o">.</span><span class="n">type</span> <span class="o">==</span> <span class="n">FileType</span><span class="o">.</span><span class="n">NotFound</span><span class="p">:</span> |
| <a id="__codelineno-0-271" name="__codelineno-0-271"></a> <span class="k">raise</span> <span class="ne">FileNotFoundError</span><span class="p">(</span><span class="sa">f</span><span class="s2">"Cannot get file info, file not found: </span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">location</span><span class="si">}</span><span class="s2">"</span><span class="p">)</span> |
| <a id="__codelineno-0-272" name="__codelineno-0-272"></a> <span class="k">return</span> <span class="n">file_info</span> |
| </code></pre></div></td></tr></table></div> |
| </details> |
| </div> |
| |
| </div> |
| |
| <div class="doc doc-object doc-function"> |
| |
| |
| <h3 id="pyiceberg.io.pyarrow.PyArrowFile.create" class="doc doc-heading"> |
| <code class="highlight language-python"><span class="n">create</span><span class="p">(</span><span class="n">overwrite</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span></code> |
| |
| <a href="#pyiceberg.io.pyarrow.PyArrowFile.create" class="headerlink" title="Permanent link">¶</a></h3> |
| |
| |
| <div class="doc doc-contents "> |
| |
| <p>Create a writable pyarrow.lib.NativeFile for this PyArrowFile's location.</p> |
| |
| |
| <p><span class="doc-section-title">Parameters:</span></p> |
| <table> |
| <thead> |
| <tr> |
| <th>Name</th> |
| <th>Type</th> |
| <th>Description</th> |
| <th>Default</th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr class="doc-section-item"> |
| <td> |
| <code>overwrite</code> |
| </td> |
| <td> |
| <code><span title="bool">bool</span></code> |
| </td> |
| <td> |
| <div class="doc-md-description"> |
| <p>Whether to overwrite the file if it already exists.</p> |
| </div> |
| </td> |
| <td> |
| <code>False</code> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| |
| |
| <p><span class="doc-section-title">Returns:</span></p> |
| <table> |
| <thead> |
| <tr> |
| <th>Type</th> |
| <th>Description</th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr class="doc-section-item"> |
| <td> |
| <code><a class="autorefs autorefs-internal" title="pyiceberg.io.OutputStream" href="../#pyiceberg.io.OutputStream">OutputStream</a></code> |
| </td> |
| <td> |
| <div class="doc-md-description"> |
| <p>A <code>pyarrow.lib.NativeFile</code> instance for the file located at <code>self.location</code>.</p> |
| </div> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| |
| |
| <p><span class="doc-section-title">Raises:</span></p> |
| <table> |
| <thead> |
| <tr> |
| <th>Type</th> |
| <th>Description</th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr class="doc-section-item"> |
| <td> |
| <code><span title="FileExistsError">FileExistsError</span></code> |
| </td> |
| <td> |
| <div class="doc-md-description"> |
| <p>If the file already exists at <code>self.location</code> and <code>overwrite</code> is False.</p> |
| </div> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| |
| |
| <details class="note" open> |
| <summary>Note</summary> |
| <p>This retrieves a pyarrow NativeFile by opening an output stream. If <code>overwrite</code> is set to False, |
| a check is first performed to verify that the file does not exist. This is not thread-safe: |
| a concurrent process can create the file after the existence |
| check but before the output stream is created. In such a case, the default pyarrow behavior will |
| truncate the contents of the existing file when opening the output stream.</p> |
| </details> |
| <details class="quote"> |
| <summary>Source code in <code>pyiceberg/io/pyarrow.py</code></summary> |
| <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-318">318</a></span> |
| <span class="normal"><a href="#__codelineno-0-319">319</a></span> |
| <span class="normal"><a href="#__codelineno-0-320">320</a></span> |
| <span class="normal"><a href="#__codelineno-0-321">321</a></span> |
| <span class="normal"><a href="#__codelineno-0-322">322</a></span> |
| <span class="normal"><a href="#__codelineno-0-323">323</a></span> |
| <span class="normal"><a href="#__codelineno-0-324">324</a></span> |
| <span class="normal"><a href="#__codelineno-0-325">325</a></span> |
| <span class="normal"><a href="#__codelineno-0-326">326</a></span> |
| <span class="normal"><a href="#__codelineno-0-327">327</a></span> |
| <span class="normal"><a href="#__codelineno-0-328">328</a></span> |
| <span class="normal"><a href="#__codelineno-0-329">329</a></span> |
| <span class="normal"><a href="#__codelineno-0-330">330</a></span> |
| <span class="normal"><a href="#__codelineno-0-331">331</a></span> |
| <span class="normal"><a href="#__codelineno-0-332">332</a></span> |
| <span class="normal"><a href="#__codelineno-0-333">333</a></span> |
| <span class="normal"><a href="#__codelineno-0-334">334</a></span> |
| <span class="normal"><a href="#__codelineno-0-335">335</a></span> |
| <span class="normal"><a href="#__codelineno-0-336">336</a></span> |
| <span class="normal"><a href="#__codelineno-0-337">337</a></span> |
| <span class="normal"><a href="#__codelineno-0-338">338</a></span> |
| <span class="normal"><a href="#__codelineno-0-339">339</a></span> |
| <span class="normal"><a href="#__codelineno-0-340">340</a></span> |
| <span class="normal"><a href="#__codelineno-0-341">341</a></span> |
| <span class="normal"><a href="#__codelineno-0-342">342</a></span> |
| <span class="normal"><a href="#__codelineno-0-343">343</a></span> |
| <span class="normal"><a href="#__codelineno-0-344">344</a></span> |
| <span class="normal"><a href="#__codelineno-0-345">345</a></span> |
| <span class="normal"><a href="#__codelineno-0-346">346</a></span> |
| <span class="normal"><a href="#__codelineno-0-347">347</a></span></pre></div></td><td class="code"><div><pre><span></span><code><a id="__codelineno-0-318" name="__codelineno-0-318"></a><span class="k">def</span><span class="w"> </span><span class="nf">create</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">overwrite</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">False</span><span class="p">)</span> <span class="o">-></span> <span class="n">OutputStream</span><span class="p">:</span> |
| <a id="__codelineno-0-319" name="__codelineno-0-319"></a><span class="w"> </span><span class="sd">"""Create a writable pyarrow.lib.NativeFile for this PyArrowFile's location.</span> |
| <a id="__codelineno-0-320" name="__codelineno-0-320"></a> |
| <a id="__codelineno-0-321" name="__codelineno-0-321"></a><span class="sd"> Args:</span> |
| <a id="__codelineno-0-322" name="__codelineno-0-322"></a><span class="sd"> overwrite (bool): Whether to overwrite the file if it already exists.</span> |
| <a id="__codelineno-0-323" name="__codelineno-0-323"></a> |
| <a id="__codelineno-0-324" name="__codelineno-0-324"></a><span class="sd"> Returns:</span> |
| <a id="__codelineno-0-325" name="__codelineno-0-325"></a><span class="sd"> pyarrow.lib.NativeFile: A NativeFile instance for the file located at self.location.</span> |
| <a id="__codelineno-0-326" name="__codelineno-0-326"></a> |
| <a id="__codelineno-0-327" name="__codelineno-0-327"></a><span class="sd"> Raises:</span> |
| <a id="__codelineno-0-328" name="__codelineno-0-328"></a><span class="sd"> FileExistsError: If the file already exists at `self.location` and `overwrite` is False.</span> |
| <a id="__codelineno-0-329" name="__codelineno-0-329"></a> |
| <a id="__codelineno-0-330" name="__codelineno-0-330"></a><span class="sd"> Note:</span> |
| <a id="__codelineno-0-331" name="__codelineno-0-331"></a><span class="sd"> This retrieves a pyarrow NativeFile by opening an output stream. If overwrite is set to False,</span> |
| <a id="__codelineno-0-332" name="__codelineno-0-332"></a><span class="sd"> a check is first performed to verify that the file does not exist. This is not thread-safe and</span> |
| <a id="__codelineno-0-333" name="__codelineno-0-333"></a><span class="sd"> a possibility does exist that the file can be created by a concurrent process after the existence</span> |
| <a id="__codelineno-0-334" name="__codelineno-0-334"></a><span class="sd"> check yet before the output stream is created. In such a case, the default pyarrow behavior will</span> |
| <a id="__codelineno-0-335" name="__codelineno-0-335"></a><span class="sd"> truncate the contents of the existing file when opening the output stream.</span> |
| <a id="__codelineno-0-336" name="__codelineno-0-336"></a><span class="sd"> """</span> |
| <a id="__codelineno-0-337" name="__codelineno-0-337"></a> <span class="k">try</span><span class="p">:</span> |
| <a id="__codelineno-0-338" name="__codelineno-0-338"></a> <span class="k">if</span> <span class="ow">not</span> <span class="n">overwrite</span> <span class="ow">and</span> <span class="bp">self</span><span class="o">.</span><span class="n">exists</span><span class="p">()</span> <span class="ow">is</span> <span class="kc">True</span><span class="p">:</span> |
| <a id="__codelineno-0-339" name="__codelineno-0-339"></a> <span class="k">raise</span> <span class="ne">FileExistsError</span><span class="p">(</span><span class="sa">f</span><span class="s2">"Cannot create file, already exists: </span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">location</span><span class="si">}</span><span class="s2">"</span><span class="p">)</span> |
| <a id="__codelineno-0-340" name="__codelineno-0-340"></a> <span class="n">output_file</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_filesystem</span><span class="o">.</span><span class="n">open_output_stream</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_path</span><span class="p">,</span> <span class="n">buffer_size</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">_buffer_size</span><span class="p">)</span> |
| <a id="__codelineno-0-341" name="__codelineno-0-341"></a> <span class="k">except</span> <span class="ne">PermissionError</span><span class="p">:</span> |
| <a id="__codelineno-0-342" name="__codelineno-0-342"></a> <span class="k">raise</span> |
| <a id="__codelineno-0-343" name="__codelineno-0-343"></a> <span class="k">except</span> <span class="ne">OSError</span> <span class="k">as</span> <span class="n">e</span><span class="p">:</span> |
| <a id="__codelineno-0-344" name="__codelineno-0-344"></a> <span class="k">if</span> <span class="n">e</span><span class="o">.</span><span class="n">errno</span> <span class="o">==</span> <span class="mi">13</span> <span class="ow">or</span> <span class="s2">"AWS Error [code 15]"</span> <span class="ow">in</span> <span class="nb">str</span><span class="p">(</span><span class="n">e</span><span class="p">):</span> |
| <a id="__codelineno-0-345" name="__codelineno-0-345"></a> <span class="k">raise</span> <span class="ne">PermissionError</span><span class="p">(</span><span class="sa">f</span><span class="s2">"Cannot create file, access denied: </span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">location</span><span class="si">}</span><span class="s2">"</span><span class="p">)</span> <span class="kn">from</span><span class="w"> </span><span class="nn">e</span> |
| <a id="__codelineno-0-346" name="__codelineno-0-346"></a> <span class="k">raise</span> <span class="c1"># pragma: no cover - If some other kind of OSError, raise the raw error</span> |
| <a id="__codelineno-0-347" name="__codelineno-0-347"></a> <span class="k">return</span> <span class="n">output_file</span> |
| </code></pre></div></td></tr></table></div> |
| </details> |
| </div> |
| |
| </div> |
| |
| <div class="doc doc-object doc-function"> |
| |
| |
| <h3 id="pyiceberg.io.pyarrow.PyArrowFile.exists" class="doc doc-heading"> |
| <code class="highlight language-python"><span class="n">exists</span><span class="p">()</span></code> |
| |
| <a href="#pyiceberg.io.pyarrow.PyArrowFile.exists" class="headerlink" title="Permanent link">¶</a></h3> |
| |
| |
| <div class="doc doc-contents "> |
| |
| <p>Check whether the location exists.</p> |
| |
| <details class="quote"> |
| <summary>Source code in <code>pyiceberg/io/pyarrow.py</code></summary> |
| <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-279">279</a></span> |
| <span class="normal"><a href="#__codelineno-0-280">280</a></span> |
| <span class="normal"><a href="#__codelineno-0-281">281</a></span> |
| <span class="normal"><a href="#__codelineno-0-282">282</a></span> |
| <span class="normal"><a href="#__codelineno-0-283">283</a></span> |
| <span class="normal"><a href="#__codelineno-0-284">284</a></span> |
| <span class="normal"><a href="#__codelineno-0-285">285</a></span></pre></div></td><td class="code"><div><pre><span></span><code><a id="__codelineno-0-279" name="__codelineno-0-279"></a><span class="k">def</span><span class="w"> </span><span class="nf">exists</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="nb">bool</span><span class="p">:</span> |
| <a id="__codelineno-0-280" name="__codelineno-0-280"></a><span class="w"> </span><span class="sd">"""Check whether the location exists."""</span> |
| <a id="__codelineno-0-281" name="__codelineno-0-281"></a> <span class="k">try</span><span class="p">:</span> |
| <a id="__codelineno-0-282" name="__codelineno-0-282"></a> <span class="bp">self</span><span class="o">.</span><span class="n">_file_info</span><span class="p">()</span> <span class="c1"># raises FileNotFoundError if it does not exist</span> |
| <a id="__codelineno-0-283" name="__codelineno-0-283"></a> <span class="k">return</span> <span class="kc">True</span> |
| <a id="__codelineno-0-284" name="__codelineno-0-284"></a> <span class="k">except</span> <span class="ne">FileNotFoundError</span><span class="p">:</span> |
| <a id="__codelineno-0-285" name="__codelineno-0-285"></a> <span class="k">return</span> <span class="kc">False</span> |
| </code></pre></div></td></tr></table></div> |
| </details> |
| </div> |
| |
| </div> |
| |
| <div class="doc doc-object doc-function"> |
| |
| |
| <h3 id="pyiceberg.io.pyarrow.PyArrowFile.open" class="doc doc-heading"> |
| <code class="highlight language-python"><span class="nb">open</span><span class="p">(</span><span class="n">seekable</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span></code> |
| |
| <a href="#pyiceberg.io.pyarrow.PyArrowFile.open" class="headerlink" title="Permanent link">¶</a></h3> |
| |
| |
| <div class="doc doc-contents "> |
| |
| <p>Open the location using a PyArrow FileSystem inferred from the location.</p> |
| |
| |
| <p><span class="doc-section-title">Parameters:</span></p> |
| <table> |
| <thead> |
| <tr> |
| <th>Name</th> |
| <th>Type</th> |
| <th>Description</th> |
| <th>Default</th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr class="doc-section-item"> |
| <td> |
| <code>seekable</code> |
| </td> |
| <td> |
| <code><span title="bool">bool</span></code> |
| </td> |
| <td> |
| <div class="doc-md-description"> |
| <p>If the stream should support seek, or if it is consumed sequentially.</p> |
| </div> |
| </td> |
| <td> |
| <code>True</code> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| |
| |
| <p><span class="doc-section-title">Returns:</span></p> |
| <table> |
| <thead> |
| <tr> |
| <th>Type</th> |
| <th>Description</th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr class="doc-section-item"> |
| <td> |
| <code><a class="autorefs autorefs-internal" title="pyiceberg.io.InputStream" href="../#pyiceberg.io.InputStream">InputStream</a></code> |
| </td> |
| <td> |
| <div class="doc-md-description"> |
| <p>A <code>pyarrow.lib.NativeFile</code> instance for the file located at <code>self.location</code>.</p> |
| </div> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| |
| |
| <p><span class="doc-section-title">Raises:</span></p> |
| <table> |
| <thead> |
| <tr> |
| <th>Type</th> |
| <th>Description</th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr class="doc-section-item"> |
| <td> |
| <code><span title="FileNotFoundError">FileNotFoundError</span></code> |
| </td> |
| <td> |
| <div class="doc-md-description"> |
| <p>If the file at self.location does not exist.</p> |
| </div> |
| </td> |
| </tr> |
| <tr class="doc-section-item"> |
| <td> |
| <code><span title="PermissionError">PermissionError</span></code> |
| </td> |
| <td> |
| <div class="doc-md-description"> |
| <p>If the file at <code>self.location</code> cannot be accessed due to a permission error, such as |
| AWS error code 15.</p> |
| </div> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| |
| <details class="quote"> |
| <summary>Source code in <code>pyiceberg/io/pyarrow.py</code></summary> |
| <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-287">287</a></span> |
| <span class="normal"><a href="#__codelineno-0-288">288</a></span> |
| <span class="normal"><a href="#__codelineno-0-289">289</a></span> |
| <span class="normal"><a href="#__codelineno-0-290">290</a></span> |
| <span class="normal"><a href="#__codelineno-0-291">291</a></span> |
| <span class="normal"><a href="#__codelineno-0-292">292</a></span> |
| <span class="normal"><a href="#__codelineno-0-293">293</a></span> |
| <span class="normal"><a href="#__codelineno-0-294">294</a></span> |
| <span class="normal"><a href="#__codelineno-0-295">295</a></span> |
| <span class="normal"><a href="#__codelineno-0-296">296</a></span> |
| <span class="normal"><a href="#__codelineno-0-297">297</a></span> |
| <span class="normal"><a href="#__codelineno-0-298">298</a></span> |
| <span class="normal"><a href="#__codelineno-0-299">299</a></span> |
| <span class="normal"><a href="#__codelineno-0-300">300</a></span> |
| <span class="normal"><a href="#__codelineno-0-301">301</a></span> |
| <span class="normal"><a href="#__codelineno-0-302">302</a></span> |
| <span class="normal"><a href="#__codelineno-0-303">303</a></span> |
| <span class="normal"><a href="#__codelineno-0-304">304</a></span> |
| <span class="normal"><a href="#__codelineno-0-305">305</a></span> |
| <span class="normal"><a href="#__codelineno-0-306">306</a></span> |
| <span class="normal"><a href="#__codelineno-0-307">307</a></span> |
| <span class="normal"><a href="#__codelineno-0-308">308</a></span> |
| <span class="normal"><a href="#__codelineno-0-309">309</a></span> |
| <span class="normal"><a href="#__codelineno-0-310">310</a></span> |
| <span class="normal"><a href="#__codelineno-0-311">311</a></span> |
| <span class="normal"><a href="#__codelineno-0-312">312</a></span> |
| <span class="normal"><a href="#__codelineno-0-313">313</a></span> |
| <span class="normal"><a href="#__codelineno-0-314">314</a></span> |
| <span class="normal"><a href="#__codelineno-0-315">315</a></span> |
| <span class="normal"><a href="#__codelineno-0-316">316</a></span></pre></div></td><td class="code"><div><pre><span></span><code><a id="__codelineno-0-287" name="__codelineno-0-287"></a><span class="k">def</span><span class="w"> </span><span class="nf">open</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">seekable</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">True</span><span class="p">)</span> <span class="o">-></span> <span class="n">InputStream</span><span class="p">:</span> |
| <a id="__codelineno-0-288" name="__codelineno-0-288"></a><span class="w"> </span><span class="sd">"""Open the location using a PyArrow FileSystem inferred from the location.</span> |
| <a id="__codelineno-0-289" name="__codelineno-0-289"></a> |
| <a id="__codelineno-0-290" name="__codelineno-0-290"></a><span class="sd"> Args:</span> |
| <a id="__codelineno-0-291" name="__codelineno-0-291"></a><span class="sd"> seekable: If the stream should support seek, or if it is consumed sequential.</span> |
| <a id="__codelineno-0-292" name="__codelineno-0-292"></a> |
| <a id="__codelineno-0-293" name="__codelineno-0-293"></a><span class="sd"> Returns:</span> |
| <a id="__codelineno-0-294" name="__codelineno-0-294"></a><span class="sd"> pyarrow.lib.NativeFile: A NativeFile instance for the file located at `self.location`.</span> |
| <a id="__codelineno-0-295" name="__codelineno-0-295"></a> |
| <a id="__codelineno-0-296" name="__codelineno-0-296"></a><span class="sd"> Raises:</span> |
| <a id="__codelineno-0-297" name="__codelineno-0-297"></a><span class="sd"> FileNotFoundError: If the file at self.location does not exist.</span> |
| <a id="__codelineno-0-298" name="__codelineno-0-298"></a><span class="sd"> PermissionError: If the file at self.location cannot be accessed due to a permission error such as</span> |
| <a id="__codelineno-0-299" name="__codelineno-0-299"></a><span class="sd"> an AWS error code 15.</span> |
| <a id="__codelineno-0-300" name="__codelineno-0-300"></a><span class="sd"> """</span> |
| <a id="__codelineno-0-301" name="__codelineno-0-301"></a> <span class="k">try</span><span class="p">:</span> |
| <a id="__codelineno-0-302" name="__codelineno-0-302"></a> <span class="k">if</span> <span class="n">seekable</span><span class="p">:</span> |
| <a id="__codelineno-0-303" name="__codelineno-0-303"></a> <span class="n">input_file</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_filesystem</span><span class="o">.</span><span class="n">open_input_file</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_path</span><span class="p">)</span> |
| <a id="__codelineno-0-304" name="__codelineno-0-304"></a> <span class="k">else</span><span class="p">:</span> |
| <a id="__codelineno-0-305" name="__codelineno-0-305"></a> <span class="n">input_file</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_filesystem</span><span class="o">.</span><span class="n">open_input_stream</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_path</span><span class="p">,</span> <span class="n">buffer_size</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">_buffer_size</span><span class="p">)</span> |
| <a id="__codelineno-0-306" name="__codelineno-0-306"></a> <span class="k">except</span> <span class="ne">FileNotFoundError</span><span class="p">:</span> |
| <a id="__codelineno-0-307" name="__codelineno-0-307"></a> <span class="k">raise</span> |
| <a id="__codelineno-0-308" name="__codelineno-0-308"></a> <span class="k">except</span> <span class="ne">PermissionError</span><span class="p">:</span> |
| <a id="__codelineno-0-309" name="__codelineno-0-309"></a> <span class="k">raise</span> |
| <a id="__codelineno-0-310" name="__codelineno-0-310"></a> <span class="k">except</span> <span class="ne">OSError</span> <span class="k">as</span> <span class="n">e</span><span class="p">:</span> |
| <a id="__codelineno-0-311" name="__codelineno-0-311"></a> <span class="k">if</span> <span class="n">e</span><span class="o">.</span><span class="n">errno</span> <span class="o">==</span> <span class="mi">2</span> <span class="ow">or</span> <span class="s2">"Path does not exist"</span> <span class="ow">in</span> <span class="nb">str</span><span class="p">(</span><span class="n">e</span><span class="p">):</span> |
| <a id="__codelineno-0-312" name="__codelineno-0-312"></a> <span class="k">raise</span> <span class="ne">FileNotFoundError</span><span class="p">(</span><span class="sa">f</span><span class="s2">"Cannot open file, does not exist: </span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">location</span><span class="si">}</span><span class="s2">"</span><span class="p">)</span> <span class="kn">from</span><span class="w"> </span><span class="nn">e</span> |
| <a id="__codelineno-0-313" name="__codelineno-0-313"></a> <span class="k">elif</span> <span class="n">e</span><span class="o">.</span><span class="n">errno</span> <span class="o">==</span> <span class="mi">13</span> <span class="ow">or</span> <span class="s2">"AWS Error [code 15]"</span> <span class="ow">in</span> <span class="nb">str</span><span class="p">(</span><span class="n">e</span><span class="p">):</span> |
| <a id="__codelineno-0-314" name="__codelineno-0-314"></a> <span class="k">raise</span> <span class="ne">PermissionError</span><span class="p">(</span><span class="sa">f</span><span class="s2">"Cannot open file, access denied: </span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">location</span><span class="si">}</span><span class="s2">"</span><span class="p">)</span> <span class="kn">from</span><span class="w"> </span><span class="nn">e</span> |
| <a id="__codelineno-0-315" name="__codelineno-0-315"></a> <span class="k">raise</span> <span class="c1"># pragma: no cover - If some other kind of OSError, raise the raw error</span> |
| <a id="__codelineno-0-316" name="__codelineno-0-316"></a> <span class="k">return</span> <span class="n">input_file</span> |
| </code></pre></div></td></tr></table></div> |
| </details> |
| </div> |
| |
| </div> |
| |
| <div class="doc doc-object doc-function"> |
| |
| |
| <h3 id="pyiceberg.io.pyarrow.PyArrowFile.to_input_file" class="doc doc-heading"> |
| <code class="highlight language-python"><span class="n">to_input_file</span><span class="p">()</span></code> |
| |
| <a href="#pyiceberg.io.pyarrow.PyArrowFile.to_input_file" class="headerlink" title="Permanent link">¶</a></h3> |
| |
| |
| <div class="doc doc-contents "> |
| |
| <p>Return a PyArrowFile for the location of an existing PyArrowFile instance.</p> |
| <p>This method is included to abide by the OutputFile abstract base class. Since this implementation uses a single |
| PyArrowFile class (as opposed to separate InputFile and OutputFile implementations), this method simply returns |
| the same instance (<code>self</code>) rather than a copy.</p> |
| |
| <details class="quote"> |
| <summary>Source code in <code>pyiceberg/io/pyarrow.py</code></summary> |
| <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-349">349</a></span> |
| <span class="normal"><a href="#__codelineno-0-350">350</a></span> |
| <span class="normal"><a href="#__codelineno-0-351">351</a></span> |
| <span class="normal"><a href="#__codelineno-0-352">352</a></span> |
| <span class="normal"><a href="#__codelineno-0-353">353</a></span> |
| <span class="normal"><a href="#__codelineno-0-354">354</a></span> |
| <span class="normal"><a href="#__codelineno-0-355">355</a></span> |
| <span class="normal"><a href="#__codelineno-0-356">356</a></span></pre></div></td><td class="code"><div><pre><span></span><code><a id="__codelineno-0-349" name="__codelineno-0-349"></a><span class="k">def</span><span class="w"> </span><span class="nf">to_input_file</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="n">PyArrowFile</span><span class="p">:</span> |
| <a id="__codelineno-0-350" name="__codelineno-0-350"></a><span class="w"> </span><span class="sd">"""Return a new PyArrowFile for the location of an existing PyArrowFile instance.</span> |
| <a id="__codelineno-0-351" name="__codelineno-0-351"></a> |
| <a id="__codelineno-0-352" name="__codelineno-0-352"></a><span class="sd"> This method is included to abide by the OutputFile abstract base class. Since this implementation uses a single</span> |
| <a id="__codelineno-0-353" name="__codelineno-0-353"></a><span class="sd"> PyArrowFile class (as opposed to separate InputFile and OutputFile implementations), this method effectively returns</span> |
| <a id="__codelineno-0-354" name="__codelineno-0-354"></a><span class="sd"> a copy of the same instance.</span> |
| <a id="__codelineno-0-355" name="__codelineno-0-355"></a><span class="sd"> """</span> |
| <a id="__codelineno-0-356" name="__codelineno-0-356"></a> <span class="k">return</span> <span class="bp">self</span> |
| </code></pre></div></td></tr></table></div> |
| </details> |
| </div> |
| |
| </div> |
| |
| |
| |
| </div> |
| |
| </div> |
| |
| </div> |
| |
| <div class="doc doc-object doc-class"> |
| |
| |
| |
| <h2 id="pyiceberg.io.pyarrow.PyArrowFileIO" class="doc doc-heading"> |
| <code>PyArrowFileIO</code> |
| |
| |
| <a href="#pyiceberg.io.pyarrow.PyArrowFileIO" class="headerlink" title="Permanent link">¶</a></h2> |
| |
| |
| <div class="doc doc-contents "> |
| <p class="doc doc-class-bases"> |
| Bases: <code><a class="autorefs autorefs-internal" title="pyiceberg.io.FileIO" href="../#pyiceberg.io.FileIO">FileIO</a></code></p> |
| |
| |
| |
| |
| |
| |
| |
| <details class="quote"> |
| <summary>Source code in <code>pyiceberg/io/pyarrow.py</code></summary> |
| <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-359">359</a></span> |
| <span class="normal"><a href="#__codelineno-0-360">360</a></span> |
| <span class="normal"><a href="#__codelineno-0-361">361</a></span> |
| <span class="normal"><a href="#__codelineno-0-362">362</a></span> |
| <span class="normal"><a href="#__codelineno-0-363">363</a></span> |
| <span class="normal"><a href="#__codelineno-0-364">364</a></span> |
| <span class="normal"><a href="#__codelineno-0-365">365</a></span> |
| <span class="normal"><a href="#__codelineno-0-366">366</a></span> |
| <span class="normal"><a href="#__codelineno-0-367">367</a></span> |
| <span class="normal"><a href="#__codelineno-0-368">368</a></span> |
| <span class="normal"><a href="#__codelineno-0-369">369</a></span> |
| <span class="normal"><a href="#__codelineno-0-370">370</a></span> |
| <span class="normal"><a href="#__codelineno-0-371">371</a></span> |
| <span class="normal"><a href="#__codelineno-0-372">372</a></span> |
| <span class="normal"><a href="#__codelineno-0-373">373</a></span> |
| <span class="normal"><a href="#__codelineno-0-374">374</a></span> |
| <span class="normal"><a href="#__codelineno-0-375">375</a></span> |
| <span class="normal"><a href="#__codelineno-0-376">376</a></span> |
| <span class="normal"><a href="#__codelineno-0-377">377</a></span> |
| <span class="normal"><a href="#__codelineno-0-378">378</a></span> |
| <span class="normal"><a href="#__codelineno-0-379">379</a></span> |
| <span class="normal"><a href="#__codelineno-0-380">380</a></span> |
| <span class="normal"><a href="#__codelineno-0-381">381</a></span> |
| <span class="normal"><a href="#__codelineno-0-382">382</a></span> |
| <span class="normal"><a href="#__codelineno-0-383">383</a></span> |
| <span class="normal"><a href="#__codelineno-0-384">384</a></span> |
| <span class="normal"><a href="#__codelineno-0-385">385</a></span> |
| <span class="normal"><a href="#__codelineno-0-386">386</a></span> |
| <span class="normal"><a href="#__codelineno-0-387">387</a></span> |
| <span class="normal"><a href="#__codelineno-0-388">388</a></span> |
| <span class="normal"><a href="#__codelineno-0-389">389</a></span> |
| <span class="normal"><a href="#__codelineno-0-390">390</a></span> |
| <span class="normal"><a href="#__codelineno-0-391">391</a></span> |
| <span class="normal"><a href="#__codelineno-0-392">392</a></span> |
| <span class="normal"><a href="#__codelineno-0-393">393</a></span> |
| <span class="normal"><a href="#__codelineno-0-394">394</a></span> |
| <span class="normal"><a href="#__codelineno-0-395">395</a></span> |
| <span class="normal"><a href="#__codelineno-0-396">396</a></span> |
| <span class="normal"><a href="#__codelineno-0-397">397</a></span> |
| <span class="normal"><a href="#__codelineno-0-398">398</a></span> |
| <span class="normal"><a href="#__codelineno-0-399">399</a></span> |
| <span class="normal"><a href="#__codelineno-0-400">400</a></span> |
| <span class="normal"><a href="#__codelineno-0-401">401</a></span> |
| <span class="normal"><a href="#__codelineno-0-402">402</a></span> |
| <span class="normal"><a href="#__codelineno-0-403">403</a></span> |
| <span class="normal"><a href="#__codelineno-0-404">404</a></span> |
| <span class="normal"><a href="#__codelineno-0-405">405</a></span> |
| <span class="normal"><a href="#__codelineno-0-406">406</a></span> |
| <span class="normal"><a href="#__codelineno-0-407">407</a></span> |
| <span class="normal"><a href="#__codelineno-0-408">408</a></span> |
| <span class="normal"><a href="#__codelineno-0-409">409</a></span> |
| <span class="normal"><a href="#__codelineno-0-410">410</a></span> |
| <span class="normal"><a href="#__codelineno-0-411">411</a></span> |
| <span class="normal"><a href="#__codelineno-0-412">412</a></span> |
| <span class="normal"><a href="#__codelineno-0-413">413</a></span> |
| <span class="normal"><a href="#__codelineno-0-414">414</a></span> |
| <span class="normal"><a href="#__codelineno-0-415">415</a></span> |
| <span class="normal"><a href="#__codelineno-0-416">416</a></span> |
| <span class="normal"><a href="#__codelineno-0-417">417</a></span> |
| <span class="normal"><a href="#__codelineno-0-418">418</a></span> |
| <span class="normal"><a href="#__codelineno-0-419">419</a></span> |
| <span class="normal"><a href="#__codelineno-0-420">420</a></span> |
| <span class="normal"><a href="#__codelineno-0-421">421</a></span> |
| <span class="normal"><a href="#__codelineno-0-422">422</a></span> |
| <span class="normal"><a href="#__codelineno-0-423">423</a></span> |
| <span class="normal"><a href="#__codelineno-0-424">424</a></span> |
| <span class="normal"><a href="#__codelineno-0-425">425</a></span> |
| <span class="normal"><a href="#__codelineno-0-426">426</a></span> |
| <span class="normal"><a href="#__codelineno-0-427">427</a></span> |
| <span class="normal"><a href="#__codelineno-0-428">428</a></span> |
| <span class="normal"><a href="#__codelineno-0-429">429</a></span> |
| <span class="normal"><a href="#__codelineno-0-430">430</a></span> |
| <span class="normal"><a href="#__codelineno-0-431">431</a></span> |
| <span class="normal"><a href="#__codelineno-0-432">432</a></span> |
| <span class="normal"><a href="#__codelineno-0-433">433</a></span> |
| <span class="normal"><a href="#__codelineno-0-434">434</a></span> |
| <span class="normal"><a href="#__codelineno-0-435">435</a></span> |
| <span class="normal"><a href="#__codelineno-0-436">436</a></span> |
| <span class="normal"><a href="#__codelineno-0-437">437</a></span> |
| <span class="normal"><a href="#__codelineno-0-438">438</a></span> |
| <span class="normal"><a href="#__codelineno-0-439">439</a></span> |
| <span class="normal"><a href="#__codelineno-0-440">440</a></span> |
| <span class="normal"><a href="#__codelineno-0-441">441</a></span> |
| <span class="normal"><a href="#__codelineno-0-442">442</a></span> |
| <span class="normal"><a href="#__codelineno-0-443">443</a></span> |
| <span class="normal"><a href="#__codelineno-0-444">444</a></span> |
| <span class="normal"><a href="#__codelineno-0-445">445</a></span> |
| <span class="normal"><a href="#__codelineno-0-446">446</a></span> |
| <span class="normal"><a href="#__codelineno-0-447">447</a></span> |
| <span class="normal"><a href="#__codelineno-0-448">448</a></span> |
| <span class="normal"><a href="#__codelineno-0-449">449</a></span> |
| <span class="normal"><a href="#__codelineno-0-450">450</a></span> |
| <span class="normal"><a href="#__codelineno-0-451">451</a></span> |
| <span class="normal"><a href="#__codelineno-0-452">452</a></span> |
| <span class="normal"><a href="#__codelineno-0-453">453</a></span> |
| <span class="normal"><a href="#__codelineno-0-454">454</a></span> |
| <span class="normal"><a href="#__codelineno-0-455">455</a></span> |
| <span class="normal"><a href="#__codelineno-0-456">456</a></span> |
| <span class="normal"><a href="#__codelineno-0-457">457</a></span> |
| <span class="normal"><a href="#__codelineno-0-458">458</a></span> |
| <span class="normal"><a href="#__codelineno-0-459">459</a></span> |
| <span class="normal"><a href="#__codelineno-0-460">460</a></span> |
| <span class="normal"><a href="#__codelineno-0-461">461</a></span> |
| <span class="normal"><a href="#__codelineno-0-462">462</a></span> |
| <span class="normal"><a href="#__codelineno-0-463">463</a></span> |
| <span class="normal"><a href="#__codelineno-0-464">464</a></span> |
| <span class="normal"><a href="#__codelineno-0-465">465</a></span> |
| <span class="normal"><a href="#__codelineno-0-466">466</a></span> |
| <span class="normal"><a href="#__codelineno-0-467">467</a></span> |
| <span class="normal"><a href="#__codelineno-0-468">468</a></span> |
| <span class="normal"><a href="#__codelineno-0-469">469</a></span> |
| <span class="normal"><a href="#__codelineno-0-470">470</a></span> |
| <span class="normal"><a href="#__codelineno-0-471">471</a></span> |
| <span class="normal"><a href="#__codelineno-0-472">472</a></span> |
| <span class="normal"><a href="#__codelineno-0-473">473</a></span> |
| <span class="normal"><a href="#__codelineno-0-474">474</a></span> |
| <span class="normal"><a href="#__codelineno-0-475">475</a></span> |
| <span class="normal"><a href="#__codelineno-0-476">476</a></span> |
| <span class="normal"><a href="#__codelineno-0-477">477</a></span> |
| <span class="normal"><a href="#__codelineno-0-478">478</a></span> |
| <span class="normal"><a href="#__codelineno-0-479">479</a></span> |
| <span class="normal"><a href="#__codelineno-0-480">480</a></span> |
| <span class="normal"><a href="#__codelineno-0-481">481</a></span> |
| <span class="normal"><a href="#__codelineno-0-482">482</a></span> |
| <span class="normal"><a href="#__codelineno-0-483">483</a></span> |
| <span class="normal"><a href="#__codelineno-0-484">484</a></span> |
| <span class="normal"><a href="#__codelineno-0-485">485</a></span> |
| <span class="normal"><a href="#__codelineno-0-486">486</a></span> |
| <span class="normal"><a href="#__codelineno-0-487">487</a></span> |
| <span class="normal"><a href="#__codelineno-0-488">488</a></span> |
| <span class="normal"><a href="#__codelineno-0-489">489</a></span> |
| <span class="normal"><a href="#__codelineno-0-490">490</a></span> |
| <span class="normal"><a href="#__codelineno-0-491">491</a></span> |
| <span class="normal"><a href="#__codelineno-0-492">492</a></span> |
| <span class="normal"><a href="#__codelineno-0-493">493</a></span> |
| <span class="normal"><a href="#__codelineno-0-494">494</a></span> |
| <span class="normal"><a href="#__codelineno-0-495">495</a></span> |
| <span class="normal"><a href="#__codelineno-0-496">496</a></span> |
| <span class="normal"><a href="#__codelineno-0-497">497</a></span> |
| <span class="normal"><a href="#__codelineno-0-498">498</a></span> |
| <span class="normal"><a href="#__codelineno-0-499">499</a></span> |
| <span class="normal"><a href="#__codelineno-0-500">500</a></span> |
| <span class="normal"><a href="#__codelineno-0-501">501</a></span> |
| <span class="normal"><a href="#__codelineno-0-502">502</a></span> |
| <span class="normal"><a href="#__codelineno-0-503">503</a></span> |
| <span class="normal"><a href="#__codelineno-0-504">504</a></span> |
| <span class="normal"><a href="#__codelineno-0-505">505</a></span> |
| <span class="normal"><a href="#__codelineno-0-506">506</a></span> |
| <span class="normal"><a href="#__codelineno-0-507">507</a></span> |
| <span class="normal"><a href="#__codelineno-0-508">508</a></span> |
| <span class="normal"><a href="#__codelineno-0-509">509</a></span> |
| <span class="normal"><a href="#__codelineno-0-510">510</a></span> |
| <span class="normal"><a href="#__codelineno-0-511">511</a></span> |
| <span class="normal"><a href="#__codelineno-0-512">512</a></span> |
| <span class="normal"><a href="#__codelineno-0-513">513</a></span> |
| <span class="normal"><a href="#__codelineno-0-514">514</a></span> |
| <span class="normal"><a href="#__codelineno-0-515">515</a></span> |
| <span class="normal"><a href="#__codelineno-0-516">516</a></span> |
| <span class="normal"><a href="#__codelineno-0-517">517</a></span> |
| <span class="normal"><a href="#__codelineno-0-518">518</a></span> |
| <span class="normal"><a href="#__codelineno-0-519">519</a></span> |
| <span class="normal"><a href="#__codelineno-0-520">520</a></span> |
| <span class="normal"><a href="#__codelineno-0-521">521</a></span> |
| <span class="normal"><a href="#__codelineno-0-522">522</a></span> |
| <span class="normal"><a href="#__codelineno-0-523">523</a></span> |
| <span class="normal"><a href="#__codelineno-0-524">524</a></span> |
| <span class="normal"><a href="#__codelineno-0-525">525</a></span> |
| <span class="normal"><a href="#__codelineno-0-526">526</a></span> |
| <span class="normal"><a href="#__codelineno-0-527">527</a></span> |
| <span class="normal"><a href="#__codelineno-0-528">528</a></span> |
| <span class="normal"><a href="#__codelineno-0-529">529</a></span> |
| <span class="normal"><a href="#__codelineno-0-530">530</a></span> |
| <span class="normal"><a href="#__codelineno-0-531">531</a></span> |
| <span class="normal"><a href="#__codelineno-0-532">532</a></span> |
| <span class="normal"><a href="#__codelineno-0-533">533</a></span> |
| <span class="normal"><a href="#__codelineno-0-534">534</a></span> |
| <span class="normal"><a href="#__codelineno-0-535">535</a></span> |
| <span class="normal"><a href="#__codelineno-0-536">536</a></span> |
| <span class="normal"><a href="#__codelineno-0-537">537</a></span> |
| <span class="normal"><a href="#__codelineno-0-538">538</a></span> |
| <span class="normal"><a href="#__codelineno-0-539">539</a></span> |
| <span class="normal"><a href="#__codelineno-0-540">540</a></span> |
| <span class="normal"><a href="#__codelineno-0-541">541</a></span> |
| <span class="normal"><a href="#__codelineno-0-542">542</a></span> |
| <span class="normal"><a href="#__codelineno-0-543">543</a></span> |
| <span class="normal"><a href="#__codelineno-0-544">544</a></span> |
| <span class="normal"><a href="#__codelineno-0-545">545</a></span> |
| <span class="normal"><a href="#__codelineno-0-546">546</a></span> |
| <span class="normal"><a href="#__codelineno-0-547">547</a></span> |
| <span class="normal"><a href="#__codelineno-0-548">548</a></span> |
| <span class="normal"><a href="#__codelineno-0-549">549</a></span> |
| <span class="normal"><a href="#__codelineno-0-550">550</a></span> |
| <span class="normal"><a href="#__codelineno-0-551">551</a></span> |
| <span class="normal"><a href="#__codelineno-0-552">552</a></span> |
| <span class="normal"><a href="#__codelineno-0-553">553</a></span> |
| <span class="normal"><a href="#__codelineno-0-554">554</a></span> |
| <span class="normal"><a href="#__codelineno-0-555">555</a></span> |
| <span class="normal"><a href="#__codelineno-0-556">556</a></span> |
| <span class="normal"><a href="#__codelineno-0-557">557</a></span> |
| <span class="normal"><a href="#__codelineno-0-558">558</a></span> |
| <span class="normal"><a href="#__codelineno-0-559">559</a></span> |
| <span class="normal"><a href="#__codelineno-0-560">560</a></span> |
| <span class="normal"><a href="#__codelineno-0-561">561</a></span> |
| <span class="normal"><a href="#__codelineno-0-562">562</a></span> |
| <span class="normal"><a href="#__codelineno-0-563">563</a></span> |
| <span class="normal"><a href="#__codelineno-0-564">564</a></span> |
| <span class="normal"><a href="#__codelineno-0-565">565</a></span> |
| <span class="normal"><a href="#__codelineno-0-566">566</a></span> |
| <span class="normal"><a href="#__codelineno-0-567">567</a></span> |
| <span class="normal"><a href="#__codelineno-0-568">568</a></span> |
| <span class="normal"><a href="#__codelineno-0-569">569</a></span> |
| <span class="normal"><a href="#__codelineno-0-570">570</a></span> |
| <span class="normal"><a href="#__codelineno-0-571">571</a></span> |
| <span class="normal"><a href="#__codelineno-0-572">572</a></span> |
| <span class="normal"><a href="#__codelineno-0-573">573</a></span> |
| <span class="normal"><a href="#__codelineno-0-574">574</a></span> |
| <span class="normal"><a href="#__codelineno-0-575">575</a></span> |
| <span class="normal"><a href="#__codelineno-0-576">576</a></span> |
| <span class="normal"><a href="#__codelineno-0-577">577</a></span> |
| <span class="normal"><a href="#__codelineno-0-578">578</a></span> |
| <span class="normal"><a href="#__codelineno-0-579">579</a></span> |
| <span class="normal"><a href="#__codelineno-0-580">580</a></span> |
| <span class="normal"><a href="#__codelineno-0-581">581</a></span> |
| <span class="normal"><a href="#__codelineno-0-582">582</a></span> |
| <span class="normal"><a href="#__codelineno-0-583">583</a></span> |
| <span class="normal"><a href="#__codelineno-0-584">584</a></span></pre></div></td><td class="code"><div><pre><span></span><code><a id="__codelineno-0-359" name="__codelineno-0-359"></a><span class="k">class</span><span class="w"> </span><span class="nc">PyArrowFileIO</span><span class="p">(</span><span class="n">FileIO</span><span class="p">):</span> |
| <a id="__codelineno-0-360" name="__codelineno-0-360"></a> <span class="n">fs_by_scheme</span><span class="p">:</span> <span class="n">Callable</span><span class="p">[[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]],</span> <span class="n">FileSystem</span><span class="p">]</span> |
| <a id="__codelineno-0-361" name="__codelineno-0-361"></a> |
| <a id="__codelineno-0-362" name="__codelineno-0-362"></a> <span class="k">def</span><span class="w"> </span><span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">properties</span><span class="p">:</span> <span class="n">Properties</span> <span class="o">=</span> <span class="n">EMPTY_DICT</span><span class="p">):</span> |
| <a id="__codelineno-0-363" name="__codelineno-0-363"></a> <span class="bp">self</span><span class="o">.</span><span class="n">fs_by_scheme</span><span class="p">:</span> <span class="n">Callable</span><span class="p">[[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]],</span> <span class="n">FileSystem</span><span class="p">]</span> <span class="o">=</span> <span class="n">lru_cache</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_initialize_fs</span><span class="p">)</span> |
| <a id="__codelineno-0-364" name="__codelineno-0-364"></a> <span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="fm">__init__</span><span class="p">(</span><span class="n">properties</span><span class="o">=</span><span class="n">properties</span><span class="p">)</span> |
| <a id="__codelineno-0-365" name="__codelineno-0-365"></a> |
| <a id="__codelineno-0-366" name="__codelineno-0-366"></a> <span class="nd">@staticmethod</span> |
| <a id="__codelineno-0-367" name="__codelineno-0-367"></a> <span class="k">def</span><span class="w"> </span><span class="nf">parse_location</span><span class="p">(</span><span class="n">location</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="n">Tuple</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="nb">str</span><span class="p">,</span> <span class="nb">str</span><span class="p">]:</span> |
| <a id="__codelineno-0-368" name="__codelineno-0-368"></a><span class="w"> </span><span class="sd">"""Return the scheme, the netloc, and the path without the scheme."""</span> |
| <a id="__codelineno-0-369" name="__codelineno-0-369"></a> <span class="n">uri</span> <span class="o">=</span> <span class="n">urlparse</span><span class="p">(</span><span class="n">location</span><span class="p">)</span> |
| <a id="__codelineno-0-370" name="__codelineno-0-370"></a> <span class="k">if</span> <span class="ow">not</span> <span class="n">uri</span><span class="o">.</span><span class="n">scheme</span><span class="p">:</span> |
| <a id="__codelineno-0-371" name="__codelineno-0-371"></a> <span class="k">return</span> <span class="s2">"file"</span><span class="p">,</span> <span class="n">uri</span><span class="o">.</span><span class="n">netloc</span><span class="p">,</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">abspath</span><span class="p">(</span><span class="n">location</span><span class="p">)</span> |
| <a id="__codelineno-0-372" name="__codelineno-0-372"></a> <span class="k">elif</span> <span class="n">uri</span><span class="o">.</span><span class="n">scheme</span> <span class="ow">in</span> <span class="p">(</span><span class="s2">"hdfs"</span><span class="p">,</span> <span class="s2">"viewfs"</span><span class="p">):</span> |
| <a id="__codelineno-0-373" name="__codelineno-0-373"></a> <span class="k">return</span> <span class="n">uri</span><span class="o">.</span><span class="n">scheme</span><span class="p">,</span> <span class="n">uri</span><span class="o">.</span><span class="n">netloc</span><span class="p">,</span> <span class="n">uri</span><span class="o">.</span><span class="n">path</span> |
| <a id="__codelineno-0-374" name="__codelineno-0-374"></a> <span class="k">else</span><span class="p">:</span> |
| <a id="__codelineno-0-375" name="__codelineno-0-375"></a> <span class="k">return</span> <span class="n">uri</span><span class="o">.</span><span class="n">scheme</span><span class="p">,</span> <span class="n">uri</span><span class="o">.</span><span class="n">netloc</span><span class="p">,</span> <span class="sa">f</span><span class="s2">"</span><span class="si">{</span><span class="n">uri</span><span class="o">.</span><span class="n">netloc</span><span class="si">}{</span><span class="n">uri</span><span class="o">.</span><span class="n">path</span><span class="si">}</span><span class="s2">"</span> |
| <a id="__codelineno-0-376" name="__codelineno-0-376"></a> |
| <a id="__codelineno-0-377" name="__codelineno-0-377"></a> <span class="k">def</span><span class="w"> </span><span class="nf">_initialize_fs</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">scheme</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span> <span class="n">netloc</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">)</span> <span class="o">-></span> <span class="n">FileSystem</span><span class="p">:</span> |
| <a id="__codelineno-0-378" name="__codelineno-0-378"></a><span class="w"> </span><span class="sd">"""Initialize the FileSystem that matches the given scheme."""</span> |
| <a id="__codelineno-0-379" name="__codelineno-0-379"></a> <span class="k">if</span> <span class="n">scheme</span> <span class="ow">in</span> <span class="p">{</span><span class="s2">"oss"</span><span class="p">}:</span> |
| <a id="__codelineno-0-380" name="__codelineno-0-380"></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_initialize_oss_fs</span><span class="p">()</span> |
| <a id="__codelineno-0-381" name="__codelineno-0-381"></a> |
| <a id="__codelineno-0-382" name="__codelineno-0-382"></a> <span class="k">elif</span> <span class="n">scheme</span> <span class="ow">in</span> <span class="p">{</span><span class="s2">"s3"</span><span class="p">,</span> <span class="s2">"s3a"</span><span class="p">,</span> <span class="s2">"s3n"</span><span class="p">}:</span> |
| <a id="__codelineno-0-383" name="__codelineno-0-383"></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_initialize_s3_fs</span><span class="p">(</span><span class="n">netloc</span><span class="p">)</span> |
| <a id="__codelineno-0-384" name="__codelineno-0-384"></a> |
| <a id="__codelineno-0-385" name="__codelineno-0-385"></a> <span class="k">elif</span> <span class="n">scheme</span> <span class="ow">in</span> <span class="p">{</span><span class="s2">"hdfs"</span><span class="p">,</span> <span class="s2">"viewfs"</span><span class="p">}:</span> |
| <a id="__codelineno-0-386" name="__codelineno-0-386"></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_initialize_hdfs_fs</span><span class="p">(</span><span class="n">scheme</span><span class="p">,</span> <span class="n">netloc</span><span class="p">)</span> |
| <a id="__codelineno-0-387" name="__codelineno-0-387"></a> |
| <a id="__codelineno-0-388" name="__codelineno-0-388"></a> <span class="k">elif</span> <span class="n">scheme</span> <span class="ow">in</span> <span class="p">{</span><span class="s2">"gs"</span><span class="p">,</span> <span class="s2">"gcs"</span><span class="p">}:</span> |
| <a id="__codelineno-0-389" name="__codelineno-0-389"></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_initialize_gcs_fs</span><span class="p">()</span> |
| <a id="__codelineno-0-390" name="__codelineno-0-390"></a> |
| <a id="__codelineno-0-391" name="__codelineno-0-391"></a> <span class="k">elif</span> <span class="n">scheme</span> <span class="ow">in</span> <span class="p">{</span><span class="s2">"file"</span><span class="p">}:</span> |
| <a id="__codelineno-0-392" name="__codelineno-0-392"></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_initialize_local_fs</span><span class="p">()</span> |
| <a id="__codelineno-0-393" name="__codelineno-0-393"></a> |
| <a id="__codelineno-0-394" name="__codelineno-0-394"></a> <span class="k">else</span><span class="p">:</span> |
| <a id="__codelineno-0-395" name="__codelineno-0-395"></a> <span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="sa">f</span><span class="s2">"Unrecognized filesystem type in URI: </span><span class="si">{</span><span class="n">scheme</span><span class="si">}</span><span class="s2">"</span><span class="p">)</span> |
| <a id="__codelineno-0-396" name="__codelineno-0-396"></a> |
| <a id="__codelineno-0-397" name="__codelineno-0-397"></a> <span class="k">def</span><span class="w"> </span><span class="nf">_initialize_oss_fs</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="n">FileSystem</span><span class="p">:</span> |
| <a id="__codelineno-0-398" name="__codelineno-0-398"></a> <span class="kn">from</span><span class="w"> </span><span class="nn">pyarrow.fs</span><span class="w"> </span><span class="kn">import</span> <span class="n">S3FileSystem</span> |
| <a id="__codelineno-0-399" name="__codelineno-0-399"></a> |
| <a id="__codelineno-0-400" name="__codelineno-0-400"></a> <span class="n">client_kwargs</span><span class="p">:</span> <span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Any</span><span class="p">]</span> <span class="o">=</span> <span class="p">{</span> |
| <a id="__codelineno-0-401" name="__codelineno-0-401"></a> <span class="s2">"endpoint_override"</span><span class="p">:</span> <span class="bp">self</span><span class="o">.</span><span class="n">properties</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">S3_ENDPOINT</span><span class="p">),</span> |
| <a id="__codelineno-0-402" name="__codelineno-0-402"></a> <span class="s2">"access_key"</span><span class="p">:</span> <span class="n">get_first_property_value</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">properties</span><span class="p">,</span> <span class="n">S3_ACCESS_KEY_ID</span><span class="p">,</span> <span class="n">AWS_ACCESS_KEY_ID</span><span class="p">),</span> |
| <a id="__codelineno-0-403" name="__codelineno-0-403"></a> <span class="s2">"secret_key"</span><span class="p">:</span> <span class="n">get_first_property_value</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">properties</span><span class="p">,</span> <span class="n">S3_SECRET_ACCESS_KEY</span><span class="p">,</span> <span class="n">AWS_SECRET_ACCESS_KEY</span><span class="p">),</span> |
| <a id="__codelineno-0-404" name="__codelineno-0-404"></a> <span class="s2">"session_token"</span><span class="p">:</span> <span class="n">get_first_property_value</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">properties</span><span class="p">,</span> <span class="n">S3_SESSION_TOKEN</span><span class="p">,</span> <span class="n">AWS_SESSION_TOKEN</span><span class="p">),</span> |
| <a id="__codelineno-0-405" name="__codelineno-0-405"></a> <span class="s2">"region"</span><span class="p">:</span> <span class="n">get_first_property_value</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">properties</span><span class="p">,</span> <span class="n">S3_REGION</span><span class="p">,</span> <span class="n">AWS_REGION</span><span class="p">),</span> |
| <a id="__codelineno-0-406" name="__codelineno-0-406"></a> <span class="p">}</span> |
| <a id="__codelineno-0-407" name="__codelineno-0-407"></a> |
| <a id="__codelineno-0-408" name="__codelineno-0-408"></a> <span class="k">if</span> <span class="n">proxy_uri</span> <span class="o">:=</span> <span class="bp">self</span><span class="o">.</span><span class="n">properties</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">S3_PROXY_URI</span><span class="p">):</span> |
| <a id="__codelineno-0-409" name="__codelineno-0-409"></a> <span class="n">client_kwargs</span><span class="p">[</span><span class="s2">"proxy_options"</span><span class="p">]</span> <span class="o">=</span> <span class="n">proxy_uri</span> |
| <a id="__codelineno-0-410" name="__codelineno-0-410"></a> |
| <a id="__codelineno-0-411" name="__codelineno-0-411"></a> <span class="k">if</span> <span class="n">connect_timeout</span> <span class="o">:=</span> <span class="bp">self</span><span class="o">.</span><span class="n">properties</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">S3_CONNECT_TIMEOUT</span><span class="p">):</span> |
| <a id="__codelineno-0-412" name="__codelineno-0-412"></a> <span class="n">client_kwargs</span><span class="p">[</span><span class="s2">"connect_timeout"</span><span class="p">]</span> <span class="o">=</span> <span class="nb">float</span><span class="p">(</span><span class="n">connect_timeout</span><span class="p">)</span> |
| <a id="__codelineno-0-413" name="__codelineno-0-413"></a> |
| <a id="__codelineno-0-414" name="__codelineno-0-414"></a> <span class="k">if</span> <span class="n">request_timeout</span> <span class="o">:=</span> <span class="bp">self</span><span class="o">.</span><span class="n">properties</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">S3_REQUEST_TIMEOUT</span><span class="p">):</span> |
| <a id="__codelineno-0-415" name="__codelineno-0-415"></a> <span class="n">client_kwargs</span><span class="p">[</span><span class="s2">"request_timeout"</span><span class="p">]</span> <span class="o">=</span> <span class="nb">float</span><span class="p">(</span><span class="n">request_timeout</span><span class="p">)</span> |
| <a id="__codelineno-0-416" name="__codelineno-0-416"></a> |
| <a id="__codelineno-0-417" name="__codelineno-0-417"></a> <span class="k">if</span> <span class="n">role_arn</span> <span class="o">:=</span> <span class="n">get_first_property_value</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">properties</span><span class="p">,</span> <span class="n">S3_ROLE_ARN</span><span class="p">,</span> <span class="n">AWS_ROLE_ARN</span><span class="p">):</span> |
| <a id="__codelineno-0-418" name="__codelineno-0-418"></a> <span class="n">client_kwargs</span><span class="p">[</span><span class="s2">"role_arn"</span><span class="p">]</span> <span class="o">=</span> <span class="n">role_arn</span> |
| <a id="__codelineno-0-419" name="__codelineno-0-419"></a> |
| <a id="__codelineno-0-420" name="__codelineno-0-420"></a> <span class="k">if</span> <span class="n">session_name</span> <span class="o">:=</span> <span class="n">get_first_property_value</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">properties</span><span class="p">,</span> <span class="n">S3_ROLE_SESSION_NAME</span><span class="p">,</span> <span class="n">AWS_ROLE_SESSION_NAME</span><span class="p">):</span> |
| <a id="__codelineno-0-421" name="__codelineno-0-421"></a> <span class="n">client_kwargs</span><span class="p">[</span><span class="s2">"session_name"</span><span class="p">]</span> <span class="o">=</span> <span class="n">session_name</span> |
| <a id="__codelineno-0-422" name="__codelineno-0-422"></a> |
| <a id="__codelineno-0-423" name="__codelineno-0-423"></a> <span class="k">if</span> <span class="n">force_virtual_addressing</span> <span class="o">:=</span> <span class="bp">self</span><span class="o">.</span><span class="n">properties</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">S3_FORCE_VIRTUAL_ADDRESSING</span><span class="p">):</span> |
| <a id="__codelineno-0-424" name="__codelineno-0-424"></a> <span class="n">client_kwargs</span><span class="p">[</span><span class="s2">"force_virtual_addressing"</span><span class="p">]</span> <span class="o">=</span> <span class="n">property_as_bool</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">properties</span><span class="p">,</span> <span class="n">S3_FORCE_VIRTUAL_ADDRESSING</span><span class="p">,</span> <span class="kc">False</span><span class="p">)</span> |
| <a id="__codelineno-0-425" name="__codelineno-0-425"></a> |
| <a id="__codelineno-0-426" name="__codelineno-0-426"></a> <span class="k">return</span> <span class="n">S3FileSystem</span><span class="p">(</span><span class="o">**</span><span class="n">client_kwargs</span><span class="p">)</span> |
| <a id="__codelineno-0-427" name="__codelineno-0-427"></a> |
| <a id="__codelineno-0-428" name="__codelineno-0-428"></a> <span class="k">def</span><span class="w"> </span><span class="nf">_initialize_s3_fs</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">netloc</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">])</span> <span class="o">-></span> <span class="n">FileSystem</span><span class="p">:</span> |
| <a id="__codelineno-0-429" name="__codelineno-0-429"></a> <span class="kn">from</span><span class="w"> </span><span class="nn">pyarrow.fs</span><span class="w"> </span><span class="kn">import</span> <span class="n">S3FileSystem</span> |
| <a id="__codelineno-0-430" name="__codelineno-0-430"></a> |
| <a id="__codelineno-0-431" name="__codelineno-0-431"></a> <span class="n">provided_region</span> <span class="o">=</span> <span class="n">get_first_property_value</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">properties</span><span class="p">,</span> <span class="n">S3_REGION</span><span class="p">,</span> <span class="n">AWS_REGION</span><span class="p">)</span> |
| <a id="__codelineno-0-432" name="__codelineno-0-432"></a> |
| <a id="__codelineno-0-433" name="__codelineno-0-433"></a> <span class="c1"># Resolve the bucket region when no region is provided, or when region resolution is explicitly enabled</span> |
| <a id="__codelineno-0-434" name="__codelineno-0-434"></a> <span class="k">if</span> <span class="n">provided_region</span> <span class="ow">is</span> <span class="kc">None</span> <span class="ow">or</span> <span class="n">property_as_bool</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">properties</span><span class="p">,</span> <span class="n">S3_RESOLVE_REGION</span><span class="p">,</span> <span class="kc">False</span><span class="p">)</span> <span class="ow">is</span> <span class="kc">True</span><span class="p">:</span> |
| <a id="__codelineno-0-435" name="__codelineno-0-435"></a> <span class="c1"># Resolve region from netloc(bucket), fallback to user-provided region</span> |
| <a id="__codelineno-0-436" name="__codelineno-0-436"></a> <span class="c1"># Only supported by buckets hosted by S3</span> |
| <a id="__codelineno-0-437" name="__codelineno-0-437"></a> <span class="n">bucket_region</span> <span class="o">=</span> <span class="n">_cached_resolve_s3_region</span><span class="p">(</span><span class="n">bucket</span><span class="o">=</span><span class="n">netloc</span><span class="p">)</span> <span class="ow">or</span> <span class="n">provided_region</span> |
| <a id="__codelineno-0-438" name="__codelineno-0-438"></a> <span class="k">if</span> <span class="n">provided_region</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span> <span class="ow">and</span> <span class="n">bucket_region</span> <span class="o">!=</span> <span class="n">provided_region</span><span class="p">:</span> |
| <a id="__codelineno-0-439" name="__codelineno-0-439"></a> <span class="n">logger</span><span class="o">.</span><span class="n">warning</span><span class="p">(</span> |
| <a id="__codelineno-0-440" name="__codelineno-0-440"></a> <span class="sa">f</span><span class="s2">"PyArrow FileIO overriding S3 bucket region for bucket </span><span class="si">{</span><span class="n">netloc</span><span class="si">}</span><span class="s2">: "</span> |
| <a id="__codelineno-0-441" name="__codelineno-0-441"></a> <span class="sa">f</span><span class="s2">"provided region </span><span class="si">{</span><span class="n">provided_region</span><span class="si">}</span><span class="s2">, actual region </span><span class="si">{</span><span class="n">bucket_region</span><span class="si">}</span><span class="s2">"</span> |
| <a id="__codelineno-0-442" name="__codelineno-0-442"></a> <span class="p">)</span> |
| <a id="__codelineno-0-443" name="__codelineno-0-443"></a> <span class="k">else</span><span class="p">:</span> |
| <a id="__codelineno-0-444" name="__codelineno-0-444"></a> <span class="n">bucket_region</span> <span class="o">=</span> <span class="n">provided_region</span> |
| <a id="__codelineno-0-445" name="__codelineno-0-445"></a> |
| <a id="__codelineno-0-446" name="__codelineno-0-446"></a> <span class="n">client_kwargs</span><span class="p">:</span> <span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Any</span><span class="p">]</span> <span class="o">=</span> <span class="p">{</span> |
| <a id="__codelineno-0-447" name="__codelineno-0-447"></a> <span class="s2">"endpoint_override"</span><span class="p">:</span> <span class="bp">self</span><span class="o">.</span><span class="n">properties</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">S3_ENDPOINT</span><span class="p">),</span> |
| <a id="__codelineno-0-448" name="__codelineno-0-448"></a> <span class="s2">"access_key"</span><span class="p">:</span> <span class="n">get_first_property_value</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">properties</span><span class="p">,</span> <span class="n">S3_ACCESS_KEY_ID</span><span class="p">,</span> <span class="n">AWS_ACCESS_KEY_ID</span><span class="p">),</span> |
| <a id="__codelineno-0-449" name="__codelineno-0-449"></a> <span class="s2">"secret_key"</span><span class="p">:</span> <span class="n">get_first_property_value</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">properties</span><span class="p">,</span> <span class="n">S3_SECRET_ACCESS_KEY</span><span class="p">,</span> <span class="n">AWS_SECRET_ACCESS_KEY</span><span class="p">),</span> |
| <a id="__codelineno-0-450" name="__codelineno-0-450"></a> <span class="s2">"session_token"</span><span class="p">:</span> <span class="n">get_first_property_value</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">properties</span><span class="p">,</span> <span class="n">S3_SESSION_TOKEN</span><span class="p">,</span> <span class="n">AWS_SESSION_TOKEN</span><span class="p">),</span> |
| <a id="__codelineno-0-451" name="__codelineno-0-451"></a> <span class="s2">"region"</span><span class="p">:</span> <span class="n">bucket_region</span><span class="p">,</span> |
| <a id="__codelineno-0-452" name="__codelineno-0-452"></a> <span class="p">}</span> |
| <a id="__codelineno-0-453" name="__codelineno-0-453"></a> |
| <a id="__codelineno-0-454" name="__codelineno-0-454"></a> <span class="k">if</span> <span class="n">proxy_uri</span> <span class="o">:=</span> <span class="bp">self</span><span class="o">.</span><span class="n">properties</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">S3_PROXY_URI</span><span class="p">):</span> |
| <a id="__codelineno-0-455" name="__codelineno-0-455"></a> <span class="n">client_kwargs</span><span class="p">[</span><span class="s2">"proxy_options"</span><span class="p">]</span> <span class="o">=</span> <span class="n">proxy_uri</span> |
| <a id="__codelineno-0-456" name="__codelineno-0-456"></a> |
| <a id="__codelineno-0-457" name="__codelineno-0-457"></a> <span class="k">if</span> <span class="n">connect_timeout</span> <span class="o">:=</span> <span class="bp">self</span><span class="o">.</span><span class="n">properties</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">S3_CONNECT_TIMEOUT</span><span class="p">):</span> |
| <a id="__codelineno-0-458" name="__codelineno-0-458"></a> <span class="n">client_kwargs</span><span class="p">[</span><span class="s2">"connect_timeout"</span><span class="p">]</span> <span class="o">=</span> <span class="nb">float</span><span class="p">(</span><span class="n">connect_timeout</span><span class="p">)</span> |
| <a id="__codelineno-0-459" name="__codelineno-0-459"></a> |
| <a id="__codelineno-0-460" name="__codelineno-0-460"></a> <span class="k">if</span> <span class="n">request_timeout</span> <span class="o">:=</span> <span class="bp">self</span><span class="o">.</span><span class="n">properties</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">S3_REQUEST_TIMEOUT</span><span class="p">):</span> |
| <a id="__codelineno-0-461" name="__codelineno-0-461"></a> <span class="n">client_kwargs</span><span class="p">[</span><span class="s2">"request_timeout"</span><span class="p">]</span> <span class="o">=</span> <span class="nb">float</span><span class="p">(</span><span class="n">request_timeout</span><span class="p">)</span> |
| <a id="__codelineno-0-462" name="__codelineno-0-462"></a> |
| <a id="__codelineno-0-463" name="__codelineno-0-463"></a> <span class="k">if</span> <span class="n">role_arn</span> <span class="o">:=</span> <span class="n">get_first_property_value</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">properties</span><span class="p">,</span> <span class="n">S3_ROLE_ARN</span><span class="p">,</span> <span class="n">AWS_ROLE_ARN</span><span class="p">):</span> |
| <a id="__codelineno-0-464" name="__codelineno-0-464"></a> <span class="n">client_kwargs</span><span class="p">[</span><span class="s2">"role_arn"</span><span class="p">]</span> <span class="o">=</span> <span class="n">role_arn</span> |
| <a id="__codelineno-0-465" name="__codelineno-0-465"></a> |
| <a id="__codelineno-0-466" name="__codelineno-0-466"></a> <span class="k">if</span> <span class="n">session_name</span> <span class="o">:=</span> <span class="n">get_first_property_value</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">properties</span><span class="p">,</span> <span class="n">S3_ROLE_SESSION_NAME</span><span class="p">,</span> <span class="n">AWS_ROLE_SESSION_NAME</span><span class="p">):</span> |
| <a id="__codelineno-0-467" name="__codelineno-0-467"></a> <span class="n">client_kwargs</span><span class="p">[</span><span class="s2">"session_name"</span><span class="p">]</span> <span class="o">=</span> <span class="n">session_name</span> |
| <a id="__codelineno-0-468" name="__codelineno-0-468"></a> |
| <a id="__codelineno-0-469" name="__codelineno-0-469"></a> <span class="k">if</span> <span class="n">force_virtual_addressing</span> <span class="o">:=</span> <span class="bp">self</span><span class="o">.</span><span class="n">properties</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">S3_FORCE_VIRTUAL_ADDRESSING</span><span class="p">):</span> |
| <a id="__codelineno-0-470" name="__codelineno-0-470"></a> <span class="n">client_kwargs</span><span class="p">[</span><span class="s2">"force_virtual_addressing"</span><span class="p">]</span> <span class="o">=</span> <span class="n">property_as_bool</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">properties</span><span class="p">,</span> <span class="n">force_virtual_addressing</span><span class="p">,</span> <span class="kc">False</span><span class="p">)</span> |
| <a id="__codelineno-0-471" name="__codelineno-0-471"></a> |
| <a id="__codelineno-0-472" name="__codelineno-0-472"></a> <span class="k">return</span> <span class="n">S3FileSystem</span><span class="p">(</span><span class="o">**</span><span class="n">client_kwargs</span><span class="p">)</span> |
| <a id="__codelineno-0-473" name="__codelineno-0-473"></a> |
| <a id="__codelineno-0-474" name="__codelineno-0-474"></a> <span class="k">def</span><span class="w"> </span><span class="nf">_initialize_hdfs_fs</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">scheme</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span> <span class="n">netloc</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">])</span> <span class="o">-></span> <span class="n">FileSystem</span><span class="p">:</span> |
| <a id="__codelineno-0-475" name="__codelineno-0-475"></a> <span class="kn">from</span><span class="w"> </span><span class="nn">pyarrow.fs</span><span class="w"> </span><span class="kn">import</span> <span class="n">HadoopFileSystem</span> |
| <a id="__codelineno-0-476" name="__codelineno-0-476"></a> |
| <a id="__codelineno-0-477" name="__codelineno-0-477"></a> <span class="n">hdfs_kwargs</span><span class="p">:</span> <span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Any</span><span class="p">]</span> <span class="o">=</span> <span class="p">{}</span> |
| <a id="__codelineno-0-478" name="__codelineno-0-478"></a> <span class="k">if</span> <span class="n">netloc</span><span class="p">:</span> |
| <a id="__codelineno-0-479" name="__codelineno-0-479"></a> <span class="k">return</span> <span class="n">HadoopFileSystem</span><span class="o">.</span><span class="n">from_uri</span><span class="p">(</span><span class="sa">f</span><span class="s2">"</span><span class="si">{</span><span class="n">scheme</span><span class="si">}</span><span class="s2">://</span><span class="si">{</span><span class="n">netloc</span><span class="si">}</span><span class="s2">"</span><span class="p">)</span> |
| <a id="__codelineno-0-480" name="__codelineno-0-480"></a> <span class="k">if</span> <span class="n">host</span> <span class="o">:=</span> <span class="bp">self</span><span class="o">.</span><span class="n">properties</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">HDFS_HOST</span><span class="p">):</span> |
| <a id="__codelineno-0-481" name="__codelineno-0-481"></a> <span class="n">hdfs_kwargs</span><span class="p">[</span><span class="s2">"host"</span><span class="p">]</span> <span class="o">=</span> <span class="n">host</span> |
| <a id="__codelineno-0-482" name="__codelineno-0-482"></a> <span class="k">if</span> <span class="n">port</span> <span class="o">:=</span> <span class="bp">self</span><span class="o">.</span><span class="n">properties</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">HDFS_PORT</span><span class="p">):</span> |
| <a id="__codelineno-0-483" name="__codelineno-0-483"></a> <span class="c1"># port should be an integer type</span> |
| <a id="__codelineno-0-484" name="__codelineno-0-484"></a> <span class="n">hdfs_kwargs</span><span class="p">[</span><span class="s2">"port"</span><span class="p">]</span> <span class="o">=</span> <span class="nb">int</span><span class="p">(</span><span class="n">port</span><span class="p">)</span> |
| <a id="__codelineno-0-485" name="__codelineno-0-485"></a> <span class="k">if</span> <span class="n">user</span> <span class="o">:=</span> <span class="bp">self</span><span class="o">.</span><span class="n">properties</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">HDFS_USER</span><span class="p">):</span> |
| <a id="__codelineno-0-486" name="__codelineno-0-486"></a> <span class="n">hdfs_kwargs</span><span class="p">[</span><span class="s2">"user"</span><span class="p">]</span> <span class="o">=</span> <span class="n">user</span> |
| <a id="__codelineno-0-487" name="__codelineno-0-487"></a> <span class="k">if</span> <span class="n">kerb_ticket</span> <span class="o">:=</span> <span class="bp">self</span><span class="o">.</span><span class="n">properties</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">HDFS_KERB_TICKET</span><span class="p">):</span> |
| <a id="__codelineno-0-488" name="__codelineno-0-488"></a> <span class="n">hdfs_kwargs</span><span class="p">[</span><span class="s2">"kerb_ticket"</span><span class="p">]</span> <span class="o">=</span> <span class="n">kerb_ticket</span> |
| <a id="__codelineno-0-489" name="__codelineno-0-489"></a> |
| <a id="__codelineno-0-490" name="__codelineno-0-490"></a> <span class="k">return</span> <span class="n">HadoopFileSystem</span><span class="p">(</span><span class="o">**</span><span class="n">hdfs_kwargs</span><span class="p">)</span> |
| <a id="__codelineno-0-491" name="__codelineno-0-491"></a> |
| <a id="__codelineno-0-492" name="__codelineno-0-492"></a> <span class="k">def</span><span class="w"> </span><span class="nf">_initialize_gcs_fs</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="n">FileSystem</span><span class="p">:</span> |
| <a id="__codelineno-0-493" name="__codelineno-0-493"></a> <span class="kn">from</span><span class="w"> </span><span class="nn">pyarrow.fs</span><span class="w"> </span><span class="kn">import</span> <span class="n">GcsFileSystem</span> |
| <a id="__codelineno-0-494" name="__codelineno-0-494"></a> |
| <a id="__codelineno-0-495" name="__codelineno-0-495"></a> <span class="n">gcs_kwargs</span><span class="p">:</span> <span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Any</span><span class="p">]</span> <span class="o">=</span> <span class="p">{}</span> |
| <a id="__codelineno-0-496" name="__codelineno-0-496"></a> <span class="k">if</span> <span class="n">access_token</span> <span class="o">:=</span> <span class="bp">self</span><span class="o">.</span><span class="n">properties</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">GCS_TOKEN</span><span class="p">):</span> |
| <a id="__codelineno-0-497" name="__codelineno-0-497"></a> <span class="n">gcs_kwargs</span><span class="p">[</span><span class="s2">"access_token"</span><span class="p">]</span> <span class="o">=</span> <span class="n">access_token</span> |
| <a id="__codelineno-0-498" name="__codelineno-0-498"></a> <span class="k">if</span> <span class="n">expiration</span> <span class="o">:=</span> <span class="bp">self</span><span class="o">.</span><span class="n">properties</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">GCS_TOKEN_EXPIRES_AT_MS</span><span class="p">):</span> |
| <a id="__codelineno-0-499" name="__codelineno-0-499"></a> <span class="n">gcs_kwargs</span><span class="p">[</span><span class="s2">"credential_token_expiration"</span><span class="p">]</span> <span class="o">=</span> <span class="n">millis_to_datetime</span><span class="p">(</span><span class="nb">int</span><span class="p">(</span><span class="n">expiration</span><span class="p">))</span> |
| <a id="__codelineno-0-500" name="__codelineno-0-500"></a> <span class="k">if</span> <span class="n">bucket_location</span> <span class="o">:=</span> <span class="bp">self</span><span class="o">.</span><span class="n">properties</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">GCS_DEFAULT_LOCATION</span><span class="p">):</span> |
| <a id="__codelineno-0-501" name="__codelineno-0-501"></a> <span class="n">gcs_kwargs</span><span class="p">[</span><span class="s2">"default_bucket_location"</span><span class="p">]</span> <span class="o">=</span> <span class="n">bucket_location</span> |
| <a id="__codelineno-0-502" name="__codelineno-0-502"></a> <span class="k">if</span> <span class="n">endpoint</span> <span class="o">:=</span> <span class="bp">self</span><span class="o">.</span><span class="n">properties</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">GCS_SERVICE_HOST</span><span class="p">):</span> |
| <a id="__codelineno-0-503" name="__codelineno-0-503"></a> <span class="n">url_parts</span> <span class="o">=</span> <span class="n">urlparse</span><span class="p">(</span><span class="n">endpoint</span><span class="p">)</span> |
| <a id="__codelineno-0-504" name="__codelineno-0-504"></a> <span class="n">gcs_kwargs</span><span class="p">[</span><span class="s2">"scheme"</span><span class="p">]</span> <span class="o">=</span> <span class="n">url_parts</span><span class="o">.</span><span class="n">scheme</span> |
| <a id="__codelineno-0-505" name="__codelineno-0-505"></a> <span class="n">gcs_kwargs</span><span class="p">[</span><span class="s2">"endpoint_override"</span><span class="p">]</span> <span class="o">=</span> <span class="n">url_parts</span><span class="o">.</span><span class="n">netloc</span> |
| <a id="__codelineno-0-506" name="__codelineno-0-506"></a> |
| <a id="__codelineno-0-507" name="__codelineno-0-507"></a> <span class="k">return</span> <span class="n">GcsFileSystem</span><span class="p">(</span><span class="o">**</span><span class="n">gcs_kwargs</span><span class="p">)</span> |
| <a id="__codelineno-0-508" name="__codelineno-0-508"></a> |
| <a id="__codelineno-0-509" name="__codelineno-0-509"></a> <span class="k">def</span><span class="w"> </span><span class="nf">_initialize_local_fs</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="n">FileSystem</span><span class="p">:</span> |
| <a id="__codelineno-0-510" name="__codelineno-0-510"></a> <span class="k">return</span> <span class="n">PyArrowLocalFileSystem</span><span class="p">()</span> |
| <a id="__codelineno-0-511" name="__codelineno-0-511"></a> |
| <a id="__codelineno-0-512" name="__codelineno-0-512"></a> <span class="k">def</span><span class="w"> </span><span class="nf">new_input</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">location</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="n">PyArrowFile</span><span class="p">:</span> |
| <a id="__codelineno-0-513" name="__codelineno-0-513"></a><span class="w"> </span><span class="sd">"""Get a PyArrowFile instance to read bytes from the file at the given location.</span> |
| <a id="__codelineno-0-514" name="__codelineno-0-514"></a> |
| <a id="__codelineno-0-515" name="__codelineno-0-515"></a><span class="sd"> Args:</span> |
| <a id="__codelineno-0-516" name="__codelineno-0-516"></a><span class="sd"> location (str): A URI or a path to a local file.</span> |
| <a id="__codelineno-0-517" name="__codelineno-0-517"></a> |
| <a id="__codelineno-0-518" name="__codelineno-0-518"></a><span class="sd"> Returns:</span> |
| <a id="__codelineno-0-519" name="__codelineno-0-519"></a><span class="sd"> PyArrowFile: A PyArrowFile instance for the given location.</span> |
| <a id="__codelineno-0-520" name="__codelineno-0-520"></a><span class="sd"> """</span> |
| <a id="__codelineno-0-521" name="__codelineno-0-521"></a> <span class="n">scheme</span><span class="p">,</span> <span class="n">netloc</span><span class="p">,</span> <span class="n">path</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">parse_location</span><span class="p">(</span><span class="n">location</span><span class="p">)</span> |
| <a id="__codelineno-0-522" name="__codelineno-0-522"></a> <span class="k">return</span> <span class="n">PyArrowFile</span><span class="p">(</span> |
| <a id="__codelineno-0-523" name="__codelineno-0-523"></a> <span class="n">fs</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">fs_by_scheme</span><span class="p">(</span><span class="n">scheme</span><span class="p">,</span> <span class="n">netloc</span><span class="p">),</span> |
| <a id="__codelineno-0-524" name="__codelineno-0-524"></a> <span class="n">location</span><span class="o">=</span><span class="n">location</span><span class="p">,</span> |
| <a id="__codelineno-0-525" name="__codelineno-0-525"></a> <span class="n">path</span><span class="o">=</span><span class="n">path</span><span class="p">,</span> |
| <a id="__codelineno-0-526" name="__codelineno-0-526"></a> <span class="n">buffer_size</span><span class="o">=</span><span class="nb">int</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">properties</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">BUFFER_SIZE</span><span class="p">,</span> <span class="n">ONE_MEGABYTE</span><span class="p">)),</span> |
| <a id="__codelineno-0-527" name="__codelineno-0-527"></a> <span class="p">)</span> |
| <a id="__codelineno-0-528" name="__codelineno-0-528"></a> |
| <a id="__codelineno-0-529" name="__codelineno-0-529"></a> <span class="k">def</span><span class="w"> </span><span class="nf">new_output</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">location</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="n">PyArrowFile</span><span class="p">:</span> |
| <a id="__codelineno-0-530" name="__codelineno-0-530"></a><span class="w"> </span><span class="sd">"""Get a PyArrowFile instance to write bytes to the file at the given location.</span> |
| <a id="__codelineno-0-531" name="__codelineno-0-531"></a> |
| <a id="__codelineno-0-532" name="__codelineno-0-532"></a><span class="sd"> Args:</span> |
| <a id="__codelineno-0-533" name="__codelineno-0-533"></a><span class="sd"> location (str): A URI or a path to a local file.</span> |
| <a id="__codelineno-0-534" name="__codelineno-0-534"></a> |
| <a id="__codelineno-0-535" name="__codelineno-0-535"></a><span class="sd"> Returns:</span> |
| <a id="__codelineno-0-536" name="__codelineno-0-536"></a><span class="sd"> PyArrowFile: A PyArrowFile instance for the given location.</span> |
| <a id="__codelineno-0-537" name="__codelineno-0-537"></a><span class="sd"> """</span> |
| <a id="__codelineno-0-538" name="__codelineno-0-538"></a> <span class="n">scheme</span><span class="p">,</span> <span class="n">netloc</span><span class="p">,</span> <span class="n">path</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">parse_location</span><span class="p">(</span><span class="n">location</span><span class="p">)</span> |
| <a id="__codelineno-0-539" name="__codelineno-0-539"></a> <span class="k">return</span> <span class="n">PyArrowFile</span><span class="p">(</span> |
| <a id="__codelineno-0-540" name="__codelineno-0-540"></a> <span class="n">fs</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">fs_by_scheme</span><span class="p">(</span><span class="n">scheme</span><span class="p">,</span> <span class="n">netloc</span><span class="p">),</span> |
| <a id="__codelineno-0-541" name="__codelineno-0-541"></a> <span class="n">location</span><span class="o">=</span><span class="n">location</span><span class="p">,</span> |
| <a id="__codelineno-0-542" name="__codelineno-0-542"></a> <span class="n">path</span><span class="o">=</span><span class="n">path</span><span class="p">,</span> |
| <a id="__codelineno-0-543" name="__codelineno-0-543"></a> <span class="n">buffer_size</span><span class="o">=</span><span class="nb">int</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">properties</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">BUFFER_SIZE</span><span class="p">,</span> <span class="n">ONE_MEGABYTE</span><span class="p">)),</span> |
| <a id="__codelineno-0-544" name="__codelineno-0-544"></a> <span class="p">)</span> |
| <a id="__codelineno-0-545" name="__codelineno-0-545"></a> |
| <a id="__codelineno-0-546" name="__codelineno-0-546"></a> <span class="k">def</span><span class="w"> </span><span class="nf">delete</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">location</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">InputFile</span><span class="p">,</span> <span class="n">OutputFile</span><span class="p">])</span> <span class="o">-></span> <span class="kc">None</span><span class="p">:</span> |
| <a id="__codelineno-0-547" name="__codelineno-0-547"></a><span class="w"> </span><span class="sd">"""Delete the file at the given location.</span> |
| <a id="__codelineno-0-548" name="__codelineno-0-548"></a> |
| <a id="__codelineno-0-549" name="__codelineno-0-549"></a><span class="sd"> Args:</span> |
| <a id="__codelineno-0-550" name="__codelineno-0-550"></a><span class="sd"> location (Union[str, InputFile, OutputFile]): The URI to the file--if an InputFile instance or an OutputFile instance is provided,</span> |
| <a id="__codelineno-0-551" name="__codelineno-0-551"></a><span class="sd"> the location attribute for that instance is used as the location to delete.</span> |
| <a id="__codelineno-0-552" name="__codelineno-0-552"></a> |
| <a id="__codelineno-0-553" name="__codelineno-0-553"></a><span class="sd"> Raises:</span> |
| <a id="__codelineno-0-554" name="__codelineno-0-554"></a><span class="sd"> FileNotFoundError: When the file at the provided location does not exist.</span> |
| <a id="__codelineno-0-555" name="__codelineno-0-555"></a><span class="sd"> PermissionError: If the file at the provided location cannot be accessed due to a permission error such as</span> |
| <a id="__codelineno-0-556" name="__codelineno-0-556"></a><span class="sd"> an AWS error code 15.</span> |
| <a id="__codelineno-0-557" name="__codelineno-0-557"></a><span class="sd"> """</span> |
| <a id="__codelineno-0-558" name="__codelineno-0-558"></a> <span class="n">str_location</span> <span class="o">=</span> <span class="n">location</span><span class="o">.</span><span class="n">location</span> <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">location</span><span class="p">,</span> <span class="p">(</span><span class="n">InputFile</span><span class="p">,</span> <span class="n">OutputFile</span><span class="p">))</span> <span class="k">else</span> <span class="n">location</span> |
| <a id="__codelineno-0-559" name="__codelineno-0-559"></a> <span class="n">scheme</span><span class="p">,</span> <span class="n">netloc</span><span class="p">,</span> <span class="n">path</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">parse_location</span><span class="p">(</span><span class="n">str_location</span><span class="p">)</span> |
| <a id="__codelineno-0-560" name="__codelineno-0-560"></a> <span class="n">fs</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">fs_by_scheme</span><span class="p">(</span><span class="n">scheme</span><span class="p">,</span> <span class="n">netloc</span><span class="p">)</span> |
| <a id="__codelineno-0-561" name="__codelineno-0-561"></a> |
| <a id="__codelineno-0-562" name="__codelineno-0-562"></a> <span class="k">try</span><span class="p">:</span> |
| <a id="__codelineno-0-563" name="__codelineno-0-563"></a> <span class="n">fs</span><span class="o">.</span><span class="n">delete_file</span><span class="p">(</span><span class="n">path</span><span class="p">)</span> |
| <a id="__codelineno-0-564" name="__codelineno-0-564"></a> <span class="k">except</span> <span class="ne">FileNotFoundError</span><span class="p">:</span> |
| <a id="__codelineno-0-565" name="__codelineno-0-565"></a> <span class="k">raise</span> |
| <a id="__codelineno-0-566" name="__codelineno-0-566"></a> <span class="k">except</span> <span class="ne">PermissionError</span><span class="p">:</span> |
| <a id="__codelineno-0-567" name="__codelineno-0-567"></a> <span class="k">raise</span> |
| <a id="__codelineno-0-568" name="__codelineno-0-568"></a> <span class="k">except</span> <span class="ne">OSError</span> <span class="k">as</span> <span class="n">e</span><span class="p">:</span> |
| <a id="__codelineno-0-569" name="__codelineno-0-569"></a> <span class="k">if</span> <span class="n">e</span><span class="o">.</span><span class="n">errno</span> <span class="o">==</span> <span class="mi">2</span> <span class="ow">or</span> <span class="s2">"Path does not exist"</span> <span class="ow">in</span> <span class="nb">str</span><span class="p">(</span><span class="n">e</span><span class="p">):</span> |
| <a id="__codelineno-0-570" name="__codelineno-0-570"></a> <span class="k">raise</span> <span class="ne">FileNotFoundError</span><span class="p">(</span><span class="sa">f</span><span class="s2">"Cannot delete file, does not exist: </span><span class="si">{</span><span class="n">location</span><span class="si">}</span><span class="s2">"</span><span class="p">)</span> <span class="kn">from</span><span class="w"> </span><span class="nn">e</span> |
| <a id="__codelineno-0-571" name="__codelineno-0-571"></a> <span class="k">elif</span> <span class="n">e</span><span class="o">.</span><span class="n">errno</span> <span class="o">==</span> <span class="mi">13</span> <span class="ow">or</span> <span class="s2">"AWS Error [code 15]"</span> <span class="ow">in</span> <span class="nb">str</span><span class="p">(</span><span class="n">e</span><span class="p">):</span> |
| <a id="__codelineno-0-572" name="__codelineno-0-572"></a> <span class="k">raise</span> <span class="ne">PermissionError</span><span class="p">(</span><span class="sa">f</span><span class="s2">"Cannot delete file, access denied: </span><span class="si">{</span><span class="n">location</span><span class="si">}</span><span class="s2">"</span><span class="p">)</span> <span class="kn">from</span><span class="w"> </span><span class="nn">e</span> |
| <a id="__codelineno-0-573" name="__codelineno-0-573"></a> <span class="k">raise</span> <span class="c1"># pragma: no cover - If some other kind of OSError, raise the raw error</span> |
| <a id="__codelineno-0-574" name="__codelineno-0-574"></a> |
| <a id="__codelineno-0-575" name="__codelineno-0-575"></a> <span class="k">def</span><span class="w"> </span><span class="nf">__getstate__</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Any</span><span class="p">]:</span> |
| <a id="__codelineno-0-576" name="__codelineno-0-576"></a><span class="w"> </span><span class="sd">"""Create a dictionary of the PyArrowFileIO fields used when pickling."""</span> |
| <a id="__codelineno-0-577" name="__codelineno-0-577"></a> <span class="n">fileio_copy</span> <span class="o">=</span> <span class="n">copy</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="vm">__dict__</span><span class="p">)</span> |
| <a id="__codelineno-0-578" name="__codelineno-0-578"></a> <span class="n">fileio_copy</span><span class="p">[</span><span class="s2">"fs_by_scheme"</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span> |
| <a id="__codelineno-0-579" name="__codelineno-0-579"></a> <span class="k">return</span> <span class="n">fileio_copy</span> |
| <a id="__codelineno-0-580" name="__codelineno-0-580"></a> |
| <a id="__codelineno-0-581" name="__codelineno-0-581"></a> <span class="k">def</span><span class="w"> </span><span class="nf">__setstate__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">state</span><span class="p">:</span> <span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Any</span><span class="p">])</span> <span class="o">-></span> <span class="kc">None</span><span class="p">:</span> |
| <a id="__codelineno-0-582" name="__codelineno-0-582"></a><span class="w"> </span><span class="sd">"""Deserialize the state into a PyArrowFileIO instance."""</span> |
| <a id="__codelineno-0-583" name="__codelineno-0-583"></a> <span class="bp">self</span><span class="o">.</span><span class="vm">__dict__</span> <span class="o">=</span> <span class="n">state</span> |
| <a id="__codelineno-0-584" name="__codelineno-0-584"></a> <span class="bp">self</span><span class="o">.</span><span class="n">fs_by_scheme</span> <span class="o">=</span> <span class="n">lru_cache</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_initialize_fs</span><span class="p">)</span> |
| </code></pre></div></td></tr></table></div> |
| </details> |
| |
| |
| |
| <div class="doc doc-children"> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <!-- API entry for PyArrowFileIO.__getstate__. The line anchors in this source listing use the "__codelineno-getstate-" id prefix so they do not duplicate the "__codelineno-0-" ids of the class-level listing; the gutter links below target these local anchors. --> |
| <div class="doc doc-object doc-function"> |
| |
| |
| <h3 id="pyiceberg.io.pyarrow.PyArrowFileIO.__getstate__" class="doc doc-heading"> |
| <code class="highlight language-python"><span class="n">__getstate__</span><span class="p">()</span></code> |
| |
| <a href="#pyiceberg.io.pyarrow.PyArrowFileIO.__getstate__" class="headerlink" title="Permanent link">¶</a></h3> |
| |
| |
| <div class="doc doc-contents "> |
| |
| <p>Create a dictionary of the PyArrowFileIO fields used when pickling.</p> |
| |
| <details class="quote"> |
| <summary>Source code in <code>pyiceberg/io/pyarrow.py</code></summary> |
| <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-getstate-575">575</a></span> |
| <span class="normal"><a href="#__codelineno-getstate-576">576</a></span> |
| <span class="normal"><a href="#__codelineno-getstate-577">577</a></span> |
| <span class="normal"><a href="#__codelineno-getstate-578">578</a></span> |
| <span class="normal"><a href="#__codelineno-getstate-579">579</a></span></pre></div></td><td class="code"><div><pre><span></span><code><a id="__codelineno-getstate-575" name="__codelineno-getstate-575"></a><span class="k">def</span><span class="w"> </span><span class="nf">__getstate__</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Any</span><span class="p">]:</span> |
| <a id="__codelineno-getstate-576" name="__codelineno-getstate-576"></a><span class="w"> </span><span class="sd">"""Create a dictionary of the PyArrowFileIO fields used when pickling."""</span> |
| <a id="__codelineno-getstate-577" name="__codelineno-getstate-577"></a> <span class="n">fileio_copy</span> <span class="o">=</span> <span class="n">copy</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="vm">__dict__</span><span class="p">)</span> |
| <a id="__codelineno-getstate-578" name="__codelineno-getstate-578"></a> <span class="n">fileio_copy</span><span class="p">[</span><span class="s2">"fs_by_scheme"</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span> |
| <a id="__codelineno-getstate-579" name="__codelineno-getstate-579"></a> <span class="k">return</span> <span class="n">fileio_copy</span> |
| </code></pre></div></td></tr></table></div> |
| </details> |
| </div> |
| |
| </div> |
| |
| <!-- API entry for PyArrowFileIO.__setstate__. The line anchors in this source listing use the "__codelineno-setstate-" id prefix so they do not duplicate the "__codelineno-0-" ids of the class-level listing; the gutter links below target these local anchors. --> |
| <div class="doc doc-object doc-function"> |
| |
| |
| <h3 id="pyiceberg.io.pyarrow.PyArrowFileIO.__setstate__" class="doc doc-heading"> |
| <code class="highlight language-python"><span class="n">__setstate__</span><span class="p">(</span><span class="n">state</span><span class="p">)</span></code> |
| |
| <a href="#pyiceberg.io.pyarrow.PyArrowFileIO.__setstate__" class="headerlink" title="Permanent link">¶</a></h3> |
| |
| |
| <div class="doc doc-contents "> |
| |
| <p>Deserialize the state into a PyArrowFileIO instance.</p> |
| |
| <details class="quote"> |
| <summary>Source code in <code>pyiceberg/io/pyarrow.py</code></summary> |
| <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-setstate-581">581</a></span> |
| <span class="normal"><a href="#__codelineno-setstate-582">582</a></span> |
| <span class="normal"><a href="#__codelineno-setstate-583">583</a></span> |
| <span class="normal"><a href="#__codelineno-setstate-584">584</a></span></pre></div></td><td class="code"><div><pre><span></span><code><a id="__codelineno-setstate-581" name="__codelineno-setstate-581"></a><span class="k">def</span><span class="w"> </span><span class="nf">__setstate__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">state</span><span class="p">:</span> <span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Any</span><span class="p">])</span> <span class="o">-></span> <span class="kc">None</span><span class="p">:</span> |
| <a id="__codelineno-setstate-582" name="__codelineno-setstate-582"></a><span class="w"> </span><span class="sd">"""Deserialize the state into a PyArrowFileIO instance."""</span> |
| <a id="__codelineno-setstate-583" name="__codelineno-setstate-583"></a> <span class="bp">self</span><span class="o">.</span><span class="vm">__dict__</span> <span class="o">=</span> <span class="n">state</span> |
| <a id="__codelineno-setstate-584" name="__codelineno-setstate-584"></a> <span class="bp">self</span><span class="o">.</span><span class="n">fs_by_scheme</span> <span class="o">=</span> <span class="n">lru_cache</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_initialize_fs</span><span class="p">)</span> |
| </code></pre></div></td></tr></table></div> |
| </details> |
| </div> |
| |
| </div> |
| |
| <div class="doc doc-object doc-function"> |
| |
| |
| <h3 id="pyiceberg.io.pyarrow.PyArrowFileIO._initialize_fs" class="doc doc-heading"> |
| <code class="highlight language-python"><span class="n">_initialize_fs</span><span class="p">(</span><span class="n">scheme</span><span class="p">,</span> <span class="n">netloc</span><span class="o">=</span><span class="kc">None</span><span class="p">)</span></code> |
| |
| <a href="#pyiceberg.io.pyarrow.PyArrowFileIO._initialize_fs" class="headerlink" title="Permanent link">¶</a></h3> |
| |
| |
| <div class="doc doc-contents "> |
| |
| <p>Initialize the FileSystem that corresponds to the given URI scheme.</p> |
| |
| <details class="quote"> |
| <summary>Source code in <code>pyiceberg/io/pyarrow.py</code></summary> |
| <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-377">377</a></span> |
| <span class="normal"><a href="#__codelineno-0-378">378</a></span> |
| <span class="normal"><a href="#__codelineno-0-379">379</a></span> |
| <span class="normal"><a href="#__codelineno-0-380">380</a></span> |
| <span class="normal"><a href="#__codelineno-0-381">381</a></span> |
| <span class="normal"><a href="#__codelineno-0-382">382</a></span> |
| <span class="normal"><a href="#__codelineno-0-383">383</a></span> |
| <span class="normal"><a href="#__codelineno-0-384">384</a></span> |
| <span class="normal"><a href="#__codelineno-0-385">385</a></span> |
| <span class="normal"><a href="#__codelineno-0-386">386</a></span> |
| <span class="normal"><a href="#__codelineno-0-387">387</a></span> |
| <span class="normal"><a href="#__codelineno-0-388">388</a></span> |
| <span class="normal"><a href="#__codelineno-0-389">389</a></span> |
| <span class="normal"><a href="#__codelineno-0-390">390</a></span> |
| <span class="normal"><a href="#__codelineno-0-391">391</a></span> |
| <span class="normal"><a href="#__codelineno-0-392">392</a></span> |
| <span class="normal"><a href="#__codelineno-0-393">393</a></span> |
| <span class="normal"><a href="#__codelineno-0-394">394</a></span> |
| <span class="normal"><a href="#__codelineno-0-395">395</a></span></pre></div></td><td class="code"><div><pre><span></span><code><a id="__codelineno-0-377" name="__codelineno-0-377"></a><span class="k">def</span><span class="w"> </span><span class="nf">_initialize_fs</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">scheme</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span> <span class="n">netloc</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">)</span> <span class="o">-></span> <span class="n">FileSystem</span><span class="p">:</span> |
| <a id="__codelineno-0-378" name="__codelineno-0-378"></a><span class="w"> </span><span class="sd">"""Initialize FileSystem for different scheme."""</span> |
| <a id="__codelineno-0-379" name="__codelineno-0-379"></a> <span class="k">if</span> <span class="n">scheme</span> <span class="ow">in</span> <span class="p">{</span><span class="s2">"oss"</span><span class="p">}:</span> |
| <a id="__codelineno-0-380" name="__codelineno-0-380"></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_initialize_oss_fs</span><span class="p">()</span> |
| <a id="__codelineno-0-381" name="__codelineno-0-381"></a> |
| <a id="__codelineno-0-382" name="__codelineno-0-382"></a> <span class="k">elif</span> <span class="n">scheme</span> <span class="ow">in</span> <span class="p">{</span><span class="s2">"s3"</span><span class="p">,</span> <span class="s2">"s3a"</span><span class="p">,</span> <span class="s2">"s3n"</span><span class="p">}:</span> |
| <a id="__codelineno-0-383" name="__codelineno-0-383"></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_initialize_s3_fs</span><span class="p">(</span><span class="n">netloc</span><span class="p">)</span> |
| <a id="__codelineno-0-384" name="__codelineno-0-384"></a> |
| <a id="__codelineno-0-385" name="__codelineno-0-385"></a> <span class="k">elif</span> <span class="n">scheme</span> <span class="ow">in</span> <span class="p">{</span><span class="s2">"hdfs"</span><span class="p">,</span> <span class="s2">"viewfs"</span><span class="p">}:</span> |
| <a id="__codelineno-0-386" name="__codelineno-0-386"></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_initialize_hdfs_fs</span><span class="p">(</span><span class="n">scheme</span><span class="p">,</span> <span class="n">netloc</span><span class="p">)</span> |
| <a id="__codelineno-0-387" name="__codelineno-0-387"></a> |
| <a id="__codelineno-0-388" name="__codelineno-0-388"></a> <span class="k">elif</span> <span class="n">scheme</span> <span class="ow">in</span> <span class="p">{</span><span class="s2">"gs"</span><span class="p">,</span> <span class="s2">"gcs"</span><span class="p">}:</span> |
| <a id="__codelineno-0-389" name="__codelineno-0-389"></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_initialize_gcs_fs</span><span class="p">()</span> |
| <a id="__codelineno-0-390" name="__codelineno-0-390"></a> |
| <a id="__codelineno-0-391" name="__codelineno-0-391"></a> <span class="k">elif</span> <span class="n">scheme</span> <span class="ow">in</span> <span class="p">{</span><span class="s2">"file"</span><span class="p">}:</span> |
| <a id="__codelineno-0-392" name="__codelineno-0-392"></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_initialize_local_fs</span><span class="p">()</span> |
| <a id="__codelineno-0-393" name="__codelineno-0-393"></a> |
| <a id="__codelineno-0-394" name="__codelineno-0-394"></a> <span class="k">else</span><span class="p">:</span> |
| <a id="__codelineno-0-395" name="__codelineno-0-395"></a> <span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="sa">f</span><span class="s2">"Unrecognized filesystem type in URI: </span><span class="si">{</span><span class="n">scheme</span><span class="si">}</span><span class="s2">"</span><span class="p">)</span> |
| </code></pre></div></td></tr></table></div> |
| </details> |
| </div> |
| |
| </div> |
| |
| <div class="doc doc-object doc-function"> |
| |
| |
| <h3 id="pyiceberg.io.pyarrow.PyArrowFileIO.delete" class="doc doc-heading"> |
| <code class="highlight language-python"><span class="n">delete</span><span class="p">(</span><span class="n">location</span><span class="p">)</span></code> |
| |
| <a href="#pyiceberg.io.pyarrow.PyArrowFileIO.delete" class="headerlink" title="Permanent link">¶</a></h3> |
| |
| |
| <div class="doc doc-contents "> |
| |
| <p>Delete the file at the given location.</p> |
| |
| |
| <p><span class="doc-section-title">Parameters:</span></p> |
| <table> |
| <thead> |
| <tr> |
| <th>Name</th> |
| <th>Type</th> |
| <th>Description</th> |
| <th>Default</th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr class="doc-section-item"> |
| <td> |
| <code>location</code> |
| </td> |
| <td> |
| <code><span title="typing.Union">Union</span>[<span title="str">str</span>, <a class="autorefs autorefs-internal" title="pyiceberg.io.InputFile" href="../#pyiceberg.io.InputFile">InputFile</a>, <a class="autorefs autorefs-internal" title="pyiceberg.io.OutputFile" href="../#pyiceberg.io.OutputFile">OutputFile</a>]</code> |
| </td> |
| <td> |
| <div class="doc-md-description"> |
| <p>The URI to the file. If an InputFile or OutputFile instance is provided, |
| the location attribute of that instance is used as the location to delete.</p> |
| </div> |
| </td> |
| <td> |
| <em>required</em> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| |
| |
| <p><span class="doc-section-title">Raises:</span></p> |
| <table> |
| <thead> |
| <tr> |
| <th>Type</th> |
| <th>Description</th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr class="doc-section-item"> |
| <td> |
| <code><span title="FileNotFoundError">FileNotFoundError</span></code> |
| </td> |
| <td> |
| <div class="doc-md-description"> |
| <p>When the file at the provided location does not exist.</p> |
| </div> |
| </td> |
| </tr> |
| <tr class="doc-section-item"> |
| <td> |
| <code><span title="PermissionError">PermissionError</span></code> |
| </td> |
| <td> |
| <div class="doc-md-description"> |
| <p>When the file at the provided location cannot be accessed due to a permission error, such as |
| AWS error code 15.</p> |
| </div> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| |
| <details class="quote"> |
| <summary>Source code in <code>pyiceberg/io/pyarrow.py</code></summary> |
| <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-546">546</a></span> |
| <span class="normal"><a href="#__codelineno-0-547">547</a></span> |
| <span class="normal"><a href="#__codelineno-0-548">548</a></span> |
| <span class="normal"><a href="#__codelineno-0-549">549</a></span> |
| <span class="normal"><a href="#__codelineno-0-550">550</a></span> |
| <span class="normal"><a href="#__codelineno-0-551">551</a></span> |
| <span class="normal"><a href="#__codelineno-0-552">552</a></span> |
| <span class="normal"><a href="#__codelineno-0-553">553</a></span> |
| <span class="normal"><a href="#__codelineno-0-554">554</a></span> |
| <span class="normal"><a href="#__codelineno-0-555">555</a></span> |
| <span class="normal"><a href="#__codelineno-0-556">556</a></span> |
| <span class="normal"><a href="#__codelineno-0-557">557</a></span> |
| <span class="normal"><a href="#__codelineno-0-558">558</a></span> |
| <span class="normal"><a href="#__codelineno-0-559">559</a></span> |
| <span class="normal"><a href="#__codelineno-0-560">560</a></span> |
| <span class="normal"><a href="#__codelineno-0-561">561</a></span> |
| <span class="normal"><a href="#__codelineno-0-562">562</a></span> |
| <span class="normal"><a href="#__codelineno-0-563">563</a></span> |
| <span class="normal"><a href="#__codelineno-0-564">564</a></span> |
| <span class="normal"><a href="#__codelineno-0-565">565</a></span> |
| <span class="normal"><a href="#__codelineno-0-566">566</a></span> |
| <span class="normal"><a href="#__codelineno-0-567">567</a></span> |
| <span class="normal"><a href="#__codelineno-0-568">568</a></span> |
| <span class="normal"><a href="#__codelineno-0-569">569</a></span> |
| <span class="normal"><a href="#__codelineno-0-570">570</a></span> |
| <span class="normal"><a href="#__codelineno-0-571">571</a></span> |
| <span class="normal"><a href="#__codelineno-0-572">572</a></span> |
| <span class="normal"><a href="#__codelineno-0-573">573</a></span></pre></div></td><td class="code"><div><pre><span></span><code><a id="__codelineno-0-546" name="__codelineno-0-546"></a><span class="k">def</span><span class="w"> </span><span class="nf">delete</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">location</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">InputFile</span><span class="p">,</span> <span class="n">OutputFile</span><span class="p">])</span> <span class="o">-></span> <span class="kc">None</span><span class="p">:</span> |
| <a id="__codelineno-0-547" name="__codelineno-0-547"></a><span class="w"> </span><span class="sd">"""Delete the file at the given location.</span> |
| <a id="__codelineno-0-548" name="__codelineno-0-548"></a> |
| <a id="__codelineno-0-549" name="__codelineno-0-549"></a><span class="sd"> Args:</span> |
| <a id="__codelineno-0-550" name="__codelineno-0-550"></a><span class="sd"> location (Union[str, InputFile, OutputFile]): The URI to the file--if an InputFile instance or an OutputFile instance is provided,</span> |
| <a id="__codelineno-0-551" name="__codelineno-0-551"></a><span class="sd"> the location attribute for that instance is used as the location to delete.</span> |
| <a id="__codelineno-0-552" name="__codelineno-0-552"></a> |
| <a id="__codelineno-0-553" name="__codelineno-0-553"></a><span class="sd"> Raises:</span> |
| <a id="__codelineno-0-554" name="__codelineno-0-554"></a><span class="sd"> FileNotFoundError: When the file at the provided location does not exist.</span> |
| <a id="__codelineno-0-555" name="__codelineno-0-555"></a><span class="sd"> PermissionError: If the file at the provided location cannot be accessed due to a permission error such as</span> |
| <a id="__codelineno-0-556" name="__codelineno-0-556"></a><span class="sd"> an AWS error code 15.</span> |
| <a id="__codelineno-0-557" name="__codelineno-0-557"></a><span class="sd"> """</span> |
| <a id="__codelineno-0-558" name="__codelineno-0-558"></a> <span class="n">str_location</span> <span class="o">=</span> <span class="n">location</span><span class="o">.</span><span class="n">location</span> <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">location</span><span class="p">,</span> <span class="p">(</span><span class="n">InputFile</span><span class="p">,</span> <span class="n">OutputFile</span><span class="p">))</span> <span class="k">else</span> <span class="n">location</span> |
| <a id="__codelineno-0-559" name="__codelineno-0-559"></a> <span class="n">scheme</span><span class="p">,</span> <span class="n">netloc</span><span class="p">,</span> <span class="n">path</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">parse_location</span><span class="p">(</span><span class="n">str_location</span><span class="p">)</span> |
| <a id="__codelineno-0-560" name="__codelineno-0-560"></a> <span class="n">fs</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">fs_by_scheme</span><span class="p">(</span><span class="n">scheme</span><span class="p">,</span> <span class="n">netloc</span><span class="p">)</span> |
| <a id="__codelineno-0-561" name="__codelineno-0-561"></a> |
| <a id="__codelineno-0-562" name="__codelineno-0-562"></a> <span class="k">try</span><span class="p">:</span> |
| <a id="__codelineno-0-563" name="__codelineno-0-563"></a> <span class="n">fs</span><span class="o">.</span><span class="n">delete_file</span><span class="p">(</span><span class="n">path</span><span class="p">)</span> |
| <a id="__codelineno-0-564" name="__codelineno-0-564"></a> <span class="k">except</span> <span class="ne">FileNotFoundError</span><span class="p">:</span> |
| <a id="__codelineno-0-565" name="__codelineno-0-565"></a> <span class="k">raise</span> |
| <a id="__codelineno-0-566" name="__codelineno-0-566"></a> <span class="k">except</span> <span class="ne">PermissionError</span><span class="p">:</span> |
| <a id="__codelineno-0-567" name="__codelineno-0-567"></a> <span class="k">raise</span> |
| <a id="__codelineno-0-568" name="__codelineno-0-568"></a> <span class="k">except</span> <span class="ne">OSError</span> <span class="k">as</span> <span class="n">e</span><span class="p">:</span> |
| <a id="__codelineno-0-569" name="__codelineno-0-569"></a> <span class="k">if</span> <span class="n">e</span><span class="o">.</span><span class="n">errno</span> <span class="o">==</span> <span class="mi">2</span> <span class="ow">or</span> <span class="s2">"Path does not exist"</span> <span class="ow">in</span> <span class="nb">str</span><span class="p">(</span><span class="n">e</span><span class="p">):</span> |
| <a id="__codelineno-0-570" name="__codelineno-0-570"></a> <span class="k">raise</span> <span class="ne">FileNotFoundError</span><span class="p">(</span><span class="sa">f</span><span class="s2">"Cannot delete file, does not exist: </span><span class="si">{</span><span class="n">location</span><span class="si">}</span><span class="s2">"</span><span class="p">)</span> <span class="kn">from</span><span class="w"> </span><span class="nn">e</span> |
| <a id="__codelineno-0-571" name="__codelineno-0-571"></a> <span class="k">elif</span> <span class="n">e</span><span class="o">.</span><span class="n">errno</span> <span class="o">==</span> <span class="mi">13</span> <span class="ow">or</span> <span class="s2">"AWS Error [code 15]"</span> <span class="ow">in</span> <span class="nb">str</span><span class="p">(</span><span class="n">e</span><span class="p">):</span> |
| <a id="__codelineno-0-572" name="__codelineno-0-572"></a> <span class="k">raise</span> <span class="ne">PermissionError</span><span class="p">(</span><span class="sa">f</span><span class="s2">"Cannot delete file, access denied: </span><span class="si">{</span><span class="n">location</span><span class="si">}</span><span class="s2">"</span><span class="p">)</span> <span class="kn">from</span><span class="w"> </span><span class="nn">e</span> |
| <a id="__codelineno-0-573" name="__codelineno-0-573"></a> <span class="k">raise</span> <span class="c1"># pragma: no cover - If some other kind of OSError, raise the raw error</span> |
| </code></pre></div></td></tr></table></div> |
| </details> |
| </div> |
| |
| </div> |
| |
| <div class="doc doc-object doc-function"> |
| |
| |
| <h3 id="pyiceberg.io.pyarrow.PyArrowFileIO.new_input" class="doc doc-heading"> |
| <code class="highlight language-python"><span class="n">new_input</span><span class="p">(</span><span class="n">location</span><span class="p">)</span></code> |
| |
| <a href="#pyiceberg.io.pyarrow.PyArrowFileIO.new_input" class="headerlink" title="Permanent link">¶</a></h3> |
| |
| |
| <div class="doc doc-contents "> |
| |
| <p>Get a PyArrowFile instance to read bytes from the file at the given location.</p> |
| |
| |
| <p><span class="doc-section-title">Parameters:</span></p> |
| <table> |
| <thead> |
| <tr> |
| <th>Name</th> |
| <th>Type</th> |
| <th>Description</th> |
| <th>Default</th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr class="doc-section-item"> |
| <td> |
| <code>location</code> |
| </td> |
| <td> |
| <code><span title="str">str</span></code> |
| </td> |
| <td> |
| <div class="doc-md-description"> |
| <p>A URI or a path to a local file.</p> |
| </div> |
| </td> |
| <td> |
| <em>required</em> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| |
| |
| <p><span class="doc-section-title">Returns:</span></p> |
| <table> |
| <thead> |
| <tr> |
| <th>Name</th> <th>Type</th> |
| <th>Description</th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr class="doc-section-item"> |
| <td><code>PyArrowFile</code></td> <td> |
| <code><a class="autorefs autorefs-internal" title="pyiceberg.io.pyarrow.PyArrowFile" href="#pyiceberg.io.pyarrow.PyArrowFile">PyArrowFile</a></code> |
| </td> |
| <td> |
| <div class="doc-md-description"> |
| <p>A PyArrowFile instance for the given location.</p> |
| </div> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| |
| <details class="quote"> |
| <summary>Source code in <code>pyiceberg/io/pyarrow.py</code></summary> |
| <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-512">512</a></span> |
| <span class="normal"><a href="#__codelineno-0-513">513</a></span> |
| <span class="normal"><a href="#__codelineno-0-514">514</a></span> |
| <span class="normal"><a href="#__codelineno-0-515">515</a></span> |
| <span class="normal"><a href="#__codelineno-0-516">516</a></span> |
| <span class="normal"><a href="#__codelineno-0-517">517</a></span> |
| <span class="normal"><a href="#__codelineno-0-518">518</a></span> |
| <span class="normal"><a href="#__codelineno-0-519">519</a></span> |
| <span class="normal"><a href="#__codelineno-0-520">520</a></span> |
| <span class="normal"><a href="#__codelineno-0-521">521</a></span> |
| <span class="normal"><a href="#__codelineno-0-522">522</a></span> |
| <span class="normal"><a href="#__codelineno-0-523">523</a></span> |
| <span class="normal"><a href="#__codelineno-0-524">524</a></span> |
| <span class="normal"><a href="#__codelineno-0-525">525</a></span> |
| <span class="normal"><a href="#__codelineno-0-526">526</a></span> |
| <span class="normal"><a href="#__codelineno-0-527">527</a></span></pre></div></td><td class="code"><div><pre><span></span><code><a id="__codelineno-0-512" name="__codelineno-0-512"></a><span class="k">def</span><span class="w"> </span><span class="nf">new_input</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">location</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="n">PyArrowFile</span><span class="p">:</span> |
| <a id="__codelineno-0-513" name="__codelineno-0-513"></a><span class="w"> </span><span class="sd">"""Get a PyArrowFile instance to read bytes from the file at the given location.</span> |
| <a id="__codelineno-0-514" name="__codelineno-0-514"></a> |
| <a id="__codelineno-0-515" name="__codelineno-0-515"></a><span class="sd"> Args:</span> |
| <a id="__codelineno-0-516" name="__codelineno-0-516"></a><span class="sd"> location (str): A URI or a path to a local file.</span> |
| <a id="__codelineno-0-517" name="__codelineno-0-517"></a> |
| <a id="__codelineno-0-518" name="__codelineno-0-518"></a><span class="sd"> Returns:</span> |
| <a id="__codelineno-0-519" name="__codelineno-0-519"></a><span class="sd"> PyArrowFile: A PyArrowFile instance for the given location.</span> |
| <a id="__codelineno-0-520" name="__codelineno-0-520"></a><span class="sd"> """</span> |
| <a id="__codelineno-0-521" name="__codelineno-0-521"></a> <span class="n">scheme</span><span class="p">,</span> <span class="n">netloc</span><span class="p">,</span> <span class="n">path</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">parse_location</span><span class="p">(</span><span class="n">location</span><span class="p">)</span> |
| <a id="__codelineno-0-522" name="__codelineno-0-522"></a> <span class="k">return</span> <span class="n">PyArrowFile</span><span class="p">(</span> |
| <a id="__codelineno-0-523" name="__codelineno-0-523"></a> <span class="n">fs</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">fs_by_scheme</span><span class="p">(</span><span class="n">scheme</span><span class="p">,</span> <span class="n">netloc</span><span class="p">),</span> |
| <a id="__codelineno-0-524" name="__codelineno-0-524"></a> <span class="n">location</span><span class="o">=</span><span class="n">location</span><span class="p">,</span> |
| <a id="__codelineno-0-525" name="__codelineno-0-525"></a> <span class="n">path</span><span class="o">=</span><span class="n">path</span><span class="p">,</span> |
| <a id="__codelineno-0-526" name="__codelineno-0-526"></a> <span class="n">buffer_size</span><span class="o">=</span><span class="nb">int</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">properties</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">BUFFER_SIZE</span><span class="p">,</span> <span class="n">ONE_MEGABYTE</span><span class="p">)),</span> |
| <a id="__codelineno-0-527" name="__codelineno-0-527"></a> <span class="p">)</span> |
| </code></pre></div></td></tr></table></div> |
| </details> |
| </div> |
| |
| </div> |
| |
| <div class="doc doc-object doc-function"> |
| |
| |
| <h3 id="pyiceberg.io.pyarrow.PyArrowFileIO.new_output" class="doc doc-heading"> |
| <code class="highlight language-python"><span class="n">new_output</span><span class="p">(</span><span class="n">location</span><span class="p">)</span></code> |
| |
| <a href="#pyiceberg.io.pyarrow.PyArrowFileIO.new_output" class="headerlink" title="Permanent link">¶</a></h3> |
| |
| |
| <div class="doc doc-contents "> |
| |
| <p>Get a PyArrowFile instance to write bytes to the file at the given location.</p> |
| |
| |
| <p><span class="doc-section-title">Parameters:</span></p> |
| <table> |
| <thead> |
| <tr> |
| <th>Name</th> |
| <th>Type</th> |
| <th>Description</th> |
| <th>Default</th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr class="doc-section-item"> |
| <td> |
| <code>location</code> |
| </td> |
| <td> |
| <code><span title="str">str</span></code> |
| </td> |
| <td> |
| <div class="doc-md-description"> |
| <p>A URI or a path to a local file.</p> |
| </div> |
| </td> |
| <td> |
| <em>required</em> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| |
| |
| <p><span class="doc-section-title">Returns:</span></p> |
| <table> |
| <thead> |
| <tr> |
| <th>Name</th> <th>Type</th> |
| <th>Description</th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr class="doc-section-item"> |
| <td><code>PyArrowFile</code></td> <td> |
| <code><a class="autorefs autorefs-internal" title="pyiceberg.io.pyarrow.PyArrowFile" href="#pyiceberg.io.pyarrow.PyArrowFile">PyArrowFile</a></code> |
| </td> |
| <td> |
| <div class="doc-md-description"> |
| <p>A PyArrowFile instance for the given location.</p> |
| </div> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| |
| <details class="quote"> |
| <summary>Source code in <code>pyiceberg/io/pyarrow.py</code></summary> |
| <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-529">529</a></span> |
| <span class="normal"><a href="#__codelineno-0-530">530</a></span> |
| <span class="normal"><a href="#__codelineno-0-531">531</a></span> |
| <span class="normal"><a href="#__codelineno-0-532">532</a></span> |
| <span class="normal"><a href="#__codelineno-0-533">533</a></span> |
| <span class="normal"><a href="#__codelineno-0-534">534</a></span> |
| <span class="normal"><a href="#__codelineno-0-535">535</a></span> |
| <span class="normal"><a href="#__codelineno-0-536">536</a></span> |
| <span class="normal"><a href="#__codelineno-0-537">537</a></span> |
| <span class="normal"><a href="#__codelineno-0-538">538</a></span> |
| <span class="normal"><a href="#__codelineno-0-539">539</a></span> |
| <span class="normal"><a href="#__codelineno-0-540">540</a></span> |
| <span class="normal"><a href="#__codelineno-0-541">541</a></span> |
| <span class="normal"><a href="#__codelineno-0-542">542</a></span> |
| <span class="normal"><a href="#__codelineno-0-543">543</a></span> |
| <span class="normal"><a href="#__codelineno-0-544">544</a></span></pre></div></td><td class="code"><div><pre><span></span><code><a id="__codelineno-0-529" name="__codelineno-0-529"></a><span class="k">def</span><span class="w"> </span><span class="nf">new_output</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">location</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="n">PyArrowFile</span><span class="p">:</span> |
| <a id="__codelineno-0-530" name="__codelineno-0-530"></a><span class="w"> </span><span class="sd">"""Get a PyArrowFile instance to write bytes to the file at the given location.</span> |
| <a id="__codelineno-0-531" name="__codelineno-0-531"></a> |
| <a id="__codelineno-0-532" name="__codelineno-0-532"></a><span class="sd"> Args:</span> |
| <a id="__codelineno-0-533" name="__codelineno-0-533"></a><span class="sd"> location (str): A URI or a path to a local file.</span> |
| <a id="__codelineno-0-534" name="__codelineno-0-534"></a> |
| <a id="__codelineno-0-535" name="__codelineno-0-535"></a><span class="sd"> Returns:</span> |
| <a id="__codelineno-0-536" name="__codelineno-0-536"></a><span class="sd"> PyArrowFile: A PyArrowFile instance for the given location.</span> |
| <a id="__codelineno-0-537" name="__codelineno-0-537"></a><span class="sd"> """</span> |
| <a id="__codelineno-0-538" name="__codelineno-0-538"></a> <span class="n">scheme</span><span class="p">,</span> <span class="n">netloc</span><span class="p">,</span> <span class="n">path</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">parse_location</span><span class="p">(</span><span class="n">location</span><span class="p">)</span> |
| <a id="__codelineno-0-539" name="__codelineno-0-539"></a> <span class="k">return</span> <span class="n">PyArrowFile</span><span class="p">(</span> |
| <a id="__codelineno-0-540" name="__codelineno-0-540"></a> <span class="n">fs</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">fs_by_scheme</span><span class="p">(</span><span class="n">scheme</span><span class="p">,</span> <span class="n">netloc</span><span class="p">),</span> |
| <a id="__codelineno-0-541" name="__codelineno-0-541"></a> <span class="n">location</span><span class="o">=</span><span class="n">location</span><span class="p">,</span> |
| <a id="__codelineno-0-542" name="__codelineno-0-542"></a> <span class="n">path</span><span class="o">=</span><span class="n">path</span><span class="p">,</span> |
| <a id="__codelineno-0-543" name="__codelineno-0-543"></a> <span class="n">buffer_size</span><span class="o">=</span><span class="nb">int</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">properties</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">BUFFER_SIZE</span><span class="p">,</span> <span class="n">ONE_MEGABYTE</span><span class="p">)),</span> |
| <a id="__codelineno-0-544" name="__codelineno-0-544"></a> <span class="p">)</span> |
| </code></pre></div></td></tr></table></div> |
| </details> |
| </div> |
| |
| </div> |
| |
| <div class="doc doc-object doc-function"> |
| |
| |
| <h3 id="pyiceberg.io.pyarrow.PyArrowFileIO.parse_location" class="doc doc-heading"> |
| <code class="highlight language-python"><span class="n">parse_location</span><span class="p">(</span><span class="n">location</span><span class="p">)</span></code> |
| |
| <span class="doc doc-labels"> |
| <small class="doc doc-label doc-label-staticmethod"><code>staticmethod</code></small> |
| </span> |
| |
| <a href="#pyiceberg.io.pyarrow.PyArrowFileIO.parse_location" class="headerlink" title="Permanent link">¶</a></h3> |
| |
| |
| <div class="doc doc-contents "> |
| |
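| <p>Parse a location into a <code>(scheme, netloc, path)</code> tuple. A location with no scheme is treated as a local file: the scheme is reported as <code>file</code> and the path is made absolute. For <code>hdfs</code> and <code>viewfs</code> the path is the URI path; for any other scheme the path is the netloc followed by the URI path.</p> |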
| |
| <details class="quote"> |
| <summary>Source code in <code>pyiceberg/io/pyarrow.py</code></summary> |
| <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-366">366</a></span> |
| <span class="normal"><a href="#__codelineno-0-367">367</a></span> |
| <span class="normal"><a href="#__codelineno-0-368">368</a></span> |
| <span class="normal"><a href="#__codelineno-0-369">369</a></span> |
| <span class="normal"><a href="#__codelineno-0-370">370</a></span> |
| <span class="normal"><a href="#__codelineno-0-371">371</a></span> |
| <span class="normal"><a href="#__codelineno-0-372">372</a></span> |
| <span class="normal"><a href="#__codelineno-0-373">373</a></span> |
| <span class="normal"><a href="#__codelineno-0-374">374</a></span> |
| <span class="normal"><a href="#__codelineno-0-375">375</a></span></pre></div></td><td class="code"><div><pre><span></span><code><a id="__codelineno-0-366" name="__codelineno-0-366"></a><span class="nd">@staticmethod</span> |
| <a id="__codelineno-0-367" name="__codelineno-0-367"></a><span class="k">def</span><span class="w"> </span><span class="nf">parse_location</span><span class="p">(</span><span class="n">location</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="n">Tuple</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="nb">str</span><span class="p">,</span> <span class="nb">str</span><span class="p">]:</span> |
| <a id="__codelineno-0-368" name="__codelineno-0-368"></a><span class="w"> </span><span class="sd">"""Return the path without the scheme."""</span> |
| <a id="__codelineno-0-369" name="__codelineno-0-369"></a> <span class="n">uri</span> <span class="o">=</span> <span class="n">urlparse</span><span class="p">(</span><span class="n">location</span><span class="p">)</span> |
| <a id="__codelineno-0-370" name="__codelineno-0-370"></a> <span class="k">if</span> <span class="ow">not</span> <span class="n">uri</span><span class="o">.</span><span class="n">scheme</span><span class="p">:</span> |
| <a id="__codelineno-0-371" name="__codelineno-0-371"></a> <span class="k">return</span> <span class="s2">"file"</span><span class="p">,</span> <span class="n">uri</span><span class="o">.</span><span class="n">netloc</span><span class="p">,</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">abspath</span><span class="p">(</span><span class="n">location</span><span class="p">)</span> |
| <a id="__codelineno-0-372" name="__codelineno-0-372"></a> <span class="k">elif</span> <span class="n">uri</span><span class="o">.</span><span class="n">scheme</span> <span class="ow">in</span> <span class="p">(</span><span class="s2">"hdfs"</span><span class="p">,</span> <span class="s2">"viewfs"</span><span class="p">):</span> |
| <a id="__codelineno-0-373" name="__codelineno-0-373"></a> <span class="k">return</span> <span class="n">uri</span><span class="o">.</span><span class="n">scheme</span><span class="p">,</span> <span class="n">uri</span><span class="o">.</span><span class="n">netloc</span><span class="p">,</span> <span class="n">uri</span><span class="o">.</span><span class="n">path</span> |
| <a id="__codelineno-0-374" name="__codelineno-0-374"></a> <span class="k">else</span><span class="p">:</span> |
| <a id="__codelineno-0-375" name="__codelineno-0-375"></a> <span class="k">return</span> <span class="n">uri</span><span class="o">.</span><span class="n">scheme</span><span class="p">,</span> <span class="n">uri</span><span class="o">.</span><span class="n">netloc</span><span class="p">,</span> <span class="sa">f</span><span class="s2">"</span><span class="si">{</span><span class="n">uri</span><span class="o">.</span><span class="n">netloc</span><span class="si">}{</span><span class="n">uri</span><span class="o">.</span><span class="n">path</span><span class="si">}</span><span class="s2">"</span> |
| </code></pre></div></td></tr></table></div> |
| </details> |
| </div> |
| |
| </div> |
| |
| |
| |
| </div> |
| |
| </div> |
| |
| </div> |
| |
| <div class="doc doc-object doc-class"> |
| |
| |
| |
| <h2 id="pyiceberg.io.pyarrow.PyArrowSchemaVisitor" class="doc doc-heading"> |
| <code>PyArrowSchemaVisitor</code> |
| |
| |
| <a href="#pyiceberg.io.pyarrow.PyArrowSchemaVisitor" class="headerlink" title="Permanent link">¶</a></h2> |
| |
| |
| <div class="doc doc-contents "> |
| <p class="doc doc-class-bases"> |
| Bases: <code><span title="typing.Generic">Generic</span>[<span title="pyiceberg.io.pyarrow.T">T</span>]</code>, <code><span title="abc.ABC">ABC</span></code></p> |
| |
| |
| |
| |
| |
| |
| |
| <details class="quote"> |
| <summary>Source code in <code>pyiceberg/io/pyarrow.py</code></summary> |
| <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-1044">1044</a></span> |
| <span class="normal"><a href="#__codelineno-0-1045">1045</a></span> |
| <span class="normal"><a href="#__codelineno-0-1046">1046</a></span> |
| <span class="normal"><a href="#__codelineno-0-1047">1047</a></span> |
| <span class="normal"><a href="#__codelineno-0-1048">1048</a></span> |
| <span class="normal"><a href="#__codelineno-0-1049">1049</a></span> |
| <span class="normal"><a href="#__codelineno-0-1050">1050</a></span> |
| <span class="normal"><a href="#__codelineno-0-1051">1051</a></span> |
| <span class="normal"><a href="#__codelineno-0-1052">1052</a></span> |
| <span class="normal"><a href="#__codelineno-0-1053">1053</a></span> |
| <span class="normal"><a href="#__codelineno-0-1054">1054</a></span> |
| <span class="normal"><a href="#__codelineno-0-1055">1055</a></span> |
| <span class="normal"><a href="#__codelineno-0-1056">1056</a></span> |
| <span class="normal"><a href="#__codelineno-0-1057">1057</a></span> |
| <span class="normal"><a href="#__codelineno-0-1058">1058</a></span> |
| <span class="normal"><a href="#__codelineno-0-1059">1059</a></span> |
| <span class="normal"><a href="#__codelineno-0-1060">1060</a></span> |
| <span class="normal"><a href="#__codelineno-0-1061">1061</a></span> |
| <span class="normal"><a href="#__codelineno-0-1062">1062</a></span> |
| <span class="normal"><a href="#__codelineno-0-1063">1063</a></span> |
| <span class="normal"><a href="#__codelineno-0-1064">1064</a></span> |
| <span class="normal"><a href="#__codelineno-0-1065">1065</a></span> |
| <span class="normal"><a href="#__codelineno-0-1066">1066</a></span> |
| <span class="normal"><a href="#__codelineno-0-1067">1067</a></span> |
| <span class="normal"><a href="#__codelineno-0-1068">1068</a></span> |
| <span class="normal"><a href="#__codelineno-0-1069">1069</a></span> |
| <span class="normal"><a href="#__codelineno-0-1070">1070</a></span> |
| <span class="normal"><a href="#__codelineno-0-1071">1071</a></span> |
| <span class="normal"><a href="#__codelineno-0-1072">1072</a></span> |
| <span class="normal"><a href="#__codelineno-0-1073">1073</a></span> |
| <span class="normal"><a href="#__codelineno-0-1074">1074</a></span> |
| <span class="normal"><a href="#__codelineno-0-1075">1075</a></span> |
| <span class="normal"><a href="#__codelineno-0-1076">1076</a></span> |
| <span class="normal"><a href="#__codelineno-0-1077">1077</a></span> |
| <span class="normal"><a href="#__codelineno-0-1078">1078</a></span> |
| <span class="normal"><a href="#__codelineno-0-1079">1079</a></span> |
| <span class="normal"><a href="#__codelineno-0-1080">1080</a></span> |
| <span class="normal"><a href="#__codelineno-0-1081">1081</a></span> |
| <span class="normal"><a href="#__codelineno-0-1082">1082</a></span> |
| <span class="normal"><a href="#__codelineno-0-1083">1083</a></span> |
| <span class="normal"><a href="#__codelineno-0-1084">1084</a></span> |
| <span class="normal"><a href="#__codelineno-0-1085">1085</a></span> |
| <span class="normal"><a href="#__codelineno-0-1086">1086</a></span> |
| <span class="normal"><a href="#__codelineno-0-1087">1087</a></span> |
| <span class="normal"><a href="#__codelineno-0-1088">1088</a></span> |
| <span class="normal"><a href="#__codelineno-0-1089">1089</a></span> |
| <span class="normal"><a href="#__codelineno-0-1090">1090</a></span> |
| <span class="normal"><a href="#__codelineno-0-1091">1091</a></span></pre></div></td><td class="code"><div><pre><span></span><code><a id="__codelineno-0-1044" name="__codelineno-0-1044"></a><span class="k">class</span><span class="w"> </span><span class="nc">PyArrowSchemaVisitor</span><span class="p">(</span><span class="n">Generic</span><span class="p">[</span><span class="n">T</span><span class="p">],</span> <span class="n">ABC</span><span class="p">):</span> |
| <a id="__codelineno-0-1045" name="__codelineno-0-1045"></a> <span class="k">def</span><span class="w"> </span><span class="nf">before_field</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">field</span><span class="p">:</span> <span class="n">pa</span><span class="o">.</span><span class="n">Field</span><span class="p">)</span> <span class="o">-></span> <span class="kc">None</span><span class="p">:</span> |
| <a id="__codelineno-0-1046" name="__codelineno-0-1046"></a><span class="w"> </span><span class="sd">"""Override this method to perform an action immediately before visiting a field."""</span> |
| <a id="__codelineno-0-1047" name="__codelineno-0-1047"></a> |
| <a id="__codelineno-0-1048" name="__codelineno-0-1048"></a> <span class="k">def</span><span class="w"> </span><span class="nf">after_field</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">field</span><span class="p">:</span> <span class="n">pa</span><span class="o">.</span><span class="n">Field</span><span class="p">)</span> <span class="o">-></span> <span class="kc">None</span><span class="p">:</span> |
| <a id="__codelineno-0-1049" name="__codelineno-0-1049"></a><span class="w"> </span><span class="sd">"""Override this method to perform an action immediately after visiting a field."""</span> |
| <a id="__codelineno-0-1050" name="__codelineno-0-1050"></a> |
| <a id="__codelineno-0-1051" name="__codelineno-0-1051"></a> <span class="k">def</span><span class="w"> </span><span class="nf">before_list_element</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">element</span><span class="p">:</span> <span class="n">pa</span><span class="o">.</span><span class="n">Field</span><span class="p">)</span> <span class="o">-></span> <span class="kc">None</span><span class="p">:</span> |
| <a id="__codelineno-0-1052" name="__codelineno-0-1052"></a><span class="w"> </span><span class="sd">"""Override this method to perform an action immediately before visiting an element within a ListType."""</span> |
| <a id="__codelineno-0-1053" name="__codelineno-0-1053"></a> |
| <a id="__codelineno-0-1054" name="__codelineno-0-1054"></a> <span class="k">def</span><span class="w"> </span><span class="nf">after_list_element</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">element</span><span class="p">:</span> <span class="n">pa</span><span class="o">.</span><span class="n">Field</span><span class="p">)</span> <span class="o">-></span> <span class="kc">None</span><span class="p">:</span> |
| <a id="__codelineno-0-1055" name="__codelineno-0-1055"></a><span class="w"> </span><span class="sd">"""Override this method to perform an action immediately after visiting an element within a ListType."""</span> |
| <a id="__codelineno-0-1056" name="__codelineno-0-1056"></a> |
| <a id="__codelineno-0-1057" name="__codelineno-0-1057"></a> <span class="k">def</span><span class="w"> </span><span class="nf">before_map_key</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">key</span><span class="p">:</span> <span class="n">pa</span><span class="o">.</span><span class="n">Field</span><span class="p">)</span> <span class="o">-></span> <span class="kc">None</span><span class="p">:</span> |
| <a id="__codelineno-0-1058" name="__codelineno-0-1058"></a><span class="w"> </span><span class="sd">"""Override this method to perform an action immediately before visiting a key within a MapType."""</span> |
| <a id="__codelineno-0-1059" name="__codelineno-0-1059"></a> |
| <a id="__codelineno-0-1060" name="__codelineno-0-1060"></a> <span class="k">def</span><span class="w"> </span><span class="nf">after_map_key</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">key</span><span class="p">:</span> <span class="n">pa</span><span class="o">.</span><span class="n">Field</span><span class="p">)</span> <span class="o">-></span> <span class="kc">None</span><span class="p">:</span> |
| <a id="__codelineno-0-1061" name="__codelineno-0-1061"></a><span class="w"> </span><span class="sd">"""Override this method to perform an action immediately after visiting a key within a MapType."""</span> |
| <a id="__codelineno-0-1062" name="__codelineno-0-1062"></a> |
| <a id="__codelineno-0-1063" name="__codelineno-0-1063"></a> <span class="k">def</span><span class="w"> </span><span class="nf">before_map_value</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="n">pa</span><span class="o">.</span><span class="n">Field</span><span class="p">)</span> <span class="o">-></span> <span class="kc">None</span><span class="p">:</span> |
| <a id="__codelineno-0-1064" name="__codelineno-0-1064"></a><span class="w"> </span><span class="sd">"""Override this method to perform an action immediately before visiting a value within a MapType."""</span> |
| <a id="__codelineno-0-1065" name="__codelineno-0-1065"></a> |
| <a id="__codelineno-0-1066" name="__codelineno-0-1066"></a> <span class="k">def</span><span class="w"> </span><span class="nf">after_map_value</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="n">pa</span><span class="o">.</span><span class="n">Field</span><span class="p">)</span> <span class="o">-></span> <span class="kc">None</span><span class="p">:</span> |
| <a id="__codelineno-0-1067" name="__codelineno-0-1067"></a><span class="w"> </span><span class="sd">"""Override this method to perform an action immediately after visiting a value within a MapType."""</span> |
| <a id="__codelineno-0-1068" name="__codelineno-0-1068"></a> |
| <a id="__codelineno-0-1069" name="__codelineno-0-1069"></a> <span class="nd">@abstractmethod</span> |
| <a id="__codelineno-0-1070" name="__codelineno-0-1070"></a> <span class="k">def</span><span class="w"> </span><span class="nf">schema</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">schema</span><span class="p">:</span> <span class="n">pa</span><span class="o">.</span><span class="n">Schema</span><span class="p">,</span> <span class="n">struct_result</span><span class="p">:</span> <span class="n">T</span><span class="p">)</span> <span class="o">-></span> <span class="n">T</span><span class="p">:</span> |
| <a id="__codelineno-0-1071" name="__codelineno-0-1071"></a><span class="w"> </span><span class="sd">"""Visit a schema."""</span> |
| <a id="__codelineno-0-1072" name="__codelineno-0-1072"></a> |
| <a id="__codelineno-0-1073" name="__codelineno-0-1073"></a> <span class="nd">@abstractmethod</span> |
| <a id="__codelineno-0-1074" name="__codelineno-0-1074"></a> <span class="k">def</span><span class="w"> </span><span class="nf">struct</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">struct</span><span class="p">:</span> <span class="n">pa</span><span class="o">.</span><span class="n">StructType</span><span class="p">,</span> <span class="n">field_results</span><span class="p">:</span> <span class="n">List</span><span class="p">[</span><span class="n">T</span><span class="p">])</span> <span class="o">-></span> <span class="n">T</span><span class="p">:</span> |
| <a id="__codelineno-0-1075" name="__codelineno-0-1075"></a><span class="w"> </span><span class="sd">"""Visit a struct."""</span> |
| <a id="__codelineno-0-1076" name="__codelineno-0-1076"></a> |
| <a id="__codelineno-0-1077" name="__codelineno-0-1077"></a> <span class="nd">@abstractmethod</span> |
| <a id="__codelineno-0-1078" name="__codelineno-0-1078"></a> <span class="k">def</span><span class="w"> </span><span class="nf">field</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">field</span><span class="p">:</span> <span class="n">pa</span><span class="o">.</span><span class="n">Field</span><span class="p">,</span> <span class="n">field_result</span><span class="p">:</span> <span class="n">T</span><span class="p">)</span> <span class="o">-></span> <span class="n">T</span><span class="p">:</span> |
| <a id="__codelineno-0-1079" name="__codelineno-0-1079"></a><span class="w"> </span><span class="sd">"""Visit a field."""</span> |
| <a id="__codelineno-0-1080" name="__codelineno-0-1080"></a> |
| <a id="__codelineno-0-1081" name="__codelineno-0-1081"></a> <span class="nd">@abstractmethod</span> |
| <a id="__codelineno-0-1082" name="__codelineno-0-1082"></a> <span class="k">def</span><span class="w"> </span><span class="nf">list</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">list_type</span><span class="p">:</span> <span class="n">pa</span><span class="o">.</span><span class="n">ListType</span><span class="p">,</span> <span class="n">element_result</span><span class="p">:</span> <span class="n">T</span><span class="p">)</span> <span class="o">-></span> <span class="n">T</span><span class="p">:</span> |
| <a id="__codelineno-0-1083" name="__codelineno-0-1083"></a><span class="w"> </span><span class="sd">"""Visit a list."""</span> |
| <a id="__codelineno-0-1084" name="__codelineno-0-1084"></a> |
| <a id="__codelineno-0-1085" name="__codelineno-0-1085"></a> <span class="nd">@abstractmethod</span> |
| <a id="__codelineno-0-1086" name="__codelineno-0-1086"></a> <span class="k">def</span><span class="w"> </span><span class="nf">map</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">map_type</span><span class="p">:</span> <span class="n">pa</span><span class="o">.</span><span class="n">MapType</span><span class="p">,</span> <span class="n">key_result</span><span class="p">:</span> <span class="n">T</span><span class="p">,</span> <span class="n">value_result</span><span class="p">:</span> <span class="n">T</span><span class="p">)</span> <span class="o">-></span> <span class="n">T</span><span class="p">:</span> |
| <a id="__codelineno-0-1087" name="__codelineno-0-1087"></a><span class="w"> </span><span class="sd">"""Visit a map."""</span> |
| <a id="__codelineno-0-1088" name="__codelineno-0-1088"></a> |
| <a id="__codelineno-0-1089" name="__codelineno-0-1089"></a> <span class="nd">@abstractmethod</span> |
| <a id="__codelineno-0-1090" name="__codelineno-0-1090"></a> <span class="k">def</span><span class="w"> </span><span class="nf">primitive</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">primitive</span><span class="p">:</span> <span class="n">pa</span><span class="o">.</span><span class="n">DataType</span><span class="p">)</span> <span class="o">-></span> <span class="n">T</span><span class="p">:</span> |
| <a id="__codelineno-0-1091" name="__codelineno-0-1091"></a><span class="w"> </span><span class="sd">"""Visit a primitive type."""</span> |
| </code></pre></div></td></tr></table></div> |
| </details> |
| |
| |
| |
| <div class="doc doc-children"> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <div class="doc doc-object doc-function"> |
| |
| |
| <h3 id="pyiceberg.io.pyarrow.PyArrowSchemaVisitor.after_field" class="doc doc-heading"> |
| <code class="highlight language-python"><span class="n">after_field</span><span class="p">(</span><span class="n">field</span><span class="p">)</span></code> |
| |
| <a href="#pyiceberg.io.pyarrow.PyArrowSchemaVisitor.after_field" class="headerlink" title="Permanent link">¶</a></h3> |
| |
| |
| <div class="doc doc-contents "> |
| |
| <p>Override this method to perform an action immediately after visiting a field.</p> |
| |
| <details class="quote"> |
| <summary>Source code in <code>pyiceberg/io/pyarrow.py</code></summary> |
| <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-1048">1048</a></span> |
| <span class="normal"><a href="#__codelineno-0-1049">1049</a></span></pre></div></td><td class="code"><div><pre><span></span><code><a id="__codelineno-0-1048" name="__codelineno-0-1048"></a><span class="k">def</span><span class="w"> </span><span class="nf">after_field</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">field</span><span class="p">:</span> <span class="n">pa</span><span class="o">.</span><span class="n">Field</span><span class="p">)</span> <span class="o">-></span> <span class="kc">None</span><span class="p">:</span> |
| <a id="__codelineno-0-1049" name="__codelineno-0-1049"></a><span class="w"> </span><span class="sd">"""Override this method to perform an action immediately after visiting a field."""</span> |
| </code></pre></div></td></tr></table></div> |
| </details> |
| </div> |
| |
| </div> |
| |
| <div class="doc doc-object doc-function"> |
| |
| |
| <h3 id="pyiceberg.io.pyarrow.PyArrowSchemaVisitor.after_list_element" class="doc doc-heading"> |
| <code class="highlight language-python"><span class="n">after_list_element</span><span class="p">(</span><span class="n">element</span><span class="p">)</span></code> |
| |
| <a href="#pyiceberg.io.pyarrow.PyArrowSchemaVisitor.after_list_element" class="headerlink" title="Permanent link">¶</a></h3> |
| |
| |
| <div class="doc doc-contents "> |
| |
| <p>Override this method to perform an action immediately after visiting an element within a ListType.</p> |
| |
| <details class="quote"> |
| <summary>Source code in <code>pyiceberg/io/pyarrow.py</code></summary> |
| <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-1054">1054</a></span> |
| <span class="normal"><a href="#__codelineno-0-1055">1055</a></span></pre></div></td><td class="code"><div><pre><span></span><code><a id="__codelineno-0-1054" name="__codelineno-0-1054"></a><span class="k">def</span><span class="w"> </span><span class="nf">after_list_element</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">element</span><span class="p">:</span> <span class="n">pa</span><span class="o">.</span><span class="n">Field</span><span class="p">)</span> <span class="o">-></span> <span class="kc">None</span><span class="p">:</span> |
| <a id="__codelineno-0-1055" name="__codelineno-0-1055"></a><span class="w"> </span><span class="sd">"""Override this method to perform an action immediately after visiting an element within a ListType."""</span> |
| </code></pre></div></td></tr></table></div> |
| </details> |
| </div> |
| |
| </div> |
| |
| <div class="doc doc-object doc-function"> |
| |
| |
| <h3 id="pyiceberg.io.pyarrow.PyArrowSchemaVisitor.after_map_key" class="doc doc-heading"> |
| <code class="highlight language-python"><span class="n">after_map_key</span><span class="p">(</span><span class="n">key</span><span class="p">)</span></code> |
| |
| <a href="#pyiceberg.io.pyarrow.PyArrowSchemaVisitor.after_map_key" class="headerlink" title="Permanent link">¶</a></h3> |
| |
| |
| <div class="doc doc-contents "> |
| |
| <p>Override this method to perform an action immediately after visiting a key within a MapType.</p> |
| |
| <details class="quote"> |
| <summary>Source code in <code>pyiceberg/io/pyarrow.py</code></summary> |
| <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-1060">1060</a></span> |
| <span class="normal"><a href="#__codelineno-0-1061">1061</a></span></pre></div></td><td class="code"><div><pre><span></span><code><a id="__codelineno-0-1060" name="__codelineno-0-1060"></a><span class="k">def</span><span class="w"> </span><span class="nf">after_map_key</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">key</span><span class="p">:</span> <span class="n">pa</span><span class="o">.</span><span class="n">Field</span><span class="p">)</span> <span class="o">-></span> <span class="kc">None</span><span class="p">:</span> |
| <a id="__codelineno-0-1061" name="__codelineno-0-1061"></a><span class="w"> </span><span class="sd">"""Override this method to perform an action immediately after visiting a key within a MapType."""</span> |
| </code></pre></div></td></tr></table></div> |
| </details> |
| </div> |
| |
| </div> |
| |
| <div class="doc doc-object doc-function"> |
| |
| |
| <h3 id="pyiceberg.io.pyarrow.PyArrowSchemaVisitor.after_map_value" class="doc doc-heading"> |
| <code class="highlight language-python"><span class="n">after_map_value</span><span class="p">(</span><span class="n">value</span><span class="p">)</span></code> |
| |
| <a href="#pyiceberg.io.pyarrow.PyArrowSchemaVisitor.after_map_value" class="headerlink" title="Permanent link">¶</a></h3> |
| |
| |
| <div class="doc doc-contents "> |
| |
| <p>Override this method to perform an action immediately after visiting a value within a MapType.</p> |
| |
| <details class="quote"> |
| <summary>Source code in <code>pyiceberg/io/pyarrow.py</code></summary> |
| <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-1066">1066</a></span> |
| <span class="normal"><a href="#__codelineno-0-1067">1067</a></span></pre></div></td><td class="code"><div><pre><span></span><code><a id="__codelineno-0-1066" name="__codelineno-0-1066"></a><span class="k">def</span><span class="w"> </span><span class="nf">after_map_value</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="n">pa</span><span class="o">.</span><span class="n">Field</span><span class="p">)</span> <span class="o">-></span> <span class="kc">None</span><span class="p">:</span> |
| <a id="__codelineno-0-1067" name="__codelineno-0-1067"></a><span class="w"> </span><span class="sd">"""Override this method to perform an action immediately after visiting a value within a MapType."""</span> |
| </code></pre></div></td></tr></table></div> |
| </details> |
| </div> |
| |
| </div> |
| |
| <div class="doc doc-object doc-function"> |
| |
| |
| <h3 id="pyiceberg.io.pyarrow.PyArrowSchemaVisitor.before_field" class="doc doc-heading"> |
| <code class="highlight language-python"><span class="n">before_field</span><span class="p">(</span><span class="n">field</span><span class="p">)</span></code> |
| |
| <a href="#pyiceberg.io.pyarrow.PyArrowSchemaVisitor.before_field" class="headerlink" title="Permanent link">¶</a></h3> |
| |
| |
| <div class="doc doc-contents "> |
| |
| <p>Override this method to perform an action immediately before visiting a field.</p> |
| |
| <details class="quote"> |
| <summary>Source code in <code>pyiceberg/io/pyarrow.py</code></summary> |
| <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-1045">1045</a></span> |
| <span class="normal"><a href="#__codelineno-0-1046">1046</a></span></pre></div></td><td class="code"><div><pre><span></span><code><a id="__codelineno-0-1045" name="__codelineno-0-1045"></a><span class="k">def</span><span class="w"> </span><span class="nf">before_field</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">field</span><span class="p">:</span> <span class="n">pa</span><span class="o">.</span><span class="n">Field</span><span class="p">)</span> <span class="o">-></span> <span class="kc">None</span><span class="p">:</span> |
| <a id="__codelineno-0-1046" name="__codelineno-0-1046"></a><span class="w"> </span><span class="sd">"""Override this method to perform an action immediately before visiting a field."""</span> |
| </code></pre></div></td></tr></table></div> |
| </details> |
| </div> |
| |
| </div> |
| |
| <div class="doc doc-object doc-function"> |
| |
| |
| <h3 id="pyiceberg.io.pyarrow.PyArrowSchemaVisitor.before_list_element" class="doc doc-heading"> |
| <code class="highlight language-python"><span class="n">before_list_element</span><span class="p">(</span><span class="n">element</span><span class="p">)</span></code> |
| |
| <a href="#pyiceberg.io.pyarrow.PyArrowSchemaVisitor.before_list_element" class="headerlink" title="Permanent link">¶</a></h3> |
| |
| |
| <div class="doc doc-contents "> |
| |
| <p>Override this method to perform an action immediately before visiting an element within a ListType.</p> |
| |
| <details class="quote"> |
| <summary>Source code in <code>pyiceberg/io/pyarrow.py</code></summary> |
| <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-1051">1051</a></span> |
| <span class="normal"><a href="#__codelineno-0-1052">1052</a></span></pre></div></td><td class="code"><div><pre><span></span><code><a id="__codelineno-0-1051" name="__codelineno-0-1051"></a><span class="k">def</span><span class="w"> </span><span class="nf">before_list_element</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">element</span><span class="p">:</span> <span class="n">pa</span><span class="o">.</span><span class="n">Field</span><span class="p">)</span> <span class="o">-></span> <span class="kc">None</span><span class="p">:</span> |
| <a id="__codelineno-0-1052" name="__codelineno-0-1052"></a><span class="w"> </span><span class="sd">"""Override this method to perform an action immediately before visiting an element within a ListType."""</span> |
| </code></pre></div></td></tr></table></div> |
| </details> |
| </div> |
| |
| </div> |
| |
| <div class="doc doc-object doc-function"> |
| |
| |
| <h3 id="pyiceberg.io.pyarrow.PyArrowSchemaVisitor.before_map_key" class="doc doc-heading"> |
| <code class="highlight language-python"><span class="n">before_map_key</span><span class="p">(</span><span class="n">key</span><span class="p">)</span></code> |
| |
| <a href="#pyiceberg.io.pyarrow.PyArrowSchemaVisitor.before_map_key" class="headerlink" title="Permanent link">¶</a></h3> |
| |
| |
| <div class="doc doc-contents "> |
| |
| <p>Override this method to perform an action immediately before visiting a key within a MapType.</p> |
| |
| <details class="quote"> |
| <summary>Source code in <code>pyiceberg/io/pyarrow.py</code></summary> |
| <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-1057">1057</a></span> |
| <span class="normal"><a href="#__codelineno-0-1058">1058</a></span></pre></div></td><td class="code"><div><pre><span></span><code><a id="__codelineno-0-1057" name="__codelineno-0-1057"></a><span class="k">def</span><span class="w"> </span><span class="nf">before_map_key</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">key</span><span class="p">:</span> <span class="n">pa</span><span class="o">.</span><span class="n">Field</span><span class="p">)</span> <span class="o">-></span> <span class="kc">None</span><span class="p">:</span> |
| <a id="__codelineno-0-1058" name="__codelineno-0-1058"></a><span class="w"> </span><span class="sd">"""Override this method to perform an action immediately before visiting a key within a MapType."""</span> |
| </code></pre></div></td></tr></table></div> |
| </details> |
| </div> |
| |
| </div> |
| |
| <div class="doc doc-object doc-function"> |
| |
| |
| <h3 id="pyiceberg.io.pyarrow.PyArrowSchemaVisitor.before_map_value" class="doc doc-heading"> |
| <code class="highlight language-python"><span class="n">before_map_value</span><span class="p">(</span><span class="n">value</span><span class="p">)</span></code> |
| |
| <a href="#pyiceberg.io.pyarrow.PyArrowSchemaVisitor.before_map_value" class="headerlink" title="Permanent link">¶</a></h3> |
| |
| |
| <div class="doc doc-contents "> |
| |
| <p>Override this method to perform an action immediately before visiting a value within a MapType.</p> |
| |
| <details class="quote"> |
| <summary>Source code in <code>pyiceberg/io/pyarrow.py</code></summary> |
| <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-1063">1063</a></span> |
| <span class="normal"><a href="#__codelineno-0-1064">1064</a></span></pre></div></td><td class="code"><div><pre><span></span><code><a id="__codelineno-0-1063" name="__codelineno-0-1063"></a><span class="k">def</span><span class="w"> </span><span class="nf">before_map_value</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="n">pa</span><span class="o">.</span><span class="n">Field</span><span class="p">)</span> <span class="o">-></span> <span class="kc">None</span><span class="p">:</span> |
| <a id="__codelineno-0-1064" name="__codelineno-0-1064"></a><span class="w"> </span><span class="sd">"""Override this method to perform an action immediately before visiting a value within a MapType."""</span> |
| </code></pre></div></td></tr></table></div> |
| </details> |
| </div> |
| |
| </div> |
| |
| <div class="doc doc-object doc-function"> |
| |
| |
| <h3 id="pyiceberg.io.pyarrow.PyArrowSchemaVisitor.field" class="doc doc-heading"> |
| <code class="highlight language-python"><span class="n">field</span><span class="p">(</span><span class="n">field</span><span class="p">,</span> <span class="n">field_result</span><span class="p">)</span></code> |
| |
| <span class="doc doc-labels"> |
| <small class="doc doc-label doc-label-abstractmethod"><code>abstractmethod</code></small> |
| </span> |
| |
| <a href="#pyiceberg.io.pyarrow.PyArrowSchemaVisitor.field" class="headerlink" title="Permanent link">¶</a></h3> |
| |
| |
| <div class="doc doc-contents "> |
| |
| <p>Visit a field.</p> |
| |
| <details class="quote"> |
| <summary>Source code in <code>pyiceberg/io/pyarrow.py</code></summary> |
| <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-1077">1077</a></span> |
| <span class="normal"><a href="#__codelineno-0-1078">1078</a></span> |
| <span class="normal"><a href="#__codelineno-0-1079">1079</a></span></pre></div></td><td class="code"><div><pre><span></span><code><a id="__codelineno-0-1077" name="__codelineno-0-1077"></a><span class="nd">@abstractmethod</span> |
| <a id="__codelineno-0-1078" name="__codelineno-0-1078"></a><span class="k">def</span><span class="w"> </span><span class="nf">field</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">field</span><span class="p">:</span> <span class="n">pa</span><span class="o">.</span><span class="n">Field</span><span class="p">,</span> <span class="n">field_result</span><span class="p">:</span> <span class="n">T</span><span class="p">)</span> <span class="o">-></span> <span class="n">T</span><span class="p">:</span> |
| <a id="__codelineno-0-1079" name="__codelineno-0-1079"></a><span class="w"> </span><span class="sd">"""Visit a field."""</span> |
| </code></pre></div></td></tr></table></div> |
| </details> |
| </div> |
| |
| </div> |
| |
| <div class="doc doc-object doc-function"> |
| |
| |
| <h3 id="pyiceberg.io.pyarrow.PyArrowSchemaVisitor.list" class="doc doc-heading"> |
| <code class="highlight language-python"><span class="nb">list</span><span class="p">(</span><span class="n">list_type</span><span class="p">,</span> <span class="n">element_result</span><span class="p">)</span></code> |
| |
| <span class="doc doc-labels"> |
| <small class="doc doc-label doc-label-abstractmethod"><code>abstractmethod</code></small> |
| </span> |
| |
| <a href="#pyiceberg.io.pyarrow.PyArrowSchemaVisitor.list" class="headerlink" title="Permanent link">¶</a></h3> |
| |
| |
| <div class="doc doc-contents "> |
| |
| <p>Visit a list.</p> |
| |
| <details class="quote"> |
| <summary>Source code in <code>pyiceberg/io/pyarrow.py</code></summary> |
| <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-1081">1081</a></span> |
| <span class="normal"><a href="#__codelineno-0-1082">1082</a></span> |
| <span class="normal"><a href="#__codelineno-0-1083">1083</a></span></pre></div></td><td class="code"><div><pre><span></span><code><a id="__codelineno-0-1081" name="__codelineno-0-1081"></a><span class="nd">@abstractmethod</span> |
| <a id="__codelineno-0-1082" name="__codelineno-0-1082"></a><span class="k">def</span><span class="w"> </span><span class="nf">list</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">list_type</span><span class="p">:</span> <span class="n">pa</span><span class="o">.</span><span class="n">ListType</span><span class="p">,</span> <span class="n">element_result</span><span class="p">:</span> <span class="n">T</span><span class="p">)</span> <span class="o">-></span> <span class="n">T</span><span class="p">:</span> |
| <a id="__codelineno-0-1083" name="__codelineno-0-1083"></a><span class="w"> </span><span class="sd">"""Visit a list."""</span> |
| </code></pre></div></td></tr></table></div> |
| </details> |
| </div> |
| |
| </div> |
| |
| <div class="doc doc-object doc-function"> |
| |
| |
| <h3 id="pyiceberg.io.pyarrow.PyArrowSchemaVisitor.map" class="doc doc-heading"> |
| <code class="highlight language-python"><span class="nb">map</span><span class="p">(</span><span class="n">map_type</span><span class="p">,</span> <span class="n">key_result</span><span class="p">,</span> <span class="n">value_result</span><span class="p">)</span></code> |
| |
| <span class="doc doc-labels"> |
| <small class="doc doc-label doc-label-abstractmethod"><code>abstractmethod</code></small> |
| </span> |
| |
| <a href="#pyiceberg.io.pyarrow.PyArrowSchemaVisitor.map" class="headerlink" title="Permanent link">¶</a></h3> |
| |
| |
| <div class="doc doc-contents "> |
| |
| <p>Visit a map.</p> |
| |
| <details class="quote"> |
| <summary>Source code in <code>pyiceberg/io/pyarrow.py</code></summary> |
| <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-1085">1085</a></span> |
| <span class="normal"><a href="#__codelineno-0-1086">1086</a></span> |
| <span class="normal"><a href="#__codelineno-0-1087">1087</a></span></pre></div></td><td class="code"><div><pre><span></span><code><a id="__codelineno-0-1085" name="__codelineno-0-1085"></a><span class="nd">@abstractmethod</span> |
| <a id="__codelineno-0-1086" name="__codelineno-0-1086"></a><span class="k">def</span><span class="w"> </span><span class="nf">map</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">map_type</span><span class="p">:</span> <span class="n">pa</span><span class="o">.</span><span class="n">MapType</span><span class="p">,</span> <span class="n">key_result</span><span class="p">:</span> <span class="n">T</span><span class="p">,</span> <span class="n">value_result</span><span class="p">:</span> <span class="n">T</span><span class="p">)</span> <span class="o">-></span> <span class="n">T</span><span class="p">:</span> |
| <a id="__codelineno-0-1087" name="__codelineno-0-1087"></a><span class="w"> </span><span class="sd">"""Visit a map."""</span> |
| </code></pre></div></td></tr></table></div> |
| </details> |
| </div> |
| |
| </div> |
| |
| <div class="doc doc-object doc-function"> |
| |
| |
| <h3 id="pyiceberg.io.pyarrow.PyArrowSchemaVisitor.primitive" class="doc doc-heading"> |
| <code class="highlight language-python"><span class="n">primitive</span><span class="p">(</span><span class="n">primitive</span><span class="p">)</span></code> |
| |
| <span class="doc doc-labels"> |
| <small class="doc doc-label doc-label-abstractmethod"><code>abstractmethod</code></small> |
| </span> |
| |
| <a href="#pyiceberg.io.pyarrow.PyArrowSchemaVisitor.primitive" class="headerlink" title="Permanent link">¶</a></h3> |
| |
| |
| <div class="doc doc-contents "> |
| |
| <p>Visit a primitive type.</p> |
| |
| <details class="quote"> |
| <summary>Source code in <code>pyiceberg/io/pyarrow.py</code></summary> |
| <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-1089">1089</a></span> |
| <span class="normal"><a href="#__codelineno-0-1090">1090</a></span> |
| <span class="normal"><a href="#__codelineno-0-1091">1091</a></span></pre></div></td><td class="code"><div><pre><span></span><code><a id="__codelineno-0-1089" name="__codelineno-0-1089"></a><span class="nd">@abstractmethod</span> |
| <a id="__codelineno-0-1090" name="__codelineno-0-1090"></a><span class="k">def</span><span class="w"> </span><span class="nf">primitive</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">primitive</span><span class="p">:</span> <span class="n">pa</span><span class="o">.</span><span class="n">DataType</span><span class="p">)</span> <span class="o">-></span> <span class="n">T</span><span class="p">:</span> |
| <a id="__codelineno-0-1091" name="__codelineno-0-1091"></a><span class="w"> </span><span class="sd">"""Visit a primitive type."""</span> |
| </code></pre></div></td></tr></table></div> |
| </details> |
| </div> |
| |
| </div> |
| |
| <div class="doc doc-object doc-function"> |
| |
| |
| <h3 id="pyiceberg.io.pyarrow.PyArrowSchemaVisitor.schema" class="doc doc-heading"> |
| <code class="highlight language-python"><span class="n">schema</span><span class="p">(</span><span class="n">schema</span><span class="p">,</span> <span class="n">struct_result</span><span class="p">)</span></code> |
| |
| <span class="doc doc-labels"> |
| <small class="doc doc-label doc-label-abstractmethod"><code>abstractmethod</code></small> |
| </span> |
| |
| <a href="#pyiceberg.io.pyarrow.PyArrowSchemaVisitor.schema" class="headerlink" title="Permanent link">¶</a></h3> |
| |
| |
| <div class="doc doc-contents "> |
| |
| <p>Visit a schema.</p> |
| |
| <details class="quote"> |
| <summary>Source code in <code>pyiceberg/io/pyarrow.py</code></summary> |
| <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-1069">1069</a></span> |
| <span class="normal"><a href="#__codelineno-0-1070">1070</a></span> |
| <span class="normal"><a href="#__codelineno-0-1071">1071</a></span></pre></div></td><td class="code"><div><pre><span></span><code><a id="__codelineno-0-1069" name="__codelineno-0-1069"></a><span class="nd">@abstractmethod</span> |
| <a id="__codelineno-0-1070" name="__codelineno-0-1070"></a><span class="k">def</span><span class="w"> </span><span class="nf">schema</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">schema</span><span class="p">:</span> <span class="n">pa</span><span class="o">.</span><span class="n">Schema</span><span class="p">,</span> <span class="n">struct_result</span><span class="p">:</span> <span class="n">T</span><span class="p">)</span> <span class="o">-></span> <span class="n">T</span><span class="p">:</span> |
| <a id="__codelineno-0-1071" name="__codelineno-0-1071"></a><span class="w"> </span><span class="sd">"""Visit a schema."""</span> |
| </code></pre></div></td></tr></table></div> |
| </details> |
| </div> |
| |
| </div> |
| |
| <div class="doc doc-object doc-function"> |
| |
| |
| <h3 id="pyiceberg.io.pyarrow.PyArrowSchemaVisitor.struct" class="doc doc-heading"> |
| <code class="highlight language-python"><span class="n">struct</span><span class="p">(</span><span class="n">struct</span><span class="p">,</span> <span class="n">field_results</span><span class="p">)</span></code> |
| |
| <span class="doc doc-labels"> |
| <small class="doc doc-label doc-label-abstractmethod"><code>abstractmethod</code></small> |
| </span> |
| |
| <a href="#pyiceberg.io.pyarrow.PyArrowSchemaVisitor.struct" class="headerlink" title="Permanent link">¶</a></h3> |
| |
| |
| <div class="doc doc-contents "> |
| |
| <p>Visit a struct.</p> |
| |
| <details class="quote"> |
| <summary>Source code in <code>pyiceberg/io/pyarrow.py</code></summary> |
| <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-1073">1073</a></span> |
| <span class="normal"><a href="#__codelineno-0-1074">1074</a></span> |
| <span class="normal"><a href="#__codelineno-0-1075">1075</a></span></pre></div></td><td class="code"><div><pre><span></span><code><a id="__codelineno-0-1073" name="__codelineno-0-1073"></a><span class="nd">@abstractmethod</span> |
| <a id="__codelineno-0-1074" name="__codelineno-0-1074"></a><span class="k">def</span><span class="w"> </span><span class="nf">struct</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">struct</span><span class="p">:</span> <span class="n">pa</span><span class="o">.</span><span class="n">StructType</span><span class="p">,</span> <span class="n">field_results</span><span class="p">:</span> <span class="n">List</span><span class="p">[</span><span class="n">T</span><span class="p">])</span> <span class="o">-></span> <span class="n">T</span><span class="p">:</span> |
| <a id="__codelineno-0-1075" name="__codelineno-0-1075"></a><span class="w"> </span><span class="sd">"""Visit a struct."""</span> |
| </code></pre></div></td></tr></table></div> |
| </details> |
| </div> |
| |
| </div> |
| |
| |
| |
| </div> |
| |
| </div> |
| |
| </div> |
| |
| <div class="doc doc-object doc-class"> |
| |
| |
| |
| <h2 id="pyiceberg.io.pyarrow.UnsupportedPyArrowTypeException" class="doc doc-heading"> |
| <code>UnsupportedPyArrowTypeException</code> |
| |
| |
| <a href="#pyiceberg.io.pyarrow.UnsupportedPyArrowTypeException" class="headerlink" title="Permanent link">¶</a></h2> |
| |
| |
| <div class="doc doc-contents "> |
| <p class="doc doc-class-bases"> |
| Bases: <code><span title="Exception">Exception</span></code></p> |
| |
| |
| <p>Raised when a PyArrow type cannot be converted to its corresponding Iceberg type.</p> |
| |
| |
| |
| |
| |
| |
| <details class="quote"> |
| <summary>Source code in <code>pyiceberg/io/pyarrow.py</code></summary> |
| <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-209">209</a></span> |
| <span class="normal"><a href="#__codelineno-0-210">210</a></span> |
| <span class="normal"><a href="#__codelineno-0-211">211</a></span> |
| <span class="normal"><a href="#__codelineno-0-212">212</a></span> |
| <span class="normal"><a href="#__codelineno-0-213">213</a></span> |
| <span class="normal"><a href="#__codelineno-0-214">214</a></span></pre></div></td><td class="code"><div><pre><span></span><code><a id="__codelineno-0-209" name="__codelineno-0-209"></a><span class="k">class</span><span class="w"> </span><span class="nc">UnsupportedPyArrowTypeException</span><span class="p">(</span><span class="ne">Exception</span><span class="p">):</span> |
| <a id="__codelineno-0-210" name="__codelineno-0-210"></a><span class="w"> </span><span class="sd">"""Cannot convert PyArrow type to corresponding Iceberg type."""</span> |
| <a id="__codelineno-0-211" name="__codelineno-0-211"></a> |
| <a id="__codelineno-0-212" name="__codelineno-0-212"></a> <span class="k">def</span><span class="w"> </span><span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">field</span><span class="p">:</span> <span class="n">pa</span><span class="o">.</span><span class="n">Field</span><span class="p">,</span> <span class="o">*</span><span class="n">args</span><span class="p">:</span> <span class="n">Any</span><span class="p">):</span> |
| <a id="__codelineno-0-213" name="__codelineno-0-213"></a> <span class="bp">self</span><span class="o">.</span><span class="n">field</span> <span class="o">=</span> <span class="n">field</span> |
| <a id="__codelineno-0-214" name="__codelineno-0-214"></a> <span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="fm">__init__</span><span class="p">(</span><span class="o">*</span><span class="n">args</span><span class="p">)</span> |
| </code></pre></div></td></tr></table></div> |
| </details> |
| |
| |
| |
| <div class="doc doc-children"> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| </div> |
| |
| </div> |
| |
| </div> |
| |
| <div class="doc doc-object doc-class"> |
| |
| |
| |
| <h2 id="pyiceberg.io.pyarrow._ConvertToIceberg" class="doc doc-heading"> |
| <code>_ConvertToIceberg</code> |
| |
| |
| <a href="#pyiceberg.io.pyarrow._ConvertToIceberg" class="headerlink" title="Permanent link">¶</a></h2> |
| |
| |
| <div class="doc doc-contents "> |
| <p class="doc doc-class-bases"> |
| Bases: <code><a class="autorefs autorefs-internal" title="pyiceberg.io.pyarrow.PyArrowSchemaVisitor" href="#pyiceberg.io.pyarrow.PyArrowSchemaVisitor">PyArrowSchemaVisitor</a>[<span title="typing.Union">Union</span>[<a class="autorefs autorefs-internal" title="pyiceberg.types.IcebergType" href="../../types/#pyiceberg.types.IcebergType">IcebergType</a>, <a class="autorefs autorefs-internal" title="pyiceberg.schema.Schema" href="../../schema/#pyiceberg.schema.Schema">Schema</a>]]</code></p> |
| |
| |
| <p>Converts a PyArrow schema to an Iceberg schema. Applies the IDs from name_mapping if provided.</p> |
| |
| |
| |
| |
| |
| |
| <details class="quote"> |
| <summary>Source code in <code>pyiceberg/io/pyarrow.py</code></summary> |
| <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-1128">1128</a></span> |
| <span class="normal"><a href="#__codelineno-0-1129">1129</a></span> |
| <span class="normal"><a href="#__codelineno-0-1130">1130</a></span> |
| <span class="normal"><a href="#__codelineno-0-1131">1131</a></span> |
| <span class="normal"><a href="#__codelineno-0-1132">1132</a></span> |
| <span class="normal"><a href="#__codelineno-0-1133">1133</a></span> |
| <span class="normal"><a href="#__codelineno-0-1134">1134</a></span> |
| <span class="normal"><a href="#__codelineno-0-1135">1135</a></span> |
| <span class="normal"><a href="#__codelineno-0-1136">1136</a></span> |
| <span class="normal"><a href="#__codelineno-0-1137">1137</a></span> |
| <span class="normal"><a href="#__codelineno-0-1138">1138</a></span> |
| <span class="normal"><a href="#__codelineno-0-1139">1139</a></span> |
| <span class="normal"><a href="#__codelineno-0-1140">1140</a></span> |
| <span class="normal"><a href="#__codelineno-0-1141">1141</a></span> |
| <span class="normal"><a href="#__codelineno-0-1142">1142</a></span> |
| <span class="normal"><a href="#__codelineno-0-1143">1143</a></span> |
| <span class="normal"><a href="#__codelineno-0-1144">1144</a></span> |
| <span class="normal"><a href="#__codelineno-0-1145">1145</a></span> |
| <span class="normal"><a href="#__codelineno-0-1146">1146</a></span> |
| <span class="normal"><a href="#__codelineno-0-1147">1147</a></span> |
| <span class="normal"><a href="#__codelineno-0-1148">1148</a></span> |
| <span class="normal"><a href="#__codelineno-0-1149">1149</a></span> |
| <span class="normal"><a href="#__codelineno-0-1150">1150</a></span> |
| <span class="normal"><a href="#__codelineno-0-1151">1151</a></span> |
| <span class="normal"><a href="#__codelineno-0-1152">1152</a></span> |
| <span class="normal"><a href="#__codelineno-0-1153">1153</a></span> |
| <span class="normal"><a href="#__codelineno-0-1154">1154</a></span> |
| <span class="normal"><a href="#__codelineno-0-1155">1155</a></span> |
| <span class="normal"><a href="#__codelineno-0-1156">1156</a></span> |
| <span class="normal"><a href="#__codelineno-0-1157">1157</a></span> |
| <span class="normal"><a href="#__codelineno-0-1158">1158</a></span> |
| <span class="normal"><a href="#__codelineno-0-1159">1159</a></span> |
| <span class="normal"><a href="#__codelineno-0-1160">1160</a></span> |
| <span class="normal"><a href="#__codelineno-0-1161">1161</a></span> |
| <span class="normal"><a href="#__codelineno-0-1162">1162</a></span> |
| <span class="normal"><a href="#__codelineno-0-1163">1163</a></span> |
| <span class="normal"><a href="#__codelineno-0-1164">1164</a></span> |
| <span class="normal"><a href="#__codelineno-0-1165">1165</a></span> |
| <span class="normal"><a href="#__codelineno-0-1166">1166</a></span> |
| <span class="normal"><a href="#__codelineno-0-1167">1167</a></span> |
| <span class="normal"><a href="#__codelineno-0-1168">1168</a></span> |
| <span class="normal"><a href="#__codelineno-0-1169">1169</a></span> |
| <span class="normal"><a href="#__codelineno-0-1170">1170</a></span> |
| <span class="normal"><a href="#__codelineno-0-1171">1171</a></span> |
| <span class="normal"><a href="#__codelineno-0-1172">1172</a></span> |
| <span class="normal"><a href="#__codelineno-0-1173">1173</a></span> |
| <span class="normal"><a href="#__codelineno-0-1174">1174</a></span> |
| <span class="normal"><a href="#__codelineno-0-1175">1175</a></span> |
| <span class="normal"><a href="#__codelineno-0-1176">1176</a></span> |
| <span class="normal"><a href="#__codelineno-0-1177">1177</a></span> |
| <span class="normal"><a href="#__codelineno-0-1178">1178</a></span> |
| <span class="normal"><a href="#__codelineno-0-1179">1179</a></span> |
| <span class="normal"><a href="#__codelineno-0-1180">1180</a></span> |
| <span class="normal"><a href="#__codelineno-0-1181">1181</a></span> |
| <span class="normal"><a href="#__codelineno-0-1182">1182</a></span> |
| <span class="normal"><a href="#__codelineno-0-1183">1183</a></span> |
| <span class="normal"><a href="#__codelineno-0-1184">1184</a></span> |
| <span class="normal"><a href="#__codelineno-0-1185">1185</a></span> |
| <span class="normal"><a href="#__codelineno-0-1186">1186</a></span> |
| <span class="normal"><a href="#__codelineno-0-1187">1187</a></span> |
| <span class="normal"><a href="#__codelineno-0-1188">1188</a></span> |
| <span class="normal"><a href="#__codelineno-0-1189">1189</a></span> |
| <span class="normal"><a href="#__codelineno-0-1190">1190</a></span> |
| <span class="normal"><a href="#__codelineno-0-1191">1191</a></span> |
| <span class="normal"><a href="#__codelineno-0-1192">1192</a></span> |
| <span class="normal"><a href="#__codelineno-0-1193">1193</a></span> |
| <span class="normal"><a href="#__codelineno-0-1194">1194</a></span> |
| <span class="normal"><a href="#__codelineno-0-1195">1195</a></span> |
| <span class="normal"><a href="#__codelineno-0-1196">1196</a></span> |
| <span class="normal"><a href="#__codelineno-0-1197">1197</a></span> |
| <span class="normal"><a href="#__codelineno-0-1198">1198</a></span> |
| <span class="normal"><a href="#__codelineno-0-1199">1199</a></span> |
| <span class="normal"><a href="#__codelineno-0-1200">1200</a></span> |
| <span class="normal"><a href="#__codelineno-0-1201">1201</a></span> |
| <span class="normal"><a href="#__codelineno-0-1202">1202</a></span> |
| <span class="normal"><a href="#__codelineno-0-1203">1203</a></span> |
| <span class="normal"><a href="#__codelineno-0-1204">1204</a></span> |
| <span class="normal"><a href="#__codelineno-0-1205">1205</a></span> |
| <span class="normal"><a href="#__codelineno-0-1206">1206</a></span> |
| <span class="normal"><a href="#__codelineno-0-1207">1207</a></span> |
| <span class="normal"><a href="#__codelineno-0-1208">1208</a></span> |
| <span class="normal"><a href="#__codelineno-0-1209">1209</a></span> |
| <span class="normal"><a href="#__codelineno-0-1210">1210</a></span> |
| <span class="normal"><a href="#__codelineno-0-1211">1211</a></span> |
| <span class="normal"><a href="#__codelineno-0-1212">1212</a></span> |
| <span class="normal"><a href="#__codelineno-0-1213">1213</a></span> |
| <span class="normal"><a href="#__codelineno-0-1214">1214</a></span> |
| <span class="normal"><a href="#__codelineno-0-1215">1215</a></span> |
| <span class="normal"><a href="#__codelineno-0-1216">1216</a></span> |
| <span class="normal"><a href="#__codelineno-0-1217">1217</a></span> |
| <span class="normal"><a href="#__codelineno-0-1218">1218</a></span> |
| <span class="normal"><a href="#__codelineno-0-1219">1219</a></span> |
| <span class="normal"><a href="#__codelineno-0-1220">1220</a></span> |
| <span class="normal"><a href="#__codelineno-0-1221">1221</a></span> |
| <span class="normal"><a href="#__codelineno-0-1222">1222</a></span> |
| <span class="normal"><a href="#__codelineno-0-1223">1223</a></span> |
| <span class="normal"><a href="#__codelineno-0-1224">1224</a></span> |
| <span class="normal"><a href="#__codelineno-0-1225">1225</a></span> |
| <span class="normal"><a href="#__codelineno-0-1226">1226</a></span> |
| <span class="normal"><a href="#__codelineno-0-1227">1227</a></span> |
| <span class="normal"><a href="#__codelineno-0-1228">1228</a></span> |
| <span class="normal"><a href="#__codelineno-0-1229">1229</a></span> |
| <span class="normal"><a href="#__codelineno-0-1230">1230</a></span> |
| <span class="normal"><a href="#__codelineno-0-1231">1231</a></span> |
| <span class="normal"><a href="#__codelineno-0-1232">1232</a></span> |
| <span class="normal"><a href="#__codelineno-0-1233">1233</a></span> |
| <span class="normal"><a href="#__codelineno-0-1234">1234</a></span> |
| <span class="normal"><a href="#__codelineno-0-1235">1235</a></span> |
| <span class="normal"><a href="#__codelineno-0-1236">1236</a></span> |
| <span class="normal"><a href="#__codelineno-0-1237">1237</a></span> |
| <span class="normal"><a href="#__codelineno-0-1238">1238</a></span> |
| <span class="normal"><a href="#__codelineno-0-1239">1239</a></span> |
| <span class="normal"><a href="#__codelineno-0-1240">1240</a></span> |
| <span class="normal"><a href="#__codelineno-0-1241">1241</a></span> |
| <span class="normal"><a href="#__codelineno-0-1242">1242</a></span> |
| <span class="normal"><a href="#__codelineno-0-1243">1243</a></span> |
| <span class="normal"><a href="#__codelineno-0-1244">1244</a></span> |
| <span class="normal"><a href="#__codelineno-0-1245">1245</a></span> |
| <span class="normal"><a href="#__codelineno-0-1246">1246</a></span> |
| <span class="normal"><a href="#__codelineno-0-1247">1247</a></span> |
| <span class="normal"><a href="#__codelineno-0-1248">1248</a></span></pre></div></td><td class="code"><div><pre><span></span><code><a id="__codelineno-0-1128" name="__codelineno-0-1128"></a><span class="k">class</span><span class="w"> </span><span class="nc">_ConvertToIceberg</span><span class="p">(</span><span class="n">PyArrowSchemaVisitor</span><span class="p">[</span><span class="n">Union</span><span class="p">[</span><span class="n">IcebergType</span><span class="p">,</span> <span class="n">Schema</span><span class="p">]]):</span> |
| <a id="__codelineno-0-1129" name="__codelineno-0-1129"></a><span class="w"> </span><span class="sd">"""Converts PyArrowSchema to Iceberg Schema. Applies the IDs from name_mapping if provided."""</span> |
| <a id="__codelineno-0-1130" name="__codelineno-0-1130"></a> |
| <a id="__codelineno-0-1131" name="__codelineno-0-1131"></a> <span class="n">_field_names</span><span class="p">:</span> <span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> |
| <a id="__codelineno-0-1132" name="__codelineno-0-1132"></a> |
| <a id="__codelineno-0-1133" name="__codelineno-0-1133"></a> <span class="k">def</span><span class="w"> </span><span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">downcast_ns_timestamp_to_us</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">False</span><span class="p">)</span> <span class="o">-></span> <span class="kc">None</span><span class="p">:</span> |
| <a id="__codelineno-0-1134" name="__codelineno-0-1134"></a> <span class="bp">self</span><span class="o">.</span><span class="n">_field_names</span> <span class="o">=</span> <span class="p">[]</span> |
| <a id="__codelineno-0-1135" name="__codelineno-0-1135"></a> <span class="bp">self</span><span class="o">.</span><span class="n">_downcast_ns_timestamp_to_us</span> <span class="o">=</span> <span class="n">downcast_ns_timestamp_to_us</span> |
| <a id="__codelineno-0-1136" name="__codelineno-0-1136"></a> |
| <a id="__codelineno-0-1137" name="__codelineno-0-1137"></a> <span class="k">def</span><span class="w"> </span><span class="nf">_field_id</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">field</span><span class="p">:</span> <span class="n">pa</span><span class="o">.</span><span class="n">Field</span><span class="p">)</span> <span class="o">-></span> <span class="nb">int</span><span class="p">:</span> |
| <a id="__codelineno-0-1138" name="__codelineno-0-1138"></a> <span class="k">if</span> <span class="p">(</span><span class="n">field_id</span> <span class="o">:=</span> <span class="n">_get_field_id</span><span class="p">(</span><span class="n">field</span><span class="p">))</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span> |
| <a id="__codelineno-0-1139" name="__codelineno-0-1139"></a> <span class="k">return</span> <span class="n">field_id</span> |
| <a id="__codelineno-0-1140" name="__codelineno-0-1140"></a> <span class="k">else</span><span class="p">:</span> |
| <a id="__codelineno-0-1141" name="__codelineno-0-1141"></a> <span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="sa">f</span><span class="s2">"Cannot convert </span><span class="si">{</span><span class="n">field</span><span class="si">}</span><span class="s2"> to Iceberg Field as field_id is empty."</span><span class="p">)</span> |
| <a id="__codelineno-0-1142" name="__codelineno-0-1142"></a> |
| <a id="__codelineno-0-1143" name="__codelineno-0-1143"></a> <span class="k">def</span><span class="w"> </span><span class="nf">schema</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">schema</span><span class="p">:</span> <span class="n">pa</span><span class="o">.</span><span class="n">Schema</span><span class="p">,</span> <span class="n">struct_result</span><span class="p">:</span> <span class="n">StructType</span><span class="p">)</span> <span class="o">-></span> <span class="n">Schema</span><span class="p">:</span> |
| <a id="__codelineno-0-1144" name="__codelineno-0-1144"></a> <span class="k">return</span> <span class="n">Schema</span><span class="p">(</span><span class="o">*</span><span class="n">struct_result</span><span class="o">.</span><span class="n">fields</span><span class="p">)</span> |
| <a id="__codelineno-0-1145" name="__codelineno-0-1145"></a> |
| <a id="__codelineno-0-1146" name="__codelineno-0-1146"></a> <span class="k">def</span><span class="w"> </span><span class="nf">struct</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">struct</span><span class="p">:</span> <span class="n">pa</span><span class="o">.</span><span class="n">StructType</span><span class="p">,</span> <span class="n">field_results</span><span class="p">:</span> <span class="n">List</span><span class="p">[</span><span class="n">NestedField</span><span class="p">])</span> <span class="o">-></span> <span class="n">StructType</span><span class="p">:</span> |
| <a id="__codelineno-0-1147" name="__codelineno-0-1147"></a> <span class="k">return</span> <span class="n">StructType</span><span class="p">(</span><span class="o">*</span><span class="n">field_results</span><span class="p">)</span> |
| <a id="__codelineno-0-1148" name="__codelineno-0-1148"></a> |
| <a id="__codelineno-0-1149" name="__codelineno-0-1149"></a> <span class="k">def</span><span class="w"> </span><span class="nf">field</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">field</span><span class="p">:</span> <span class="n">pa</span><span class="o">.</span><span class="n">Field</span><span class="p">,</span> <span class="n">field_result</span><span class="p">:</span> <span class="n">IcebergType</span><span class="p">)</span> <span class="o">-></span> <span class="n">NestedField</span><span class="p">:</span> |
| <a id="__codelineno-0-1150" name="__codelineno-0-1150"></a> <span class="n">field_id</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_field_id</span><span class="p">(</span><span class="n">field</span><span class="p">)</span> |
| <a id="__codelineno-0-1151" name="__codelineno-0-1151"></a> <span class="n">field_doc</span> <span class="o">=</span> <span class="n">doc_str</span><span class="o">.</span><span class="n">decode</span><span class="p">()</span> <span class="k">if</span> <span class="p">(</span><span class="n">field</span><span class="o">.</span><span class="n">metadata</span> <span class="ow">and</span> <span class="p">(</span><span class="n">doc_str</span> <span class="o">:=</span> <span class="n">field</span><span class="o">.</span><span class="n">metadata</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">PYARROW_FIELD_DOC_KEY</span><span class="p">)))</span> <span class="k">else</span> <span class="kc">None</span> |
| <a id="__codelineno-0-1152" name="__codelineno-0-1152"></a> <span class="n">field_type</span> <span class="o">=</span> <span class="n">field_result</span> |
| <a id="__codelineno-0-1153" name="__codelineno-0-1153"></a> <span class="k">return</span> <span class="n">NestedField</span><span class="p">(</span><span class="n">field_id</span><span class="p">,</span> <span class="n">field</span><span class="o">.</span><span class="n">name</span><span class="p">,</span> <span class="n">field_type</span><span class="p">,</span> <span class="n">required</span><span class="o">=</span><span class="ow">not</span> <span class="n">field</span><span class="o">.</span><span class="n">nullable</span><span class="p">,</span> <span class="n">doc</span><span class="o">=</span><span class="n">field_doc</span><span class="p">)</span> |
| <a id="__codelineno-0-1154" name="__codelineno-0-1154"></a> |
| <a id="__codelineno-0-1155" name="__codelineno-0-1155"></a> <span class="k">def</span><span class="w"> </span><span class="nf">list</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">list_type</span><span class="p">:</span> <span class="n">pa</span><span class="o">.</span><span class="n">ListType</span><span class="p">,</span> <span class="n">element_result</span><span class="p">:</span> <span class="n">IcebergType</span><span class="p">)</span> <span class="o">-></span> <span class="n">ListType</span><span class="p">:</span> |
| <a id="__codelineno-0-1156" name="__codelineno-0-1156"></a> <span class="n">element_field</span> <span class="o">=</span> <span class="n">list_type</span><span class="o">.</span><span class="n">value_field</span> |
| <a id="__codelineno-0-1157" name="__codelineno-0-1157"></a> <span class="bp">self</span><span class="o">.</span><span class="n">_field_names</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">LIST_ELEMENT_NAME</span><span class="p">)</span> |
| <a id="__codelineno-0-1158" name="__codelineno-0-1158"></a> <span class="n">element_id</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_field_id</span><span class="p">(</span><span class="n">element_field</span><span class="p">)</span> |
| <a id="__codelineno-0-1159" name="__codelineno-0-1159"></a> <span class="bp">self</span><span class="o">.</span><span class="n">_field_names</span><span class="o">.</span><span class="n">pop</span><span class="p">()</span> |
| <a id="__codelineno-0-1160" name="__codelineno-0-1160"></a> <span class="k">return</span> <span class="n">ListType</span><span class="p">(</span><span class="n">element_id</span><span class="p">,</span> <span class="n">element_result</span><span class="p">,</span> <span class="n">element_required</span><span class="o">=</span><span class="ow">not</span> <span class="n">element_field</span><span class="o">.</span><span class="n">nullable</span><span class="p">)</span> |
| <a id="__codelineno-0-1161" name="__codelineno-0-1161"></a> |
| <a id="__codelineno-0-1162" name="__codelineno-0-1162"></a> <span class="k">def</span><span class="w"> </span><span class="nf">map</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">map_type</span><span class="p">:</span> <span class="n">pa</span><span class="o">.</span><span class="n">MapType</span><span class="p">,</span> <span class="n">key_result</span><span class="p">:</span> <span class="n">IcebergType</span><span class="p">,</span> <span class="n">value_result</span><span class="p">:</span> <span class="n">IcebergType</span><span class="p">)</span> <span class="o">-></span> <span class="n">MapType</span><span class="p">:</span> |
| <a id="__codelineno-0-1163" name="__codelineno-0-1163"></a> <span class="n">key_field</span> <span class="o">=</span> <span class="n">map_type</span><span class="o">.</span><span class="n">key_field</span> |
| <a id="__codelineno-0-1164" name="__codelineno-0-1164"></a> <span class="bp">self</span><span class="o">.</span><span class="n">_field_names</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">MAP_KEY_NAME</span><span class="p">)</span> |
| <a id="__codelineno-0-1165" name="__codelineno-0-1165"></a> <span class="n">key_id</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_field_id</span><span class="p">(</span><span class="n">key_field</span><span class="p">)</span> |
| <a id="__codelineno-0-1166" name="__codelineno-0-1166"></a> <span class="bp">self</span><span class="o">.</span><span class="n">_field_names</span><span class="o">.</span><span class="n">pop</span><span class="p">()</span> |
| <a id="__codelineno-0-1167" name="__codelineno-0-1167"></a> <span class="n">value_field</span> <span class="o">=</span> <span class="n">map_type</span><span class="o">.</span><span class="n">item_field</span> |
| <a id="__codelineno-0-1168" name="__codelineno-0-1168"></a> <span class="bp">self</span><span class="o">.</span><span class="n">_field_names</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">MAP_VALUE_NAME</span><span class="p">)</span> |
| <a id="__codelineno-0-1169" name="__codelineno-0-1169"></a> <span class="n">value_id</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_field_id</span><span class="p">(</span><span class="n">value_field</span><span class="p">)</span> |
| <a id="__codelineno-0-1170" name="__codelineno-0-1170"></a> <span class="bp">self</span><span class="o">.</span><span class="n">_field_names</span><span class="o">.</span><span class="n">pop</span><span class="p">()</span> |
| <a id="__codelineno-0-1171" name="__codelineno-0-1171"></a> <span class="k">return</span> <span class="n">MapType</span><span class="p">(</span><span class="n">key_id</span><span class="p">,</span> <span class="n">key_result</span><span class="p">,</span> <span class="n">value_id</span><span class="p">,</span> <span class="n">value_result</span><span class="p">,</span> <span class="n">value_required</span><span class="o">=</span><span class="ow">not</span> <span class="n">value_field</span><span class="o">.</span><span class="n">nullable</span><span class="p">)</span> |
| <a id="__codelineno-0-1172" name="__codelineno-0-1172"></a> |
| <a id="__codelineno-0-1173" name="__codelineno-0-1173"></a> <span class="k">def</span><span class="w"> </span><span class="nf">primitive</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">primitive</span><span class="p">:</span> <span class="n">pa</span><span class="o">.</span><span class="n">DataType</span><span class="p">)</span> <span class="o">-></span> <span class="n">PrimitiveType</span><span class="p">:</span> |
| <a id="__codelineno-0-1174" name="__codelineno-0-1174"></a> <span class="k">if</span> <span class="n">pa</span><span class="o">.</span><span class="n">types</span><span class="o">.</span><span class="n">is_boolean</span><span class="p">(</span><span class="n">primitive</span><span class="p">):</span> |
| <a id="__codelineno-0-1175" name="__codelineno-0-1175"></a> <span class="k">return</span> <span class="n">BooleanType</span><span class="p">()</span> |
| <a id="__codelineno-0-1176" name="__codelineno-0-1176"></a> <span class="k">elif</span> <span class="n">pa</span><span class="o">.</span><span class="n">types</span><span class="o">.</span><span class="n">is_integer</span><span class="p">(</span><span class="n">primitive</span><span class="p">):</span> |
| <a id="__codelineno-0-1177" name="__codelineno-0-1177"></a> <span class="n">width</span> <span class="o">=</span> <span class="n">primitive</span><span class="o">.</span><span class="n">bit_width</span> |
| <a id="__codelineno-0-1178" name="__codelineno-0-1178"></a> <span class="k">if</span> <span class="n">width</span> <span class="o"><=</span> <span class="mi">32</span><span class="p">:</span> |
| <a id="__codelineno-0-1179" name="__codelineno-0-1179"></a> <span class="k">return</span> <span class="n">IntegerType</span><span class="p">()</span> |
| <a id="__codelineno-0-1180" name="__codelineno-0-1180"></a> <span class="k">elif</span> <span class="n">width</span> <span class="o"><=</span> <span class="mi">64</span><span class="p">:</span> |
| <a id="__codelineno-0-1181" name="__codelineno-0-1181"></a> <span class="k">return</span> <span class="n">LongType</span><span class="p">()</span> |
| <a id="__codelineno-0-1182" name="__codelineno-0-1182"></a> <span class="k">else</span><span class="p">:</span> |
| <a id="__codelineno-0-1183" name="__codelineno-0-1183"></a> <span class="c1"># Does not exist (yet)</span> |
| <a id="__codelineno-0-1184" name="__codelineno-0-1184"></a> <span class="k">raise</span> <span class="ne">TypeError</span><span class="p">(</span><span class="sa">f</span><span class="s2">"Unsupported integer type: </span><span class="si">{</span><span class="n">primitive</span><span class="si">}</span><span class="s2">"</span><span class="p">)</span> |
| <a id="__codelineno-0-1185" name="__codelineno-0-1185"></a> <span class="k">elif</span> <span class="n">pa</span><span class="o">.</span><span class="n">types</span><span class="o">.</span><span class="n">is_float32</span><span class="p">(</span><span class="n">primitive</span><span class="p">):</span> |
| <a id="__codelineno-0-1186" name="__codelineno-0-1186"></a> <span class="k">return</span> <span class="n">FloatType</span><span class="p">()</span> |
| <a id="__codelineno-0-1187" name="__codelineno-0-1187"></a> <span class="k">elif</span> <span class="n">pa</span><span class="o">.</span><span class="n">types</span><span class="o">.</span><span class="n">is_float64</span><span class="p">(</span><span class="n">primitive</span><span class="p">):</span> |
| <a id="__codelineno-0-1188" name="__codelineno-0-1188"></a> <span class="k">return</span> <span class="n">DoubleType</span><span class="p">()</span> |
| <a id="__codelineno-0-1189" name="__codelineno-0-1189"></a> <span class="k">elif</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">primitive</span><span class="p">,</span> <span class="n">pa</span><span class="o">.</span><span class="n">Decimal128Type</span><span class="p">):</span> |
| <a id="__codelineno-0-1190" name="__codelineno-0-1190"></a> <span class="n">primitive</span> <span class="o">=</span> <span class="n">cast</span><span class="p">(</span><span class="n">pa</span><span class="o">.</span><span class="n">Decimal128Type</span><span class="p">,</span> <span class="n">primitive</span><span class="p">)</span> |
| <a id="__codelineno-0-1191" name="__codelineno-0-1191"></a> <span class="k">return</span> <span class="n">DecimalType</span><span class="p">(</span><span class="n">primitive</span><span class="o">.</span><span class="n">precision</span><span class="p">,</span> <span class="n">primitive</span><span class="o">.</span><span class="n">scale</span><span class="p">)</span> |
| <a id="__codelineno-0-1192" name="__codelineno-0-1192"></a> <span class="k">elif</span> <span class="n">pa</span><span class="o">.</span><span class="n">types</span><span class="o">.</span><span class="n">is_string</span><span class="p">(</span><span class="n">primitive</span><span class="p">)</span> <span class="ow">or</span> <span class="n">pa</span><span class="o">.</span><span class="n">types</span><span class="o">.</span><span class="n">is_large_string</span><span class="p">(</span><span class="n">primitive</span><span class="p">):</span> |
| <a id="__codelineno-0-1193" name="__codelineno-0-1193"></a> <span class="k">return</span> <span class="n">StringType</span><span class="p">()</span> |
| <a id="__codelineno-0-1194" name="__codelineno-0-1194"></a> <span class="k">elif</span> <span class="n">pa</span><span class="o">.</span><span class="n">types</span><span class="o">.</span><span class="n">is_date32</span><span class="p">(</span><span class="n">primitive</span><span class="p">):</span> |
| <a id="__codelineno-0-1195" name="__codelineno-0-1195"></a> <span class="k">return</span> <span class="n">DateType</span><span class="p">()</span> |
| <a id="__codelineno-0-1196" name="__codelineno-0-1196"></a> <span class="k">elif</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">primitive</span><span class="p">,</span> <span class="n">pa</span><span class="o">.</span><span class="n">Time64Type</span><span class="p">)</span> <span class="ow">and</span> <span class="n">primitive</span><span class="o">.</span><span class="n">unit</span> <span class="o">==</span> <span class="s2">"us"</span><span class="p">:</span> |
| <a id="__codelineno-0-1197" name="__codelineno-0-1197"></a> <span class="k">return</span> <span class="n">TimeType</span><span class="p">()</span> |
| <a id="__codelineno-0-1198" name="__codelineno-0-1198"></a> <span class="k">elif</span> <span class="n">pa</span><span class="o">.</span><span class="n">types</span><span class="o">.</span><span class="n">is_timestamp</span><span class="p">(</span><span class="n">primitive</span><span class="p">):</span> |
| <a id="__codelineno-0-1199" name="__codelineno-0-1199"></a> <span class="n">primitive</span> <span class="o">=</span> <span class="n">cast</span><span class="p">(</span><span class="n">pa</span><span class="o">.</span><span class="n">TimestampType</span><span class="p">,</span> <span class="n">primitive</span><span class="p">)</span> |
| <a id="__codelineno-0-1200" name="__codelineno-0-1200"></a> <span class="k">if</span> <span class="n">primitive</span><span class="o">.</span><span class="n">unit</span> <span class="ow">in</span> <span class="p">(</span><span class="s2">"s"</span><span class="p">,</span> <span class="s2">"ms"</span><span class="p">,</span> <span class="s2">"us"</span><span class="p">):</span> |
| <a id="__codelineno-0-1201" name="__codelineno-0-1201"></a> <span class="c1"># Supported types, will be upcast automatically to 'us'</span> |
| <a id="__codelineno-0-1202" name="__codelineno-0-1202"></a> <span class="k">pass</span> |
| <a id="__codelineno-0-1203" name="__codelineno-0-1203"></a> <span class="k">elif</span> <span class="n">primitive</span><span class="o">.</span><span class="n">unit</span> <span class="o">==</span> <span class="s2">"ns"</span><span class="p">:</span> |
| <a id="__codelineno-0-1204" name="__codelineno-0-1204"></a> <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">_downcast_ns_timestamp_to_us</span><span class="p">:</span> |
| <a id="__codelineno-0-1205" name="__codelineno-0-1205"></a> <span class="n">logger</span><span class="o">.</span><span class="n">warning</span><span class="p">(</span><span class="s2">"Iceberg does not yet support 'ns' timestamp precision. Downcasting to 'us'."</span><span class="p">)</span> |
| <a id="__codelineno-0-1206" name="__codelineno-0-1206"></a> <span class="k">else</span><span class="p">:</span> |
| <a id="__codelineno-0-1207" name="__codelineno-0-1207"></a> <span class="k">raise</span> <span class="ne">TypeError</span><span class="p">(</span> |
| <a id="__codelineno-0-1208" name="__codelineno-0-1208"></a> <span class="s2">"Iceberg does not yet support 'ns' timestamp precision. Use 'downcast-ns-timestamp-to-us-on-write' configuration property to automatically downcast 'ns' to 'us' on write."</span><span class="p">,</span> |
| <a id="__codelineno-0-1209" name="__codelineno-0-1209"></a> <span class="p">)</span> |
| <a id="__codelineno-0-1210" name="__codelineno-0-1210"></a> <span class="k">else</span><span class="p">:</span> |
| <a id="__codelineno-0-1211" name="__codelineno-0-1211"></a> <span class="k">raise</span> <span class="ne">TypeError</span><span class="p">(</span><span class="sa">f</span><span class="s2">"Unsupported precision for timestamp type: </span><span class="si">{</span><span class="n">primitive</span><span class="o">.</span><span class="n">unit</span><span class="si">}</span><span class="s2">"</span><span class="p">)</span> |
| <a id="__codelineno-0-1212" name="__codelineno-0-1212"></a> |
| <a id="__codelineno-0-1213" name="__codelineno-0-1213"></a> <span class="k">if</span> <span class="n">primitive</span><span class="o">.</span><span class="n">tz</span> <span class="ow">in</span> <span class="n">UTC_ALIASES</span><span class="p">:</span> |
| <a id="__codelineno-0-1214" name="__codelineno-0-1214"></a> <span class="k">return</span> <span class="n">TimestamptzType</span><span class="p">()</span> |
| <a id="__codelineno-0-1215" name="__codelineno-0-1215"></a> <span class="k">elif</span> <span class="n">primitive</span><span class="o">.</span><span class="n">tz</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span> |
| <a id="__codelineno-0-1216" name="__codelineno-0-1216"></a> <span class="k">return</span> <span class="n">TimestampType</span><span class="p">()</span> |
| <a id="__codelineno-0-1217" name="__codelineno-0-1217"></a> |
| <a id="__codelineno-0-1218" name="__codelineno-0-1218"></a> <span class="k">elif</span> <span class="n">pa</span><span class="o">.</span><span class="n">types</span><span class="o">.</span><span class="n">is_binary</span><span class="p">(</span><span class="n">primitive</span><span class="p">)</span> <span class="ow">or</span> <span class="n">pa</span><span class="o">.</span><span class="n">types</span><span class="o">.</span><span class="n">is_large_binary</span><span class="p">(</span><span class="n">primitive</span><span class="p">):</span> |
| <a id="__codelineno-0-1219" name="__codelineno-0-1219"></a> <span class="k">return</span> <span class="n">BinaryType</span><span class="p">()</span> |
| <a id="__codelineno-0-1220" name="__codelineno-0-1220"></a> <span class="k">elif</span> <span class="n">pa</span><span class="o">.</span><span class="n">types</span><span class="o">.</span><span class="n">is_fixed_size_binary</span><span class="p">(</span><span class="n">primitive</span><span class="p">):</span> |
| <a id="__codelineno-0-1221" name="__codelineno-0-1221"></a> <span class="n">primitive</span> <span class="o">=</span> <span class="n">cast</span><span class="p">(</span><span class="n">pa</span><span class="o">.</span><span class="n">FixedSizeBinaryType</span><span class="p">,</span> <span class="n">primitive</span><span class="p">)</span> |
| <a id="__codelineno-0-1222" name="__codelineno-0-1222"></a> <span class="k">return</span> <span class="n">FixedType</span><span class="p">(</span><span class="n">primitive</span><span class="o">.</span><span class="n">byte_width</span><span class="p">)</span> |
| <a id="__codelineno-0-1223" name="__codelineno-0-1223"></a> |
| <a id="__codelineno-0-1224" name="__codelineno-0-1224"></a> <span class="k">raise</span> <span class="ne">TypeError</span><span class="p">(</span><span class="sa">f</span><span class="s2">"Unsupported type: </span><span class="si">{</span><span class="n">primitive</span><span class="si">}</span><span class="s2">"</span><span class="p">)</span> |
| <a id="__codelineno-0-1225" name="__codelineno-0-1225"></a> |
| <a id="__codelineno-0-1226" name="__codelineno-0-1226"></a> <span class="k">def</span><span class="w"> </span><span class="nf">before_field</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">field</span><span class="p">:</span> <span class="n">pa</span><span class="o">.</span><span class="n">Field</span><span class="p">)</span> <span class="o">-></span> <span class="kc">None</span><span class="p">:</span> |
| <a id="__codelineno-0-1227" name="__codelineno-0-1227"></a> <span class="bp">self</span><span class="o">.</span><span class="n">_field_names</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">field</span><span class="o">.</span><span class="n">name</span><span class="p">)</span> |
| <a id="__codelineno-0-1228" name="__codelineno-0-1228"></a> |
| <a id="__codelineno-0-1229" name="__codelineno-0-1229"></a> <span class="k">def</span><span class="w"> </span><span class="nf">after_field</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">field</span><span class="p">:</span> <span class="n">pa</span><span class="o">.</span><span class="n">Field</span><span class="p">)</span> <span class="o">-></span> <span class="kc">None</span><span class="p">:</span> |
| <a id="__codelineno-0-1230" name="__codelineno-0-1230"></a> <span class="bp">self</span><span class="o">.</span><span class="n">_field_names</span><span class="o">.</span><span class="n">pop</span><span class="p">()</span> |
| <a id="__codelineno-0-1231" name="__codelineno-0-1231"></a> |
| <a id="__codelineno-0-1232" name="__codelineno-0-1232"></a> <span class="k">def</span><span class="w"> </span><span class="nf">before_list_element</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">element</span><span class="p">:</span> <span class="n">pa</span><span class="o">.</span><span class="n">Field</span><span class="p">)</span> <span class="o">-></span> <span class="kc">None</span><span class="p">:</span> |
| <a id="__codelineno-0-1233" name="__codelineno-0-1233"></a> <span class="bp">self</span><span class="o">.</span><span class="n">_field_names</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">LIST_ELEMENT_NAME</span><span class="p">)</span> |
| <a id="__codelineno-0-1234" name="__codelineno-0-1234"></a> |
| <a id="__codelineno-0-1235" name="__codelineno-0-1235"></a> <span class="k">def</span><span class="w"> </span><span class="nf">after_list_element</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">element</span><span class="p">:</span> <span class="n">pa</span><span class="o">.</span><span class="n">Field</span><span class="p">)</span> <span class="o">-></span> <span class="kc">None</span><span class="p">:</span> |
| <a id="__codelineno-0-1236" name="__codelineno-0-1236"></a> <span class="bp">self</span><span class="o">.</span><span class="n">_field_names</span><span class="o">.</span><span class="n">pop</span><span class="p">()</span> |
| <a id="__codelineno-0-1237" name="__codelineno-0-1237"></a> |
| <a id="__codelineno-0-1238" name="__codelineno-0-1238"></a> <span class="k">def</span><span class="w"> </span><span class="nf">before_map_key</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">key</span><span class="p">:</span> <span class="n">pa</span><span class="o">.</span><span class="n">Field</span><span class="p">)</span> <span class="o">-></span> <span class="kc">None</span><span class="p">:</span> |
| <a id="__codelineno-0-1239" name="__codelineno-0-1239"></a> <span class="bp">self</span><span class="o">.</span><span class="n">_field_names</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">MAP_KEY_NAME</span><span class="p">)</span> |
| <a id="__codelineno-0-1240" name="__codelineno-0-1240"></a> |
| <a id="__codelineno-0-1241" name="__codelineno-0-1241"></a> <span class="k">def</span><span class="w"> </span><span class="nf">after_map_key</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">element</span><span class="p">:</span> <span class="n">pa</span><span class="o">.</span><span class="n">Field</span><span class="p">)</span> <span class="o">-></span> <span class="kc">None</span><span class="p">:</span> |
| <a id="__codelineno-0-1242" name="__codelineno-0-1242"></a> <span class="bp">self</span><span class="o">.</span><span class="n">_field_names</span><span class="o">.</span><span class="n">pop</span><span class="p">()</span> |
| <a id="__codelineno-0-1243" name="__codelineno-0-1243"></a> |
| <a id="__codelineno-0-1244" name="__codelineno-0-1244"></a> <span class="k">def</span><span class="w"> </span><span class="nf">before_map_value</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="n">pa</span><span class="o">.</span><span class="n">Field</span><span class="p">)</span> <span class="o">-></span> <span class="kc">None</span><span class="p">:</span> |
| <a id="__codelineno-0-1245" name="__codelineno-0-1245"></a> <span class="bp">self</span><span class="o">.</span><span class="n">_field_names</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">MAP_VALUE_NAME</span><span class="p">)</span> |
| <a id="__codelineno-0-1246" name="__codelineno-0-1246"></a> |
| <a id="__codelineno-0-1247" name="__codelineno-0-1247"></a> <span class="k">def</span><span class="w"> </span><span class="nf">after_map_value</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">element</span><span class="p">:</span> <span class="n">pa</span><span class="o">.</span><span class="n">Field</span><span class="p">)</span> <span class="o">-></span> <span class="kc">None</span><span class="p">:</span> |
| <a id="__codelineno-0-1248" name="__codelineno-0-1248"></a> <span class="bp">self</span><span class="o">.</span><span class="n">_field_names</span><span class="o">.</span><span class="n">pop</span><span class="p">()</span> |
| </code></pre></div></td></tr></table></div> |
| </details> |
| |
| |
| |
| <div class="doc doc-children"> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| </div> |
| |
| </div> |
| |
| </div> |
| |
| <div class="doc doc-object doc-class"> |
| |
| |
| |
| <h2 id="pyiceberg.io.pyarrow._ConvertToIcebergWithoutIDs" class="doc doc-heading"> |
| <code>_ConvertToIcebergWithoutIDs</code> |
| |
| |
| <a href="#pyiceberg.io.pyarrow._ConvertToIcebergWithoutIDs" class="headerlink" title="Permanent link">¶</a></h2> |
| |
| |
| <div class="doc doc-contents "> |
| <p class="doc doc-class-bases"> |
| Bases: <code><a class="autorefs autorefs-internal" title="pyiceberg.io.pyarrow._ConvertToIceberg" href="#pyiceberg.io.pyarrow._ConvertToIceberg">_ConvertToIceberg</a></code></p> |
| |
| |
| <p>Converts a PyArrow schema to an Iceberg Schema with every field id set to -1.</p> |
| <p>The schema generated through this visitor should always be |
| used in conjunction with the <code>new_table_metadata</code> function to |
| assign new field ids in order. This is currently used only |
| to build an Iceberg Schema from a PyArrow schema while |
| creating a new Iceberg table.</p> |
| |
| |
| |
| |
| |
| |
| <details class="quote"> |
| <summary>Source code in <code>pyiceberg/io/pyarrow.py</code></summary> |
| <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-1299">1299</a></span> |
| <span class="normal"><a href="#__codelineno-0-1300">1300</a></span> |
| <span class="normal"><a href="#__codelineno-0-1301">1301</a></span> |
| <span class="normal"><a href="#__codelineno-0-1302">1302</a></span> |
| <span class="normal"><a href="#__codelineno-0-1303">1303</a></span> |
| <span class="normal"><a href="#__codelineno-0-1304">1304</a></span> |
| <span class="normal"><a href="#__codelineno-0-1305">1305</a></span> |
| <span class="normal"><a href="#__codelineno-0-1306">1306</a></span> |
| <span class="normal"><a href="#__codelineno-0-1307">1307</a></span> |
| <span class="normal"><a href="#__codelineno-0-1308">1308</a></span> |
| <span class="normal"><a href="#__codelineno-0-1309">1309</a></span> |
| <span class="normal"><a href="#__codelineno-0-1310">1310</a></span> |
| <span class="normal"><a href="#__codelineno-0-1311">1311</a></span></pre></div></td><td class="code"><div><pre><span></span><code><a id="__codelineno-0-1299" name="__codelineno-0-1299"></a><span class="k">class</span><span class="w"> </span><span class="nc">_ConvertToIcebergWithoutIDs</span><span class="p">(</span><span class="n">_ConvertToIceberg</span><span class="p">):</span> |
| <a id="__codelineno-0-1300" name="__codelineno-0-1300"></a><span class="w"> </span><span class="sd">"""</span> |
| <a id="__codelineno-0-1301" name="__codelineno-0-1301"></a><span class="sd"> Converts PyArrowSchema to Iceberg Schema with all -1 ids.</span> |
| <a id="__codelineno-0-1302" name="__codelineno-0-1302"></a> |
| <a id="__codelineno-0-1303" name="__codelineno-0-1303"></a><span class="sd"> The schema generated through this visitor should always be</span> |
| <a id="__codelineno-0-1304" name="__codelineno-0-1304"></a><span class="sd"> used in conjunction with `new_table_metadata` function to</span> |
| <a id="__codelineno-0-1305" name="__codelineno-0-1305"></a><span class="sd"> assign new field ids in order. This is currently used only</span> |
| <a id="__codelineno-0-1306" name="__codelineno-0-1306"></a><span class="sd"> when creating an Iceberg Schema from a PyArrow schema when</span> |
| <a id="__codelineno-0-1307" name="__codelineno-0-1307"></a><span class="sd"> creating a new Iceberg table.</span> |
| <a id="__codelineno-0-1308" name="__codelineno-0-1308"></a><span class="sd"> """</span> |
| <a id="__codelineno-0-1309" name="__codelineno-0-1309"></a> |
| <a id="__codelineno-0-1310" name="__codelineno-0-1310"></a> <span class="k">def</span><span class="w"> </span><span class="nf">_field_id</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">field</span><span class="p">:</span> <span class="n">pa</span><span class="o">.</span><span class="n">Field</span><span class="p">)</span> <span class="o">-></span> <span class="nb">int</span><span class="p">:</span> |
| <a id="__codelineno-0-1311" name="__codelineno-0-1311"></a> <span class="k">return</span> <span class="o">-</span><span class="mi">1</span> |
| </code></pre></div></td></tr></table></div> |
| </details> |
| |
| |
| |
| <div class="doc doc-children"> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| </div> |
| |
| </div> |
| |
| </div> |
| |
| <div class="doc doc-object doc-class"> |
| |
| |
| |
| <h2 id="pyiceberg.io.pyarrow._NullNaNUnmentionedTermsCollector" class="doc doc-heading"> |
| <code>_NullNaNUnmentionedTermsCollector</code> |
| |
| |
| <a href="#pyiceberg.io.pyarrow._NullNaNUnmentionedTermsCollector" class="headerlink" title="Permanent link">¶</a></h2> |
| |
| |
| <div class="doc doc-contents "> |
| <p class="doc doc-class-bases"> |
| Bases: <code><a class="autorefs autorefs-internal" title="pyiceberg.expressions.visitors.BoundBooleanExpressionVisitor" href="../../expressions/visitors/#pyiceberg.expressions.visitors.BoundBooleanExpressionVisitor">BoundBooleanExpressionVisitor</a>[None]</code></p> |
| |
| |
| |
| |
| |
| |
| |
| <details class="quote"> |
| <summary>Source code in <code>pyiceberg/io/pyarrow.py</code></summary> |
| <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-746">746</a></span> |
| <span class="normal"><a href="#__codelineno-0-747">747</a></span> |
| <span class="normal"><a href="#__codelineno-0-748">748</a></span> |
| <span class="normal"><a href="#__codelineno-0-749">749</a></span> |
| <span class="normal"><a href="#__codelineno-0-750">750</a></span> |
| <span class="normal"><a href="#__codelineno-0-751">751</a></span> |
| <span class="normal"><a href="#__codelineno-0-752">752</a></span> |
| <span class="normal"><a href="#__codelineno-0-753">753</a></span> |
| <span class="normal"><a href="#__codelineno-0-754">754</a></span> |
| <span class="normal"><a href="#__codelineno-0-755">755</a></span> |
| <span class="normal"><a href="#__codelineno-0-756">756</a></span> |
| <span class="normal"><a href="#__codelineno-0-757">757</a></span> |
| <span class="normal"><a href="#__codelineno-0-758">758</a></span> |
| <span class="normal"><a href="#__codelineno-0-759">759</a></span> |
| <span class="normal"><a href="#__codelineno-0-760">760</a></span> |
| <span class="normal"><a href="#__codelineno-0-761">761</a></span> |
| <span class="normal"><a href="#__codelineno-0-762">762</a></span> |
| <span class="normal"><a href="#__codelineno-0-763">763</a></span> |
| <span class="normal"><a href="#__codelineno-0-764">764</a></span> |
| <span class="normal"><a href="#__codelineno-0-765">765</a></span> |
| <span class="normal"><a href="#__codelineno-0-766">766</a></span> |
| <span class="normal"><a href="#__codelineno-0-767">767</a></span> |
| <span class="normal"><a href="#__codelineno-0-768">768</a></span> |
| <span class="normal"><a href="#__codelineno-0-769">769</a></span> |
| <span class="normal"><a href="#__codelineno-0-770">770</a></span> |
| <span class="normal"><a href="#__codelineno-0-771">771</a></span> |
| <span class="normal"><a href="#__codelineno-0-772">772</a></span> |
| <span class="normal"><a href="#__codelineno-0-773">773</a></span> |
| <span class="normal"><a href="#__codelineno-0-774">774</a></span> |
| <span class="normal"><a href="#__codelineno-0-775">775</a></span> |
| <span class="normal"><a href="#__codelineno-0-776">776</a></span> |
| <span class="normal"><a href="#__codelineno-0-777">777</a></span> |
| <span class="normal"><a href="#__codelineno-0-778">778</a></span> |
| <span class="normal"><a href="#__codelineno-0-779">779</a></span> |
| <span class="normal"><a href="#__codelineno-0-780">780</a></span> |
| <span class="normal"><a href="#__codelineno-0-781">781</a></span> |
| <span class="normal"><a href="#__codelineno-0-782">782</a></span> |
| <span class="normal"><a href="#__codelineno-0-783">783</a></span> |
| <span class="normal"><a href="#__codelineno-0-784">784</a></span> |
| <span class="normal"><a href="#__codelineno-0-785">785</a></span> |
| <span class="normal"><a href="#__codelineno-0-786">786</a></span> |
| <span class="normal"><a href="#__codelineno-0-787">787</a></span> |
| <span class="normal"><a href="#__codelineno-0-788">788</a></span> |
| <span class="normal"><a href="#__codelineno-0-789">789</a></span> |
| <span class="normal"><a href="#__codelineno-0-790">790</a></span> |
| <span class="normal"><a href="#__codelineno-0-791">791</a></span> |
| <span class="normal"><a href="#__codelineno-0-792">792</a></span> |
| <span class="normal"><a href="#__codelineno-0-793">793</a></span> |
| <span class="normal"><a href="#__codelineno-0-794">794</a></span> |
| <span class="normal"><a href="#__codelineno-0-795">795</a></span> |
| <span class="normal"><a href="#__codelineno-0-796">796</a></span> |
| <span class="normal"><a href="#__codelineno-0-797">797</a></span> |
| <span class="normal"><a href="#__codelineno-0-798">798</a></span> |
| <span class="normal"><a href="#__codelineno-0-799">799</a></span> |
| <span class="normal"><a href="#__codelineno-0-800">800</a></span> |
| <span class="normal"><a href="#__codelineno-0-801">801</a></span> |
| <span class="normal"><a href="#__codelineno-0-802">802</a></span> |
| <span class="normal"><a href="#__codelineno-0-803">803</a></span> |
| <span class="normal"><a href="#__codelineno-0-804">804</a></span> |
| <span class="normal"><a href="#__codelineno-0-805">805</a></span> |
| <span class="normal"><a href="#__codelineno-0-806">806</a></span> |
| <span class="normal"><a href="#__codelineno-0-807">807</a></span> |
| <span class="normal"><a href="#__codelineno-0-808">808</a></span> |
| <span class="normal"><a href="#__codelineno-0-809">809</a></span> |
| <span class="normal"><a href="#__codelineno-0-810">810</a></span> |
| <span class="normal"><a href="#__codelineno-0-811">811</a></span> |
| <span class="normal"><a href="#__codelineno-0-812">812</a></span> |
| <span class="normal"><a href="#__codelineno-0-813">813</a></span> |
| <span class="normal"><a href="#__codelineno-0-814">814</a></span> |
| <span class="normal"><a href="#__codelineno-0-815">815</a></span> |
| <span class="normal"><a href="#__codelineno-0-816">816</a></span> |
| <span class="normal"><a href="#__codelineno-0-817">817</a></span> |
| <span class="normal"><a href="#__codelineno-0-818">818</a></span> |
| <span class="normal"><a href="#__codelineno-0-819">819</a></span> |
| <span class="normal"><a href="#__codelineno-0-820">820</a></span> |
| <span class="normal"><a href="#__codelineno-0-821">821</a></span> |
| <span class="normal"><a href="#__codelineno-0-822">822</a></span> |
| <span class="normal"><a href="#__codelineno-0-823">823</a></span> |
| <span class="normal"><a href="#__codelineno-0-824">824</a></span> |
| <span class="normal"><a href="#__codelineno-0-825">825</a></span> |
| <span class="normal"><a href="#__codelineno-0-826">826</a></span> |
| <span class="normal"><a href="#__codelineno-0-827">827</a></span> |
| <span class="normal"><a href="#__codelineno-0-828">828</a></span> |
| <span class="normal"><a href="#__codelineno-0-829">829</a></span> |
| <span class="normal"><a href="#__codelineno-0-830">830</a></span> |
| <span class="normal"><a href="#__codelineno-0-831">831</a></span> |
| <span class="normal"><a href="#__codelineno-0-832">832</a></span> |
| <span class="normal"><a href="#__codelineno-0-833">833</a></span> |
| <span class="normal"><a href="#__codelineno-0-834">834</a></span> |
| <span class="normal"><a href="#__codelineno-0-835">835</a></span> |
| <span class="normal"><a href="#__codelineno-0-836">836</a></span> |
| <span class="normal"><a href="#__codelineno-0-837">837</a></span> |
| <span class="normal"><a href="#__codelineno-0-838">838</a></span> |
| <span class="normal"><a href="#__codelineno-0-839">839</a></span> |
| <span class="normal"><a href="#__codelineno-0-840">840</a></span> |
| <span class="normal"><a href="#__codelineno-0-841">841</a></span> |
| <span class="normal"><a href="#__codelineno-0-842">842</a></span> |
| <span class="normal"><a href="#__codelineno-0-843">843</a></span> |
| <span class="normal"><a href="#__codelineno-0-844">844</a></span> |
| <span class="normal"><a href="#__codelineno-0-845">845</a></span> |
| <span class="normal"><a href="#__codelineno-0-846">846</a></span> |
| <span class="normal"><a href="#__codelineno-0-847">847</a></span> |
| <span class="normal"><a href="#__codelineno-0-848">848</a></span> |
| <span class="normal"><a href="#__codelineno-0-849">849</a></span> |
| <span class="normal"><a href="#__codelineno-0-850">850</a></span> |
| <span class="normal"><a href="#__codelineno-0-851">851</a></span> |
| <span class="normal"><a href="#__codelineno-0-852">852</a></span> |
| <span class="normal"><a href="#__codelineno-0-853">853</a></span> |
| <span class="normal"><a href="#__codelineno-0-854">854</a></span> |
| <span class="normal"><a href="#__codelineno-0-855">855</a></span> |
| <span class="normal"><a href="#__codelineno-0-856">856</a></span> |
| <span class="normal"><a href="#__codelineno-0-857">857</a></span> |
| <span class="normal"><a href="#__codelineno-0-858">858</a></span> |
| <span class="normal"><a href="#__codelineno-0-859">859</a></span> |
| <span class="normal"><a href="#__codelineno-0-860">860</a></span> |
| <span class="normal"><a href="#__codelineno-0-861">861</a></span></pre></div></td><td class="code"><div><pre><span></span><code><a id="__codelineno-0-746" name="__codelineno-0-746"></a><span class="k">class</span><span class="w"> </span><span class="nc">_NullNaNUnmentionedTermsCollector</span><span class="p">(</span><span class="n">BoundBooleanExpressionVisitor</span><span class="p">[</span><span class="kc">None</span><span class="p">]):</span> |
| <a id="__codelineno-0-747" name="__codelineno-0-747"></a> <span class="c1"># BoundTerms which have either is_null or is_not_null appearing at least once in the boolean expr.</span> |
| <a id="__codelineno-0-748" name="__codelineno-0-748"></a> <span class="n">is_null_or_not_bound_terms</span><span class="p">:</span> <span class="nb">set</span><span class="p">[</span><span class="n">BoundTerm</span><span class="p">[</span><span class="n">Any</span><span class="p">]]</span> |
| <a id="__codelineno-0-749" name="__codelineno-0-749"></a> <span class="c1"># The remaining BoundTerms appearing in the boolean expr.</span> |
| <a id="__codelineno-0-750" name="__codelineno-0-750"></a> <span class="n">null_unmentioned_bound_terms</span><span class="p">:</span> <span class="nb">set</span><span class="p">[</span><span class="n">BoundTerm</span><span class="p">[</span><span class="n">Any</span><span class="p">]]</span> |
| <a id="__codelineno-0-751" name="__codelineno-0-751"></a> <span class="c1"># BoundTerms which have either is_nan or is_not_nan appearing at least once in the boolean expr.</span> |
| <a id="__codelineno-0-752" name="__codelineno-0-752"></a> <span class="n">is_nan_or_not_bound_terms</span><span class="p">:</span> <span class="nb">set</span><span class="p">[</span><span class="n">BoundTerm</span><span class="p">[</span><span class="n">Any</span><span class="p">]]</span> |
| <a id="__codelineno-0-753" name="__codelineno-0-753"></a> <span class="c1"># The remaining BoundTerms appearing in the boolean expr.</span> |
| <a id="__codelineno-0-754" name="__codelineno-0-754"></a> <span class="n">nan_unmentioned_bound_terms</span><span class="p">:</span> <span class="nb">set</span><span class="p">[</span><span class="n">BoundTerm</span><span class="p">[</span><span class="n">Any</span><span class="p">]]</span> |
| <a id="__codelineno-0-755" name="__codelineno-0-755"></a> |
| <a id="__codelineno-0-756" name="__codelineno-0-756"></a> <span class="k">def</span><span class="w"> </span><span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="kc">None</span><span class="p">:</span> |
| <a id="__codelineno-0-757" name="__codelineno-0-757"></a> <span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="fm">__init__</span><span class="p">()</span> |
| <a id="__codelineno-0-758" name="__codelineno-0-758"></a> <span class="bp">self</span><span class="o">.</span><span class="n">is_null_or_not_bound_terms</span> <span class="o">=</span> <span class="nb">set</span><span class="p">()</span> |
| <a id="__codelineno-0-759" name="__codelineno-0-759"></a> <span class="bp">self</span><span class="o">.</span><span class="n">null_unmentioned_bound_terms</span> <span class="o">=</span> <span class="nb">set</span><span class="p">()</span> |
| <a id="__codelineno-0-760" name="__codelineno-0-760"></a> <span class="bp">self</span><span class="o">.</span><span class="n">is_nan_or_not_bound_terms</span> <span class="o">=</span> <span class="nb">set</span><span class="p">()</span> |
| <a id="__codelineno-0-761" name="__codelineno-0-761"></a> <span class="bp">self</span><span class="o">.</span><span class="n">nan_unmentioned_bound_terms</span> <span class="o">=</span> <span class="nb">set</span><span class="p">()</span> |
| <a id="__codelineno-0-762" name="__codelineno-0-762"></a> |
| <a id="__codelineno-0-763" name="__codelineno-0-763"></a> <span class="k">def</span><span class="w"> </span><span class="nf">_handle_explicit_is_null_or_not</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">term</span><span class="p">:</span> <span class="n">BoundTerm</span><span class="p">[</span><span class="n">Any</span><span class="p">])</span> <span class="o">-></span> <span class="kc">None</span><span class="p">:</span> |
| <a id="__codelineno-0-764" name="__codelineno-0-764"></a><span class="w"> </span><span class="sd">"""Handle the predicate case where either is_null or is_not_null is included."""</span> |
| <a id="__codelineno-0-765" name="__codelineno-0-765"></a> <span class="k">if</span> <span class="n">term</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">null_unmentioned_bound_terms</span><span class="p">:</span> |
| <a id="__codelineno-0-766" name="__codelineno-0-766"></a> <span class="bp">self</span><span class="o">.</span><span class="n">null_unmentioned_bound_terms</span><span class="o">.</span><span class="n">remove</span><span class="p">(</span><span class="n">term</span><span class="p">)</span> |
| <a id="__codelineno-0-767" name="__codelineno-0-767"></a> <span class="bp">self</span><span class="o">.</span><span class="n">is_null_or_not_bound_terms</span><span class="o">.</span><span class="n">add</span><span class="p">(</span><span class="n">term</span><span class="p">)</span> |
| <a id="__codelineno-0-768" name="__codelineno-0-768"></a> |
| <a id="__codelineno-0-769" name="__codelineno-0-769"></a> <span class="k">def</span><span class="w"> </span><span class="nf">_handle_null_unmentioned</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">term</span><span class="p">:</span> <span class="n">BoundTerm</span><span class="p">[</span><span class="n">Any</span><span class="p">])</span> <span class="o">-></span> <span class="kc">None</span><span class="p">:</span> |
| <a id="__codelineno-0-770" name="__codelineno-0-770"></a><span class="w"> </span><span class="sd">"""Handle the predicate case where neither is_null nor is_not_null is included."""</span> |
| <a id="__codelineno-0-771" name="__codelineno-0-771"></a> <span class="k">if</span> <span class="n">term</span> <span class="ow">not</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">is_null_or_not_bound_terms</span><span class="p">:</span> |
| <a id="__codelineno-0-772" name="__codelineno-0-772"></a> <span class="bp">self</span><span class="o">.</span><span class="n">null_unmentioned_bound_terms</span><span class="o">.</span><span class="n">add</span><span class="p">(</span><span class="n">term</span><span class="p">)</span> |
| <a id="__codelineno-0-773" name="__codelineno-0-773"></a> |
| <a id="__codelineno-0-774" name="__codelineno-0-774"></a> <span class="k">def</span><span class="w"> </span><span class="nf">_handle_explicit_is_nan_or_not</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">term</span><span class="p">:</span> <span class="n">BoundTerm</span><span class="p">[</span><span class="n">Any</span><span class="p">])</span> <span class="o">-></span> <span class="kc">None</span><span class="p">:</span> |
| <a id="__codelineno-0-775" name="__codelineno-0-775"></a><span class="w"> </span><span class="sd">"""Handle the predicate case where either is_nan or is_not_nan is included."""</span> |
| <a id="__codelineno-0-776" name="__codelineno-0-776"></a> <span class="k">if</span> <span class="n">term</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">nan_unmentioned_bound_terms</span><span class="p">:</span> |
| <a id="__codelineno-0-777" name="__codelineno-0-777"></a> <span class="bp">self</span><span class="o">.</span><span class="n">nan_unmentioned_bound_terms</span><span class="o">.</span><span class="n">remove</span><span class="p">(</span><span class="n">term</span><span class="p">)</span> |
| <a id="__codelineno-0-778" name="__codelineno-0-778"></a> <span class="bp">self</span><span class="o">.</span><span class="n">is_nan_or_not_bound_terms</span><span class="o">.</span><span class="n">add</span><span class="p">(</span><span class="n">term</span><span class="p">)</span> |
| <a id="__codelineno-0-779" name="__codelineno-0-779"></a> |
| <a id="__codelineno-0-780" name="__codelineno-0-780"></a> <span class="k">def</span><span class="w"> </span><span class="nf">_handle_nan_unmentioned</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">term</span><span class="p">:</span> <span class="n">BoundTerm</span><span class="p">[</span><span class="n">Any</span><span class="p">])</span> <span class="o">-></span> <span class="kc">None</span><span class="p">:</span> |
| <a id="__codelineno-0-781" name="__codelineno-0-781"></a><span class="w"> </span><span class="sd">"""Handle the predicate case where neither is_nan nor is_not_nan is included."""</span> |
| <a id="__codelineno-0-782" name="__codelineno-0-782"></a> <span class="k">if</span> <span class="n">term</span> <span class="ow">not</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">is_nan_or_not_bound_terms</span><span class="p">:</span> |
| <a id="__codelineno-0-783" name="__codelineno-0-783"></a> <span class="bp">self</span><span class="o">.</span><span class="n">nan_unmentioned_bound_terms</span><span class="o">.</span><span class="n">add</span><span class="p">(</span><span class="n">term</span><span class="p">)</span> |
| <a id="__codelineno-0-784" name="__codelineno-0-784"></a> |
| <a id="__codelineno-0-785" name="__codelineno-0-785"></a> <span class="k">def</span><span class="w"> </span><span class="nf">visit_in</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">term</span><span class="p">:</span> <span class="n">BoundTerm</span><span class="p">[</span><span class="n">Any</span><span class="p">],</span> <span class="n">literals</span><span class="p">:</span> <span class="n">Set</span><span class="p">[</span><span class="n">Any</span><span class="p">])</span> <span class="o">-></span> <span class="kc">None</span><span class="p">:</span> |
| <a id="__codelineno-0-786" name="__codelineno-0-786"></a> <span class="bp">self</span><span class="o">.</span><span class="n">_handle_null_unmentioned</span><span class="p">(</span><span class="n">term</span><span class="p">)</span> |
| <a id="__codelineno-0-787" name="__codelineno-0-787"></a> <span class="bp">self</span><span class="o">.</span><span class="n">_handle_nan_unmentioned</span><span class="p">(</span><span class="n">term</span><span class="p">)</span> |
| <a id="__codelineno-0-788" name="__codelineno-0-788"></a> |
| <a id="__codelineno-0-789" name="__codelineno-0-789"></a> <span class="k">def</span><span class="w"> </span><span class="nf">visit_not_in</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">term</span><span class="p">:</span> <span class="n">BoundTerm</span><span class="p">[</span><span class="n">Any</span><span class="p">],</span> <span class="n">literals</span><span class="p">:</span> <span class="n">Set</span><span class="p">[</span><span class="n">Any</span><span class="p">])</span> <span class="o">-></span> <span class="kc">None</span><span class="p">:</span> |
| <a id="__codelineno-0-790" name="__codelineno-0-790"></a> <span class="bp">self</span><span class="o">.</span><span class="n">_handle_null_unmentioned</span><span class="p">(</span><span class="n">term</span><span class="p">)</span> |
| <a id="__codelineno-0-791" name="__codelineno-0-791"></a> <span class="bp">self</span><span class="o">.</span><span class="n">_handle_nan_unmentioned</span><span class="p">(</span><span class="n">term</span><span class="p">)</span> |
| <a id="__codelineno-0-792" name="__codelineno-0-792"></a> |
| <a id="__codelineno-0-793" name="__codelineno-0-793"></a> <span class="k">def</span><span class="w"> </span><span class="nf">visit_is_nan</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">term</span><span class="p">:</span> <span class="n">BoundTerm</span><span class="p">[</span><span class="n">Any</span><span class="p">])</span> <span class="o">-></span> <span class="kc">None</span><span class="p">:</span> |
| <a id="__codelineno-0-794" name="__codelineno-0-794"></a> <span class="bp">self</span><span class="o">.</span><span class="n">_handle_null_unmentioned</span><span class="p">(</span><span class="n">term</span><span class="p">)</span> |
| <a id="__codelineno-0-795" name="__codelineno-0-795"></a> <span class="bp">self</span><span class="o">.</span><span class="n">_handle_explicit_is_nan_or_not</span><span class="p">(</span><span class="n">term</span><span class="p">)</span> |
| <a id="__codelineno-0-796" name="__codelineno-0-796"></a> |
| <a id="__codelineno-0-797" name="__codelineno-0-797"></a> <span class="k">def</span><span class="w"> </span><span class="nf">visit_not_nan</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">term</span><span class="p">:</span> <span class="n">BoundTerm</span><span class="p">[</span><span class="n">Any</span><span class="p">])</span> <span class="o">-></span> <span class="kc">None</span><span class="p">:</span> |
| <a id="__codelineno-0-798" name="__codelineno-0-798"></a> <span class="bp">self</span><span class="o">.</span><span class="n">_handle_null_unmentioned</span><span class="p">(</span><span class="n">term</span><span class="p">)</span> |
| <a id="__codelineno-0-799" name="__codelineno-0-799"></a> <span class="bp">self</span><span class="o">.</span><span class="n">_handle_explicit_is_nan_or_not</span><span class="p">(</span><span class="n">term</span><span class="p">)</span> |
| <a id="__codelineno-0-800" name="__codelineno-0-800"></a> |
| <a id="__codelineno-0-801" name="__codelineno-0-801"></a> <span class="k">def</span><span class="w"> </span><span class="nf">visit_is_null</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">term</span><span class="p">:</span> <span class="n">BoundTerm</span><span class="p">[</span><span class="n">Any</span><span class="p">])</span> <span class="o">-></span> <span class="kc">None</span><span class="p">:</span> |
| <a id="__codelineno-0-802" name="__codelineno-0-802"></a> <span class="bp">self</span><span class="o">.</span><span class="n">_handle_explicit_is_null_or_not</span><span class="p">(</span><span class="n">term</span><span class="p">)</span> |
| <a id="__codelineno-0-803" name="__codelineno-0-803"></a> <span class="bp">self</span><span class="o">.</span><span class="n">_handle_nan_unmentioned</span><span class="p">(</span><span class="n">term</span><span class="p">)</span> |
| <a id="__codelineno-0-804" name="__codelineno-0-804"></a> |
| <a id="__codelineno-0-805" name="__codelineno-0-805"></a> <span class="k">def</span><span class="w"> </span><span class="nf">visit_not_null</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">term</span><span class="p">:</span> <span class="n">BoundTerm</span><span class="p">[</span><span class="n">Any</span><span class="p">])</span> <span class="o">-></span> <span class="kc">None</span><span class="p">:</span> |
| <a id="__codelineno-0-806" name="__codelineno-0-806"></a> <span class="bp">self</span><span class="o">.</span><span class="n">_handle_explicit_is_null_or_not</span><span class="p">(</span><span class="n">term</span><span class="p">)</span> |
| <a id="__codelineno-0-807" name="__codelineno-0-807"></a> <span class="bp">self</span><span class="o">.</span><span class="n">_handle_nan_unmentioned</span><span class="p">(</span><span class="n">term</span><span class="p">)</span> |
| <a id="__codelineno-0-808" name="__codelineno-0-808"></a> |
| <a id="__codelineno-0-809" name="__codelineno-0-809"></a> <span class="k">def</span><span class="w"> </span><span class="nf">visit_equal</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">term</span><span class="p">:</span> <span class="n">BoundTerm</span><span class="p">[</span><span class="n">Any</span><span class="p">],</span> <span class="n">literal</span><span class="p">:</span> <span class="n">Literal</span><span class="p">[</span><span class="n">Any</span><span class="p">])</span> <span class="o">-></span> <span class="kc">None</span><span class="p">:</span> |
| <a id="__codelineno-0-810" name="__codelineno-0-810"></a> <span class="bp">self</span><span class="o">.</span><span class="n">_handle_null_unmentioned</span><span class="p">(</span><span class="n">term</span><span class="p">)</span> |
| <a id="__codelineno-0-811" name="__codelineno-0-811"></a> <span class="bp">self</span><span class="o">.</span><span class="n">_handle_nan_unmentioned</span><span class="p">(</span><span class="n">term</span><span class="p">)</span> |
| <a id="__codelineno-0-812" name="__codelineno-0-812"></a> |
| <a id="__codelineno-0-813" name="__codelineno-0-813"></a> <span class="k">def</span><span class="w"> </span><span class="nf">visit_not_equal</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">term</span><span class="p">:</span> <span class="n">BoundTerm</span><span class="p">[</span><span class="n">Any</span><span class="p">],</span> <span class="n">literal</span><span class="p">:</span> <span class="n">Literal</span><span class="p">[</span><span class="n">Any</span><span class="p">])</span> <span class="o">-></span> <span class="kc">None</span><span class="p">:</span> |
| <a id="__codelineno-0-814" name="__codelineno-0-814"></a> <span class="bp">self</span><span class="o">.</span><span class="n">_handle_null_unmentioned</span><span class="p">(</span><span class="n">term</span><span class="p">)</span> |
| <a id="__codelineno-0-815" name="__codelineno-0-815"></a> <span class="bp">self</span><span class="o">.</span><span class="n">_handle_nan_unmentioned</span><span class="p">(</span><span class="n">term</span><span class="p">)</span> |
| <a id="__codelineno-0-816" name="__codelineno-0-816"></a> |
| <a id="__codelineno-0-817" name="__codelineno-0-817"></a> <span class="k">def</span><span class="w"> </span><span class="nf">visit_greater_than_or_equal</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">term</span><span class="p">:</span> <span class="n">BoundTerm</span><span class="p">[</span><span class="n">Any</span><span class="p">],</span> <span class="n">literal</span><span class="p">:</span> <span class="n">Literal</span><span class="p">[</span><span class="n">Any</span><span class="p">])</span> <span class="o">-></span> <span class="kc">None</span><span class="p">:</span> |
| <a id="__codelineno-0-818" name="__codelineno-0-818"></a> <span class="bp">self</span><span class="o">.</span><span class="n">_handle_null_unmentioned</span><span class="p">(</span><span class="n">term</span><span class="p">)</span> |
| <a id="__codelineno-0-819" name="__codelineno-0-819"></a> <span class="bp">self</span><span class="o">.</span><span class="n">_handle_nan_unmentioned</span><span class="p">(</span><span class="n">term</span><span class="p">)</span> |
| <a id="__codelineno-0-820" name="__codelineno-0-820"></a> |
| <a id="__codelineno-0-821" name="__codelineno-0-821"></a> <span class="k">def</span><span class="w"> </span><span class="nf">visit_greater_than</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">term</span><span class="p">:</span> <span class="n">BoundTerm</span><span class="p">[</span><span class="n">Any</span><span class="p">],</span> <span class="n">literal</span><span class="p">:</span> <span class="n">Literal</span><span class="p">[</span><span class="n">Any</span><span class="p">])</span> <span class="o">-></span> <span class="kc">None</span><span class="p">:</span> |
| <a id="__codelineno-0-822" name="__codelineno-0-822"></a> <span class="bp">self</span><span class="o">.</span><span class="n">_handle_null_unmentioned</span><span class="p">(</span><span class="n">term</span><span class="p">)</span> |
| <a id="__codelineno-0-823" name="__codelineno-0-823"></a> <span class="bp">self</span><span class="o">.</span><span class="n">_handle_nan_unmentioned</span><span class="p">(</span><span class="n">term</span><span class="p">)</span> |
| <a id="__codelineno-0-824" name="__codelineno-0-824"></a> |
| <a id="__codelineno-0-825" name="__codelineno-0-825"></a> <span class="k">def</span><span class="w"> </span><span class="nf">visit_less_than</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">term</span><span class="p">:</span> <span class="n">BoundTerm</span><span class="p">[</span><span class="n">Any</span><span class="p">],</span> <span class="n">literal</span><span class="p">:</span> <span class="n">Literal</span><span class="p">[</span><span class="n">Any</span><span class="p">])</span> <span class="o">-></span> <span class="kc">None</span><span class="p">:</span> |
| <a id="__codelineno-0-826" name="__codelineno-0-826"></a> <span class="bp">self</span><span class="o">.</span><span class="n">_handle_null_unmentioned</span><span class="p">(</span><span class="n">term</span><span class="p">)</span> |
| <a id="__codelineno-0-827" name="__codelineno-0-827"></a> <span class="bp">self</span><span class="o">.</span><span class="n">_handle_nan_unmentioned</span><span class="p">(</span><span class="n">term</span><span class="p">)</span> |
| <a id="__codelineno-0-828" name="__codelineno-0-828"></a> |
| <a id="__codelineno-0-829" name="__codelineno-0-829"></a> <span class="k">def</span><span class="w"> </span><span class="nf">visit_less_than_or_equal</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">term</span><span class="p">:</span> <span class="n">BoundTerm</span><span class="p">[</span><span class="n">Any</span><span class="p">],</span> <span class="n">literal</span><span class="p">:</span> <span class="n">Literal</span><span class="p">[</span><span class="n">Any</span><span class="p">])</span> <span class="o">-></span> <span class="kc">None</span><span class="p">:</span> |
| <a id="__codelineno-0-830" name="__codelineno-0-830"></a> <span class="bp">self</span><span class="o">.</span><span class="n">_handle_null_unmentioned</span><span class="p">(</span><span class="n">term</span><span class="p">)</span> |
| <a id="__codelineno-0-831" name="__codelineno-0-831"></a> <span class="bp">self</span><span class="o">.</span><span class="n">_handle_nan_unmentioned</span><span class="p">(</span><span class="n">term</span><span class="p">)</span> |
| <a id="__codelineno-0-832" name="__codelineno-0-832"></a> |
| <a id="__codelineno-0-833" name="__codelineno-0-833"></a> <span class="k">def</span><span class="w"> </span><span class="nf">visit_starts_with</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">term</span><span class="p">:</span> <span class="n">BoundTerm</span><span class="p">[</span><span class="n">Any</span><span class="p">],</span> <span class="n">literal</span><span class="p">:</span> <span class="n">Literal</span><span class="p">[</span><span class="n">Any</span><span class="p">])</span> <span class="o">-></span> <span class="kc">None</span><span class="p">:</span> |
| <a id="__codelineno-0-834" name="__codelineno-0-834"></a> <span class="bp">self</span><span class="o">.</span><span class="n">_handle_null_unmentioned</span><span class="p">(</span><span class="n">term</span><span class="p">)</span> |
| <a id="__codelineno-0-835" name="__codelineno-0-835"></a> <span class="bp">self</span><span class="o">.</span><span class="n">_handle_nan_unmentioned</span><span class="p">(</span><span class="n">term</span><span class="p">)</span> |
| <a id="__codelineno-0-836" name="__codelineno-0-836"></a> |
| <a id="__codelineno-0-837" name="__codelineno-0-837"></a> <span class="k">def</span><span class="w"> </span><span class="nf">visit_not_starts_with</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">term</span><span class="p">:</span> <span class="n">BoundTerm</span><span class="p">[</span><span class="n">Any</span><span class="p">],</span> <span class="n">literal</span><span class="p">:</span> <span class="n">Literal</span><span class="p">[</span><span class="n">Any</span><span class="p">])</span> <span class="o">-></span> <span class="kc">None</span><span class="p">:</span> |
| <a id="__codelineno-0-838" name="__codelineno-0-838"></a> <span class="bp">self</span><span class="o">.</span><span class="n">_handle_null_unmentioned</span><span class="p">(</span><span class="n">term</span><span class="p">)</span> |
| <a id="__codelineno-0-839" name="__codelineno-0-839"></a> <span class="bp">self</span><span class="o">.</span><span class="n">_handle_nan_unmentioned</span><span class="p">(</span><span class="n">term</span><span class="p">)</span> |
| <a id="__codelineno-0-840" name="__codelineno-0-840"></a> |
| <a id="__codelineno-0-841" name="__codelineno-0-841"></a> <span class="k">def</span><span class="w"> </span><span class="nf">visit_true</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="kc">None</span><span class="p">:</span> |
| <a id="__codelineno-0-842" name="__codelineno-0-842"></a> <span class="k">return</span> |
| <a id="__codelineno-0-843" name="__codelineno-0-843"></a> |
| <a id="__codelineno-0-844" name="__codelineno-0-844"></a> <span class="k">def</span><span class="w"> </span><span class="nf">visit_false</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="kc">None</span><span class="p">:</span> |
| <a id="__codelineno-0-845" name="__codelineno-0-845"></a> <span class="k">return</span> |
| <a id="__codelineno-0-846" name="__codelineno-0-846"></a> |
| <a id="__codelineno-0-847" name="__codelineno-0-847"></a> <span class="k">def</span><span class="w"> </span><span class="nf">visit_not</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">child_result</span><span class="p">:</span> <span class="kc">None</span><span class="p">)</span> <span class="o">-></span> <span class="kc">None</span><span class="p">:</span> |
| <a id="__codelineno-0-848" name="__codelineno-0-848"></a> <span class="k">return</span> |
| <a id="__codelineno-0-849" name="__codelineno-0-849"></a> |
| <a id="__codelineno-0-850" name="__codelineno-0-850"></a> <span class="k">def</span><span class="w"> </span><span class="nf">visit_and</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">left_result</span><span class="p">:</span> <span class="kc">None</span><span class="p">,</span> <span class="n">right_result</span><span class="p">:</span> <span class="kc">None</span><span class="p">)</span> <span class="o">-></span> <span class="kc">None</span><span class="p">:</span> |
| <a id="__codelineno-0-851" name="__codelineno-0-851"></a> <span class="k">return</span> |
| <a id="__codelineno-0-852" name="__codelineno-0-852"></a> |
| <a id="__codelineno-0-853" name="__codelineno-0-853"></a> <span class="k">def</span><span class="w"> </span><span class="nf">visit_or</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">left_result</span><span class="p">:</span> <span class="kc">None</span><span class="p">,</span> <span class="n">right_result</span><span class="p">:</span> <span class="kc">None</span><span class="p">)</span> <span class="o">-></span> <span class="kc">None</span><span class="p">:</span> |
| <a id="__codelineno-0-854" name="__codelineno-0-854"></a> <span class="k">return</span> |
| <a id="__codelineno-0-855" name="__codelineno-0-855"></a> |
| <a id="__codelineno-0-856" name="__codelineno-0-856"></a> <span class="k">def</span><span class="w"> </span><span class="nf">collect</span><span class="p">(</span> |
| <a id="__codelineno-0-857" name="__codelineno-0-857"></a> <span class="bp">self</span><span class="p">,</span> |
| <a id="__codelineno-0-858" name="__codelineno-0-858"></a> <span class="n">expr</span><span class="p">:</span> <span class="n">BooleanExpression</span><span class="p">,</span> |
| <a id="__codelineno-0-859" name="__codelineno-0-859"></a> <span class="p">)</span> <span class="o">-></span> <span class="kc">None</span><span class="p">:</span> |
| <a id="__codelineno-0-860" name="__codelineno-0-860"></a><span class="w"> </span><span class="sd">"""Collect the bound references categorized by having at least one is_null or is_not_null in the expr and the remaining."""</span> |
| <a id="__codelineno-0-861" name="__codelineno-0-861"></a> <span class="n">boolean_expression_visit</span><span class="p">(</span><span class="n">expr</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span> |
| </code></pre></div></td></tr></table></div> |
| </details> |
| |
| |
| |
| <div class="doc doc-children"> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <div class="doc doc-object doc-function"> |
| |
| |
| <h3 id="pyiceberg.io.pyarrow._NullNaNUnmentionedTermsCollector._handle_explicit_is_nan_or_not" class="doc doc-heading"> |
| <code class="highlight language-python"><span class="n">_handle_explicit_is_nan_or_not</span><span class="p">(</span><span class="n">term</span><span class="p">)</span></code> |
| |
| <a href="#pyiceberg.io.pyarrow._NullNaNUnmentionedTermsCollector._handle_explicit_is_nan_or_not" class="headerlink" title="Permanent link">¶</a></h3> |
| |
| |
| <div class="doc doc-contents "> |
| |
| <p>Handle the predicate case where either is_nan or is_not_nan is included.</p> |
| |
| <details class="quote"> |
| <summary>Source code in <code>pyiceberg/io/pyarrow.py</code></summary> |
| <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-774">774</a></span> |
| <span class="normal"><a href="#__codelineno-0-775">775</a></span> |
| <span class="normal"><a href="#__codelineno-0-776">776</a></span> |
| <span class="normal"><a href="#__codelineno-0-777">777</a></span> |
| <span class="normal"><a href="#__codelineno-0-778">778</a></span></pre></div></td><td class="code"><div><pre><span></span><code><a id="__codelineno-0-774" name="__codelineno-0-774"></a><span class="k">def</span><span class="w"> </span><span class="nf">_handle_explicit_is_nan_or_not</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">term</span><span class="p">:</span> <span class="n">BoundTerm</span><span class="p">[</span><span class="n">Any</span><span class="p">])</span> <span class="o">-></span> <span class="kc">None</span><span class="p">:</span> |
| <a id="__codelineno-0-775" name="__codelineno-0-775"></a><span class="w"> </span><span class="sd">"""Handle the predicate case where either is_nan or is_not_nan is included."""</span> |
| <a id="__codelineno-0-776" name="__codelineno-0-776"></a> <span class="k">if</span> <span class="n">term</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">nan_unmentioned_bound_terms</span><span class="p">:</span> |
| <a id="__codelineno-0-777" name="__codelineno-0-777"></a> <span class="bp">self</span><span class="o">.</span><span class="n">nan_unmentioned_bound_terms</span><span class="o">.</span><span class="n">remove</span><span class="p">(</span><span class="n">term</span><span class="p">)</span> |
| <a id="__codelineno-0-778" name="__codelineno-0-778"></a> <span class="bp">self</span><span class="o">.</span><span class="n">is_nan_or_not_bound_terms</span><span class="o">.</span><span class="n">add</span><span class="p">(</span><span class="n">term</span><span class="p">)</span> |
| </code></pre></div></td></tr></table></div> |
| </details> |
| </div> |
| |
| </div> |
| |
| <div class="doc doc-object doc-function"> |
| |
| |
| <h3 id="pyiceberg.io.pyarrow._NullNaNUnmentionedTermsCollector._handle_explicit_is_null_or_not" class="doc doc-heading"> |
| <code class="highlight language-python"><span class="n">_handle_explicit_is_null_or_not</span><span class="p">(</span><span class="n">term</span><span class="p">)</span></code> |
| |
| <a href="#pyiceberg.io.pyarrow._NullNaNUnmentionedTermsCollector._handle_explicit_is_null_or_not" class="headerlink" title="Permanent link">¶</a></h3> |
| |
| |
| <div class="doc doc-contents "> |
| |
| <p>Handle the predicate case where either is_null or is_not_null is included.</p> |
| |
| <details class="quote"> |
| <summary>Source code in <code>pyiceberg/io/pyarrow.py</code></summary> |
| <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-763">763</a></span> |
| <span class="normal"><a href="#__codelineno-0-764">764</a></span> |
| <span class="normal"><a href="#__codelineno-0-765">765</a></span> |
| <span class="normal"><a href="#__codelineno-0-766">766</a></span> |
| <span class="normal"><a href="#__codelineno-0-767">767</a></span></pre></div></td><td class="code"><div><pre><span></span><code><a id="__codelineno-0-763" name="__codelineno-0-763"></a><span class="k">def</span><span class="w"> </span><span class="nf">_handle_explicit_is_null_or_not</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">term</span><span class="p">:</span> <span class="n">BoundTerm</span><span class="p">[</span><span class="n">Any</span><span class="p">])</span> <span class="o">-></span> <span class="kc">None</span><span class="p">:</span> |
| <a id="__codelineno-0-764" name="__codelineno-0-764"></a><span class="w"> </span><span class="sd">"""Handle the predicate case where either is_null or is_not_null is included."""</span> |
| <a id="__codelineno-0-765" name="__codelineno-0-765"></a> <span class="k">if</span> <span class="n">term</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">null_unmentioned_bound_terms</span><span class="p">:</span> |
| <a id="__codelineno-0-766" name="__codelineno-0-766"></a> <span class="bp">self</span><span class="o">.</span><span class="n">null_unmentioned_bound_terms</span><span class="o">.</span><span class="n">remove</span><span class="p">(</span><span class="n">term</span><span class="p">)</span> |
| <a id="__codelineno-0-767" name="__codelineno-0-767"></a> <span class="bp">self</span><span class="o">.</span><span class="n">is_null_or_not_bound_terms</span><span class="o">.</span><span class="n">add</span><span class="p">(</span><span class="n">term</span><span class="p">)</span> |
| </code></pre></div></td></tr></table></div> |
| </details> |
| </div> |
| |
| </div> |
| |
| <div class="doc doc-object doc-function"> |
| |
| |
| <h3 id="pyiceberg.io.pyarrow._NullNaNUnmentionedTermsCollector._handle_nan_unmentioned" class="doc doc-heading"> |
| <code class="highlight language-python"><span class="n">_handle_nan_unmentioned</span><span class="p">(</span><span class="n">term</span><span class="p">)</span></code> |
| |
| <a href="#pyiceberg.io.pyarrow._NullNaNUnmentionedTermsCollector._handle_nan_unmentioned" class="headerlink" title="Permanent link">¶</a></h3> |
| |
| |
| <div class="doc doc-contents "> |
| |
| <p>Handle the predicate case where neither is_nan nor is_not_nan is included.</p> |
| |
| <details class="quote"> |
| <summary>Source code in <code>pyiceberg/io/pyarrow.py</code></summary> |
| <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-780">780</a></span> |
| <span class="normal"><a href="#__codelineno-0-781">781</a></span> |
| <span class="normal"><a href="#__codelineno-0-782">782</a></span> |
| <span class="normal"><a href="#__codelineno-0-783">783</a></span></pre></div></td><td class="code"><div><pre><span></span><code><a id="__codelineno-0-780" name="__codelineno-0-780"></a><span class="k">def</span><span class="w"> </span><span class="nf">_handle_nan_unmentioned</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">term</span><span class="p">:</span> <span class="n">BoundTerm</span><span class="p">[</span><span class="n">Any</span><span class="p">])</span> <span class="o">-></span> <span class="kc">None</span><span class="p">:</span> |
| <a id="__codelineno-0-781" name="__codelineno-0-781"></a><span class="w"> </span><span class="sd">"""Handle the predicate case where neither is_nan or is_not_nan is included."""</span> |
| <a id="__codelineno-0-782" name="__codelineno-0-782"></a> <span class="k">if</span> <span class="n">term</span> <span class="ow">not</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">is_nan_or_not_bound_terms</span><span class="p">:</span> |
| <a id="__codelineno-0-783" name="__codelineno-0-783"></a> <span class="bp">self</span><span class="o">.</span><span class="n">nan_unmentioned_bound_terms</span><span class="o">.</span><span class="n">add</span><span class="p">(</span><span class="n">term</span><span class="p">)</span> |
| </code></pre></div></td></tr></table></div> |
| </details> |
| </div> |
| |
| </div> |
| |
| <div class="doc doc-object doc-function"> |
| |
| |
| <h3 id="pyiceberg.io.pyarrow._NullNaNUnmentionedTermsCollector._handle_null_unmentioned" class="doc doc-heading"> |
| <code class="highlight language-python"><span class="n">_handle_null_unmentioned</span><span class="p">(</span><span class="n">term</span><span class="p">)</span></code> |
| |
| <a href="#pyiceberg.io.pyarrow._NullNaNUnmentionedTermsCollector._handle_null_unmentioned" class="headerlink" title="Permanent link">¶</a></h3> |
| |
| |
| <div class="doc doc-contents "> |
| |
| <p>Handle the predicate case where neither is_null nor is_not_null is included.</p> |
| |
| <details class="quote"> |
| <summary>Source code in <code>pyiceberg/io/pyarrow.py</code></summary> |
| <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-769">769</a></span> |
| <span class="normal"><a href="#__codelineno-0-770">770</a></span> |
| <span class="normal"><a href="#__codelineno-0-771">771</a></span> |
| <span class="normal"><a href="#__codelineno-0-772">772</a></span></pre></div></td><td class="code"><div><pre><span></span><code><a id="__codelineno-0-769" name="__codelineno-0-769"></a><span class="k">def</span><span class="w"> </span><span class="nf">_handle_null_unmentioned</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">term</span><span class="p">:</span> <span class="n">BoundTerm</span><span class="p">[</span><span class="n">Any</span><span class="p">])</span> <span class="o">-></span> <span class="kc">None</span><span class="p">:</span> |
| <a id="__codelineno-0-770" name="__codelineno-0-770"></a><span class="w"> </span><span class="sd">"""Handle the predicate case where neither is_null or is_not_null is included."""</span> |
| <a id="__codelineno-0-771" name="__codelineno-0-771"></a> <span class="k">if</span> <span class="n">term</span> <span class="ow">not</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">is_null_or_not_bound_terms</span><span class="p">:</span> |
| <a id="__codelineno-0-772" name="__codelineno-0-772"></a> <span class="bp">self</span><span class="o">.</span><span class="n">null_unmentioned_bound_terms</span><span class="o">.</span><span class="n">add</span><span class="p">(</span><span class="n">term</span><span class="p">)</span> |
| </code></pre></div></td></tr></table></div> |
| </details> |
| </div> |
| |
| </div> |
| |
| <div class="doc doc-object doc-function"> |
| |
| |
| <h3 id="pyiceberg.io.pyarrow._NullNaNUnmentionedTermsCollector.collect" class="doc doc-heading"> |
| <code class="highlight language-python"><span class="n">collect</span><span class="p">(</span><span class="n">expr</span><span class="p">)</span></code> |
| |
| <a href="#pyiceberg.io.pyarrow._NullNaNUnmentionedTermsCollector.collect" class="headerlink" title="Permanent link">¶</a></h3> |
| |
| |
| <div class="doc doc-contents "> |
| |
| <p>Collect the bound references in the expr, categorized into those that have at least one is_null or is_not_null and the remaining ones.</p> |
| |
| <details class="quote"> |
| <summary>Source code in <code>pyiceberg/io/pyarrow.py</code></summary> |
| <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-856">856</a></span> |
| <span class="normal"><a href="#__codelineno-0-857">857</a></span> |
| <span class="normal"><a href="#__codelineno-0-858">858</a></span> |
| <span class="normal"><a href="#__codelineno-0-859">859</a></span> |
| <span class="normal"><a href="#__codelineno-0-860">860</a></span> |
| <span class="normal"><a href="#__codelineno-0-861">861</a></span></pre></div></td><td class="code"><div><pre><span></span><code><a id="__codelineno-0-856" name="__codelineno-0-856"></a><span class="k">def</span><span class="w"> </span><span class="nf">collect</span><span class="p">(</span> |
| <a id="__codelineno-0-857" name="__codelineno-0-857"></a> <span class="bp">self</span><span class="p">,</span> |
| <a id="__codelineno-0-858" name="__codelineno-0-858"></a> <span class="n">expr</span><span class="p">:</span> <span class="n">BooleanExpression</span><span class="p">,</span> |
| <a id="__codelineno-0-859" name="__codelineno-0-859"></a><span class="p">)</span> <span class="o">-></span> <span class="kc">None</span><span class="p">:</span> |
| <a id="__codelineno-0-860" name="__codelineno-0-860"></a><span class="w"> </span><span class="sd">"""Collect the bound references categorized by having at least one is_null or is_not_null in the expr and the remaining."""</span> |
| <a id="__codelineno-0-861" name="__codelineno-0-861"></a> <span class="n">boolean_expression_visit</span><span class="p">(</span><span class="n">expr</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span> |
| </code></pre></div></td></tr></table></div> |
| </details> |
| </div> |
| |
| </div> |
| |
| |
| |
| </div> |
| |
| </div> |
| |
| </div> |
| |
| |
| <div class="doc doc-object doc-function"> |
| |
| |
| <h2 id="pyiceberg.io.pyarrow._check_pyarrow_schema_compatible" class="doc doc-heading"> |
| <code class="highlight language-python"><span class="n">_check_pyarrow_schema_compatible</span><span class="p">(</span><span class="n">requested_schema</span><span class="p">,</span> <span class="n">provided_schema</span><span class="p">,</span> <span class="n">downcast_ns_timestamp_to_us</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span></code> |
| |
| <a href="#pyiceberg.io.pyarrow._check_pyarrow_schema_compatible" class="headerlink" title="Permanent link">¶</a></h2> |
| |
| |
| <div class="doc doc-contents "> |
| |
| <p>Check if the <code>requested_schema</code> is compatible with <code>provided_schema</code>.</p> |
| <p>Two schemas are considered compatible when they are equal in terms of the Iceberg Schema type.</p> |
| |
| |
| <p><span class="doc-section-title">Raises:</span></p> |
| <table> |
| <thead> |
| <tr> |
| <th>Type</th> |
| <th>Description</th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr class="doc-section-item"> |
| <td> |
| <code><span title="ValueError">ValueError</span></code> |
| </td> |
| <td> |
| <div class="doc-md-description"> |
| <p>If the schemas are not compatible.</p> |
| </div> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| |
| <details class="quote"> |
| <summary>Source code in <code>pyiceberg/io/pyarrow.py</code></summary> |
| <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-2442">2442</a></span> |
| <span class="normal"><a href="#__codelineno-0-2443">2443</a></span> |
| <span class="normal"><a href="#__codelineno-0-2444">2444</a></span> |
| <span class="normal"><a href="#__codelineno-0-2445">2445</a></span> |
| <span class="normal"><a href="#__codelineno-0-2446">2446</a></span> |
| <span class="normal"><a href="#__codelineno-0-2447">2447</a></span> |
| <span class="normal"><a href="#__codelineno-0-2448">2448</a></span> |
| <span class="normal"><a href="#__codelineno-0-2449">2449</a></span> |
| <span class="normal"><a href="#__codelineno-0-2450">2450</a></span> |
| <span class="normal"><a href="#__codelineno-0-2451">2451</a></span> |
| <span class="normal"><a href="#__codelineno-0-2452">2452</a></span> |
| <span class="normal"><a href="#__codelineno-0-2453">2453</a></span> |
| <span class="normal"><a href="#__codelineno-0-2454">2454</a></span> |
| <span class="normal"><a href="#__codelineno-0-2455">2455</a></span> |
| <span class="normal"><a href="#__codelineno-0-2456">2456</a></span> |
| <span class="normal"><a href="#__codelineno-0-2457">2457</a></span> |
| <span class="normal"><a href="#__codelineno-0-2458">2458</a></span> |
| <span class="normal"><a href="#__codelineno-0-2459">2459</a></span> |
| <span class="normal"><a href="#__codelineno-0-2460">2460</a></span> |
| <span class="normal"><a href="#__codelineno-0-2461">2461</a></span> |
| <span class="normal"><a href="#__codelineno-0-2462">2462</a></span> |
| <span class="normal"><a href="#__codelineno-0-2463">2463</a></span> |
| <span class="normal"><a href="#__codelineno-0-2464">2464</a></span></pre></div></td><td class="code"><div><pre><span></span><code><a id="__codelineno-0-2442" name="__codelineno-0-2442"></a><span class="k">def</span><span class="w"> </span><span class="nf">_check_pyarrow_schema_compatible</span><span class="p">(</span> |
| <a id="__codelineno-0-2443" name="__codelineno-0-2443"></a> <span class="n">requested_schema</span><span class="p">:</span> <span class="n">Schema</span><span class="p">,</span> <span class="n">provided_schema</span><span class="p">:</span> <span class="n">pa</span><span class="o">.</span><span class="n">Schema</span><span class="p">,</span> <span class="n">downcast_ns_timestamp_to_us</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">False</span> |
| <a id="__codelineno-0-2444" name="__codelineno-0-2444"></a><span class="p">)</span> <span class="o">-></span> <span class="kc">None</span><span class="p">:</span> |
| <a id="__codelineno-0-2445" name="__codelineno-0-2445"></a><span class="w"> </span><span class="sd">"""</span> |
| <a id="__codelineno-0-2446" name="__codelineno-0-2446"></a><span class="sd"> Check if the `requested_schema` is compatible with `provided_schema`.</span> |
| <a id="__codelineno-0-2447" name="__codelineno-0-2447"></a> |
| <a id="__codelineno-0-2448" name="__codelineno-0-2448"></a><span class="sd"> Two schemas are considered compatible when they are equal in terms of the Iceberg Schema type.</span> |
| <a id="__codelineno-0-2449" name="__codelineno-0-2449"></a> |
| <a id="__codelineno-0-2450" name="__codelineno-0-2450"></a><span class="sd"> Raises:</span> |
| <a id="__codelineno-0-2451" name="__codelineno-0-2451"></a><span class="sd"> ValueError: If the schemas are not compatible.</span> |
| <a id="__codelineno-0-2452" name="__codelineno-0-2452"></a><span class="sd"> """</span> |
| <a id="__codelineno-0-2453" name="__codelineno-0-2453"></a> <span class="n">name_mapping</span> <span class="o">=</span> <span class="n">requested_schema</span><span class="o">.</span><span class="n">name_mapping</span> |
| <a id="__codelineno-0-2454" name="__codelineno-0-2454"></a> <span class="k">try</span><span class="p">:</span> |
| <a id="__codelineno-0-2455" name="__codelineno-0-2455"></a> <span class="n">provided_schema</span> <span class="o">=</span> <span class="n">pyarrow_to_schema</span><span class="p">(</span> |
| <a id="__codelineno-0-2456" name="__codelineno-0-2456"></a> <span class="n">provided_schema</span><span class="p">,</span> <span class="n">name_mapping</span><span class="o">=</span><span class="n">name_mapping</span><span class="p">,</span> <span class="n">downcast_ns_timestamp_to_us</span><span class="o">=</span><span class="n">downcast_ns_timestamp_to_us</span> |
| <a id="__codelineno-0-2457" name="__codelineno-0-2457"></a> <span class="p">)</span> |
| <a id="__codelineno-0-2458" name="__codelineno-0-2458"></a> <span class="k">except</span> <span class="ne">ValueError</span> <span class="k">as</span> <span class="n">e</span><span class="p">:</span> |
| <a id="__codelineno-0-2459" name="__codelineno-0-2459"></a> <span class="n">provided_schema</span> <span class="o">=</span> <span class="n">_pyarrow_to_schema_without_ids</span><span class="p">(</span><span class="n">provided_schema</span><span class="p">,</span> <span class="n">downcast_ns_timestamp_to_us</span><span class="o">=</span><span class="n">downcast_ns_timestamp_to_us</span><span class="p">)</span> |
| <a id="__codelineno-0-2460" name="__codelineno-0-2460"></a> <span class="n">additional_names</span> <span class="o">=</span> <span class="nb">set</span><span class="p">(</span><span class="n">provided_schema</span><span class="o">.</span><span class="n">_name_to_id</span><span class="o">.</span><span class="n">keys</span><span class="p">())</span> <span class="o">-</span> <span class="nb">set</span><span class="p">(</span><span class="n">requested_schema</span><span class="o">.</span><span class="n">_name_to_id</span><span class="o">.</span><span class="n">keys</span><span class="p">())</span> |
| <a id="__codelineno-0-2461" name="__codelineno-0-2461"></a> <span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span> |
| <a id="__codelineno-0-2462" name="__codelineno-0-2462"></a> <span class="sa">f</span><span class="s2">"PyArrow table contains more columns: </span><span class="si">{</span><span class="s1">', '</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="nb">sorted</span><span class="p">(</span><span class="n">additional_names</span><span class="p">))</span><span class="si">}</span><span class="s2">. Update the schema first (hint, use union_by_name)."</span> |
| <a id="__codelineno-0-2463" name="__codelineno-0-2463"></a> <span class="p">)</span> <span class="kn">from</span><span class="w"> </span><span class="nn">e</span> |
| <a id="__codelineno-0-2464" name="__codelineno-0-2464"></a> <span class="n">_check_schema_compatible</span><span class="p">(</span><span class="n">requested_schema</span><span class="p">,</span> <span class="n">provided_schema</span><span class="p">)</span> |
| </code></pre></div></td></tr></table></div> |
| </details> |
| </div> |
| |
| </div> |
| |
| <div class="doc doc-object doc-function"> |
| |
| |
| <h2 id="pyiceberg.io.pyarrow._dataframe_to_data_files" class="doc doc-heading"> |
| <code class="highlight language-python"><span class="n">_dataframe_to_data_files</span><span class="p">(</span><span class="n">table_metadata</span><span class="p">,</span> <span class="n">df</span><span class="p">,</span> <span class="n">io</span><span class="p">,</span> <span class="n">write_uuid</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">counter</span><span class="o">=</span><span class="kc">None</span><span class="p">)</span></code> |
| |
| <a href="#pyiceberg.io.pyarrow._dataframe_to_data_files" class="headerlink" title="Permanent link">¶</a></h2> |
| |
| |
| <div class="doc doc-contents "> |
| |
| <p>Convert a PyArrow table into DataFiles.</p> |
| |
| |
| <p><span class="doc-section-title">Returns:</span></p> |
| <table> |
| <thead> |
| <tr> |
| <th>Type</th> |
| <th>Description</th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr class="doc-section-item"> |
| <td> |
| <code><span title="typing.Iterable">Iterable</span>[<a class="autorefs autorefs-internal" title="pyiceberg.manifest.DataFile" href="../../manifest/#pyiceberg.manifest.DataFile">DataFile</a>]</code> |
| </td> |
| <td> |
| <div class="doc-md-description"> |
| <p>An iterable that supplies datafiles that represent the table.</p> |
| </div> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| |
| <details class="quote"> |
| <summary>Source code in <code>pyiceberg/io/pyarrow.py</code></summary> |
| <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-2546">2546</a></span> |
| <span class="normal"><a href="#__codelineno-0-2547">2547</a></span> |
| <span class="normal"><a href="#__codelineno-0-2548">2548</a></span> |
| <span class="normal"><a href="#__codelineno-0-2549">2549</a></span> |
| <span class="normal"><a href="#__codelineno-0-2550">2550</a></span> |
| <span class="normal"><a href="#__codelineno-0-2551">2551</a></span> |
| <span class="normal"><a href="#__codelineno-0-2552">2552</a></span> |
| <span class="normal"><a href="#__codelineno-0-2553">2553</a></span> |
| <span class="normal"><a href="#__codelineno-0-2554">2554</a></span> |
| <span class="normal"><a href="#__codelineno-0-2555">2555</a></span> |
| <span class="normal"><a href="#__codelineno-0-2556">2556</a></span> |
| <span class="normal"><a href="#__codelineno-0-2557">2557</a></span> |
| <span class="normal"><a href="#__codelineno-0-2558">2558</a></span> |
| <span class="normal"><a href="#__codelineno-0-2559">2559</a></span> |
| <span class="normal"><a href="#__codelineno-0-2560">2560</a></span> |
| <span class="normal"><a href="#__codelineno-0-2561">2561</a></span> |
| <span class="normal"><a href="#__codelineno-0-2562">2562</a></span> |
| <span class="normal"><a href="#__codelineno-0-2563">2563</a></span> |
| <span class="normal"><a href="#__codelineno-0-2564">2564</a></span> |
| <span class="normal"><a href="#__codelineno-0-2565">2565</a></span> |
| <span class="normal"><a href="#__codelineno-0-2566">2566</a></span> |
| <span class="normal"><a href="#__codelineno-0-2567">2567</a></span> |
| <span class="normal"><a href="#__codelineno-0-2568">2568</a></span> |
| <span class="normal"><a href="#__codelineno-0-2569">2569</a></span> |
| <span class="normal"><a href="#__codelineno-0-2570">2570</a></span> |
| <span class="normal"><a href="#__codelineno-0-2571">2571</a></span> |
| <span class="normal"><a href="#__codelineno-0-2572">2572</a></span> |
| <span class="normal"><a href="#__codelineno-0-2573">2573</a></span> |
| <span class="normal"><a href="#__codelineno-0-2574">2574</a></span> |
| <span class="normal"><a href="#__codelineno-0-2575">2575</a></span> |
| <span class="normal"><a href="#__codelineno-0-2576">2576</a></span> |
| <span class="normal"><a href="#__codelineno-0-2577">2577</a></span> |
| <span class="normal"><a href="#__codelineno-0-2578">2578</a></span> |
| <span class="normal"><a href="#__codelineno-0-2579">2579</a></span> |
| <span class="normal"><a href="#__codelineno-0-2580">2580</a></span> |
| <span class="normal"><a href="#__codelineno-0-2581">2581</a></span> |
| <span class="normal"><a href="#__codelineno-0-2582">2582</a></span> |
| <span class="normal"><a href="#__codelineno-0-2583">2583</a></span> |
| <span class="normal"><a href="#__codelineno-0-2584">2584</a></span> |
| <span class="normal"><a href="#__codelineno-0-2585">2585</a></span> |
| <span class="normal"><a href="#__codelineno-0-2586">2586</a></span> |
| <span class="normal"><a href="#__codelineno-0-2587">2587</a></span> |
| <span class="normal"><a href="#__codelineno-0-2588">2588</a></span> |
| <span class="normal"><a href="#__codelineno-0-2589">2589</a></span> |
| <span class="normal"><a href="#__codelineno-0-2590">2590</a></span> |
| <span class="normal"><a href="#__codelineno-0-2591">2591</a></span> |
| <span class="normal"><a href="#__codelineno-0-2592">2592</a></span> |
| <span class="normal"><a href="#__codelineno-0-2593">2593</a></span> |
| <span class="normal"><a href="#__codelineno-0-2594">2594</a></span> |
| <span class="normal"><a href="#__codelineno-0-2595">2595</a></span> |
| <span class="normal"><a href="#__codelineno-0-2596">2596</a></span> |
| <span class="normal"><a href="#__codelineno-0-2597">2597</a></span> |
| <span class="normal"><a href="#__codelineno-0-2598">2598</a></span> |
| <span class="normal"><a href="#__codelineno-0-2599">2599</a></span> |
| <span class="normal"><a href="#__codelineno-0-2600">2600</a></span></pre></div></td><td class="code"><div><pre><span></span><code><a id="__codelineno-0-2546" name="__codelineno-0-2546"></a><span class="k">def</span><span class="w"> </span><span class="nf">_dataframe_to_data_files</span><span class="p">(</span> |
| <a id="__codelineno-0-2547" name="__codelineno-0-2547"></a> <span class="n">table_metadata</span><span class="p">:</span> <span class="n">TableMetadata</span><span class="p">,</span> |
| <a id="__codelineno-0-2548" name="__codelineno-0-2548"></a> <span class="n">df</span><span class="p">:</span> <span class="n">pa</span><span class="o">.</span><span class="n">Table</span><span class="p">,</span> |
| <a id="__codelineno-0-2549" name="__codelineno-0-2549"></a> <span class="n">io</span><span class="p">:</span> <span class="n">FileIO</span><span class="p">,</span> |
| <a id="__codelineno-0-2550" name="__codelineno-0-2550"></a> <span class="n">write_uuid</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">uuid</span><span class="o">.</span><span class="n">UUID</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> |
| <a id="__codelineno-0-2551" name="__codelineno-0-2551"></a> <span class="n">counter</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">itertools</span><span class="o">.</span><span class="n">count</span><span class="p">[</span><span class="nb">int</span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> |
| <a id="__codelineno-0-2552" name="__codelineno-0-2552"></a><span class="p">)</span> <span class="o">-></span> <span class="n">Iterable</span><span class="p">[</span><span class="n">DataFile</span><span class="p">]:</span> |
| <a id="__codelineno-0-2553" name="__codelineno-0-2553"></a><span class="w"> </span><span class="sd">"""Convert a PyArrow table into a DataFile.</span> |
| <a id="__codelineno-0-2554" name="__codelineno-0-2554"></a> |
| <a id="__codelineno-0-2555" name="__codelineno-0-2555"></a><span class="sd"> Returns:</span> |
| <a id="__codelineno-0-2556" name="__codelineno-0-2556"></a><span class="sd"> An iterable that supplies datafiles that represent the table.</span> |
| <a id="__codelineno-0-2557" name="__codelineno-0-2557"></a><span class="sd"> """</span> |
| <a id="__codelineno-0-2558" name="__codelineno-0-2558"></a> <span class="kn">from</span><span class="w"> </span><span class="nn">pyiceberg.table</span><span class="w"> </span><span class="kn">import</span> <span class="n">DOWNCAST_NS_TIMESTAMP_TO_US_ON_WRITE</span><span class="p">,</span> <span class="n">TableProperties</span><span class="p">,</span> <span class="n">WriteTask</span> |
| <a id="__codelineno-0-2559" name="__codelineno-0-2559"></a> |
| <a id="__codelineno-0-2560" name="__codelineno-0-2560"></a> <span class="n">counter</span> <span class="o">=</span> <span class="n">counter</span> <span class="ow">or</span> <span class="n">itertools</span><span class="o">.</span><span class="n">count</span><span class="p">(</span><span class="mi">0</span><span class="p">)</span> |
| <a id="__codelineno-0-2561" name="__codelineno-0-2561"></a> <span class="n">write_uuid</span> <span class="o">=</span> <span class="n">write_uuid</span> <span class="ow">or</span> <span class="n">uuid</span><span class="o">.</span><span class="n">uuid4</span><span class="p">()</span> |
| <a id="__codelineno-0-2562" name="__codelineno-0-2562"></a> <span class="n">target_file_size</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="n">property_as_int</span><span class="p">(</span> <span class="c1"># type: ignore # The property is set with non-None value.</span> |
| <a id="__codelineno-0-2563" name="__codelineno-0-2563"></a> <span class="n">properties</span><span class="o">=</span><span class="n">table_metadata</span><span class="o">.</span><span class="n">properties</span><span class="p">,</span> |
| <a id="__codelineno-0-2564" name="__codelineno-0-2564"></a> <span class="n">property_name</span><span class="o">=</span><span class="n">TableProperties</span><span class="o">.</span><span class="n">WRITE_TARGET_FILE_SIZE_BYTES</span><span class="p">,</span> |
| <a id="__codelineno-0-2565" name="__codelineno-0-2565"></a> <span class="n">default</span><span class="o">=</span><span class="n">TableProperties</span><span class="o">.</span><span class="n">WRITE_TARGET_FILE_SIZE_BYTES_DEFAULT</span><span class="p">,</span> |
| <a id="__codelineno-0-2566" name="__codelineno-0-2566"></a> <span class="p">)</span> |
| <a id="__codelineno-0-2567" name="__codelineno-0-2567"></a> <span class="n">name_mapping</span> <span class="o">=</span> <span class="n">table_metadata</span><span class="o">.</span><span class="n">schema</span><span class="p">()</span><span class="o">.</span><span class="n">name_mapping</span> |
| <a id="__codelineno-0-2568" name="__codelineno-0-2568"></a> <span class="n">downcast_ns_timestamp_to_us</span> <span class="o">=</span> <span class="n">Config</span><span class="p">()</span><span class="o">.</span><span class="n">get_bool</span><span class="p">(</span><span class="n">DOWNCAST_NS_TIMESTAMP_TO_US_ON_WRITE</span><span class="p">)</span> <span class="ow">or</span> <span class="kc">False</span> |
| <a id="__codelineno-0-2569" name="__codelineno-0-2569"></a> <span class="n">task_schema</span> <span class="o">=</span> <span class="n">pyarrow_to_schema</span><span class="p">(</span><span class="n">df</span><span class="o">.</span><span class="n">schema</span><span class="p">,</span> <span class="n">name_mapping</span><span class="o">=</span><span class="n">name_mapping</span><span class="p">,</span> <span class="n">downcast_ns_timestamp_to_us</span><span class="o">=</span><span class="n">downcast_ns_timestamp_to_us</span><span class="p">)</span> |
| <a id="__codelineno-0-2570" name="__codelineno-0-2570"></a> |
| <a id="__codelineno-0-2571" name="__codelineno-0-2571"></a> <span class="k">if</span> <span class="n">table_metadata</span><span class="o">.</span><span class="n">spec</span><span class="p">()</span><span class="o">.</span><span class="n">is_unpartitioned</span><span class="p">():</span> |
| <a id="__codelineno-0-2572" name="__codelineno-0-2572"></a> <span class="k">yield from</span> <span class="n">write_file</span><span class="p">(</span> |
| <a id="__codelineno-0-2573" name="__codelineno-0-2573"></a> <span class="n">io</span><span class="o">=</span><span class="n">io</span><span class="p">,</span> |
| <a id="__codelineno-0-2574" name="__codelineno-0-2574"></a> <span class="n">table_metadata</span><span class="o">=</span><span class="n">table_metadata</span><span class="p">,</span> |
| <a id="__codelineno-0-2575" name="__codelineno-0-2575"></a> <span class="n">tasks</span><span class="o">=</span><span class="nb">iter</span><span class="p">(</span> |
| <a id="__codelineno-0-2576" name="__codelineno-0-2576"></a> <span class="p">[</span> |
| <a id="__codelineno-0-2577" name="__codelineno-0-2577"></a> <span class="n">WriteTask</span><span class="p">(</span><span class="n">write_uuid</span><span class="o">=</span><span class="n">write_uuid</span><span class="p">,</span> <span class="n">task_id</span><span class="o">=</span><span class="nb">next</span><span class="p">(</span><span class="n">counter</span><span class="p">),</span> <span class="n">record_batches</span><span class="o">=</span><span class="n">batches</span><span class="p">,</span> <span class="n">schema</span><span class="o">=</span><span class="n">task_schema</span><span class="p">)</span> |
| <a id="__codelineno-0-2578" name="__codelineno-0-2578"></a> <span class="k">for</span> <span class="n">batches</span> <span class="ow">in</span> <span class="n">bin_pack_arrow_table</span><span class="p">(</span><span class="n">df</span><span class="p">,</span> <span class="n">target_file_size</span><span class="p">)</span> |
| <a id="__codelineno-0-2579" name="__codelineno-0-2579"></a> <span class="p">]</span> |
| <a id="__codelineno-0-2580" name="__codelineno-0-2580"></a> <span class="p">),</span> |
| <a id="__codelineno-0-2581" name="__codelineno-0-2581"></a> <span class="p">)</span> |
| <a id="__codelineno-0-2582" name="__codelineno-0-2582"></a> <span class="k">else</span><span class="p">:</span> |
| <a id="__codelineno-0-2583" name="__codelineno-0-2583"></a> <span class="n">partitions</span> <span class="o">=</span> <span class="n">_determine_partitions</span><span class="p">(</span><span class="n">spec</span><span class="o">=</span><span class="n">table_metadata</span><span class="o">.</span><span class="n">spec</span><span class="p">(),</span> <span class="n">schema</span><span class="o">=</span><span class="n">table_metadata</span><span class="o">.</span><span class="n">schema</span><span class="p">(),</span> <span class="n">arrow_table</span><span class="o">=</span><span class="n">df</span><span class="p">)</span> |
| <a id="__codelineno-0-2584" name="__codelineno-0-2584"></a> <span class="k">yield from</span> <span class="n">write_file</span><span class="p">(</span> |
| <a id="__codelineno-0-2585" name="__codelineno-0-2585"></a> <span class="n">io</span><span class="o">=</span><span class="n">io</span><span class="p">,</span> |
| <a id="__codelineno-0-2586" name="__codelineno-0-2586"></a> <span class="n">table_metadata</span><span class="o">=</span><span class="n">table_metadata</span><span class="p">,</span> |
| <a id="__codelineno-0-2587" name="__codelineno-0-2587"></a> <span class="n">tasks</span><span class="o">=</span><span class="nb">iter</span><span class="p">(</span> |
| <a id="__codelineno-0-2588" name="__codelineno-0-2588"></a> <span class="p">[</span> |
| <a id="__codelineno-0-2589" name="__codelineno-0-2589"></a> <span class="n">WriteTask</span><span class="p">(</span> |
| <a id="__codelineno-0-2590" name="__codelineno-0-2590"></a> <span class="n">write_uuid</span><span class="o">=</span><span class="n">write_uuid</span><span class="p">,</span> |
| <a id="__codelineno-0-2591" name="__codelineno-0-2591"></a> <span class="n">task_id</span><span class="o">=</span><span class="nb">next</span><span class="p">(</span><span class="n">counter</span><span class="p">),</span> |
| <a id="__codelineno-0-2592" name="__codelineno-0-2592"></a> <span class="n">record_batches</span><span class="o">=</span><span class="n">batches</span><span class="p">,</span> |
| <a id="__codelineno-0-2593" name="__codelineno-0-2593"></a> <span class="n">partition_key</span><span class="o">=</span><span class="n">partition</span><span class="o">.</span><span class="n">partition_key</span><span class="p">,</span> |
| <a id="__codelineno-0-2594" name="__codelineno-0-2594"></a> <span class="n">schema</span><span class="o">=</span><span class="n">task_schema</span><span class="p">,</span> |
| <a id="__codelineno-0-2595" name="__codelineno-0-2595"></a> <span class="p">)</span> |
| <a id="__codelineno-0-2596" name="__codelineno-0-2596"></a> <span class="k">for</span> <span class="n">partition</span> <span class="ow">in</span> <span class="n">partitions</span> |
| <a id="__codelineno-0-2597" name="__codelineno-0-2597"></a> <span class="k">for</span> <span class="n">batches</span> <span class="ow">in</span> <span class="n">bin_pack_arrow_table</span><span class="p">(</span><span class="n">partition</span><span class="o">.</span><span class="n">arrow_table_partition</span><span class="p">,</span> <span class="n">target_file_size</span><span class="p">)</span> |
| <a id="__codelineno-0-2598" name="__codelineno-0-2598"></a> <span class="p">]</span> |
| <a id="__codelineno-0-2599" name="__codelineno-0-2599"></a> <span class="p">),</span> |
| <a id="__codelineno-0-2600" name="__codelineno-0-2600"></a> <span class="p">)</span> |
| </code></pre></div></td></tr></table></div> |
| </details> |
| </div> |
| |
| </div> |
| |
| <div class="doc doc-object doc-function"> |
| |
| |
| <h2 id="pyiceberg.io.pyarrow._determine_partitions" class="doc doc-heading"> |
| <code class="highlight language-python"><span class="n">_determine_partitions</span><span class="p">(</span><span class="n">spec</span><span class="p">,</span> <span class="n">schema</span><span class="p">,</span> <span class="n">arrow_table</span><span class="p">)</span></code> |
| |
| <a href="#pyiceberg.io.pyarrow._determine_partitions" class="headerlink" title="Permanent link">¶</a></h2> |
| |
| |
| <div class="doc doc-contents "> |
| |
| <p>Based on the iceberg table partition spec, filter the arrow table into partitions with their keys.</p> |
| <p>Example:</p> |
| <p>Input: |
| An arrow table with partition key of <code>['n_legs', 'year']</code> and with data of |
| <code>{'year': [2020, 2022, 2022, 2021, 2022, 2022, 2022, 2019, 2021], |
| 'n_legs': [2, 2, 2, 4, 4, 4, 4, 5, 100], |
| 'animal': ["Flamingo", "Parrot", "Parrot", "Dog", "Horse", "Horse", "Horse","Brittle stars", "Centipede"]}</code>.</p> |
| <p>The algorithm:</p> |
| <ul> |
| <li>We determine the set of unique partition keys</li> |
| <li>Then we produce a set of partitions by filtering on each of the combinations</li> |
| <li>We combine the chunks to create a copy to avoid GIL congestion on the original table</li> |
| </ul> |
| |
| <details class="quote"> |
| <summary>Source code in <code>pyiceberg/io/pyarrow.py</code></summary> |
| <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-2609">2609</a></span> |
| <span class="normal"><a href="#__codelineno-0-2610">2610</a></span> |
| <span class="normal"><a href="#__codelineno-0-2611">2611</a></span> |
| <span class="normal"><a href="#__codelineno-0-2612">2612</a></span> |
| <span class="normal"><a href="#__codelineno-0-2613">2613</a></span> |
| <span class="normal"><a href="#__codelineno-0-2614">2614</a></span> |
| <span class="normal"><a href="#__codelineno-0-2615">2615</a></span> |
| <span class="normal"><a href="#__codelineno-0-2616">2616</a></span> |
| <span class="normal"><a href="#__codelineno-0-2617">2617</a></span> |
| <span class="normal"><a href="#__codelineno-0-2618">2618</a></span> |
| <span class="normal"><a href="#__codelineno-0-2619">2619</a></span> |
| <span class="normal"><a href="#__codelineno-0-2620">2620</a></span> |
| <span class="normal"><a href="#__codelineno-0-2621">2621</a></span> |
| <span class="normal"><a href="#__codelineno-0-2622">2622</a></span> |
| <span class="normal"><a href="#__codelineno-0-2623">2623</a></span> |
| <span class="normal"><a href="#__codelineno-0-2624">2624</a></span> |
| <span class="normal"><a href="#__codelineno-0-2625">2625</a></span> |
| <span class="normal"><a href="#__codelineno-0-2626">2626</a></span> |
| <span class="normal"><a href="#__codelineno-0-2627">2627</a></span> |
| <span class="normal"><a href="#__codelineno-0-2628">2628</a></span> |
| <span class="normal"><a href="#__codelineno-0-2629">2629</a></span> |
| <span class="normal"><a href="#__codelineno-0-2630">2630</a></span> |
| <span class="normal"><a href="#__codelineno-0-2631">2631</a></span> |
| <span class="normal"><a href="#__codelineno-0-2632">2632</a></span> |
| <span class="normal"><a href="#__codelineno-0-2633">2633</a></span> |
| <span class="normal"><a href="#__codelineno-0-2634">2634</a></span> |
| <span class="normal"><a href="#__codelineno-0-2635">2635</a></span> |
| <span class="normal"><a href="#__codelineno-0-2636">2636</a></span> |
| <span class="normal"><a href="#__codelineno-0-2637">2637</a></span> |
| <span class="normal"><a href="#__codelineno-0-2638">2638</a></span> |
| <span class="normal"><a href="#__codelineno-0-2639">2639</a></span> |
| <span class="normal"><a href="#__codelineno-0-2640">2640</a></span> |
| <span class="normal"><a href="#__codelineno-0-2641">2641</a></span> |
| <span class="normal"><a href="#__codelineno-0-2642">2642</a></span> |
| <span class="normal"><a href="#__codelineno-0-2643">2643</a></span> |
| <span class="normal"><a href="#__codelineno-0-2644">2644</a></span> |
| <span class="normal"><a href="#__codelineno-0-2645">2645</a></span> |
| <span class="normal"><a href="#__codelineno-0-2646">2646</a></span> |
| <span class="normal"><a href="#__codelineno-0-2647">2647</a></span> |
| <span class="normal"><a href="#__codelineno-0-2648">2648</a></span> |
| <span class="normal"><a href="#__codelineno-0-2649">2649</a></span> |
| <span class="normal"><a href="#__codelineno-0-2650">2650</a></span> |
| <span class="normal"><a href="#__codelineno-0-2651">2651</a></span> |
| <span class="normal"><a href="#__codelineno-0-2652">2652</a></span> |
| <span class="normal"><a href="#__codelineno-0-2653">2653</a></span> |
| <span class="normal"><a href="#__codelineno-0-2654">2654</a></span> |
| <span class="normal"><a href="#__codelineno-0-2655">2655</a></span> |
| <span class="normal"><a href="#__codelineno-0-2656">2656</a></span> |
| <span class="normal"><a href="#__codelineno-0-2657">2657</a></span> |
| <span class="normal"><a href="#__codelineno-0-2658">2658</a></span> |
| <span class="normal"><a href="#__codelineno-0-2659">2659</a></span> |
| <span class="normal"><a href="#__codelineno-0-2660">2660</a></span> |
| <span class="normal"><a href="#__codelineno-0-2661">2661</a></span> |
| <span class="normal"><a href="#__codelineno-0-2662">2662</a></span> |
| <span class="normal"><a href="#__codelineno-0-2663">2663</a></span> |
| <span class="normal"><a href="#__codelineno-0-2664">2664</a></span> |
| <span class="normal"><a href="#__codelineno-0-2665">2665</a></span></pre></div></td><td class="code"><div><pre><span></span><code><a id="__codelineno-0-2609" name="__codelineno-0-2609"></a><span class="k">def</span><span class="w"> </span><span class="nf">_determine_partitions</span><span class="p">(</span><span class="n">spec</span><span class="p">:</span> <span class="n">PartitionSpec</span><span class="p">,</span> <span class="n">schema</span><span class="p">:</span> <span class="n">Schema</span><span class="p">,</span> <span class="n">arrow_table</span><span class="p">:</span> <span class="n">pa</span><span class="o">.</span><span class="n">Table</span><span class="p">)</span> <span class="o">-></span> <span class="n">List</span><span class="p">[</span><span class="n">_TablePartition</span><span class="p">]:</span> |
| <a id="__codelineno-0-2610" name="__codelineno-0-2610"></a><span class="w"> </span><span class="sd">"""Based on the iceberg table partition spec, filter the arrow table into partitions with their keys.</span> |
| <a id="__codelineno-0-2611" name="__codelineno-0-2611"></a> |
| <a id="__codelineno-0-2612" name="__codelineno-0-2612"></a><span class="sd"> Example:</span> |
| <a id="__codelineno-0-2613" name="__codelineno-0-2613"></a><span class="sd"> Input:</span> |
| <a id="__codelineno-0-2614" name="__codelineno-0-2614"></a><span class="sd"> An arrow table with partition key of ['n_legs', 'year'] and with data of</span> |
| <a id="__codelineno-0-2615" name="__codelineno-0-2615"></a><span class="sd"> {'year': [2020, 2022, 2022, 2021, 2022, 2022, 2022, 2019, 2021],</span> |
| <a id="__codelineno-0-2616" name="__codelineno-0-2616"></a><span class="sd"> 'n_legs': [2, 2, 2, 4, 4, 4, 4, 5, 100],</span> |
| <a id="__codelineno-0-2617" name="__codelineno-0-2617"></a><span class="sd"> 'animal': ["Flamingo", "Parrot", "Parrot", "Dog", "Horse", "Horse", "Horse","Brittle stars", "Centipede"]}.</span> |
| <a id="__codelineno-0-2618" name="__codelineno-0-2618"></a><span class="sd"> The algorithm:</span> |
| <a id="__codelineno-0-2619" name="__codelineno-0-2619"></a><span class="sd"> - We determine the set of unique partition keys</span> |
| <a id="__codelineno-0-2620" name="__codelineno-0-2620"></a><span class="sd"> - Then we produce a set of partitions by filtering on each of the combinations</span> |
| <a id="__codelineno-0-2621" name="__codelineno-0-2621"></a><span class="sd"> - We combine the chunks to create a copy to avoid GIL congestion on the original table</span> |
| <a id="__codelineno-0-2622" name="__codelineno-0-2622"></a><span class="sd"> """</span> |
| <a id="__codelineno-0-2623" name="__codelineno-0-2623"></a> <span class="c1"># Assign unique names to columns where the partition transform has been applied</span> |
| <a id="__codelineno-0-2624" name="__codelineno-0-2624"></a> <span class="c1"># to avoid conflicts</span> |
| <a id="__codelineno-0-2625" name="__codelineno-0-2625"></a> <span class="n">partition_fields</span> <span class="o">=</span> <span class="p">[</span><span class="sa">f</span><span class="s2">"_partition_</span><span class="si">{</span><span class="n">field</span><span class="o">.</span><span class="n">name</span><span class="si">}</span><span class="s2">"</span> <span class="k">for</span> <span class="n">field</span> <span class="ow">in</span> <span class="n">spec</span><span class="o">.</span><span class="n">fields</span><span class="p">]</span> |
| <a id="__codelineno-0-2626" name="__codelineno-0-2626"></a> |
| <a id="__codelineno-0-2627" name="__codelineno-0-2627"></a> <span class="k">for</span> <span class="n">partition</span><span class="p">,</span> <span class="n">name</span> <span class="ow">in</span> <span class="nb">zip</span><span class="p">(</span><span class="n">spec</span><span class="o">.</span><span class="n">fields</span><span class="p">,</span> <span class="n">partition_fields</span><span class="p">):</span> |
| <a id="__codelineno-0-2628" name="__codelineno-0-2628"></a> <span class="n">source_field</span> <span class="o">=</span> <span class="n">schema</span><span class="o">.</span><span class="n">find_field</span><span class="p">(</span><span class="n">partition</span><span class="o">.</span><span class="n">source_id</span><span class="p">)</span> |
| <a id="__codelineno-0-2629" name="__codelineno-0-2629"></a> <span class="n">arrow_table</span> <span class="o">=</span> <span class="n">arrow_table</span><span class="o">.</span><span class="n">append_column</span><span class="p">(</span> |
| <a id="__codelineno-0-2630" name="__codelineno-0-2630"></a> <span class="n">name</span><span class="p">,</span> <span class="n">partition</span><span class="o">.</span><span class="n">transform</span><span class="o">.</span><span class="n">pyarrow_transform</span><span class="p">(</span><span class="n">source_field</span><span class="o">.</span><span class="n">field_type</span><span class="p">)(</span><span class="n">arrow_table</span><span class="p">[</span><span class="n">source_field</span><span class="o">.</span><span class="n">name</span><span class="p">])</span> |
| <a id="__codelineno-0-2631" name="__codelineno-0-2631"></a> <span class="p">)</span> |
| <a id="__codelineno-0-2632" name="__codelineno-0-2632"></a> |
| <a id="__codelineno-0-2633" name="__codelineno-0-2633"></a> <span class="n">unique_partition_fields</span> <span class="o">=</span> <span class="n">arrow_table</span><span class="o">.</span><span class="n">select</span><span class="p">(</span><span class="n">partition_fields</span><span class="p">)</span><span class="o">.</span><span class="n">group_by</span><span class="p">(</span><span class="n">partition_fields</span><span class="p">)</span><span class="o">.</span><span class="n">aggregate</span><span class="p">([])</span> |
| <a id="__codelineno-0-2634" name="__codelineno-0-2634"></a> |
| <a id="__codelineno-0-2635" name="__codelineno-0-2635"></a> <span class="n">table_partitions</span> <span class="o">=</span> <span class="p">[]</span> |
| <a id="__codelineno-0-2636" name="__codelineno-0-2636"></a> <span class="c1"># TODO: As a next step, we could also play around with yielding instead of materializing the full list</span> |
| <a id="__codelineno-0-2637" name="__codelineno-0-2637"></a> <span class="k">for</span> <span class="n">unique_partition</span> <span class="ow">in</span> <span class="n">unique_partition_fields</span><span class="o">.</span><span class="n">to_pylist</span><span class="p">():</span> |
| <a id="__codelineno-0-2638" name="__codelineno-0-2638"></a> <span class="n">partition_key</span> <span class="o">=</span> <span class="n">PartitionKey</span><span class="p">(</span> |
| <a id="__codelineno-0-2639" name="__codelineno-0-2639"></a> <span class="n">field_values</span><span class="o">=</span><span class="p">[</span> |
| <a id="__codelineno-0-2640" name="__codelineno-0-2640"></a> <span class="n">PartitionFieldValue</span><span class="p">(</span><span class="n">field</span><span class="o">=</span><span class="n">field</span><span class="p">,</span> <span class="n">value</span><span class="o">=</span><span class="n">unique_partition</span><span class="p">[</span><span class="n">name</span><span class="p">])</span> |
| <a id="__codelineno-0-2641" name="__codelineno-0-2641"></a> <span class="k">for</span> <span class="n">field</span><span class="p">,</span> <span class="n">name</span> <span class="ow">in</span> <span class="nb">zip</span><span class="p">(</span><span class="n">spec</span><span class="o">.</span><span class="n">fields</span><span class="p">,</span> <span class="n">partition_fields</span><span class="p">)</span> |
| <a id="__codelineno-0-2642" name="__codelineno-0-2642"></a> <span class="p">],</span> |
| <a id="__codelineno-0-2643" name="__codelineno-0-2643"></a> <span class="n">partition_spec</span><span class="o">=</span><span class="n">spec</span><span class="p">,</span> |
| <a id="__codelineno-0-2644" name="__codelineno-0-2644"></a> <span class="n">schema</span><span class="o">=</span><span class="n">schema</span><span class="p">,</span> |
| <a id="__codelineno-0-2645" name="__codelineno-0-2645"></a> <span class="p">)</span> |
| <a id="__codelineno-0-2646" name="__codelineno-0-2646"></a> <span class="n">filtered_table</span> <span class="o">=</span> <span class="n">arrow_table</span><span class="o">.</span><span class="n">filter</span><span class="p">(</span> |
| <a id="__codelineno-0-2647" name="__codelineno-0-2647"></a> <span class="n">functools</span><span class="o">.</span><span class="n">reduce</span><span class="p">(</span> |
| <a id="__codelineno-0-2648" name="__codelineno-0-2648"></a> <span class="n">operator</span><span class="o">.</span><span class="n">and_</span><span class="p">,</span> |
| <a id="__codelineno-0-2649" name="__codelineno-0-2649"></a> <span class="p">[</span> |
| <a id="__codelineno-0-2650" name="__codelineno-0-2650"></a> <span class="n">pc</span><span class="o">.</span><span class="n">field</span><span class="p">(</span><span class="n">partition_field_name</span><span class="p">)</span> <span class="o">==</span> <span class="n">unique_partition</span><span class="p">[</span><span class="n">partition_field_name</span><span class="p">]</span> |
| <a id="__codelineno-0-2651" name="__codelineno-0-2651"></a> <span class="k">if</span> <span class="n">unique_partition</span><span class="p">[</span><span class="n">partition_field_name</span><span class="p">]</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span> |
| <a id="__codelineno-0-2652" name="__codelineno-0-2652"></a> <span class="k">else</span> <span class="n">pc</span><span class="o">.</span><span class="n">field</span><span class="p">(</span><span class="n">partition_field_name</span><span class="p">)</span><span class="o">.</span><span class="n">is_null</span><span class="p">()</span> |
| <a id="__codelineno-0-2653" name="__codelineno-0-2653"></a> <span class="k">for</span> <span class="n">field</span><span class="p">,</span> <span class="n">partition_field_name</span> <span class="ow">in</span> <span class="nb">zip</span><span class="p">(</span><span class="n">spec</span><span class="o">.</span><span class="n">fields</span><span class="p">,</span> <span class="n">partition_fields</span><span class="p">)</span> |
| <a id="__codelineno-0-2654" name="__codelineno-0-2654"></a> <span class="p">],</span> |
| <a id="__codelineno-0-2655" name="__codelineno-0-2655"></a> <span class="p">)</span> |
| <a id="__codelineno-0-2656" name="__codelineno-0-2656"></a> <span class="p">)</span> |
| <a id="__codelineno-0-2657" name="__codelineno-0-2657"></a> <span class="n">filtered_table</span> <span class="o">=</span> <span class="n">filtered_table</span><span class="o">.</span><span class="n">drop_columns</span><span class="p">(</span><span class="n">partition_fields</span><span class="p">)</span> |
| <a id="__codelineno-0-2658" name="__codelineno-0-2658"></a> |
| <a id="__codelineno-0-2659" name="__codelineno-0-2659"></a> <span class="c1"># The combine_chunks seems to be counter-intuitive to do, but it actually returns</span> |
| <a id="__codelineno-0-2660" name="__codelineno-0-2660"></a> <span class="c1"># fresh buffers that don't interfere with each other when it is written out to file</span> |
| <a id="__codelineno-0-2661" name="__codelineno-0-2661"></a> <span class="n">table_partitions</span><span class="o">.</span><span class="n">append</span><span class="p">(</span> |
| <a id="__codelineno-0-2662" name="__codelineno-0-2662"></a> <span class="n">_TablePartition</span><span class="p">(</span><span class="n">partition_key</span><span class="o">=</span><span class="n">partition_key</span><span class="p">,</span> <span class="n">arrow_table_partition</span><span class="o">=</span><span class="n">filtered_table</span><span class="o">.</span><span class="n">combine_chunks</span><span class="p">())</span> |
| <a id="__codelineno-0-2663" name="__codelineno-0-2663"></a> <span class="p">)</span> |
| <a id="__codelineno-0-2664" name="__codelineno-0-2664"></a> |
| <a id="__codelineno-0-2665" name="__codelineno-0-2665"></a> <span class="k">return</span> <span class="n">table_partitions</span> |
| </code></pre></div></td></tr></table></div> |
| </details> |
| </div> |
| |
| </div> |
| |
| <div class="doc doc-object doc-function"> |
| |
| |
| <h2 id="pyiceberg.io.pyarrow._expression_to_complementary_pyarrow" class="doc doc-heading"> |
| <code class="highlight language-python"><span class="n">_expression_to_complementary_pyarrow</span><span class="p">(</span><span class="n">expr</span><span class="p">)</span></code> |
| |
| <a href="#pyiceberg.io.pyarrow._expression_to_complementary_pyarrow" class="headerlink" title="Permanent link">¶</a></h2> |
| |
| |
| <div class="doc doc-contents "> |
| |
| <p>Complementary filter conversion function of expression_to_pyarrow.</p> |
| <p>expression_to_pyarrow(Not(expr)) cannot be used to achieve this complementary effect because ~ in pyarrow.compute.Expression does not handle null.</p> |
| |
| <details class="quote"> |
| <summary>Source code in <code>pyiceberg/io/pyarrow.py</code></summary> |
| <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-868">868</a></span> |
| <span class="normal"><a href="#__codelineno-0-869">869</a></span> |
| <span class="normal"><a href="#__codelineno-0-870">870</a></span> |
| <span class="normal"><a href="#__codelineno-0-871">871</a></span> |
| <span class="normal"><a href="#__codelineno-0-872">872</a></span> |
| <span class="normal"><a href="#__codelineno-0-873">873</a></span> |
| <span class="normal"><a href="#__codelineno-0-874">874</a></span> |
| <span class="normal"><a href="#__codelineno-0-875">875</a></span> |
| <span class="normal"><a href="#__codelineno-0-876">876</a></span> |
| <span class="normal"><a href="#__codelineno-0-877">877</a></span> |
| <span class="normal"><a href="#__codelineno-0-878">878</a></span> |
| <span class="normal"><a href="#__codelineno-0-879">879</a></span> |
| <span class="normal"><a href="#__codelineno-0-880">880</a></span> |
| <span class="normal"><a href="#__codelineno-0-881">881</a></span> |
| <span class="normal"><a href="#__codelineno-0-882">882</a></span> |
| <span class="normal"><a href="#__codelineno-0-883">883</a></span> |
| <span class="normal"><a href="#__codelineno-0-884">884</a></span> |
| <span class="normal"><a href="#__codelineno-0-885">885</a></span> |
| <span class="normal"><a href="#__codelineno-0-886">886</a></span> |
| <span class="normal"><a href="#__codelineno-0-887">887</a></span> |
| <span class="normal"><a href="#__codelineno-0-888">888</a></span> |
| <span class="normal"><a href="#__codelineno-0-889">889</a></span></pre></div></td><td class="code"><div><pre><span></span><code><a id="__codelineno-0-868" name="__codelineno-0-868"></a><span class="k">def</span><span class="w"> </span><span class="nf">_expression_to_complementary_pyarrow</span><span class="p">(</span><span class="n">expr</span><span class="p">:</span> <span class="n">BooleanExpression</span><span class="p">)</span> <span class="o">-></span> <span class="n">pc</span><span class="o">.</span><span class="n">Expression</span><span class="p">:</span> |
| <a id="__codelineno-0-869" name="__codelineno-0-869"></a><span class="w"> </span><span class="sd">"""Complementary filter conversion function of expression_to_pyarrow.</span> |
| <a id="__codelineno-0-870" name="__codelineno-0-870"></a> |
| <a id="__codelineno-0-871" name="__codelineno-0-871"></a><span class="sd"> Could not use expression_to_pyarrow(Not(expr)) to achieve this complementary effect because ~ in pyarrow.compute.Expression does not handle null.</span> |
| <a id="__codelineno-0-872" name="__codelineno-0-872"></a><span class="sd"> """</span> |
| <a id="__codelineno-0-873" name="__codelineno-0-873"></a> <span class="n">collector</span> <span class="o">=</span> <span class="n">_NullNaNUnmentionedTermsCollector</span><span class="p">()</span> |
| <a id="__codelineno-0-874" name="__codelineno-0-874"></a> <span class="n">collector</span><span class="o">.</span><span class="n">collect</span><span class="p">(</span><span class="n">expr</span><span class="p">)</span> |
| <a id="__codelineno-0-875" name="__codelineno-0-875"></a> |
| <a id="__codelineno-0-876" name="__codelineno-0-876"></a> <span class="c1"># Convert the set of terms to a sorted list so that layout of the expression to build is deterministic.</span> |
| <a id="__codelineno-0-877" name="__codelineno-0-877"></a> <span class="n">null_unmentioned_bound_terms</span><span class="p">:</span> <span class="n">List</span><span class="p">[</span><span class="n">BoundTerm</span><span class="p">[</span><span class="n">Any</span><span class="p">]]</span> <span class="o">=</span> <span class="nb">sorted</span><span class="p">(</span> |
| <a id="__codelineno-0-878" name="__codelineno-0-878"></a> <span class="n">collector</span><span class="o">.</span><span class="n">null_unmentioned_bound_terms</span><span class="p">,</span> <span class="n">key</span><span class="o">=</span><span class="k">lambda</span> <span class="n">term</span><span class="p">:</span> <span class="n">term</span><span class="o">.</span><span class="n">ref</span><span class="p">()</span><span class="o">.</span><span class="n">field</span><span class="o">.</span><span class="n">name</span> |
| <a id="__codelineno-0-879" name="__codelineno-0-879"></a> <span class="p">)</span> |
| <a id="__codelineno-0-880" name="__codelineno-0-880"></a> <span class="n">nan_unmentioned_bound_terms</span><span class="p">:</span> <span class="n">List</span><span class="p">[</span><span class="n">BoundTerm</span><span class="p">[</span><span class="n">Any</span><span class="p">]]</span> <span class="o">=</span> <span class="nb">sorted</span><span class="p">(</span> |
| <a id="__codelineno-0-881" name="__codelineno-0-881"></a> <span class="n">collector</span><span class="o">.</span><span class="n">nan_unmentioned_bound_terms</span><span class="p">,</span> <span class="n">key</span><span class="o">=</span><span class="k">lambda</span> <span class="n">term</span><span class="p">:</span> <span class="n">term</span><span class="o">.</span><span class="n">ref</span><span class="p">()</span><span class="o">.</span><span class="n">field</span><span class="o">.</span><span class="n">name</span> |
| <a id="__codelineno-0-882" name="__codelineno-0-882"></a> <span class="p">)</span> |
| <a id="__codelineno-0-883" name="__codelineno-0-883"></a> |
| <a id="__codelineno-0-884" name="__codelineno-0-884"></a> <span class="n">preserve_expr</span><span class="p">:</span> <span class="n">BooleanExpression</span> <span class="o">=</span> <span class="n">Not</span><span class="p">(</span><span class="n">expr</span><span class="p">)</span> |
| <a id="__codelineno-0-885" name="__codelineno-0-885"></a> <span class="k">for</span> <span class="n">term</span> <span class="ow">in</span> <span class="n">null_unmentioned_bound_terms</span><span class="p">:</span> |
| <a id="__codelineno-0-886" name="__codelineno-0-886"></a> <span class="n">preserve_expr</span> <span class="o">=</span> <span class="n">Or</span><span class="p">(</span><span class="n">preserve_expr</span><span class="p">,</span> <span class="n">BoundIsNull</span><span class="p">(</span><span class="n">term</span><span class="o">=</span><span class="n">term</span><span class="p">))</span> |
| <a id="__codelineno-0-887" name="__codelineno-0-887"></a> <span class="k">for</span> <span class="n">term</span> <span class="ow">in</span> <span class="n">nan_unmentioned_bound_terms</span><span class="p">:</span> |
| <a id="__codelineno-0-888" name="__codelineno-0-888"></a> <span class="n">preserve_expr</span> <span class="o">=</span> <span class="n">Or</span><span class="p">(</span><span class="n">preserve_expr</span><span class="p">,</span> <span class="n">BoundIsNaN</span><span class="p">(</span><span class="n">term</span><span class="o">=</span><span class="n">term</span><span class="p">))</span> |
| <a id="__codelineno-0-889" name="__codelineno-0-889"></a> <span class="k">return</span> <span class="n">expression_to_pyarrow</span><span class="p">(</span><span class="n">preserve_expr</span><span class="p">)</span> |
| </code></pre></div></td></tr></table></div> |
| </details> |
| </div> |
| |
| </div> |
| |
| <div class="doc doc-object doc-function"> |
| |
| |
| <h2 id="pyiceberg.io.pyarrow._get_column_projection_values" class="doc doc-heading"> |
| <code class="highlight language-python"><span class="n">_get_column_projection_values</span><span class="p">(</span><span class="n">file</span><span class="p">,</span> <span class="n">projected_schema</span><span class="p">,</span> <span class="n">partition_spec</span><span class="p">,</span> <span class="n">file_project_field_ids</span><span class="p">)</span></code> |
| |
| <a href="#pyiceberg.io.pyarrow._get_column_projection_values" class="headerlink" title="Permanent link">¶</a></h2> |
| |
| |
| <div class="doc doc-contents "> |
| |
| <p>Apply Column Projection rules to File Schema.</p> |
| |
| <details class="quote"> |
| <summary>Source code in <code>pyiceberg/io/pyarrow.py</code></summary> |
| <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-1314">1314</a></span> |
| <span class="normal"><a href="#__codelineno-0-1315">1315</a></span> |
| <span class="normal"><a href="#__codelineno-0-1316">1316</a></span> |
| <span class="normal"><a href="#__codelineno-0-1317">1317</a></span> |
| <span class="normal"><a href="#__codelineno-0-1318">1318</a></span> |
| <span class="normal"><a href="#__codelineno-0-1319">1319</a></span> |
| <span class="normal"><a href="#__codelineno-0-1320">1320</a></span> |
| <span class="normal"><a href="#__codelineno-0-1321">1321</a></span> |
| <span class="normal"><a href="#__codelineno-0-1322">1322</a></span> |
| <span class="normal"><a href="#__codelineno-0-1323">1323</a></span> |
| <span class="normal"><a href="#__codelineno-0-1324">1324</a></span> |
| <span class="normal"><a href="#__codelineno-0-1325">1325</a></span> |
| <span class="normal"><a href="#__codelineno-0-1326">1326</a></span> |
| <span class="normal"><a href="#__codelineno-0-1327">1327</a></span> |
| <span class="normal"><a href="#__codelineno-0-1328">1328</a></span> |
| <span class="normal"><a href="#__codelineno-0-1329">1329</a></span> |
| <span class="normal"><a href="#__codelineno-0-1330">1330</a></span> |
| <span class="normal"><a href="#__codelineno-0-1331">1331</a></span> |
| <span class="normal"><a href="#__codelineno-0-1332">1332</a></span> |
| <span class="normal"><a href="#__codelineno-0-1333">1333</a></span> |
| <span class="normal"><a href="#__codelineno-0-1334">1334</a></span> |
| <span class="normal"><a href="#__codelineno-0-1335">1335</a></span> |
| <span class="normal"><a href="#__codelineno-0-1336">1336</a></span> |
| <span class="normal"><a href="#__codelineno-0-1337">1337</a></span> |
| <span class="normal"><a href="#__codelineno-0-1338">1338</a></span> |
| <span class="normal"><a href="#__codelineno-0-1339">1339</a></span> |
| <span class="normal"><a href="#__codelineno-0-1340">1340</a></span> |
| <span class="normal"><a href="#__codelineno-0-1341">1341</a></span> |
| <span class="normal"><a href="#__codelineno-0-1342">1342</a></span> |
| <span class="normal"><a href="#__codelineno-0-1343">1343</a></span> |
| <span class="normal"><a href="#__codelineno-0-1344">1344</a></span> |
| <span class="normal"><a href="#__codelineno-0-1345">1345</a></span> |
| <span class="normal"><a href="#__codelineno-0-1346">1346</a></span> |
| <span class="normal"><a href="#__codelineno-0-1347">1347</a></span> |
| <span class="normal"><a href="#__codelineno-0-1348">1348</a></span> |
| <span class="normal"><a href="#__codelineno-0-1349">1349</a></span> |
| <span class="normal"><a href="#__codelineno-0-1350">1350</a></span></pre></div></td><td class="code"><div><pre><span></span><code><a id="__codelineno-0-1314" name="__codelineno-0-1314"></a><span class="k">def</span><span class="w"> </span><span class="nf">_get_column_projection_values</span><span class="p">(</span> |
| <a id="__codelineno-0-1315" name="__codelineno-0-1315"></a> <span class="n">file</span><span class="p">:</span> <span class="n">DataFile</span><span class="p">,</span> <span class="n">projected_schema</span><span class="p">:</span> <span class="n">Schema</span><span class="p">,</span> <span class="n">partition_spec</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">PartitionSpec</span><span class="p">],</span> <span class="n">file_project_field_ids</span><span class="p">:</span> <span class="n">Set</span><span class="p">[</span><span class="nb">int</span><span class="p">]</span> |
| <a id="__codelineno-0-1316" name="__codelineno-0-1316"></a><span class="p">)</span> <span class="o">-></span> <span class="n">Tuple</span><span class="p">[</span><span class="nb">bool</span><span class="p">,</span> <span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Any</span><span class="p">]]:</span> |
| <a id="__codelineno-0-1317" name="__codelineno-0-1317"></a><span class="w"> </span><span class="sd">"""Apply Column Projection rules to File Schema."""</span> |
| <a id="__codelineno-0-1318" name="__codelineno-0-1318"></a> <span class="n">project_schema_diff</span> <span class="o">=</span> <span class="n">projected_schema</span><span class="o">.</span><span class="n">field_ids</span><span class="o">.</span><span class="n">difference</span><span class="p">(</span><span class="n">file_project_field_ids</span><span class="p">)</span> |
| <a id="__codelineno-0-1319" name="__codelineno-0-1319"></a> <span class="n">should_project_columns</span> <span class="o">=</span> <span class="nb">len</span><span class="p">(</span><span class="n">project_schema_diff</span><span class="p">)</span> <span class="o">></span> <span class="mi">0</span> |
| <a id="__codelineno-0-1320" name="__codelineno-0-1320"></a> <span class="n">projected_missing_fields</span><span class="p">:</span> <span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Any</span><span class="p">]</span> <span class="o">=</span> <span class="p">{}</span> |
| <a id="__codelineno-0-1321" name="__codelineno-0-1321"></a> |
| <a id="__codelineno-0-1322" name="__codelineno-0-1322"></a> <span class="k">if</span> <span class="ow">not</span> <span class="n">should_project_columns</span><span class="p">:</span> |
| <a id="__codelineno-0-1323" name="__codelineno-0-1323"></a> <span class="k">return</span> <span class="kc">False</span><span class="p">,</span> <span class="p">{}</span> |
| <a id="__codelineno-0-1324" name="__codelineno-0-1324"></a> |
| <a id="__codelineno-0-1325" name="__codelineno-0-1325"></a> <span class="n">partition_schema</span><span class="p">:</span> <span class="n">StructType</span> |
| <a id="__codelineno-0-1326" name="__codelineno-0-1326"></a> <span class="n">accessors</span><span class="p">:</span> <span class="n">Dict</span><span class="p">[</span><span class="nb">int</span><span class="p">,</span> <span class="n">Accessor</span><span class="p">]</span> |
| <a id="__codelineno-0-1327" name="__codelineno-0-1327"></a> |
| <a id="__codelineno-0-1328" name="__codelineno-0-1328"></a> <span class="k">if</span> <span class="n">partition_spec</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span> |
| <a id="__codelineno-0-1329" name="__codelineno-0-1329"></a> <span class="n">partition_schema</span> <span class="o">=</span> <span class="n">partition_spec</span><span class="o">.</span><span class="n">partition_type</span><span class="p">(</span><span class="n">projected_schema</span><span class="p">)</span> |
| <a id="__codelineno-0-1330" name="__codelineno-0-1330"></a> <span class="n">accessors</span> <span class="o">=</span> <span class="n">build_position_accessors</span><span class="p">(</span><span class="n">partition_schema</span><span class="p">)</span> |
| <a id="__codelineno-0-1331" name="__codelineno-0-1331"></a> <span class="k">else</span><span class="p">:</span> |
| <a id="__codelineno-0-1332" name="__codelineno-0-1332"></a> <span class="k">return</span> <span class="kc">False</span><span class="p">,</span> <span class="p">{}</span> |
| <a id="__codelineno-0-1333" name="__codelineno-0-1333"></a> |
| <a id="__codelineno-0-1334" name="__codelineno-0-1334"></a> <span class="k">for</span> <span class="n">field_id</span> <span class="ow">in</span> <span class="n">project_schema_diff</span><span class="p">:</span> |
| <a id="__codelineno-0-1335" name="__codelineno-0-1335"></a> <span class="k">for</span> <span class="n">partition_field</span> <span class="ow">in</span> <span class="n">partition_spec</span><span class="o">.</span><span class="n">fields_by_source_id</span><span class="p">(</span><span class="n">field_id</span><span class="p">):</span> |
| <a id="__codelineno-0-1336" name="__codelineno-0-1336"></a> <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">partition_field</span><span class="o">.</span><span class="n">transform</span><span class="p">,</span> <span class="n">IdentityTransform</span><span class="p">):</span> |
| <a id="__codelineno-0-1337" name="__codelineno-0-1337"></a> <span class="n">accessor</span> <span class="o">=</span> <span class="n">accessors</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">partition_field</span><span class="o">.</span><span class="n">field_id</span><span class="p">)</span> |
| <a id="__codelineno-0-1338" name="__codelineno-0-1338"></a> |
| <a id="__codelineno-0-1339" name="__codelineno-0-1339"></a> <span class="k">if</span> <span class="n">accessor</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span> |
| <a id="__codelineno-0-1340" name="__codelineno-0-1340"></a> <span class="k">continue</span> |
| <a id="__codelineno-0-1341" name="__codelineno-0-1341"></a> |
| <a id="__codelineno-0-1342" name="__codelineno-0-1342"></a> <span class="c1"># The partition field may not exist in the partition record of the data file.</span> |
| <a id="__codelineno-0-1343" name="__codelineno-0-1343"></a> <span class="c1"># This can happen when new partition fields are introduced after the file was written.</span> |
| <a id="__codelineno-0-1344" name="__codelineno-0-1344"></a> <span class="k">try</span><span class="p">:</span> |
| <a id="__codelineno-0-1345" name="__codelineno-0-1345"></a> <span class="k">if</span> <span class="n">partition_value</span> <span class="o">:=</span> <span class="n">accessor</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">file</span><span class="o">.</span><span class="n">partition</span><span class="p">):</span> |
| <a id="__codelineno-0-1346" name="__codelineno-0-1346"></a> <span class="n">projected_missing_fields</span><span class="p">[</span><span class="n">partition_field</span><span class="o">.</span><span class="n">name</span><span class="p">]</span> <span class="o">=</span> <span class="n">partition_value</span> |
| <a id="__codelineno-0-1347" name="__codelineno-0-1347"></a> <span class="k">except</span> <span class="ne">IndexError</span><span class="p">:</span> |
| <a id="__codelineno-0-1348" name="__codelineno-0-1348"></a> <span class="k">continue</span> |
| <a id="__codelineno-0-1349" name="__codelineno-0-1349"></a> |
| <a id="__codelineno-0-1350" name="__codelineno-0-1350"></a> <span class="k">return</span> <span class="kc">True</span><span class="p">,</span> <span class="n">projected_missing_fields</span> |
| </code></pre></div></td></tr></table></div> |
| </details> |
| </div> |
| |
| </div> |
| |
| <div class="doc doc-object doc-function"> |
| |
| |
| <h2 id="pyiceberg.io.pyarrow.compute_statistics_plan" class="doc doc-heading"> |
| <code class="highlight language-python"><span class="n">compute_statistics_plan</span><span class="p">(</span><span class="n">schema</span><span class="p">,</span> <span class="n">table_properties</span><span class="p">)</span></code> |
| |
| <a href="#pyiceberg.io.pyarrow.compute_statistics_plan" class="headerlink" title="Permanent link">¶</a></h2> |
| |
| |
| <div class="doc doc-contents "> |
| |
| <p>Compute the statistics plan for all columns.</p> |
| <p>The resulting list is assumed to have the same length and same order as the columns in the pyarrow table. |
| This allows the list to map from the column index to the Iceberg column ID. |
| For each element, the desired metrics collection that was provided by the user in the configuration |
| is computed and then adjusted according to the data type of the column. For nested columns the minimum |
| and maximum values are not computed. And truncation is only applied to text or binary strings.</p> |
| |
| |
| <p><span class="doc-section-title">Parameters:</span></p> |
| <table> |
| <thead> |
| <tr> |
| <th>Name</th> |
| <th>Type</th> |
| <th>Description</th> |
| <th>Default</th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr class="doc-section-item"> |
| <td> |
| <code>table_properties</code> |
| </td> |
| <td> |
| <code><span title="typing.Dict">Dict</span>[<span title="str">str</span>, <span title="str">str</span>]</code> |
| </td> |
| <td> |
| <div class="doc-md-description"> |
| <p>The Iceberg table metadata properties. |
| They are required to compute the mapping of column position to Iceberg schema type ID. They are also |
| used to set the mode for column metrics collection.</p> |
| </div> |
| </td> |
| <td> |
| <em>required</em> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| |
| <details class="quote"> |
| <summary>Source code in <code>pyiceberg/io/pyarrow.py</code></summary> |
| <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-2092">2092</a></span> |
| <span class="normal"><a href="#__codelineno-0-2093">2093</a></span> |
| <span class="normal"><a href="#__codelineno-0-2094">2094</a></span> |
| <span class="normal"><a href="#__codelineno-0-2095">2095</a></span> |
| <span class="normal"><a href="#__codelineno-0-2096">2096</a></span> |
| <span class="normal"><a href="#__codelineno-0-2097">2097</a></span> |
| <span class="normal"><a href="#__codelineno-0-2098">2098</a></span> |
| <span class="normal"><a href="#__codelineno-0-2099">2099</a></span> |
| <span class="normal"><a href="#__codelineno-0-2100">2100</a></span> |
| <span class="normal"><a href="#__codelineno-0-2101">2101</a></span> |
| <span class="normal"><a href="#__codelineno-0-2102">2102</a></span> |
| <span class="normal"><a href="#__codelineno-0-2103">2103</a></span> |
| <span class="normal"><a href="#__codelineno-0-2104">2104</a></span> |
| <span class="normal"><a href="#__codelineno-0-2105">2105</a></span> |
| <span class="normal"><a href="#__codelineno-0-2106">2106</a></span> |
| <span class="normal"><a href="#__codelineno-0-2107">2107</a></span> |
| <span class="normal"><a href="#__codelineno-0-2108">2108</a></span> |
| <span class="normal"><a href="#__codelineno-0-2109">2109</a></span> |
| <span class="normal"><a href="#__codelineno-0-2110">2110</a></span> |
| <span class="normal"><a href="#__codelineno-0-2111">2111</a></span> |
| <span class="normal"><a href="#__codelineno-0-2112">2112</a></span> |
| <span class="normal"><a href="#__codelineno-0-2113">2113</a></span> |
| <span class="normal"><a href="#__codelineno-0-2114">2114</a></span></pre></div></td><td class="code"><div><pre><span></span><code><a id="__codelineno-0-2092" name="__codelineno-0-2092"></a><span class="k">def</span><span class="w"> </span><span class="nf">compute_statistics_plan</span><span class="p">(</span> |
| <a id="__codelineno-0-2093" name="__codelineno-0-2093"></a> <span class="n">schema</span><span class="p">:</span> <span class="n">Schema</span><span class="p">,</span> |
| <a id="__codelineno-0-2094" name="__codelineno-0-2094"></a> <span class="n">table_properties</span><span class="p">:</span> <span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="nb">str</span><span class="p">],</span> |
| <a id="__codelineno-0-2095" name="__codelineno-0-2095"></a><span class="p">)</span> <span class="o">-></span> <span class="n">Dict</span><span class="p">[</span><span class="nb">int</span><span class="p">,</span> <span class="n">StatisticsCollector</span><span class="p">]:</span> |
| <a id="__codelineno-0-2096" name="__codelineno-0-2096"></a><span class="w"> </span><span class="sd">"""</span> |
| <a id="__codelineno-0-2097" name="__codelineno-0-2097"></a><span class="sd"> Compute the statistics plan for all columns.</span> |
| <a id="__codelineno-0-2098" name="__codelineno-0-2098"></a> |
| <a id="__codelineno-0-2099" name="__codelineno-0-2099"></a><span class="sd"> The resulting list is assumed to have the same length and same order as the columns in the pyarrow table.</span> |
| <a id="__codelineno-0-2100" name="__codelineno-0-2100"></a><span class="sd"> This allows the list to map from the column index to the Iceberg column ID.</span> |
| <a id="__codelineno-0-2101" name="__codelineno-0-2101"></a><span class="sd"> For each element, the desired metrics collection that was provided by the user in the configuration</span> |
| <a id="__codelineno-0-2102" name="__codelineno-0-2102"></a><span class="sd"> is computed and then adjusted according to the data type of the column. For nested columns the minimum</span> |
| <a id="__codelineno-0-2103" name="__codelineno-0-2103"></a><span class="sd"> and maximum values are not computed. And truncation is only applied to text of binary strings.</span> |
| <a id="__codelineno-0-2104" name="__codelineno-0-2104"></a> |
| <a id="__codelineno-0-2105" name="__codelineno-0-2105"></a><span class="sd"> Args:</span> |
| <a id="__codelineno-0-2106" name="__codelineno-0-2106"></a><span class="sd"> table_properties (from pyiceberg.table.metadata.TableMetadata): The Iceberg table metadata properties.</span> |
| <a id="__codelineno-0-2107" name="__codelineno-0-2107"></a><span class="sd"> They are required to compute the mapping of column position to iceberg schema type id. It's also</span> |
| <a id="__codelineno-0-2108" name="__codelineno-0-2108"></a><span class="sd"> used to set the mode for column metrics collection</span> |
| <a id="__codelineno-0-2109" name="__codelineno-0-2109"></a><span class="sd"> """</span> |
| <a id="__codelineno-0-2110" name="__codelineno-0-2110"></a> <span class="n">stats_cols</span> <span class="o">=</span> <span class="n">pre_order_visit</span><span class="p">(</span><span class="n">schema</span><span class="p">,</span> <span class="n">PyArrowStatisticsCollector</span><span class="p">(</span><span class="n">schema</span><span class="p">,</span> <span class="n">table_properties</span><span class="p">))</span> |
| <a id="__codelineno-0-2111" name="__codelineno-0-2111"></a> <span class="n">result</span><span class="p">:</span> <span class="n">Dict</span><span class="p">[</span><span class="nb">int</span><span class="p">,</span> <span class="n">StatisticsCollector</span><span class="p">]</span> <span class="o">=</span> <span class="p">{}</span> |
| <a id="__codelineno-0-2112" name="__codelineno-0-2112"></a> <span class="k">for</span> <span class="n">stats_col</span> <span class="ow">in</span> <span class="n">stats_cols</span><span class="p">:</span> |
| <a id="__codelineno-0-2113" name="__codelineno-0-2113"></a> <span class="n">result</span><span class="p">[</span><span class="n">stats_col</span><span class="o">.</span><span class="n">field_id</span><span class="p">]</span> <span class="o">=</span> <span class="n">stats_col</span> |
| <a id="__codelineno-0-2114" name="__codelineno-0-2114"></a> <span class="k">return</span> <span class="n">result</span> |
| </code></pre></div></td></tr></table></div> |
| </details> |
| </div> |
| |
| </div> |
| |
| <div class="doc doc-object doc-function"> |
| |
| |
| <h2 id="pyiceberg.io.pyarrow.data_file_statistics_from_parquet_metadata" class="doc doc-heading"> |
| <code class="highlight language-python"><span class="n">data_file_statistics_from_parquet_metadata</span><span class="p">(</span><span class="n">parquet_metadata</span><span class="p">,</span> <span class="n">stats_columns</span><span class="p">,</span> <span class="n">parquet_column_mapping</span><span class="p">)</span></code> |
| |
| <a href="#pyiceberg.io.pyarrow.data_file_statistics_from_parquet_metadata" class="headerlink" title="Permanent link">¶</a></h2> |
| |
| |
| <div class="doc doc-contents "> |
| |
| <p>Compute and return DataFileStatistics that includes the following.</p> |
| <ul> |
| <li>record_count</li> |
| <li>column_sizes</li> |
| <li>value_counts</li> |
| <li>null_value_counts</li> |
| <li>nan_value_counts</li> |
| <li>column_aggregates</li> |
| <li>split_offsets</li> |
| </ul> |
| |
| |
| <p><span class="doc-section-title">Parameters:</span></p> |
| <table> |
| <thead> |
| <tr> |
| <th>Name</th> |
| <th>Type</th> |
| <th>Description</th> |
| <th>Default</th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr class="doc-section-item"> |
| <td> |
| <code>parquet_metadata</code> |
| </td> |
| <td> |
| <code><span title="pyarrow.parquet.FileMetaData">FileMetaData</span></code> |
| </td> |
| <td> |
| <div class="doc-md-description"> |
| <p>A pyarrow metadata object.</p> |
| </div> |
| </td> |
| <td> |
| <em>required</em> |
| </td> |
| </tr> |
| <tr class="doc-section-item"> |
| <td> |
| <code>stats_columns</code> |
| </td> |
| <td> |
| <code><span title="typing.Dict">Dict</span>[<span title="int">int</span>, <span title="pyiceberg.io.pyarrow.StatisticsCollector">StatisticsCollector</span>]</code> |
| </td> |
| <td> |
| <div class="doc-md-description"> |
| <p>The statistics gathering plan. It is required to |
| set the mode for column metrics collection.</p> |
| </div> |
| </td> |
| <td> |
| <em>required</em> |
| </td> |
| </tr> |
| <tr class="doc-section-item"> |
| <td> |
| <code>parquet_column_mapping</code> |
| </td> |
| <td> |
| <code><span title="typing.Dict">Dict</span>[<span title="str">str</span>, <span title="int">int</span>]</code> |
| </td> |
| <td> |
| <div class="doc-md-description"> |
| <p>The mapping of the Parquet column path to the field ID.</p> |
| </div> |
| </td> |
| <td> |
| <em>required</em> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| |
| <details class="quote"> |
| <summary>Source code in <code>pyiceberg/io/pyarrow.py</code></summary> |
| <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-2253">2253</a></span> |
| <span class="normal"><a href="#__codelineno-0-2254">2254</a></span> |
| <span class="normal"><a href="#__codelineno-0-2255">2255</a></span> |
| <span class="normal"><a href="#__codelineno-0-2256">2256</a></span> |
| <span class="normal"><a href="#__codelineno-0-2257">2257</a></span> |
| <span class="normal"><a href="#__codelineno-0-2258">2258</a></span> |
| <span class="normal"><a href="#__codelineno-0-2259">2259</a></span> |
| <span class="normal"><a href="#__codelineno-0-2260">2260</a></span> |
| <span class="normal"><a href="#__codelineno-0-2261">2261</a></span> |
| <span class="normal"><a href="#__codelineno-0-2262">2262</a></span> |
| <span class="normal"><a href="#__codelineno-0-2263">2263</a></span> |
| <span class="normal"><a href="#__codelineno-0-2264">2264</a></span> |
| <span class="normal"><a href="#__codelineno-0-2265">2265</a></span> |
| <span class="normal"><a href="#__codelineno-0-2266">2266</a></span> |
| <span class="normal"><a href="#__codelineno-0-2267">2267</a></span> |
| <span class="normal"><a href="#__codelineno-0-2268">2268</a></span> |
| <span class="normal"><a href="#__codelineno-0-2269">2269</a></span> |
| <span class="normal"><a href="#__codelineno-0-2270">2270</a></span> |
| <span class="normal"><a href="#__codelineno-0-2271">2271</a></span> |
| <span class="normal"><a href="#__codelineno-0-2272">2272</a></span> |
| <span class="normal"><a href="#__codelineno-0-2273">2273</a></span> |
| <span class="normal"><a href="#__codelineno-0-2274">2274</a></span> |
| <span class="normal"><a href="#__codelineno-0-2275">2275</a></span> |
| <span class="normal"><a href="#__codelineno-0-2276">2276</a></span> |
| <span class="normal"><a href="#__codelineno-0-2277">2277</a></span> |
| <span class="normal"><a href="#__codelineno-0-2278">2278</a></span> |
| <span class="normal"><a href="#__codelineno-0-2279">2279</a></span> |
| <span class="normal"><a href="#__codelineno-0-2280">2280</a></span> |
| <span class="normal"><a href="#__codelineno-0-2281">2281</a></span> |
| <span class="normal"><a href="#__codelineno-0-2282">2282</a></span> |
| <span class="normal"><a href="#__codelineno-0-2283">2283</a></span> |
| <span class="normal"><a href="#__codelineno-0-2284">2284</a></span> |
| <span class="normal"><a href="#__codelineno-0-2285">2285</a></span> |
| <span class="normal"><a href="#__codelineno-0-2286">2286</a></span> |
| <span class="normal"><a href="#__codelineno-0-2287">2287</a></span> |
| <span class="normal"><a href="#__codelineno-0-2288">2288</a></span> |
| <span class="normal"><a href="#__codelineno-0-2289">2289</a></span> |
| <span class="normal"><a href="#__codelineno-0-2290">2290</a></span> |
| <span class="normal"><a href="#__codelineno-0-2291">2291</a></span> |
| <span class="normal"><a href="#__codelineno-0-2292">2292</a></span> |
| <span class="normal"><a href="#__codelineno-0-2293">2293</a></span> |
| <span class="normal"><a href="#__codelineno-0-2294">2294</a></span> |
| <span class="normal"><a href="#__codelineno-0-2295">2295</a></span> |
| <span class="normal"><a href="#__codelineno-0-2296">2296</a></span> |
| <span class="normal"><a href="#__codelineno-0-2297">2297</a></span> |
| <span class="normal"><a href="#__codelineno-0-2298">2298</a></span> |
| <span class="normal"><a href="#__codelineno-0-2299">2299</a></span> |
| <span class="normal"><a href="#__codelineno-0-2300">2300</a></span> |
| <span class="normal"><a href="#__codelineno-0-2301">2301</a></span> |
| <span class="normal"><a href="#__codelineno-0-2302">2302</a></span> |
| <span class="normal"><a href="#__codelineno-0-2303">2303</a></span> |
| <span class="normal"><a href="#__codelineno-0-2304">2304</a></span> |
| <span class="normal"><a href="#__codelineno-0-2305">2305</a></span> |
| <span class="normal"><a href="#__codelineno-0-2306">2306</a></span> |
| <span class="normal"><a href="#__codelineno-0-2307">2307</a></span> |
| <span class="normal"><a href="#__codelineno-0-2308">2308</a></span> |
| <span class="normal"><a href="#__codelineno-0-2309">2309</a></span> |
| <span class="normal"><a href="#__codelineno-0-2310">2310</a></span> |
| <span class="normal"><a href="#__codelineno-0-2311">2311</a></span> |
| <span class="normal"><a href="#__codelineno-0-2312">2312</a></span> |
| <span class="normal"><a href="#__codelineno-0-2313">2313</a></span> |
| <span class="normal"><a href="#__codelineno-0-2314">2314</a></span> |
| <span class="normal"><a href="#__codelineno-0-2315">2315</a></span> |
| <span class="normal"><a href="#__codelineno-0-2316">2316</a></span> |
| <span class="normal"><a href="#__codelineno-0-2317">2317</a></span> |
| <span class="normal"><a href="#__codelineno-0-2318">2318</a></span> |
| <span class="normal"><a href="#__codelineno-0-2319">2319</a></span> |
| <span class="normal"><a href="#__codelineno-0-2320">2320</a></span> |
| <span class="normal"><a href="#__codelineno-0-2321">2321</a></span> |
| <span class="normal"><a href="#__codelineno-0-2322">2322</a></span> |
| <span class="normal"><a href="#__codelineno-0-2323">2323</a></span> |
| <span class="normal"><a href="#__codelineno-0-2324">2324</a></span> |
| <span class="normal"><a href="#__codelineno-0-2325">2325</a></span> |
| <span class="normal"><a href="#__codelineno-0-2326">2326</a></span> |
| <span class="normal"><a href="#__codelineno-0-2327">2327</a></span> |
| <span class="normal"><a href="#__codelineno-0-2328">2328</a></span> |
| <span class="normal"><a href="#__codelineno-0-2329">2329</a></span> |
| <span class="normal"><a href="#__codelineno-0-2330">2330</a></span> |
| <span class="normal"><a href="#__codelineno-0-2331">2331</a></span> |
| <span class="normal"><a href="#__codelineno-0-2332">2332</a></span> |
| <span class="normal"><a href="#__codelineno-0-2333">2333</a></span> |
| <span class="normal"><a href="#__codelineno-0-2334">2334</a></span> |
| <span class="normal"><a href="#__codelineno-0-2335">2335</a></span> |
| <span class="normal"><a href="#__codelineno-0-2336">2336</a></span> |
| <span class="normal"><a href="#__codelineno-0-2337">2337</a></span> |
| <span class="normal"><a href="#__codelineno-0-2338">2338</a></span> |
| <span class="normal"><a href="#__codelineno-0-2339">2339</a></span> |
| <span class="normal"><a href="#__codelineno-0-2340">2340</a></span> |
| <span class="normal"><a href="#__codelineno-0-2341">2341</a></span> |
| <span class="normal"><a href="#__codelineno-0-2342">2342</a></span> |
| <span class="normal"><a href="#__codelineno-0-2343">2343</a></span> |
| <span class="normal"><a href="#__codelineno-0-2344">2344</a></span> |
| <span class="normal"><a href="#__codelineno-0-2345">2345</a></span> |
| <span class="normal"><a href="#__codelineno-0-2346">2346</a></span> |
| <span class="normal"><a href="#__codelineno-0-2347">2347</a></span> |
| <span class="normal"><a href="#__codelineno-0-2348">2348</a></span> |
| <span class="normal"><a href="#__codelineno-0-2349">2349</a></span> |
| <span class="normal"><a href="#__codelineno-0-2350">2350</a></span> |
| <span class="normal"><a href="#__codelineno-0-2351">2351</a></span> |
| <span class="normal"><a href="#__codelineno-0-2352">2352</a></span> |
| <span class="normal"><a href="#__codelineno-0-2353">2353</a></span></pre></div></td><td class="code"><div><pre><span></span><code><a id="__codelineno-0-2253" name="__codelineno-0-2253"></a><span class="k">def</span><span class="w"> </span><span class="nf">data_file_statistics_from_parquet_metadata</span><span class="p">(</span> |
| <a id="__codelineno-0-2254" name="__codelineno-0-2254"></a> <span class="n">parquet_metadata</span><span class="p">:</span> <span class="n">pq</span><span class="o">.</span><span class="n">FileMetaData</span><span class="p">,</span> |
| <a id="__codelineno-0-2255" name="__codelineno-0-2255"></a> <span class="n">stats_columns</span><span class="p">:</span> <span class="n">Dict</span><span class="p">[</span><span class="nb">int</span><span class="p">,</span> <span class="n">StatisticsCollector</span><span class="p">],</span> |
| <a id="__codelineno-0-2256" name="__codelineno-0-2256"></a> <span class="n">parquet_column_mapping</span><span class="p">:</span> <span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="nb">int</span><span class="p">],</span> |
| <a id="__codelineno-0-2257" name="__codelineno-0-2257"></a><span class="p">)</span> <span class="o">-></span> <span class="n">DataFileStatistics</span><span class="p">:</span> |
| <a id="__codelineno-0-2258" name="__codelineno-0-2258"></a><span class="w"> </span><span class="sd">"""</span> |
| <a id="__codelineno-0-2259" name="__codelineno-0-2259"></a><span class="sd"> Compute and return DataFileStatistics that includes the following.</span> |
| <a id="__codelineno-0-2260" name="__codelineno-0-2260"></a> |
| <a id="__codelineno-0-2261" name="__codelineno-0-2261"></a><span class="sd"> - record_count</span> |
| <a id="__codelineno-0-2262" name="__codelineno-0-2262"></a><span class="sd"> - column_sizes</span> |
| <a id="__codelineno-0-2263" name="__codelineno-0-2263"></a><span class="sd"> - value_counts</span> |
| <a id="__codelineno-0-2264" name="__codelineno-0-2264"></a><span class="sd"> - null_value_counts</span> |
| <a id="__codelineno-0-2265" name="__codelineno-0-2265"></a><span class="sd"> - nan_value_counts</span> |
| <a id="__codelineno-0-2266" name="__codelineno-0-2266"></a><span class="sd"> - column_aggregates</span> |
| <a id="__codelineno-0-2267" name="__codelineno-0-2267"></a><span class="sd"> - split_offsets</span> |
| <a id="__codelineno-0-2268" name="__codelineno-0-2268"></a> |
| <a id="__codelineno-0-2269" name="__codelineno-0-2269"></a><span class="sd"> Args:</span> |
| <a id="__codelineno-0-2270" name="__codelineno-0-2270"></a><span class="sd"> parquet_metadata (pyarrow.parquet.FileMetaData): A pyarrow metadata object.</span> |
| <a id="__codelineno-0-2271" name="__codelineno-0-2271"></a><span class="sd"> stats_columns (Dict[int, StatisticsCollector]): The statistics gathering plan. It is required to</span> |
| <a id="__codelineno-0-2272" name="__codelineno-0-2272"></a><span class="sd"> set the mode for column metrics collection</span> |
| <a id="__codelineno-0-2273" name="__codelineno-0-2273"></a><span class="sd"> parquet_column_mapping (Dict[str, int]): The mapping of the parquet file name to the field ID</span> |
| <a id="__codelineno-0-2274" name="__codelineno-0-2274"></a><span class="sd"> """</span> |
| <a id="__codelineno-0-2275" name="__codelineno-0-2275"></a> <span class="n">column_sizes</span><span class="p">:</span> <span class="n">Dict</span><span class="p">[</span><span class="nb">int</span><span class="p">,</span> <span class="nb">int</span><span class="p">]</span> <span class="o">=</span> <span class="p">{}</span> |
| <a id="__codelineno-0-2276" name="__codelineno-0-2276"></a> <span class="n">value_counts</span><span class="p">:</span> <span class="n">Dict</span><span class="p">[</span><span class="nb">int</span><span class="p">,</span> <span class="nb">int</span><span class="p">]</span> <span class="o">=</span> <span class="p">{}</span> |
| <a id="__codelineno-0-2277" name="__codelineno-0-2277"></a> <span class="n">split_offsets</span><span class="p">:</span> <span class="n">List</span><span class="p">[</span><span class="nb">int</span><span class="p">]</span> <span class="o">=</span> <span class="p">[]</span> |
| <a id="__codelineno-0-2278" name="__codelineno-0-2278"></a> |
| <a id="__codelineno-0-2279" name="__codelineno-0-2279"></a> <span class="n">null_value_counts</span><span class="p">:</span> <span class="n">Dict</span><span class="p">[</span><span class="nb">int</span><span class="p">,</span> <span class="nb">int</span><span class="p">]</span> <span class="o">=</span> <span class="p">{}</span> |
| <a id="__codelineno-0-2280" name="__codelineno-0-2280"></a> <span class="n">nan_value_counts</span><span class="p">:</span> <span class="n">Dict</span><span class="p">[</span><span class="nb">int</span><span class="p">,</span> <span class="nb">int</span><span class="p">]</span> <span class="o">=</span> <span class="p">{}</span> |
| <a id="__codelineno-0-2281" name="__codelineno-0-2281"></a> |
| <a id="__codelineno-0-2282" name="__codelineno-0-2282"></a> <span class="n">col_aggs</span> <span class="o">=</span> <span class="p">{}</span> |
| <a id="__codelineno-0-2283" name="__codelineno-0-2283"></a> |
| <a id="__codelineno-0-2284" name="__codelineno-0-2284"></a> <span class="n">invalidate_col</span><span class="p">:</span> <span class="n">Set</span><span class="p">[</span><span class="nb">int</span><span class="p">]</span> <span class="o">=</span> <span class="nb">set</span><span class="p">()</span> |
| <a id="__codelineno-0-2285" name="__codelineno-0-2285"></a> <span class="k">for</span> <span class="n">r</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">parquet_metadata</span><span class="o">.</span><span class="n">num_row_groups</span><span class="p">):</span> |
| <a id="__codelineno-0-2286" name="__codelineno-0-2286"></a> <span class="c1"># References:</span> |
| <a id="__codelineno-0-2287" name="__codelineno-0-2287"></a> <span class="c1"># https://github.com/apache/iceberg/blob/fc381a81a1fdb8f51a0637ca27cd30673bd7aad3/parquet/src/main/java/org/apache/iceberg/parquet/ParquetUtil.java#L232</span> |
| <a id="__codelineno-0-2288" name="__codelineno-0-2288"></a> <span class="c1"># https://github.com/apache/parquet-mr/blob/ac29db4611f86a07cc6877b416aa4b183e09b353/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/metadata/ColumnChunkMetaData.java#L184</span> |
| <a id="__codelineno-0-2289" name="__codelineno-0-2289"></a> |
| <a id="__codelineno-0-2290" name="__codelineno-0-2290"></a> <span class="n">row_group</span> <span class="o">=</span> <span class="n">parquet_metadata</span><span class="o">.</span><span class="n">row_group</span><span class="p">(</span><span class="n">r</span><span class="p">)</span> |
| <a id="__codelineno-0-2291" name="__codelineno-0-2291"></a> |
| <a id="__codelineno-0-2292" name="__codelineno-0-2292"></a> <span class="n">data_offset</span> <span class="o">=</span> <span class="n">row_group</span><span class="o">.</span><span class="n">column</span><span class="p">(</span><span class="mi">0</span><span class="p">)</span><span class="o">.</span><span class="n">data_page_offset</span> |
| <a id="__codelineno-0-2293" name="__codelineno-0-2293"></a> <span class="n">dictionary_offset</span> <span class="o">=</span> <span class="n">row_group</span><span class="o">.</span><span class="n">column</span><span class="p">(</span><span class="mi">0</span><span class="p">)</span><span class="o">.</span><span class="n">dictionary_page_offset</span> |
| <a id="__codelineno-0-2294" name="__codelineno-0-2294"></a> |
| <a id="__codelineno-0-2295" name="__codelineno-0-2295"></a> <span class="k">if</span> <span class="n">row_group</span><span class="o">.</span><span class="n">column</span><span class="p">(</span><span class="mi">0</span><span class="p">)</span><span class="o">.</span><span class="n">has_dictionary_page</span> <span class="ow">and</span> <span class="n">dictionary_offset</span> <span class="o"><</span> <span class="n">data_offset</span><span class="p">:</span> |
| <a id="__codelineno-0-2296" name="__codelineno-0-2296"></a> <span class="n">split_offsets</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">dictionary_offset</span><span class="p">)</span> |
| <a id="__codelineno-0-2297" name="__codelineno-0-2297"></a> <span class="k">else</span><span class="p">:</span> |
| <a id="__codelineno-0-2298" name="__codelineno-0-2298"></a> <span class="n">split_offsets</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">data_offset</span><span class="p">)</span> |
| <a id="__codelineno-0-2299" name="__codelineno-0-2299"></a> |
| <a id="__codelineno-0-2300" name="__codelineno-0-2300"></a> <span class="k">for</span> <span class="n">pos</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">parquet_metadata</span><span class="o">.</span><span class="n">num_columns</span><span class="p">):</span> |
| <a id="__codelineno-0-2301" name="__codelineno-0-2301"></a> <span class="n">column</span> <span class="o">=</span> <span class="n">row_group</span><span class="o">.</span><span class="n">column</span><span class="p">(</span><span class="n">pos</span><span class="p">)</span> |
| <a id="__codelineno-0-2302" name="__codelineno-0-2302"></a> <span class="n">field_id</span> <span class="o">=</span> <span class="n">parquet_column_mapping</span><span class="p">[</span><span class="n">column</span><span class="o">.</span><span class="n">path_in_schema</span><span class="p">]</span> |
| <a id="__codelineno-0-2303" name="__codelineno-0-2303"></a> |
| <a id="__codelineno-0-2304" name="__codelineno-0-2304"></a> <span class="n">stats_col</span> <span class="o">=</span> <span class="n">stats_columns</span><span class="p">[</span><span class="n">field_id</span><span class="p">]</span> |
| <a id="__codelineno-0-2305" name="__codelineno-0-2305"></a> |
| <a id="__codelineno-0-2306" name="__codelineno-0-2306"></a> <span class="n">column_sizes</span><span class="o">.</span><span class="n">setdefault</span><span class="p">(</span><span class="n">field_id</span><span class="p">,</span> <span class="mi">0</span><span class="p">)</span> |
| <a id="__codelineno-0-2307" name="__codelineno-0-2307"></a> <span class="n">column_sizes</span><span class="p">[</span><span class="n">field_id</span><span class="p">]</span> <span class="o">+=</span> <span class="n">column</span><span class="o">.</span><span class="n">total_compressed_size</span> |
| <a id="__codelineno-0-2308" name="__codelineno-0-2308"></a> |
| <a id="__codelineno-0-2309" name="__codelineno-0-2309"></a> <span class="k">if</span> <span class="n">stats_col</span><span class="o">.</span><span class="n">mode</span> <span class="o">==</span> <span class="n">MetricsMode</span><span class="p">(</span><span class="n">MetricModeTypes</span><span class="o">.</span><span class="n">NONE</span><span class="p">):</span> |
| <a id="__codelineno-0-2310" name="__codelineno-0-2310"></a> <span class="k">continue</span> |
| <a id="__codelineno-0-2311" name="__codelineno-0-2311"></a> |
| <a id="__codelineno-0-2312" name="__codelineno-0-2312"></a> <span class="n">value_counts</span><span class="p">[</span><span class="n">field_id</span><span class="p">]</span> <span class="o">=</span> <span class="n">value_counts</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">field_id</span><span class="p">,</span> <span class="mi">0</span><span class="p">)</span> <span class="o">+</span> <span class="n">column</span><span class="o">.</span><span class="n">num_values</span> |
| <a id="__codelineno-0-2313" name="__codelineno-0-2313"></a> |
| <a id="__codelineno-0-2314" name="__codelineno-0-2314"></a> <span class="k">if</span> <span class="n">column</span><span class="o">.</span><span class="n">is_stats_set</span><span class="p">:</span> |
| <a id="__codelineno-0-2315" name="__codelineno-0-2315"></a> <span class="k">try</span><span class="p">:</span> |
| <a id="__codelineno-0-2316" name="__codelineno-0-2316"></a> <span class="n">statistics</span> <span class="o">=</span> <span class="n">column</span><span class="o">.</span><span class="n">statistics</span> |
| <a id="__codelineno-0-2317" name="__codelineno-0-2317"></a> |
| <a id="__codelineno-0-2318" name="__codelineno-0-2318"></a> <span class="k">if</span> <span class="n">statistics</span><span class="o">.</span><span class="n">has_null_count</span><span class="p">:</span> |
| <a id="__codelineno-0-2319" name="__codelineno-0-2319"></a> <span class="n">null_value_counts</span><span class="p">[</span><span class="n">field_id</span><span class="p">]</span> <span class="o">=</span> <span class="n">null_value_counts</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">field_id</span><span class="p">,</span> <span class="mi">0</span><span class="p">)</span> <span class="o">+</span> <span class="n">statistics</span><span class="o">.</span><span class="n">null_count</span> |
| <a id="__codelineno-0-2320" name="__codelineno-0-2320"></a> |
| <a id="__codelineno-0-2321" name="__codelineno-0-2321"></a> <span class="k">if</span> <span class="n">stats_col</span><span class="o">.</span><span class="n">mode</span> <span class="o">==</span> <span class="n">MetricsMode</span><span class="p">(</span><span class="n">MetricModeTypes</span><span class="o">.</span><span class="n">COUNTS</span><span class="p">):</span> |
| <a id="__codelineno-0-2322" name="__codelineno-0-2322"></a> <span class="k">continue</span> |
| <a id="__codelineno-0-2323" name="__codelineno-0-2323"></a> |
| <a id="__codelineno-0-2324" name="__codelineno-0-2324"></a> <span class="k">if</span> <span class="n">field_id</span> <span class="ow">not</span> <span class="ow">in</span> <span class="n">col_aggs</span><span class="p">:</span> |
| <a id="__codelineno-0-2325" name="__codelineno-0-2325"></a> <span class="n">col_aggs</span><span class="p">[</span><span class="n">field_id</span><span class="p">]</span> <span class="o">=</span> <span class="n">StatsAggregator</span><span class="p">(</span> |
| <a id="__codelineno-0-2326" name="__codelineno-0-2326"></a> <span class="n">stats_col</span><span class="o">.</span><span class="n">iceberg_type</span><span class="p">,</span> <span class="n">statistics</span><span class="o">.</span><span class="n">physical_type</span><span class="p">,</span> <span class="n">stats_col</span><span class="o">.</span><span class="n">mode</span><span class="o">.</span><span class="n">length</span> |
| <a id="__codelineno-0-2327" name="__codelineno-0-2327"></a> <span class="p">)</span> |
| <a id="__codelineno-0-2328" name="__codelineno-0-2328"></a> |
| <a id="__codelineno-0-2329" name="__codelineno-0-2329"></a> <span class="n">col_aggs</span><span class="p">[</span><span class="n">field_id</span><span class="p">]</span><span class="o">.</span><span class="n">update_min</span><span class="p">(</span><span class="n">statistics</span><span class="o">.</span><span class="n">min</span><span class="p">)</span> |
| <a id="__codelineno-0-2330" name="__codelineno-0-2330"></a> <span class="n">col_aggs</span><span class="p">[</span><span class="n">field_id</span><span class="p">]</span><span class="o">.</span><span class="n">update_max</span><span class="p">(</span><span class="n">statistics</span><span class="o">.</span><span class="n">max</span><span class="p">)</span> |
| <a id="__codelineno-0-2331" name="__codelineno-0-2331"></a> |
| <a id="__codelineno-0-2332" name="__codelineno-0-2332"></a> <span class="k">except</span> <span class="n">pyarrow</span><span class="o">.</span><span class="n">lib</span><span class="o">.</span><span class="n">ArrowNotImplementedError</span> <span class="k">as</span> <span class="n">e</span><span class="p">:</span> |
| <a id="__codelineno-0-2333" name="__codelineno-0-2333"></a> <span class="n">invalidate_col</span><span class="o">.</span><span class="n">add</span><span class="p">(</span><span class="n">field_id</span><span class="p">)</span> |
| <a id="__codelineno-0-2334" name="__codelineno-0-2334"></a> <span class="n">logger</span><span class="o">.</span><span class="n">warning</span><span class="p">(</span><span class="n">e</span><span class="p">)</span> |
| <a id="__codelineno-0-2335" name="__codelineno-0-2335"></a> <span class="k">else</span><span class="p">:</span> |
| <a id="__codelineno-0-2336" name="__codelineno-0-2336"></a> <span class="n">invalidate_col</span><span class="o">.</span><span class="n">add</span><span class="p">(</span><span class="n">field_id</span><span class="p">)</span> |
| <a id="__codelineno-0-2337" name="__codelineno-0-2337"></a> <span class="n">logger</span><span class="o">.</span><span class="n">warning</span><span class="p">(</span><span class="s2">"PyArrow statistics missing for column </span><span class="si">%d</span><span class="s2"> when writing file"</span><span class="p">,</span> <span class="n">pos</span><span class="p">)</span> |
| <a id="__codelineno-0-2338" name="__codelineno-0-2338"></a> |
| <a id="__codelineno-0-2339" name="__codelineno-0-2339"></a> <span class="n">split_offsets</span><span class="o">.</span><span class="n">sort</span><span class="p">()</span> |
| <a id="__codelineno-0-2340" name="__codelineno-0-2340"></a> |
| <a id="__codelineno-0-2341" name="__codelineno-0-2341"></a> <span class="k">for</span> <span class="n">field_id</span> <span class="ow">in</span> <span class="n">invalidate_col</span><span class="p">:</span> |
| <a id="__codelineno-0-2342" name="__codelineno-0-2342"></a> <span class="n">col_aggs</span><span class="o">.</span><span class="n">pop</span><span class="p">(</span><span class="n">field_id</span><span class="p">,</span> <span class="kc">None</span><span class="p">)</span> |
| <a id="__codelineno-0-2343" name="__codelineno-0-2343"></a> <span class="n">null_value_counts</span><span class="o">.</span><span class="n">pop</span><span class="p">(</span><span class="n">field_id</span><span class="p">,</span> <span class="kc">None</span><span class="p">)</span> |
| <a id="__codelineno-0-2344" name="__codelineno-0-2344"></a> |
| <a id="__codelineno-0-2345" name="__codelineno-0-2345"></a> <span class="k">return</span> <span class="n">DataFileStatistics</span><span class="p">(</span> |
| <a id="__codelineno-0-2346" name="__codelineno-0-2346"></a> <span class="n">record_count</span><span class="o">=</span><span class="n">parquet_metadata</span><span class="o">.</span><span class="n">num_rows</span><span class="p">,</span> |
| <a id="__codelineno-0-2347" name="__codelineno-0-2347"></a> <span class="n">column_sizes</span><span class="o">=</span><span class="n">column_sizes</span><span class="p">,</span> |
| <a id="__codelineno-0-2348" name="__codelineno-0-2348"></a> <span class="n">value_counts</span><span class="o">=</span><span class="n">value_counts</span><span class="p">,</span> |
| <a id="__codelineno-0-2349" name="__codelineno-0-2349"></a> <span class="n">null_value_counts</span><span class="o">=</span><span class="n">null_value_counts</span><span class="p">,</span> |
| <a id="__codelineno-0-2350" name="__codelineno-0-2350"></a> <span class="n">nan_value_counts</span><span class="o">=</span><span class="n">nan_value_counts</span><span class="p">,</span> |
| <a id="__codelineno-0-2351" name="__codelineno-0-2351"></a> <span class="n">column_aggregates</span><span class="o">=</span><span class="n">col_aggs</span><span class="p">,</span> |
| <a id="__codelineno-0-2352" name="__codelineno-0-2352"></a> <span class="n">split_offsets</span><span class="o">=</span><span class="n">split_offsets</span><span class="p">,</span> |
| <a id="__codelineno-0-2353" name="__codelineno-0-2353"></a> <span class="p">)</span> |
| </code></pre></div></td></tr></table></div> |
| </details> |
| </div> |
| |
| </div> |
| |
| <div class="doc doc-object doc-function"> |
| |
| |
| <h2 id="pyiceberg.io.pyarrow.parquet_path_to_id_mapping" class="doc doc-heading"> |
| <code class="highlight language-python"><span class="n">parquet_path_to_id_mapping</span><span class="p">(</span><span class="n">schema</span><span class="p">)</span></code> |
| |
| <a href="#pyiceberg.io.pyarrow.parquet_path_to_id_mapping" class="headerlink" title="Permanent link">¶</a></h2> |
| |
| |
| <div class="doc doc-contents "> |
| |
| <p>Compute the mapping of parquet column path to Iceberg ID.</p> |
| <p>For each column, the parquet file metadata has a path_in_schema attribute that follows |
| a specific naming scheme for nested columns. This function computes a mapping of |
| the full paths to the corresponding Iceberg IDs.</p> |
| |
| |
| <p><span class="doc-section-title">Parameters:</span></p> |
| <table> |
| <thead> |
| <tr> |
| <th>Name</th> |
| <th>Type</th> |
| <th>Description</th> |
| <th>Default</th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr class="doc-section-item"> |
| <td> |
| <code>schema</code> |
| </td> |
| <td> |
| <code><a class="autorefs autorefs-internal" title="pyiceberg.schema.Schema" href="../../schema/#pyiceberg.schema.Schema">Schema</a></code> |
| </td> |
| <td> |
| <div class="doc-md-description"> |
| <p>The current table schema.</p> |
| </div> |
| </td> |
| <td> |
| <em>required</em> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| |
| <details class="quote"> |
| <summary>Source code in <code>pyiceberg/io/pyarrow.py</code></summary> |
| <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-2170">2170</a></span> |
| <span class="normal"><a href="#__codelineno-0-2171">2171</a></span> |
| <span class="normal"><a href="#__codelineno-0-2172">2172</a></span> |
| <span class="normal"><a href="#__codelineno-0-2173">2173</a></span> |
| <span class="normal"><a href="#__codelineno-0-2174">2174</a></span> |
| <span class="normal"><a href="#__codelineno-0-2175">2175</a></span> |
| <span class="normal"><a href="#__codelineno-0-2176">2176</a></span> |
| <span class="normal"><a href="#__codelineno-0-2177">2177</a></span> |
| <span class="normal"><a href="#__codelineno-0-2178">2178</a></span> |
| <span class="normal"><a href="#__codelineno-0-2179">2179</a></span> |
| <span class="normal"><a href="#__codelineno-0-2180">2180</a></span> |
| <span class="normal"><a href="#__codelineno-0-2181">2181</a></span> |
| <span class="normal"><a href="#__codelineno-0-2182">2182</a></span> |
| <span class="normal"><a href="#__codelineno-0-2183">2183</a></span> |
| <span class="normal"><a href="#__codelineno-0-2184">2184</a></span> |
| <span class="normal"><a href="#__codelineno-0-2185">2185</a></span> |
| <span class="normal"><a href="#__codelineno-0-2186">2186</a></span></pre></div></td><td class="code"><div><pre><span></span><code><a id="__codelineno-0-2170" name="__codelineno-0-2170"></a><span class="k">def</span><span class="w"> </span><span class="nf">parquet_path_to_id_mapping</span><span class="p">(</span> |
| <a id="__codelineno-0-2171" name="__codelineno-0-2171"></a> <span class="n">schema</span><span class="p">:</span> <span class="n">Schema</span><span class="p">,</span> |
| <a id="__codelineno-0-2172" name="__codelineno-0-2172"></a><span class="p">)</span> <span class="o">-></span> <span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="nb">int</span><span class="p">]:</span> |
| <a id="__codelineno-0-2173" name="__codelineno-0-2173"></a><span class="w"> </span><span class="sd">"""</span> |
| <a id="__codelineno-0-2174" name="__codelineno-0-2174"></a><span class="sd"> Compute the mapping of parquet column path to Iceberg ID.</span> |
| <a id="__codelineno-0-2175" name="__codelineno-0-2175"></a> |
| <a id="__codelineno-0-2176" name="__codelineno-0-2176"></a><span class="sd"> For each column, the parquet file metadata has a path_in_schema attribute that follows</span> |
| <a id="__codelineno-0-2177" name="__codelineno-0-2177"></a><span class="sd"> a specific naming scheme for nested columnds. This function computes a mapping of</span> |
| <a id="__codelineno-0-2178" name="__codelineno-0-2178"></a><span class="sd"> the full paths to the corresponding Iceberg IDs.</span> |
| <a id="__codelineno-0-2179" name="__codelineno-0-2179"></a> |
| <a id="__codelineno-0-2180" name="__codelineno-0-2180"></a><span class="sd"> Args:</span> |
| <a id="__codelineno-0-2181" name="__codelineno-0-2181"></a><span class="sd"> schema (pyiceberg.schema.Schema): The current table schema.</span> |
| <a id="__codelineno-0-2182" name="__codelineno-0-2182"></a><span class="sd"> """</span> |
| <a id="__codelineno-0-2183" name="__codelineno-0-2183"></a> <span class="n">result</span><span class="p">:</span> <span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="nb">int</span><span class="p">]</span> <span class="o">=</span> <span class="p">{}</span> |
| <a id="__codelineno-0-2184" name="__codelineno-0-2184"></a> <span class="k">for</span> <span class="n">pair</span> <span class="ow">in</span> <span class="n">pre_order_visit</span><span class="p">(</span><span class="n">schema</span><span class="p">,</span> <span class="n">ID2ParquetPathVisitor</span><span class="p">()):</span> |
| <a id="__codelineno-0-2185" name="__codelineno-0-2185"></a> <span class="n">result</span><span class="p">[</span><span class="n">pair</span><span class="o">.</span><span class="n">parquet_path</span><span class="p">]</span> <span class="o">=</span> <span class="n">pair</span><span class="o">.</span><span class="n">field_id</span> |
| <a id="__codelineno-0-2186" name="__codelineno-0-2186"></a> <span class="k">return</span> <span class="n">result</span> |
| </code></pre></div></td></tr></table></div> |
| </details> |
| </div> |
| |
| </div> |
| |
| <div class="doc doc-object doc-function"> |
| |
| |
| <h2 id="pyiceberg.io.pyarrow.visit_pyarrow" class="doc doc-heading"> |
| <code class="highlight language-python"><span class="n">visit_pyarrow</span><span class="p">(</span><span class="n">obj</span><span class="p">,</span> <span class="n">visitor</span><span class="p">)</span></code> |
| |
| <a href="#pyiceberg.io.pyarrow.visit_pyarrow" class="headerlink" title="Permanent link">¶</a></h2> |
| |
| |
| <div class="doc doc-contents "> |
| |
| <p>Apply a pyarrow schema visitor to any point within a schema.</p> |
| <p>The function traverses the schema in post-order fashion.</p> |
| |
| |
| <p><span class="doc-section-title">Parameters:</span></p> |
| <table> |
| <thead> |
| <tr> |
| <th>Name</th> |
| <th>Type</th> |
| <th>Description</th> |
| <th>Default</th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr class="doc-section-item"> |
| <td> |
| <code>obj</code> |
| </td> |
| <td> |
| <code><span title="typing.Union">Union</span>[<span title="pyarrow.DataType">DataType</span>, <span title="pyarrow.Schema">Schema</span>]</code> |
| </td> |
| <td> |
| <div class="doc-md-description"> |
| <p>An instance of a pyarrow Schema or DataType.</p> |
| </div> |
| </td> |
| <td> |
| <em>required</em> |
| </td> |
| </tr> |
| <tr class="doc-section-item"> |
| <td> |
| <code>visitor</code> |
| </td> |
| <td> |
| <code><a class="autorefs autorefs-internal" title="pyiceberg.io.pyarrow.PyArrowSchemaVisitor" href="#pyiceberg.io.pyarrow.PyArrowSchemaVisitor">PyArrowSchemaVisitor</a>[<span title="pyiceberg.io.pyarrow.T">T</span>]</code> |
| </td> |
| <td> |
| <div class="doc-md-description"> |
| <p>An instance of an implementation of the generic PyArrowSchemaVisitor base class.</p> |
| </div> |
| </td> |
| <td> |
| <em>required</em> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| |
| |
| <p><span class="doc-section-title">Raises:</span></p> |
| <table> |
| <thead> |
| <tr> |
| <th>Type</th> |
| <th>Description</th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr class="doc-section-item"> |
| <td> |
| <code><span title="NotImplementedError">NotImplementedError</span></code> |
| </td> |
| <td> |
| <div class="doc-md-description"> |
| <p>If attempting to visit an unrecognized object type.</p> |
| </div> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| |
| <details class="quote"> |
| <summary>Source code in <code>pyiceberg/io/pyarrow.py</code></summary> |
| <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-962">962</a></span> |
| <span class="normal"><a href="#__codelineno-0-963">963</a></span> |
| <span class="normal"><a href="#__codelineno-0-964">964</a></span> |
| <span class="normal"><a href="#__codelineno-0-965">965</a></span> |
| <span class="normal"><a href="#__codelineno-0-966">966</a></span> |
| <span class="normal"><a href="#__codelineno-0-967">967</a></span> |
| <span class="normal"><a href="#__codelineno-0-968">968</a></span> |
| <span class="normal"><a href="#__codelineno-0-969">969</a></span> |
| <span class="normal"><a href="#__codelineno-0-970">970</a></span> |
| <span class="normal"><a href="#__codelineno-0-971">971</a></span> |
| <span class="normal"><a href="#__codelineno-0-972">972</a></span> |
| <span class="normal"><a href="#__codelineno-0-973">973</a></span> |
| <span class="normal"><a href="#__codelineno-0-974">974</a></span> |
| <span class="normal"><a href="#__codelineno-0-975">975</a></span></pre></div></td><td class="code"><div><pre><span></span><code><a id="__codelineno-0-962" name="__codelineno-0-962"></a><span class="nd">@singledispatch</span> |
| <a id="__codelineno-0-963" name="__codelineno-0-963"></a><span class="k">def</span><span class="w"> </span><span class="nf">visit_pyarrow</span><span class="p">(</span><span class="n">obj</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="n">pa</span><span class="o">.</span><span class="n">DataType</span><span class="p">,</span> <span class="n">pa</span><span class="o">.</span><span class="n">Schema</span><span class="p">],</span> <span class="n">visitor</span><span class="p">:</span> <span class="n">PyArrowSchemaVisitor</span><span class="p">[</span><span class="n">T</span><span class="p">])</span> <span class="o">-></span> <span class="n">T</span><span class="p">:</span> |
| <a id="__codelineno-0-964" name="__codelineno-0-964"></a><span class="w"> </span><span class="sd">"""Apply a pyarrow schema visitor to any point within a schema.</span> |
| <a id="__codelineno-0-965" name="__codelineno-0-965"></a> |
| <a id="__codelineno-0-966" name="__codelineno-0-966"></a><span class="sd"> The function traverses the schema in post-order fashion.</span> |
| <a id="__codelineno-0-967" name="__codelineno-0-967"></a> |
| <a id="__codelineno-0-968" name="__codelineno-0-968"></a><span class="sd"> Args:</span> |
| <a id="__codelineno-0-969" name="__codelineno-0-969"></a><span class="sd"> obj (Union[pa.DataType, pa.Schema]): An instance of a pyarrow Schema or DataType.</span> |
| <a id="__codelineno-0-970" name="__codelineno-0-970"></a><span class="sd"> visitor (PyArrowSchemaVisitor[T]): An instance of an implementation of the generic PyArrowSchemaVisitor base class.</span> |
| <a id="__codelineno-0-971" name="__codelineno-0-971"></a> |
| <a id="__codelineno-0-972" name="__codelineno-0-972"></a><span class="sd"> Raises:</span> |
| <a id="__codelineno-0-973" name="__codelineno-0-973"></a><span class="sd"> NotImplementedError: If attempting to visit an unrecognized object type.</span> |
| <a id="__codelineno-0-974" name="__codelineno-0-974"></a><span class="sd"> """</span> |
| <a id="__codelineno-0-975" name="__codelineno-0-975"></a> <span class="k">raise</span> <span class="ne">NotImplementedError</span><span class="p">(</span><span class="sa">f</span><span class="s2">"Cannot visit non-type: </span><span class="si">{</span><span class="n">obj</span><span class="si">}</span><span class="s2">"</span><span class="p">)</span> |
| </code></pre></div></td></tr></table></div> |
| </details> |
| </div> |
| |
| </div> |
| |
| |
| |
| </div> |
| |
| </div> |
| |
| </div> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| </article> |
| </div> |
| |
| |
| <script>var target=document.getElementById(location.hash.slice(1));target&&target.name&&(target.checked=target.name.startsWith("__tabbed_"))</script> |
| </div> |
| |
| <button type="button" class="md-top md-icon" data-md-component="top" hidden> |
| |
| <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M13 20h-2V8l-5.5 5.5-1.42-1.42L12 4.16l7.92 7.92-1.42 1.42L13 8z"/></svg> |
| Back to top |
| </button> |
| |
| </main> |
| |
| <footer class="md-footer"> |
| |
| <div class="md-footer-meta md-typeset"> |
| <div class="md-footer-meta__inner md-grid"> |
| <div class="md-copyright"> |
| |
| |
| Made with |
| <a href="https://squidfunk.github.io/mkdocs-material/" target="_blank" rel="noopener"> |
| Material for MkDocs |
| </a> |
| |
| </div> |
| |
| </div> |
| </div> |
| </footer> |
| |
| </div> |
| <div class="md-dialog" data-md-component="dialog"> |
| <div class="md-dialog__inner md-typeset"></div> |
| </div> |
| |
| |
| <script id="__config" type="application/json">{"base": "../../../..", "features": ["navigation.top", "navigation.tracking", "navigation.tabs", "navigation.tabs.sticky"], "search": "../../../../assets/javascripts/workers/search.f8cc74c7.min.js", "translations": {"clipboard.copied": "Copied to clipboard", "clipboard.copy": "Copy to clipboard", "search.result.more.one": "1 more on this page", "search.result.more.other": "# more on this page", "search.result.none": "No matching documents", "search.result.one": "1 matching document", "search.result.other": "# matching documents", "search.result.placeholder": "Type to start searching", "search.result.term.missing": "Missing", "select.version": "Select version"}}</script> |
| |
| |
| <script src="../../../../assets/javascripts/bundle.f1b6f286.min.js"></script> |
| |
| |
| </body> |
| </html> |