<!-- blob: 8aa9a08689ddca4be81f2611918d1fc7e10be5ca [file] [log] [blame] -->
<!doctype html>
<html lang="en" class="no-js">
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width,initial-scale=1">
<link rel="canonical" href="https://py.iceberg.apache.org/reference/pyiceberg/io/pyarrow/">
<link rel="prev" href="../fsspec/">
<link rel="next" href="../../manifest/">
<link rel="icon" href="../../../../assets/images/iceberg-logo-icon.png">
<meta name="generator" content="mkdocs-1.6.1, mkdocs-material-9.6.21">
<title>pyarrow - PyIceberg</title>
<link rel="stylesheet" href="../../../../assets/stylesheets/main.2a3383ac.min.css">
<link rel="stylesheet" href="../../../../assets/stylesheets/palette.06af60db.min.css">
<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
<link rel="stylesheet" href="https://fonts.googleapis.com/css?family=Lato:300,300i,400,400i,700,700i%7CRoboto+Mono:400,400i,700,700i&display=fallback">
<style>:root{--md-text-font:"Lato";--md-code-font:"Roboto Mono"}</style>
<link rel="stylesheet" href="../../../../assets/_mkdocstrings.css">
<!--
  Global helpers defined by the inline script below:
  * __md_scope: URL obtained by resolving "../../../.." against the current location.
  * __md_hash(str): folds the characters of str into an integer, starting from 0 and
    applying (h << 5) - h + charCode for each character.
  * __md_get(key, storage = localStorage, scope = __md_scope): returns JSON.parse of the
    item stored under scope.pathname + "." + key.
  * __md_set(key, value, storage = localStorage, scope = __md_scope): stores
    JSON.stringify(value) under the same key scheme; any exception thrown while
    storing is caught and ignored.
-->
<script>__md_scope=new URL("../../../..",location),__md_hash=e=>[...e].reduce(((e,_)=>(e<<5)-e+_.charCodeAt(0)),0),__md_get=(e,_=localStorage,t=__md_scope)=>JSON.parse(_.getItem(t.pathname+"."+e)),__md_set=(e,_,t=localStorage,a=__md_scope)=>{try{t.setItem(a.pathname+"."+e,JSON.stringify(_))}catch(e){}}</script>
<!-- Matomo -->
<script>
// Tracker command queue: reuse window._paq when the page already has one, otherwise start empty.
var _paq = window._paq = window._paq || [];
// Tracker methods like "setCustomDimension" should be queued before "trackPageView".
_paq.push(["setDoNotTrack", true]);
_paq.push(["disableCookies"]);
_paq.push(["trackPageView"]);
_paq.push(["enableLinkTracking"]);
(function () {
  var baseUrl = "https://analytics.apache.org/";
  _paq.push(["setTrackerUrl", baseUrl + "matomo.php"]);
  _paq.push(["setSiteId", "82"]);
  // Create an async script element for matomo.js and insert it ahead of the
  // first script element currently in the document.
  var doc = document;
  var trackerScript = doc.createElement("script");
  var firstScript = doc.getElementsByTagName("script")[0];
  trackerScript.async = true;
  trackerScript.src = baseUrl + "matomo.js";
  firstScript.parentNode.insertBefore(trackerScript, firstScript);
})();
</script>
<!-- End Matomo -->
</head>
<body dir="ltr" data-md-color-scheme="default" data-md-color-primary="indigo" data-md-color-accent="indigo">
<input class="md-toggle" data-md-toggle="drawer" type="checkbox" id="__drawer" autocomplete="off">
<input class="md-toggle" data-md-toggle="search" type="checkbox" id="__search" autocomplete="off">
<label class="md-overlay" for="__drawer"></label>
<div data-md-component="skip">
<a href="#pyiceberg.io.pyarrow" class="md-skip">
Skip to content
</a>
</div>
<div data-md-component="announce">
</div>
<header class="md-header md-header--shadow md-header--lifted" data-md-component="header">
<nav class="md-header__inner md-grid" aria-label="Header">
<a href="../../../.." title="PyIceberg" class="md-header__button md-logo" aria-label="PyIceberg" data-md-component="logo">
<img src="../../../../assets/images/iceberg-logo-icon.png" alt="logo">
</a>
<label class="md-header__button md-icon" for="__drawer">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M3 6h18v2H3zm0 5h18v2H3zm0 5h18v2H3z"/></svg>
</label>
<div class="md-header__title" data-md-component="header-title">
<div class="md-header__ellipsis">
<div class="md-header__topic">
<span class="md-ellipsis">
PyIceberg
</span>
</div>
<div class="md-header__topic" data-md-component="header-topic">
<span class="md-ellipsis">
pyarrow
</span>
</div>
</div>
</div>
<form class="md-header__option" data-md-component="palette">
<input class="md-option" data-md-color-media="" data-md-color-scheme="default" data-md-color-primary="indigo" data-md-color-accent="indigo" aria-label="Switch to dark mode" type="radio" name="__palette" id="__palette_0">
<label class="md-header__button md-icon" title="Switch to dark mode" for="__palette_1" hidden>
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M12 8a4 4 0 0 0-4 4 4 4 0 0 0 4 4 4 4 0 0 0 4-4 4 4 0 0 0-4-4m0 10a6 6 0 0 1-6-6 6 6 0 0 1 6-6 6 6 0 0 1 6 6 6 6 0 0 1-6 6m8-9.31V4h-4.69L12 .69 8.69 4H4v4.69L.69 12 4 15.31V20h4.69L12 23.31 15.31 20H20v-4.69L23.31 12z"/></svg>
</label>
<input class="md-option" data-md-color-media="" data-md-color-scheme="slate" data-md-color-primary="indigo" data-md-color-accent="indigo" aria-label="Switch to light mode" type="radio" name="__palette" id="__palette_1">
<label class="md-header__button md-icon" title="Switch to light mode" for="__palette_0" hidden>
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M12 18c-.89 0-1.74-.2-2.5-.55C11.56 16.5 13 14.42 13 12s-1.44-4.5-3.5-5.45C10.26 6.2 11.11 6 12 6a6 6 0 0 1 6 6 6 6 0 0 1-6 6m8-9.31V4h-4.69L12 .69 8.69 4H4v4.69L.69 12 4 15.31V20h4.69L12 23.31 15.31 20H20v-4.69L23.31 12z"/></svg>
</label>
</form>
<!--
  Applies the colour palette stored under the "__palette" key (read through __md_get).
  When the stored media value is "(prefers-color-scheme)", the script picks the palette
  input whose data-md-color-media matches the current light/dark preference (via
  matchMedia) and copies that input's media, scheme, primary and accent values into the
  stored palette object. Every entry of palette.color is then written to the body as a
  data-md-color-<key> attribute.
  NOTE(review): the palette inputs visible on this page carry data-md-color-media="",
  so the querySelector lookup would return null in the "(prefers-color-scheme)" branch
  and the following getAttribute call would throw. Confirm that stored state can never
  reach that branch on this site.
-->
<script>var palette=__md_get("__palette");if(palette&&palette.color){if("(prefers-color-scheme)"===palette.color.media){var media=matchMedia("(prefers-color-scheme: light)"),input=document.querySelector(media.matches?"[data-md-color-media='(prefers-color-scheme: light)']":"[data-md-color-media='(prefers-color-scheme: dark)']");palette.color.media=input.getAttribute("data-md-color-media"),palette.color.scheme=input.getAttribute("data-md-color-scheme"),palette.color.primary=input.getAttribute("data-md-color-primary"),palette.color.accent=input.getAttribute("data-md-color-accent")}for(var[key,value]of Object.entries(palette.color))document.body.setAttribute("data-md-color-"+key,value)}</script>
<label class="md-header__button md-icon" for="__search">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M9.5 3A6.5 6.5 0 0 1 16 9.5c0 1.61-.59 3.09-1.56 4.23l.27.27h.79l5 5-1.5 1.5-5-5v-.79l-.27-.27A6.52 6.52 0 0 1 9.5 16 6.5 6.5 0 0 1 3 9.5 6.5 6.5 0 0 1 9.5 3m0 2C7 5 5 7 5 9.5S7 14 9.5 14 14 12 14 9.5 12 5 9.5 5"/></svg>
</label>
<!-- Search overlay. An element with role="dialog" needs an accessible name, which aria-label supplies here. -->
<div class="md-search" data-md-component="search" role="dialog" aria-label="Search">
<label class="md-search__overlay" for="__search"></label>
<div class="md-search__inner" role="search">
<form class="md-search__form" name="search">
<input type="text" class="md-search__input" name="query" aria-label="Search" placeholder="Search" autocapitalize="off" autocorrect="off" autocomplete="off" spellcheck="false" data-md-component="search-query" required>
<label class="md-search__icon md-icon" for="__search">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M9.5 3A6.5 6.5 0 0 1 16 9.5c0 1.61-.59 3.09-1.56 4.23l.27.27h.79l5 5-1.5 1.5-5-5v-.79l-.27-.27A6.52 6.52 0 0 1 9.5 16 6.5 6.5 0 0 1 3 9.5 6.5 6.5 0 0 1 9.5 3m0 2C7 5 5 7 5 9.5S7 14 9.5 14 14 12 14 9.5 12 5 9.5 5"/></svg>
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M20 11v2H8l5.5 5.5-1.42 1.42L4.16 12l7.92-7.92L13.5 5.5 8 11z"/></svg>
</label>
<nav class="md-search__options" aria-label="Search">
<button type="reset" class="md-search__icon md-icon" title="Clear" aria-label="Clear" tabindex="-1">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M19 6.41 17.59 5 12 10.59 6.41 5 5 6.41 10.59 12 5 17.59 6.41 19 12 13.41 17.59 19 19 17.59 13.41 12z"/></svg>
</button>
</nav>
</form>
<div class="md-search__output">
<div class="md-search__scrollwrap" tabindex="0" data-md-scrollfix>
<div class="md-search-result" data-md-component="search-result">
<div class="md-search-result__meta">
Initializing search
</div>
<ol class="md-search-result__list" role="presentation"></ol>
</div>
</div>
</div>
</div>
</div>
<div class="md-header__source">
<a href="https://github.com/apache/iceberg-python" title="Go to repository" class="md-source" data-md-component="source">
<div class="md-source__icon md-icon">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512"><!--! Font Awesome Free 7.0.1 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License) Copyright 2025 Fonticons, Inc.--><path d="M439.6 236.1 244 40.5c-5.4-5.5-12.8-8.5-20.4-8.5s-15 3-20.4 8.4L162.5 81l51.5 51.5c27.1-9.1 52.7 16.8 43.4 43.7l49.7 49.7c34.2-11.8 61.2 31 35.5 56.7-26.5 26.5-70.2-2.9-56-37.3L240.3 199v121.9c25.3 12.5 22.3 41.8 9.1 55-6.4 6.4-15.2 10.1-24.3 10.1s-17.8-3.6-24.3-10.1c-17.6-17.6-11.1-46.9 11.2-56v-123c-20.8-8.5-24.6-30.7-18.6-45L142.6 101 8.5 235.1C3 240.6 0 247.9 0 255.5s3 15 8.5 20.4l195.6 195.7c5.4 5.4 12.7 8.4 20.4 8.4s15-3 20.4-8.4l194.7-194.7c5.4-5.4 8.4-12.8 8.4-20.4s-3-15-8.4-20.4"/></svg>
</div>
<div class="md-source__repository">
apache/iceberg-python
</div>
</a>
</div>
</nav>
<nav class="md-tabs" aria-label="Tabs" data-md-component="tabs">
<div class="md-grid">
<ul class="md-tabs__list">
<li class="md-tabs__item">
<a href="../../../.." class="md-tabs__link">
Getting started
</a>
</li>
<li class="md-tabs__item">
<a href="../../../../configuration/" class="md-tabs__link">
Configuration
</a>
</li>
<li class="md-tabs__item">
<a href="../../../../cli/" class="md-tabs__link">
CLI
</a>
</li>
<li class="md-tabs__item">
<a href="../../../../api/" class="md-tabs__link">
API
</a>
</li>
<li class="md-tabs__item">
<a href="../../../../contributing/" class="md-tabs__link">
Contributing
</a>
</li>
<li class="md-tabs__item">
<a href="../../../../community/" class="md-tabs__link">
Community
</a>
</li>
<li class="md-tabs__item">
<a href="../../../../verify-release/" class="md-tabs__link">
Releases
</a>
</li>
<li class="md-tabs__item md-tabs__item--active">
<a href="../../" class="md-tabs__link">
Code Reference
</a>
</li>
</ul>
</div>
</nav>
</header>
<div class="md-container" data-md-component="container">
<main class="md-main" data-md-component="main">
<div class="md-main__inner md-grid">
<div class="md-sidebar md-sidebar--primary" data-md-component="sidebar" data-md-type="navigation" >
<div class="md-sidebar__scrollwrap">
<div class="md-sidebar__inner">
<nav class="md-nav md-nav--primary md-nav--lifted" aria-label="Navigation" data-md-level="0">
<label class="md-nav__title" for="__drawer">
<a href="../../../.." title="PyIceberg" class="md-nav__button md-logo" aria-label="PyIceberg" data-md-component="logo">
<img src="../../../../assets/images/iceberg-logo-icon.png" alt="logo">
</a>
PyIceberg
</label>
<div class="md-nav__source">
<a href="https://github.com/apache/iceberg-python" title="Go to repository" class="md-source" data-md-component="source">
<div class="md-source__icon md-icon">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512"><!--! Font Awesome Free 7.0.1 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License) Copyright 2025 Fonticons, Inc.--><path d="M439.6 236.1 244 40.5c-5.4-5.5-12.8-8.5-20.4-8.5s-15 3-20.4 8.4L162.5 81l51.5 51.5c27.1-9.1 52.7 16.8 43.4 43.7l49.7 49.7c34.2-11.8 61.2 31 35.5 56.7-26.5 26.5-70.2-2.9-56-37.3L240.3 199v121.9c25.3 12.5 22.3 41.8 9.1 55-6.4 6.4-15.2 10.1-24.3 10.1s-17.8-3.6-24.3-10.1c-17.6-17.6-11.1-46.9 11.2-56v-123c-20.8-8.5-24.6-30.7-18.6-45L142.6 101 8.5 235.1C3 240.6 0 247.9 0 255.5s3 15 8.5 20.4l195.6 195.7c5.4 5.4 12.7 8.4 20.4 8.4s15-3 20.4-8.4l194.7-194.7c5.4-5.4 8.4-12.8 8.4-20.4s-3-15-8.4-20.4"/></svg>
</div>
<div class="md-source__repository">
apache/iceberg-python
</div>
</a>
</div>
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item">
<a href="../../../.." class="md-nav__link">
<span class="md-ellipsis">
Getting started
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../../../configuration/" class="md-nav__link">
<span class="md-ellipsis">
Configuration
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../../../cli/" class="md-nav__link">
<span class="md-ellipsis">
CLI
</span>
</a>
</li>
<li class="md-nav__item md-nav__item--nested">
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_4" >
<div class="md-nav__link md-nav__container">
<a href="../../../../api/" class="md-nav__link ">
<span class="md-ellipsis">
API
</span>
</a>
<label class="md-nav__link " for="__nav_4" id="__nav_4_label" tabindex="0">
<span class="md-nav__icon md-icon"></span>
</label>
</div>
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_4_label" aria-expanded="false">
<label class="md-nav__title" for="__nav_4">
<span class="md-nav__icon md-icon"></span>
API
</label>
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item">
<a href="../../../../row-filter-syntax/" class="md-nav__link">
<span class="md-ellipsis">
Row Filter Syntax
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../../../expression-dsl/" class="md-nav__link">
<span class="md-ellipsis">
Expression DSL
</span>
</a>
</li>
</ul>
</nav>
</li>
<li class="md-nav__item">
<a href="../../../../contributing/" class="md-nav__link">
<span class="md-ellipsis">
Contributing
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../../../community/" class="md-nav__link">
<span class="md-ellipsis">
Community
</span>
</a>
</li>
<li class="md-nav__item md-nav__item--nested">
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_7" >
<label class="md-nav__link" for="__nav_7" id="__nav_7_label" tabindex="0">
<span class="md-ellipsis">
Releases
</span>
<span class="md-nav__icon md-icon"></span>
</label>
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_7_label" aria-expanded="false">
<label class="md-nav__title" for="__nav_7">
<span class="md-nav__icon md-icon"></span>
Releases
</label>
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item">
<a href="../../../../verify-release/" class="md-nav__link">
<span class="md-ellipsis">
Verify a release
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../../../how-to-release/" class="md-nav__link">
<span class="md-ellipsis">
How to release
</span>
</a>
</li>
<li class="md-nav__item">
<a href="https://github.com/apache/iceberg-python/releases" class="md-nav__link">
<span class="md-ellipsis">
Release Notes
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../../../nightly-build/" class="md-nav__link">
<span class="md-ellipsis">
Nightly Build
</span>
</a>
</li>
</ul>
</nav>
</li>
<li class="md-nav__item md-nav__item--active md-nav__item--section md-nav__item--nested">
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_8" checked>
<!-- Section label for the "Code Reference" toggle. It carries no tabindex: an empty tabindex value is not a valid integer, and a label without one is not focusable by default. -->
<label class="md-nav__link" for="__nav_8" id="__nav_8_label">
<span class="md-ellipsis">
Code Reference
</span>
<span class="md-nav__icon md-icon"></span>
</label>
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_8_label" aria-expanded="true">
<label class="md-nav__title" for="__nav_8">
<span class="md-nav__icon md-icon"></span>
Code Reference
</label>
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item md-nav__item--active md-nav__item--nested">
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_8_1" checked>
<div class="md-nav__link md-nav__container">
<a href="../../" class="md-nav__link ">
<span class="md-ellipsis">
pyiceberg
</span>
</a>
<label class="md-nav__link " for="__nav_8_1" id="__nav_8_1_label" tabindex="0">
<span class="md-nav__icon md-icon"></span>
</label>
</div>
<nav class="md-nav" data-md-level="2" aria-labelledby="__nav_8_1_label" aria-expanded="true">
<label class="md-nav__title" for="__nav_8_1">
<span class="md-nav__icon md-icon"></span>
pyiceberg
</label>
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item md-nav__item--nested">
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_8_1_1" >
<div class="md-nav__link md-nav__container">
<a href="../../avro/" class="md-nav__link ">
<span class="md-ellipsis">
avro
</span>
</a>
<label class="md-nav__link " for="__nav_8_1_1" id="__nav_8_1_1_label" tabindex="0">
<span class="md-nav__icon md-icon"></span>
</label>
</div>
<nav class="md-nav" data-md-level="3" aria-labelledby="__nav_8_1_1_label" aria-expanded="false">
<label class="md-nav__title" for="__nav_8_1_1">
<span class="md-nav__icon md-icon"></span>
avro
</label>
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item md-nav__item--nested">
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_8_1_1_1" >
<div class="md-nav__link md-nav__container">
<a href="../../avro/codecs/" class="md-nav__link ">
<span class="md-ellipsis">
codecs
</span>
</a>
<label class="md-nav__link " for="__nav_8_1_1_1" id="__nav_8_1_1_1_label" tabindex="0">
<span class="md-nav__icon md-icon"></span>
</label>
</div>
<nav class="md-nav" data-md-level="4" aria-labelledby="__nav_8_1_1_1_label" aria-expanded="false">
<label class="md-nav__title" for="__nav_8_1_1_1">
<span class="md-nav__icon md-icon"></span>
codecs
</label>
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item">
<a href="../../avro/codecs/bzip2/" class="md-nav__link">
<span class="md-ellipsis">
bzip2
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../avro/codecs/codec/" class="md-nav__link">
<span class="md-ellipsis">
codec
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../avro/codecs/deflate/" class="md-nav__link">
<span class="md-ellipsis">
deflate
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../avro/codecs/snappy_codec/" class="md-nav__link">
<span class="md-ellipsis">
snappy_codec
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../avro/codecs/zstandard_codec/" class="md-nav__link">
<span class="md-ellipsis">
zstandard_codec
</span>
</a>
</li>
</ul>
</nav>
</li>
<li class="md-nav__item">
<a href="../../avro/decoder/" class="md-nav__link">
<span class="md-ellipsis">
decoder
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../avro/encoder/" class="md-nav__link">
<span class="md-ellipsis">
encoder
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../avro/file/" class="md-nav__link">
<span class="md-ellipsis">
file
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../avro/reader/" class="md-nav__link">
<span class="md-ellipsis">
reader
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../avro/resolver/" class="md-nav__link">
<span class="md-ellipsis">
resolver
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../avro/writer/" class="md-nav__link">
<span class="md-ellipsis">
writer
</span>
</a>
</li>
</ul>
</nav>
</li>
<li class="md-nav__item md-nav__item--nested">
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_8_1_2" >
<div class="md-nav__link md-nav__container">
<a href="../../catalog/" class="md-nav__link ">
<span class="md-ellipsis">
catalog
</span>
</a>
<label class="md-nav__link " for="__nav_8_1_2" id="__nav_8_1_2_label" tabindex="0">
<span class="md-nav__icon md-icon"></span>
</label>
</div>
<nav class="md-nav" data-md-level="3" aria-labelledby="__nav_8_1_2_label" aria-expanded="false">
<label class="md-nav__title" for="__nav_8_1_2">
<span class="md-nav__icon md-icon"></span>
catalog
</label>
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item">
<a href="../../catalog/bigquery_metastore/" class="md-nav__link">
<span class="md-ellipsis">
bigquery_metastore
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../catalog/dynamodb/" class="md-nav__link">
<span class="md-ellipsis">
dynamodb
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../catalog/glue/" class="md-nav__link">
<span class="md-ellipsis">
glue
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../catalog/hive/" class="md-nav__link">
<span class="md-ellipsis">
hive
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../catalog/memory/" class="md-nav__link">
<span class="md-ellipsis">
memory
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../catalog/noop/" class="md-nav__link">
<span class="md-ellipsis">
noop
</span>
</a>
</li>
<li class="md-nav__item md-nav__item--nested">
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_8_1_2_7" >
<div class="md-nav__link md-nav__container">
<a href="../../catalog/rest/" class="md-nav__link ">
<span class="md-ellipsis">
rest
</span>
</a>
<label class="md-nav__link " for="__nav_8_1_2_7" id="__nav_8_1_2_7_label" tabindex="0">
<span class="md-nav__icon md-icon"></span>
</label>
</div>
<nav class="md-nav" data-md-level="4" aria-labelledby="__nav_8_1_2_7_label" aria-expanded="false">
<label class="md-nav__title" for="__nav_8_1_2_7">
<span class="md-nav__icon md-icon"></span>
rest
</label>
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item">
<a href="../../catalog/rest/auth/" class="md-nav__link">
<span class="md-ellipsis">
auth
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../catalog/rest/response/" class="md-nav__link">
<span class="md-ellipsis">
response
</span>
</a>
</li>
</ul>
</nav>
</li>
<li class="md-nav__item">
<a href="../../catalog/sql/" class="md-nav__link">
<span class="md-ellipsis">
sql
</span>
</a>
</li>
</ul>
</nav>
</li>
<li class="md-nav__item md-nav__item--nested">
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_8_1_3" >
<div class="md-nav__link md-nav__container">
<a href="../../cli/" class="md-nav__link ">
<span class="md-ellipsis">
cli
</span>
</a>
<label class="md-nav__link " for="__nav_8_1_3" id="__nav_8_1_3_label" tabindex="0">
<span class="md-nav__icon md-icon"></span>
</label>
</div>
<nav class="md-nav" data-md-level="3" aria-labelledby="__nav_8_1_3_label" aria-expanded="false">
<label class="md-nav__title" for="__nav_8_1_3">
<span class="md-nav__icon md-icon"></span>
cli
</label>
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item">
<a href="../../cli/console/" class="md-nav__link">
<span class="md-ellipsis">
console
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../cli/output/" class="md-nav__link">
<span class="md-ellipsis">
output
</span>
</a>
</li>
</ul>
</nav>
</li>
<li class="md-nav__item">
<a href="../../conversions/" class="md-nav__link">
<span class="md-ellipsis">
conversions
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../exceptions/" class="md-nav__link">
<span class="md-ellipsis">
exceptions
</span>
</a>
</li>
<li class="md-nav__item md-nav__item--nested">
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_8_1_6" >
<div class="md-nav__link md-nav__container">
<a href="../../expressions/" class="md-nav__link ">
<span class="md-ellipsis">
expressions
</span>
</a>
<label class="md-nav__link " for="__nav_8_1_6" id="__nav_8_1_6_label" tabindex="0">
<span class="md-nav__icon md-icon"></span>
</label>
</div>
<nav class="md-nav" data-md-level="3" aria-labelledby="__nav_8_1_6_label" aria-expanded="false">
<label class="md-nav__title" for="__nav_8_1_6">
<span class="md-nav__icon md-icon"></span>
expressions
</label>
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item">
<a href="../../expressions/literals/" class="md-nav__link">
<span class="md-ellipsis">
literals
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../expressions/parser/" class="md-nav__link">
<span class="md-ellipsis">
parser
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../expressions/visitors/" class="md-nav__link">
<span class="md-ellipsis">
visitors
</span>
</a>
</li>
</ul>
</nav>
</li>
<li class="md-nav__item md-nav__item--active md-nav__item--nested">
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_8_1_7" checked>
<div class="md-nav__link md-nav__container">
<a href="../" class="md-nav__link ">
<span class="md-ellipsis">
io
</span>
</a>
<label class="md-nav__link " for="__nav_8_1_7" id="__nav_8_1_7_label" tabindex="0">
<span class="md-nav__icon md-icon"></span>
</label>
</div>
<nav class="md-nav" data-md-level="3" aria-labelledby="__nav_8_1_7_label" aria-expanded="true">
<label class="md-nav__title" for="__nav_8_1_7">
<span class="md-nav__icon md-icon"></span>
io
</label>
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item">
<a href="../fsspec/" class="md-nav__link">
<span class="md-ellipsis">
fsspec
</span>
</a>
</li>
<li class="md-nav__item md-nav__item--active">
<input class="md-nav__toggle md-toggle" type="checkbox" id="__toc">
<label class="md-nav__link md-nav__link--active" for="__toc">
<span class="md-ellipsis">
pyarrow
</span>
<span class="md-nav__icon md-icon"></span>
</label>
<a href="./" class="md-nav__link md-nav__link--active">
<span class="md-ellipsis">
pyarrow
</span>
</a>
<nav class="md-nav md-nav--secondary" aria-label="Table of contents">
<label class="md-nav__title" for="__toc">
<span class="md-nav__icon md-icon"></span>
Table of contents
</label>
<ul class="md-nav__list" data-md-component="toc" data-md-scrollfix>
<li class="md-nav__item">
<a href="#pyiceberg.io.pyarrow" class="md-nav__link">
<span class="md-ellipsis">
pyarrow
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#pyiceberg.io.pyarrow.ArrowScan" class="md-nav__link">
<span class="md-ellipsis">
ArrowScan
</span>
</a>
<nav class="md-nav" aria-label="ArrowScan">
<ul class="md-nav__list">
<li class="md-nav__item">
<a href="#pyiceberg.io.pyarrow.ArrowScan.to_record_batches" class="md-nav__link">
<span class="md-ellipsis">
to_record_batches
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#pyiceberg.io.pyarrow.ArrowScan.to_table" class="md-nav__link">
<span class="md-ellipsis">
to_table
</span>
</a>
</li>
</ul>
</nav>
</li>
<li class="md-nav__item">
<a href="#pyiceberg.io.pyarrow.PyArrowFile" class="md-nav__link">
<span class="md-ellipsis">
PyArrowFile
</span>
</a>
<nav class="md-nav" aria-label="PyArrowFile">
<ul class="md-nav__list">
<li class="md-nav__item">
<a href="#pyiceberg.io.pyarrow.PyArrowFile.__len__" class="md-nav__link">
<span class="md-ellipsis">
__len__
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#pyiceberg.io.pyarrow.PyArrowFile.create" class="md-nav__link">
<span class="md-ellipsis">
create
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#pyiceberg.io.pyarrow.PyArrowFile.exists" class="md-nav__link">
<span class="md-ellipsis">
exists
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#pyiceberg.io.pyarrow.PyArrowFile.open" class="md-nav__link">
<span class="md-ellipsis">
open
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#pyiceberg.io.pyarrow.PyArrowFile.to_input_file" class="md-nav__link">
<span class="md-ellipsis">
to_input_file
</span>
</a>
</li>
</ul>
</nav>
</li>
<li class="md-nav__item">
<a href="#pyiceberg.io.pyarrow.PyArrowFileIO" class="md-nav__link">
<span class="md-ellipsis">
PyArrowFileIO
</span>
</a>
<nav class="md-nav" aria-label="PyArrowFileIO">
<ul class="md-nav__list">
<li class="md-nav__item">
<a href="#pyiceberg.io.pyarrow.PyArrowFileIO.__getstate__" class="md-nav__link">
<span class="md-ellipsis">
__getstate__
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#pyiceberg.io.pyarrow.PyArrowFileIO.__setstate__" class="md-nav__link">
<span class="md-ellipsis">
__setstate__
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#pyiceberg.io.pyarrow.PyArrowFileIO.delete" class="md-nav__link">
<span class="md-ellipsis">
delete
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#pyiceberg.io.pyarrow.PyArrowFileIO.new_input" class="md-nav__link">
<span class="md-ellipsis">
new_input
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#pyiceberg.io.pyarrow.PyArrowFileIO.new_output" class="md-nav__link">
<span class="md-ellipsis">
new_output
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#pyiceberg.io.pyarrow.PyArrowFileIO.parse_location" class="md-nav__link">
<span class="md-ellipsis">
parse_location
</span>
</a>
</li>
</ul>
</nav>
</li>
<li class="md-nav__item">
<a href="#pyiceberg.io.pyarrow.PyArrowSchemaVisitor" class="md-nav__link">
<span class="md-ellipsis">
PyArrowSchemaVisitor
</span>
</a>
<nav class="md-nav" aria-label="PyArrowSchemaVisitor">
<ul class="md-nav__list">
<li class="md-nav__item">
<a href="#pyiceberg.io.pyarrow.PyArrowSchemaVisitor.after_field" class="md-nav__link">
<span class="md-ellipsis">
after_field
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#pyiceberg.io.pyarrow.PyArrowSchemaVisitor.after_list_element" class="md-nav__link">
<span class="md-ellipsis">
after_list_element
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#pyiceberg.io.pyarrow.PyArrowSchemaVisitor.after_map_key" class="md-nav__link">
<span class="md-ellipsis">
after_map_key
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#pyiceberg.io.pyarrow.PyArrowSchemaVisitor.after_map_value" class="md-nav__link">
<span class="md-ellipsis">
after_map_value
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#pyiceberg.io.pyarrow.PyArrowSchemaVisitor.before_field" class="md-nav__link">
<span class="md-ellipsis">
before_field
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#pyiceberg.io.pyarrow.PyArrowSchemaVisitor.before_list_element" class="md-nav__link">
<span class="md-ellipsis">
before_list_element
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#pyiceberg.io.pyarrow.PyArrowSchemaVisitor.before_map_key" class="md-nav__link">
<span class="md-ellipsis">
before_map_key
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#pyiceberg.io.pyarrow.PyArrowSchemaVisitor.before_map_value" class="md-nav__link">
<span class="md-ellipsis">
before_map_value
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#pyiceberg.io.pyarrow.PyArrowSchemaVisitor.field" class="md-nav__link">
<span class="md-ellipsis">
field
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#pyiceberg.io.pyarrow.PyArrowSchemaVisitor.list" class="md-nav__link">
<span class="md-ellipsis">
list
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#pyiceberg.io.pyarrow.PyArrowSchemaVisitor.map" class="md-nav__link">
<span class="md-ellipsis">
map
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#pyiceberg.io.pyarrow.PyArrowSchemaVisitor.primitive" class="md-nav__link">
<span class="md-ellipsis">
primitive
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#pyiceberg.io.pyarrow.PyArrowSchemaVisitor.schema" class="md-nav__link">
<span class="md-ellipsis">
schema
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#pyiceberg.io.pyarrow.PyArrowSchemaVisitor.struct" class="md-nav__link">
<span class="md-ellipsis">
struct
</span>
</a>
</li>
</ul>
</nav>
</li>
<li class="md-nav__item">
<a href="#pyiceberg.io.pyarrow.UnsupportedPyArrowTypeException" class="md-nav__link">
<span class="md-ellipsis">
UnsupportedPyArrowTypeException
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#pyiceberg.io.pyarrow.compute_statistics_plan" class="md-nav__link">
<span class="md-ellipsis">
compute_statistics_plan
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#pyiceberg.io.pyarrow.data_file_statistics_from_parquet_metadata" class="md-nav__link">
<span class="md-ellipsis">
data_file_statistics_from_parquet_metadata
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#pyiceberg.io.pyarrow.parquet_path_to_id_mapping" class="md-nav__link">
<span class="md-ellipsis">
parquet_path_to_id_mapping
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#pyiceberg.io.pyarrow.visit_pyarrow" class="md-nav__link">
<span class="md-ellipsis">
visit_pyarrow
</span>
</a>
</li>
</ul>
</nav>
</li>
</ul>
</nav>
</li>
<li class="md-nav__item">
<a href="../../manifest/" class="md-nav__link">
<span class="md-ellipsis">
manifest
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../partitioning/" class="md-nav__link">
<span class="md-ellipsis">
partitioning
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../schema/" class="md-nav__link">
<span class="md-ellipsis">
schema
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../serializers/" class="md-nav__link">
<span class="md-ellipsis">
serializers
</span>
</a>
</li>
<!--
  Collapsible "table" section. The checkbox #__nav_8_1_12 is the toggle that both
  labels below point at via their "for" attribute.
  The toggle label holds only an icon span and therefore has no text, so each nested
  <nav> is named with an explicit aria-label instead of aria-labelledby referencing
  that empty label. The label ids are kept in case anything else refers to them.
-->
<li class="md-nav__item md-nav__item--nested">
  <input class="md-nav__toggle md-toggle" id="__nav_8_1_12" type="checkbox">
  <div class="md-nav__link md-nav__container">
    <a class="md-nav__link" href="../../table/"><span class="md-ellipsis">table</span></a>
    <label class="md-nav__link" id="__nav_8_1_12_label" for="__nav_8_1_12" tabindex="0">
      <span class="md-nav__icon md-icon"></span>
    </label>
  </div>
  <nav class="md-nav" data-md-level="3" aria-label="table" aria-expanded="false">
    <label class="md-nav__title" for="__nav_8_1_12">
      <span class="md-nav__icon md-icon"></span>
      table
    </label>
    <ul class="md-nav__list" data-md-scrollfix>
      <li class="md-nav__item">
        <a class="md-nav__link" href="../../table/inspect/"><span class="md-ellipsis">inspect</span></a>
      </li>
      <li class="md-nav__item">
        <a class="md-nav__link" href="../../table/locations/"><span class="md-ellipsis">locations</span></a>
      </li>
      <li class="md-nav__item">
        <a class="md-nav__link" href="../../table/maintenance/"><span class="md-ellipsis">maintenance</span></a>
      </li>
      <li class="md-nav__item">
        <a class="md-nav__link" href="../../table/metadata/"><span class="md-ellipsis">metadata</span></a>
      </li>
      <li class="md-nav__item">
        <a class="md-nav__link" href="../../table/name_mapping/"><span class="md-ellipsis">name_mapping</span></a>
      </li>
      <li class="md-nav__item">
        <a class="md-nav__link" href="../../table/puffin/"><span class="md-ellipsis">puffin</span></a>
      </li>
      <li class="md-nav__item">
        <a class="md-nav__link" href="../../table/refs/"><span class="md-ellipsis">refs</span></a>
      </li>
      <li class="md-nav__item">
        <a class="md-nav__link" href="../../table/snapshots/"><span class="md-ellipsis">snapshots</span></a>
      </li>
      <li class="md-nav__item">
        <a class="md-nav__link" href="../../table/sorting/"><span class="md-ellipsis">sorting</span></a>
      </li>
      <li class="md-nav__item">
        <a class="md-nav__link" href="../../table/statistics/"><span class="md-ellipsis">statistics</span></a>
      </li>
      <!-- Nested "update" subsection, toggled by checkbox #__nav_8_1_12_11. -->
      <li class="md-nav__item md-nav__item--nested">
        <input class="md-nav__toggle md-toggle" id="__nav_8_1_12_11" type="checkbox">
        <div class="md-nav__link md-nav__container">
          <a class="md-nav__link" href="../../table/update/"><span class="md-ellipsis">update</span></a>
          <label class="md-nav__link" id="__nav_8_1_12_11_label" for="__nav_8_1_12_11" tabindex="0">
            <span class="md-nav__icon md-icon"></span>
          </label>
        </div>
        <nav class="md-nav" data-md-level="4" aria-label="update" aria-expanded="false">
          <label class="md-nav__title" for="__nav_8_1_12_11">
            <span class="md-nav__icon md-icon"></span>
            update
          </label>
          <ul class="md-nav__list" data-md-scrollfix>
            <li class="md-nav__item">
              <a class="md-nav__link" href="../../table/update/schema/"><span class="md-ellipsis">schema</span></a>
            </li>
            <li class="md-nav__item">
              <a class="md-nav__link" href="../../table/update/snapshot/"><span class="md-ellipsis">snapshot</span></a>
            </li>
            <li class="md-nav__item">
              <a class="md-nav__link" href="../../table/update/sorting/"><span class="md-ellipsis">sorting</span></a>
            </li>
            <li class="md-nav__item">
              <a class="md-nav__link" href="../../table/update/spec/"><span class="md-ellipsis">spec</span></a>
            </li>
            <li class="md-nav__item">
              <a class="md-nav__link" href="../../table/update/statistics/"><span class="md-ellipsis">statistics</span></a>
            </li>
            <li class="md-nav__item">
              <a class="md-nav__link" href="../../table/update/validate/"><span class="md-ellipsis">validate</span></a>
            </li>
          </ul>
        </nav>
      </li>
      <li class="md-nav__item">
        <a class="md-nav__link" href="../../table/upsert_util/"><span class="md-ellipsis">upsert_util</span></a>
      </li>
    </ul>
  </nav>
</li>
<!-- Links to sibling reference pages, addressed relative to this page (../../<name>/). -->
<li class="md-nav__item">
  <a class="md-nav__link" href="../../transforms/"><span class="md-ellipsis">transforms</span></a>
</li>
<li class="md-nav__item">
  <a class="md-nav__link" href="../../typedef/"><span class="md-ellipsis">typedef</span></a>
</li>
<li class="md-nav__item">
  <a class="md-nav__link" href="../../types/"><span class="md-ellipsis">types</span></a>
</li>
<!--
  Collapsible "utils" section. The checkbox #__nav_8_1_16 is the toggle that both
  labels below point at via their "for" attribute.
  The toggle label holds only an icon span and therefore has no text, so the nested
  <nav> is named with an explicit aria-label instead of aria-labelledby referencing
  that empty label. The label id is kept in case anything else refers to it.
-->
<li class="md-nav__item md-nav__item--nested">
  <input class="md-nav__toggle md-toggle" id="__nav_8_1_16" type="checkbox">
  <div class="md-nav__link md-nav__container">
    <a class="md-nav__link" href="../../utils/"><span class="md-ellipsis">utils</span></a>
    <label class="md-nav__link" id="__nav_8_1_16_label" for="__nav_8_1_16" tabindex="0">
      <span class="md-nav__icon md-icon"></span>
    </label>
  </div>
  <nav class="md-nav" data-md-level="3" aria-label="utils" aria-expanded="false">
    <label class="md-nav__title" for="__nav_8_1_16">
      <span class="md-nav__icon md-icon"></span>
      utils
    </label>
    <ul class="md-nav__list" data-md-scrollfix>
      <li class="md-nav__item">
        <a class="md-nav__link" href="../../utils/bin_packing/"><span class="md-ellipsis">bin_packing</span></a>
      </li>
      <li class="md-nav__item">
        <a class="md-nav__link" href="../../utils/concurrent/"><span class="md-ellipsis">concurrent</span></a>
      </li>
      <li class="md-nav__item">
        <a class="md-nav__link" href="../../utils/config/"><span class="md-ellipsis">config</span></a>
      </li>
      <li class="md-nav__item">
        <a class="md-nav__link" href="../../utils/datetime/"><span class="md-ellipsis">datetime</span></a>
      </li>
      <li class="md-nav__item">
        <a class="md-nav__link" href="../../utils/decimal/"><span class="md-ellipsis">decimal</span></a>
      </li>
      <li class="md-nav__item">
        <a class="md-nav__link" href="../../utils/deprecated/"><span class="md-ellipsis">deprecated</span></a>
      </li>
      <li class="md-nav__item">
        <a class="md-nav__link" href="../../utils/lazydict/"><span class="md-ellipsis">lazydict</span></a>
      </li>
      <li class="md-nav__item">
        <a class="md-nav__link" href="../../utils/parsing/"><span class="md-ellipsis">parsing</span></a>
      </li>
      <li class="md-nav__item">
        <a class="md-nav__link" href="../../utils/properties/"><span class="md-ellipsis">properties</span></a>
      </li>
      <li class="md-nav__item">
        <a class="md-nav__link" href="../../utils/schema_conversion/"><span class="md-ellipsis">schema_conversion</span></a>
      </li>
      <li class="md-nav__item">
        <a class="md-nav__link" href="../../utils/singleton/"><span class="md-ellipsis">singleton</span></a>
      </li>
      <li class="md-nav__item">
        <a class="md-nav__link" href="../../utils/truncate/"><span class="md-ellipsis">truncate</span></a>
      </li>
    </ul>
  </nav>
</li>
</ul>
</nav>
</li>
</ul>
</nav>
</li>
</ul>
</nav>
</div>
</div>
</div>
<!--
  Secondary sidebar holding the "Table of contents" nav for this page.
  Top-level entries link to #pyiceberg.io.pyarrow.* anchors; entries that have
  members carry a nested <nav> whose aria-label repeats the entry name.
-->
<div class="md-sidebar md-sidebar--secondary" data-md-component="sidebar" data-md-type="toc">
  <div class="md-sidebar__scrollwrap">
    <div class="md-sidebar__inner">
      <nav class="md-nav md-nav--secondary" aria-label="Table of contents">
        <label class="md-nav__title" for="__toc">
          <span class="md-nav__icon md-icon"></span>
          Table of contents
        </label>
        <ul class="md-nav__list" data-md-component="toc" data-md-scrollfix>
          <li class="md-nav__item">
            <a class="md-nav__link" href="#pyiceberg.io.pyarrow"><span class="md-ellipsis">pyarrow</span></a>
          </li>
          <li class="md-nav__item">
            <a class="md-nav__link" href="#pyiceberg.io.pyarrow.ArrowScan"><span class="md-ellipsis">ArrowScan</span></a>
            <nav class="md-nav" aria-label="ArrowScan">
              <ul class="md-nav__list">
                <li class="md-nav__item">
                  <a class="md-nav__link" href="#pyiceberg.io.pyarrow.ArrowScan.to_record_batches"><span class="md-ellipsis">to_record_batches</span></a>
                </li>
                <li class="md-nav__item">
                  <a class="md-nav__link" href="#pyiceberg.io.pyarrow.ArrowScan.to_table"><span class="md-ellipsis">to_table</span></a>
                </li>
              </ul>
            </nav>
          </li>
          <li class="md-nav__item">
            <a class="md-nav__link" href="#pyiceberg.io.pyarrow.PyArrowFile"><span class="md-ellipsis">PyArrowFile</span></a>
            <nav class="md-nav" aria-label="PyArrowFile">
              <ul class="md-nav__list">
                <li class="md-nav__item">
                  <a class="md-nav__link" href="#pyiceberg.io.pyarrow.PyArrowFile.__len__"><span class="md-ellipsis">__len__</span></a>
                </li>
                <li class="md-nav__item">
                  <a class="md-nav__link" href="#pyiceberg.io.pyarrow.PyArrowFile.create"><span class="md-ellipsis">create</span></a>
                </li>
                <li class="md-nav__item">
                  <a class="md-nav__link" href="#pyiceberg.io.pyarrow.PyArrowFile.exists"><span class="md-ellipsis">exists</span></a>
                </li>
                <li class="md-nav__item">
                  <a class="md-nav__link" href="#pyiceberg.io.pyarrow.PyArrowFile.open"><span class="md-ellipsis">open</span></a>
                </li>
                <li class="md-nav__item">
                  <a class="md-nav__link" href="#pyiceberg.io.pyarrow.PyArrowFile.to_input_file"><span class="md-ellipsis">to_input_file</span></a>
                </li>
              </ul>
            </nav>
          </li>
          <li class="md-nav__item">
            <a class="md-nav__link" href="#pyiceberg.io.pyarrow.PyArrowFileIO"><span class="md-ellipsis">PyArrowFileIO</span></a>
            <nav class="md-nav" aria-label="PyArrowFileIO">
              <ul class="md-nav__list">
                <li class="md-nav__item">
                  <a class="md-nav__link" href="#pyiceberg.io.pyarrow.PyArrowFileIO.__getstate__"><span class="md-ellipsis">__getstate__</span></a>
                </li>
                <li class="md-nav__item">
                  <a class="md-nav__link" href="#pyiceberg.io.pyarrow.PyArrowFileIO.__setstate__"><span class="md-ellipsis">__setstate__</span></a>
                </li>
                <li class="md-nav__item">
                  <a class="md-nav__link" href="#pyiceberg.io.pyarrow.PyArrowFileIO.delete"><span class="md-ellipsis">delete</span></a>
                </li>
                <li class="md-nav__item">
                  <a class="md-nav__link" href="#pyiceberg.io.pyarrow.PyArrowFileIO.new_input"><span class="md-ellipsis">new_input</span></a>
                </li>
                <li class="md-nav__item">
                  <a class="md-nav__link" href="#pyiceberg.io.pyarrow.PyArrowFileIO.new_output"><span class="md-ellipsis">new_output</span></a>
                </li>
                <li class="md-nav__item">
                  <a class="md-nav__link" href="#pyiceberg.io.pyarrow.PyArrowFileIO.parse_location"><span class="md-ellipsis">parse_location</span></a>
                </li>
              </ul>
            </nav>
          </li>
          <li class="md-nav__item">
            <a class="md-nav__link" href="#pyiceberg.io.pyarrow.PyArrowSchemaVisitor"><span class="md-ellipsis">PyArrowSchemaVisitor</span></a>
            <nav class="md-nav" aria-label="PyArrowSchemaVisitor">
              <ul class="md-nav__list">
                <li class="md-nav__item">
                  <a class="md-nav__link" href="#pyiceberg.io.pyarrow.PyArrowSchemaVisitor.after_field"><span class="md-ellipsis">after_field</span></a>
                </li>
                <li class="md-nav__item">
                  <a class="md-nav__link" href="#pyiceberg.io.pyarrow.PyArrowSchemaVisitor.after_list_element"><span class="md-ellipsis">after_list_element</span></a>
                </li>
                <li class="md-nav__item">
                  <a class="md-nav__link" href="#pyiceberg.io.pyarrow.PyArrowSchemaVisitor.after_map_key"><span class="md-ellipsis">after_map_key</span></a>
                </li>
                <li class="md-nav__item">
                  <a class="md-nav__link" href="#pyiceberg.io.pyarrow.PyArrowSchemaVisitor.after_map_value"><span class="md-ellipsis">after_map_value</span></a>
                </li>
                <li class="md-nav__item">
                  <a class="md-nav__link" href="#pyiceberg.io.pyarrow.PyArrowSchemaVisitor.before_field"><span class="md-ellipsis">before_field</span></a>
                </li>
                <li class="md-nav__item">
                  <a class="md-nav__link" href="#pyiceberg.io.pyarrow.PyArrowSchemaVisitor.before_list_element"><span class="md-ellipsis">before_list_element</span></a>
                </li>
                <li class="md-nav__item">
                  <a class="md-nav__link" href="#pyiceberg.io.pyarrow.PyArrowSchemaVisitor.before_map_key"><span class="md-ellipsis">before_map_key</span></a>
                </li>
                <li class="md-nav__item">
                  <a class="md-nav__link" href="#pyiceberg.io.pyarrow.PyArrowSchemaVisitor.before_map_value"><span class="md-ellipsis">before_map_value</span></a>
                </li>
                <li class="md-nav__item">
                  <a class="md-nav__link" href="#pyiceberg.io.pyarrow.PyArrowSchemaVisitor.field"><span class="md-ellipsis">field</span></a>
                </li>
                <li class="md-nav__item">
                  <a class="md-nav__link" href="#pyiceberg.io.pyarrow.PyArrowSchemaVisitor.list"><span class="md-ellipsis">list</span></a>
                </li>
                <li class="md-nav__item">
                  <a class="md-nav__link" href="#pyiceberg.io.pyarrow.PyArrowSchemaVisitor.map"><span class="md-ellipsis">map</span></a>
                </li>
                <li class="md-nav__item">
                  <a class="md-nav__link" href="#pyiceberg.io.pyarrow.PyArrowSchemaVisitor.primitive"><span class="md-ellipsis">primitive</span></a>
                </li>
                <li class="md-nav__item">
                  <a class="md-nav__link" href="#pyiceberg.io.pyarrow.PyArrowSchemaVisitor.schema"><span class="md-ellipsis">schema</span></a>
                </li>
                <li class="md-nav__item">
                  <a class="md-nav__link" href="#pyiceberg.io.pyarrow.PyArrowSchemaVisitor.struct"><span class="md-ellipsis">struct</span></a>
                </li>
              </ul>
            </nav>
          </li>
          <li class="md-nav__item">
            <a class="md-nav__link" href="#pyiceberg.io.pyarrow.UnsupportedPyArrowTypeException"><span class="md-ellipsis">UnsupportedPyArrowTypeException</span></a>
          </li>
          <li class="md-nav__item">
            <a class="md-nav__link" href="#pyiceberg.io.pyarrow.compute_statistics_plan"><span class="md-ellipsis">compute_statistics_plan</span></a>
          </li>
          <li class="md-nav__item">
            <a class="md-nav__link" href="#pyiceberg.io.pyarrow.data_file_statistics_from_parquet_metadata"><span class="md-ellipsis">data_file_statistics_from_parquet_metadata</span></a>
          </li>
          <li class="md-nav__item">
            <a class="md-nav__link" href="#pyiceberg.io.pyarrow.parquet_path_to_id_mapping"><span class="md-ellipsis">parquet_path_to_id_mapping</span></a>
          </li>
          <li class="md-nav__item">
            <a class="md-nav__link" href="#pyiceberg.io.pyarrow.visit_pyarrow"><span class="md-ellipsis">visit_pyarrow</span></a>
          </li>
        </ul>
      </nav>
    </div>
  </div>
</div>
<div class="md-content" data-md-component="content">
<article class="md-content__inner md-typeset">
<h1>pyarrow</h1>
<div class="doc doc-object doc-module">
<a id="pyiceberg.io.pyarrow"></a>
<div class="doc doc-contents first">
<p>FileIO implementation for reading and writing table files that uses pyarrow.fs.</p>
<p>This file contains a FileIO implementation that relies on the filesystem interface provided
by PyArrow. It relies on PyArrow's <code>from_uri</code> method that infers the correct filesystem
type to use. Theoretically, this allows the supported storage types to grow naturally
with the pyarrow library.</p>
<div class="doc doc-children">
<div class="doc doc-object doc-class">
<h2 id="pyiceberg.io.pyarrow.ArrowScan" class="doc doc-heading">
<code>ArrowScan</code>
<a href="#pyiceberg.io.pyarrow.ArrowScan" class="headerlink" title="Permanent link">&para;</a></h2>
<div class="doc doc-contents ">
<details class="quote">
<summary>Source code in <code>pyiceberg/io/pyarrow.py</code></summary>
<div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-1621">1621</a></span>
<span class="normal"><a href="#__codelineno-0-1622">1622</a></span>
<span class="normal"><a href="#__codelineno-0-1623">1623</a></span>
<span class="normal"><a href="#__codelineno-0-1624">1624</a></span>
<span class="normal"><a href="#__codelineno-0-1625">1625</a></span>
<span class="normal"><a href="#__codelineno-0-1626">1626</a></span>
<span class="normal"><a href="#__codelineno-0-1627">1627</a></span>
<span class="normal"><a href="#__codelineno-0-1628">1628</a></span>
<span class="normal"><a href="#__codelineno-0-1629">1629</a></span>
<span class="normal"><a href="#__codelineno-0-1630">1630</a></span>
<span class="normal"><a href="#__codelineno-0-1631">1631</a></span>
<span class="normal"><a href="#__codelineno-0-1632">1632</a></span>
<span class="normal"><a href="#__codelineno-0-1633">1633</a></span>
<span class="normal"><a href="#__codelineno-0-1634">1634</a></span>
<span class="normal"><a href="#__codelineno-0-1635">1635</a></span>
<span class="normal"><a href="#__codelineno-0-1636">1636</a></span>
<span class="normal"><a href="#__codelineno-0-1637">1637</a></span>
<span class="normal"><a href="#__codelineno-0-1638">1638</a></span>
<span class="normal"><a href="#__codelineno-0-1639">1639</a></span>
<span class="normal"><a href="#__codelineno-0-1640">1640</a></span>
<span class="normal"><a href="#__codelineno-0-1641">1641</a></span>
<span class="normal"><a href="#__codelineno-0-1642">1642</a></span>
<span class="normal"><a href="#__codelineno-0-1643">1643</a></span>
<span class="normal"><a href="#__codelineno-0-1644">1644</a></span>
<span class="normal"><a href="#__codelineno-0-1645">1645</a></span>
<span class="normal"><a href="#__codelineno-0-1646">1646</a></span>
<span class="normal"><a href="#__codelineno-0-1647">1647</a></span>
<span class="normal"><a href="#__codelineno-0-1648">1648</a></span>
<span class="normal"><a href="#__codelineno-0-1649">1649</a></span>
<span class="normal"><a href="#__codelineno-0-1650">1650</a></span>
<span class="normal"><a href="#__codelineno-0-1651">1651</a></span>
<span class="normal"><a href="#__codelineno-0-1652">1652</a></span>
<span class="normal"><a href="#__codelineno-0-1653">1653</a></span>
<span class="normal"><a href="#__codelineno-0-1654">1654</a></span>
<span class="normal"><a href="#__codelineno-0-1655">1655</a></span>
<span class="normal"><a href="#__codelineno-0-1656">1656</a></span>
<span class="normal"><a href="#__codelineno-0-1657">1657</a></span>
<span class="normal"><a href="#__codelineno-0-1658">1658</a></span>
<span class="normal"><a href="#__codelineno-0-1659">1659</a></span>
<span class="normal"><a href="#__codelineno-0-1660">1660</a></span>
<span class="normal"><a href="#__codelineno-0-1661">1661</a></span>
<span class="normal"><a href="#__codelineno-0-1662">1662</a></span>
<span class="normal"><a href="#__codelineno-0-1663">1663</a></span>
<span class="normal"><a href="#__codelineno-0-1664">1664</a></span>
<span class="normal"><a href="#__codelineno-0-1665">1665</a></span>
<span class="normal"><a href="#__codelineno-0-1666">1666</a></span>
<span class="normal"><a href="#__codelineno-0-1667">1667</a></span>
<span class="normal"><a href="#__codelineno-0-1668">1668</a></span>
<span class="normal"><a href="#__codelineno-0-1669">1669</a></span>
<span class="normal"><a href="#__codelineno-0-1670">1670</a></span>
<span class="normal"><a href="#__codelineno-0-1671">1671</a></span>
<span class="normal"><a href="#__codelineno-0-1672">1672</a></span>
<span class="normal"><a href="#__codelineno-0-1673">1673</a></span>
<span class="normal"><a href="#__codelineno-0-1674">1674</a></span>
<span class="normal"><a href="#__codelineno-0-1675">1675</a></span>
<span class="normal"><a href="#__codelineno-0-1676">1676</a></span>
<span class="normal"><a href="#__codelineno-0-1677">1677</a></span>
<span class="normal"><a href="#__codelineno-0-1678">1678</a></span>
<span class="normal"><a href="#__codelineno-0-1679">1679</a></span>
<span class="normal"><a href="#__codelineno-0-1680">1680</a></span>
<span class="normal"><a href="#__codelineno-0-1681">1681</a></span>
<span class="normal"><a href="#__codelineno-0-1682">1682</a></span>
<span class="normal"><a href="#__codelineno-0-1683">1683</a></span>
<span class="normal"><a href="#__codelineno-0-1684">1684</a></span>
<span class="normal"><a href="#__codelineno-0-1685">1685</a></span>
<span class="normal"><a href="#__codelineno-0-1686">1686</a></span>
<span class="normal"><a href="#__codelineno-0-1687">1687</a></span>
<span class="normal"><a href="#__codelineno-0-1688">1688</a></span>
<span class="normal"><a href="#__codelineno-0-1689">1689</a></span>
<span class="normal"><a href="#__codelineno-0-1690">1690</a></span>
<span class="normal"><a href="#__codelineno-0-1691">1691</a></span>
<span class="normal"><a href="#__codelineno-0-1692">1692</a></span>
<span class="normal"><a href="#__codelineno-0-1693">1693</a></span>
<span class="normal"><a href="#__codelineno-0-1694">1694</a></span>
<span class="normal"><a href="#__codelineno-0-1695">1695</a></span>
<span class="normal"><a href="#__codelineno-0-1696">1696</a></span>
<span class="normal"><a href="#__codelineno-0-1697">1697</a></span>
<span class="normal"><a href="#__codelineno-0-1698">1698</a></span>
<span class="normal"><a href="#__codelineno-0-1699">1699</a></span>
<span class="normal"><a href="#__codelineno-0-1700">1700</a></span>
<span class="normal"><a href="#__codelineno-0-1701">1701</a></span>
<span class="normal"><a href="#__codelineno-0-1702">1702</a></span>
<span class="normal"><a href="#__codelineno-0-1703">1703</a></span>
<span class="normal"><a href="#__codelineno-0-1704">1704</a></span>
<span class="normal"><a href="#__codelineno-0-1705">1705</a></span>
<span class="normal"><a href="#__codelineno-0-1706">1706</a></span>
<span class="normal"><a href="#__codelineno-0-1707">1707</a></span>
<span class="normal"><a href="#__codelineno-0-1708">1708</a></span>
<span class="normal"><a href="#__codelineno-0-1709">1709</a></span>
<span class="normal"><a href="#__codelineno-0-1710">1710</a></span>
<span class="normal"><a href="#__codelineno-0-1711">1711</a></span>
<span class="normal"><a href="#__codelineno-0-1712">1712</a></span>
<span class="normal"><a href="#__codelineno-0-1713">1713</a></span>
<span class="normal"><a href="#__codelineno-0-1714">1714</a></span>
<span class="normal"><a href="#__codelineno-0-1715">1715</a></span>
<span class="normal"><a href="#__codelineno-0-1716">1716</a></span>
<span class="normal"><a href="#__codelineno-0-1717">1717</a></span>
<span class="normal"><a href="#__codelineno-0-1718">1718</a></span>
<span class="normal"><a href="#__codelineno-0-1719">1719</a></span>
<span class="normal"><a href="#__codelineno-0-1720">1720</a></span>
<span class="normal"><a href="#__codelineno-0-1721">1721</a></span>
<span class="normal"><a href="#__codelineno-0-1722">1722</a></span>
<span class="normal"><a href="#__codelineno-0-1723">1723</a></span>
<span class="normal"><a href="#__codelineno-0-1724">1724</a></span>
<span class="normal"><a href="#__codelineno-0-1725">1725</a></span>
<span class="normal"><a href="#__codelineno-0-1726">1726</a></span>
<span class="normal"><a href="#__codelineno-0-1727">1727</a></span>
<span class="normal"><a href="#__codelineno-0-1728">1728</a></span>
<span class="normal"><a href="#__codelineno-0-1729">1729</a></span>
<span class="normal"><a href="#__codelineno-0-1730">1730</a></span>
<span class="normal"><a href="#__codelineno-0-1731">1731</a></span>
<span class="normal"><a href="#__codelineno-0-1732">1732</a></span>
<span class="normal"><a href="#__codelineno-0-1733">1733</a></span>
<span class="normal"><a href="#__codelineno-0-1734">1734</a></span>
<span class="normal"><a href="#__codelineno-0-1735">1735</a></span>
<span class="normal"><a href="#__codelineno-0-1736">1736</a></span>
<span class="normal"><a href="#__codelineno-0-1737">1737</a></span>
<span class="normal"><a href="#__codelineno-0-1738">1738</a></span>
<span class="normal"><a href="#__codelineno-0-1739">1739</a></span>
<span class="normal"><a href="#__codelineno-0-1740">1740</a></span>
<span class="normal"><a href="#__codelineno-0-1741">1741</a></span>
<span class="normal"><a href="#__codelineno-0-1742">1742</a></span>
<span class="normal"><a href="#__codelineno-0-1743">1743</a></span>
<span class="normal"><a href="#__codelineno-0-1744">1744</a></span>
<span class="normal"><a href="#__codelineno-0-1745">1745</a></span>
<span class="normal"><a href="#__codelineno-0-1746">1746</a></span>
<span class="normal"><a href="#__codelineno-0-1747">1747</a></span>
<span class="normal"><a href="#__codelineno-0-1748">1748</a></span>
<span class="normal"><a href="#__codelineno-0-1749">1749</a></span>
<span class="normal"><a href="#__codelineno-0-1750">1750</a></span>
<span class="normal"><a href="#__codelineno-0-1751">1751</a></span>
<span class="normal"><a href="#__codelineno-0-1752">1752</a></span>
<span class="normal"><a href="#__codelineno-0-1753">1753</a></span>
<span class="normal"><a href="#__codelineno-0-1754">1754</a></span>
<span class="normal"><a href="#__codelineno-0-1755">1755</a></span>
<span class="normal"><a href="#__codelineno-0-1756">1756</a></span>
<span class="normal"><a href="#__codelineno-0-1757">1757</a></span>
<span class="normal"><a href="#__codelineno-0-1758">1758</a></span>
<span class="normal"><a href="#__codelineno-0-1759">1759</a></span>
<span class="normal"><a href="#__codelineno-0-1760">1760</a></span>
<span class="normal"><a href="#__codelineno-0-1761">1761</a></span>
<span class="normal"><a href="#__codelineno-0-1762">1762</a></span>
<span class="normal"><a href="#__codelineno-0-1763">1763</a></span>
<span class="normal"><a href="#__codelineno-0-1764">1764</a></span>
<span class="normal"><a href="#__codelineno-0-1765">1765</a></span>
<span class="normal"><a href="#__codelineno-0-1766">1766</a></span>
<span class="normal"><a href="#__codelineno-0-1767">1767</a></span>
<span class="normal"><a href="#__codelineno-0-1768">1768</a></span>
<span class="normal"><a href="#__codelineno-0-1769">1769</a></span>
<span class="normal"><a href="#__codelineno-0-1770">1770</a></span>
<span class="normal"><a href="#__codelineno-0-1771">1771</a></span>
<span class="normal"><a href="#__codelineno-0-1772">1772</a></span>
<span class="normal"><a href="#__codelineno-0-1773">1773</a></span>
<span class="normal"><a href="#__codelineno-0-1774">1774</a></span>
<span class="normal"><a href="#__codelineno-0-1775">1775</a></span>
<span class="normal"><a href="#__codelineno-0-1776">1776</a></span>
<span class="normal"><a href="#__codelineno-0-1777">1777</a></span>
<span class="normal"><a href="#__codelineno-0-1778">1778</a></span>
<span class="normal"><a href="#__codelineno-0-1779">1779</a></span>
<span class="normal"><a href="#__codelineno-0-1780">1780</a></span>
<span class="normal"><a href="#__codelineno-0-1781">1781</a></span></pre></div></td><td class="code"><div><pre><span></span><code><a id="__codelineno-0-1621" name="__codelineno-0-1621"></a><span class="k">class</span><span class="w"> </span><span class="nc">ArrowScan</span><span class="p">:</span>
<a id="__codelineno-0-1622" name="__codelineno-0-1622"></a> <span class="n">_table_metadata</span><span class="p">:</span> <span class="n">TableMetadata</span>
<a id="__codelineno-0-1623" name="__codelineno-0-1623"></a> <span class="n">_io</span><span class="p">:</span> <span class="n">FileIO</span>
<a id="__codelineno-0-1624" name="__codelineno-0-1624"></a> <span class="n">_projected_schema</span><span class="p">:</span> <span class="n">Schema</span>
<a id="__codelineno-0-1625" name="__codelineno-0-1625"></a> <span class="n">_bound_row_filter</span><span class="p">:</span> <span class="n">BooleanExpression</span>
<a id="__codelineno-0-1626" name="__codelineno-0-1626"></a> <span class="n">_case_sensitive</span><span class="p">:</span> <span class="nb">bool</span>
<a id="__codelineno-0-1627" name="__codelineno-0-1627"></a> <span class="n">_limit</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">int</span><span class="p">]</span>
<a id="__codelineno-0-1628" name="__codelineno-0-1628"></a> <span class="n">_downcast_ns_timestamp_to_us</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">bool</span><span class="p">]</span>
<a id="__codelineno-0-1629" name="__codelineno-0-1629"></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;Scan the Iceberg Table and create an Arrow construct.</span>
<a id="__codelineno-0-1630" name="__codelineno-0-1630"></a>
<a id="__codelineno-0-1631" name="__codelineno-0-1631"></a><span class="sd"> Attributes:</span>
<a id="__codelineno-0-1632" name="__codelineno-0-1632"></a><span class="sd"> _table_metadata: Current table metadata of the Iceberg table</span>
<a id="__codelineno-0-1633" name="__codelineno-0-1633"></a><span class="sd"> _io: PyIceberg FileIO implementation from which to fetch the io properties</span>
<a id="__codelineno-0-1634" name="__codelineno-0-1634"></a><span class="sd"> _projected_schema: Iceberg Schema to project onto the data files</span>
<a id="__codelineno-0-1635" name="__codelineno-0-1635"></a><span class="sd"> _bound_row_filter: Schema bound row expression to filter the data with</span>
<a id="__codelineno-0-1636" name="__codelineno-0-1636"></a><span class="sd"> _case_sensitive: Case sensitivity when looking up column names</span>
<a id="__codelineno-0-1637" name="__codelineno-0-1637"></a><span class="sd"> _limit: Limit the number of records.</span>
<a id="__codelineno-0-1638" name="__codelineno-0-1638"></a><span class="sd"> &quot;&quot;&quot;</span>
<a id="__codelineno-0-1639" name="__codelineno-0-1639"></a>
<a id="__codelineno-0-1640" name="__codelineno-0-1640"></a> <span class="k">def</span><span class="w"> </span><span class="fm">__init__</span><span class="p">(</span>
<a id="__codelineno-0-1641" name="__codelineno-0-1641"></a> <span class="bp">self</span><span class="p">,</span>
<a id="__codelineno-0-1642" name="__codelineno-0-1642"></a> <span class="n">table_metadata</span><span class="p">:</span> <span class="n">TableMetadata</span><span class="p">,</span>
<a id="__codelineno-0-1643" name="__codelineno-0-1643"></a> <span class="n">io</span><span class="p">:</span> <span class="n">FileIO</span><span class="p">,</span>
<a id="__codelineno-0-1644" name="__codelineno-0-1644"></a> <span class="n">projected_schema</span><span class="p">:</span> <span class="n">Schema</span><span class="p">,</span>
<a id="__codelineno-0-1645" name="__codelineno-0-1645"></a> <span class="n">row_filter</span><span class="p">:</span> <span class="n">BooleanExpression</span><span class="p">,</span>
<a id="__codelineno-0-1646" name="__codelineno-0-1646"></a> <span class="n">case_sensitive</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">True</span><span class="p">,</span>
<a id="__codelineno-0-1647" name="__codelineno-0-1647"></a> <span class="n">limit</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">int</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<a id="__codelineno-0-1648" name="__codelineno-0-1648"></a> <span class="p">)</span> <span class="o">-&gt;</span> <span class="kc">None</span><span class="p">:</span>
<a id="__codelineno-0-1649" name="__codelineno-0-1649"></a> <span class="bp">self</span><span class="o">.</span><span class="n">_table_metadata</span> <span class="o">=</span> <span class="n">table_metadata</span>
<a id="__codelineno-0-1650" name="__codelineno-0-1650"></a> <span class="bp">self</span><span class="o">.</span><span class="n">_io</span> <span class="o">=</span> <span class="n">io</span>
<a id="__codelineno-0-1651" name="__codelineno-0-1651"></a> <span class="bp">self</span><span class="o">.</span><span class="n">_projected_schema</span> <span class="o">=</span> <span class="n">projected_schema</span>
<a id="__codelineno-0-1652" name="__codelineno-0-1652"></a> <span class="bp">self</span><span class="o">.</span><span class="n">_bound_row_filter</span> <span class="o">=</span> <span class="n">bind</span><span class="p">(</span><span class="n">table_metadata</span><span class="o">.</span><span class="n">schema</span><span class="p">(),</span> <span class="n">row_filter</span><span class="p">,</span> <span class="n">case_sensitive</span><span class="o">=</span><span class="n">case_sensitive</span><span class="p">)</span>
<a id="__codelineno-0-1653" name="__codelineno-0-1653"></a> <span class="bp">self</span><span class="o">.</span><span class="n">_case_sensitive</span> <span class="o">=</span> <span class="n">case_sensitive</span>
<a id="__codelineno-0-1654" name="__codelineno-0-1654"></a> <span class="bp">self</span><span class="o">.</span><span class="n">_limit</span> <span class="o">=</span> <span class="n">limit</span>
<a id="__codelineno-0-1655" name="__codelineno-0-1655"></a> <span class="bp">self</span><span class="o">.</span><span class="n">_downcast_ns_timestamp_to_us</span> <span class="o">=</span> <span class="n">Config</span><span class="p">()</span><span class="o">.</span><span class="n">get_bool</span><span class="p">(</span><span class="n">DOWNCAST_NS_TIMESTAMP_TO_US_ON_WRITE</span><span class="p">)</span>
<a id="__codelineno-0-1656" name="__codelineno-0-1656"></a>
<a id="__codelineno-0-1657" name="__codelineno-0-1657"></a> <span class="nd">@property</span>
<a id="__codelineno-0-1658" name="__codelineno-0-1658"></a> <span class="k">def</span><span class="w"> </span><span class="nf">_projected_field_ids</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">Set</span><span class="p">[</span><span class="nb">int</span><span class="p">]:</span>
<a id="__codelineno-0-1659" name="__codelineno-0-1659"></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;Set of field IDs that should be projected from the data files.&quot;&quot;&quot;</span>
<a id="__codelineno-0-1660" name="__codelineno-0-1660"></a> <span class="k">return</span> <span class="p">{</span>
<a id="__codelineno-0-1661" name="__codelineno-0-1661"></a> <span class="nb">id</span>
<a id="__codelineno-0-1662" name="__codelineno-0-1662"></a> <span class="k">for</span> <span class="nb">id</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">_projected_schema</span><span class="o">.</span><span class="n">field_ids</span>
<a id="__codelineno-0-1663" name="__codelineno-0-1663"></a> <span class="k">if</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_projected_schema</span><span class="o">.</span><span class="n">find_type</span><span class="p">(</span><span class="nb">id</span><span class="p">),</span> <span class="p">(</span><span class="n">MapType</span><span class="p">,</span> <span class="n">ListType</span><span class="p">))</span>
<a id="__codelineno-0-1664" name="__codelineno-0-1664"></a> <span class="p">}</span><span class="o">.</span><span class="n">union</span><span class="p">(</span><span class="n">extract_field_ids</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_bound_row_filter</span><span class="p">))</span>
<a id="__codelineno-0-1665" name="__codelineno-0-1665"></a>
<a id="__codelineno-0-1666" name="__codelineno-0-1666"></a> <span class="k">def</span><span class="w"> </span><span class="nf">to_table</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">tasks</span><span class="p">:</span> <span class="n">Iterable</span><span class="p">[</span><span class="n">FileScanTask</span><span class="p">])</span> <span class="o">-&gt;</span> <span class="n">pa</span><span class="o">.</span><span class="n">Table</span><span class="p">:</span>
<a id="__codelineno-0-1667" name="__codelineno-0-1667"></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;Scan the Iceberg table and return a pa.Table.</span>
<a id="__codelineno-0-1668" name="__codelineno-0-1668"></a>
<a id="__codelineno-0-1669" name="__codelineno-0-1669"></a><span class="sd"> Returns a pa.Table with data from the Iceberg table by resolving the</span>
<a id="__codelineno-0-1670" name="__codelineno-0-1670"></a><span class="sd"> right columns that match the current table schema. Only data that</span>
<a id="__codelineno-0-1671" name="__codelineno-0-1671"></a><span class="sd"> matches the provided row_filter expression is returned.</span>
<a id="__codelineno-0-1672" name="__codelineno-0-1672"></a>
<a id="__codelineno-0-1673" name="__codelineno-0-1673"></a><span class="sd"> Args:</span>
<a id="__codelineno-0-1674" name="__codelineno-0-1674"></a><span class="sd"> tasks: FileScanTasks representing the data files and delete files to read from.</span>
<a id="__codelineno-0-1675" name="__codelineno-0-1675"></a>
<a id="__codelineno-0-1676" name="__codelineno-0-1676"></a><span class="sd"> Returns:</span>
<a id="__codelineno-0-1677" name="__codelineno-0-1677"></a><span class="sd"> A PyArrow table. Total number of rows will be capped if specified.</span>
<a id="__codelineno-0-1678" name="__codelineno-0-1678"></a>
<a id="__codelineno-0-1679" name="__codelineno-0-1679"></a><span class="sd"> Raises:</span>
<a id="__codelineno-0-1680" name="__codelineno-0-1680"></a><span class="sd"> ResolveError: When a required field cannot be found in the file</span>
<a id="__codelineno-0-1681" name="__codelineno-0-1681"></a><span class="sd"> ValueError: When a field type in the file cannot be projected to the schema type</span>
<a id="__codelineno-0-1682" name="__codelineno-0-1682"></a><span class="sd"> &quot;&quot;&quot;</span>
<a id="__codelineno-0-1683" name="__codelineno-0-1683"></a> <span class="n">arrow_schema</span> <span class="o">=</span> <span class="n">schema_to_pyarrow</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_projected_schema</span><span class="p">,</span> <span class="n">include_field_ids</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span>
<a id="__codelineno-0-1684" name="__codelineno-0-1684"></a>
<a id="__codelineno-0-1685" name="__codelineno-0-1685"></a> <span class="n">batches</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">to_record_batches</span><span class="p">(</span><span class="n">tasks</span><span class="p">)</span>
<a id="__codelineno-0-1686" name="__codelineno-0-1686"></a> <span class="k">try</span><span class="p">:</span>
<a id="__codelineno-0-1687" name="__codelineno-0-1687"></a> <span class="n">first_batch</span> <span class="o">=</span> <span class="nb">next</span><span class="p">(</span><span class="n">batches</span><span class="p">)</span>
<a id="__codelineno-0-1688" name="__codelineno-0-1688"></a> <span class="k">except</span> <span class="ne">StopIteration</span><span class="p">:</span>
<a id="__codelineno-0-1689" name="__codelineno-0-1689"></a> <span class="c1"># Empty</span>
<a id="__codelineno-0-1690" name="__codelineno-0-1690"></a> <span class="k">return</span> <span class="n">arrow_schema</span><span class="o">.</span><span class="n">empty_table</span><span class="p">()</span>
<a id="__codelineno-0-1691" name="__codelineno-0-1691"></a>
<a id="__codelineno-0-1692" name="__codelineno-0-1692"></a> <span class="c1"># Note: cannot use pa.Table.from_batches(itertools.chain([first_batch], batches))</span>
<a id="__codelineno-0-1693" name="__codelineno-0-1693"></a> <span class="c1"># as different batches can use different schemas (due to large_ types)</span>
<a id="__codelineno-0-1694" name="__codelineno-0-1694"></a> <span class="n">result</span> <span class="o">=</span> <span class="n">pa</span><span class="o">.</span><span class="n">concat_tables</span><span class="p">(</span>
<a id="__codelineno-0-1695" name="__codelineno-0-1695"></a> <span class="p">(</span><span class="n">pa</span><span class="o">.</span><span class="n">Table</span><span class="o">.</span><span class="n">from_batches</span><span class="p">([</span><span class="n">batch</span><span class="p">])</span> <span class="k">for</span> <span class="n">batch</span> <span class="ow">in</span> <span class="n">itertools</span><span class="o">.</span><span class="n">chain</span><span class="p">([</span><span class="n">first_batch</span><span class="p">],</span> <span class="n">batches</span><span class="p">)),</span> <span class="n">promote_options</span><span class="o">=</span><span class="s2">&quot;permissive&quot;</span>
<a id="__codelineno-0-1696" name="__codelineno-0-1696"></a> <span class="p">)</span>
<a id="__codelineno-0-1697" name="__codelineno-0-1697"></a>
<a id="__codelineno-0-1698" name="__codelineno-0-1698"></a> <span class="k">if</span> <span class="n">property_as_bool</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_io</span><span class="o">.</span><span class="n">properties</span><span class="p">,</span> <span class="n">PYARROW_USE_LARGE_TYPES_ON_READ</span><span class="p">,</span> <span class="kc">False</span><span class="p">):</span>
<a id="__codelineno-0-1699" name="__codelineno-0-1699"></a> <span class="n">deprecation_message</span><span class="p">(</span>
<a id="__codelineno-0-1700" name="__codelineno-0-1700"></a> <span class="n">deprecated_in</span><span class="o">=</span><span class="s2">&quot;0.10.0&quot;</span><span class="p">,</span>
<a id="__codelineno-0-1701" name="__codelineno-0-1701"></a> <span class="n">removed_in</span><span class="o">=</span><span class="s2">&quot;0.11.0&quot;</span><span class="p">,</span>
<a id="__codelineno-0-1702" name="__codelineno-0-1702"></a> <span class="n">help_message</span><span class="o">=</span><span class="sa">f</span><span class="s2">&quot;Property `</span><span class="si">{</span><span class="n">PYARROW_USE_LARGE_TYPES_ON_READ</span><span class="si">}</span><span class="s2">` will be removed.&quot;</span><span class="p">,</span>
<a id="__codelineno-0-1703" name="__codelineno-0-1703"></a> <span class="p">)</span>
<a id="__codelineno-0-1704" name="__codelineno-0-1704"></a> <span class="n">result</span> <span class="o">=</span> <span class="n">result</span><span class="o">.</span><span class="n">cast</span><span class="p">(</span><span class="n">arrow_schema</span><span class="p">)</span>
<a id="__codelineno-0-1705" name="__codelineno-0-1705"></a>
<a id="__codelineno-0-1706" name="__codelineno-0-1706"></a> <span class="k">return</span> <span class="n">result</span>
<a id="__codelineno-0-1707" name="__codelineno-0-1707"></a>
<a id="__codelineno-0-1708" name="__codelineno-0-1708"></a> <span class="k">def</span><span class="w"> </span><span class="nf">to_record_batches</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">tasks</span><span class="p">:</span> <span class="n">Iterable</span><span class="p">[</span><span class="n">FileScanTask</span><span class="p">])</span> <span class="o">-&gt;</span> <span class="n">Iterator</span><span class="p">[</span><span class="n">pa</span><span class="o">.</span><span class="n">RecordBatch</span><span class="p">]:</span>
<a id="__codelineno-0-1709" name="__codelineno-0-1709"></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;Scan the Iceberg table and return an Iterator[pa.RecordBatch].</span>
<a id="__codelineno-0-1710" name="__codelineno-0-1710"></a>
<a id="__codelineno-0-1711" name="__codelineno-0-1711"></a><span class="sd"> Returns an Iterator of pa.RecordBatch with data from the Iceberg table</span>
<a id="__codelineno-0-1712" name="__codelineno-0-1712"></a><span class="sd"> by resolving the right columns that match the current table schema.</span>
<a id="__codelineno-0-1713" name="__codelineno-0-1713"></a><span class="sd"> Only data that matches the provided row_filter expression is returned.</span>
<a id="__codelineno-0-1714" name="__codelineno-0-1714"></a>
<a id="__codelineno-0-1715" name="__codelineno-0-1715"></a><span class="sd"> Args:</span>
<a id="__codelineno-0-1716" name="__codelineno-0-1716"></a><span class="sd"> tasks: FileScanTasks representing the data files and delete files to read from.</span>
<a id="__codelineno-0-1717" name="__codelineno-0-1717"></a>
<a id="__codelineno-0-1718" name="__codelineno-0-1718"></a><span class="sd"> Returns:</span>
<a id="__codelineno-0-1719" name="__codelineno-0-1719"></a><span class="sd"> An Iterator of PyArrow RecordBatches.</span>
<a id="__codelineno-0-1720" name="__codelineno-0-1720"></a><span class="sd"> Total number of rows will be capped if specified.</span>
<a id="__codelineno-0-1721" name="__codelineno-0-1721"></a>
<a id="__codelineno-0-1722" name="__codelineno-0-1722"></a><span class="sd"> Raises:</span>
<a id="__codelineno-0-1723" name="__codelineno-0-1723"></a><span class="sd"> ResolveError: When a required field cannot be found in the file</span>
<a id="__codelineno-0-1724" name="__codelineno-0-1724"></a><span class="sd"> ValueError: When a field type in the file cannot be projected to the schema type</span>
<a id="__codelineno-0-1725" name="__codelineno-0-1725"></a><span class="sd"> &quot;&quot;&quot;</span>
<a id="__codelineno-0-1726" name="__codelineno-0-1726"></a> <span class="n">deletes_per_file</span> <span class="o">=</span> <span class="n">_read_all_delete_files</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_io</span><span class="p">,</span> <span class="n">tasks</span><span class="p">)</span>
<a id="__codelineno-0-1727" name="__codelineno-0-1727"></a>
<a id="__codelineno-0-1728" name="__codelineno-0-1728"></a> <span class="n">total_row_count</span> <span class="o">=</span> <span class="mi">0</span>
<a id="__codelineno-0-1729" name="__codelineno-0-1729"></a> <span class="n">executor</span> <span class="o">=</span> <span class="n">ExecutorFactory</span><span class="o">.</span><span class="n">get_or_create</span><span class="p">()</span>
<a id="__codelineno-0-1730" name="__codelineno-0-1730"></a>
<a id="__codelineno-0-1731" name="__codelineno-0-1731"></a> <span class="k">def</span><span class="w"> </span><span class="nf">batches_for_task</span><span class="p">(</span><span class="n">task</span><span class="p">:</span> <span class="n">FileScanTask</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">List</span><span class="p">[</span><span class="n">pa</span><span class="o">.</span><span class="n">RecordBatch</span><span class="p">]:</span>
<a id="__codelineno-0-1732" name="__codelineno-0-1732"></a> <span class="c1"># Materialize the iterator here to ensure execution happens within the executor.</span>
<a id="__codelineno-0-1733" name="__codelineno-0-1733"></a> <span class="c1"># Otherwise, the iterator would be lazily consumed later (in the main thread),</span>
<a id="__codelineno-0-1734" name="__codelineno-0-1734"></a> <span class="c1"># defeating the purpose of using executor.map.</span>
<a id="__codelineno-0-1735" name="__codelineno-0-1735"></a> <span class="k">return</span> <span class="nb">list</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_record_batches_from_scan_tasks_and_deletes</span><span class="p">([</span><span class="n">task</span><span class="p">],</span> <span class="n">deletes_per_file</span><span class="p">))</span>
<a id="__codelineno-0-1736" name="__codelineno-0-1736"></a>
<a id="__codelineno-0-1737" name="__codelineno-0-1737"></a> <span class="n">limit_reached</span> <span class="o">=</span> <span class="kc">False</span>
<a id="__codelineno-0-1738" name="__codelineno-0-1738"></a> <span class="k">for</span> <span class="n">batches</span> <span class="ow">in</span> <span class="n">executor</span><span class="o">.</span><span class="n">map</span><span class="p">(</span><span class="n">batches_for_task</span><span class="p">,</span> <span class="n">tasks</span><span class="p">):</span>
<a id="__codelineno-0-1739" name="__codelineno-0-1739"></a> <span class="k">for</span> <span class="n">batch</span> <span class="ow">in</span> <span class="n">batches</span><span class="p">:</span>
<a id="__codelineno-0-1740" name="__codelineno-0-1740"></a> <span class="n">current_batch_size</span> <span class="o">=</span> <span class="nb">len</span><span class="p">(</span><span class="n">batch</span><span class="p">)</span>
<a id="__codelineno-0-1741" name="__codelineno-0-1741"></a> <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">_limit</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span> <span class="ow">and</span> <span class="n">total_row_count</span> <span class="o">+</span> <span class="n">current_batch_size</span> <span class="o">&gt;=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_limit</span><span class="p">:</span>
<a id="__codelineno-0-1742" name="__codelineno-0-1742"></a> <span class="k">yield</span> <span class="n">batch</span><span class="o">.</span><span class="n">slice</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">_limit</span> <span class="o">-</span> <span class="n">total_row_count</span><span class="p">)</span>
<a id="__codelineno-0-1743" name="__codelineno-0-1743"></a>
<a id="__codelineno-0-1744" name="__codelineno-0-1744"></a> <span class="n">limit_reached</span> <span class="o">=</span> <span class="kc">True</span>
<a id="__codelineno-0-1745" name="__codelineno-0-1745"></a> <span class="k">break</span>
<a id="__codelineno-0-1746" name="__codelineno-0-1746"></a> <span class="k">else</span><span class="p">:</span>
<a id="__codelineno-0-1747" name="__codelineno-0-1747"></a> <span class="k">yield</span> <span class="n">batch</span>
<a id="__codelineno-0-1748" name="__codelineno-0-1748"></a> <span class="n">total_row_count</span> <span class="o">+=</span> <span class="n">current_batch_size</span>
<a id="__codelineno-0-1749" name="__codelineno-0-1749"></a>
<a id="__codelineno-0-1750" name="__codelineno-0-1750"></a> <span class="k">if</span> <span class="n">limit_reached</span><span class="p">:</span>
<a id="__codelineno-0-1751" name="__codelineno-0-1751"></a> <span class="c1"># This break will also cancel all running tasks in the executor</span>
<a id="__codelineno-0-1752" name="__codelineno-0-1752"></a> <span class="k">break</span>
<a id="__codelineno-0-1753" name="__codelineno-0-1753"></a>
<a id="__codelineno-0-1754" name="__codelineno-0-1754"></a> <span class="k">def</span><span class="w"> </span><span class="nf">_record_batches_from_scan_tasks_and_deletes</span><span class="p">(</span>
<a id="__codelineno-0-1755" name="__codelineno-0-1755"></a> <span class="bp">self</span><span class="p">,</span> <span class="n">tasks</span><span class="p">:</span> <span class="n">Iterable</span><span class="p">[</span><span class="n">FileScanTask</span><span class="p">],</span> <span class="n">deletes_per_file</span><span class="p">:</span> <span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">List</span><span class="p">[</span><span class="n">ChunkedArray</span><span class="p">]]</span>
<a id="__codelineno-0-1756" name="__codelineno-0-1756"></a> <span class="p">)</span> <span class="o">-&gt;</span> <span class="n">Iterator</span><span class="p">[</span><span class="n">pa</span><span class="o">.</span><span class="n">RecordBatch</span><span class="p">]:</span>
<a id="__codelineno-0-1757" name="__codelineno-0-1757"></a> <span class="n">total_row_count</span> <span class="o">=</span> <span class="mi">0</span>
<a id="__codelineno-0-1758" name="__codelineno-0-1758"></a> <span class="k">for</span> <span class="n">task</span> <span class="ow">in</span> <span class="n">tasks</span><span class="p">:</span>
<a id="__codelineno-0-1759" name="__codelineno-0-1759"></a> <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">_limit</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span> <span class="ow">and</span> <span class="n">total_row_count</span> <span class="o">&gt;=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_limit</span><span class="p">:</span>
<a id="__codelineno-0-1760" name="__codelineno-0-1760"></a> <span class="k">break</span>
<a id="__codelineno-0-1761" name="__codelineno-0-1761"></a> <span class="n">batches</span> <span class="o">=</span> <span class="n">_task_to_record_batches</span><span class="p">(</span>
<a id="__codelineno-0-1762" name="__codelineno-0-1762"></a> <span class="bp">self</span><span class="o">.</span><span class="n">_io</span><span class="p">,</span>
<a id="__codelineno-0-1763" name="__codelineno-0-1763"></a> <span class="n">task</span><span class="p">,</span>
<a id="__codelineno-0-1764" name="__codelineno-0-1764"></a> <span class="bp">self</span><span class="o">.</span><span class="n">_bound_row_filter</span><span class="p">,</span>
<a id="__codelineno-0-1765" name="__codelineno-0-1765"></a> <span class="bp">self</span><span class="o">.</span><span class="n">_projected_schema</span><span class="p">,</span>
<a id="__codelineno-0-1766" name="__codelineno-0-1766"></a> <span class="bp">self</span><span class="o">.</span><span class="n">_projected_field_ids</span><span class="p">,</span>
<a id="__codelineno-0-1767" name="__codelineno-0-1767"></a> <span class="n">deletes_per_file</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">task</span><span class="o">.</span><span class="n">file</span><span class="o">.</span><span class="n">file_path</span><span class="p">),</span>
<a id="__codelineno-0-1768" name="__codelineno-0-1768"></a> <span class="bp">self</span><span class="o">.</span><span class="n">_case_sensitive</span><span class="p">,</span>
<a id="__codelineno-0-1769" name="__codelineno-0-1769"></a> <span class="bp">self</span><span class="o">.</span><span class="n">_table_metadata</span><span class="o">.</span><span class="n">name_mapping</span><span class="p">(),</span>
<a id="__codelineno-0-1770" name="__codelineno-0-1770"></a> <span class="bp">self</span><span class="o">.</span><span class="n">_table_metadata</span><span class="o">.</span><span class="n">specs</span><span class="p">()</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">task</span><span class="o">.</span><span class="n">file</span><span class="o">.</span><span class="n">spec_id</span><span class="p">),</span>
<a id="__codelineno-0-1771" name="__codelineno-0-1771"></a> <span class="bp">self</span><span class="o">.</span><span class="n">_table_metadata</span><span class="o">.</span><span class="n">format_version</span><span class="p">,</span>
<a id="__codelineno-0-1772" name="__codelineno-0-1772"></a> <span class="bp">self</span><span class="o">.</span><span class="n">_downcast_ns_timestamp_to_us</span><span class="p">,</span>
<a id="__codelineno-0-1773" name="__codelineno-0-1773"></a> <span class="p">)</span>
<a id="__codelineno-0-1774" name="__codelineno-0-1774"></a> <span class="k">for</span> <span class="n">batch</span> <span class="ow">in</span> <span class="n">batches</span><span class="p">:</span>
<a id="__codelineno-0-1775" name="__codelineno-0-1775"></a> <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">_limit</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
<a id="__codelineno-0-1776" name="__codelineno-0-1776"></a> <span class="k">if</span> <span class="n">total_row_count</span> <span class="o">&gt;=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_limit</span><span class="p">:</span>
<a id="__codelineno-0-1777" name="__codelineno-0-1777"></a> <span class="k">break</span>
<a id="__codelineno-0-1778" name="__codelineno-0-1778"></a> <span class="k">elif</span> <span class="n">total_row_count</span> <span class="o">+</span> <span class="nb">len</span><span class="p">(</span><span class="n">batch</span><span class="p">)</span> <span class="o">&gt;=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_limit</span><span class="p">:</span>
<a id="__codelineno-0-1779" name="__codelineno-0-1779"></a> <span class="n">batch</span> <span class="o">=</span> <span class="n">batch</span><span class="o">.</span><span class="n">slice</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">_limit</span> <span class="o">-</span> <span class="n">total_row_count</span><span class="p">)</span>
<a id="__codelineno-0-1780" name="__codelineno-0-1780"></a> <span class="k">yield</span> <span class="n">batch</span>
<a id="__codelineno-0-1781" name="__codelineno-0-1781"></a> <span class="n">total_row_count</span> <span class="o">+=</span> <span class="nb">len</span><span class="p">(</span><span class="n">batch</span><span class="p">)</span>
</code></pre></div></td></tr></table></div>
</details>
<div class="doc doc-children">
<div class="doc doc-object doc-function">
<h3 id="pyiceberg.io.pyarrow.ArrowScan.to_record_batches" class="doc doc-heading">
<code class="highlight language-python"><span class="n">to_record_batches</span><span class="p">(</span><span class="n">tasks</span><span class="p">)</span></code>
<a href="#pyiceberg.io.pyarrow.ArrowScan.to_record_batches" class="headerlink" title="Permanent link">&para;</a></h3>
<div class="doc doc-contents ">
<p>Scan the Iceberg table and return an Iterator[pa.RecordBatch].</p>
<p>Returns an Iterator of pa.RecordBatch with data from the Iceberg table
by resolving the right columns that match the current table schema.
Only data that matches the provided row_filter expression is returned.</p>
<p><span class="doc-section-title">Parameters:</span></p>
<table>
<thead>
<tr>
<th>Name</th>
<th>Type</th>
<th>Description</th>
<th>Default</th>
</tr>
</thead>
<tbody>
<tr class="doc-section-item">
<td>
<code>tasks</code>
</td>
<td>
<code><span title="typing.Iterable">Iterable</span>[<a class="autorefs autorefs-internal" title="FileScanTask
dataclass
(pyiceberg.table.FileScanTask)" href="../../table/#pyiceberg.table.FileScanTask">FileScanTask</a>]</code>
</td>
<td>
<div class="doc-md-description">
<p>FileScanTasks representing the data files and delete files to read from.</p>
</div>
</td>
<td>
<em>required</em>
</td>
</tr>
</tbody>
</table>
<p><span class="doc-section-title">Returns:</span></p>
<table>
<thead>
<tr>
<th>Type</th>
<th>Description</th>
</tr>
</thead>
<tbody>
<tr class="doc-section-item">
<td>
<code><span title="typing.Iterator">Iterator</span>[<span title="pyarrow.RecordBatch">RecordBatch</span>]</code>
</td>
<td>
<div class="doc-md-description">
<p>An Iterator of PyArrow RecordBatches.
Total number of rows will be capped if specified.</p>
</div>
</td>
</tr>
</tbody>
</table>
<p><span class="doc-section-title">Raises:</span></p>
<table>
<thead>
<tr>
<th>Type</th>
<th>Description</th>
</tr>
</thead>
<tbody>
<tr class="doc-section-item">
<td>
<code><span title="pyiceberg.exceptions.ResolveError">ResolveError</span></code>
</td>
<td>
<div class="doc-md-description">
<p>When a required field cannot be found in the file</p>
</div>
</td>
</tr>
<tr class="doc-section-item">
<td>
<code><span title="ValueError">ValueError</span></code>
</td>
<td>
<div class="doc-md-description">
<p>When a field type in the file cannot be projected to the schema type</p>
</div>
</td>
</tr>
</tbody>
</table>
<details class="quote">
<summary>Source code in <code>pyiceberg/io/pyarrow.py</code></summary>
<div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-1708">1708</a></span>
<span class="normal"><a href="#__codelineno-0-1709">1709</a></span>
<span class="normal"><a href="#__codelineno-0-1710">1710</a></span>
<span class="normal"><a href="#__codelineno-0-1711">1711</a></span>
<span class="normal"><a href="#__codelineno-0-1712">1712</a></span>
<span class="normal"><a href="#__codelineno-0-1713">1713</a></span>
<span class="normal"><a href="#__codelineno-0-1714">1714</a></span>
<span class="normal"><a href="#__codelineno-0-1715">1715</a></span>
<span class="normal"><a href="#__codelineno-0-1716">1716</a></span>
<span class="normal"><a href="#__codelineno-0-1717">1717</a></span>
<span class="normal"><a href="#__codelineno-0-1718">1718</a></span>
<span class="normal"><a href="#__codelineno-0-1719">1719</a></span>
<span class="normal"><a href="#__codelineno-0-1720">1720</a></span>
<span class="normal"><a href="#__codelineno-0-1721">1721</a></span>
<span class="normal"><a href="#__codelineno-0-1722">1722</a></span>
<span class="normal"><a href="#__codelineno-0-1723">1723</a></span>
<span class="normal"><a href="#__codelineno-0-1724">1724</a></span>
<span class="normal"><a href="#__codelineno-0-1725">1725</a></span>
<span class="normal"><a href="#__codelineno-0-1726">1726</a></span>
<span class="normal"><a href="#__codelineno-0-1727">1727</a></span>
<span class="normal"><a href="#__codelineno-0-1728">1728</a></span>
<span class="normal"><a href="#__codelineno-0-1729">1729</a></span>
<span class="normal"><a href="#__codelineno-0-1730">1730</a></span>
<span class="normal"><a href="#__codelineno-0-1731">1731</a></span>
<span class="normal"><a href="#__codelineno-0-1732">1732</a></span>
<span class="normal"><a href="#__codelineno-0-1733">1733</a></span>
<span class="normal"><a href="#__codelineno-0-1734">1734</a></span>
<span class="normal"><a href="#__codelineno-0-1735">1735</a></span>
<span class="normal"><a href="#__codelineno-0-1736">1736</a></span>
<span class="normal"><a href="#__codelineno-0-1737">1737</a></span>
<span class="normal"><a href="#__codelineno-0-1738">1738</a></span>
<span class="normal"><a href="#__codelineno-0-1739">1739</a></span>
<span class="normal"><a href="#__codelineno-0-1740">1740</a></span>
<span class="normal"><a href="#__codelineno-0-1741">1741</a></span>
<span class="normal"><a href="#__codelineno-0-1742">1742</a></span>
<span class="normal"><a href="#__codelineno-0-1743">1743</a></span>
<span class="normal"><a href="#__codelineno-0-1744">1744</a></span>
<span class="normal"><a href="#__codelineno-0-1745">1745</a></span>
<span class="normal"><a href="#__codelineno-0-1746">1746</a></span>
<span class="normal"><a href="#__codelineno-0-1747">1747</a></span>
<span class="normal"><a href="#__codelineno-0-1748">1748</a></span>
<span class="normal"><a href="#__codelineno-0-1749">1749</a></span>
<span class="normal"><a href="#__codelineno-0-1750">1750</a></span>
<span class="normal"><a href="#__codelineno-0-1751">1751</a></span>
<span class="normal"><a href="#__codelineno-0-1752">1752</a></span></pre></div></td><td class="code"><div><pre><span></span><code><a id="__codelineno-0-1708" name="__codelineno-0-1708"></a><span class="k">def</span><span class="w"> </span><span class="nf">to_record_batches</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">tasks</span><span class="p">:</span> <span class="n">Iterable</span><span class="p">[</span><span class="n">FileScanTask</span><span class="p">])</span> <span class="o">-&gt;</span> <span class="n">Iterator</span><span class="p">[</span><span class="n">pa</span><span class="o">.</span><span class="n">RecordBatch</span><span class="p">]:</span>
<a id="__codelineno-0-1709" name="__codelineno-0-1709"></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;Scan the Iceberg table and return an Iterator[pa.RecordBatch].</span>
<a id="__codelineno-0-1710" name="__codelineno-0-1710"></a>
<a id="__codelineno-0-1711" name="__codelineno-0-1711"></a><span class="sd"> Returns an Iterator of pa.RecordBatch with data from the Iceberg table</span>
<a id="__codelineno-0-1712" name="__codelineno-0-1712"></a><span class="sd"> by resolving the right columns that match the current table schema.</span>
<a id="__codelineno-0-1713" name="__codelineno-0-1713"></a><span class="sd"> Only data that matches the provided row_filter expression is returned.</span>
<a id="__codelineno-0-1714" name="__codelineno-0-1714"></a>
<a id="__codelineno-0-1715" name="__codelineno-0-1715"></a><span class="sd"> Args:</span>
<a id="__codelineno-0-1716" name="__codelineno-0-1716"></a><span class="sd"> tasks: FileScanTasks representing the data files and delete files to read from.</span>
<a id="__codelineno-0-1717" name="__codelineno-0-1717"></a>
<a id="__codelineno-0-1718" name="__codelineno-0-1718"></a><span class="sd"> Returns:</span>
<a id="__codelineno-0-1719" name="__codelineno-0-1719"></a><span class="sd"> An Iterator of PyArrow RecordBatches.</span>
<a id="__codelineno-0-1720" name="__codelineno-0-1720"></a><span class="sd"> Total number of rows will be capped if specified.</span>
<a id="__codelineno-0-1721" name="__codelineno-0-1721"></a>
<a id="__codelineno-0-1722" name="__codelineno-0-1722"></a><span class="sd"> Raises:</span>
<a id="__codelineno-0-1723" name="__codelineno-0-1723"></a><span class="sd"> ResolveError: When a required field cannot be found in the file</span>
<a id="__codelineno-0-1724" name="__codelineno-0-1724"></a><span class="sd"> ValueError: When a field type in the file cannot be projected to the schema type</span>
<a id="__codelineno-0-1725" name="__codelineno-0-1725"></a><span class="sd"> &quot;&quot;&quot;</span>
<a id="__codelineno-0-1726" name="__codelineno-0-1726"></a> <span class="n">deletes_per_file</span> <span class="o">=</span> <span class="n">_read_all_delete_files</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_io</span><span class="p">,</span> <span class="n">tasks</span><span class="p">)</span>
<a id="__codelineno-0-1727" name="__codelineno-0-1727"></a>
<a id="__codelineno-0-1728" name="__codelineno-0-1728"></a> <span class="n">total_row_count</span> <span class="o">=</span> <span class="mi">0</span>
<a id="__codelineno-0-1729" name="__codelineno-0-1729"></a> <span class="n">executor</span> <span class="o">=</span> <span class="n">ExecutorFactory</span><span class="o">.</span><span class="n">get_or_create</span><span class="p">()</span>
<a id="__codelineno-0-1730" name="__codelineno-0-1730"></a>
<a id="__codelineno-0-1731" name="__codelineno-0-1731"></a> <span class="k">def</span><span class="w"> </span><span class="nf">batches_for_task</span><span class="p">(</span><span class="n">task</span><span class="p">:</span> <span class="n">FileScanTask</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">List</span><span class="p">[</span><span class="n">pa</span><span class="o">.</span><span class="n">RecordBatch</span><span class="p">]:</span>
<a id="__codelineno-0-1732" name="__codelineno-0-1732"></a> <span class="c1"># Materialize the iterator here to ensure execution happens within the executor.</span>
<a id="__codelineno-0-1733" name="__codelineno-0-1733"></a> <span class="c1"># Otherwise, the iterator would be lazily consumed later (in the main thread),</span>
<a id="__codelineno-0-1734" name="__codelineno-0-1734"></a> <span class="c1"># defeating the purpose of using executor.map.</span>
<a id="__codelineno-0-1735" name="__codelineno-0-1735"></a> <span class="k">return</span> <span class="nb">list</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_record_batches_from_scan_tasks_and_deletes</span><span class="p">([</span><span class="n">task</span><span class="p">],</span> <span class="n">deletes_per_file</span><span class="p">))</span>
<a id="__codelineno-0-1736" name="__codelineno-0-1736"></a>
<a id="__codelineno-0-1737" name="__codelineno-0-1737"></a> <span class="n">limit_reached</span> <span class="o">=</span> <span class="kc">False</span>
<a id="__codelineno-0-1738" name="__codelineno-0-1738"></a> <span class="k">for</span> <span class="n">batches</span> <span class="ow">in</span> <span class="n">executor</span><span class="o">.</span><span class="n">map</span><span class="p">(</span><span class="n">batches_for_task</span><span class="p">,</span> <span class="n">tasks</span><span class="p">):</span>
<a id="__codelineno-0-1739" name="__codelineno-0-1739"></a> <span class="k">for</span> <span class="n">batch</span> <span class="ow">in</span> <span class="n">batches</span><span class="p">:</span>
<a id="__codelineno-0-1740" name="__codelineno-0-1740"></a> <span class="n">current_batch_size</span> <span class="o">=</span> <span class="nb">len</span><span class="p">(</span><span class="n">batch</span><span class="p">)</span>
<a id="__codelineno-0-1741" name="__codelineno-0-1741"></a> <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">_limit</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span> <span class="ow">and</span> <span class="n">total_row_count</span> <span class="o">+</span> <span class="n">current_batch_size</span> <span class="o">&gt;=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_limit</span><span class="p">:</span>
<a id="__codelineno-0-1742" name="__codelineno-0-1742"></a> <span class="k">yield</span> <span class="n">batch</span><span class="o">.</span><span class="n">slice</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">_limit</span> <span class="o">-</span> <span class="n">total_row_count</span><span class="p">)</span>
<a id="__codelineno-0-1743" name="__codelineno-0-1743"></a>
<a id="__codelineno-0-1744" name="__codelineno-0-1744"></a> <span class="n">limit_reached</span> <span class="o">=</span> <span class="kc">True</span>
<a id="__codelineno-0-1745" name="__codelineno-0-1745"></a> <span class="k">break</span>
<a id="__codelineno-0-1746" name="__codelineno-0-1746"></a> <span class="k">else</span><span class="p">:</span>
<a id="__codelineno-0-1747" name="__codelineno-0-1747"></a> <span class="k">yield</span> <span class="n">batch</span>
<a id="__codelineno-0-1748" name="__codelineno-0-1748"></a> <span class="n">total_row_count</span> <span class="o">+=</span> <span class="n">current_batch_size</span>
<a id="__codelineno-0-1749" name="__codelineno-0-1749"></a>
<a id="__codelineno-0-1750" name="__codelineno-0-1750"></a> <span class="k">if</span> <span class="n">limit_reached</span><span class="p">:</span>
<a id="__codelineno-0-1751" name="__codelineno-0-1751"></a> <span class="c1"># This break will also cancel all pending (not yet started) tasks in the executor</span>
<a id="__codelineno-0-1752" name="__codelineno-0-1752"></a> <span class="k">break</span>
</code></pre></div></td></tr></table></div>
</details>
</div>
</div>
<div class="doc doc-object doc-function">
<h3 id="pyiceberg.io.pyarrow.ArrowScan.to_table" class="doc doc-heading">
<code class="highlight language-python"><span class="n">to_table</span><span class="p">(</span><span class="n">tasks</span><span class="p">)</span></code>
<a href="#pyiceberg.io.pyarrow.ArrowScan.to_table" class="headerlink" title="Permanent link">&para;</a></h3>
<div class="doc doc-contents ">
<p>Scan the Iceberg table and return a pa.Table.</p>
<p>Returns a pa.Table with data from the Iceberg table by resolving the
right columns that match the current table schema. Only data that
matches the provided row_filter expression is returned.</p>
<p><span class="doc-section-title">Parameters:</span></p>
<table>
<thead>
<tr>
<th>Name</th>
<th>Type</th>
<th>Description</th>
<th>Default</th>
</tr>
</thead>
<tbody>
<tr class="doc-section-item">
<td>
<code>tasks</code>
</td>
<td>
<code><span title="typing.Iterable">Iterable</span>[<a class="autorefs autorefs-internal" title="FileScanTask
dataclass
(pyiceberg.table.FileScanTask)" href="../../table/#pyiceberg.table.FileScanTask">FileScanTask</a>]</code>
</td>
<td>
<div class="doc-md-description">
<p>FileScanTasks representing the data files and delete files to read from.</p>
</div>
</td>
<td>
<em>required</em>
</td>
</tr>
</tbody>
</table>
<p><span class="doc-section-title">Returns:</span></p>
<table>
<thead>
<tr>
<th>Type</th>
<th>Description</th>
</tr>
</thead>
<tbody>
<tr class="doc-section-item">
<td>
<code><span title="pyarrow.Table">Table</span></code>
</td>
<td>
<div class="doc-md-description">
<p>A PyArrow table. Total number of rows will be capped at the scan limit, if one is specified.</p>
</div>
</td>
</tr>
</tbody>
</table>
<p><span class="doc-section-title">Raises:</span></p>
<table>
<thead>
<tr>
<th>Type</th>
<th>Description</th>
</tr>
</thead>
<tbody>
<tr class="doc-section-item">
<td>
<code><span title="pyiceberg.exceptions.ResolveError">ResolveError</span></code>
</td>
<td>
<div class="doc-md-description">
<p>When a required field cannot be found in the file</p>
</div>
</td>
</tr>
<tr class="doc-section-item">
<td>
<code><span title="ValueError">ValueError</span></code>
</td>
<td>
<div class="doc-md-description">
<p>When a field type in the file cannot be projected to the schema type</p>
</div>
</td>
</tr>
</tbody>
</table>
<details class="quote">
<summary>Source code in <code>pyiceberg/io/pyarrow.py</code></summary>
<div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-1666">1666</a></span>
<span class="normal"><a href="#__codelineno-0-1667">1667</a></span>
<span class="normal"><a href="#__codelineno-0-1668">1668</a></span>
<span class="normal"><a href="#__codelineno-0-1669">1669</a></span>
<span class="normal"><a href="#__codelineno-0-1670">1670</a></span>
<span class="normal"><a href="#__codelineno-0-1671">1671</a></span>
<span class="normal"><a href="#__codelineno-0-1672">1672</a></span>
<span class="normal"><a href="#__codelineno-0-1673">1673</a></span>
<span class="normal"><a href="#__codelineno-0-1674">1674</a></span>
<span class="normal"><a href="#__codelineno-0-1675">1675</a></span>
<span class="normal"><a href="#__codelineno-0-1676">1676</a></span>
<span class="normal"><a href="#__codelineno-0-1677">1677</a></span>
<span class="normal"><a href="#__codelineno-0-1678">1678</a></span>
<span class="normal"><a href="#__codelineno-0-1679">1679</a></span>
<span class="normal"><a href="#__codelineno-0-1680">1680</a></span>
<span class="normal"><a href="#__codelineno-0-1681">1681</a></span>
<span class="normal"><a href="#__codelineno-0-1682">1682</a></span>
<span class="normal"><a href="#__codelineno-0-1683">1683</a></span>
<span class="normal"><a href="#__codelineno-0-1684">1684</a></span>
<span class="normal"><a href="#__codelineno-0-1685">1685</a></span>
<span class="normal"><a href="#__codelineno-0-1686">1686</a></span>
<span class="normal"><a href="#__codelineno-0-1687">1687</a></span>
<span class="normal"><a href="#__codelineno-0-1688">1688</a></span>
<span class="normal"><a href="#__codelineno-0-1689">1689</a></span>
<span class="normal"><a href="#__codelineno-0-1690">1690</a></span>
<span class="normal"><a href="#__codelineno-0-1691">1691</a></span>
<span class="normal"><a href="#__codelineno-0-1692">1692</a></span>
<span class="normal"><a href="#__codelineno-0-1693">1693</a></span>
<span class="normal"><a href="#__codelineno-0-1694">1694</a></span>
<span class="normal"><a href="#__codelineno-0-1695">1695</a></span>
<span class="normal"><a href="#__codelineno-0-1696">1696</a></span>
<span class="normal"><a href="#__codelineno-0-1697">1697</a></span>
<span class="normal"><a href="#__codelineno-0-1698">1698</a></span>
<span class="normal"><a href="#__codelineno-0-1699">1699</a></span>
<span class="normal"><a href="#__codelineno-0-1700">1700</a></span>
<span class="normal"><a href="#__codelineno-0-1701">1701</a></span>
<span class="normal"><a href="#__codelineno-0-1702">1702</a></span>
<span class="normal"><a href="#__codelineno-0-1703">1703</a></span>
<span class="normal"><a href="#__codelineno-0-1704">1704</a></span>
<span class="normal"><a href="#__codelineno-0-1705">1705</a></span>
<span class="normal"><a href="#__codelineno-0-1706">1706</a></span></pre></div></td><td class="code"><div><pre><span></span><code><a id="__codelineno-0-1666" name="__codelineno-0-1666"></a><span class="k">def</span><span class="w"> </span><span class="nf">to_table</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">tasks</span><span class="p">:</span> <span class="n">Iterable</span><span class="p">[</span><span class="n">FileScanTask</span><span class="p">])</span> <span class="o">-&gt;</span> <span class="n">pa</span><span class="o">.</span><span class="n">Table</span><span class="p">:</span>
<a id="__codelineno-0-1667" name="__codelineno-0-1667"></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;Scan the Iceberg table and return a pa.Table.</span>
<a id="__codelineno-0-1668" name="__codelineno-0-1668"></a>
<a id="__codelineno-0-1669" name="__codelineno-0-1669"></a><span class="sd"> Returns a pa.Table with data from the Iceberg table by resolving the</span>
<a id="__codelineno-0-1670" name="__codelineno-0-1670"></a><span class="sd"> right columns that match the current table schema. Only data that</span>
<a id="__codelineno-0-1671" name="__codelineno-0-1671"></a><span class="sd"> matches the provided row_filter expression is returned.</span>
<a id="__codelineno-0-1672" name="__codelineno-0-1672"></a>
<a id="__codelineno-0-1673" name="__codelineno-0-1673"></a><span class="sd"> Args:</span>
<a id="__codelineno-0-1674" name="__codelineno-0-1674"></a><span class="sd"> tasks: FileScanTasks representing the data files and delete files to read from.</span>
<a id="__codelineno-0-1675" name="__codelineno-0-1675"></a>
<a id="__codelineno-0-1676" name="__codelineno-0-1676"></a><span class="sd"> Returns:</span>
<a id="__codelineno-0-1677" name="__codelineno-0-1677"></a><span class="sd"> A PyArrow table. Total number of rows will be capped at the scan limit, if one is specified.</span>
<a id="__codelineno-0-1678" name="__codelineno-0-1678"></a>
<a id="__codelineno-0-1679" name="__codelineno-0-1679"></a><span class="sd"> Raises:</span>
<a id="__codelineno-0-1680" name="__codelineno-0-1680"></a><span class="sd"> ResolveError: When a required field cannot be found in the file</span>
<a id="__codelineno-0-1681" name="__codelineno-0-1681"></a><span class="sd"> ValueError: When a field type in the file cannot be projected to the schema type</span>
<a id="__codelineno-0-1682" name="__codelineno-0-1682"></a><span class="sd"> &quot;&quot;&quot;</span>
<a id="__codelineno-0-1683" name="__codelineno-0-1683"></a> <span class="n">arrow_schema</span> <span class="o">=</span> <span class="n">schema_to_pyarrow</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_projected_schema</span><span class="p">,</span> <span class="n">include_field_ids</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span>
<a id="__codelineno-0-1684" name="__codelineno-0-1684"></a>
<a id="__codelineno-0-1685" name="__codelineno-0-1685"></a> <span class="n">batches</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">to_record_batches</span><span class="p">(</span><span class="n">tasks</span><span class="p">)</span>
<a id="__codelineno-0-1686" name="__codelineno-0-1686"></a> <span class="k">try</span><span class="p">:</span>
<a id="__codelineno-0-1687" name="__codelineno-0-1687"></a> <span class="n">first_batch</span> <span class="o">=</span> <span class="nb">next</span><span class="p">(</span><span class="n">batches</span><span class="p">)</span>
<a id="__codelineno-0-1688" name="__codelineno-0-1688"></a> <span class="k">except</span> <span class="ne">StopIteration</span><span class="p">:</span>
<a id="__codelineno-0-1689" name="__codelineno-0-1689"></a> <span class="c1"># Empty</span>
<a id="__codelineno-0-1690" name="__codelineno-0-1690"></a> <span class="k">return</span> <span class="n">arrow_schema</span><span class="o">.</span><span class="n">empty_table</span><span class="p">()</span>
<a id="__codelineno-0-1691" name="__codelineno-0-1691"></a>
<a id="__codelineno-0-1692" name="__codelineno-0-1692"></a> <span class="c1"># Note: cannot use pa.Table.from_batches(itertools.chain([first_batch], batches))</span>
<a id="__codelineno-0-1693" name="__codelineno-0-1693"></a> <span class="c1"># as different batches can use different schemas (due to large_ types)</span>
<a id="__codelineno-0-1694" name="__codelineno-0-1694"></a> <span class="n">result</span> <span class="o">=</span> <span class="n">pa</span><span class="o">.</span><span class="n">concat_tables</span><span class="p">(</span>
<a id="__codelineno-0-1695" name="__codelineno-0-1695"></a> <span class="p">(</span><span class="n">pa</span><span class="o">.</span><span class="n">Table</span><span class="o">.</span><span class="n">from_batches</span><span class="p">([</span><span class="n">batch</span><span class="p">])</span> <span class="k">for</span> <span class="n">batch</span> <span class="ow">in</span> <span class="n">itertools</span><span class="o">.</span><span class="n">chain</span><span class="p">([</span><span class="n">first_batch</span><span class="p">],</span> <span class="n">batches</span><span class="p">)),</span> <span class="n">promote_options</span><span class="o">=</span><span class="s2">&quot;permissive&quot;</span>
<a id="__codelineno-0-1696" name="__codelineno-0-1696"></a> <span class="p">)</span>
<a id="__codelineno-0-1697" name="__codelineno-0-1697"></a>
<a id="__codelineno-0-1698" name="__codelineno-0-1698"></a> <span class="k">if</span> <span class="n">property_as_bool</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_io</span><span class="o">.</span><span class="n">properties</span><span class="p">,</span> <span class="n">PYARROW_USE_LARGE_TYPES_ON_READ</span><span class="p">,</span> <span class="kc">False</span><span class="p">):</span>
<a id="__codelineno-0-1699" name="__codelineno-0-1699"></a> <span class="n">deprecation_message</span><span class="p">(</span>
<a id="__codelineno-0-1700" name="__codelineno-0-1700"></a> <span class="n">deprecated_in</span><span class="o">=</span><span class="s2">&quot;0.10.0&quot;</span><span class="p">,</span>
<a id="__codelineno-0-1701" name="__codelineno-0-1701"></a> <span class="n">removed_in</span><span class="o">=</span><span class="s2">&quot;0.11.0&quot;</span><span class="p">,</span>
<a id="__codelineno-0-1702" name="__codelineno-0-1702"></a> <span class="n">help_message</span><span class="o">=</span><span class="sa">f</span><span class="s2">&quot;Property `</span><span class="si">{</span><span class="n">PYARROW_USE_LARGE_TYPES_ON_READ</span><span class="si">}</span><span class="s2">` will be removed.&quot;</span><span class="p">,</span>
<a id="__codelineno-0-1703" name="__codelineno-0-1703"></a> <span class="p">)</span>
<a id="__codelineno-0-1704" name="__codelineno-0-1704"></a> <span class="n">result</span> <span class="o">=</span> <span class="n">result</span><span class="o">.</span><span class="n">cast</span><span class="p">(</span><span class="n">arrow_schema</span><span class="p">)</span>
<a id="__codelineno-0-1705" name="__codelineno-0-1705"></a>
<a id="__codelineno-0-1706" name="__codelineno-0-1706"></a> <span class="k">return</span> <span class="n">result</span>
</code></pre></div></td></tr></table></div>
</details>
</div>
</div>
</div>
</div>
</div>
<div class="doc doc-object doc-class">
<h2 id="pyiceberg.io.pyarrow.PyArrowFile" class="doc doc-heading">
<code>PyArrowFile</code>
<a href="#pyiceberg.io.pyarrow.PyArrowFile" class="headerlink" title="Permanent link">&para;</a></h2>
<div class="doc doc-contents ">
<p class="doc doc-class-bases">
Bases: <code><a class="autorefs autorefs-internal" title="InputFile (pyiceberg.io.InputFile)" href="../#pyiceberg.io.InputFile">InputFile</a></code>, <code><a class="autorefs autorefs-internal" title="OutputFile (pyiceberg.io.OutputFile)" href="../#pyiceberg.io.OutputFile">OutputFile</a></code></p>
<p>A combined InputFile and OutputFile implementation that uses a pyarrow filesystem to generate pyarrow.lib.NativeFile instances.</p>
<p><span class="doc-section-title">Parameters:</span></p>
<table>
<thead>
<tr>
<th>Name</th>
<th>Type</th>
<th>Description</th>
<th>Default</th>
</tr>
</thead>
<tbody>
<tr class="doc-section-item">
<td>
<code>location</code>
</td>
<td>
<code><span title="str">str</span></code>
</td>
<td>
<div class="doc-md-description">
<p>A URI or a path to a local file.</p>
</div>
</td>
<td>
<em>required</em>
</td>
</tr>
</tbody>
</table>
<p><span class="doc-section-title">Attributes:</span></p>
<table>
<thead>
<tr>
<th>Name</th>
<th>Type</th>
<th>Description</th>
</tr>
</thead>
<tbody>
<tr class="doc-section-item">
<td><code><span title="pyiceberg.io.pyarrow.PyArrowFile.location">location</span></code></td>
<td>
<code><span title="str">str</span></code>
</td>
<td>
<div class="doc-md-description">
<p>The URI or path to a local file for a PyArrowFile instance.</p>
</div>
</td>
</tr>
</tbody>
</table>
<p><span class="doc-section-title">Examples:</span></p>
<div class="highlight"><pre><span></span><code><a id="__codelineno-0-1" name="__codelineno-0-1" href="#__codelineno-0-1"></a><span class="gp">&gt;&gt;&gt; </span><span class="kn">from</span><span class="w"> </span><span class="nn">pyiceberg.io.pyarrow</span><span class="w"> </span><span class="kn">import</span> <span class="n">PyArrowFile</span>
<a id="__codelineno-0-2" name="__codelineno-0-2" href="#__codelineno-0-2"></a><span class="gp">&gt;&gt;&gt; </span><span class="c1"># input_file = PyArrowFile(&quot;s3://foo/bar.txt&quot;)</span>
<a id="__codelineno-0-3" name="__codelineno-0-3" href="#__codelineno-0-3"></a><span class="gp">&gt;&gt;&gt; </span><span class="c1"># Read the contents of the PyArrowFile instance</span>
<a id="__codelineno-0-4" name="__codelineno-0-4" href="#__codelineno-0-4"></a><span class="gp">&gt;&gt;&gt; </span><span class="c1"># Make sure that you have permissions to read/write</span>
<a id="__codelineno-0-5" name="__codelineno-0-5" href="#__codelineno-0-5"></a><span class="gp">&gt;&gt;&gt; </span><span class="c1"># file_content = input_file.open().read()</span>
</code></pre></div>
<div class="highlight"><pre><span></span><code><a id="__codelineno-0-1" name="__codelineno-0-1" href="#__codelineno-0-1"></a><span class="gp">&gt;&gt;&gt; </span><span class="c1"># output_file = PyArrowFile(&quot;s3://baz/qux.txt&quot;)</span>
<a id="__codelineno-0-2" name="__codelineno-0-2" href="#__codelineno-0-2"></a><span class="gp">&gt;&gt;&gt; </span><span class="c1"># Write bytes to a file</span>
<a id="__codelineno-0-3" name="__codelineno-0-3" href="#__codelineno-0-3"></a><span class="gp">&gt;&gt;&gt; </span><span class="c1"># Make sure that you have permissions to read/write</span>
<a id="__codelineno-0-4" name="__codelineno-0-4" href="#__codelineno-0-4"></a><span class="gp">&gt;&gt;&gt; </span><span class="c1"># output_file.create().write(b&#39;foobytes&#39;)</span>
</code></pre></div>
<details class="quote">
<summary>Source code in <code>pyiceberg/io/pyarrow.py</code></summary>
<div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-256">256</a></span>
<span class="normal"><a href="#__codelineno-0-257">257</a></span>
<span class="normal"><a href="#__codelineno-0-258">258</a></span>
<span class="normal"><a href="#__codelineno-0-259">259</a></span>
<span class="normal"><a href="#__codelineno-0-260">260</a></span>
<span class="normal"><a href="#__codelineno-0-261">261</a></span>
<span class="normal"><a href="#__codelineno-0-262">262</a></span>
<span class="normal"><a href="#__codelineno-0-263">263</a></span>
<span class="normal"><a href="#__codelineno-0-264">264</a></span>
<span class="normal"><a href="#__codelineno-0-265">265</a></span>
<span class="normal"><a href="#__codelineno-0-266">266</a></span>
<span class="normal"><a href="#__codelineno-0-267">267</a></span>
<span class="normal"><a href="#__codelineno-0-268">268</a></span>
<span class="normal"><a href="#__codelineno-0-269">269</a></span>
<span class="normal"><a href="#__codelineno-0-270">270</a></span>
<span class="normal"><a href="#__codelineno-0-271">271</a></span>
<span class="normal"><a href="#__codelineno-0-272">272</a></span>
<span class="normal"><a href="#__codelineno-0-273">273</a></span>
<span class="normal"><a href="#__codelineno-0-274">274</a></span>
<span class="normal"><a href="#__codelineno-0-275">275</a></span>
<span class="normal"><a href="#__codelineno-0-276">276</a></span>
<span class="normal"><a href="#__codelineno-0-277">277</a></span>
<span class="normal"><a href="#__codelineno-0-278">278</a></span>
<span class="normal"><a href="#__codelineno-0-279">279</a></span>
<span class="normal"><a href="#__codelineno-0-280">280</a></span>
<span class="normal"><a href="#__codelineno-0-281">281</a></span>
<span class="normal"><a href="#__codelineno-0-282">282</a></span>
<span class="normal"><a href="#__codelineno-0-283">283</a></span>
<span class="normal"><a href="#__codelineno-0-284">284</a></span>
<span class="normal"><a href="#__codelineno-0-285">285</a></span>
<span class="normal"><a href="#__codelineno-0-286">286</a></span>
<span class="normal"><a href="#__codelineno-0-287">287</a></span>
<span class="normal"><a href="#__codelineno-0-288">288</a></span>
<span class="normal"><a href="#__codelineno-0-289">289</a></span>
<span class="normal"><a href="#__codelineno-0-290">290</a></span>
<span class="normal"><a href="#__codelineno-0-291">291</a></span>
<span class="normal"><a href="#__codelineno-0-292">292</a></span>
<span class="normal"><a href="#__codelineno-0-293">293</a></span>
<span class="normal"><a href="#__codelineno-0-294">294</a></span>
<span class="normal"><a href="#__codelineno-0-295">295</a></span>
<span class="normal"><a href="#__codelineno-0-296">296</a></span>
<span class="normal"><a href="#__codelineno-0-297">297</a></span>
<span class="normal"><a href="#__codelineno-0-298">298</a></span>
<span class="normal"><a href="#__codelineno-0-299">299</a></span>
<span class="normal"><a href="#__codelineno-0-300">300</a></span>
<span class="normal"><a href="#__codelineno-0-301">301</a></span>
<span class="normal"><a href="#__codelineno-0-302">302</a></span>
<span class="normal"><a href="#__codelineno-0-303">303</a></span>
<span class="normal"><a href="#__codelineno-0-304">304</a></span>
<span class="normal"><a href="#__codelineno-0-305">305</a></span>
<span class="normal"><a href="#__codelineno-0-306">306</a></span>
<span class="normal"><a href="#__codelineno-0-307">307</a></span>
<span class="normal"><a href="#__codelineno-0-308">308</a></span>
<span class="normal"><a href="#__codelineno-0-309">309</a></span>
<span class="normal"><a href="#__codelineno-0-310">310</a></span>
<span class="normal"><a href="#__codelineno-0-311">311</a></span>
<span class="normal"><a href="#__codelineno-0-312">312</a></span>
<span class="normal"><a href="#__codelineno-0-313">313</a></span>
<span class="normal"><a href="#__codelineno-0-314">314</a></span>
<span class="normal"><a href="#__codelineno-0-315">315</a></span>
<span class="normal"><a href="#__codelineno-0-316">316</a></span>
<span class="normal"><a href="#__codelineno-0-317">317</a></span>
<span class="normal"><a href="#__codelineno-0-318">318</a></span>
<span class="normal"><a href="#__codelineno-0-319">319</a></span>
<span class="normal"><a href="#__codelineno-0-320">320</a></span>
<span class="normal"><a href="#__codelineno-0-321">321</a></span>
<span class="normal"><a href="#__codelineno-0-322">322</a></span>
<span class="normal"><a href="#__codelineno-0-323">323</a></span>
<span class="normal"><a href="#__codelineno-0-324">324</a></span>
<span class="normal"><a href="#__codelineno-0-325">325</a></span>
<span class="normal"><a href="#__codelineno-0-326">326</a></span>
<span class="normal"><a href="#__codelineno-0-327">327</a></span>
<span class="normal"><a href="#__codelineno-0-328">328</a></span>
<span class="normal"><a href="#__codelineno-0-329">329</a></span>
<span class="normal"><a href="#__codelineno-0-330">330</a></span>
<span class="normal"><a href="#__codelineno-0-331">331</a></span>
<span class="normal"><a href="#__codelineno-0-332">332</a></span>
<span class="normal"><a href="#__codelineno-0-333">333</a></span>
<span class="normal"><a href="#__codelineno-0-334">334</a></span>
<span class="normal"><a href="#__codelineno-0-335">335</a></span>
<span class="normal"><a href="#__codelineno-0-336">336</a></span>
<span class="normal"><a href="#__codelineno-0-337">337</a></span>
<span class="normal"><a href="#__codelineno-0-338">338</a></span>
<span class="normal"><a href="#__codelineno-0-339">339</a></span>
<span class="normal"><a href="#__codelineno-0-340">340</a></span>
<span class="normal"><a href="#__codelineno-0-341">341</a></span>
<span class="normal"><a href="#__codelineno-0-342">342</a></span>
<span class="normal"><a href="#__codelineno-0-343">343</a></span>
<span class="normal"><a href="#__codelineno-0-344">344</a></span>
<span class="normal"><a href="#__codelineno-0-345">345</a></span>
<span class="normal"><a href="#__codelineno-0-346">346</a></span>
<span class="normal"><a href="#__codelineno-0-347">347</a></span>
<span class="normal"><a href="#__codelineno-0-348">348</a></span>
<span class="normal"><a href="#__codelineno-0-349">349</a></span>
<span class="normal"><a href="#__codelineno-0-350">350</a></span>
<span class="normal"><a href="#__codelineno-0-351">351</a></span>
<span class="normal"><a href="#__codelineno-0-352">352</a></span>
<span class="normal"><a href="#__codelineno-0-353">353</a></span>
<span class="normal"><a href="#__codelineno-0-354">354</a></span>
<span class="normal"><a href="#__codelineno-0-355">355</a></span>
<span class="normal"><a href="#__codelineno-0-356">356</a></span>
<span class="normal"><a href="#__codelineno-0-357">357</a></span>
<span class="normal"><a href="#__codelineno-0-358">358</a></span>
<span class="normal"><a href="#__codelineno-0-359">359</a></span>
<span class="normal"><a href="#__codelineno-0-360">360</a></span>
<span class="normal"><a href="#__codelineno-0-361">361</a></span>
<span class="normal"><a href="#__codelineno-0-362">362</a></span>
<span class="normal"><a href="#__codelineno-0-363">363</a></span>
<span class="normal"><a href="#__codelineno-0-364">364</a></span>
<span class="normal"><a href="#__codelineno-0-365">365</a></span>
<span class="normal"><a href="#__codelineno-0-366">366</a></span>
<span class="normal"><a href="#__codelineno-0-367">367</a></span>
<span class="normal"><a href="#__codelineno-0-368">368</a></span>
<span class="normal"><a href="#__codelineno-0-369">369</a></span>
<span class="normal"><a href="#__codelineno-0-370">370</a></span>
<span class="normal"><a href="#__codelineno-0-371">371</a></span>
<span class="normal"><a href="#__codelineno-0-372">372</a></span>
<span class="normal"><a href="#__codelineno-0-373">373</a></span>
<span class="normal"><a href="#__codelineno-0-374">374</a></span>
<span class="normal"><a href="#__codelineno-0-375">375</a></span>
<span class="normal"><a href="#__codelineno-0-376">376</a></span>
<span class="normal"><a href="#__codelineno-0-377">377</a></span>
<span class="normal"><a href="#__codelineno-0-378">378</a></span>
<span class="normal"><a href="#__codelineno-0-379">379</a></span>
<span class="normal"><a href="#__codelineno-0-380">380</a></span>
<span class="normal"><a href="#__codelineno-0-381">381</a></span>
<span class="normal"><a href="#__codelineno-0-382">382</a></span>
<span class="normal"><a href="#__codelineno-0-383">383</a></span>
<span class="normal"><a href="#__codelineno-0-384">384</a></span>
<span class="normal"><a href="#__codelineno-0-385">385</a></span>
<span class="normal"><a href="#__codelineno-0-386">386</a></span></pre></div></td><td class="code"><div><pre><span></span><code><a id="__codelineno-0-256" name="__codelineno-0-256"></a><span class="k">class</span><span class="w"> </span><span class="nc">PyArrowFile</span><span class="p">(</span><span class="n">InputFile</span><span class="p">,</span> <span class="n">OutputFile</span><span class="p">):</span>
<a id="__codelineno-0-257" name="__codelineno-0-257"></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;A combined InputFile and OutputFile implementation that uses a pyarrow filesystem to generate pyarrow.lib.NativeFile instances.</span>
<a id="__codelineno-0-258" name="__codelineno-0-258"></a>
<a id="__codelineno-0-259" name="__codelineno-0-259"></a><span class="sd"> Args:</span>
<a id="__codelineno-0-260" name="__codelineno-0-260"></a><span class="sd"> location (str): A URI or a path to a local file.</span>
<a id="__codelineno-0-261" name="__codelineno-0-261"></a>
<a id="__codelineno-0-262" name="__codelineno-0-262"></a><span class="sd"> Attributes:</span>
<a id="__codelineno-0-263" name="__codelineno-0-263"></a><span class="sd"> location (str): The URI or path to a local file for a PyArrowFile instance.</span>
<a id="__codelineno-0-264" name="__codelineno-0-264"></a>
<a id="__codelineno-0-265" name="__codelineno-0-265"></a><span class="sd"> Examples:</span>
<a id="__codelineno-0-266" name="__codelineno-0-266"></a><span class="sd"> &gt;&gt;&gt; from pyiceberg.io.pyarrow import PyArrowFile</span>
<a id="__codelineno-0-267" name="__codelineno-0-267"></a><span class="sd"> &gt;&gt;&gt; # input_file = PyArrowFile(&quot;s3://foo/bar.txt&quot;)</span>
<a id="__codelineno-0-268" name="__codelineno-0-268"></a><span class="sd"> &gt;&gt;&gt; # Read the contents of the PyArrowFile instance</span>
<a id="__codelineno-0-269" name="__codelineno-0-269"></a><span class="sd"> &gt;&gt;&gt; # Make sure that you have permissions to read/write</span>
<a id="__codelineno-0-270" name="__codelineno-0-270"></a><span class="sd"> &gt;&gt;&gt; # file_content = input_file.open().read()</span>
<a id="__codelineno-0-271" name="__codelineno-0-271"></a>
<a id="__codelineno-0-272" name="__codelineno-0-272"></a><span class="sd"> &gt;&gt;&gt; # output_file = PyArrowFile(&quot;s3://baz/qux.txt&quot;)</span>
<a id="__codelineno-0-273" name="__codelineno-0-273"></a><span class="sd"> &gt;&gt;&gt; # Write bytes to a file</span>
<a id="__codelineno-0-274" name="__codelineno-0-274"></a><span class="sd"> &gt;&gt;&gt; # Make sure that you have permissions to read/write</span>
<a id="__codelineno-0-275" name="__codelineno-0-275"></a><span class="sd"> &gt;&gt;&gt; # output_file.create().write(b&#39;foobytes&#39;)</span>
<a id="__codelineno-0-276" name="__codelineno-0-276"></a><span class="sd"> &quot;&quot;&quot;</span>
<a id="__codelineno-0-277" name="__codelineno-0-277"></a>
<a id="__codelineno-0-278" name="__codelineno-0-278"></a> <span class="n">_filesystem</span><span class="p">:</span> <span class="n">FileSystem</span>
<a id="__codelineno-0-279" name="__codelineno-0-279"></a> <span class="n">_path</span><span class="p">:</span> <span class="nb">str</span>
<a id="__codelineno-0-280" name="__codelineno-0-280"></a> <span class="n">_buffer_size</span><span class="p">:</span> <span class="nb">int</span>
<a id="__codelineno-0-281" name="__codelineno-0-281"></a>
<a id="__codelineno-0-282" name="__codelineno-0-282"></a> <span class="k">def</span><span class="w"> </span><span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">location</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span> <span class="n">path</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span> <span class="n">fs</span><span class="p">:</span> <span class="n">FileSystem</span><span class="p">,</span> <span class="n">buffer_size</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="n">ONE_MEGABYTE</span><span class="p">):</span>
<a id="__codelineno-0-283" name="__codelineno-0-283"></a> <span class="bp">self</span><span class="o">.</span><span class="n">_filesystem</span> <span class="o">=</span> <span class="n">fs</span>
<a id="__codelineno-0-284" name="__codelineno-0-284"></a> <span class="bp">self</span><span class="o">.</span><span class="n">_path</span> <span class="o">=</span> <span class="n">path</span>
<a id="__codelineno-0-285" name="__codelineno-0-285"></a> <span class="bp">self</span><span class="o">.</span><span class="n">_buffer_size</span> <span class="o">=</span> <span class="n">buffer_size</span>
<a id="__codelineno-0-286" name="__codelineno-0-286"></a> <span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="fm">__init__</span><span class="p">(</span><span class="n">location</span><span class="o">=</span><span class="n">location</span><span class="p">)</span>
<a id="__codelineno-0-287" name="__codelineno-0-287"></a>
<a id="__codelineno-0-288" name="__codelineno-0-288"></a> <span class="k">def</span><span class="w"> </span><span class="nf">_file_info</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">FileInfo</span><span class="p">:</span>
<a id="__codelineno-0-289" name="__codelineno-0-289"></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;Retrieve a pyarrow.fs.FileInfo object for the location.</span>
<a id="__codelineno-0-290" name="__codelineno-0-290"></a>
<a id="__codelineno-0-291" name="__codelineno-0-291"></a><span class="sd"> Raises:</span>
<a id="__codelineno-0-292" name="__codelineno-0-292"></a><span class="sd"> PermissionError: If the file at self.location cannot be accessed due to a permission error such as</span>
<a id="__codelineno-0-293" name="__codelineno-0-293"></a><span class="sd"> an AWS error code 15.</span>
<a id="__codelineno-0-294" name="__codelineno-0-294"></a><span class="sd"> &quot;&quot;&quot;</span>
<a id="__codelineno-0-295" name="__codelineno-0-295"></a> <span class="k">try</span><span class="p">:</span>
<a id="__codelineno-0-296" name="__codelineno-0-296"></a> <span class="n">file_info</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_filesystem</span><span class="o">.</span><span class="n">get_file_info</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_path</span><span class="p">)</span>
<a id="__codelineno-0-297" name="__codelineno-0-297"></a> <span class="k">except</span> <span class="ne">OSError</span> <span class="k">as</span> <span class="n">e</span><span class="p">:</span>
<a id="__codelineno-0-298" name="__codelineno-0-298"></a> <span class="k">if</span> <span class="n">e</span><span class="o">.</span><span class="n">errno</span> <span class="o">==</span> <span class="mi">13</span> <span class="ow">or</span> <span class="s2">&quot;AWS Error [code 15]&quot;</span> <span class="ow">in</span> <span class="nb">str</span><span class="p">(</span><span class="n">e</span><span class="p">):</span>
<a id="__codelineno-0-299" name="__codelineno-0-299"></a> <span class="k">raise</span> <span class="ne">PermissionError</span><span class="p">(</span><span class="sa">f</span><span class="s2">&quot;Cannot get file info, access denied: </span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">location</span><span class="si">}</span><span class="s2">&quot;</span><span class="p">)</span> <span class="kn">from</span><span class="w"> </span><span class="nn">e</span>
<a id="__codelineno-0-300" name="__codelineno-0-300"></a> <span class="k">raise</span> <span class="c1"># pragma: no cover - If some other kind of OSError, raise the raw error</span>
<a id="__codelineno-0-301" name="__codelineno-0-301"></a>
<a id="__codelineno-0-302" name="__codelineno-0-302"></a> <span class="k">if</span> <span class="n">file_info</span><span class="o">.</span><span class="n">type</span> <span class="o">==</span> <span class="n">FileType</span><span class="o">.</span><span class="n">NotFound</span><span class="p">:</span>
<a id="__codelineno-0-303" name="__codelineno-0-303"></a> <span class="k">raise</span> <span class="ne">FileNotFoundError</span><span class="p">(</span><span class="sa">f</span><span class="s2">&quot;Cannot get file info, file not found: </span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">location</span><span class="si">}</span><span class="s2">&quot;</span><span class="p">)</span>
<a id="__codelineno-0-304" name="__codelineno-0-304"></a> <span class="k">return</span> <span class="n">file_info</span>
<a id="__codelineno-0-305" name="__codelineno-0-305"></a>
<a id="__codelineno-0-306" name="__codelineno-0-306"></a> <span class="k">def</span><span class="w"> </span><span class="fm">__len__</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="nb">int</span><span class="p">:</span>
<a id="__codelineno-0-307" name="__codelineno-0-307"></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;Return the total length of the file, in bytes.&quot;&quot;&quot;</span>
<a id="__codelineno-0-308" name="__codelineno-0-308"></a> <span class="n">file_info</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_file_info</span><span class="p">()</span>
<a id="__codelineno-0-309" name="__codelineno-0-309"></a> <span class="k">return</span> <span class="n">file_info</span><span class="o">.</span><span class="n">size</span>
<a id="__codelineno-0-310" name="__codelineno-0-310"></a>
<a id="__codelineno-0-311" name="__codelineno-0-311"></a> <span class="k">def</span><span class="w"> </span><span class="nf">exists</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="nb">bool</span><span class="p">:</span>
<a id="__codelineno-0-312" name="__codelineno-0-312"></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;Check whether the location exists.&quot;&quot;&quot;</span>
<a id="__codelineno-0-313" name="__codelineno-0-313"></a> <span class="k">try</span><span class="p">:</span>
<a id="__codelineno-0-314" name="__codelineno-0-314"></a> <span class="bp">self</span><span class="o">.</span><span class="n">_file_info</span><span class="p">()</span> <span class="c1"># raises FileNotFoundError if it does not exist</span>
<a id="__codelineno-0-315" name="__codelineno-0-315"></a> <span class="k">return</span> <span class="kc">True</span>
<a id="__codelineno-0-316" name="__codelineno-0-316"></a> <span class="k">except</span> <span class="ne">FileNotFoundError</span><span class="p">:</span>
<a id="__codelineno-0-317" name="__codelineno-0-317"></a> <span class="k">return</span> <span class="kc">False</span>
<a id="__codelineno-0-318" name="__codelineno-0-318"></a>
<a id="__codelineno-0-319" name="__codelineno-0-319"></a> <span class="k">def</span><span class="w"> </span><span class="nf">open</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">seekable</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">True</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">InputStream</span><span class="p">:</span>
<a id="__codelineno-0-320" name="__codelineno-0-320"></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;Open the location using a PyArrow FileSystem inferred from the location.</span>
<a id="__codelineno-0-321" name="__codelineno-0-321"></a>
<a id="__codelineno-0-322" name="__codelineno-0-322"></a><span class="sd"> Args:</span>
<a id="__codelineno-0-323" name="__codelineno-0-323"></a><span class="sd"> seekable: If the stream should support seek, or if it is consumed sequentially.</span>
<a id="__codelineno-0-324" name="__codelineno-0-324"></a>
<a id="__codelineno-0-325" name="__codelineno-0-325"></a><span class="sd"> Returns:</span>
<a id="__codelineno-0-326" name="__codelineno-0-326"></a><span class="sd"> pyarrow.lib.NativeFile: A NativeFile instance for the file located at `self.location`.</span>
<a id="__codelineno-0-327" name="__codelineno-0-327"></a>
<a id="__codelineno-0-328" name="__codelineno-0-328"></a><span class="sd"> Raises:</span>
<a id="__codelineno-0-329" name="__codelineno-0-329"></a><span class="sd"> FileNotFoundError: If the file at self.location does not exist.</span>
<a id="__codelineno-0-330" name="__codelineno-0-330"></a><span class="sd"> PermissionError: If the file at self.location cannot be accessed due to a permission error such as</span>
<a id="__codelineno-0-331" name="__codelineno-0-331"></a><span class="sd"> an AWS error code 15.</span>
<a id="__codelineno-0-332" name="__codelineno-0-332"></a><span class="sd"> &quot;&quot;&quot;</span>
<a id="__codelineno-0-333" name="__codelineno-0-333"></a> <span class="k">try</span><span class="p">:</span>
<a id="__codelineno-0-334" name="__codelineno-0-334"></a> <span class="k">if</span> <span class="n">seekable</span><span class="p">:</span>
<a id="__codelineno-0-335" name="__codelineno-0-335"></a> <span class="n">input_file</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_filesystem</span><span class="o">.</span><span class="n">open_input_file</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_path</span><span class="p">)</span>
<a id="__codelineno-0-336" name="__codelineno-0-336"></a> <span class="k">else</span><span class="p">:</span>
<a id="__codelineno-0-337" name="__codelineno-0-337"></a> <span class="n">input_file</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_filesystem</span><span class="o">.</span><span class="n">open_input_stream</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_path</span><span class="p">,</span> <span class="n">buffer_size</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">_buffer_size</span><span class="p">)</span>
<a id="__codelineno-0-338" name="__codelineno-0-338"></a> <span class="k">except</span> <span class="p">(</span><span class="ne">FileNotFoundError</span><span class="p">,</span> <span class="ne">PermissionError</span><span class="p">):</span>
<a id="__codelineno-0-339" name="__codelineno-0-339"></a> <span class="k">raise</span>
<a id="__codelineno-0-340" name="__codelineno-0-340"></a> <span class="k">except</span> <span class="ne">OSError</span> <span class="k">as</span> <span class="n">e</span><span class="p">:</span>
<a id="__codelineno-0-341" name="__codelineno-0-341"></a> <span class="k">if</span> <span class="n">e</span><span class="o">.</span><span class="n">errno</span> <span class="o">==</span> <span class="mi">2</span> <span class="ow">or</span> <span class="s2">&quot;Path does not exist&quot;</span> <span class="ow">in</span> <span class="nb">str</span><span class="p">(</span><span class="n">e</span><span class="p">):</span>
<a id="__codelineno-0-342" name="__codelineno-0-342"></a> <span class="k">raise</span> <span class="ne">FileNotFoundError</span><span class="p">(</span><span class="sa">f</span><span class="s2">&quot;Cannot open file, does not exist: </span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">location</span><span class="si">}</span><span class="s2">&quot;</span><span class="p">)</span> <span class="kn">from</span><span class="w"> </span><span class="nn">e</span>
<a id="__codelineno-0-343" name="__codelineno-0-343"></a> <span class="k">elif</span> <span class="n">e</span><span class="o">.</span><span class="n">errno</span> <span class="o">==</span> <span class="mi">13</span> <span class="ow">or</span> <span class="s2">&quot;AWS Error [code 15]&quot;</span> <span class="ow">in</span> <span class="nb">str</span><span class="p">(</span><span class="n">e</span><span class="p">):</span>
<a id="__codelineno-0-344" name="__codelineno-0-344"></a> <span class="k">raise</span> <span class="ne">PermissionError</span><span class="p">(</span><span class="sa">f</span><span class="s2">&quot;Cannot open file, access denied: </span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">location</span><span class="si">}</span><span class="s2">&quot;</span><span class="p">)</span> <span class="kn">from</span><span class="w"> </span><span class="nn">e</span>
<a id="__codelineno-0-345" name="__codelineno-0-345"></a> <span class="k">raise</span> <span class="c1"># pragma: no cover - If some other kind of OSError, raise the raw error</span>
<a id="__codelineno-0-346" name="__codelineno-0-346"></a> <span class="k">return</span> <span class="n">input_file</span>
<a id="__codelineno-0-347" name="__codelineno-0-347"></a>
<a id="__codelineno-0-348" name="__codelineno-0-348"></a> <span class="k">def</span><span class="w"> </span><span class="nf">create</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">overwrite</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">False</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">OutputStream</span><span class="p">:</span>
<a id="__codelineno-0-349" name="__codelineno-0-349"></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;Create a writable pyarrow.lib.NativeFile for this PyArrowFile&#39;s location.</span>
<a id="__codelineno-0-350" name="__codelineno-0-350"></a>
<a id="__codelineno-0-351" name="__codelineno-0-351"></a><span class="sd"> Args:</span>
<a id="__codelineno-0-352" name="__codelineno-0-352"></a><span class="sd"> overwrite (bool): Whether to overwrite the file if it already exists.</span>
<a id="__codelineno-0-353" name="__codelineno-0-353"></a>
<a id="__codelineno-0-354" name="__codelineno-0-354"></a><span class="sd"> Returns:</span>
<a id="__codelineno-0-355" name="__codelineno-0-355"></a><span class="sd"> pyarrow.lib.NativeFile: A NativeFile instance for the file located at self.location.</span>
<a id="__codelineno-0-356" name="__codelineno-0-356"></a>
<a id="__codelineno-0-357" name="__codelineno-0-357"></a><span class="sd"> Raises:</span>
<a id="__codelineno-0-358" name="__codelineno-0-358"></a><span class="sd"> FileExistsError: If the file already exists at `self.location` and `overwrite` is False.</span>
<a id="__codelineno-0-359" name="__codelineno-0-359"></a>
<a id="__codelineno-0-360" name="__codelineno-0-360"></a><span class="sd"> Note:</span>
<a id="__codelineno-0-361" name="__codelineno-0-361"></a><span class="sd"> This retrieves a pyarrow NativeFile by opening an output stream. If overwrite is set to False,</span>
<a id="__codelineno-0-362" name="__codelineno-0-362"></a><span class="sd"> a check is first performed to verify that the file does not exist. This is not thread-safe and</span>
<a id="__codelineno-0-363" name="__codelineno-0-363"></a><span class="sd"> a possibility does exist that the file can be created by a concurrent process after the existence</span>
<a id="__codelineno-0-364" name="__codelineno-0-364"></a><span class="sd"> check yet before the output stream is created. In such a case, the default pyarrow behavior will</span>
<a id="__codelineno-0-365" name="__codelineno-0-365"></a><span class="sd"> truncate the contents of the existing file when opening the output stream.</span>
<a id="__codelineno-0-366" name="__codelineno-0-366"></a><span class="sd"> &quot;&quot;&quot;</span>
<a id="__codelineno-0-367" name="__codelineno-0-367"></a> <span class="k">try</span><span class="p">:</span>
<a id="__codelineno-0-368" name="__codelineno-0-368"></a> <span class="k">if</span> <span class="ow">not</span> <span class="n">overwrite</span> <span class="ow">and</span> <span class="bp">self</span><span class="o">.</span><span class="n">exists</span><span class="p">()</span> <span class="ow">is</span> <span class="kc">True</span><span class="p">:</span>
<a id="__codelineno-0-369" name="__codelineno-0-369"></a> <span class="k">raise</span> <span class="ne">FileExistsError</span><span class="p">(</span><span class="sa">f</span><span class="s2">&quot;Cannot create file, already exists: </span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">location</span><span class="si">}</span><span class="s2">&quot;</span><span class="p">)</span>
<a id="__codelineno-0-370" name="__codelineno-0-370"></a> <span class="n">output_file</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_filesystem</span><span class="o">.</span><span class="n">open_output_stream</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_path</span><span class="p">,</span> <span class="n">buffer_size</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">_buffer_size</span><span class="p">)</span>
<a id="__codelineno-0-371" name="__codelineno-0-371"></a> <span class="k">except</span> <span class="ne">PermissionError</span><span class="p">:</span>
<a id="__codelineno-0-372" name="__codelineno-0-372"></a> <span class="k">raise</span>
<a id="__codelineno-0-373" name="__codelineno-0-373"></a> <span class="k">except</span> <span class="ne">OSError</span> <span class="k">as</span> <span class="n">e</span><span class="p">:</span>
<a id="__codelineno-0-374" name="__codelineno-0-374"></a> <span class="k">if</span> <span class="n">e</span><span class="o">.</span><span class="n">errno</span> <span class="o">==</span> <span class="mi">13</span> <span class="ow">or</span> <span class="s2">&quot;AWS Error [code 15]&quot;</span> <span class="ow">in</span> <span class="nb">str</span><span class="p">(</span><span class="n">e</span><span class="p">):</span>
<a id="__codelineno-0-375" name="__codelineno-0-375"></a> <span class="k">raise</span> <span class="ne">PermissionError</span><span class="p">(</span><span class="sa">f</span><span class="s2">&quot;Cannot create file, access denied: </span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">location</span><span class="si">}</span><span class="s2">&quot;</span><span class="p">)</span> <span class="kn">from</span><span class="w"> </span><span class="nn">e</span>
<a id="__codelineno-0-376" name="__codelineno-0-376"></a> <span class="k">raise</span> <span class="c1"># pragma: no cover - If some other kind of OSError, raise the raw error</span>
<a id="__codelineno-0-377" name="__codelineno-0-377"></a> <span class="k">return</span> <span class="n">output_file</span>
<a id="__codelineno-0-378" name="__codelineno-0-378"></a>
<a id="__codelineno-0-379" name="__codelineno-0-379"></a> <span class="k">def</span><span class="w"> </span><span class="nf">to_input_file</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">PyArrowFile</span><span class="p">:</span>
<a id="__codelineno-0-380" name="__codelineno-0-380"></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;Return a new PyArrowFile for the location of an existing PyArrowFile instance.</span>
<a id="__codelineno-0-381" name="__codelineno-0-381"></a>
<a id="__codelineno-0-382" name="__codelineno-0-382"></a><span class="sd"> This method is included to abide by the OutputFile abstract base class. Since this implementation uses a single</span>
<a id="__codelineno-0-383" name="__codelineno-0-383"></a><span class="sd"> PyArrowFile class (as opposed to separate InputFile and OutputFile implementations), this method effectively returns</span>
<a id="__codelineno-0-384" name="__codelineno-0-384"></a><span class="sd"> the same instance (no copy is made).</span>
<a id="__codelineno-0-385" name="__codelineno-0-385"></a><span class="sd"> &quot;&quot;&quot;</span>
<a id="__codelineno-0-386" name="__codelineno-0-386"></a> <span class="k">return</span> <span class="bp">self</span>
</code></pre></div></td></tr></table></div>
</details>
<div class="doc doc-children">
<div class="doc doc-object doc-function">
<h3 id="pyiceberg.io.pyarrow.PyArrowFile.__len__" class="doc doc-heading">
<code class="highlight language-python"><span class="fm">__len__</span><span class="p">()</span></code>
<a href="#pyiceberg.io.pyarrow.PyArrowFile.__len__" class="headerlink" title="Permanent link">&para;</a></h3>
<div class="doc doc-contents ">
<p>Return the total length of the file, in bytes.</p>
<details class="quote">
<summary>Source code in <code>pyiceberg/io/pyarrow.py</code></summary>
<div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-306">306</a></span>
<span class="normal"><a href="#__codelineno-0-307">307</a></span>
<span class="normal"><a href="#__codelineno-0-308">308</a></span>
<span class="normal"><a href="#__codelineno-0-309">309</a></span></pre></div></td><td class="code"><div><pre><span></span><code><a id="__codelineno-0-306" name="__codelineno-0-306"></a><span class="k">def</span><span class="w"> </span><span class="fm">__len__</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="nb">int</span><span class="p">:</span>
<a id="__codelineno-0-307" name="__codelineno-0-307"></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;Return the total length of the file, in bytes.&quot;&quot;&quot;</span>
<a id="__codelineno-0-308" name="__codelineno-0-308"></a> <span class="n">file_info</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_file_info</span><span class="p">()</span>
<a id="__codelineno-0-309" name="__codelineno-0-309"></a> <span class="k">return</span> <span class="n">file_info</span><span class="o">.</span><span class="n">size</span>
</code></pre></div></td></tr></table></div>
</details>
</div>
</div>
<div class="doc doc-object doc-function">
<h3 id="pyiceberg.io.pyarrow.PyArrowFile.create" class="doc doc-heading">
<code class="highlight language-python"><span class="n">create</span><span class="p">(</span><span class="n">overwrite</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span></code>
<a href="#pyiceberg.io.pyarrow.PyArrowFile.create" class="headerlink" title="Permanent link">&para;</a></h3>
<div class="doc doc-contents ">
<p>Create a writable pyarrow.lib.NativeFile for this PyArrowFile's location.</p>
<p><span class="doc-section-title">Parameters:</span></p>
<table>
<thead>
<tr>
<th>Name</th>
<th>Type</th>
<th>Description</th>
<th>Default</th>
</tr>
</thead>
<tbody>
<tr class="doc-section-item">
<td>
<code>overwrite</code>
</td>
<td>
<code><span title="bool">bool</span></code>
</td>
<td>
<div class="doc-md-description">
<p>Whether to overwrite the file if it already exists.</p>
</div>
</td>
<td>
<code>False</code>
</td>
</tr>
</tbody>
</table>
<p><span class="doc-section-title">Returns:</span></p>
<table>
<thead>
<tr>
<th>Type</th>
<th>Description</th>
</tr>
</thead>
<tbody>
<tr class="doc-section-item">
<td>
<code><a class="autorefs autorefs-internal" title="OutputStream (pyiceberg.io.OutputStream)" href="../#pyiceberg.io.OutputStream">OutputStream</a></code>
</td>
<td>
<div class="doc-md-description">
<p>pyarrow.lib.NativeFile: A NativeFile instance for the file located at self.location.</p>
</div>
</td>
</tr>
</tbody>
</table>
<p><span class="doc-section-title">Raises:</span></p>
<table>
<thead>
<tr>
<th>Type</th>
<th>Description</th>
</tr>
</thead>
<tbody>
<tr class="doc-section-item">
<td>
<code><span title="FileExistsError">FileExistsError</span></code>
</td>
<td>
<div class="doc-md-description">
<p>If the file already exists at <code>self.location</code> and <code>overwrite</code> is False.</p>
</div>
</td>
</tr>
</tbody>
</table>
<details class="note" open>
<summary>Note</summary>
<p>This retrieves a pyarrow NativeFile by opening an output stream. If overwrite is set to False,
a check is first performed to verify that the file does not exist. This is not thread-safe and
a possibility does exist that the file can be created by a concurrent process after the existence
check yet before the output stream is created. In such a case, the default pyarrow behavior will
truncate the contents of the existing file when opening the output stream.</p>
</details>
<details class="quote">
<summary>Source code in <code>pyiceberg/io/pyarrow.py</code></summary>
<div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-348">348</a></span>
<span class="normal"><a href="#__codelineno-0-349">349</a></span>
<span class="normal"><a href="#__codelineno-0-350">350</a></span>
<span class="normal"><a href="#__codelineno-0-351">351</a></span>
<span class="normal"><a href="#__codelineno-0-352">352</a></span>
<span class="normal"><a href="#__codelineno-0-353">353</a></span>
<span class="normal"><a href="#__codelineno-0-354">354</a></span>
<span class="normal"><a href="#__codelineno-0-355">355</a></span>
<span class="normal"><a href="#__codelineno-0-356">356</a></span>
<span class="normal"><a href="#__codelineno-0-357">357</a></span>
<span class="normal"><a href="#__codelineno-0-358">358</a></span>
<span class="normal"><a href="#__codelineno-0-359">359</a></span>
<span class="normal"><a href="#__codelineno-0-360">360</a></span>
<span class="normal"><a href="#__codelineno-0-361">361</a></span>
<span class="normal"><a href="#__codelineno-0-362">362</a></span>
<span class="normal"><a href="#__codelineno-0-363">363</a></span>
<span class="normal"><a href="#__codelineno-0-364">364</a></span>
<span class="normal"><a href="#__codelineno-0-365">365</a></span>
<span class="normal"><a href="#__codelineno-0-366">366</a></span>
<span class="normal"><a href="#__codelineno-0-367">367</a></span>
<span class="normal"><a href="#__codelineno-0-368">368</a></span>
<span class="normal"><a href="#__codelineno-0-369">369</a></span>
<span class="normal"><a href="#__codelineno-0-370">370</a></span>
<span class="normal"><a href="#__codelineno-0-371">371</a></span>
<span class="normal"><a href="#__codelineno-0-372">372</a></span>
<span class="normal"><a href="#__codelineno-0-373">373</a></span>
<span class="normal"><a href="#__codelineno-0-374">374</a></span>
<span class="normal"><a href="#__codelineno-0-375">375</a></span>
<span class="normal"><a href="#__codelineno-0-376">376</a></span>
<span class="normal"><a href="#__codelineno-0-377">377</a></span></pre></div></td><td class="code"><div><pre><span></span><code><a id="__codelineno-0-348" name="__codelineno-0-348"></a><span class="k">def</span><span class="w"> </span><span class="nf">create</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">overwrite</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">False</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">OutputStream</span><span class="p">:</span>
<a id="__codelineno-0-349" name="__codelineno-0-349"></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;Create a writable pyarrow.lib.NativeFile for this PyArrowFile&#39;s location.</span>
<a id="__codelineno-0-350" name="__codelineno-0-350"></a>
<a id="__codelineno-0-351" name="__codelineno-0-351"></a><span class="sd"> Args:</span>
<a id="__codelineno-0-352" name="__codelineno-0-352"></a><span class="sd"> overwrite (bool): Whether to overwrite the file if it already exists.</span>
<a id="__codelineno-0-353" name="__codelineno-0-353"></a>
<a id="__codelineno-0-354" name="__codelineno-0-354"></a><span class="sd"> Returns:</span>
<a id="__codelineno-0-355" name="__codelineno-0-355"></a><span class="sd"> pyarrow.lib.NativeFile: A NativeFile instance for the file located at self.location.</span>
<a id="__codelineno-0-356" name="__codelineno-0-356"></a>
<a id="__codelineno-0-357" name="__codelineno-0-357"></a><span class="sd"> Raises:</span>
<a id="__codelineno-0-358" name="__codelineno-0-358"></a><span class="sd"> FileExistsError: If the file already exists at `self.location` and `overwrite` is False.</span>
<a id="__codelineno-0-359" name="__codelineno-0-359"></a>
<a id="__codelineno-0-360" name="__codelineno-0-360"></a><span class="sd"> Note:</span>
<a id="__codelineno-0-361" name="__codelineno-0-361"></a><span class="sd"> This retrieves a pyarrow NativeFile by opening an output stream. If overwrite is set to False,</span>
<a id="__codelineno-0-362" name="__codelineno-0-362"></a><span class="sd"> a check is first performed to verify that the file does not exist. This is not thread-safe and</span>
<a id="__codelineno-0-363" name="__codelineno-0-363"></a><span class="sd"> a possibility does exist that the file can be created by a concurrent process after the existence</span>
<a id="__codelineno-0-364" name="__codelineno-0-364"></a><span class="sd"> check yet before the output stream is created. In such a case, the default pyarrow behavior will</span>
<a id="__codelineno-0-365" name="__codelineno-0-365"></a><span class="sd"> truncate the contents of the existing file when opening the output stream.</span>
<a id="__codelineno-0-366" name="__codelineno-0-366"></a><span class="sd"> &quot;&quot;&quot;</span>
<a id="__codelineno-0-367" name="__codelineno-0-367"></a> <span class="k">try</span><span class="p">:</span>
<a id="__codelineno-0-368" name="__codelineno-0-368"></a> <span class="k">if</span> <span class="ow">not</span> <span class="n">overwrite</span> <span class="ow">and</span> <span class="bp">self</span><span class="o">.</span><span class="n">exists</span><span class="p">()</span> <span class="ow">is</span> <span class="kc">True</span><span class="p">:</span>
<a id="__codelineno-0-369" name="__codelineno-0-369"></a> <span class="k">raise</span> <span class="ne">FileExistsError</span><span class="p">(</span><span class="sa">f</span><span class="s2">&quot;Cannot create file, already exists: </span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">location</span><span class="si">}</span><span class="s2">&quot;</span><span class="p">)</span>
<a id="__codelineno-0-370" name="__codelineno-0-370"></a> <span class="n">output_file</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_filesystem</span><span class="o">.</span><span class="n">open_output_stream</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_path</span><span class="p">,</span> <span class="n">buffer_size</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">_buffer_size</span><span class="p">)</span>
<a id="__codelineno-0-371" name="__codelineno-0-371"></a> <span class="k">except</span> <span class="ne">PermissionError</span><span class="p">:</span>
<a id="__codelineno-0-372" name="__codelineno-0-372"></a> <span class="k">raise</span>
<a id="__codelineno-0-373" name="__codelineno-0-373"></a> <span class="k">except</span> <span class="ne">OSError</span> <span class="k">as</span> <span class="n">e</span><span class="p">:</span>
<a id="__codelineno-0-374" name="__codelineno-0-374"></a> <span class="k">if</span> <span class="n">e</span><span class="o">.</span><span class="n">errno</span> <span class="o">==</span> <span class="mi">13</span> <span class="ow">or</span> <span class="s2">&quot;AWS Error [code 15]&quot;</span> <span class="ow">in</span> <span class="nb">str</span><span class="p">(</span><span class="n">e</span><span class="p">):</span>
<a id="__codelineno-0-375" name="__codelineno-0-375"></a> <span class="k">raise</span> <span class="ne">PermissionError</span><span class="p">(</span><span class="sa">f</span><span class="s2">&quot;Cannot create file, access denied: </span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">location</span><span class="si">}</span><span class="s2">&quot;</span><span class="p">)</span> <span class="kn">from</span><span class="w"> </span><span class="nn">e</span>
<a id="__codelineno-0-376" name="__codelineno-0-376"></a> <span class="k">raise</span> <span class="c1"># pragma: no cover - If some other kind of OSError, raise the raw error</span>
<a id="__codelineno-0-377" name="__codelineno-0-377"></a> <span class="k">return</span> <span class="n">output_file</span>
</code></pre></div></td></tr></table></div>
</details>
</div>
</div>
<div class="doc doc-object doc-function">
<h3 id="pyiceberg.io.pyarrow.PyArrowFile.exists" class="doc doc-heading">
<code class="highlight language-python"><span class="n">exists</span><span class="p">()</span></code>
<a href="#pyiceberg.io.pyarrow.PyArrowFile.exists" class="headerlink" title="Permanent link">&para;</a></h3>
<div class="doc doc-contents ">
<p>Check whether the location exists.</p>
<details class="quote">
<summary>Source code in <code>pyiceberg/io/pyarrow.py</code></summary>
<div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-311">311</a></span>
<span class="normal"><a href="#__codelineno-0-312">312</a></span>
<span class="normal"><a href="#__codelineno-0-313">313</a></span>
<span class="normal"><a href="#__codelineno-0-314">314</a></span>
<span class="normal"><a href="#__codelineno-0-315">315</a></span>
<span class="normal"><a href="#__codelineno-0-316">316</a></span>
<span class="normal"><a href="#__codelineno-0-317">317</a></span></pre></div></td><td class="code"><div><pre><span></span><code><a id="__codelineno-0-311" name="__codelineno-0-311"></a><span class="k">def</span><span class="w"> </span><span class="nf">exists</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="nb">bool</span><span class="p">:</span>
<a id="__codelineno-0-312" name="__codelineno-0-312"></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;Check whether the location exists.&quot;&quot;&quot;</span>
<a id="__codelineno-0-313" name="__codelineno-0-313"></a> <span class="k">try</span><span class="p">:</span>
<a id="__codelineno-0-314" name="__codelineno-0-314"></a> <span class="bp">self</span><span class="o">.</span><span class="n">_file_info</span><span class="p">()</span> <span class="c1"># raises FileNotFoundError if it does not exist</span>
<a id="__codelineno-0-315" name="__codelineno-0-315"></a> <span class="k">return</span> <span class="kc">True</span>
<a id="__codelineno-0-316" name="__codelineno-0-316"></a> <span class="k">except</span> <span class="ne">FileNotFoundError</span><span class="p">:</span>
<a id="__codelineno-0-317" name="__codelineno-0-317"></a> <span class="k">return</span> <span class="kc">False</span>
</code></pre></div></td></tr></table></div>
</details>
</div>
</div>
<div class="doc doc-object doc-function">
<h3 id="pyiceberg.io.pyarrow.PyArrowFile.open" class="doc doc-heading">
<code class="highlight language-python"><span class="nb">open</span><span class="p">(</span><span class="n">seekable</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span></code>
<a href="#pyiceberg.io.pyarrow.PyArrowFile.open" class="headerlink" title="Permanent link">&para;</a></h3>
<div class="doc doc-contents ">
<p>Open the location using a PyArrow FileSystem inferred from the location.</p>
<p><span class="doc-section-title">Parameters:</span></p>
<table>
<thead>
<tr>
<th>Name</th>
<th>Type</th>
<th>Description</th>
<th>Default</th>
</tr>
</thead>
<tbody>
<tr class="doc-section-item">
<td>
<code>seekable</code>
</td>
<td>
<code><span title="bool">bool</span></code>
</td>
<td>
<div class="doc-md-description">
<p>Whether the stream should support seeking, or whether it is consumed sequentially.</p>
</div>
</td>
<td>
<code>True</code>
</td>
</tr>
</tbody>
</table>
<p><span class="doc-section-title">Returns:</span></p>
<table>
<thead>
<tr>
<th>Type</th>
<th>Description</th>
</tr>
</thead>
<tbody>
<tr class="doc-section-item">
<td>
<code><a class="autorefs autorefs-internal" title="InputStream (pyiceberg.io.InputStream)" href="../#pyiceberg.io.InputStream">InputStream</a></code>
</td>
<td>
<div class="doc-md-description">
<p>pyarrow.lib.NativeFile: A NativeFile instance for the file located at <code>self.location</code>.</p>
</div>
</td>
</tr>
</tbody>
</table>
<p><span class="doc-section-title">Raises:</span></p>
<table>
<thead>
<tr>
<th>Type</th>
<th>Description</th>
</tr>
</thead>
<tbody>
<tr class="doc-section-item">
<td>
<code><span title="FileNotFoundError">FileNotFoundError</span></code>
</td>
<td>
<div class="doc-md-description">
<p>If the file at <code>self.location</code> does not exist.</p>
</div>
</td>
</tr>
<tr class="doc-section-item">
<td>
<code><span title="PermissionError">PermissionError</span></code>
</td>
<td>
<div class="doc-md-description">
<p>If the file at <code>self.location</code> cannot be accessed due to a permission error, such as
AWS error code 15.</p>
</div>
</td>
</tr>
</tbody>
</table>
<details class="quote">
<summary>Source code in <code>pyiceberg/io/pyarrow.py</code></summary>
<div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-319">319</a></span>
<span class="normal"><a href="#__codelineno-0-320">320</a></span>
<span class="normal"><a href="#__codelineno-0-321">321</a></span>
<span class="normal"><a href="#__codelineno-0-322">322</a></span>
<span class="normal"><a href="#__codelineno-0-323">323</a></span>
<span class="normal"><a href="#__codelineno-0-324">324</a></span>
<span class="normal"><a href="#__codelineno-0-325">325</a></span>
<span class="normal"><a href="#__codelineno-0-326">326</a></span>
<span class="normal"><a href="#__codelineno-0-327">327</a></span>
<span class="normal"><a href="#__codelineno-0-328">328</a></span>
<span class="normal"><a href="#__codelineno-0-329">329</a></span>
<span class="normal"><a href="#__codelineno-0-330">330</a></span>
<span class="normal"><a href="#__codelineno-0-331">331</a></span>
<span class="normal"><a href="#__codelineno-0-332">332</a></span>
<span class="normal"><a href="#__codelineno-0-333">333</a></span>
<span class="normal"><a href="#__codelineno-0-334">334</a></span>
<span class="normal"><a href="#__codelineno-0-335">335</a></span>
<span class="normal"><a href="#__codelineno-0-336">336</a></span>
<span class="normal"><a href="#__codelineno-0-337">337</a></span>
<span class="normal"><a href="#__codelineno-0-338">338</a></span>
<span class="normal"><a href="#__codelineno-0-339">339</a></span>
<span class="normal"><a href="#__codelineno-0-340">340</a></span>
<span class="normal"><a href="#__codelineno-0-341">341</a></span>
<span class="normal"><a href="#__codelineno-0-342">342</a></span>
<span class="normal"><a href="#__codelineno-0-343">343</a></span>
<span class="normal"><a href="#__codelineno-0-344">344</a></span>
<span class="normal"><a href="#__codelineno-0-345">345</a></span>
<span class="normal"><a href="#__codelineno-0-346">346</a></span></pre></div></td><td class="code"><div><pre><span></span><code><a id="__codelineno-0-319" name="__codelineno-0-319"></a><span class="k">def</span><span class="w"> </span><span class="nf">open</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">seekable</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">True</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">InputStream</span><span class="p">:</span>
<a id="__codelineno-0-320" name="__codelineno-0-320"></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;Open the location using a PyArrow FileSystem inferred from the location.</span>
<a id="__codelineno-0-321" name="__codelineno-0-321"></a>
<a id="__codelineno-0-322" name="__codelineno-0-322"></a><span class="sd"> Args:</span>
<a id="__codelineno-0-323" name="__codelineno-0-323"></a><span class="sd"> seekable: If the stream should support seek, or if it is consumed sequential.</span>
<a id="__codelineno-0-324" name="__codelineno-0-324"></a>
<a id="__codelineno-0-325" name="__codelineno-0-325"></a><span class="sd"> Returns:</span>
<a id="__codelineno-0-326" name="__codelineno-0-326"></a><span class="sd"> pyarrow.lib.NativeFile: A NativeFile instance for the file located at `self.location`.</span>
<a id="__codelineno-0-327" name="__codelineno-0-327"></a>
<a id="__codelineno-0-328" name="__codelineno-0-328"></a><span class="sd"> Raises:</span>
<a id="__codelineno-0-329" name="__codelineno-0-329"></a><span class="sd"> FileNotFoundError: If the file at self.location does not exist.</span>
<a id="__codelineno-0-330" name="__codelineno-0-330"></a><span class="sd"> PermissionError: If the file at self.location cannot be accessed due to a permission error such as</span>
<a id="__codelineno-0-331" name="__codelineno-0-331"></a><span class="sd"> an AWS error code 15.</span>
<a id="__codelineno-0-332" name="__codelineno-0-332"></a><span class="sd"> &quot;&quot;&quot;</span>
<a id="__codelineno-0-333" name="__codelineno-0-333"></a> <span class="k">try</span><span class="p">:</span>
<a id="__codelineno-0-334" name="__codelineno-0-334"></a> <span class="k">if</span> <span class="n">seekable</span><span class="p">:</span>
<a id="__codelineno-0-335" name="__codelineno-0-335"></a> <span class="n">input_file</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_filesystem</span><span class="o">.</span><span class="n">open_input_file</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_path</span><span class="p">)</span>
<a id="__codelineno-0-336" name="__codelineno-0-336"></a> <span class="k">else</span><span class="p">:</span>
<a id="__codelineno-0-337" name="__codelineno-0-337"></a> <span class="n">input_file</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_filesystem</span><span class="o">.</span><span class="n">open_input_stream</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_path</span><span class="p">,</span> <span class="n">buffer_size</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">_buffer_size</span><span class="p">)</span>
<a id="__codelineno-0-338" name="__codelineno-0-338"></a> <span class="k">except</span> <span class="p">(</span><span class="ne">FileNotFoundError</span><span class="p">,</span> <span class="ne">PermissionError</span><span class="p">):</span>
<a id="__codelineno-0-339" name="__codelineno-0-339"></a> <span class="k">raise</span>
<a id="__codelineno-0-340" name="__codelineno-0-340"></a> <span class="k">except</span> <span class="ne">OSError</span> <span class="k">as</span> <span class="n">e</span><span class="p">:</span>
<a id="__codelineno-0-341" name="__codelineno-0-341"></a> <span class="k">if</span> <span class="n">e</span><span class="o">.</span><span class="n">errno</span> <span class="o">==</span> <span class="mi">2</span> <span class="ow">or</span> <span class="s2">&quot;Path does not exist&quot;</span> <span class="ow">in</span> <span class="nb">str</span><span class="p">(</span><span class="n">e</span><span class="p">):</span>
<a id="__codelineno-0-342" name="__codelineno-0-342"></a> <span class="k">raise</span> <span class="ne">FileNotFoundError</span><span class="p">(</span><span class="sa">f</span><span class="s2">&quot;Cannot open file, does not exist: </span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">location</span><span class="si">}</span><span class="s2">&quot;</span><span class="p">)</span> <span class="kn">from</span><span class="w"> </span><span class="nn">e</span>
<a id="__codelineno-0-343" name="__codelineno-0-343"></a> <span class="k">elif</span> <span class="n">e</span><span class="o">.</span><span class="n">errno</span> <span class="o">==</span> <span class="mi">13</span> <span class="ow">or</span> <span class="s2">&quot;AWS Error [code 15]&quot;</span> <span class="ow">in</span> <span class="nb">str</span><span class="p">(</span><span class="n">e</span><span class="p">):</span>
<a id="__codelineno-0-344" name="__codelineno-0-344"></a> <span class="k">raise</span> <span class="ne">PermissionError</span><span class="p">(</span><span class="sa">f</span><span class="s2">&quot;Cannot open file, access denied: </span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">location</span><span class="si">}</span><span class="s2">&quot;</span><span class="p">)</span> <span class="kn">from</span><span class="w"> </span><span class="nn">e</span>
<a id="__codelineno-0-345" name="__codelineno-0-345"></a> <span class="k">raise</span> <span class="c1"># pragma: no cover - If some other kind of OSError, raise the raw error</span>
<a id="__codelineno-0-346" name="__codelineno-0-346"></a> <span class="k">return</span> <span class="n">input_file</span>
</code></pre></div></td></tr></table></div>
</details>
</div>
</div>
<div class="doc doc-object doc-function">
<h3 id="pyiceberg.io.pyarrow.PyArrowFile.to_input_file" class="doc doc-heading">
<code class="highlight language-python"><span class="n">to_input_file</span><span class="p">()</span></code>
<a href="#pyiceberg.io.pyarrow.PyArrowFile.to_input_file" class="headerlink" title="Permanent link">&para;</a></h3>
<div class="doc doc-contents ">
<p>Return a PyArrowFile for the location of this PyArrowFile instance.</p>
<p>This method is included to abide by the OutputFile abstract base class. Since this implementation uses a single
PyArrowFile class (as opposed to separate InputFile and OutputFile implementations), this method simply returns
the same instance (<code>self</code>) rather than a copy.</p>
<details class="quote">
<summary>Source code in <code>pyiceberg/io/pyarrow.py</code></summary>
<div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-379">379</a></span>
<span class="normal"><a href="#__codelineno-0-380">380</a></span>
<span class="normal"><a href="#__codelineno-0-381">381</a></span>
<span class="normal"><a href="#__codelineno-0-382">382</a></span>
<span class="normal"><a href="#__codelineno-0-383">383</a></span>
<span class="normal"><a href="#__codelineno-0-384">384</a></span>
<span class="normal"><a href="#__codelineno-0-385">385</a></span>
<span class="normal"><a href="#__codelineno-0-386">386</a></span></pre></div></td><td class="code"><div><pre><span></span><code><a id="__codelineno-0-379" name="__codelineno-0-379"></a><span class="k">def</span><span class="w"> </span><span class="nf">to_input_file</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">PyArrowFile</span><span class="p">:</span>
<a id="__codelineno-0-380" name="__codelineno-0-380"></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;Return a new PyArrowFile for the location of an existing PyArrowFile instance.</span>
<a id="__codelineno-0-381" name="__codelineno-0-381"></a>
<a id="__codelineno-0-382" name="__codelineno-0-382"></a><span class="sd"> This method is included to abide by the OutputFile abstract base class. Since this implementation uses a single</span>
<a id="__codelineno-0-383" name="__codelineno-0-383"></a><span class="sd"> PyArrowFile class (as opposed to separate InputFile and OutputFile implementations), this method effectively returns</span>
<a id="__codelineno-0-384" name="__codelineno-0-384"></a><span class="sd"> a copy of the same instance.</span>
<a id="__codelineno-0-385" name="__codelineno-0-385"></a><span class="sd"> &quot;&quot;&quot;</span>
<a id="__codelineno-0-386" name="__codelineno-0-386"></a> <span class="k">return</span> <span class="bp">self</span>
</code></pre></div></td></tr></table></div>
</details>
</div>
</div>
</div>
</div>
</div>
<div class="doc doc-object doc-class">
<h2 id="pyiceberg.io.pyarrow.PyArrowFileIO" class="doc doc-heading">
<code>PyArrowFileIO</code>
<a href="#pyiceberg.io.pyarrow.PyArrowFileIO" class="headerlink" title="Permanent link">&para;</a></h2>
<div class="doc doc-contents ">
<p class="doc doc-class-bases">
Bases: <code><a class="autorefs autorefs-internal" title="FileIO (pyiceberg.io.FileIO)" href="../#pyiceberg.io.FileIO">FileIO</a></code></p>
<details class="quote">
<summary>Source code in <code>pyiceberg/io/pyarrow.py</code></summary>
<div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-389">389</a></span>
<span class="normal"><a href="#__codelineno-0-390">390</a></span>
<span class="normal"><a href="#__codelineno-0-391">391</a></span>
<span class="normal"><a href="#__codelineno-0-392">392</a></span>
<span class="normal"><a href="#__codelineno-0-393">393</a></span>
<span class="normal"><a href="#__codelineno-0-394">394</a></span>
<span class="normal"><a href="#__codelineno-0-395">395</a></span>
<span class="normal"><a href="#__codelineno-0-396">396</a></span>
<span class="normal"><a href="#__codelineno-0-397">397</a></span>
<span class="normal"><a href="#__codelineno-0-398">398</a></span>
<span class="normal"><a href="#__codelineno-0-399">399</a></span>
<span class="normal"><a href="#__codelineno-0-400">400</a></span>
<span class="normal"><a href="#__codelineno-0-401">401</a></span>
<span class="normal"><a href="#__codelineno-0-402">402</a></span>
<span class="normal"><a href="#__codelineno-0-403">403</a></span>
<span class="normal"><a href="#__codelineno-0-404">404</a></span>
<span class="normal"><a href="#__codelineno-0-405">405</a></span>
<span class="normal"><a href="#__codelineno-0-406">406</a></span>
<span class="normal"><a href="#__codelineno-0-407">407</a></span>
<span class="normal"><a href="#__codelineno-0-408">408</a></span>
<span class="normal"><a href="#__codelineno-0-409">409</a></span>
<span class="normal"><a href="#__codelineno-0-410">410</a></span>
<span class="normal"><a href="#__codelineno-0-411">411</a></span>
<span class="normal"><a href="#__codelineno-0-412">412</a></span>
<span class="normal"><a href="#__codelineno-0-413">413</a></span>
<span class="normal"><a href="#__codelineno-0-414">414</a></span>
<span class="normal"><a href="#__codelineno-0-415">415</a></span>
<span class="normal"><a href="#__codelineno-0-416">416</a></span>
<span class="normal"><a href="#__codelineno-0-417">417</a></span>
<span class="normal"><a href="#__codelineno-0-418">418</a></span>
<span class="normal"><a href="#__codelineno-0-419">419</a></span>
<span class="normal"><a href="#__codelineno-0-420">420</a></span>
<span class="normal"><a href="#__codelineno-0-421">421</a></span>
<span class="normal"><a href="#__codelineno-0-422">422</a></span>
<span class="normal"><a href="#__codelineno-0-423">423</a></span>
<span class="normal"><a href="#__codelineno-0-424">424</a></span>
<span class="normal"><a href="#__codelineno-0-425">425</a></span>
<span class="normal"><a href="#__codelineno-0-426">426</a></span>
<span class="normal"><a href="#__codelineno-0-427">427</a></span>
<span class="normal"><a href="#__codelineno-0-428">428</a></span>
<span class="normal"><a href="#__codelineno-0-429">429</a></span>
<span class="normal"><a href="#__codelineno-0-430">430</a></span>
<span class="normal"><a href="#__codelineno-0-431">431</a></span>
<span class="normal"><a href="#__codelineno-0-432">432</a></span>
<span class="normal"><a href="#__codelineno-0-433">433</a></span>
<span class="normal"><a href="#__codelineno-0-434">434</a></span>
<span class="normal"><a href="#__codelineno-0-435">435</a></span>
<span class="normal"><a href="#__codelineno-0-436">436</a></span>
<span class="normal"><a href="#__codelineno-0-437">437</a></span>
<span class="normal"><a href="#__codelineno-0-438">438</a></span>
<span class="normal"><a href="#__codelineno-0-439">439</a></span>
<span class="normal"><a href="#__codelineno-0-440">440</a></span>
<span class="normal"><a href="#__codelineno-0-441">441</a></span>
<span class="normal"><a href="#__codelineno-0-442">442</a></span>
<span class="normal"><a href="#__codelineno-0-443">443</a></span>
<span class="normal"><a href="#__codelineno-0-444">444</a></span>
<span class="normal"><a href="#__codelineno-0-445">445</a></span>
<span class="normal"><a href="#__codelineno-0-446">446</a></span>
<span class="normal"><a href="#__codelineno-0-447">447</a></span>
<span class="normal"><a href="#__codelineno-0-448">448</a></span>
<span class="normal"><a href="#__codelineno-0-449">449</a></span>
<span class="normal"><a href="#__codelineno-0-450">450</a></span>
<span class="normal"><a href="#__codelineno-0-451">451</a></span>
<span class="normal"><a href="#__codelineno-0-452">452</a></span>
<span class="normal"><a href="#__codelineno-0-453">453</a></span>
<span class="normal"><a href="#__codelineno-0-454">454</a></span>
<span class="normal"><a href="#__codelineno-0-455">455</a></span>
<span class="normal"><a href="#__codelineno-0-456">456</a></span>
<span class="normal"><a href="#__codelineno-0-457">457</a></span>
<span class="normal"><a href="#__codelineno-0-458">458</a></span>
<span class="normal"><a href="#__codelineno-0-459">459</a></span>
<span class="normal"><a href="#__codelineno-0-460">460</a></span>
<span class="normal"><a href="#__codelineno-0-461">461</a></span>
<span class="normal"><a href="#__codelineno-0-462">462</a></span>
<span class="normal"><a href="#__codelineno-0-463">463</a></span>
<span class="normal"><a href="#__codelineno-0-464">464</a></span>
<span class="normal"><a href="#__codelineno-0-465">465</a></span>
<span class="normal"><a href="#__codelineno-0-466">466</a></span>
<span class="normal"><a href="#__codelineno-0-467">467</a></span>
<span class="normal"><a href="#__codelineno-0-468">468</a></span>
<span class="normal"><a href="#__codelineno-0-469">469</a></span>
<span class="normal"><a href="#__codelineno-0-470">470</a></span>
<span class="normal"><a href="#__codelineno-0-471">471</a></span>
<span class="normal"><a href="#__codelineno-0-472">472</a></span>
<span class="normal"><a href="#__codelineno-0-473">473</a></span>
<span class="normal"><a href="#__codelineno-0-474">474</a></span>
<span class="normal"><a href="#__codelineno-0-475">475</a></span>
<span class="normal"><a href="#__codelineno-0-476">476</a></span>
<span class="normal"><a href="#__codelineno-0-477">477</a></span>
<span class="normal"><a href="#__codelineno-0-478">478</a></span>
<span class="normal"><a href="#__codelineno-0-479">479</a></span>
<span class="normal"><a href="#__codelineno-0-480">480</a></span>
<span class="normal"><a href="#__codelineno-0-481">481</a></span>
<span class="normal"><a href="#__codelineno-0-482">482</a></span>
<span class="normal"><a href="#__codelineno-0-483">483</a></span>
<span class="normal"><a href="#__codelineno-0-484">484</a></span>
<span class="normal"><a href="#__codelineno-0-485">485</a></span>
<span class="normal"><a href="#__codelineno-0-486">486</a></span>
<span class="normal"><a href="#__codelineno-0-487">487</a></span>
<span class="normal"><a href="#__codelineno-0-488">488</a></span>
<span class="normal"><a href="#__codelineno-0-489">489</a></span>
<span class="normal"><a href="#__codelineno-0-490">490</a></span>
<span class="normal"><a href="#__codelineno-0-491">491</a></span>
<span class="normal"><a href="#__codelineno-0-492">492</a></span>
<span class="normal"><a href="#__codelineno-0-493">493</a></span>
<span class="normal"><a href="#__codelineno-0-494">494</a></span>
<span class="normal"><a href="#__codelineno-0-495">495</a></span>
<span class="normal"><a href="#__codelineno-0-496">496</a></span>
<span class="normal"><a href="#__codelineno-0-497">497</a></span>
<span class="normal"><a href="#__codelineno-0-498">498</a></span>
<span class="normal"><a href="#__codelineno-0-499">499</a></span>
<span class="normal"><a href="#__codelineno-0-500">500</a></span>
<span class="normal"><a href="#__codelineno-0-501">501</a></span>
<span class="normal"><a href="#__codelineno-0-502">502</a></span>
<span class="normal"><a href="#__codelineno-0-503">503</a></span>
<span class="normal"><a href="#__codelineno-0-504">504</a></span>
<span class="normal"><a href="#__codelineno-0-505">505</a></span>
<span class="normal"><a href="#__codelineno-0-506">506</a></span>
<span class="normal"><a href="#__codelineno-0-507">507</a></span>
<span class="normal"><a href="#__codelineno-0-508">508</a></span>
<span class="normal"><a href="#__codelineno-0-509">509</a></span>
<span class="normal"><a href="#__codelineno-0-510">510</a></span>
<span class="normal"><a href="#__codelineno-0-511">511</a></span>
<span class="normal"><a href="#__codelineno-0-512">512</a></span>
<span class="normal"><a href="#__codelineno-0-513">513</a></span>
<span class="normal"><a href="#__codelineno-0-514">514</a></span>
<span class="normal"><a href="#__codelineno-0-515">515</a></span>
<span class="normal"><a href="#__codelineno-0-516">516</a></span>
<span class="normal"><a href="#__codelineno-0-517">517</a></span>
<span class="normal"><a href="#__codelineno-0-518">518</a></span>
<span class="normal"><a href="#__codelineno-0-519">519</a></span>
<span class="normal"><a href="#__codelineno-0-520">520</a></span>
<span class="normal"><a href="#__codelineno-0-521">521</a></span>
<span class="normal"><a href="#__codelineno-0-522">522</a></span>
<span class="normal"><a href="#__codelineno-0-523">523</a></span>
<span class="normal"><a href="#__codelineno-0-524">524</a></span>
<span class="normal"><a href="#__codelineno-0-525">525</a></span>
<span class="normal"><a href="#__codelineno-0-526">526</a></span>
<span class="normal"><a href="#__codelineno-0-527">527</a></span>
<span class="normal"><a href="#__codelineno-0-528">528</a></span>
<span class="normal"><a href="#__codelineno-0-529">529</a></span>
<span class="normal"><a href="#__codelineno-0-530">530</a></span>
<span class="normal"><a href="#__codelineno-0-531">531</a></span>
<span class="normal"><a href="#__codelineno-0-532">532</a></span>
<span class="normal"><a href="#__codelineno-0-533">533</a></span>
<span class="normal"><a href="#__codelineno-0-534">534</a></span>
<span class="normal"><a href="#__codelineno-0-535">535</a></span>
<span class="normal"><a href="#__codelineno-0-536">536</a></span>
<span class="normal"><a href="#__codelineno-0-537">537</a></span>
<span class="normal"><a href="#__codelineno-0-538">538</a></span>
<span class="normal"><a href="#__codelineno-0-539">539</a></span>
<span class="normal"><a href="#__codelineno-0-540">540</a></span>
<span class="normal"><a href="#__codelineno-0-541">541</a></span>
<span class="normal"><a href="#__codelineno-0-542">542</a></span>
<span class="normal"><a href="#__codelineno-0-543">543</a></span>
<span class="normal"><a href="#__codelineno-0-544">544</a></span>
<span class="normal"><a href="#__codelineno-0-545">545</a></span>
<span class="normal"><a href="#__codelineno-0-546">546</a></span>
<span class="normal"><a href="#__codelineno-0-547">547</a></span>
<span class="normal"><a href="#__codelineno-0-548">548</a></span>
<span class="normal"><a href="#__codelineno-0-549">549</a></span>
<span class="normal"><a href="#__codelineno-0-550">550</a></span>
<span class="normal"><a href="#__codelineno-0-551">551</a></span>
<span class="normal"><a href="#__codelineno-0-552">552</a></span>
<span class="normal"><a href="#__codelineno-0-553">553</a></span>
<span class="normal"><a href="#__codelineno-0-554">554</a></span>
<span class="normal"><a href="#__codelineno-0-555">555</a></span>
<span class="normal"><a href="#__codelineno-0-556">556</a></span>
<span class="normal"><a href="#__codelineno-0-557">557</a></span>
<span class="normal"><a href="#__codelineno-0-558">558</a></span>
<span class="normal"><a href="#__codelineno-0-559">559</a></span>
<span class="normal"><a href="#__codelineno-0-560">560</a></span>
<span class="normal"><a href="#__codelineno-0-561">561</a></span>
<span class="normal"><a href="#__codelineno-0-562">562</a></span>
<span class="normal"><a href="#__codelineno-0-563">563</a></span>
<span class="normal"><a href="#__codelineno-0-564">564</a></span>
<span class="normal"><a href="#__codelineno-0-565">565</a></span>
<span class="normal"><a href="#__codelineno-0-566">566</a></span>
<span class="normal"><a href="#__codelineno-0-567">567</a></span>
<span class="normal"><a href="#__codelineno-0-568">568</a></span>
<span class="normal"><a href="#__codelineno-0-569">569</a></span>
<span class="normal"><a href="#__codelineno-0-570">570</a></span>
<span class="normal"><a href="#__codelineno-0-571">571</a></span>
<span class="normal"><a href="#__codelineno-0-572">572</a></span>
<span class="normal"><a href="#__codelineno-0-573">573</a></span>
<span class="normal"><a href="#__codelineno-0-574">574</a></span>
<span class="normal"><a href="#__codelineno-0-575">575</a></span>
<span class="normal"><a href="#__codelineno-0-576">576</a></span>
<span class="normal"><a href="#__codelineno-0-577">577</a></span>
<span class="normal"><a href="#__codelineno-0-578">578</a></span>
<span class="normal"><a href="#__codelineno-0-579">579</a></span>
<span class="normal"><a href="#__codelineno-0-580">580</a></span>
<span class="normal"><a href="#__codelineno-0-581">581</a></span>
<span class="normal"><a href="#__codelineno-0-582">582</a></span>
<span class="normal"><a href="#__codelineno-0-583">583</a></span>
<span class="normal"><a href="#__codelineno-0-584">584</a></span>
<span class="normal"><a href="#__codelineno-0-585">585</a></span>
<span class="normal"><a href="#__codelineno-0-586">586</a></span>
<span class="normal"><a href="#__codelineno-0-587">587</a></span>
<span class="normal"><a href="#__codelineno-0-588">588</a></span>
<span class="normal"><a href="#__codelineno-0-589">589</a></span>
<span class="normal"><a href="#__codelineno-0-590">590</a></span>
<span class="normal"><a href="#__codelineno-0-591">591</a></span>
<span class="normal"><a href="#__codelineno-0-592">592</a></span>
<span class="normal"><a href="#__codelineno-0-593">593</a></span>
<span class="normal"><a href="#__codelineno-0-594">594</a></span>
<span class="normal"><a href="#__codelineno-0-595">595</a></span>
<span class="normal"><a href="#__codelineno-0-596">596</a></span>
<span class="normal"><a href="#__codelineno-0-597">597</a></span>
<span class="normal"><a href="#__codelineno-0-598">598</a></span>
<span class="normal"><a href="#__codelineno-0-599">599</a></span>
<span class="normal"><a href="#__codelineno-0-600">600</a></span>
<span class="normal"><a href="#__codelineno-0-601">601</a></span>
<span class="normal"><a href="#__codelineno-0-602">602</a></span>
<span class="normal"><a href="#__codelineno-0-603">603</a></span>
<span class="normal"><a href="#__codelineno-0-604">604</a></span>
<span class="normal"><a href="#__codelineno-0-605">605</a></span>
<span class="normal"><a href="#__codelineno-0-606">606</a></span>
<span class="normal"><a href="#__codelineno-0-607">607</a></span>
<span class="normal"><a href="#__codelineno-0-608">608</a></span>
<span class="normal"><a href="#__codelineno-0-609">609</a></span>
<span class="normal"><a href="#__codelineno-0-610">610</a></span>
<span class="normal"><a href="#__codelineno-0-611">611</a></span>
<span class="normal"><a href="#__codelineno-0-612">612</a></span>
<span class="normal"><a href="#__codelineno-0-613">613</a></span>
<span class="normal"><a href="#__codelineno-0-614">614</a></span>
<span class="normal"><a href="#__codelineno-0-615">615</a></span>
<span class="normal"><a href="#__codelineno-0-616">616</a></span>
<span class="normal"><a href="#__codelineno-0-617">617</a></span>
<span class="normal"><a href="#__codelineno-0-618">618</a></span>
<span class="normal"><a href="#__codelineno-0-619">619</a></span>
<span class="normal"><a href="#__codelineno-0-620">620</a></span>
<span class="normal"><a href="#__codelineno-0-621">621</a></span>
<span class="normal"><a href="#__codelineno-0-622">622</a></span>
<span class="normal"><a href="#__codelineno-0-623">623</a></span>
<span class="normal"><a href="#__codelineno-0-624">624</a></span>
<span class="normal"><a href="#__codelineno-0-625">625</a></span>
<span class="normal"><a href="#__codelineno-0-626">626</a></span>
<span class="normal"><a href="#__codelineno-0-627">627</a></span>
<span class="normal"><a href="#__codelineno-0-628">628</a></span>
<span class="normal"><a href="#__codelineno-0-629">629</a></span>
<span class="normal"><a href="#__codelineno-0-630">630</a></span>
<span class="normal"><a href="#__codelineno-0-631">631</a></span>
<span class="normal"><a href="#__codelineno-0-632">632</a></span>
<span class="normal"><a href="#__codelineno-0-633">633</a></span>
<span class="normal"><a href="#__codelineno-0-634">634</a></span>
<span class="normal"><a href="#__codelineno-0-635">635</a></span>
<span class="normal"><a href="#__codelineno-0-636">636</a></span>
<span class="normal"><a href="#__codelineno-0-637">637</a></span>
<span class="normal"><a href="#__codelineno-0-638">638</a></span>
<span class="normal"><a href="#__codelineno-0-639">639</a></span>
<span class="normal"><a href="#__codelineno-0-640">640</a></span>
<span class="normal"><a href="#__codelineno-0-641">641</a></span>
<span class="normal"><a href="#__codelineno-0-642">642</a></span>
<span class="normal"><a href="#__codelineno-0-643">643</a></span>
<span class="normal"><a href="#__codelineno-0-644">644</a></span>
<span class="normal"><a href="#__codelineno-0-645">645</a></span>
<span class="normal"><a href="#__codelineno-0-646">646</a></span>
<span class="normal"><a href="#__codelineno-0-647">647</a></span>
<span class="normal"><a href="#__codelineno-0-648">648</a></span>
<span class="normal"><a href="#__codelineno-0-649">649</a></span>
<span class="normal"><a href="#__codelineno-0-650">650</a></span>
<span class="normal"><a href="#__codelineno-0-651">651</a></span>
<span class="normal"><a href="#__codelineno-0-652">652</a></span>
<span class="normal"><a href="#__codelineno-0-653">653</a></span>
<span class="normal"><a href="#__codelineno-0-654">654</a></span>
<span class="normal"><a href="#__codelineno-0-655">655</a></span>
<span class="normal"><a href="#__codelineno-0-656">656</a></span>
<span class="normal"><a href="#__codelineno-0-657">657</a></span>
<span class="normal"><a href="#__codelineno-0-658">658</a></span>
<span class="normal"><a href="#__codelineno-0-659">659</a></span>
<span class="normal"><a href="#__codelineno-0-660">660</a></span>
<span class="normal"><a href="#__codelineno-0-661">661</a></span>
<span class="normal"><a href="#__codelineno-0-662">662</a></span>
<span class="normal"><a href="#__codelineno-0-663">663</a></span>
<span class="normal"><a href="#__codelineno-0-664">664</a></span>
<span class="normal"><a href="#__codelineno-0-665">665</a></span>
<span class="normal"><a href="#__codelineno-0-666">666</a></span>
<span class="normal"><a href="#__codelineno-0-667">667</a></span>
<span class="normal"><a href="#__codelineno-0-668">668</a></span>
<span class="normal"><a href="#__codelineno-0-669">669</a></span>
<span class="normal"><a href="#__codelineno-0-670">670</a></span>
<span class="normal"><a href="#__codelineno-0-671">671</a></span>
<span class="normal"><a href="#__codelineno-0-672">672</a></span>
<span class="normal"><a href="#__codelineno-0-673">673</a></span>
<span class="normal"><a href="#__codelineno-0-674">674</a></span>
<span class="normal"><a href="#__codelineno-0-675">675</a></span>
<span class="normal"><a href="#__codelineno-0-676">676</a></span>
<span class="normal"><a href="#__codelineno-0-677">677</a></span>
<span class="normal"><a href="#__codelineno-0-678">678</a></span>
<span class="normal"><a href="#__codelineno-0-679">679</a></span>
<span class="normal"><a href="#__codelineno-0-680">680</a></span>
<span class="normal"><a href="#__codelineno-0-681">681</a></span>
<span class="normal"><a href="#__codelineno-0-682">682</a></span>
<span class="normal"><a href="#__codelineno-0-683">683</a></span>
<span class="normal"><a href="#__codelineno-0-684">684</a></span>
<span class="normal"><a href="#__codelineno-0-685">685</a></span>
<span class="normal"><a href="#__codelineno-0-686">686</a></span>
<span class="normal"><a href="#__codelineno-0-687">687</a></span>
<span class="normal"><a href="#__codelineno-0-688">688</a></span></pre></div></td><td class="code"><div><pre><span></span><code><a id="__codelineno-0-389" name="__codelineno-0-389"></a><span class="k">class</span><span class="w"> </span><span class="nc">PyArrowFileIO</span><span class="p">(</span><span class="n">FileIO</span><span class="p">):</span>
<a id="__codelineno-0-390" name="__codelineno-0-390"></a> <span class="n">fs_by_scheme</span><span class="p">:</span> <span class="n">Callable</span><span class="p">[[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]],</span> <span class="n">FileSystem</span><span class="p">]</span>
<a id="__codelineno-0-391" name="__codelineno-0-391"></a>
<a id="__codelineno-0-392" name="__codelineno-0-392"></a> <span class="k">def</span><span class="w"> </span><span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">properties</span><span class="p">:</span> <span class="n">Properties</span> <span class="o">=</span> <span class="n">EMPTY_DICT</span><span class="p">):</span>
<a id="__codelineno-0-393" name="__codelineno-0-393"></a> <span class="bp">self</span><span class="o">.</span><span class="n">fs_by_scheme</span><span class="p">:</span> <span class="n">Callable</span><span class="p">[[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]],</span> <span class="n">FileSystem</span><span class="p">]</span> <span class="o">=</span> <span class="n">lru_cache</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_initialize_fs</span><span class="p">)</span>
<a id="__codelineno-0-394" name="__codelineno-0-394"></a> <span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="fm">__init__</span><span class="p">(</span><span class="n">properties</span><span class="o">=</span><span class="n">properties</span><span class="p">)</span>
<a id="__codelineno-0-395" name="__codelineno-0-395"></a>
<a id="__codelineno-0-396" name="__codelineno-0-396"></a> <span class="nd">@staticmethod</span>
<a id="__codelineno-0-397" name="__codelineno-0-397"></a> <span class="k">def</span><span class="w"> </span><span class="nf">parse_location</span><span class="p">(</span><span class="n">location</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span> <span class="n">properties</span><span class="p">:</span> <span class="n">Properties</span> <span class="o">=</span> <span class="n">EMPTY_DICT</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">Tuple</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="nb">str</span><span class="p">,</span> <span class="nb">str</span><span class="p">]:</span>
<a id="__codelineno-0-398" name="__codelineno-0-398"></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;Return (scheme, netloc, path) for the given location.</span>
<a id="__codelineno-0-399" name="__codelineno-0-399"></a>
<a id="__codelineno-0-400" name="__codelineno-0-400"></a><span class="sd"> Uses DEFAULT_SCHEME and DEFAULT_NETLOC if scheme/netloc are missing.</span>
<a id="__codelineno-0-401" name="__codelineno-0-401"></a><span class="sd"> &quot;&quot;&quot;</span>
<a id="__codelineno-0-402" name="__codelineno-0-402"></a> <span class="n">uri</span> <span class="o">=</span> <span class="n">urlparse</span><span class="p">(</span><span class="n">location</span><span class="p">)</span>
<a id="__codelineno-0-403" name="__codelineno-0-403"></a>
<a id="__codelineno-0-404" name="__codelineno-0-404"></a> <span class="k">if</span> <span class="ow">not</span> <span class="n">uri</span><span class="o">.</span><span class="n">scheme</span><span class="p">:</span>
<a id="__codelineno-0-405" name="__codelineno-0-405"></a> <span class="n">default_scheme</span> <span class="o">=</span> <span class="n">properties</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">&quot;DEFAULT_SCHEME&quot;</span><span class="p">,</span> <span class="s2">&quot;file&quot;</span><span class="p">)</span>
<a id="__codelineno-0-406" name="__codelineno-0-406"></a> <span class="n">default_netloc</span> <span class="o">=</span> <span class="n">properties</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">&quot;DEFAULT_NETLOC&quot;</span><span class="p">,</span> <span class="s2">&quot;&quot;</span><span class="p">)</span>
<a id="__codelineno-0-407" name="__codelineno-0-407"></a> <span class="k">return</span> <span class="n">default_scheme</span><span class="p">,</span> <span class="n">default_netloc</span><span class="p">,</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">abspath</span><span class="p">(</span><span class="n">location</span><span class="p">)</span>
<a id="__codelineno-0-408" name="__codelineno-0-408"></a> <span class="k">elif</span> <span class="n">uri</span><span class="o">.</span><span class="n">scheme</span> <span class="ow">in</span> <span class="p">(</span><span class="s2">&quot;hdfs&quot;</span><span class="p">,</span> <span class="s2">&quot;viewfs&quot;</span><span class="p">):</span>
<a id="__codelineno-0-409" name="__codelineno-0-409"></a> <span class="k">return</span> <span class="n">uri</span><span class="o">.</span><span class="n">scheme</span><span class="p">,</span> <span class="n">uri</span><span class="o">.</span><span class="n">netloc</span><span class="p">,</span> <span class="n">uri</span><span class="o">.</span><span class="n">path</span>
<a id="__codelineno-0-410" name="__codelineno-0-410"></a> <span class="k">else</span><span class="p">:</span>
<a id="__codelineno-0-411" name="__codelineno-0-411"></a> <span class="k">return</span> <span class="n">uri</span><span class="o">.</span><span class="n">scheme</span><span class="p">,</span> <span class="n">uri</span><span class="o">.</span><span class="n">netloc</span><span class="p">,</span> <span class="sa">f</span><span class="s2">&quot;</span><span class="si">{</span><span class="n">uri</span><span class="o">.</span><span class="n">netloc</span><span class="si">}{</span><span class="n">uri</span><span class="o">.</span><span class="n">path</span><span class="si">}</span><span class="s2">&quot;</span>
<a id="__codelineno-0-412" name="__codelineno-0-412"></a>
<a id="__codelineno-0-413" name="__codelineno-0-413"></a> <span class="k">def</span><span class="w"> </span><span class="nf">_initialize_fs</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">scheme</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span> <span class="n">netloc</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">FileSystem</span><span class="p">:</span>
<a id="__codelineno-0-414" name="__codelineno-0-414"></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;Initialize FileSystem for different scheme.&quot;&quot;&quot;</span>
<a id="__codelineno-0-415" name="__codelineno-0-415"></a> <span class="k">if</span> <span class="n">scheme</span> <span class="ow">in</span> <span class="p">{</span><span class="s2">&quot;oss&quot;</span><span class="p">}:</span>
<a id="__codelineno-0-416" name="__codelineno-0-416"></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_initialize_oss_fs</span><span class="p">()</span>
<a id="__codelineno-0-417" name="__codelineno-0-417"></a>
<a id="__codelineno-0-418" name="__codelineno-0-418"></a> <span class="k">elif</span> <span class="n">scheme</span> <span class="ow">in</span> <span class="p">{</span><span class="s2">&quot;s3&quot;</span><span class="p">,</span> <span class="s2">&quot;s3a&quot;</span><span class="p">,</span> <span class="s2">&quot;s3n&quot;</span><span class="p">}:</span>
<a id="__codelineno-0-419" name="__codelineno-0-419"></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_initialize_s3_fs</span><span class="p">(</span><span class="n">netloc</span><span class="p">)</span>
<a id="__codelineno-0-420" name="__codelineno-0-420"></a>
<a id="__codelineno-0-421" name="__codelineno-0-421"></a> <span class="k">elif</span> <span class="n">scheme</span> <span class="ow">in</span> <span class="p">{</span><span class="s2">&quot;hdfs&quot;</span><span class="p">,</span> <span class="s2">&quot;viewfs&quot;</span><span class="p">}:</span>
<a id="__codelineno-0-422" name="__codelineno-0-422"></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_initialize_hdfs_fs</span><span class="p">(</span><span class="n">scheme</span><span class="p">,</span> <span class="n">netloc</span><span class="p">)</span>
<a id="__codelineno-0-423" name="__codelineno-0-423"></a>
<a id="__codelineno-0-424" name="__codelineno-0-424"></a> <span class="k">elif</span> <span class="n">scheme</span> <span class="ow">in</span> <span class="p">{</span><span class="s2">&quot;gs&quot;</span><span class="p">,</span> <span class="s2">&quot;gcs&quot;</span><span class="p">}:</span>
<a id="__codelineno-0-425" name="__codelineno-0-425"></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_initialize_gcs_fs</span><span class="p">()</span>
<a id="__codelineno-0-426" name="__codelineno-0-426"></a>
<a id="__codelineno-0-427" name="__codelineno-0-427"></a> <span class="k">elif</span> <span class="n">scheme</span> <span class="ow">in</span> <span class="p">{</span><span class="s2">&quot;abfs&quot;</span><span class="p">,</span> <span class="s2">&quot;abfss&quot;</span><span class="p">,</span> <span class="s2">&quot;wasb&quot;</span><span class="p">,</span> <span class="s2">&quot;wasbs&quot;</span><span class="p">}:</span>
<a id="__codelineno-0-428" name="__codelineno-0-428"></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_initialize_azure_fs</span><span class="p">()</span>
<a id="__codelineno-0-429" name="__codelineno-0-429"></a>
<a id="__codelineno-0-430" name="__codelineno-0-430"></a> <span class="k">elif</span> <span class="n">scheme</span> <span class="ow">in</span> <span class="p">{</span><span class="s2">&quot;file&quot;</span><span class="p">}:</span>
<a id="__codelineno-0-431" name="__codelineno-0-431"></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_initialize_local_fs</span><span class="p">()</span>
<a id="__codelineno-0-432" name="__codelineno-0-432"></a>
<a id="__codelineno-0-433" name="__codelineno-0-433"></a> <span class="k">else</span><span class="p">:</span>
<a id="__codelineno-0-434" name="__codelineno-0-434"></a> <span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="sa">f</span><span class="s2">&quot;Unrecognized filesystem type in URI: </span><span class="si">{</span><span class="n">scheme</span><span class="si">}</span><span class="s2">&quot;</span><span class="p">)</span>
<a id="__codelineno-0-435" name="__codelineno-0-435"></a>
<a id="__codelineno-0-436" name="__codelineno-0-436"></a> <span class="k">def</span><span class="w"> </span><span class="nf">_initialize_oss_fs</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">FileSystem</span><span class="p">:</span>
<a id="__codelineno-0-437" name="__codelineno-0-437"></a> <span class="kn">from</span><span class="w"> </span><span class="nn">pyarrow.fs</span><span class="w"> </span><span class="kn">import</span> <span class="n">S3FileSystem</span>
<a id="__codelineno-0-438" name="__codelineno-0-438"></a>
<a id="__codelineno-0-439" name="__codelineno-0-439"></a> <span class="n">client_kwargs</span><span class="p">:</span> <span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Any</span><span class="p">]</span> <span class="o">=</span> <span class="p">{</span>
<a id="__codelineno-0-440" name="__codelineno-0-440"></a> <span class="s2">&quot;endpoint_override&quot;</span><span class="p">:</span> <span class="bp">self</span><span class="o">.</span><span class="n">properties</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">S3_ENDPOINT</span><span class="p">),</span>
<a id="__codelineno-0-441" name="__codelineno-0-441"></a> <span class="s2">&quot;access_key&quot;</span><span class="p">:</span> <span class="n">get_first_property_value</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">properties</span><span class="p">,</span> <span class="n">S3_ACCESS_KEY_ID</span><span class="p">,</span> <span class="n">AWS_ACCESS_KEY_ID</span><span class="p">),</span>
<a id="__codelineno-0-442" name="__codelineno-0-442"></a> <span class="s2">&quot;secret_key&quot;</span><span class="p">:</span> <span class="n">get_first_property_value</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">properties</span><span class="p">,</span> <span class="n">S3_SECRET_ACCESS_KEY</span><span class="p">,</span> <span class="n">AWS_SECRET_ACCESS_KEY</span><span class="p">),</span>
<a id="__codelineno-0-443" name="__codelineno-0-443"></a> <span class="s2">&quot;session_token&quot;</span><span class="p">:</span> <span class="n">get_first_property_value</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">properties</span><span class="p">,</span> <span class="n">S3_SESSION_TOKEN</span><span class="p">,</span> <span class="n">AWS_SESSION_TOKEN</span><span class="p">),</span>
<a id="__codelineno-0-444" name="__codelineno-0-444"></a> <span class="s2">&quot;region&quot;</span><span class="p">:</span> <span class="n">get_first_property_value</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">properties</span><span class="p">,</span> <span class="n">S3_REGION</span><span class="p">,</span> <span class="n">AWS_REGION</span><span class="p">),</span>
<a id="__codelineno-0-445" name="__codelineno-0-445"></a> <span class="s2">&quot;force_virtual_addressing&quot;</span><span class="p">:</span> <span class="n">property_as_bool</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">properties</span><span class="p">,</span> <span class="n">S3_FORCE_VIRTUAL_ADDRESSING</span><span class="p">,</span> <span class="kc">True</span><span class="p">),</span>
<a id="__codelineno-0-446" name="__codelineno-0-446"></a> <span class="p">}</span>
<a id="__codelineno-0-447" name="__codelineno-0-447"></a>
<a id="__codelineno-0-448" name="__codelineno-0-448"></a> <span class="k">if</span> <span class="n">proxy_uri</span> <span class="o">:=</span> <span class="bp">self</span><span class="o">.</span><span class="n">properties</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">S3_PROXY_URI</span><span class="p">):</span>
<a id="__codelineno-0-449" name="__codelineno-0-449"></a> <span class="n">client_kwargs</span><span class="p">[</span><span class="s2">&quot;proxy_options&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="n">proxy_uri</span>
<a id="__codelineno-0-450" name="__codelineno-0-450"></a>
<a id="__codelineno-0-451" name="__codelineno-0-451"></a> <span class="k">if</span> <span class="n">connect_timeout</span> <span class="o">:=</span> <span class="bp">self</span><span class="o">.</span><span class="n">properties</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">S3_CONNECT_TIMEOUT</span><span class="p">):</span>
<a id="__codelineno-0-452" name="__codelineno-0-452"></a> <span class="n">client_kwargs</span><span class="p">[</span><span class="s2">&quot;connect_timeout&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="nb">float</span><span class="p">(</span><span class="n">connect_timeout</span><span class="p">)</span>
<a id="__codelineno-0-453" name="__codelineno-0-453"></a>
<a id="__codelineno-0-454" name="__codelineno-0-454"></a> <span class="k">if</span> <span class="n">request_timeout</span> <span class="o">:=</span> <span class="bp">self</span><span class="o">.</span><span class="n">properties</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">S3_REQUEST_TIMEOUT</span><span class="p">):</span>
<a id="__codelineno-0-455" name="__codelineno-0-455"></a> <span class="n">client_kwargs</span><span class="p">[</span><span class="s2">&quot;request_timeout&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="nb">float</span><span class="p">(</span><span class="n">request_timeout</span><span class="p">)</span>
<a id="__codelineno-0-456" name="__codelineno-0-456"></a>
<a id="__codelineno-0-457" name="__codelineno-0-457"></a> <span class="k">if</span> <span class="n">role_arn</span> <span class="o">:=</span> <span class="n">get_first_property_value</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">properties</span><span class="p">,</span> <span class="n">S3_ROLE_ARN</span><span class="p">,</span> <span class="n">AWS_ROLE_ARN</span><span class="p">):</span>
<a id="__codelineno-0-458" name="__codelineno-0-458"></a> <span class="n">client_kwargs</span><span class="p">[</span><span class="s2">&quot;role_arn&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="n">role_arn</span>
<a id="__codelineno-0-459" name="__codelineno-0-459"></a>
<a id="__codelineno-0-460" name="__codelineno-0-460"></a> <span class="k">if</span> <span class="n">session_name</span> <span class="o">:=</span> <span class="n">get_first_property_value</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">properties</span><span class="p">,</span> <span class="n">S3_ROLE_SESSION_NAME</span><span class="p">,</span> <span class="n">AWS_ROLE_SESSION_NAME</span><span class="p">):</span>
<a id="__codelineno-0-461" name="__codelineno-0-461"></a> <span class="n">client_kwargs</span><span class="p">[</span><span class="s2">&quot;session_name&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="n">session_name</span>
<a id="__codelineno-0-462" name="__codelineno-0-462"></a>
<a id="__codelineno-0-463" name="__codelineno-0-463"></a> <span class="k">if</span> <span class="n">s3_anonymous</span> <span class="o">:=</span> <span class="bp">self</span><span class="o">.</span><span class="n">properties</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">S3_ANONYMOUS</span><span class="p">):</span>
<a id="__codelineno-0-464" name="__codelineno-0-464"></a> <span class="n">client_kwargs</span><span class="p">[</span><span class="s2">&quot;anonymous&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="n">strtobool</span><span class="p">(</span><span class="n">s3_anonymous</span><span class="p">)</span>
<a id="__codelineno-0-465" name="__codelineno-0-465"></a>
<a id="__codelineno-0-466" name="__codelineno-0-466"></a> <span class="k">return</span> <span class="n">S3FileSystem</span><span class="p">(</span><span class="o">**</span><span class="n">client_kwargs</span><span class="p">)</span>
<a id="__codelineno-0-467" name="__codelineno-0-467"></a>
<a id="__codelineno-0-468" name="__codelineno-0-468"></a> <span class="k">def</span><span class="w"> </span><span class="nf">_initialize_s3_fs</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">netloc</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">])</span> <span class="o">-&gt;</span> <span class="n">FileSystem</span><span class="p">:</span>
<a id="__codelineno-0-469" name="__codelineno-0-469"></a> <span class="kn">from</span><span class="w"> </span><span class="nn">pyarrow.fs</span><span class="w"> </span><span class="kn">import</span> <span class="n">S3FileSystem</span>
<a id="__codelineno-0-470" name="__codelineno-0-470"></a>
<a id="__codelineno-0-471" name="__codelineno-0-471"></a> <span class="n">provided_region</span> <span class="o">=</span> <span class="n">get_first_property_value</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">properties</span><span class="p">,</span> <span class="n">S3_REGION</span><span class="p">,</span> <span class="n">AWS_REGION</span><span class="p">)</span>
<a id="__codelineno-0-472" name="__codelineno-0-472"></a>
<a id="__codelineno-0-473" name="__codelineno-0-473"></a> <span class="c1"># Do this when we don&#39;t provide the region at all, or when we explicitly enable it</span>
<a id="__codelineno-0-474" name="__codelineno-0-474"></a> <span class="k">if</span> <span class="n">provided_region</span> <span class="ow">is</span> <span class="kc">None</span> <span class="ow">or</span> <span class="n">property_as_bool</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">properties</span><span class="p">,</span> <span class="n">S3_RESOLVE_REGION</span><span class="p">,</span> <span class="kc">False</span><span class="p">)</span> <span class="ow">is</span> <span class="kc">True</span><span class="p">:</span>
<a id="__codelineno-0-475" name="__codelineno-0-475"></a> <span class="c1"># Resolve region from netloc(bucket), fallback to user-provided region</span>
<a id="__codelineno-0-476" name="__codelineno-0-476"></a> <span class="c1"># Only supported by buckets hosted by S3</span>
<a id="__codelineno-0-477" name="__codelineno-0-477"></a> <span class="n">bucket_region</span> <span class="o">=</span> <span class="n">_cached_resolve_s3_region</span><span class="p">(</span><span class="n">bucket</span><span class="o">=</span><span class="n">netloc</span><span class="p">)</span> <span class="ow">or</span> <span class="n">provided_region</span>
<a id="__codelineno-0-478" name="__codelineno-0-478"></a> <span class="k">if</span> <span class="n">provided_region</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span> <span class="ow">and</span> <span class="n">bucket_region</span> <span class="o">!=</span> <span class="n">provided_region</span><span class="p">:</span>
<a id="__codelineno-0-479" name="__codelineno-0-479"></a> <span class="n">logger</span><span class="o">.</span><span class="n">warning</span><span class="p">(</span>
<a id="__codelineno-0-480" name="__codelineno-0-480"></a> <span class="sa">f</span><span class="s2">&quot;PyArrow FileIO overriding S3 bucket region for bucket </span><span class="si">{</span><span class="n">netloc</span><span class="si">}</span><span class="s2">: &quot;</span>
<a id="__codelineno-0-481" name="__codelineno-0-481"></a> <span class="sa">f</span><span class="s2">&quot;provided region </span><span class="si">{</span><span class="n">provided_region</span><span class="si">}</span><span class="s2">, actual region </span><span class="si">{</span><span class="n">bucket_region</span><span class="si">}</span><span class="s2">&quot;</span>
<a id="__codelineno-0-482" name="__codelineno-0-482"></a> <span class="p">)</span>
<a id="__codelineno-0-483" name="__codelineno-0-483"></a> <span class="k">else</span><span class="p">:</span>
<a id="__codelineno-0-484" name="__codelineno-0-484"></a> <span class="n">bucket_region</span> <span class="o">=</span> <span class="n">provided_region</span>
<a id="__codelineno-0-485" name="__codelineno-0-485"></a>
<a id="__codelineno-0-486" name="__codelineno-0-486"></a> <span class="n">client_kwargs</span><span class="p">:</span> <span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Any</span><span class="p">]</span> <span class="o">=</span> <span class="p">{</span>
<a id="__codelineno-0-487" name="__codelineno-0-487"></a> <span class="s2">&quot;endpoint_override&quot;</span><span class="p">:</span> <span class="bp">self</span><span class="o">.</span><span class="n">properties</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">S3_ENDPOINT</span><span class="p">),</span>
<a id="__codelineno-0-488" name="__codelineno-0-488"></a> <span class="s2">&quot;access_key&quot;</span><span class="p">:</span> <span class="n">get_first_property_value</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">properties</span><span class="p">,</span> <span class="n">S3_ACCESS_KEY_ID</span><span class="p">,</span> <span class="n">AWS_ACCESS_KEY_ID</span><span class="p">),</span>
<a id="__codelineno-0-489" name="__codelineno-0-489"></a> <span class="s2">&quot;secret_key&quot;</span><span class="p">:</span> <span class="n">get_first_property_value</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">properties</span><span class="p">,</span> <span class="n">S3_SECRET_ACCESS_KEY</span><span class="p">,</span> <span class="n">AWS_SECRET_ACCESS_KEY</span><span class="p">),</span>
<a id="__codelineno-0-490" name="__codelineno-0-490"></a> <span class="s2">&quot;session_token&quot;</span><span class="p">:</span> <span class="n">get_first_property_value</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">properties</span><span class="p">,</span> <span class="n">S3_SESSION_TOKEN</span><span class="p">,</span> <span class="n">AWS_SESSION_TOKEN</span><span class="p">),</span>
<a id="__codelineno-0-491" name="__codelineno-0-491"></a> <span class="s2">&quot;region&quot;</span><span class="p">:</span> <span class="n">bucket_region</span><span class="p">,</span>
<a id="__codelineno-0-492" name="__codelineno-0-492"></a> <span class="p">}</span>
<a id="__codelineno-0-493" name="__codelineno-0-493"></a>
<a id="__codelineno-0-494" name="__codelineno-0-494"></a> <span class="k">if</span> <span class="n">proxy_uri</span> <span class="o">:=</span> <span class="bp">self</span><span class="o">.</span><span class="n">properties</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">S3_PROXY_URI</span><span class="p">):</span>
<a id="__codelineno-0-495" name="__codelineno-0-495"></a> <span class="n">client_kwargs</span><span class="p">[</span><span class="s2">&quot;proxy_options&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="n">proxy_uri</span>
<a id="__codelineno-0-496" name="__codelineno-0-496"></a>
<a id="__codelineno-0-497" name="__codelineno-0-497"></a> <span class="k">if</span> <span class="n">connect_timeout</span> <span class="o">:=</span> <span class="bp">self</span><span class="o">.</span><span class="n">properties</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">S3_CONNECT_TIMEOUT</span><span class="p">):</span>
<a id="__codelineno-0-498" name="__codelineno-0-498"></a> <span class="n">client_kwargs</span><span class="p">[</span><span class="s2">&quot;connect_timeout&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="nb">float</span><span class="p">(</span><span class="n">connect_timeout</span><span class="p">)</span>
<a id="__codelineno-0-499" name="__codelineno-0-499"></a>
<a id="__codelineno-0-500" name="__codelineno-0-500"></a> <span class="k">if</span> <span class="n">request_timeout</span> <span class="o">:=</span> <span class="bp">self</span><span class="o">.</span><span class="n">properties</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">S3_REQUEST_TIMEOUT</span><span class="p">):</span>
<a id="__codelineno-0-501" name="__codelineno-0-501"></a> <span class="n">client_kwargs</span><span class="p">[</span><span class="s2">&quot;request_timeout&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="nb">float</span><span class="p">(</span><span class="n">request_timeout</span><span class="p">)</span>
<a id="__codelineno-0-502" name="__codelineno-0-502"></a>
<a id="__codelineno-0-503" name="__codelineno-0-503"></a> <span class="k">if</span> <span class="n">role_arn</span> <span class="o">:=</span> <span class="n">get_first_property_value</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">properties</span><span class="p">,</span> <span class="n">S3_ROLE_ARN</span><span class="p">,</span> <span class="n">AWS_ROLE_ARN</span><span class="p">):</span>
<a id="__codelineno-0-504" name="__codelineno-0-504"></a> <span class="n">client_kwargs</span><span class="p">[</span><span class="s2">&quot;role_arn&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="n">role_arn</span>
<a id="__codelineno-0-505" name="__codelineno-0-505"></a>
<a id="__codelineno-0-506" name="__codelineno-0-506"></a> <span class="k">if</span> <span class="n">session_name</span> <span class="o">:=</span> <span class="n">get_first_property_value</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">properties</span><span class="p">,</span> <span class="n">S3_ROLE_SESSION_NAME</span><span class="p">,</span> <span class="n">AWS_ROLE_SESSION_NAME</span><span class="p">):</span>
<a id="__codelineno-0-507" name="__codelineno-0-507"></a> <span class="n">client_kwargs</span><span class="p">[</span><span class="s2">&quot;session_name&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="n">session_name</span>
<a id="__codelineno-0-508" name="__codelineno-0-508"></a>
<a id="__codelineno-0-509" name="__codelineno-0-509"></a> <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">properties</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">S3_FORCE_VIRTUAL_ADDRESSING</span><span class="p">)</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
<a id="__codelineno-0-510" name="__codelineno-0-510"></a> <span class="n">client_kwargs</span><span class="p">[</span><span class="s2">&quot;force_virtual_addressing&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="n">property_as_bool</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">properties</span><span class="p">,</span> <span class="n">S3_FORCE_VIRTUAL_ADDRESSING</span><span class="p">,</span> <span class="kc">False</span><span class="p">)</span>
<a id="__codelineno-0-511" name="__codelineno-0-511"></a>
<a id="__codelineno-0-512" name="__codelineno-0-512"></a> <span class="k">if</span> <span class="p">(</span><span class="n">retry_strategy_impl</span> <span class="o">:=</span> <span class="bp">self</span><span class="o">.</span><span class="n">properties</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">S3_RETRY_STRATEGY_IMPL</span><span class="p">))</span> <span class="ow">and</span> <span class="p">(</span>
<a id="__codelineno-0-513" name="__codelineno-0-513"></a> <span class="n">retry_instance</span> <span class="o">:=</span> <span class="n">_import_retry_strategy</span><span class="p">(</span><span class="n">retry_strategy_impl</span><span class="p">)</span>
<a id="__codelineno-0-514" name="__codelineno-0-514"></a> <span class="p">):</span>
<a id="__codelineno-0-515" name="__codelineno-0-515"></a> <span class="n">client_kwargs</span><span class="p">[</span><span class="s2">&quot;retry_strategy&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="n">retry_instance</span>
<a id="__codelineno-0-516" name="__codelineno-0-516"></a>
<a id="__codelineno-0-517" name="__codelineno-0-517"></a> <span class="k">if</span> <span class="n">s3_anonymous</span> <span class="o">:=</span> <span class="bp">self</span><span class="o">.</span><span class="n">properties</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">S3_ANONYMOUS</span><span class="p">):</span>
<a id="__codelineno-0-518" name="__codelineno-0-518"></a> <span class="n">client_kwargs</span><span class="p">[</span><span class="s2">&quot;anonymous&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="n">strtobool</span><span class="p">(</span><span class="n">s3_anonymous</span><span class="p">)</span>
<a id="__codelineno-0-519" name="__codelineno-0-519"></a>
<a id="__codelineno-0-520" name="__codelineno-0-520"></a> <span class="k">return</span> <span class="n">S3FileSystem</span><span class="p">(</span><span class="o">**</span><span class="n">client_kwargs</span><span class="p">)</span>
<a id="__codelineno-0-521" name="__codelineno-0-521"></a>
<a id="__codelineno-0-522" name="__codelineno-0-522"></a> <span class="k">def</span><span class="w"> </span><span class="nf">_initialize_azure_fs</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">FileSystem</span><span class="p">:</span>
<a id="__codelineno-0-523" name="__codelineno-0-523"></a> <span class="c1"># https://arrow.apache.org/docs/python/generated/pyarrow.fs.AzureFileSystem.html</span>
<a id="__codelineno-0-524" name="__codelineno-0-524"></a> <span class="kn">from</span><span class="w"> </span><span class="nn">packaging</span><span class="w"> </span><span class="kn">import</span> <span class="n">version</span>
<a id="__codelineno-0-525" name="__codelineno-0-525"></a>
<a id="__codelineno-0-526" name="__codelineno-0-526"></a> <span class="n">MIN_PYARROW_VERSION_SUPPORTING_AZURE_FS</span> <span class="o">=</span> <span class="s2">&quot;20.0.0&quot;</span>
<a id="__codelineno-0-527" name="__codelineno-0-527"></a> <span class="k">if</span> <span class="n">version</span><span class="o">.</span><span class="n">parse</span><span class="p">(</span><span class="n">pyarrow</span><span class="o">.</span><span class="n">__version__</span><span class="p">)</span> <span class="o">&lt;</span> <span class="n">version</span><span class="o">.</span><span class="n">parse</span><span class="p">(</span><span class="n">MIN_PYARROW_VERSION_SUPPORTING_AZURE_FS</span><span class="p">):</span>
<a id="__codelineno-0-528" name="__codelineno-0-528"></a> <span class="k">raise</span> <span class="ne">ImportError</span><span class="p">(</span>
<a id="__codelineno-0-529" name="__codelineno-0-529"></a> <span class="sa">f</span><span class="s2">&quot;pyarrow version &gt;= </span><span class="si">{</span><span class="n">MIN_PYARROW_VERSION_SUPPORTING_AZURE_FS</span><span class="si">}</span><span class="s2"> required for AzureFileSystem support, &quot;</span>
<a id="__codelineno-0-530" name="__codelineno-0-530"></a> <span class="sa">f</span><span class="s2">&quot;but found version </span><span class="si">{</span><span class="n">pyarrow</span><span class="o">.</span><span class="n">__version__</span><span class="si">}</span><span class="s2">.&quot;</span>
<a id="__codelineno-0-531" name="__codelineno-0-531"></a> <span class="p">)</span>
<a id="__codelineno-0-532" name="__codelineno-0-532"></a>
<a id="__codelineno-0-533" name="__codelineno-0-533"></a> <span class="kn">from</span><span class="w"> </span><span class="nn">pyarrow.fs</span><span class="w"> </span><span class="kn">import</span> <span class="n">AzureFileSystem</span>
<a id="__codelineno-0-534" name="__codelineno-0-534"></a>
<a id="__codelineno-0-535" name="__codelineno-0-535"></a> <span class="n">client_kwargs</span><span class="p">:</span> <span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="p">{}</span>
<a id="__codelineno-0-536" name="__codelineno-0-536"></a>
<a id="__codelineno-0-537" name="__codelineno-0-537"></a> <span class="k">if</span> <span class="n">account_name</span> <span class="o">:=</span> <span class="bp">self</span><span class="o">.</span><span class="n">properties</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">ADLS_ACCOUNT_NAME</span><span class="p">):</span>
<a id="__codelineno-0-538" name="__codelineno-0-538"></a> <span class="n">client_kwargs</span><span class="p">[</span><span class="s2">&quot;account_name&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="n">account_name</span>
<a id="__codelineno-0-539" name="__codelineno-0-539"></a>
<a id="__codelineno-0-540" name="__codelineno-0-540"></a> <span class="k">if</span> <span class="n">account_key</span> <span class="o">:=</span> <span class="bp">self</span><span class="o">.</span><span class="n">properties</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">ADLS_ACCOUNT_KEY</span><span class="p">):</span>
<a id="__codelineno-0-541" name="__codelineno-0-541"></a> <span class="n">client_kwargs</span><span class="p">[</span><span class="s2">&quot;account_key&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="n">account_key</span>
<a id="__codelineno-0-542" name="__codelineno-0-542"></a>
<a id="__codelineno-0-543" name="__codelineno-0-543"></a> <span class="k">if</span> <span class="n">blob_storage_authority</span> <span class="o">:=</span> <span class="bp">self</span><span class="o">.</span><span class="n">properties</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">ADLS_BLOB_STORAGE_AUTHORITY</span><span class="p">):</span>
<a id="__codelineno-0-544" name="__codelineno-0-544"></a> <span class="n">client_kwargs</span><span class="p">[</span><span class="s2">&quot;blob_storage_authority&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="n">blob_storage_authority</span>
<a id="__codelineno-0-545" name="__codelineno-0-545"></a>
<a id="__codelineno-0-546" name="__codelineno-0-546"></a> <span class="k">if</span> <span class="n">dfs_storage_authority</span> <span class="o">:=</span> <span class="bp">self</span><span class="o">.</span><span class="n">properties</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">ADLS_DFS_STORAGE_AUTHORITY</span><span class="p">):</span>
<a id="__codelineno-0-547" name="__codelineno-0-547"></a> <span class="n">client_kwargs</span><span class="p">[</span><span class="s2">&quot;dfs_storage_authority&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="n">dfs_storage_authority</span>
<a id="__codelineno-0-548" name="__codelineno-0-548"></a>
<a id="__codelineno-0-549" name="__codelineno-0-549"></a> <span class="k">if</span> <span class="n">blob_storage_scheme</span> <span class="o">:=</span> <span class="bp">self</span><span class="o">.</span><span class="n">properties</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">ADLS_BLOB_STORAGE_SCHEME</span><span class="p">):</span>
<a id="__codelineno-0-550" name="__codelineno-0-550"></a> <span class="n">client_kwargs</span><span class="p">[</span><span class="s2">&quot;blob_storage_scheme&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="n">blob_storage_scheme</span>
<a id="__codelineno-0-551" name="__codelineno-0-551"></a>
<a id="__codelineno-0-552" name="__codelineno-0-552"></a> <span class="k">if</span> <span class="n">dfs_storage_scheme</span> <span class="o">:=</span> <span class="bp">self</span><span class="o">.</span><span class="n">properties</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">ADLS_DFS_STORAGE_SCHEME</span><span class="p">):</span>
<a id="__codelineno-0-553" name="__codelineno-0-553"></a> <span class="n">client_kwargs</span><span class="p">[</span><span class="s2">&quot;dfs_storage_scheme&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="n">dfs_storage_scheme</span>
<a id="__codelineno-0-554" name="__codelineno-0-554"></a>
<a id="__codelineno-0-555" name="__codelineno-0-555"></a> <span class="k">if</span> <span class="n">sas_token</span> <span class="o">:=</span> <span class="bp">self</span><span class="o">.</span><span class="n">properties</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">ADLS_SAS_TOKEN</span><span class="p">):</span>
<a id="__codelineno-0-556" name="__codelineno-0-556"></a> <span class="n">client_kwargs</span><span class="p">[</span><span class="s2">&quot;sas_token&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="n">sas_token</span>
<a id="__codelineno-0-557" name="__codelineno-0-557"></a>
<a id="__codelineno-0-558" name="__codelineno-0-558"></a> <span class="k">if</span> <span class="n">client_id</span> <span class="o">:=</span> <span class="bp">self</span><span class="o">.</span><span class="n">properties</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">ADLS_CLIENT_ID</span><span class="p">):</span>
<a id="__codelineno-0-559" name="__codelineno-0-559"></a> <span class="n">client_kwargs</span><span class="p">[</span><span class="s2">&quot;client_id&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="n">client_id</span>
<a id="__codelineno-0-560" name="__codelineno-0-560"></a> <span class="k">if</span> <span class="n">client_secret</span> <span class="o">:=</span> <span class="bp">self</span><span class="o">.</span><span class="n">properties</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">ADLS_CLIENT_SECRET</span><span class="p">):</span>
<a id="__codelineno-0-561" name="__codelineno-0-561"></a> <span class="n">client_kwargs</span><span class="p">[</span><span class="s2">&quot;client_secret&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="n">client_secret</span>
<a id="__codelineno-0-562" name="__codelineno-0-562"></a> <span class="k">if</span> <span class="n">tenant_id</span> <span class="o">:=</span> <span class="bp">self</span><span class="o">.</span><span class="n">properties</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">ADLS_TENANT_ID</span><span class="p">):</span>
<a id="__codelineno-0-563" name="__codelineno-0-563"></a> <span class="n">client_kwargs</span><span class="p">[</span><span class="s2">&quot;tenant_id&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="n">tenant_id</span>
<a id="__codelineno-0-564" name="__codelineno-0-564"></a>
<a id="__codelineno-0-565" name="__codelineno-0-565"></a> <span class="c1"># Validate that all three are provided together for ClientSecretCredential</span>
<a id="__codelineno-0-566" name="__codelineno-0-566"></a> <span class="n">credential_keys</span> <span class="o">=</span> <span class="p">[</span><span class="s2">&quot;client_id&quot;</span><span class="p">,</span> <span class="s2">&quot;client_secret&quot;</span><span class="p">,</span> <span class="s2">&quot;tenant_id&quot;</span><span class="p">]</span>
<a id="__codelineno-0-567" name="__codelineno-0-567"></a> <span class="n">provided_keys</span> <span class="o">=</span> <span class="p">[</span><span class="n">key</span> <span class="k">for</span> <span class="n">key</span> <span class="ow">in</span> <span class="n">credential_keys</span> <span class="k">if</span> <span class="n">key</span> <span class="ow">in</span> <span class="n">client_kwargs</span><span class="p">]</span>
<a id="__codelineno-0-568" name="__codelineno-0-568"></a> <span class="k">if</span> <span class="n">provided_keys</span> <span class="ow">and</span> <span class="nb">len</span><span class="p">(</span><span class="n">provided_keys</span><span class="p">)</span> <span class="o">!=</span> <span class="nb">len</span><span class="p">(</span><span class="n">credential_keys</span><span class="p">):</span>
<a id="__codelineno-0-569" name="__codelineno-0-569"></a> <span class="n">missing_keys</span> <span class="o">=</span> <span class="p">[</span><span class="n">key</span> <span class="k">for</span> <span class="n">key</span> <span class="ow">in</span> <span class="n">credential_keys</span> <span class="k">if</span> <span class="n">key</span> <span class="ow">not</span> <span class="ow">in</span> <span class="n">client_kwargs</span><span class="p">]</span>
<a id="__codelineno-0-570" name="__codelineno-0-570"></a> <span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span>
<a id="__codelineno-0-571" name="__codelineno-0-571"></a> <span class="sa">f</span><span class="s2">&quot;client_id, client_secret, and tenant_id must all be provided together &quot;</span>
<a id="__codelineno-0-572" name="__codelineno-0-572"></a> <span class="sa">f</span><span class="s2">&quot;to use ClientSecretCredential for Azure authentication. &quot;</span>
<a id="__codelineno-0-573" name="__codelineno-0-573"></a> <span class="sa">f</span><span class="s2">&quot;Provided: </span><span class="si">{</span><span class="n">provided_keys</span><span class="si">}</span><span class="s2">, Missing: </span><span class="si">{</span><span class="n">missing_keys</span><span class="si">}</span><span class="s2">&quot;</span>
<a id="__codelineno-0-574" name="__codelineno-0-574"></a> <span class="p">)</span>
<a id="__codelineno-0-575" name="__codelineno-0-575"></a>
<a id="__codelineno-0-576" name="__codelineno-0-576"></a> <span class="k">return</span> <span class="n">AzureFileSystem</span><span class="p">(</span><span class="o">**</span><span class="n">client_kwargs</span><span class="p">)</span>
<a id="__codelineno-0-577" name="__codelineno-0-577"></a>
<a id="__codelineno-0-578" name="__codelineno-0-578"></a> <span class="k">def</span><span class="w"> </span><span class="nf">_initialize_hdfs_fs</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">scheme</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span> <span class="n">netloc</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">])</span> <span class="o">-&gt;</span> <span class="n">FileSystem</span><span class="p">:</span>
<a id="__codelineno-0-579" name="__codelineno-0-579"></a> <span class="kn">from</span><span class="w"> </span><span class="nn">pyarrow.fs</span><span class="w"> </span><span class="kn">import</span> <span class="n">HadoopFileSystem</span>
<a id="__codelineno-0-580" name="__codelineno-0-580"></a>
<a id="__codelineno-0-581" name="__codelineno-0-581"></a> <span class="n">hdfs_kwargs</span><span class="p">:</span> <span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Any</span><span class="p">]</span> <span class="o">=</span> <span class="p">{}</span>
<a id="__codelineno-0-582" name="__codelineno-0-582"></a> <span class="k">if</span> <span class="n">netloc</span><span class="p">:</span>
<a id="__codelineno-0-583" name="__codelineno-0-583"></a> <span class="k">return</span> <span class="n">HadoopFileSystem</span><span class="o">.</span><span class="n">from_uri</span><span class="p">(</span><span class="sa">f</span><span class="s2">&quot;</span><span class="si">{</span><span class="n">scheme</span><span class="si">}</span><span class="s2">://</span><span class="si">{</span><span class="n">netloc</span><span class="si">}</span><span class="s2">&quot;</span><span class="p">)</span>
<a id="__codelineno-0-584" name="__codelineno-0-584"></a> <span class="k">if</span> <span class="n">host</span> <span class="o">:=</span> <span class="bp">self</span><span class="o">.</span><span class="n">properties</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">HDFS_HOST</span><span class="p">):</span>
<a id="__codelineno-0-585" name="__codelineno-0-585"></a> <span class="n">hdfs_kwargs</span><span class="p">[</span><span class="s2">&quot;host&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="n">host</span>
<a id="__codelineno-0-586" name="__codelineno-0-586"></a> <span class="k">if</span> <span class="n">port</span> <span class="o">:=</span> <span class="bp">self</span><span class="o">.</span><span class="n">properties</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">HDFS_PORT</span><span class="p">):</span>
<a id="__codelineno-0-587" name="__codelineno-0-587"></a> <span class="c1"># port should be an integer type</span>
<a id="__codelineno-0-588" name="__codelineno-0-588"></a> <span class="n">hdfs_kwargs</span><span class="p">[</span><span class="s2">&quot;port&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="nb">int</span><span class="p">(</span><span class="n">port</span><span class="p">)</span>
<a id="__codelineno-0-589" name="__codelineno-0-589"></a> <span class="k">if</span> <span class="n">user</span> <span class="o">:=</span> <span class="bp">self</span><span class="o">.</span><span class="n">properties</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">HDFS_USER</span><span class="p">):</span>
<a id="__codelineno-0-590" name="__codelineno-0-590"></a> <span class="n">hdfs_kwargs</span><span class="p">[</span><span class="s2">&quot;user&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="n">user</span>
<a id="__codelineno-0-591" name="__codelineno-0-591"></a> <span class="k">if</span> <span class="n">kerb_ticket</span> <span class="o">:=</span> <span class="bp">self</span><span class="o">.</span><span class="n">properties</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">HDFS_KERB_TICKET</span><span class="p">):</span>
<a id="__codelineno-0-592" name="__codelineno-0-592"></a> <span class="n">hdfs_kwargs</span><span class="p">[</span><span class="s2">&quot;kerb_ticket&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="n">kerb_ticket</span>
<a id="__codelineno-0-593" name="__codelineno-0-593"></a>
<a id="__codelineno-0-594" name="__codelineno-0-594"></a> <span class="k">return</span> <span class="n">HadoopFileSystem</span><span class="p">(</span><span class="o">**</span><span class="n">hdfs_kwargs</span><span class="p">)</span>
<a id="__codelineno-0-595" name="__codelineno-0-595"></a>
<a id="__codelineno-0-596" name="__codelineno-0-596"></a> <span class="k">def</span><span class="w"> </span><span class="nf">_initialize_gcs_fs</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">FileSystem</span><span class="p">:</span>
<a id="__codelineno-0-597" name="__codelineno-0-597"></a> <span class="kn">from</span><span class="w"> </span><span class="nn">pyarrow.fs</span><span class="w"> </span><span class="kn">import</span> <span class="n">GcsFileSystem</span>
<a id="__codelineno-0-598" name="__codelineno-0-598"></a>
<a id="__codelineno-0-599" name="__codelineno-0-599"></a> <span class="n">gcs_kwargs</span><span class="p">:</span> <span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Any</span><span class="p">]</span> <span class="o">=</span> <span class="p">{}</span>
<a id="__codelineno-0-600" name="__codelineno-0-600"></a> <span class="k">if</span> <span class="n">access_token</span> <span class="o">:=</span> <span class="bp">self</span><span class="o">.</span><span class="n">properties</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">GCS_TOKEN</span><span class="p">):</span>
<a id="__codelineno-0-601" name="__codelineno-0-601"></a> <span class="n">gcs_kwargs</span><span class="p">[</span><span class="s2">&quot;access_token&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="n">access_token</span>
<a id="__codelineno-0-602" name="__codelineno-0-602"></a> <span class="k">if</span> <span class="n">expiration</span> <span class="o">:=</span> <span class="bp">self</span><span class="o">.</span><span class="n">properties</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">GCS_TOKEN_EXPIRES_AT_MS</span><span class="p">):</span>
<a id="__codelineno-0-603" name="__codelineno-0-603"></a> <span class="n">gcs_kwargs</span><span class="p">[</span><span class="s2">&quot;credential_token_expiration&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="n">millis_to_datetime</span><span class="p">(</span><span class="nb">int</span><span class="p">(</span><span class="n">expiration</span><span class="p">))</span>
<a id="__codelineno-0-604" name="__codelineno-0-604"></a> <span class="k">if</span> <span class="n">bucket_location</span> <span class="o">:=</span> <span class="bp">self</span><span class="o">.</span><span class="n">properties</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">GCS_DEFAULT_LOCATION</span><span class="p">):</span>
<a id="__codelineno-0-605" name="__codelineno-0-605"></a> <span class="n">gcs_kwargs</span><span class="p">[</span><span class="s2">&quot;default_bucket_location&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="n">bucket_location</span>
<a id="__codelineno-0-606" name="__codelineno-0-606"></a> <span class="k">if</span> <span class="n">endpoint</span> <span class="o">:=</span> <span class="bp">self</span><span class="o">.</span><span class="n">properties</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">GCS_SERVICE_HOST</span><span class="p">):</span>
<a id="__codelineno-0-607" name="__codelineno-0-607"></a> <span class="n">url_parts</span> <span class="o">=</span> <span class="n">urlparse</span><span class="p">(</span><span class="n">endpoint</span><span class="p">)</span>
<a id="__codelineno-0-608" name="__codelineno-0-608"></a> <span class="n">gcs_kwargs</span><span class="p">[</span><span class="s2">&quot;scheme&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="n">url_parts</span><span class="o">.</span><span class="n">scheme</span>
<a id="__codelineno-0-609" name="__codelineno-0-609"></a> <span class="n">gcs_kwargs</span><span class="p">[</span><span class="s2">&quot;endpoint_override&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="n">url_parts</span><span class="o">.</span><span class="n">netloc</span>
<a id="__codelineno-0-610" name="__codelineno-0-610"></a>
<a id="__codelineno-0-611" name="__codelineno-0-611"></a> <span class="k">return</span> <span class="n">GcsFileSystem</span><span class="p">(</span><span class="o">**</span><span class="n">gcs_kwargs</span><span class="p">)</span>
<a id="__codelineno-0-612" name="__codelineno-0-612"></a>
<a id="__codelineno-0-613" name="__codelineno-0-613"></a> <span class="k">def</span><span class="w"> </span><span class="nf">_initialize_local_fs</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">FileSystem</span><span class="p">:</span>
<a id="__codelineno-0-614" name="__codelineno-0-614"></a> <span class="k">return</span> <span class="n">PyArrowLocalFileSystem</span><span class="p">()</span>
<a id="__codelineno-0-615" name="__codelineno-0-615"></a>
<a id="__codelineno-0-616" name="__codelineno-0-616"></a> <span class="k">def</span><span class="w"> </span><span class="nf">new_input</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">location</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">PyArrowFile</span><span class="p">:</span>
<a id="__codelineno-0-617" name="__codelineno-0-617"></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;Get a PyArrowFile instance to read bytes from the file at the given location.</span>
<a id="__codelineno-0-618" name="__codelineno-0-618"></a>
<a id="__codelineno-0-619" name="__codelineno-0-619"></a><span class="sd"> Args:</span>
<a id="__codelineno-0-620" name="__codelineno-0-620"></a><span class="sd"> location (str): A URI or a path to a local file.</span>
<a id="__codelineno-0-621" name="__codelineno-0-621"></a>
<a id="__codelineno-0-622" name="__codelineno-0-622"></a><span class="sd"> Returns:</span>
<a id="__codelineno-0-623" name="__codelineno-0-623"></a><span class="sd"> PyArrowFile: A PyArrowFile instance for the given location.</span>
<a id="__codelineno-0-624" name="__codelineno-0-624"></a><span class="sd"> &quot;&quot;&quot;</span>
<a id="__codelineno-0-625" name="__codelineno-0-625"></a> <span class="n">scheme</span><span class="p">,</span> <span class="n">netloc</span><span class="p">,</span> <span class="n">path</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">parse_location</span><span class="p">(</span><span class="n">location</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">properties</span><span class="p">)</span>
<a id="__codelineno-0-626" name="__codelineno-0-626"></a> <span class="k">return</span> <span class="n">PyArrowFile</span><span class="p">(</span>
<a id="__codelineno-0-627" name="__codelineno-0-627"></a> <span class="n">fs</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">fs_by_scheme</span><span class="p">(</span><span class="n">scheme</span><span class="p">,</span> <span class="n">netloc</span><span class="p">),</span>
<a id="__codelineno-0-628" name="__codelineno-0-628"></a> <span class="n">location</span><span class="o">=</span><span class="n">location</span><span class="p">,</span>
<a id="__codelineno-0-629" name="__codelineno-0-629"></a> <span class="n">path</span><span class="o">=</span><span class="n">path</span><span class="p">,</span>
<a id="__codelineno-0-630" name="__codelineno-0-630"></a> <span class="n">buffer_size</span><span class="o">=</span><span class="nb">int</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">properties</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">BUFFER_SIZE</span><span class="p">,</span> <span class="n">ONE_MEGABYTE</span><span class="p">)),</span>
<a id="__codelineno-0-631" name="__codelineno-0-631"></a> <span class="p">)</span>
<a id="__codelineno-0-632" name="__codelineno-0-632"></a>
<a id="__codelineno-0-633" name="__codelineno-0-633"></a> <span class="k">def</span><span class="w"> </span><span class="nf">new_output</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">location</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">PyArrowFile</span><span class="p">:</span>
<a id="__codelineno-0-634" name="__codelineno-0-634"></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;Get a PyArrowFile instance to write bytes to the file at the given location.</span>
<a id="__codelineno-0-635" name="__codelineno-0-635"></a>
<a id="__codelineno-0-636" name="__codelineno-0-636"></a><span class="sd"> Args:</span>
<a id="__codelineno-0-637" name="__codelineno-0-637"></a><span class="sd"> location (str): A URI or a path to a local file.</span>
<a id="__codelineno-0-638" name="__codelineno-0-638"></a>
<a id="__codelineno-0-639" name="__codelineno-0-639"></a><span class="sd"> Returns:</span>
<a id="__codelineno-0-640" name="__codelineno-0-640"></a><span class="sd"> PyArrowFile: A PyArrowFile instance for the given location.</span>
<a id="__codelineno-0-641" name="__codelineno-0-641"></a><span class="sd"> &quot;&quot;&quot;</span>
<a id="__codelineno-0-642" name="__codelineno-0-642"></a> <span class="n">scheme</span><span class="p">,</span> <span class="n">netloc</span><span class="p">,</span> <span class="n">path</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">parse_location</span><span class="p">(</span><span class="n">location</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">properties</span><span class="p">)</span>
<a id="__codelineno-0-643" name="__codelineno-0-643"></a> <span class="k">return</span> <span class="n">PyArrowFile</span><span class="p">(</span>
<a id="__codelineno-0-644" name="__codelineno-0-644"></a> <span class="n">fs</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">fs_by_scheme</span><span class="p">(</span><span class="n">scheme</span><span class="p">,</span> <span class="n">netloc</span><span class="p">),</span>
<a id="__codelineno-0-645" name="__codelineno-0-645"></a> <span class="n">location</span><span class="o">=</span><span class="n">location</span><span class="p">,</span>
<a id="__codelineno-0-646" name="__codelineno-0-646"></a> <span class="n">path</span><span class="o">=</span><span class="n">path</span><span class="p">,</span>
<a id="__codelineno-0-647" name="__codelineno-0-647"></a> <span class="n">buffer_size</span><span class="o">=</span><span class="nb">int</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">properties</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">BUFFER_SIZE</span><span class="p">,</span> <span class="n">ONE_MEGABYTE</span><span class="p">)),</span>
<a id="__codelineno-0-648" name="__codelineno-0-648"></a> <span class="p">)</span>
<a id="__codelineno-0-649" name="__codelineno-0-649"></a>
<a id="__codelineno-0-650" name="__codelineno-0-650"></a> <span class="k">def</span><span class="w"> </span><span class="nf">delete</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">location</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">InputFile</span><span class="p">,</span> <span class="n">OutputFile</span><span class="p">])</span> <span class="o">-&gt;</span> <span class="kc">None</span><span class="p">:</span>
<a id="__codelineno-0-651" name="__codelineno-0-651"></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;Delete the file at the given location.</span>
<a id="__codelineno-0-652" name="__codelineno-0-652"></a>
<a id="__codelineno-0-653" name="__codelineno-0-653"></a><span class="sd"> Args:</span>
<a id="__codelineno-0-654" name="__codelineno-0-654"></a><span class="sd"> location (Union[str, InputFile, OutputFile]): The URI to the file--if an InputFile instance or an OutputFile instance is provided,</span>
<a id="__codelineno-0-655" name="__codelineno-0-655"></a><span class="sd"> the location attribute for that instance is used as the location to delete.</span>
<a id="__codelineno-0-656" name="__codelineno-0-656"></a>
<a id="__codelineno-0-657" name="__codelineno-0-657"></a><span class="sd"> Raises:</span>
<a id="__codelineno-0-658" name="__codelineno-0-658"></a><span class="sd"> FileNotFoundError: When the file at the provided location does not exist.</span>
<a id="__codelineno-0-659" name="__codelineno-0-659"></a><span class="sd"> PermissionError: If the file at the provided location cannot be accessed due to a permission error such as</span>
<a id="__codelineno-0-660" name="__codelineno-0-660"></a><span class="sd"> an AWS error code 15.</span>
<a id="__codelineno-0-661" name="__codelineno-0-661"></a><span class="sd"> &quot;&quot;&quot;</span>
<a id="__codelineno-0-662" name="__codelineno-0-662"></a> <span class="n">str_location</span> <span class="o">=</span> <span class="n">location</span><span class="o">.</span><span class="n">location</span> <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">location</span><span class="p">,</span> <span class="p">(</span><span class="n">InputFile</span><span class="p">,</span> <span class="n">OutputFile</span><span class="p">))</span> <span class="k">else</span> <span class="n">location</span>
<a id="__codelineno-0-663" name="__codelineno-0-663"></a> <span class="n">scheme</span><span class="p">,</span> <span class="n">netloc</span><span class="p">,</span> <span class="n">path</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">parse_location</span><span class="p">(</span><span class="n">str_location</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">properties</span><span class="p">)</span>
<a id="__codelineno-0-664" name="__codelineno-0-664"></a> <span class="n">fs</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">fs_by_scheme</span><span class="p">(</span><span class="n">scheme</span><span class="p">,</span> <span class="n">netloc</span><span class="p">)</span>
<a id="__codelineno-0-665" name="__codelineno-0-665"></a>
<a id="__codelineno-0-666" name="__codelineno-0-666"></a> <span class="k">try</span><span class="p">:</span>
<a id="__codelineno-0-667" name="__codelineno-0-667"></a> <span class="n">fs</span><span class="o">.</span><span class="n">delete_file</span><span class="p">(</span><span class="n">path</span><span class="p">)</span>
<a id="__codelineno-0-668" name="__codelineno-0-668"></a> <span class="k">except</span> <span class="ne">FileNotFoundError</span><span class="p">:</span>
<a id="__codelineno-0-669" name="__codelineno-0-669"></a> <span class="k">raise</span>
<a id="__codelineno-0-670" name="__codelineno-0-670"></a> <span class="k">except</span> <span class="ne">PermissionError</span><span class="p">:</span>
<a id="__codelineno-0-671" name="__codelineno-0-671"></a> <span class="k">raise</span>
<a id="__codelineno-0-672" name="__codelineno-0-672"></a> <span class="k">except</span> <span class="ne">OSError</span> <span class="k">as</span> <span class="n">e</span><span class="p">:</span>
<a id="__codelineno-0-673" name="__codelineno-0-673"></a> <span class="k">if</span> <span class="n">e</span><span class="o">.</span><span class="n">errno</span> <span class="o">==</span> <span class="mi">2</span> <span class="ow">or</span> <span class="s2">&quot;Path does not exist&quot;</span> <span class="ow">in</span> <span class="nb">str</span><span class="p">(</span><span class="n">e</span><span class="p">):</span>
<a id="__codelineno-0-674" name="__codelineno-0-674"></a> <span class="k">raise</span> <span class="ne">FileNotFoundError</span><span class="p">(</span><span class="sa">f</span><span class="s2">&quot;Cannot delete file, does not exist: </span><span class="si">{</span><span class="n">location</span><span class="si">}</span><span class="s2">&quot;</span><span class="p">)</span> <span class="kn">from</span><span class="w"> </span><span class="nn">e</span>
<a id="__codelineno-0-675" name="__codelineno-0-675"></a> <span class="k">elif</span> <span class="n">e</span><span class="o">.</span><span class="n">errno</span> <span class="o">==</span> <span class="mi">13</span> <span class="ow">or</span> <span class="s2">&quot;AWS Error [code 15]&quot;</span> <span class="ow">in</span> <span class="nb">str</span><span class="p">(</span><span class="n">e</span><span class="p">):</span>
<a id="__codelineno-0-676" name="__codelineno-0-676"></a> <span class="k">raise</span> <span class="ne">PermissionError</span><span class="p">(</span><span class="sa">f</span><span class="s2">&quot;Cannot delete file, access denied: </span><span class="si">{</span><span class="n">location</span><span class="si">}</span><span class="s2">&quot;</span><span class="p">)</span> <span class="kn">from</span><span class="w"> </span><span class="nn">e</span>
<a id="__codelineno-0-677" name="__codelineno-0-677"></a> <span class="k">raise</span> <span class="c1"># pragma: no cover - If some other kind of OSError, raise the raw error</span>
<a id="__codelineno-0-678" name="__codelineno-0-678"></a>
<a id="__codelineno-0-679" name="__codelineno-0-679"></a> <span class="k">def</span><span class="w"> </span><span class="nf">__getstate__</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Any</span><span class="p">]:</span>
<a id="__codelineno-0-680" name="__codelineno-0-680"></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;Create a dictionary of the PyArrowFileIO fields used when pickling.&quot;&quot;&quot;</span>
<a id="__codelineno-0-681" name="__codelineno-0-681"></a> <span class="n">fileio_copy</span> <span class="o">=</span> <span class="n">copy</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="vm">__dict__</span><span class="p">)</span>
<a id="__codelineno-0-682" name="__codelineno-0-682"></a> <span class="n">fileio_copy</span><span class="p">[</span><span class="s2">&quot;fs_by_scheme&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span>
<a id="__codelineno-0-683" name="__codelineno-0-683"></a> <span class="k">return</span> <span class="n">fileio_copy</span>
<a id="__codelineno-0-684" name="__codelineno-0-684"></a>
<a id="__codelineno-0-685" name="__codelineno-0-685"></a> <span class="k">def</span><span class="w"> </span><span class="nf">__setstate__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">state</span><span class="p">:</span> <span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Any</span><span class="p">])</span> <span class="o">-&gt;</span> <span class="kc">None</span><span class="p">:</span>
<a id="__codelineno-0-686" name="__codelineno-0-686"></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;Deserialize the state into a PyArrowFileIO instance.&quot;&quot;&quot;</span>
<a id="__codelineno-0-687" name="__codelineno-0-687"></a> <span class="bp">self</span><span class="o">.</span><span class="vm">__dict__</span> <span class="o">=</span> <span class="n">state</span>
<a id="__codelineno-0-688" name="__codelineno-0-688"></a> <span class="bp">self</span><span class="o">.</span><span class="n">fs_by_scheme</span> <span class="o">=</span> <span class="n">lru_cache</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_initialize_fs</span><span class="p">)</span>
</code></pre></div></td></tr></table></div>
</details>
<div class="doc doc-children">
<div class="doc doc-object doc-function">
<h3 id="pyiceberg.io.pyarrow.PyArrowFileIO.__getstate__" class="doc doc-heading">
<code class="highlight language-python"><span class="n">__getstate__</span><span class="p">()</span></code>
<a href="#pyiceberg.io.pyarrow.PyArrowFileIO.__getstate__" class="headerlink" title="Permanent link">&para;</a></h3>
<div class="doc doc-contents ">
<p>Create a dictionary of the PyArrowFileIO fields used when pickling.</p>
<details class="quote">
<summary>Source code in <code>pyiceberg/io/pyarrow.py</code></summary>
<div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-679">679</a></span>
<span class="normal"><a href="#__codelineno-0-680">680</a></span>
<span class="normal"><a href="#__codelineno-0-681">681</a></span>
<span class="normal"><a href="#__codelineno-0-682">682</a></span>
<span class="normal"><a href="#__codelineno-0-683">683</a></span></pre></div></td><td class="code"><div><pre><span></span><code><a id="__codelineno-0-679" name="__codelineno-0-679"></a><span class="k">def</span><span class="w"> </span><span class="nf">__getstate__</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Any</span><span class="p">]:</span>
<a id="__codelineno-0-680" name="__codelineno-0-680"></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;Create a dictionary of the PyArrowFileIO fields used when pickling.&quot;&quot;&quot;</span>
<a id="__codelineno-0-681" name="__codelineno-0-681"></a> <span class="n">fileio_copy</span> <span class="o">=</span> <span class="n">copy</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="vm">__dict__</span><span class="p">)</span>
<a id="__codelineno-0-682" name="__codelineno-0-682"></a> <span class="n">fileio_copy</span><span class="p">[</span><span class="s2">&quot;fs_by_scheme&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span>
<a id="__codelineno-0-683" name="__codelineno-0-683"></a> <span class="k">return</span> <span class="n">fileio_copy</span>
</code></pre></div></td></tr></table></div>
</details>
</div>
</div>
<div class="doc doc-object doc-function">
<h3 id="pyiceberg.io.pyarrow.PyArrowFileIO.__setstate__" class="doc doc-heading">
<code class="highlight language-python"><span class="n">__setstate__</span><span class="p">(</span><span class="n">state</span><span class="p">)</span></code>
<a href="#pyiceberg.io.pyarrow.PyArrowFileIO.__setstate__" class="headerlink" title="Permanent link">&para;</a></h3>
<div class="doc doc-contents ">
<p>Deserialize the state into a PyArrowFileIO instance.</p>
<details class="quote">
<summary>Source code in <code>pyiceberg/io/pyarrow.py</code></summary>
<div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-685">685</a></span>
<span class="normal"><a href="#__codelineno-0-686">686</a></span>
<span class="normal"><a href="#__codelineno-0-687">687</a></span>
<span class="normal"><a href="#__codelineno-0-688">688</a></span></pre></div></td><td class="code"><div><pre><span></span><code><a id="__codelineno-0-685" name="__codelineno-0-685"></a><span class="k">def</span><span class="w"> </span><span class="nf">__setstate__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">state</span><span class="p">:</span> <span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Any</span><span class="p">])</span> <span class="o">-&gt;</span> <span class="kc">None</span><span class="p">:</span>
<a id="__codelineno-0-686" name="__codelineno-0-686"></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;Deserialize the state into a PyArrowFileIO instance.&quot;&quot;&quot;</span>
<a id="__codelineno-0-687" name="__codelineno-0-687"></a> <span class="bp">self</span><span class="o">.</span><span class="vm">__dict__</span> <span class="o">=</span> <span class="n">state</span>
<a id="__codelineno-0-688" name="__codelineno-0-688"></a> <span class="bp">self</span><span class="o">.</span><span class="n">fs_by_scheme</span> <span class="o">=</span> <span class="n">lru_cache</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_initialize_fs</span><span class="p">)</span>
</code></pre></div></td></tr></table></div>
</details>
</div>
</div>
<div class="doc doc-object doc-function">
<h3 id="pyiceberg.io.pyarrow.PyArrowFileIO.delete" class="doc doc-heading">
<code class="highlight language-python"><span class="n">delete</span><span class="p">(</span><span class="n">location</span><span class="p">)</span></code>
<a href="#pyiceberg.io.pyarrow.PyArrowFileIO.delete" class="headerlink" title="Permanent link">&para;</a></h3>
<div class="doc doc-contents ">
<p>Delete the file at the given location.</p>
<p><span class="doc-section-title">Parameters:</span></p>
<table>
<thead>
<tr>
<th>Name</th>
<th>Type</th>
<th>Description</th>
<th>Default</th>
</tr>
</thead>
<tbody>
<tr class="doc-section-item">
<td>
<code>location</code>
</td>
<td>
<code><span title="typing.Union">Union</span>[<span title="str">str</span>, <a class="autorefs autorefs-internal" title="InputFile (pyiceberg.io.InputFile)" href="../#pyiceberg.io.InputFile">InputFile</a>, <a class="autorefs autorefs-internal" title="OutputFile (pyiceberg.io.OutputFile)" href="../#pyiceberg.io.OutputFile">OutputFile</a>]</code>
</td>
<td>
<div class="doc-md-description">
<p>The URI of the file to delete. If an InputFile or OutputFile instance is provided,
its location attribute is used as the location to delete.</p>
</div>
</td>
<td>
<em>required</em>
</td>
</tr>
</tbody>
</table>
<p><span class="doc-section-title">Raises:</span></p>
<table>
<thead>
<tr>
<th>Type</th>
<th>Description</th>
</tr>
</thead>
<tbody>
<tr class="doc-section-item">
<td>
<code><span title="FileNotFoundError">FileNotFoundError</span></code>
</td>
<td>
<div class="doc-md-description">
<p>When the file at the provided location does not exist.</p>
</div>
</td>
</tr>
<tr class="doc-section-item">
<td>
<code><span title="PermissionError">PermissionError</span></code>
</td>
<td>
<div class="doc-md-description">
<p>If the file at the provided location cannot be accessed due to a permission error, such as
AWS error code 15.</p>
</div>
</td>
</tr>
</tbody>
</table>
<details class="quote">
<summary>Source code in <code>pyiceberg/io/pyarrow.py</code></summary>
<div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-650">650</a></span>
<span class="normal"><a href="#__codelineno-0-651">651</a></span>
<span class="normal"><a href="#__codelineno-0-652">652</a></span>
<span class="normal"><a href="#__codelineno-0-653">653</a></span>
<span class="normal"><a href="#__codelineno-0-654">654</a></span>
<span class="normal"><a href="#__codelineno-0-655">655</a></span>
<span class="normal"><a href="#__codelineno-0-656">656</a></span>
<span class="normal"><a href="#__codelineno-0-657">657</a></span>
<span class="normal"><a href="#__codelineno-0-658">658</a></span>
<span class="normal"><a href="#__codelineno-0-659">659</a></span>
<span class="normal"><a href="#__codelineno-0-660">660</a></span>
<span class="normal"><a href="#__codelineno-0-661">661</a></span>
<span class="normal"><a href="#__codelineno-0-662">662</a></span>
<span class="normal"><a href="#__codelineno-0-663">663</a></span>
<span class="normal"><a href="#__codelineno-0-664">664</a></span>
<span class="normal"><a href="#__codelineno-0-665">665</a></span>
<span class="normal"><a href="#__codelineno-0-666">666</a></span>
<span class="normal"><a href="#__codelineno-0-667">667</a></span>
<span class="normal"><a href="#__codelineno-0-668">668</a></span>
<span class="normal"><a href="#__codelineno-0-669">669</a></span>
<span class="normal"><a href="#__codelineno-0-670">670</a></span>
<span class="normal"><a href="#__codelineno-0-671">671</a></span>
<span class="normal"><a href="#__codelineno-0-672">672</a></span>
<span class="normal"><a href="#__codelineno-0-673">673</a></span>
<span class="normal"><a href="#__codelineno-0-674">674</a></span>
<span class="normal"><a href="#__codelineno-0-675">675</a></span>
<span class="normal"><a href="#__codelineno-0-676">676</a></span>
<span class="normal"><a href="#__codelineno-0-677">677</a></span></pre></div></td><td class="code"><div><pre><span></span><code><a id="__codelineno-0-650" name="__codelineno-0-650"></a><span class="k">def</span><span class="w"> </span><span class="nf">delete</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">location</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">InputFile</span><span class="p">,</span> <span class="n">OutputFile</span><span class="p">])</span> <span class="o">-&gt;</span> <span class="kc">None</span><span class="p">:</span>
<a id="__codelineno-0-651" name="__codelineno-0-651"></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;Delete the file at the given location.</span>
<a id="__codelineno-0-652" name="__codelineno-0-652"></a>
<a id="__codelineno-0-653" name="__codelineno-0-653"></a><span class="sd"> Args:</span>
<a id="__codelineno-0-654" name="__codelineno-0-654"></a><span class="sd"> location (Union[str, InputFile, OutputFile]): The URI to the file--if an InputFile instance or an OutputFile instance is provided,</span>
<a id="__codelineno-0-655" name="__codelineno-0-655"></a><span class="sd"> the location attribute for that instance is used as the location to delete.</span>
<a id="__codelineno-0-656" name="__codelineno-0-656"></a>
<a id="__codelineno-0-657" name="__codelineno-0-657"></a><span class="sd"> Raises:</span>
<a id="__codelineno-0-658" name="__codelineno-0-658"></a><span class="sd"> FileNotFoundError: When the file at the provided location does not exist.</span>
<a id="__codelineno-0-659" name="__codelineno-0-659"></a><span class="sd"> PermissionError: If the file at the provided location cannot be accessed due to a permission error such as</span>
<a id="__codelineno-0-660" name="__codelineno-0-660"></a><span class="sd"> an AWS error code 15.</span>
<a id="__codelineno-0-661" name="__codelineno-0-661"></a><span class="sd"> &quot;&quot;&quot;</span>
<a id="__codelineno-0-662" name="__codelineno-0-662"></a> <span class="n">str_location</span> <span class="o">=</span> <span class="n">location</span><span class="o">.</span><span class="n">location</span> <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">location</span><span class="p">,</span> <span class="p">(</span><span class="n">InputFile</span><span class="p">,</span> <span class="n">OutputFile</span><span class="p">))</span> <span class="k">else</span> <span class="n">location</span>
<a id="__codelineno-0-663" name="__codelineno-0-663"></a> <span class="n">scheme</span><span class="p">,</span> <span class="n">netloc</span><span class="p">,</span> <span class="n">path</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">parse_location</span><span class="p">(</span><span class="n">str_location</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">properties</span><span class="p">)</span>
<a id="__codelineno-0-664" name="__codelineno-0-664"></a> <span class="n">fs</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">fs_by_scheme</span><span class="p">(</span><span class="n">scheme</span><span class="p">,</span> <span class="n">netloc</span><span class="p">)</span>
<a id="__codelineno-0-665" name="__codelineno-0-665"></a>
<a id="__codelineno-0-666" name="__codelineno-0-666"></a> <span class="k">try</span><span class="p">:</span>
<a id="__codelineno-0-667" name="__codelineno-0-667"></a> <span class="n">fs</span><span class="o">.</span><span class="n">delete_file</span><span class="p">(</span><span class="n">path</span><span class="p">)</span>
<a id="__codelineno-0-668" name="__codelineno-0-668"></a> <span class="k">except</span> <span class="ne">FileNotFoundError</span><span class="p">:</span>
<a id="__codelineno-0-669" name="__codelineno-0-669"></a> <span class="k">raise</span>
<a id="__codelineno-0-670" name="__codelineno-0-670"></a> <span class="k">except</span> <span class="ne">PermissionError</span><span class="p">:</span>
<a id="__codelineno-0-671" name="__codelineno-0-671"></a> <span class="k">raise</span>
<a id="__codelineno-0-672" name="__codelineno-0-672"></a> <span class="k">except</span> <span class="ne">OSError</span> <span class="k">as</span> <span class="n">e</span><span class="p">:</span>
<a id="__codelineno-0-673" name="__codelineno-0-673"></a> <span class="k">if</span> <span class="n">e</span><span class="o">.</span><span class="n">errno</span> <span class="o">==</span> <span class="mi">2</span> <span class="ow">or</span> <span class="s2">&quot;Path does not exist&quot;</span> <span class="ow">in</span> <span class="nb">str</span><span class="p">(</span><span class="n">e</span><span class="p">):</span>
<a id="__codelineno-0-674" name="__codelineno-0-674"></a> <span class="k">raise</span> <span class="ne">FileNotFoundError</span><span class="p">(</span><span class="sa">f</span><span class="s2">&quot;Cannot delete file, does not exist: </span><span class="si">{</span><span class="n">location</span><span class="si">}</span><span class="s2">&quot;</span><span class="p">)</span> <span class="kn">from</span><span class="w"> </span><span class="nn">e</span>
<a id="__codelineno-0-675" name="__codelineno-0-675"></a> <span class="k">elif</span> <span class="n">e</span><span class="o">.</span><span class="n">errno</span> <span class="o">==</span> <span class="mi">13</span> <span class="ow">or</span> <span class="s2">&quot;AWS Error [code 15]&quot;</span> <span class="ow">in</span> <span class="nb">str</span><span class="p">(</span><span class="n">e</span><span class="p">):</span>
<a id="__codelineno-0-676" name="__codelineno-0-676"></a> <span class="k">raise</span> <span class="ne">PermissionError</span><span class="p">(</span><span class="sa">f</span><span class="s2">&quot;Cannot delete file, access denied: </span><span class="si">{</span><span class="n">location</span><span class="si">}</span><span class="s2">&quot;</span><span class="p">)</span> <span class="kn">from</span><span class="w"> </span><span class="nn">e</span>
<a id="__codelineno-0-677" name="__codelineno-0-677"></a> <span class="k">raise</span> <span class="c1"># pragma: no cover - If some other kind of OSError, raise the raw error</span>
</code></pre></div></td></tr></table></div>
</details>
</div>
</div>
<div class="doc doc-object doc-function">
<h3 id="pyiceberg.io.pyarrow.PyArrowFileIO.new_input" class="doc doc-heading">
<code class="highlight language-python"><span class="n">new_input</span><span class="p">(</span><span class="n">location</span><span class="p">)</span></code>
<a href="#pyiceberg.io.pyarrow.PyArrowFileIO.new_input" class="headerlink" title="Permanent link">&para;</a></h3>
<div class="doc doc-contents ">
<p>Get a PyArrowFile instance to read bytes from the file at the given location.</p>
<p><span class="doc-section-title">Parameters:</span></p>
<table>
<thead>
<tr>
<th>Name</th>
<th>Type</th>
<th>Description</th>
<th>Default</th>
</tr>
</thead>
<tbody>
<tr class="doc-section-item">
<td>
<code>location</code>
</td>
<td>
<code><span title="str">str</span></code>
</td>
<td>
<div class="doc-md-description">
<p>A URI or a path to a local file.</p>
</div>
</td>
<td>
<em>required</em>
</td>
</tr>
</tbody>
</table>
<p><span class="doc-section-title">Returns:</span></p>
<table>
<thead>
<tr>
<th>Name</th> <th>Type</th>
<th>Description</th>
</tr>
</thead>
<tbody>
<tr class="doc-section-item">
<td><code>PyArrowFile</code></td> <td>
<code><a class="autorefs autorefs-internal" title="PyArrowFile (pyiceberg.io.pyarrow.PyArrowFile)" href="#pyiceberg.io.pyarrow.PyArrowFile">PyArrowFile</a></code>
</td>
<td>
<div class="doc-md-description">
<p>A PyArrowFile instance for the given location.</p>
</div>
</td>
</tr>
</tbody>
</table>
<details class="quote">
<summary>Source code in <code>pyiceberg/io/pyarrow.py</code></summary>
<div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-616">616</a></span>
<span class="normal"><a href="#__codelineno-0-617">617</a></span>
<span class="normal"><a href="#__codelineno-0-618">618</a></span>
<span class="normal"><a href="#__codelineno-0-619">619</a></span>
<span class="normal"><a href="#__codelineno-0-620">620</a></span>
<span class="normal"><a href="#__codelineno-0-621">621</a></span>
<span class="normal"><a href="#__codelineno-0-622">622</a></span>
<span class="normal"><a href="#__codelineno-0-623">623</a></span>
<span class="normal"><a href="#__codelineno-0-624">624</a></span>
<span class="normal"><a href="#__codelineno-0-625">625</a></span>
<span class="normal"><a href="#__codelineno-0-626">626</a></span>
<span class="normal"><a href="#__codelineno-0-627">627</a></span>
<span class="normal"><a href="#__codelineno-0-628">628</a></span>
<span class="normal"><a href="#__codelineno-0-629">629</a></span>
<span class="normal"><a href="#__codelineno-0-630">630</a></span>
<span class="normal"><a href="#__codelineno-0-631">631</a></span></pre></div></td><td class="code"><div><pre><span></span><code><a id="__codelineno-0-616" name="__codelineno-0-616"></a><span class="k">def</span><span class="w"> </span><span class="nf">new_input</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">location</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">PyArrowFile</span><span class="p">:</span>
<a id="__codelineno-0-617" name="__codelineno-0-617"></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;Get a PyArrowFile instance to read bytes from the file at the given location.</span>
<a id="__codelineno-0-618" name="__codelineno-0-618"></a>
<a id="__codelineno-0-619" name="__codelineno-0-619"></a><span class="sd"> Args:</span>
<a id="__codelineno-0-620" name="__codelineno-0-620"></a><span class="sd"> location (str): A URI or a path to a local file.</span>
<a id="__codelineno-0-621" name="__codelineno-0-621"></a>
<a id="__codelineno-0-622" name="__codelineno-0-622"></a><span class="sd"> Returns:</span>
<a id="__codelineno-0-623" name="__codelineno-0-623"></a><span class="sd"> PyArrowFile: A PyArrowFile instance for the given location.</span>
<a id="__codelineno-0-624" name="__codelineno-0-624"></a><span class="sd"> &quot;&quot;&quot;</span>
<a id="__codelineno-0-625" name="__codelineno-0-625"></a> <span class="n">scheme</span><span class="p">,</span> <span class="n">netloc</span><span class="p">,</span> <span class="n">path</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">parse_location</span><span class="p">(</span><span class="n">location</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">properties</span><span class="p">)</span>
<a id="__codelineno-0-626" name="__codelineno-0-626"></a> <span class="k">return</span> <span class="n">PyArrowFile</span><span class="p">(</span>
<a id="__codelineno-0-627" name="__codelineno-0-627"></a> <span class="n">fs</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">fs_by_scheme</span><span class="p">(</span><span class="n">scheme</span><span class="p">,</span> <span class="n">netloc</span><span class="p">),</span>
<a id="__codelineno-0-628" name="__codelineno-0-628"></a> <span class="n">location</span><span class="o">=</span><span class="n">location</span><span class="p">,</span>
<a id="__codelineno-0-629" name="__codelineno-0-629"></a> <span class="n">path</span><span class="o">=</span><span class="n">path</span><span class="p">,</span>
<a id="__codelineno-0-630" name="__codelineno-0-630"></a> <span class="n">buffer_size</span><span class="o">=</span><span class="nb">int</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">properties</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">BUFFER_SIZE</span><span class="p">,</span> <span class="n">ONE_MEGABYTE</span><span class="p">)),</span>
<a id="__codelineno-0-631" name="__codelineno-0-631"></a> <span class="p">)</span>
</code></pre></div></td></tr></table></div>
</details>
</div>
</div>
<div class="doc doc-object doc-function">
<h3 id="pyiceberg.io.pyarrow.PyArrowFileIO.new_output" class="doc doc-heading">
<code class="highlight language-python"><span class="n">new_output</span><span class="p">(</span><span class="n">location</span><span class="p">)</span></code>
<a href="#pyiceberg.io.pyarrow.PyArrowFileIO.new_output" class="headerlink" title="Permanent link">&para;</a></h3>
<div class="doc doc-contents ">
<p>Get a PyArrowFile instance to write bytes to the file at the given location.</p>
<p><span class="doc-section-title">Parameters:</span></p>
<table>
<thead>
<tr>
<th>Name</th>
<th>Type</th>
<th>Description</th>
<th>Default</th>
</tr>
</thead>
<tbody>
<tr class="doc-section-item">
<td>
<code>location</code>
</td>
<td>
<code><span title="str">str</span></code>
</td>
<td>
<div class="doc-md-description">
<p>A URI or a path to a local file.</p>
</div>
</td>
<td>
<em>required</em>
</td>
</tr>
</tbody>
</table>
<p><span class="doc-section-title">Returns:</span></p>
<table>
<thead>
<tr>
<th>Name</th> <th>Type</th>
<th>Description</th>
</tr>
</thead>
<tbody>
<tr class="doc-section-item">
<td><code>PyArrowFile</code></td> <td>
<code><a class="autorefs autorefs-internal" title="PyArrowFile (pyiceberg.io.pyarrow.PyArrowFile)" href="#pyiceberg.io.pyarrow.PyArrowFile">PyArrowFile</a></code>
</td>
<td>
<div class="doc-md-description">
<p>A PyArrowFile instance for the given location.</p>
</div>
</td>
</tr>
</tbody>
</table>
<details class="quote">
<summary>Source code in <code>pyiceberg/io/pyarrow.py</code></summary>
<div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-633">633</a></span>
<span class="normal"><a href="#__codelineno-0-634">634</a></span>
<span class="normal"><a href="#__codelineno-0-635">635</a></span>
<span class="normal"><a href="#__codelineno-0-636">636</a></span>
<span class="normal"><a href="#__codelineno-0-637">637</a></span>
<span class="normal"><a href="#__codelineno-0-638">638</a></span>
<span class="normal"><a href="#__codelineno-0-639">639</a></span>
<span class="normal"><a href="#__codelineno-0-640">640</a></span>
<span class="normal"><a href="#__codelineno-0-641">641</a></span>
<span class="normal"><a href="#__codelineno-0-642">642</a></span>
<span class="normal"><a href="#__codelineno-0-643">643</a></span>
<span class="normal"><a href="#__codelineno-0-644">644</a></span>
<span class="normal"><a href="#__codelineno-0-645">645</a></span>
<span class="normal"><a href="#__codelineno-0-646">646</a></span>
<span class="normal"><a href="#__codelineno-0-647">647</a></span>
<span class="normal"><a href="#__codelineno-0-648">648</a></span></pre></div></td><td class="code"><div><pre><span></span><code><a id="__codelineno-0-633" name="__codelineno-0-633"></a><span class="k">def</span><span class="w"> </span><span class="nf">new_output</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">location</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">PyArrowFile</span><span class="p">:</span>
<a id="__codelineno-0-634" name="__codelineno-0-634"></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;Get a PyArrowFile instance to write bytes to the file at the given location.</span>
<a id="__codelineno-0-635" name="__codelineno-0-635"></a>
<a id="__codelineno-0-636" name="__codelineno-0-636"></a><span class="sd"> Args:</span>
<a id="__codelineno-0-637" name="__codelineno-0-637"></a><span class="sd"> location (str): A URI or a path to a local file.</span>
<a id="__codelineno-0-638" name="__codelineno-0-638"></a>
<a id="__codelineno-0-639" name="__codelineno-0-639"></a><span class="sd"> Returns:</span>
<a id="__codelineno-0-640" name="__codelineno-0-640"></a><span class="sd"> PyArrowFile: A PyArrowFile instance for the given location.</span>
<a id="__codelineno-0-641" name="__codelineno-0-641"></a><span class="sd"> &quot;&quot;&quot;</span>
<a id="__codelineno-0-642" name="__codelineno-0-642"></a> <span class="n">scheme</span><span class="p">,</span> <span class="n">netloc</span><span class="p">,</span> <span class="n">path</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">parse_location</span><span class="p">(</span><span class="n">location</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">properties</span><span class="p">)</span>
<a id="__codelineno-0-643" name="__codelineno-0-643"></a> <span class="k">return</span> <span class="n">PyArrowFile</span><span class="p">(</span>
<a id="__codelineno-0-644" name="__codelineno-0-644"></a> <span class="n">fs</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">fs_by_scheme</span><span class="p">(</span><span class="n">scheme</span><span class="p">,</span> <span class="n">netloc</span><span class="p">),</span>
<a id="__codelineno-0-645" name="__codelineno-0-645"></a> <span class="n">location</span><span class="o">=</span><span class="n">location</span><span class="p">,</span>
<a id="__codelineno-0-646" name="__codelineno-0-646"></a> <span class="n">path</span><span class="o">=</span><span class="n">path</span><span class="p">,</span>
<a id="__codelineno-0-647" name="__codelineno-0-647"></a> <span class="n">buffer_size</span><span class="o">=</span><span class="nb">int</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">properties</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">BUFFER_SIZE</span><span class="p">,</span> <span class="n">ONE_MEGABYTE</span><span class="p">)),</span>
<a id="__codelineno-0-648" name="__codelineno-0-648"></a> <span class="p">)</span>
</code></pre></div></td></tr></table></div>
</details>
</div>
</div>
<div class="doc doc-object doc-function">
<h3 id="pyiceberg.io.pyarrow.PyArrowFileIO.parse_location" class="doc doc-heading">
<code class="highlight language-python"><span class="n">parse_location</span><span class="p">(</span><span class="n">location</span><span class="p">,</span> <span class="n">properties</span><span class="o">=</span><span class="n">EMPTY_DICT</span><span class="p">)</span></code>
<span class="doc doc-labels">
<small class="doc doc-label doc-label-staticmethod"><code>staticmethod</code></small>
</span>
<a href="#pyiceberg.io.pyarrow.PyArrowFileIO.parse_location" class="headerlink" title="Permanent link">&para;</a></h3>
<div class="doc doc-contents ">
<p>Return (scheme, netloc, path) for the given location.</p>
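<p>If the location has no scheme, the scheme and netloc are taken from the <code>DEFAULT_SCHEME</code> (default <code>file</code>) and <code>DEFAULT_NETLOC</code> (default empty) properties, and the path is the absolute form of the location. For <code>hdfs</code> and <code>viewfs</code> locations the path is the URI path; for any other scheme the path is the netloc followed by the URI path.</p>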
<details class="quote">
<summary>Source code in <code>pyiceberg/io/pyarrow.py</code></summary>
<div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-396">396</a></span>
<span class="normal"><a href="#__codelineno-0-397">397</a></span>
<span class="normal"><a href="#__codelineno-0-398">398</a></span>
<span class="normal"><a href="#__codelineno-0-399">399</a></span>
<span class="normal"><a href="#__codelineno-0-400">400</a></span>
<span class="normal"><a href="#__codelineno-0-401">401</a></span>
<span class="normal"><a href="#__codelineno-0-402">402</a></span>
<span class="normal"><a href="#__codelineno-0-403">403</a></span>
<span class="normal"><a href="#__codelineno-0-404">404</a></span>
<span class="normal"><a href="#__codelineno-0-405">405</a></span>
<span class="normal"><a href="#__codelineno-0-406">406</a></span>
<span class="normal"><a href="#__codelineno-0-407">407</a></span>
<span class="normal"><a href="#__codelineno-0-408">408</a></span>
<span class="normal"><a href="#__codelineno-0-409">409</a></span>
<span class="normal"><a href="#__codelineno-0-410">410</a></span>
<span class="normal"><a href="#__codelineno-0-411">411</a></span></pre></div></td><td class="code"><div><pre><span></span><code><a id="__codelineno-0-396" name="__codelineno-0-396"></a><span class="nd">@staticmethod</span>
<a id="__codelineno-0-397" name="__codelineno-0-397"></a><span class="k">def</span><span class="w"> </span><span class="nf">parse_location</span><span class="p">(</span><span class="n">location</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span> <span class="n">properties</span><span class="p">:</span> <span class="n">Properties</span> <span class="o">=</span> <span class="n">EMPTY_DICT</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">Tuple</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="nb">str</span><span class="p">,</span> <span class="nb">str</span><span class="p">]:</span>
<a id="__codelineno-0-398" name="__codelineno-0-398"></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;Return (scheme, netloc, path) for the given location.</span>
<a id="__codelineno-0-399" name="__codelineno-0-399"></a>
<a id="__codelineno-0-400" name="__codelineno-0-400"></a><span class="sd"> Uses DEFAULT_SCHEME and DEFAULT_NETLOC if scheme/netloc are missing.</span>
<a id="__codelineno-0-401" name="__codelineno-0-401"></a><span class="sd"> &quot;&quot;&quot;</span>
<a id="__codelineno-0-402" name="__codelineno-0-402"></a> <span class="n">uri</span> <span class="o">=</span> <span class="n">urlparse</span><span class="p">(</span><span class="n">location</span><span class="p">)</span>
<a id="__codelineno-0-403" name="__codelineno-0-403"></a>
<a id="__codelineno-0-404" name="__codelineno-0-404"></a> <span class="k">if</span> <span class="ow">not</span> <span class="n">uri</span><span class="o">.</span><span class="n">scheme</span><span class="p">:</span>
<a id="__codelineno-0-405" name="__codelineno-0-405"></a> <span class="n">default_scheme</span> <span class="o">=</span> <span class="n">properties</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">&quot;DEFAULT_SCHEME&quot;</span><span class="p">,</span> <span class="s2">&quot;file&quot;</span><span class="p">)</span>
<a id="__codelineno-0-406" name="__codelineno-0-406"></a> <span class="n">default_netloc</span> <span class="o">=</span> <span class="n">properties</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">&quot;DEFAULT_NETLOC&quot;</span><span class="p">,</span> <span class="s2">&quot;&quot;</span><span class="p">)</span>
<a id="__codelineno-0-407" name="__codelineno-0-407"></a> <span class="k">return</span> <span class="n">default_scheme</span><span class="p">,</span> <span class="n">default_netloc</span><span class="p">,</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">abspath</span><span class="p">(</span><span class="n">location</span><span class="p">)</span>
<a id="__codelineno-0-408" name="__codelineno-0-408"></a> <span class="k">elif</span> <span class="n">uri</span><span class="o">.</span><span class="n">scheme</span> <span class="ow">in</span> <span class="p">(</span><span class="s2">&quot;hdfs&quot;</span><span class="p">,</span> <span class="s2">&quot;viewfs&quot;</span><span class="p">):</span>
<a id="__codelineno-0-409" name="__codelineno-0-409"></a> <span class="k">return</span> <span class="n">uri</span><span class="o">.</span><span class="n">scheme</span><span class="p">,</span> <span class="n">uri</span><span class="o">.</span><span class="n">netloc</span><span class="p">,</span> <span class="n">uri</span><span class="o">.</span><span class="n">path</span>
<a id="__codelineno-0-410" name="__codelineno-0-410"></a> <span class="k">else</span><span class="p">:</span>
<a id="__codelineno-0-411" name="__codelineno-0-411"></a> <span class="k">return</span> <span class="n">uri</span><span class="o">.</span><span class="n">scheme</span><span class="p">,</span> <span class="n">uri</span><span class="o">.</span><span class="n">netloc</span><span class="p">,</span> <span class="sa">f</span><span class="s2">&quot;</span><span class="si">{</span><span class="n">uri</span><span class="o">.</span><span class="n">netloc</span><span class="si">}{</span><span class="n">uri</span><span class="o">.</span><span class="n">path</span><span class="si">}</span><span class="s2">&quot;</span>
</code></pre></div></td></tr></table></div>
</details>
</div>
</div>
</div>
</div>
</div>
<div class="doc doc-object doc-class">
<h2 id="pyiceberg.io.pyarrow.PyArrowSchemaVisitor" class="doc doc-heading">
<code>PyArrowSchemaVisitor</code>
<a href="#pyiceberg.io.pyarrow.PyArrowSchemaVisitor" class="headerlink" title="Permanent link">&para;</a></h2>
<div class="doc doc-contents ">
<p class="doc doc-class-bases">
Bases: <code><span title="typing.Generic">Generic</span>[<span title="pyiceberg.io.pyarrow.T">T</span>]</code>, <code><span title="abc.ABC">ABC</span></code></p>
<details class="quote">
<summary>Source code in <code>pyiceberg/io/pyarrow.py</code></summary>
<div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-1204">1204</a></span>
<span class="normal"><a href="#__codelineno-0-1205">1205</a></span>
<span class="normal"><a href="#__codelineno-0-1206">1206</a></span>
<span class="normal"><a href="#__codelineno-0-1207">1207</a></span>
<span class="normal"><a href="#__codelineno-0-1208">1208</a></span>
<span class="normal"><a href="#__codelineno-0-1209">1209</a></span>
<span class="normal"><a href="#__codelineno-0-1210">1210</a></span>
<span class="normal"><a href="#__codelineno-0-1211">1211</a></span>
<span class="normal"><a href="#__codelineno-0-1212">1212</a></span>
<span class="normal"><a href="#__codelineno-0-1213">1213</a></span>
<span class="normal"><a href="#__codelineno-0-1214">1214</a></span>
<span class="normal"><a href="#__codelineno-0-1215">1215</a></span>
<span class="normal"><a href="#__codelineno-0-1216">1216</a></span>
<span class="normal"><a href="#__codelineno-0-1217">1217</a></span>
<span class="normal"><a href="#__codelineno-0-1218">1218</a></span>
<span class="normal"><a href="#__codelineno-0-1219">1219</a></span>
<span class="normal"><a href="#__codelineno-0-1220">1220</a></span>
<span class="normal"><a href="#__codelineno-0-1221">1221</a></span>
<span class="normal"><a href="#__codelineno-0-1222">1222</a></span>
<span class="normal"><a href="#__codelineno-0-1223">1223</a></span>
<span class="normal"><a href="#__codelineno-0-1224">1224</a></span>
<span class="normal"><a href="#__codelineno-0-1225">1225</a></span>
<span class="normal"><a href="#__codelineno-0-1226">1226</a></span>
<span class="normal"><a href="#__codelineno-0-1227">1227</a></span>
<span class="normal"><a href="#__codelineno-0-1228">1228</a></span>
<span class="normal"><a href="#__codelineno-0-1229">1229</a></span>
<span class="normal"><a href="#__codelineno-0-1230">1230</a></span>
<span class="normal"><a href="#__codelineno-0-1231">1231</a></span>
<span class="normal"><a href="#__codelineno-0-1232">1232</a></span>
<span class="normal"><a href="#__codelineno-0-1233">1233</a></span>
<span class="normal"><a href="#__codelineno-0-1234">1234</a></span>
<span class="normal"><a href="#__codelineno-0-1235">1235</a></span>
<span class="normal"><a href="#__codelineno-0-1236">1236</a></span>
<span class="normal"><a href="#__codelineno-0-1237">1237</a></span>
<span class="normal"><a href="#__codelineno-0-1238">1238</a></span>
<span class="normal"><a href="#__codelineno-0-1239">1239</a></span>
<span class="normal"><a href="#__codelineno-0-1240">1240</a></span>
<span class="normal"><a href="#__codelineno-0-1241">1241</a></span>
<span class="normal"><a href="#__codelineno-0-1242">1242</a></span>
<span class="normal"><a href="#__codelineno-0-1243">1243</a></span>
<span class="normal"><a href="#__codelineno-0-1244">1244</a></span>
<span class="normal"><a href="#__codelineno-0-1245">1245</a></span>
<span class="normal"><a href="#__codelineno-0-1246">1246</a></span>
<span class="normal"><a href="#__codelineno-0-1247">1247</a></span>
<span class="normal"><a href="#__codelineno-0-1248">1248</a></span>
<span class="normal"><a href="#__codelineno-0-1249">1249</a></span>
<span class="normal"><a href="#__codelineno-0-1250">1250</a></span>
<span class="normal"><a href="#__codelineno-0-1251">1251</a></span></pre></div></td><td class="code"><div><pre><span></span><code><a id="__codelineno-0-1204" name="__codelineno-0-1204"></a><span class="k">class</span><span class="w"> </span><span class="nc">PyArrowSchemaVisitor</span><span class="p">(</span><span class="n">Generic</span><span class="p">[</span><span class="n">T</span><span class="p">],</span> <span class="n">ABC</span><span class="p">):</span>
<a id="__codelineno-0-1205" name="__codelineno-0-1205"></a> <span class="k">def</span><span class="w"> </span><span class="nf">before_field</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">field</span><span class="p">:</span> <span class="n">pa</span><span class="o">.</span><span class="n">Field</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="kc">None</span><span class="p">:</span>
<a id="__codelineno-0-1206" name="__codelineno-0-1206"></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;Override this method to perform an action immediately before visiting a field.&quot;&quot;&quot;</span>
<a id="__codelineno-0-1207" name="__codelineno-0-1207"></a>
<a id="__codelineno-0-1208" name="__codelineno-0-1208"></a> <span class="k">def</span><span class="w"> </span><span class="nf">after_field</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">field</span><span class="p">:</span> <span class="n">pa</span><span class="o">.</span><span class="n">Field</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="kc">None</span><span class="p">:</span>
<a id="__codelineno-0-1209" name="__codelineno-0-1209"></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;Override this method to perform an action immediately after visiting a field.&quot;&quot;&quot;</span>
<a id="__codelineno-0-1210" name="__codelineno-0-1210"></a>
<a id="__codelineno-0-1211" name="__codelineno-0-1211"></a> <span class="k">def</span><span class="w"> </span><span class="nf">before_list_element</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">element</span><span class="p">:</span> <span class="n">pa</span><span class="o">.</span><span class="n">Field</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="kc">None</span><span class="p">:</span>
<a id="__codelineno-0-1212" name="__codelineno-0-1212"></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;Override this method to perform an action immediately before visiting an element within a ListType.&quot;&quot;&quot;</span>
<a id="__codelineno-0-1213" name="__codelineno-0-1213"></a>
<a id="__codelineno-0-1214" name="__codelineno-0-1214"></a> <span class="k">def</span><span class="w"> </span><span class="nf">after_list_element</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">element</span><span class="p">:</span> <span class="n">pa</span><span class="o">.</span><span class="n">Field</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="kc">None</span><span class="p">:</span>
<a id="__codelineno-0-1215" name="__codelineno-0-1215"></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;Override this method to perform an action immediately after visiting an element within a ListType.&quot;&quot;&quot;</span>
<a id="__codelineno-0-1216" name="__codelineno-0-1216"></a>
<a id="__codelineno-0-1217" name="__codelineno-0-1217"></a> <span class="k">def</span><span class="w"> </span><span class="nf">before_map_key</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">key</span><span class="p">:</span> <span class="n">pa</span><span class="o">.</span><span class="n">Field</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="kc">None</span><span class="p">:</span>
<a id="__codelineno-0-1218" name="__codelineno-0-1218"></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;Override this method to perform an action immediately before visiting a key within a MapType.&quot;&quot;&quot;</span>
<a id="__codelineno-0-1219" name="__codelineno-0-1219"></a>
<a id="__codelineno-0-1220" name="__codelineno-0-1220"></a> <span class="k">def</span><span class="w"> </span><span class="nf">after_map_key</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">key</span><span class="p">:</span> <span class="n">pa</span><span class="o">.</span><span class="n">Field</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="kc">None</span><span class="p">:</span>
<a id="__codelineno-0-1221" name="__codelineno-0-1221"></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;Override this method to perform an action immediately after visiting a key within a MapType.&quot;&quot;&quot;</span>
<a id="__codelineno-0-1222" name="__codelineno-0-1222"></a>
<a id="__codelineno-0-1223" name="__codelineno-0-1223"></a> <span class="k">def</span><span class="w"> </span><span class="nf">before_map_value</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="n">pa</span><span class="o">.</span><span class="n">Field</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="kc">None</span><span class="p">:</span>
<a id="__codelineno-0-1224" name="__codelineno-0-1224"></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;Override this method to perform an action immediately before visiting a value within a MapType.&quot;&quot;&quot;</span>
<a id="__codelineno-0-1225" name="__codelineno-0-1225"></a>
<a id="__codelineno-0-1226" name="__codelineno-0-1226"></a> <span class="k">def</span><span class="w"> </span><span class="nf">after_map_value</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="n">pa</span><span class="o">.</span><span class="n">Field</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="kc">None</span><span class="p">:</span>
<a id="__codelineno-0-1227" name="__codelineno-0-1227"></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;Override this method to perform an action immediately after visiting a value within a MapType.&quot;&quot;&quot;</span>
<a id="__codelineno-0-1228" name="__codelineno-0-1228"></a>
<a id="__codelineno-0-1229" name="__codelineno-0-1229"></a> <span class="nd">@abstractmethod</span>
<a id="__codelineno-0-1230" name="__codelineno-0-1230"></a> <span class="k">def</span><span class="w"> </span><span class="nf">schema</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">schema</span><span class="p">:</span> <span class="n">pa</span><span class="o">.</span><span class="n">Schema</span><span class="p">,</span> <span class="n">struct_result</span><span class="p">:</span> <span class="n">T</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">T</span><span class="p">:</span>
<a id="__codelineno-0-1231" name="__codelineno-0-1231"></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;Visit a schema.&quot;&quot;&quot;</span>
<a id="__codelineno-0-1232" name="__codelineno-0-1232"></a>
<a id="__codelineno-0-1233" name="__codelineno-0-1233"></a> <span class="nd">@abstractmethod</span>
<a id="__codelineno-0-1234" name="__codelineno-0-1234"></a> <span class="k">def</span><span class="w"> </span><span class="nf">struct</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">struct</span><span class="p">:</span> <span class="n">pa</span><span class="o">.</span><span class="n">StructType</span><span class="p">,</span> <span class="n">field_results</span><span class="p">:</span> <span class="n">List</span><span class="p">[</span><span class="n">T</span><span class="p">])</span> <span class="o">-&gt;</span> <span class="n">T</span><span class="p">:</span>
<a id="__codelineno-0-1235" name="__codelineno-0-1235"></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;Visit a struct.&quot;&quot;&quot;</span>
<a id="__codelineno-0-1236" name="__codelineno-0-1236"></a>
<a id="__codelineno-0-1237" name="__codelineno-0-1237"></a> <span class="nd">@abstractmethod</span>
<a id="__codelineno-0-1238" name="__codelineno-0-1238"></a> <span class="k">def</span><span class="w"> </span><span class="nf">field</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">field</span><span class="p">:</span> <span class="n">pa</span><span class="o">.</span><span class="n">Field</span><span class="p">,</span> <span class="n">field_result</span><span class="p">:</span> <span class="n">T</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">T</span><span class="p">:</span>
<a id="__codelineno-0-1239" name="__codelineno-0-1239"></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;Visit a field.&quot;&quot;&quot;</span>
<a id="__codelineno-0-1240" name="__codelineno-0-1240"></a>
<a id="__codelineno-0-1241" name="__codelineno-0-1241"></a> <span class="nd">@abstractmethod</span>
<a id="__codelineno-0-1242" name="__codelineno-0-1242"></a> <span class="k">def</span><span class="w"> </span><span class="nf">list</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">list_type</span><span class="p">:</span> <span class="n">pa</span><span class="o">.</span><span class="n">ListType</span><span class="p">,</span> <span class="n">element_result</span><span class="p">:</span> <span class="n">T</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">T</span><span class="p">:</span>
<a id="__codelineno-0-1243" name="__codelineno-0-1243"></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;Visit a list.&quot;&quot;&quot;</span>
<a id="__codelineno-0-1244" name="__codelineno-0-1244"></a>
<a id="__codelineno-0-1245" name="__codelineno-0-1245"></a> <span class="nd">@abstractmethod</span>
<a id="__codelineno-0-1246" name="__codelineno-0-1246"></a> <span class="k">def</span><span class="w"> </span><span class="nf">map</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">map_type</span><span class="p">:</span> <span class="n">pa</span><span class="o">.</span><span class="n">MapType</span><span class="p">,</span> <span class="n">key_result</span><span class="p">:</span> <span class="n">T</span><span class="p">,</span> <span class="n">value_result</span><span class="p">:</span> <span class="n">T</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">T</span><span class="p">:</span>
<a id="__codelineno-0-1247" name="__codelineno-0-1247"></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;Visit a map.&quot;&quot;&quot;</span>
<a id="__codelineno-0-1248" name="__codelineno-0-1248"></a>
<a id="__codelineno-0-1249" name="__codelineno-0-1249"></a> <span class="nd">@abstractmethod</span>
<a id="__codelineno-0-1250" name="__codelineno-0-1250"></a> <span class="k">def</span><span class="w"> </span><span class="nf">primitive</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">primitive</span><span class="p">:</span> <span class="n">pa</span><span class="o">.</span><span class="n">DataType</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">T</span><span class="p">:</span>
<a id="__codelineno-0-1251" name="__codelineno-0-1251"></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;Visit a primitive type.&quot;&quot;&quot;</span>
</code></pre></div></td></tr></table></div>
</details>
<div class="doc doc-children">
<div class="doc doc-object doc-function">
<h3 id="pyiceberg.io.pyarrow.PyArrowSchemaVisitor.after_field" class="doc doc-heading">
<code class="highlight language-python"><span class="n">after_field</span><span class="p">(</span><span class="n">field</span><span class="p">)</span></code>
<a href="#pyiceberg.io.pyarrow.PyArrowSchemaVisitor.after_field" class="headerlink" title="Permanent link">&para;</a></h3>
<div class="doc doc-contents ">
<p>Override this method to perform an action immediately after visiting a field.</p>
<details class="quote">
<summary>Source code in <code>pyiceberg/io/pyarrow.py</code></summary>
<div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-1208">1208</a></span>
<span class="normal"><a href="#__codelineno-0-1209">1209</a></span></pre></div></td><td class="code"><div><pre><span></span><code><a id="__codelineno-0-1208" name="__codelineno-0-1208"></a><span class="k">def</span><span class="w"> </span><span class="nf">after_field</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">field</span><span class="p">:</span> <span class="n">pa</span><span class="o">.</span><span class="n">Field</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="kc">None</span><span class="p">:</span>
<a id="__codelineno-0-1209" name="__codelineno-0-1209"></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;Override this method to perform an action immediately after visiting a field.&quot;&quot;&quot;</span>
</code></pre></div></td></tr></table></div>
</details>
</div>
</div>
<div class="doc doc-object doc-function">
<h3 id="pyiceberg.io.pyarrow.PyArrowSchemaVisitor.after_list_element" class="doc doc-heading">
<code class="highlight language-python"><span class="n">after_list_element</span><span class="p">(</span><span class="n">element</span><span class="p">)</span></code>
<a href="#pyiceberg.io.pyarrow.PyArrowSchemaVisitor.after_list_element" class="headerlink" title="Permanent link">&para;</a></h3>
<div class="doc doc-contents ">
<p>Override this method to perform an action immediately after visiting an element within a ListType.</p>
<details class="quote">
<summary>Source code in <code>pyiceberg/io/pyarrow.py</code></summary>
<div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-1214">1214</a></span>
<span class="normal"><a href="#__codelineno-0-1215">1215</a></span></pre></div></td><td class="code"><div><pre><span></span><code><a id="__codelineno-0-1214" name="__codelineno-0-1214"></a><span class="k">def</span><span class="w"> </span><span class="nf">after_list_element</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">element</span><span class="p">:</span> <span class="n">pa</span><span class="o">.</span><span class="n">Field</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="kc">None</span><span class="p">:</span>
<a id="__codelineno-0-1215" name="__codelineno-0-1215"></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;Override this method to perform an action immediately after visiting an element within a ListType.&quot;&quot;&quot;</span>
</code></pre></div></td></tr></table></div>
</details>
</div>
</div>
<div class="doc doc-object doc-function">
<h3 id="pyiceberg.io.pyarrow.PyArrowSchemaVisitor.after_map_key" class="doc doc-heading">
<code class="highlight language-python"><span class="n">after_map_key</span><span class="p">(</span><span class="n">key</span><span class="p">)</span></code>
<a href="#pyiceberg.io.pyarrow.PyArrowSchemaVisitor.after_map_key" class="headerlink" title="Permanent link">&para;</a></h3>
<div class="doc doc-contents ">
<p>Override this method to perform an action immediately after visiting a key within a MapType.</p>
<details class="quote">
<summary>Source code in <code>pyiceberg/io/pyarrow.py</code></summary>
<div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-1220">1220</a></span>
<span class="normal"><a href="#__codelineno-0-1221">1221</a></span></pre></div></td><td class="code"><div><pre><span></span><code><a id="__codelineno-0-1220" name="__codelineno-0-1220"></a><span class="k">def</span><span class="w"> </span><span class="nf">after_map_key</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">key</span><span class="p">:</span> <span class="n">pa</span><span class="o">.</span><span class="n">Field</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="kc">None</span><span class="p">:</span>
<a id="__codelineno-0-1221" name="__codelineno-0-1221"></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;Override this method to perform an action immediately after visiting a key within a MapType.&quot;&quot;&quot;</span>
</code></pre></div></td></tr></table></div>
</details>
</div>
</div>
<div class="doc doc-object doc-function">
<h3 id="pyiceberg.io.pyarrow.PyArrowSchemaVisitor.after_map_value" class="doc doc-heading">
<code class="highlight language-python"><span class="n">after_map_value</span><span class="p">(</span><span class="n">value</span><span class="p">)</span></code>
<a href="#pyiceberg.io.pyarrow.PyArrowSchemaVisitor.after_map_value" class="headerlink" title="Permanent link">&para;</a></h3>
<div class="doc doc-contents ">
<p>Override this method to perform an action immediately after visiting a value within a MapType.</p>
<details class="quote">
<summary>Source code in <code>pyiceberg/io/pyarrow.py</code></summary>
<div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-1226">1226</a></span>
<span class="normal"><a href="#__codelineno-0-1227">1227</a></span></pre></div></td><td class="code"><div><pre><span></span><code><a id="__codelineno-0-1226" name="__codelineno-0-1226"></a><span class="k">def</span><span class="w"> </span><span class="nf">after_map_value</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="n">pa</span><span class="o">.</span><span class="n">Field</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="kc">None</span><span class="p">:</span>
<a id="__codelineno-0-1227" name="__codelineno-0-1227"></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;Override this method to perform an action immediately after visiting a value within a MapType.&quot;&quot;&quot;</span>
</code></pre></div></td></tr></table></div>
</details>
</div>
</div>
<div class="doc doc-object doc-function">
<h3 id="pyiceberg.io.pyarrow.PyArrowSchemaVisitor.before_field" class="doc doc-heading">
<code class="highlight language-python"><span class="n">before_field</span><span class="p">(</span><span class="n">field</span><span class="p">)</span></code>
<a href="#pyiceberg.io.pyarrow.PyArrowSchemaVisitor.before_field" class="headerlink" title="Permanent link">&para;</a></h3>
<div class="doc doc-contents ">
<p>Override this method to perform an action immediately before visiting a field.</p>
<details class="quote">
<summary>Source code in <code>pyiceberg/io/pyarrow.py</code></summary>
<div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-1205">1205</a></span>
<span class="normal"><a href="#__codelineno-0-1206">1206</a></span></pre></div></td><td class="code"><div><pre><span></span><code><a id="__codelineno-0-1205" name="__codelineno-0-1205"></a><span class="k">def</span><span class="w"> </span><span class="nf">before_field</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">field</span><span class="p">:</span> <span class="n">pa</span><span class="o">.</span><span class="n">Field</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="kc">None</span><span class="p">:</span>
<a id="__codelineno-0-1206" name="__codelineno-0-1206"></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;Override this method to perform an action immediately before visiting a field.&quot;&quot;&quot;</span>
</code></pre></div></td></tr></table></div>
</details>
</div>
</div>
<div class="doc doc-object doc-function">
<h3 id="pyiceberg.io.pyarrow.PyArrowSchemaVisitor.before_list_element" class="doc doc-heading">
<code class="highlight language-python"><span class="n">before_list_element</span><span class="p">(</span><span class="n">element</span><span class="p">)</span></code>
<a href="#pyiceberg.io.pyarrow.PyArrowSchemaVisitor.before_list_element" class="headerlink" title="Permanent link">&para;</a></h3>
<div class="doc doc-contents ">
<p>Override this method to perform an action immediately before visiting an element within a ListType.</p>
<details class="quote">
<summary>Source code in <code>pyiceberg/io/pyarrow.py</code></summary>
<div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-1211">1211</a></span>
<span class="normal"><a href="#__codelineno-0-1212">1212</a></span></pre></div></td><td class="code"><div><pre><span></span><code><a id="__codelineno-0-1211" name="__codelineno-0-1211"></a><span class="k">def</span><span class="w"> </span><span class="nf">before_list_element</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">element</span><span class="p">:</span> <span class="n">pa</span><span class="o">.</span><span class="n">Field</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="kc">None</span><span class="p">:</span>
<a id="__codelineno-0-1212" name="__codelineno-0-1212"></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;Override this method to perform an action immediately before visiting an element within a ListType.&quot;&quot;&quot;</span>
</code></pre></div></td></tr></table></div>
</details>
</div>
</div>
<div class="doc doc-object doc-function">
<h3 id="pyiceberg.io.pyarrow.PyArrowSchemaVisitor.before_map_key" class="doc doc-heading">
<code class="highlight language-python"><span class="n">before_map_key</span><span class="p">(</span><span class="n">key</span><span class="p">)</span></code>
<a href="#pyiceberg.io.pyarrow.PyArrowSchemaVisitor.before_map_key" class="headerlink" title="Permanent link">&para;</a></h3>
<div class="doc doc-contents ">
<p>Override this method to perform an action immediately before visiting a key within a MapType.</p>
<details class="quote">
<summary>Source code in <code>pyiceberg/io/pyarrow.py</code></summary>
<div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-1217">1217</a></span>
<span class="normal"><a href="#__codelineno-0-1218">1218</a></span></pre></div></td><td class="code"><div><pre><span></span><code><a id="__codelineno-0-1217" name="__codelineno-0-1217"></a><span class="k">def</span><span class="w"> </span><span class="nf">before_map_key</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">key</span><span class="p">:</span> <span class="n">pa</span><span class="o">.</span><span class="n">Field</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="kc">None</span><span class="p">:</span>
<a id="__codelineno-0-1218" name="__codelineno-0-1218"></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;Override this method to perform an action immediately before visiting a key within a MapType.&quot;&quot;&quot;</span>
</code></pre></div></td></tr></table></div>
</details>
</div>
</div>
<div class="doc doc-object doc-function">
<h3 id="pyiceberg.io.pyarrow.PyArrowSchemaVisitor.before_map_value" class="doc doc-heading">
<code class="highlight language-python"><span class="n">before_map_value</span><span class="p">(</span><span class="n">value</span><span class="p">)</span></code>
<a href="#pyiceberg.io.pyarrow.PyArrowSchemaVisitor.before_map_value" class="headerlink" title="Permanent link">&para;</a></h3>
<div class="doc doc-contents ">
<p>Override this method to perform an action immediately before visiting a value within a MapType.</p>
<details class="quote">
<summary>Source code in <code>pyiceberg/io/pyarrow.py</code></summary>
<div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-1223">1223</a></span>
<span class="normal"><a href="#__codelineno-0-1224">1224</a></span></pre></div></td><td class="code"><div><pre><span></span><code><a id="__codelineno-0-1223" name="__codelineno-0-1223"></a><span class="k">def</span><span class="w"> </span><span class="nf">before_map_value</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="n">pa</span><span class="o">.</span><span class="n">Field</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="kc">None</span><span class="p">:</span>
<a id="__codelineno-0-1224" name="__codelineno-0-1224"></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;Override this method to perform an action immediately before visiting a value within a MapType.&quot;&quot;&quot;</span>
</code></pre></div></td></tr></table></div>
</details>
</div>
</div>
<div class="doc doc-object doc-function">
<h3 id="pyiceberg.io.pyarrow.PyArrowSchemaVisitor.field" class="doc doc-heading">
<code class="highlight language-python"><span class="n">field</span><span class="p">(</span><span class="n">field</span><span class="p">,</span> <span class="n">field_result</span><span class="p">)</span></code>
<span class="doc doc-labels">
<small class="doc doc-label doc-label-abstractmethod"><code>abstractmethod</code></small>
</span>
<a href="#pyiceberg.io.pyarrow.PyArrowSchemaVisitor.field" class="headerlink" title="Permanent link">&para;</a></h3>
<div class="doc doc-contents ">
<p>Visit a field.</p>
<details class="quote">
<summary>Source code in <code>pyiceberg/io/pyarrow.py</code></summary>
<div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-1237">1237</a></span>
<span class="normal"><a href="#__codelineno-0-1238">1238</a></span>
<span class="normal"><a href="#__codelineno-0-1239">1239</a></span></pre></div></td><td class="code"><div><pre><span></span><code><a id="__codelineno-0-1237" name="__codelineno-0-1237"></a><span class="nd">@abstractmethod</span>
<a id="__codelineno-0-1238" name="__codelineno-0-1238"></a><span class="k">def</span><span class="w"> </span><span class="nf">field</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">field</span><span class="p">:</span> <span class="n">pa</span><span class="o">.</span><span class="n">Field</span><span class="p">,</span> <span class="n">field_result</span><span class="p">:</span> <span class="n">T</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">T</span><span class="p">:</span>
<a id="__codelineno-0-1239" name="__codelineno-0-1239"></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;Visit a field.&quot;&quot;&quot;</span>
</code></pre></div></td></tr></table></div>
</details>
</div>
</div>
<div class="doc doc-object doc-function">
<h3 id="pyiceberg.io.pyarrow.PyArrowSchemaVisitor.list" class="doc doc-heading">
<code class="highlight language-python"><span class="nb">list</span><span class="p">(</span><span class="n">list_type</span><span class="p">,</span> <span class="n">element_result</span><span class="p">)</span></code>
<span class="doc doc-labels">
<small class="doc doc-label doc-label-abstractmethod"><code>abstractmethod</code></small>
</span>
<a href="#pyiceberg.io.pyarrow.PyArrowSchemaVisitor.list" class="headerlink" title="Permanent link">&para;</a></h3>
<div class="doc doc-contents ">
<p>Visit a list.</p>
<details class="quote">
<summary>Source code in <code>pyiceberg/io/pyarrow.py</code></summary>
<div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-1241">1241</a></span>
<span class="normal"><a href="#__codelineno-0-1242">1242</a></span>
<span class="normal"><a href="#__codelineno-0-1243">1243</a></span></pre></div></td><td class="code"><div><pre><span></span><code><a id="__codelineno-0-1241" name="__codelineno-0-1241"></a><span class="nd">@abstractmethod</span>
<a id="__codelineno-0-1242" name="__codelineno-0-1242"></a><span class="k">def</span><span class="w"> </span><span class="nf">list</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">list_type</span><span class="p">:</span> <span class="n">pa</span><span class="o">.</span><span class="n">ListType</span><span class="p">,</span> <span class="n">element_result</span><span class="p">:</span> <span class="n">T</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">T</span><span class="p">:</span>
<a id="__codelineno-0-1243" name="__codelineno-0-1243"></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;Visit a list.&quot;&quot;&quot;</span>
</code></pre></div></td></tr></table></div>
</details>
</div>
</div>
<div class="doc doc-object doc-function">
<h3 id="pyiceberg.io.pyarrow.PyArrowSchemaVisitor.map" class="doc doc-heading">
<code class="highlight language-python"><span class="nb">map</span><span class="p">(</span><span class="n">map_type</span><span class="p">,</span> <span class="n">key_result</span><span class="p">,</span> <span class="n">value_result</span><span class="p">)</span></code>
<span class="doc doc-labels">
<small class="doc doc-label doc-label-abstractmethod"><code>abstractmethod</code></small>
</span>
<a href="#pyiceberg.io.pyarrow.PyArrowSchemaVisitor.map" class="headerlink" title="Permanent link">&para;</a></h3>
<div class="doc doc-contents ">
<p>Visit a map.</p>
<details class="quote">
<summary>Source code in <code>pyiceberg/io/pyarrow.py</code></summary>
<div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-1245">1245</a></span>
<span class="normal"><a href="#__codelineno-0-1246">1246</a></span>
<span class="normal"><a href="#__codelineno-0-1247">1247</a></span></pre></div></td><td class="code"><div><pre><span></span><code><a id="__codelineno-0-1245" name="__codelineno-0-1245"></a><span class="nd">@abstractmethod</span>
<a id="__codelineno-0-1246" name="__codelineno-0-1246"></a><span class="k">def</span><span class="w"> </span><span class="nf">map</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">map_type</span><span class="p">:</span> <span class="n">pa</span><span class="o">.</span><span class="n">MapType</span><span class="p">,</span> <span class="n">key_result</span><span class="p">:</span> <span class="n">T</span><span class="p">,</span> <span class="n">value_result</span><span class="p">:</span> <span class="n">T</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">T</span><span class="p">:</span>
<a id="__codelineno-0-1247" name="__codelineno-0-1247"></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;Visit a map.&quot;&quot;&quot;</span>
</code></pre></div></td></tr></table></div>
</details>
</div>
</div>
<div class="doc doc-object doc-function">
<h3 id="pyiceberg.io.pyarrow.PyArrowSchemaVisitor.primitive" class="doc doc-heading">
<code class="highlight language-python"><span class="n">primitive</span><span class="p">(</span><span class="n">primitive</span><span class="p">)</span></code>
<span class="doc doc-labels">
<small class="doc doc-label doc-label-abstractmethod"><code>abstractmethod</code></small>
</span>
<a href="#pyiceberg.io.pyarrow.PyArrowSchemaVisitor.primitive" class="headerlink" title="Permanent link">&para;</a></h3>
<div class="doc doc-contents ">
<p>Visit a primitive type.</p>
<details class="quote">
<summary>Source code in <code>pyiceberg/io/pyarrow.py</code></summary>
<div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-1249">1249</a></span>
<span class="normal"><a href="#__codelineno-0-1250">1250</a></span>
<span class="normal"><a href="#__codelineno-0-1251">1251</a></span></pre></div></td><td class="code"><div><pre><span></span><code><a id="__codelineno-0-1249" name="__codelineno-0-1249"></a><span class="nd">@abstractmethod</span>
<a id="__codelineno-0-1250" name="__codelineno-0-1250"></a><span class="k">def</span><span class="w"> </span><span class="nf">primitive</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">primitive</span><span class="p">:</span> <span class="n">pa</span><span class="o">.</span><span class="n">DataType</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">T</span><span class="p">:</span>
<a id="__codelineno-0-1251" name="__codelineno-0-1251"></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;Visit a primitive type.&quot;&quot;&quot;</span>
</code></pre></div></td></tr></table></div>
</details>
</div>
</div>
<div class="doc doc-object doc-function">
<h3 id="pyiceberg.io.pyarrow.PyArrowSchemaVisitor.schema" class="doc doc-heading">
<code class="highlight language-python"><span class="n">schema</span><span class="p">(</span><span class="n">schema</span><span class="p">,</span> <span class="n">struct_result</span><span class="p">)</span></code>
<span class="doc doc-labels">
<small class="doc doc-label doc-label-abstractmethod"><code>abstractmethod</code></small>
</span>
<a href="#pyiceberg.io.pyarrow.PyArrowSchemaVisitor.schema" class="headerlink" title="Permanent link">&para;</a></h3>
<div class="doc doc-contents ">
<p>Visit a schema.</p>
<details class="quote">
<summary>Source code in <code>pyiceberg/io/pyarrow.py</code></summary>
<div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-1229">1229</a></span>
<span class="normal"><a href="#__codelineno-0-1230">1230</a></span>
<span class="normal"><a href="#__codelineno-0-1231">1231</a></span></pre></div></td><td class="code"><div><pre><span></span><code><a id="__codelineno-0-1229" name="__codelineno-0-1229"></a><span class="nd">@abstractmethod</span>
<a id="__codelineno-0-1230" name="__codelineno-0-1230"></a><span class="k">def</span><span class="w"> </span><span class="nf">schema</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">schema</span><span class="p">:</span> <span class="n">pa</span><span class="o">.</span><span class="n">Schema</span><span class="p">,</span> <span class="n">struct_result</span><span class="p">:</span> <span class="n">T</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">T</span><span class="p">:</span>
<a id="__codelineno-0-1231" name="__codelineno-0-1231"></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;Visit a schema.&quot;&quot;&quot;</span>
</code></pre></div></td></tr></table></div>
</details>
</div>
</div>
<div class="doc doc-object doc-function">
<h3 id="pyiceberg.io.pyarrow.PyArrowSchemaVisitor.struct" class="doc doc-heading">
<code class="highlight language-python"><span class="n">struct</span><span class="p">(</span><span class="n">struct</span><span class="p">,</span> <span class="n">field_results</span><span class="p">)</span></code>
<span class="doc doc-labels">
<small class="doc doc-label doc-label-abstractmethod"><code>abstractmethod</code></small>
</span>
<a href="#pyiceberg.io.pyarrow.PyArrowSchemaVisitor.struct" class="headerlink" title="Permanent link">&para;</a></h3>
<div class="doc doc-contents ">
<p>Visit a struct.</p>
<details class="quote">
<summary>Source code in <code>pyiceberg/io/pyarrow.py</code></summary>
<div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-1233">1233</a></span>
<span class="normal"><a href="#__codelineno-0-1234">1234</a></span>
<span class="normal"><a href="#__codelineno-0-1235">1235</a></span></pre></div></td><td class="code"><div><pre><span></span><code><a id="__codelineno-0-1233" name="__codelineno-0-1233"></a><span class="nd">@abstractmethod</span>
<a id="__codelineno-0-1234" name="__codelineno-0-1234"></a><span class="k">def</span><span class="w"> </span><span class="nf">struct</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">struct</span><span class="p">:</span> <span class="n">pa</span><span class="o">.</span><span class="n">StructType</span><span class="p">,</span> <span class="n">field_results</span><span class="p">:</span> <span class="n">List</span><span class="p">[</span><span class="n">T</span><span class="p">])</span> <span class="o">-&gt;</span> <span class="n">T</span><span class="p">:</span>
<a id="__codelineno-0-1235" name="__codelineno-0-1235"></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;Visit a struct.&quot;&quot;&quot;</span>
</code></pre></div></td></tr></table></div>
</details>
</div>
</div>
</div>
</div>
</div>
<div class="doc doc-object doc-class">
<h2 id="pyiceberg.io.pyarrow.UnsupportedPyArrowTypeException" class="doc doc-heading">
<code>UnsupportedPyArrowTypeException</code>
<a href="#pyiceberg.io.pyarrow.UnsupportedPyArrowTypeException" class="headerlink" title="Permanent link">&para;</a></h2>
<div class="doc doc-contents ">
<p class="doc doc-class-bases">
Bases: <code><span title="Exception">Exception</span></code></p>
<p>Cannot convert PyArrow type to corresponding Iceberg type.</p>
<details class="quote">
<summary>Source code in <code>pyiceberg/io/pyarrow.py</code></summary>
<div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-241">241</a></span>
<span class="normal"><a href="#__codelineno-0-242">242</a></span>
<span class="normal"><a href="#__codelineno-0-243">243</a></span>
<span class="normal"><a href="#__codelineno-0-244">244</a></span>
<span class="normal"><a href="#__codelineno-0-245">245</a></span>
<span class="normal"><a href="#__codelineno-0-246">246</a></span></pre></div></td><td class="code"><div><pre><span></span><code><a id="__codelineno-0-241" name="__codelineno-0-241"></a><span class="k">class</span><span class="w"> </span><span class="nc">UnsupportedPyArrowTypeException</span><span class="p">(</span><span class="ne">Exception</span><span class="p">):</span>
<a id="__codelineno-0-242" name="__codelineno-0-242"></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;Cannot convert PyArrow type to corresponding Iceberg type.&quot;&quot;&quot;</span>
<a id="__codelineno-0-243" name="__codelineno-0-243"></a>
<a id="__codelineno-0-244" name="__codelineno-0-244"></a> <span class="k">def</span><span class="w"> </span><span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">field</span><span class="p">:</span> <span class="n">pa</span><span class="o">.</span><span class="n">Field</span><span class="p">,</span> <span class="o">*</span><span class="n">args</span><span class="p">:</span> <span class="n">Any</span><span class="p">):</span>
<a id="__codelineno-0-245" name="__codelineno-0-245"></a> <span class="bp">self</span><span class="o">.</span><span class="n">field</span> <span class="o">=</span> <span class="n">field</span>
<a id="__codelineno-0-246" name="__codelineno-0-246"></a> <span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="fm">__init__</span><span class="p">(</span><span class="o">*</span><span class="n">args</span><span class="p">)</span>
</code></pre></div></td></tr></table></div>
</details>
<div class="doc doc-children">
</div>
</div>
</div>
<div class="doc doc-object doc-function">
<h2 id="pyiceberg.io.pyarrow.compute_statistics_plan" class="doc doc-heading">
<code class="highlight language-python"><span class="n">compute_statistics_plan</span><span class="p">(</span><span class="n">schema</span><span class="p">,</span> <span class="n">table_properties</span><span class="p">)</span></code>
<a href="#pyiceberg.io.pyarrow.compute_statistics_plan" class="headerlink" title="Permanent link">&para;</a></h2>
<div class="doc doc-contents ">
<p>Compute the statistics plan for all columns.</p>
<p>The resulting list is assumed to have the same length and same order as the columns in the pyarrow table.
This allows the list to map from the column index to the Iceberg column ID.
For each element, the desired metrics collection that was provided by the user in the configuration
is computed and then adjusted according to the data type of the column. For nested columns the minimum
and maximum values are not computed. And truncation is only applied to text or binary strings.</p>
<p><span class="doc-section-title">Parameters:</span></p>
<table>
<thead>
<tr>
<th>Name</th>
<th>Type</th>
<th>Description</th>
<th>Default</th>
</tr>
</thead>
<tbody>
<tr class="doc-section-item">
<td>
<code>table_properties</code>
</td>
<td>
<code>Dict[str, str]</code>
</td>
<td>
<div class="doc-md-description">
<p>The Iceberg table metadata properties (from <code>pyiceberg.table.metadata.TableMetadata</code>).
They are required to compute the mapping of column position to Iceberg schema type id. It's also
used to set the mode for column metrics collection.</p>
</div>
</td>
<td>
<em>required</em>
</td>
</tr>
</tbody>
</table>
<details class="quote">
<summary>Source code in <code>pyiceberg/io/pyarrow.py</code></summary>
<div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-2260">2260</a></span>
<span class="normal"><a href="#__codelineno-0-2261">2261</a></span>
<span class="normal"><a href="#__codelineno-0-2262">2262</a></span>
<span class="normal"><a href="#__codelineno-0-2263">2263</a></span>
<span class="normal"><a href="#__codelineno-0-2264">2264</a></span>
<span class="normal"><a href="#__codelineno-0-2265">2265</a></span>
<span class="normal"><a href="#__codelineno-0-2266">2266</a></span>
<span class="normal"><a href="#__codelineno-0-2267">2267</a></span>
<span class="normal"><a href="#__codelineno-0-2268">2268</a></span>
<span class="normal"><a href="#__codelineno-0-2269">2269</a></span>
<span class="normal"><a href="#__codelineno-0-2270">2270</a></span>
<span class="normal"><a href="#__codelineno-0-2271">2271</a></span>
<span class="normal"><a href="#__codelineno-0-2272">2272</a></span>
<span class="normal"><a href="#__codelineno-0-2273">2273</a></span>
<span class="normal"><a href="#__codelineno-0-2274">2274</a></span>
<span class="normal"><a href="#__codelineno-0-2275">2275</a></span>
<span class="normal"><a href="#__codelineno-0-2276">2276</a></span>
<span class="normal"><a href="#__codelineno-0-2277">2277</a></span>
<span class="normal"><a href="#__codelineno-0-2278">2278</a></span>
<span class="normal"><a href="#__codelineno-0-2279">2279</a></span>
<span class="normal"><a href="#__codelineno-0-2280">2280</a></span>
<span class="normal"><a href="#__codelineno-0-2281">2281</a></span>
<span class="normal"><a href="#__codelineno-0-2282">2282</a></span></pre></div></td><td class="code"><div><pre><span></span><code><a id="__codelineno-0-2260" name="__codelineno-0-2260"></a><span class="k">def</span><span class="w"> </span><span class="nf">compute_statistics_plan</span><span class="p">(</span>
<a id="__codelineno-0-2261" name="__codelineno-0-2261"></a> <span class="n">schema</span><span class="p">:</span> <span class="n">Schema</span><span class="p">,</span>
<a id="__codelineno-0-2262" name="__codelineno-0-2262"></a> <span class="n">table_properties</span><span class="p">:</span> <span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="nb">str</span><span class="p">],</span>
<a id="__codelineno-0-2263" name="__codelineno-0-2263"></a><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">Dict</span><span class="p">[</span><span class="nb">int</span><span class="p">,</span> <span class="n">StatisticsCollector</span><span class="p">]:</span>
<a id="__codelineno-0-2264" name="__codelineno-0-2264"></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<a id="__codelineno-0-2265" name="__codelineno-0-2265"></a><span class="sd"> Compute the statistics plan for all columns.</span>
<a id="__codelineno-0-2266" name="__codelineno-0-2266"></a>
<a id="__codelineno-0-2267" name="__codelineno-0-2267"></a><span class="sd"> The resulting list is assumed to have the same length and same order as the columns in the pyarrow table.</span>
<a id="__codelineno-0-2268" name="__codelineno-0-2268"></a><span class="sd"> This allows the list to map from the column index to the Iceberg column ID.</span>
<a id="__codelineno-0-2269" name="__codelineno-0-2269"></a><span class="sd"> For each element, the desired metrics collection that was provided by the user in the configuration</span>
<a id="__codelineno-0-2270" name="__codelineno-0-2270"></a><span class="sd"> is computed and then adjusted according to the data type of the column. For nested columns the minimum</span>
<a id="__codelineno-0-2271" name="__codelineno-0-2271"></a><span class="sd"> and maximum values are not computed. And truncation is only applied to text or binary strings.</span>
<a id="__codelineno-0-2272" name="__codelineno-0-2272"></a>
<a id="__codelineno-0-2273" name="__codelineno-0-2273"></a><span class="sd"> Args:</span>
<a id="__codelineno-0-2274" name="__codelineno-0-2274"></a><span class="sd"> table_properties (from pyiceberg.table.metadata.TableMetadata): The Iceberg table metadata properties.</span>
<a id="__codelineno-0-2275" name="__codelineno-0-2275"></a><span class="sd"> They are required to compute the mapping of column position to iceberg schema type id. It&#39;s also</span>
<a id="__codelineno-0-2276" name="__codelineno-0-2276"></a><span class="sd"> used to set the mode for column metrics collection</span>
<a id="__codelineno-0-2277" name="__codelineno-0-2277"></a><span class="sd"> &quot;&quot;&quot;</span>
<a id="__codelineno-0-2278" name="__codelineno-0-2278"></a> <span class="n">stats_cols</span> <span class="o">=</span> <span class="n">pre_order_visit</span><span class="p">(</span><span class="n">schema</span><span class="p">,</span> <span class="n">PyArrowStatisticsCollector</span><span class="p">(</span><span class="n">schema</span><span class="p">,</span> <span class="n">table_properties</span><span class="p">))</span>
<a id="__codelineno-0-2279" name="__codelineno-0-2279"></a> <span class="n">result</span><span class="p">:</span> <span class="n">Dict</span><span class="p">[</span><span class="nb">int</span><span class="p">,</span> <span class="n">StatisticsCollector</span><span class="p">]</span> <span class="o">=</span> <span class="p">{}</span>
<a id="__codelineno-0-2280" name="__codelineno-0-2280"></a> <span class="k">for</span> <span class="n">stats_col</span> <span class="ow">in</span> <span class="n">stats_cols</span><span class="p">:</span>
<a id="__codelineno-0-2281" name="__codelineno-0-2281"></a> <span class="n">result</span><span class="p">[</span><span class="n">stats_col</span><span class="o">.</span><span class="n">field_id</span><span class="p">]</span> <span class="o">=</span> <span class="n">stats_col</span>
<a id="__codelineno-0-2282" name="__codelineno-0-2282"></a> <span class="k">return</span> <span class="n">result</span>
</code></pre></div></td></tr></table></div>
</details>
</div>
</div>
<div class="doc doc-object doc-function">
<h2 id="pyiceberg.io.pyarrow.data_file_statistics_from_parquet_metadata" class="doc doc-heading">
<code class="highlight language-python"><span class="n">data_file_statistics_from_parquet_metadata</span><span class="p">(</span><span class="n">parquet_metadata</span><span class="p">,</span> <span class="n">stats_columns</span><span class="p">,</span> <span class="n">parquet_column_mapping</span><span class="p">)</span></code>
<a href="#pyiceberg.io.pyarrow.data_file_statistics_from_parquet_metadata" class="headerlink" title="Permanent link">&para;</a></h2>
<div class="doc doc-contents ">
<p>Compute and return DataFileStatistics that includes the following.</p>
<ul>
<li>record_count</li>
<li>column_sizes</li>
<li>value_counts</li>
<li>null_value_counts</li>
<li>nan_value_counts</li>
<li>column_aggregates</li>
<li>split_offsets</li>
</ul>
<p><span class="doc-section-title">Parameters:</span></p>
<table>
<thead>
<tr>
<th>Name</th>
<th>Type</th>
<th>Description</th>
<th>Default</th>
</tr>
</thead>
<tbody>
<tr class="doc-section-item">
<td>
<code>parquet_metadata</code>
</td>
<td>
<code><span title="pyarrow.parquet.FileMetaData">FileMetaData</span></code>
</td>
<td>
<div class="doc-md-description">
<p>A pyarrow metadata object.</p>
</div>
</td>
<td>
<em>required</em>
</td>
</tr>
<tr class="doc-section-item">
<td>
<code>stats_columns</code>
</td>
<td>
<code><span title="typing.Dict">Dict</span>[<span title="int">int</span>, <span title="pyiceberg.io.pyarrow.StatisticsCollector">StatisticsCollector</span>]</code>
</td>
<td>
<div class="doc-md-description">
<p>The statistics gathering plan. It is required to
set the mode for column metrics collection</p>
</div>
</td>
<td>
<em>required</em>
</td>
</tr>
<tr class="doc-section-item">
<td>
<code>parquet_column_mapping</code>
</td>
<td>
<code><span title="typing.Dict">Dict</span>[<span title="str">str</span>, <span title="int">int</span>]</code>
</td>
<td>
<div class="doc-md-description">
<p>The mapping of the parquet column path to the field ID.</p>
</div>
</td>
<td>
<em>required</em>
</td>
</tr>
</tbody>
</table>
<details class="quote">
<summary>Source code in <code>pyiceberg/io/pyarrow.py</code></summary>
<div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-2428">2428</a></span>
<span class="normal"><a href="#__codelineno-0-2429">2429</a></span>
<span class="normal"><a href="#__codelineno-0-2430">2430</a></span>
<span class="normal"><a href="#__codelineno-0-2431">2431</a></span>
<span class="normal"><a href="#__codelineno-0-2432">2432</a></span>
<span class="normal"><a href="#__codelineno-0-2433">2433</a></span>
<span class="normal"><a href="#__codelineno-0-2434">2434</a></span>
<span class="normal"><a href="#__codelineno-0-2435">2435</a></span>
<span class="normal"><a href="#__codelineno-0-2436">2436</a></span>
<span class="normal"><a href="#__codelineno-0-2437">2437</a></span>
<span class="normal"><a href="#__codelineno-0-2438">2438</a></span>
<span class="normal"><a href="#__codelineno-0-2439">2439</a></span>
<span class="normal"><a href="#__codelineno-0-2440">2440</a></span>
<span class="normal"><a href="#__codelineno-0-2441">2441</a></span>
<span class="normal"><a href="#__codelineno-0-2442">2442</a></span>
<span class="normal"><a href="#__codelineno-0-2443">2443</a></span>
<span class="normal"><a href="#__codelineno-0-2444">2444</a></span>
<span class="normal"><a href="#__codelineno-0-2445">2445</a></span>
<span class="normal"><a href="#__codelineno-0-2446">2446</a></span>
<span class="normal"><a href="#__codelineno-0-2447">2447</a></span>
<span class="normal"><a href="#__codelineno-0-2448">2448</a></span>
<span class="normal"><a href="#__codelineno-0-2449">2449</a></span>
<span class="normal"><a href="#__codelineno-0-2450">2450</a></span>
<span class="normal"><a href="#__codelineno-0-2451">2451</a></span>
<span class="normal"><a href="#__codelineno-0-2452">2452</a></span>
<span class="normal"><a href="#__codelineno-0-2453">2453</a></span>
<span class="normal"><a href="#__codelineno-0-2454">2454</a></span>
<span class="normal"><a href="#__codelineno-0-2455">2455</a></span>
<span class="normal"><a href="#__codelineno-0-2456">2456</a></span>
<span class="normal"><a href="#__codelineno-0-2457">2457</a></span>
<span class="normal"><a href="#__codelineno-0-2458">2458</a></span>
<span class="normal"><a href="#__codelineno-0-2459">2459</a></span>
<span class="normal"><a href="#__codelineno-0-2460">2460</a></span>
<span class="normal"><a href="#__codelineno-0-2461">2461</a></span>
<span class="normal"><a href="#__codelineno-0-2462">2462</a></span>
<span class="normal"><a href="#__codelineno-0-2463">2463</a></span>
<span class="normal"><a href="#__codelineno-0-2464">2464</a></span>
<span class="normal"><a href="#__codelineno-0-2465">2465</a></span>
<span class="normal"><a href="#__codelineno-0-2466">2466</a></span>
<span class="normal"><a href="#__codelineno-0-2467">2467</a></span>
<span class="normal"><a href="#__codelineno-0-2468">2468</a></span>
<span class="normal"><a href="#__codelineno-0-2469">2469</a></span>
<span class="normal"><a href="#__codelineno-0-2470">2470</a></span>
<span class="normal"><a href="#__codelineno-0-2471">2471</a></span>
<span class="normal"><a href="#__codelineno-0-2472">2472</a></span>
<span class="normal"><a href="#__codelineno-0-2473">2473</a></span>
<span class="normal"><a href="#__codelineno-0-2474">2474</a></span>
<span class="normal"><a href="#__codelineno-0-2475">2475</a></span>
<span class="normal"><a href="#__codelineno-0-2476">2476</a></span>
<span class="normal"><a href="#__codelineno-0-2477">2477</a></span>
<span class="normal"><a href="#__codelineno-0-2478">2478</a></span>
<span class="normal"><a href="#__codelineno-0-2479">2479</a></span>
<span class="normal"><a href="#__codelineno-0-2480">2480</a></span>
<span class="normal"><a href="#__codelineno-0-2481">2481</a></span>
<span class="normal"><a href="#__codelineno-0-2482">2482</a></span>
<span class="normal"><a href="#__codelineno-0-2483">2483</a></span>
<span class="normal"><a href="#__codelineno-0-2484">2484</a></span>
<span class="normal"><a href="#__codelineno-0-2485">2485</a></span>
<span class="normal"><a href="#__codelineno-0-2486">2486</a></span>
<span class="normal"><a href="#__codelineno-0-2487">2487</a></span>
<span class="normal"><a href="#__codelineno-0-2488">2488</a></span>
<span class="normal"><a href="#__codelineno-0-2489">2489</a></span>
<span class="normal"><a href="#__codelineno-0-2490">2490</a></span>
<span class="normal"><a href="#__codelineno-0-2491">2491</a></span>
<span class="normal"><a href="#__codelineno-0-2492">2492</a></span>
<span class="normal"><a href="#__codelineno-0-2493">2493</a></span>
<span class="normal"><a href="#__codelineno-0-2494">2494</a></span>
<span class="normal"><a href="#__codelineno-0-2495">2495</a></span>
<span class="normal"><a href="#__codelineno-0-2496">2496</a></span>
<span class="normal"><a href="#__codelineno-0-2497">2497</a></span>
<span class="normal"><a href="#__codelineno-0-2498">2498</a></span>
<span class="normal"><a href="#__codelineno-0-2499">2499</a></span>
<span class="normal"><a href="#__codelineno-0-2500">2500</a></span>
<span class="normal"><a href="#__codelineno-0-2501">2501</a></span>
<span class="normal"><a href="#__codelineno-0-2502">2502</a></span>
<span class="normal"><a href="#__codelineno-0-2503">2503</a></span>
<span class="normal"><a href="#__codelineno-0-2504">2504</a></span>
<span class="normal"><a href="#__codelineno-0-2505">2505</a></span>
<span class="normal"><a href="#__codelineno-0-2506">2506</a></span>
<span class="normal"><a href="#__codelineno-0-2507">2507</a></span>
<span class="normal"><a href="#__codelineno-0-2508">2508</a></span>
<span class="normal"><a href="#__codelineno-0-2509">2509</a></span>
<span class="normal"><a href="#__codelineno-0-2510">2510</a></span>
<span class="normal"><a href="#__codelineno-0-2511">2511</a></span>
<span class="normal"><a href="#__codelineno-0-2512">2512</a></span>
<span class="normal"><a href="#__codelineno-0-2513">2513</a></span>
<span class="normal"><a href="#__codelineno-0-2514">2514</a></span>
<span class="normal"><a href="#__codelineno-0-2515">2515</a></span>
<span class="normal"><a href="#__codelineno-0-2516">2516</a></span>
<span class="normal"><a href="#__codelineno-0-2517">2517</a></span>
<span class="normal"><a href="#__codelineno-0-2518">2518</a></span>
<span class="normal"><a href="#__codelineno-0-2519">2519</a></span>
<span class="normal"><a href="#__codelineno-0-2520">2520</a></span>
<span class="normal"><a href="#__codelineno-0-2521">2521</a></span>
<span class="normal"><a href="#__codelineno-0-2522">2522</a></span>
<span class="normal"><a href="#__codelineno-0-2523">2523</a></span>
<span class="normal"><a href="#__codelineno-0-2524">2524</a></span>
<span class="normal"><a href="#__codelineno-0-2525">2525</a></span>
<span class="normal"><a href="#__codelineno-0-2526">2526</a></span>
<span class="normal"><a href="#__codelineno-0-2527">2527</a></span>
<span class="normal"><a href="#__codelineno-0-2528">2528</a></span>
<span class="normal"><a href="#__codelineno-0-2529">2529</a></span>
<span class="normal"><a href="#__codelineno-0-2530">2530</a></span>
<span class="normal"><a href="#__codelineno-0-2531">2531</a></span>
<span class="normal"><a href="#__codelineno-0-2532">2532</a></span>
<span class="normal"><a href="#__codelineno-0-2533">2533</a></span>
<span class="normal"><a href="#__codelineno-0-2534">2534</a></span>
<span class="normal"><a href="#__codelineno-0-2535">2535</a></span>
<span class="normal"><a href="#__codelineno-0-2536">2536</a></span>
<span class="normal"><a href="#__codelineno-0-2537">2537</a></span>
<span class="normal"><a href="#__codelineno-0-2538">2538</a></span>
<span class="normal"><a href="#__codelineno-0-2539">2539</a></span>
<span class="normal"><a href="#__codelineno-0-2540">2540</a></span></pre></div></td><td class="code"><div><pre><span></span><code><a id="__codelineno-0-2428" name="__codelineno-0-2428"></a><span class="k">def</span><span class="w"> </span><span class="nf">data_file_statistics_from_parquet_metadata</span><span class="p">(</span>
<a id="__codelineno-0-2429" name="__codelineno-0-2429"></a> <span class="n">parquet_metadata</span><span class="p">:</span> <span class="n">pq</span><span class="o">.</span><span class="n">FileMetaData</span><span class="p">,</span>
<a id="__codelineno-0-2430" name="__codelineno-0-2430"></a> <span class="n">stats_columns</span><span class="p">:</span> <span class="n">Dict</span><span class="p">[</span><span class="nb">int</span><span class="p">,</span> <span class="n">StatisticsCollector</span><span class="p">],</span>
<a id="__codelineno-0-2431" name="__codelineno-0-2431"></a> <span class="n">parquet_column_mapping</span><span class="p">:</span> <span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="nb">int</span><span class="p">],</span>
<a id="__codelineno-0-2432" name="__codelineno-0-2432"></a><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFileStatistics</span><span class="p">:</span>
<a id="__codelineno-0-2433" name="__codelineno-0-2433"></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<a id="__codelineno-0-2434" name="__codelineno-0-2434"></a><span class="sd"> Compute and return DataFileStatistics that includes the following.</span>
<a id="__codelineno-0-2435" name="__codelineno-0-2435"></a>
<a id="__codelineno-0-2436" name="__codelineno-0-2436"></a><span class="sd"> - record_count</span>
<a id="__codelineno-0-2437" name="__codelineno-0-2437"></a><span class="sd"> - column_sizes</span>
<a id="__codelineno-0-2438" name="__codelineno-0-2438"></a><span class="sd"> - value_counts</span>
<a id="__codelineno-0-2439" name="__codelineno-0-2439"></a><span class="sd"> - null_value_counts</span>
<a id="__codelineno-0-2440" name="__codelineno-0-2440"></a><span class="sd"> - nan_value_counts</span>
<a id="__codelineno-0-2441" name="__codelineno-0-2441"></a><span class="sd"> - column_aggregates</span>
<a id="__codelineno-0-2442" name="__codelineno-0-2442"></a><span class="sd"> - split_offsets</span>
<a id="__codelineno-0-2443" name="__codelineno-0-2443"></a>
<a id="__codelineno-0-2444" name="__codelineno-0-2444"></a><span class="sd"> Args:</span>
<a id="__codelineno-0-2445" name="__codelineno-0-2445"></a><span class="sd"> parquet_metadata (pyarrow.parquet.FileMetaData): A pyarrow metadata object.</span>
<a id="__codelineno-0-2446" name="__codelineno-0-2446"></a><span class="sd"> stats_columns (Dict[int, StatisticsCollector]): The statistics gathering plan. It is required to</span>
<a id="__codelineno-0-2447" name="__codelineno-0-2447"></a><span class="sd"> set the mode for column metrics collection</span>
<a id="__codelineno-0-2448" name="__codelineno-0-2448"></a><span class="sd"> parquet_column_mapping (Dict[str, int]): The mapping of the parquet column path to the field ID</span>
<a id="__codelineno-0-2449" name="__codelineno-0-2449"></a><span class="sd"> &quot;&quot;&quot;</span>
<a id="__codelineno-0-2450" name="__codelineno-0-2450"></a> <span class="n">column_sizes</span><span class="p">:</span> <span class="n">Dict</span><span class="p">[</span><span class="nb">int</span><span class="p">,</span> <span class="nb">int</span><span class="p">]</span> <span class="o">=</span> <span class="p">{}</span>
<a id="__codelineno-0-2451" name="__codelineno-0-2451"></a> <span class="n">value_counts</span><span class="p">:</span> <span class="n">Dict</span><span class="p">[</span><span class="nb">int</span><span class="p">,</span> <span class="nb">int</span><span class="p">]</span> <span class="o">=</span> <span class="p">{}</span>
<a id="__codelineno-0-2452" name="__codelineno-0-2452"></a> <span class="n">split_offsets</span><span class="p">:</span> <span class="n">List</span><span class="p">[</span><span class="nb">int</span><span class="p">]</span> <span class="o">=</span> <span class="p">[]</span>
<a id="__codelineno-0-2453" name="__codelineno-0-2453"></a>
<a id="__codelineno-0-2454" name="__codelineno-0-2454"></a> <span class="n">null_value_counts</span><span class="p">:</span> <span class="n">Dict</span><span class="p">[</span><span class="nb">int</span><span class="p">,</span> <span class="nb">int</span><span class="p">]</span> <span class="o">=</span> <span class="p">{}</span>
<a id="__codelineno-0-2455" name="__codelineno-0-2455"></a> <span class="n">nan_value_counts</span><span class="p">:</span> <span class="n">Dict</span><span class="p">[</span><span class="nb">int</span><span class="p">,</span> <span class="nb">int</span><span class="p">]</span> <span class="o">=</span> <span class="p">{}</span>
<a id="__codelineno-0-2456" name="__codelineno-0-2456"></a>
<a id="__codelineno-0-2457" name="__codelineno-0-2457"></a> <span class="n">col_aggs</span> <span class="o">=</span> <span class="p">{}</span>
<a id="__codelineno-0-2458" name="__codelineno-0-2458"></a>
<a id="__codelineno-0-2459" name="__codelineno-0-2459"></a> <span class="n">invalidate_col</span><span class="p">:</span> <span class="n">Set</span><span class="p">[</span><span class="nb">int</span><span class="p">]</span> <span class="o">=</span> <span class="nb">set</span><span class="p">()</span>
<a id="__codelineno-0-2460" name="__codelineno-0-2460"></a> <span class="k">for</span> <span class="n">r</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">parquet_metadata</span><span class="o">.</span><span class="n">num_row_groups</span><span class="p">):</span>
<a id="__codelineno-0-2461" name="__codelineno-0-2461"></a> <span class="c1"># References:</span>
<a id="__codelineno-0-2462" name="__codelineno-0-2462"></a> <span class="c1"># https://github.com/apache/iceberg/blob/fc381a81a1fdb8f51a0637ca27cd30673bd7aad3/parquet/src/main/java/org/apache/iceberg/parquet/ParquetUtil.java#L232</span>
<a id="__codelineno-0-2463" name="__codelineno-0-2463"></a> <span class="c1"># https://github.com/apache/parquet-mr/blob/ac29db4611f86a07cc6877b416aa4b183e09b353/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/metadata/ColumnChunkMetaData.java#L184</span>
<a id="__codelineno-0-2464" name="__codelineno-0-2464"></a>
<a id="__codelineno-0-2465" name="__codelineno-0-2465"></a> <span class="n">row_group</span> <span class="o">=</span> <span class="n">parquet_metadata</span><span class="o">.</span><span class="n">row_group</span><span class="p">(</span><span class="n">r</span><span class="p">)</span>
<a id="__codelineno-0-2466" name="__codelineno-0-2466"></a>
<a id="__codelineno-0-2467" name="__codelineno-0-2467"></a> <span class="n">data_offset</span> <span class="o">=</span> <span class="n">row_group</span><span class="o">.</span><span class="n">column</span><span class="p">(</span><span class="mi">0</span><span class="p">)</span><span class="o">.</span><span class="n">data_page_offset</span>
<a id="__codelineno-0-2468" name="__codelineno-0-2468"></a> <span class="n">dictionary_offset</span> <span class="o">=</span> <span class="n">row_group</span><span class="o">.</span><span class="n">column</span><span class="p">(</span><span class="mi">0</span><span class="p">)</span><span class="o">.</span><span class="n">dictionary_page_offset</span>
<a id="__codelineno-0-2469" name="__codelineno-0-2469"></a>
<a id="__codelineno-0-2470" name="__codelineno-0-2470"></a> <span class="k">if</span> <span class="n">row_group</span><span class="o">.</span><span class="n">column</span><span class="p">(</span><span class="mi">0</span><span class="p">)</span><span class="o">.</span><span class="n">has_dictionary_page</span> <span class="ow">and</span> <span class="n">dictionary_offset</span> <span class="o">&lt;</span> <span class="n">data_offset</span><span class="p">:</span>
<a id="__codelineno-0-2471" name="__codelineno-0-2471"></a> <span class="n">split_offsets</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">dictionary_offset</span><span class="p">)</span>
<a id="__codelineno-0-2472" name="__codelineno-0-2472"></a> <span class="k">else</span><span class="p">:</span>
<a id="__codelineno-0-2473" name="__codelineno-0-2473"></a> <span class="n">split_offsets</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">data_offset</span><span class="p">)</span>
<a id="__codelineno-0-2474" name="__codelineno-0-2474"></a>
<a id="__codelineno-0-2475" name="__codelineno-0-2475"></a> <span class="k">for</span> <span class="n">pos</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">parquet_metadata</span><span class="o">.</span><span class="n">num_columns</span><span class="p">):</span>
<a id="__codelineno-0-2476" name="__codelineno-0-2476"></a> <span class="n">column</span> <span class="o">=</span> <span class="n">row_group</span><span class="o">.</span><span class="n">column</span><span class="p">(</span><span class="n">pos</span><span class="p">)</span>
<a id="__codelineno-0-2477" name="__codelineno-0-2477"></a> <span class="n">field_id</span> <span class="o">=</span> <span class="n">parquet_column_mapping</span><span class="p">[</span><span class="n">column</span><span class="o">.</span><span class="n">path_in_schema</span><span class="p">]</span>
<a id="__codelineno-0-2478" name="__codelineno-0-2478"></a>
<a id="__codelineno-0-2479" name="__codelineno-0-2479"></a> <span class="n">stats_col</span> <span class="o">=</span> <span class="n">stats_columns</span><span class="p">[</span><span class="n">field_id</span><span class="p">]</span>
<a id="__codelineno-0-2480" name="__codelineno-0-2480"></a>
<a id="__codelineno-0-2481" name="__codelineno-0-2481"></a> <span class="n">column_sizes</span><span class="o">.</span><span class="n">setdefault</span><span class="p">(</span><span class="n">field_id</span><span class="p">,</span> <span class="mi">0</span><span class="p">)</span>
<a id="__codelineno-0-2482" name="__codelineno-0-2482"></a> <span class="n">column_sizes</span><span class="p">[</span><span class="n">field_id</span><span class="p">]</span> <span class="o">+=</span> <span class="n">column</span><span class="o">.</span><span class="n">total_compressed_size</span>
<a id="__codelineno-0-2483" name="__codelineno-0-2483"></a>
<a id="__codelineno-0-2484" name="__codelineno-0-2484"></a> <span class="k">if</span> <span class="n">stats_col</span><span class="o">.</span><span class="n">mode</span> <span class="o">==</span> <span class="n">MetricsMode</span><span class="p">(</span><span class="n">MetricModeTypes</span><span class="o">.</span><span class="n">NONE</span><span class="p">):</span>
<a id="__codelineno-0-2485" name="__codelineno-0-2485"></a> <span class="k">continue</span>
<a id="__codelineno-0-2486" name="__codelineno-0-2486"></a>
<a id="__codelineno-0-2487" name="__codelineno-0-2487"></a> <span class="n">value_counts</span><span class="p">[</span><span class="n">field_id</span><span class="p">]</span> <span class="o">=</span> <span class="n">value_counts</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">field_id</span><span class="p">,</span> <span class="mi">0</span><span class="p">)</span> <span class="o">+</span> <span class="n">column</span><span class="o">.</span><span class="n">num_values</span>
<a id="__codelineno-0-2488" name="__codelineno-0-2488"></a>
<a id="__codelineno-0-2489" name="__codelineno-0-2489"></a> <span class="k">if</span> <span class="n">column</span><span class="o">.</span><span class="n">is_stats_set</span><span class="p">:</span>
<a id="__codelineno-0-2490" name="__codelineno-0-2490"></a> <span class="k">try</span><span class="p">:</span>
<a id="__codelineno-0-2491" name="__codelineno-0-2491"></a> <span class="n">statistics</span> <span class="o">=</span> <span class="n">column</span><span class="o">.</span><span class="n">statistics</span>
<a id="__codelineno-0-2492" name="__codelineno-0-2492"></a>
<a id="__codelineno-0-2493" name="__codelineno-0-2493"></a> <span class="k">if</span> <span class="n">statistics</span><span class="o">.</span><span class="n">has_null_count</span><span class="p">:</span>
<a id="__codelineno-0-2494" name="__codelineno-0-2494"></a> <span class="n">null_value_counts</span><span class="p">[</span><span class="n">field_id</span><span class="p">]</span> <span class="o">=</span> <span class="n">null_value_counts</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">field_id</span><span class="p">,</span> <span class="mi">0</span><span class="p">)</span> <span class="o">+</span> <span class="n">statistics</span><span class="o">.</span><span class="n">null_count</span>
<a id="__codelineno-0-2495" name="__codelineno-0-2495"></a>
<a id="__codelineno-0-2496" name="__codelineno-0-2496"></a> <span class="k">if</span> <span class="n">stats_col</span><span class="o">.</span><span class="n">mode</span> <span class="o">==</span> <span class="n">MetricsMode</span><span class="p">(</span><span class="n">MetricModeTypes</span><span class="o">.</span><span class="n">COUNTS</span><span class="p">):</span>
<a id="__codelineno-0-2497" name="__codelineno-0-2497"></a> <span class="k">continue</span>
<a id="__codelineno-0-2498" name="__codelineno-0-2498"></a>
<a id="__codelineno-0-2499" name="__codelineno-0-2499"></a> <span class="k">if</span> <span class="n">field_id</span> <span class="ow">not</span> <span class="ow">in</span> <span class="n">col_aggs</span><span class="p">:</span>
<a id="__codelineno-0-2500" name="__codelineno-0-2500"></a> <span class="k">try</span><span class="p">:</span>
<a id="__codelineno-0-2501" name="__codelineno-0-2501"></a> <span class="n">col_aggs</span><span class="p">[</span><span class="n">field_id</span><span class="p">]</span> <span class="o">=</span> <span class="n">StatsAggregator</span><span class="p">(</span>
<a id="__codelineno-0-2502" name="__codelineno-0-2502"></a> <span class="n">stats_col</span><span class="o">.</span><span class="n">iceberg_type</span><span class="p">,</span> <span class="n">statistics</span><span class="o">.</span><span class="n">physical_type</span><span class="p">,</span> <span class="n">stats_col</span><span class="o">.</span><span class="n">mode</span><span class="o">.</span><span class="n">length</span>
<a id="__codelineno-0-2503" name="__codelineno-0-2503"></a> <span class="p">)</span>
<a id="__codelineno-0-2504" name="__codelineno-0-2504"></a> <span class="k">except</span> <span class="ne">ValueError</span> <span class="k">as</span> <span class="n">e</span><span class="p">:</span>
<a id="__codelineno-0-2505" name="__codelineno-0-2505"></a> <span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="sa">f</span><span class="s2">&quot;</span><span class="si">{</span><span class="n">e</span><span class="si">}</span><span class="s2"> for column &#39;</span><span class="si">{</span><span class="n">stats_col</span><span class="o">.</span><span class="n">column_name</span><span class="si">}</span><span class="s2">&#39;&quot;</span><span class="p">)</span> <span class="kn">from</span><span class="w"> </span><span class="nn">e</span>
<a id="__codelineno-0-2506" name="__codelineno-0-2506"></a>
<a id="__codelineno-0-2507" name="__codelineno-0-2507"></a> <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">stats_col</span><span class="o">.</span><span class="n">iceberg_type</span><span class="p">,</span> <span class="n">DecimalType</span><span class="p">)</span> <span class="ow">and</span> <span class="n">statistics</span><span class="o">.</span><span class="n">physical_type</span> <span class="o">!=</span> <span class="s2">&quot;FIXED_LEN_BYTE_ARRAY&quot;</span><span class="p">:</span>
<a id="__codelineno-0-2508" name="__codelineno-0-2508"></a> <span class="n">scale</span> <span class="o">=</span> <span class="n">stats_col</span><span class="o">.</span><span class="n">iceberg_type</span><span class="o">.</span><span class="n">scale</span>
<a id="__codelineno-0-2509" name="__codelineno-0-2509"></a> <span class="n">col_aggs</span><span class="p">[</span><span class="n">field_id</span><span class="p">]</span><span class="o">.</span><span class="n">update_min</span><span class="p">(</span>
<a id="__codelineno-0-2510" name="__codelineno-0-2510"></a> <span class="n">unscaled_to_decimal</span><span class="p">(</span><span class="n">statistics</span><span class="o">.</span><span class="n">min_raw</span><span class="p">,</span> <span class="n">scale</span><span class="p">)</span>
<a id="__codelineno-0-2511" name="__codelineno-0-2511"></a> <span class="p">)</span> <span class="k">if</span> <span class="n">statistics</span><span class="o">.</span><span class="n">min_raw</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span> <span class="k">else</span> <span class="kc">None</span>
<a id="__codelineno-0-2512" name="__codelineno-0-2512"></a> <span class="n">col_aggs</span><span class="p">[</span><span class="n">field_id</span><span class="p">]</span><span class="o">.</span><span class="n">update_max</span><span class="p">(</span>
<a id="__codelineno-0-2513" name="__codelineno-0-2513"></a> <span class="n">unscaled_to_decimal</span><span class="p">(</span><span class="n">statistics</span><span class="o">.</span><span class="n">max_raw</span><span class="p">,</span> <span class="n">scale</span><span class="p">)</span>
<a id="__codelineno-0-2514" name="__codelineno-0-2514"></a> <span class="p">)</span> <span class="k">if</span> <span class="n">statistics</span><span class="o">.</span><span class="n">max_raw</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span> <span class="k">else</span> <span class="kc">None</span>
<a id="__codelineno-0-2515" name="__codelineno-0-2515"></a> <span class="k">else</span><span class="p">:</span>
<a id="__codelineno-0-2516" name="__codelineno-0-2516"></a> <span class="n">col_aggs</span><span class="p">[</span><span class="n">field_id</span><span class="p">]</span><span class="o">.</span><span class="n">update_min</span><span class="p">(</span><span class="n">statistics</span><span class="o">.</span><span class="n">min</span><span class="p">)</span>
<a id="__codelineno-0-2517" name="__codelineno-0-2517"></a> <span class="n">col_aggs</span><span class="p">[</span><span class="n">field_id</span><span class="p">]</span><span class="o">.</span><span class="n">update_max</span><span class="p">(</span><span class="n">statistics</span><span class="o">.</span><span class="n">max</span><span class="p">)</span>
<a id="__codelineno-0-2518" name="__codelineno-0-2518"></a>
<a id="__codelineno-0-2519" name="__codelineno-0-2519"></a> <span class="k">except</span> <span class="n">pyarrow</span><span class="o">.</span><span class="n">lib</span><span class="o">.</span><span class="n">ArrowNotImplementedError</span> <span class="k">as</span> <span class="n">e</span><span class="p">:</span>
<a id="__codelineno-0-2520" name="__codelineno-0-2520"></a> <span class="n">invalidate_col</span><span class="o">.</span><span class="n">add</span><span class="p">(</span><span class="n">field_id</span><span class="p">)</span>
<a id="__codelineno-0-2521" name="__codelineno-0-2521"></a> <span class="n">logger</span><span class="o">.</span><span class="n">warning</span><span class="p">(</span><span class="n">e</span><span class="p">)</span>
<a id="__codelineno-0-2522" name="__codelineno-0-2522"></a> <span class="k">else</span><span class="p">:</span>
<a id="__codelineno-0-2523" name="__codelineno-0-2523"></a> <span class="n">invalidate_col</span><span class="o">.</span><span class="n">add</span><span class="p">(</span><span class="n">field_id</span><span class="p">)</span>
<a id="__codelineno-0-2524" name="__codelineno-0-2524"></a> <span class="n">logger</span><span class="o">.</span><span class="n">warning</span><span class="p">(</span><span class="s2">&quot;PyArrow statistics missing for column </span><span class="si">%d</span><span class="s2"> when writing file&quot;</span><span class="p">,</span> <span class="n">pos</span><span class="p">)</span>
<a id="__codelineno-0-2525" name="__codelineno-0-2525"></a>
<a id="__codelineno-0-2526" name="__codelineno-0-2526"></a> <span class="n">split_offsets</span><span class="o">.</span><span class="n">sort</span><span class="p">()</span>
<a id="__codelineno-0-2527" name="__codelineno-0-2527"></a>
<a id="__codelineno-0-2528" name="__codelineno-0-2528"></a> <span class="k">for</span> <span class="n">field_id</span> <span class="ow">in</span> <span class="n">invalidate_col</span><span class="p">:</span>
<a id="__codelineno-0-2529" name="__codelineno-0-2529"></a> <span class="n">col_aggs</span><span class="o">.</span><span class="n">pop</span><span class="p">(</span><span class="n">field_id</span><span class="p">,</span> <span class="kc">None</span><span class="p">)</span>
<a id="__codelineno-0-2530" name="__codelineno-0-2530"></a> <span class="n">null_value_counts</span><span class="o">.</span><span class="n">pop</span><span class="p">(</span><span class="n">field_id</span><span class="p">,</span> <span class="kc">None</span><span class="p">)</span>
<a id="__codelineno-0-2531" name="__codelineno-0-2531"></a>
<a id="__codelineno-0-2532" name="__codelineno-0-2532"></a> <span class="k">return</span> <span class="n">DataFileStatistics</span><span class="p">(</span>
<a id="__codelineno-0-2533" name="__codelineno-0-2533"></a> <span class="n">record_count</span><span class="o">=</span><span class="n">parquet_metadata</span><span class="o">.</span><span class="n">num_rows</span><span class="p">,</span>
<a id="__codelineno-0-2534" name="__codelineno-0-2534"></a> <span class="n">column_sizes</span><span class="o">=</span><span class="n">column_sizes</span><span class="p">,</span>
<a id="__codelineno-0-2535" name="__codelineno-0-2535"></a> <span class="n">value_counts</span><span class="o">=</span><span class="n">value_counts</span><span class="p">,</span>
<a id="__codelineno-0-2536" name="__codelineno-0-2536"></a> <span class="n">null_value_counts</span><span class="o">=</span><span class="n">null_value_counts</span><span class="p">,</span>
<a id="__codelineno-0-2537" name="__codelineno-0-2537"></a> <span class="n">nan_value_counts</span><span class="o">=</span><span class="n">nan_value_counts</span><span class="p">,</span>
<a id="__codelineno-0-2538" name="__codelineno-0-2538"></a> <span class="n">column_aggregates</span><span class="o">=</span><span class="n">col_aggs</span><span class="p">,</span>
<a id="__codelineno-0-2539" name="__codelineno-0-2539"></a> <span class="n">split_offsets</span><span class="o">=</span><span class="n">split_offsets</span><span class="p">,</span>
<a id="__codelineno-0-2540" name="__codelineno-0-2540"></a> <span class="p">)</span>
</code></pre></div></td></tr></table></div>
</details>
</div>
</div>
<div class="doc doc-object doc-function">
<h2 id="pyiceberg.io.pyarrow.parquet_path_to_id_mapping" class="doc doc-heading">
<code class="highlight language-python"><span class="n">parquet_path_to_id_mapping</span><span class="p">(</span><span class="n">schema</span><span class="p">)</span></code>
<a href="#pyiceberg.io.pyarrow.parquet_path_to_id_mapping" class="headerlink" title="Permanent link">&para;</a></h2>
<div class="doc doc-contents ">
<p>Compute the mapping of parquet column path to Iceberg ID.</p>
<p>For each column, the parquet file metadata has a path_in_schema attribute that follows
a specific naming scheme for nested columns. This function computes a mapping of
the full paths to the corresponding Iceberg IDs.</p>
<p><span class="doc-section-title">Parameters:</span></p>
<table>
<thead>
<tr>
<th>Name</th>
<th>Type</th>
<th>Description</th>
<th>Default</th>
</tr>
</thead>
<tbody>
<tr class="doc-section-item">
<td>
<code>schema</code>
</td>
<td>
<code><a class="autorefs autorefs-internal" title="Schema (pyiceberg.schema.Schema)" href="../../schema/#pyiceberg.schema.Schema">Schema</a></code>
</td>
<td>
<div class="doc-md-description">
<p>The current table schema.</p>
</div>
</td>
<td>
<em>required</em>
</td>
</tr>
</tbody>
</table>
<details class="quote">
<summary>Source code in <code>pyiceberg/io/pyarrow.py</code></summary>
<div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-2338">2338</a></span>
<span class="normal"><a href="#__codelineno-0-2339">2339</a></span>
<span class="normal"><a href="#__codelineno-0-2340">2340</a></span>
<span class="normal"><a href="#__codelineno-0-2341">2341</a></span>
<span class="normal"><a href="#__codelineno-0-2342">2342</a></span>
<span class="normal"><a href="#__codelineno-0-2343">2343</a></span>
<span class="normal"><a href="#__codelineno-0-2344">2344</a></span>
<span class="normal"><a href="#__codelineno-0-2345">2345</a></span>
<span class="normal"><a href="#__codelineno-0-2346">2346</a></span>
<span class="normal"><a href="#__codelineno-0-2347">2347</a></span>
<span class="normal"><a href="#__codelineno-0-2348">2348</a></span>
<span class="normal"><a href="#__codelineno-0-2349">2349</a></span>
<span class="normal"><a href="#__codelineno-0-2350">2350</a></span>
<span class="normal"><a href="#__codelineno-0-2351">2351</a></span>
<span class="normal"><a href="#__codelineno-0-2352">2352</a></span>
<span class="normal"><a href="#__codelineno-0-2353">2353</a></span>
<span class="normal"><a href="#__codelineno-0-2354">2354</a></span></pre></div></td><td class="code"><div><pre><span></span><code><a id="__codelineno-0-2338" name="__codelineno-0-2338"></a><span class="k">def</span><span class="w"> </span><span class="nf">parquet_path_to_id_mapping</span><span class="p">(</span>
<a id="__codelineno-0-2339" name="__codelineno-0-2339"></a> <span class="n">schema</span><span class="p">:</span> <span class="n">Schema</span><span class="p">,</span>
<a id="__codelineno-0-2340" name="__codelineno-0-2340"></a><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="nb">int</span><span class="p">]:</span>
<a id="__codelineno-0-2341" name="__codelineno-0-2341"></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<a id="__codelineno-0-2342" name="__codelineno-0-2342"></a><span class="sd"> Compute the mapping of parquet column path to Iceberg ID.</span>
<a id="__codelineno-0-2343" name="__codelineno-0-2343"></a>
<a id="__codelineno-0-2344" name="__codelineno-0-2344"></a><span class="sd"> For each column, the parquet file metadata has a path_in_schema attribute that follows</span>
<a id="__codelineno-0-2345" name="__codelineno-0-2345"></a><span class="sd"> a specific naming scheme for nested columns. This function computes a mapping of</span>
<a id="__codelineno-0-2346" name="__codelineno-0-2346"></a><span class="sd"> the full paths to the corresponding Iceberg IDs.</span>
<a id="__codelineno-0-2347" name="__codelineno-0-2347"></a>
<a id="__codelineno-0-2348" name="__codelineno-0-2348"></a><span class="sd"> Args:</span>
<a id="__codelineno-0-2349" name="__codelineno-0-2349"></a><span class="sd"> schema (pyiceberg.schema.Schema): The current table schema.</span>
<a id="__codelineno-0-2350" name="__codelineno-0-2350"></a><span class="sd"> &quot;&quot;&quot;</span>
<a id="__codelineno-0-2351" name="__codelineno-0-2351"></a> <span class="n">result</span><span class="p">:</span> <span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="nb">int</span><span class="p">]</span> <span class="o">=</span> <span class="p">{}</span>
<a id="__codelineno-0-2352" name="__codelineno-0-2352"></a> <span class="k">for</span> <span class="n">pair</span> <span class="ow">in</span> <span class="n">pre_order_visit</span><span class="p">(</span><span class="n">schema</span><span class="p">,</span> <span class="n">ID2ParquetPathVisitor</span><span class="p">()):</span>
<a id="__codelineno-0-2353" name="__codelineno-0-2353"></a> <span class="n">result</span><span class="p">[</span><span class="n">pair</span><span class="o">.</span><span class="n">parquet_path</span><span class="p">]</span> <span class="o">=</span> <span class="n">pair</span><span class="o">.</span><span class="n">field_id</span>
<a id="__codelineno-0-2354" name="__codelineno-0-2354"></a> <span class="k">return</span> <span class="n">result</span>
</code></pre></div></td></tr></table></div>
</details>
</div>
</div>
<div class="doc doc-object doc-function">
<h2 id="pyiceberg.io.pyarrow.visit_pyarrow" class="doc doc-heading">
<code class="highlight language-python"><span class="n">visit_pyarrow</span><span class="p">(</span><span class="n">obj</span><span class="p">,</span> <span class="n">visitor</span><span class="p">)</span></code>
<a href="#pyiceberg.io.pyarrow.visit_pyarrow" class="headerlink" title="Permanent link">&para;</a></h2>
<div class="doc doc-contents ">
<p>Apply a pyarrow schema visitor to any point within a schema.</p>
<p>The function traverses the schema in post-order fashion.</p>
<p><span class="doc-section-title">Parameters:</span></p>
<table>
<thead>
<tr>
<th>Name</th>
<th>Type</th>
<th>Description</th>
<th>Default</th>
</tr>
</thead>
<tbody>
<tr class="doc-section-item">
<td>
<code>obj</code>
</td>
<td>
<code><span title="typing.Union">Union</span>[<span title="pyarrow.DataType">DataType</span>, <span title="pyarrow.Schema">Schema</span>]</code>
</td>
<td>
<div class="doc-md-description">
<p>An instance of a pyarrow Schema or DataType.</p>
</div>
</td>
<td>
<em>required</em>
</td>
</tr>
<tr class="doc-section-item">
<td>
<code>visitor</code>
</td>
<td>
<code><a class="autorefs autorefs-internal" title="PyArrowSchemaVisitor (pyiceberg.io.pyarrow.PyArrowSchemaVisitor)" href="#pyiceberg.io.pyarrow.PyArrowSchemaVisitor">PyArrowSchemaVisitor</a>[<span title="pyiceberg.io.pyarrow.T">T</span>]</code>
</td>
<td>
<div class="doc-md-description">
<p>An instance of an implementation of the generic PyArrowSchemaVisitor base class.</p>
</div>
</td>
<td>
<em>required</em>
</td>
</tr>
</tbody>
</table>
<p><span class="doc-section-title">Raises:</span></p>
<table>
<thead>
<tr>
<th>Type</th>
<th>Description</th>
</tr>
</thead>
<tbody>
<tr class="doc-section-item">
<td>
<code><span title="NotImplementedError">NotImplementedError</span></code>
</td>
<td>
<div class="doc-md-description">
<p>If attempting to visit an unrecognized object type.</p>
</div>
</td>
</tr>
</tbody>
</table>
<details class="quote">
<summary>Source code in <code>pyiceberg/io/pyarrow.py</code></summary>
<div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-1122">1122</a></span>
<span class="normal"><a href="#__codelineno-0-1123">1123</a></span>
<span class="normal"><a href="#__codelineno-0-1124">1124</a></span>
<span class="normal"><a href="#__codelineno-0-1125">1125</a></span>
<span class="normal"><a href="#__codelineno-0-1126">1126</a></span>
<span class="normal"><a href="#__codelineno-0-1127">1127</a></span>
<span class="normal"><a href="#__codelineno-0-1128">1128</a></span>
<span class="normal"><a href="#__codelineno-0-1129">1129</a></span>
<span class="normal"><a href="#__codelineno-0-1130">1130</a></span>
<span class="normal"><a href="#__codelineno-0-1131">1131</a></span>
<span class="normal"><a href="#__codelineno-0-1132">1132</a></span>
<span class="normal"><a href="#__codelineno-0-1133">1133</a></span>
<span class="normal"><a href="#__codelineno-0-1134">1134</a></span>
<span class="normal"><a href="#__codelineno-0-1135">1135</a></span></pre></div></td><td class="code"><div><pre><span></span><code><a id="__codelineno-0-1122" name="__codelineno-0-1122"></a><span class="nd">@singledispatch</span>
<a id="__codelineno-0-1123" name="__codelineno-0-1123"></a><span class="k">def</span><span class="w"> </span><span class="nf">visit_pyarrow</span><span class="p">(</span><span class="n">obj</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="n">pa</span><span class="o">.</span><span class="n">DataType</span><span class="p">,</span> <span class="n">pa</span><span class="o">.</span><span class="n">Schema</span><span class="p">],</span> <span class="n">visitor</span><span class="p">:</span> <span class="n">PyArrowSchemaVisitor</span><span class="p">[</span><span class="n">T</span><span class="p">])</span> <span class="o">-&gt;</span> <span class="n">T</span><span class="p">:</span>
<a id="__codelineno-0-1124" name="__codelineno-0-1124"></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;Apply a pyarrow schema visitor to any point within a schema.</span>
<a id="__codelineno-0-1125" name="__codelineno-0-1125"></a>
<a id="__codelineno-0-1126" name="__codelineno-0-1126"></a><span class="sd"> The function traverses the schema in post-order fashion.</span>
<a id="__codelineno-0-1127" name="__codelineno-0-1127"></a>
<a id="__codelineno-0-1128" name="__codelineno-0-1128"></a><span class="sd"> Args:</span>
<a id="__codelineno-0-1129" name="__codelineno-0-1129"></a><span class="sd"> obj (Union[pa.DataType, pa.Schema]): An instance of a Schema or an IcebergType.</span>
<a id="__codelineno-0-1130" name="__codelineno-0-1130"></a><span class="sd"> visitor (PyArrowSchemaVisitor[T]): An instance of an implementation of the generic PyarrowSchemaVisitor base class.</span>
<a id="__codelineno-0-1131" name="__codelineno-0-1131"></a>
<a id="__codelineno-0-1132" name="__codelineno-0-1132"></a><span class="sd"> Raises:</span>
<a id="__codelineno-0-1133" name="__codelineno-0-1133"></a><span class="sd"> NotImplementedError: If attempting to visit an unrecognized object type.</span>
<a id="__codelineno-0-1134" name="__codelineno-0-1134"></a><span class="sd"> &quot;&quot;&quot;</span>
<a id="__codelineno-0-1135" name="__codelineno-0-1135"></a> <span class="k">raise</span> <span class="ne">NotImplementedError</span><span class="p">(</span><span class="sa">f</span><span class="s2">&quot;Cannot visit non-type: </span><span class="si">{</span><span class="n">obj</span><span class="si">}</span><span class="s2">&quot;</span><span class="p">)</span>
</code></pre></div></td></tr></table></div>
</details>
</div>
</div>
</div>
</div>
</div>
</article>
</div>
<script>var target=document.getElementById(location.hash.slice(1));target&&target.name&&(target.checked=target.name.startsWith("__tabbed_"))</script>
</div>
<button type="button" class="md-top md-icon" data-md-component="top" hidden>
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M13 20h-2V8l-5.5 5.5-1.42-1.42L12 4.16l7.92 7.92-1.42 1.42L13 8z"/></svg>
Back to top
</button>
</main>
<footer class="md-footer">
<div class="md-footer-meta md-typeset">
<div class="md-footer-meta__inner md-grid">
<div class="md-copyright">
Made with
<a href="https://squidfunk.github.io/mkdocs-material/" target="_blank" rel="noopener">
Material for MkDocs
</a>
</div>
</div>
</div>
</footer>
</div>
<div class="md-dialog" data-md-component="dialog">
<div class="md-dialog__inner md-typeset"></div>
</div>
<script id="__config" type="application/json">{"base": "../../../..", "features": ["navigation.top", "navigation.tracking", "navigation.tabs", "navigation.tabs.sticky", "content.code.copy"], "search": "../../../../assets/javascripts/workers/search.973d3a69.min.js", "tags": null, "translations": {"clipboard.copied": "Copied to clipboard", "clipboard.copy": "Copy to clipboard", "search.result.more.one": "1 more on this page", "search.result.more.other": "# more on this page", "search.result.none": "No matching documents", "search.result.one": "1 matching document", "search.result.other": "# matching documents", "search.result.placeholder": "Type to start searching", "search.result.term.missing": "Missing", "select.version": "Select version"}, "version": null}</script>
<script src="../../../../assets/javascripts/bundle.f55a23d4.min.js"></script>
</body>
</html>