<!-- blob: 1c09775ea2496bea684fbac95802e855785178e9 [file] [log] [blame] -->
<!doctype html>
<html lang="en" class="no-js">
<head>
  <!-- Encoding and viewport. -->
  <meta charset="utf-8">
  <meta name="viewport" content="width=device-width,initial-scale=1">

  <!-- Canonical URL, neighbouring pages, favicon and generator stamp. -->
  <link rel="canonical" href="https://py.iceberg.apache.org/reference/pyiceberg/io/pyarrow/">
  <link rel="prev" href="../fsspec/">
  <link rel="next" href="../../manifest/">
  <link rel="icon" href="../../../../assets/images/iceberg-logo-icon.png">
  <meta name="generator" content="mkdocs-1.6.1, mkdocs-material-9.6.5">

  <title>pyarrow - PyIceberg</title>

  <!-- Theme stylesheets, then web fonts, then the font variables, then the mkdocstrings stylesheet.
       The order is kept exactly as generated because later sheets can override earlier ones. -->
  <link rel="stylesheet" href="../../../../assets/stylesheets/main.8608ea7d.min.css">
  <link rel="stylesheet" href="../../../../assets/stylesheets/palette.06af60db.min.css">
  <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
  <link rel="stylesheet" href="https://fonts.googleapis.com/css?family=Lato:300,300i,400,400i,700,700i%7CRoboto+Mono:400,400i,700,700i&display=fallback">
  <style>:root{--md-text-font:"Lato";--md-code-font:"Roboto Mono"}</style>
  <link rel="stylesheet" href="../../../../assets/_mkdocstrings.css">

  <script>
    // Global helpers (deliberately assigned without a declaration, so they
    // become properties of the global object, as in the generated original).

    // URL obtained by resolving "../../../.." against the current location.
    __md_scope = new URL("../../../..", location);

    // Fold the characters of a string into a number: hash = (hash << 5) - hash + charCode.
    __md_hash = text =>
      [...text].reduce((hash, ch) => (hash << 5) - hash + ch.charCodeAt(0), 0);

    // Read and JSON-parse the entry stored under "<scope pathname>.<key>".
    __md_get = (key, storage = localStorage, scope = __md_scope) =>
      JSON.parse(storage.getItem(scope.pathname + "." + key));

    // JSON-serialise a value and store it under "<scope pathname>.<key>".
    // Any exception raised while serialising or storing is swallowed.
    __md_set = (key, value, storage = localStorage, scope = __md_scope) => {
      try {
        storage.setItem(scope.pathname + "." + key, JSON.stringify(value));
      } catch (err) {}
    };
  </script>
</head>
<body dir="ltr" data-md-color-scheme="default" data-md-color-primary="indigo" data-md-color-accent="indigo">
<!-- State checkboxes for the drawer and the search overlay; labels on the page target them through for="__drawer" and for="__search". -->
<input class="md-toggle" id="__drawer" data-md-toggle="drawer" type="checkbox" autocomplete="off">
<input class="md-toggle" id="__search" data-md-toggle="search" type="checkbox" autocomplete="off">
<label class="md-overlay" for="__drawer"></label>
<!-- Skip link pointing at the #pyiceberg.io.pyarrow anchor. -->
<div data-md-component="skip">
  <a class="md-skip" href="#pyiceberg.io.pyarrow">
    Skip to content
  </a>
</div>
<!-- Announcement slot; it carries no content on this page. -->
<div data-md-component="announce">
</div>
<header class="md-header md-header--shadow md-header--lifted" data-md-component="header">
<!-- Header bar: logo link, drawer toggle, page titles, colour-palette switch, search dialog and repository link.
     Inline SVG icons in this block are decorative and are therefore marked aria-hidden="true". -->
<nav class="md-header__inner md-grid" aria-label="Header">
  <a href="../../../.." title="PyIceberg" class="md-header__button md-logo" aria-label="PyIceberg" data-md-component="logo">
    <img src="../../../../assets/images/iceberg-logo-icon.png" alt="logo">
  </a>
  <!-- Label for the #__drawer checkbox. -->
  <label class="md-header__button md-icon" for="__drawer">
    <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" aria-hidden="true"><path d="M3 6h18v2H3zm0 5h18v2H3zm0 5h18v2H3z"/></svg>
  </label>
  <div class="md-header__title" data-md-component="header-title">
    <div class="md-header__ellipsis">
      <div class="md-header__topic">
        <span class="md-ellipsis">
          PyIceberg
        </span>
      </div>
      <div class="md-header__topic" data-md-component="header-topic">
        <span class="md-ellipsis">
          pyarrow
        </span>
      </div>
    </div>
  </div>
  <!-- Palette switch: two radio inputs (default / slate scheme); each label targets the other input. -->
  <form class="md-header__option" data-md-component="palette">
    <input class="md-option" data-md-color-media="" data-md-color-scheme="default" data-md-color-primary="indigo" data-md-color-accent="indigo" aria-label="Switch to dark mode" type="radio" name="__palette" id="__palette_0">
    <label class="md-header__button md-icon" title="Switch to dark mode" for="__palette_1" hidden>
      <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" aria-hidden="true"><path d="M12 8a4 4 0 0 0-4 4 4 4 0 0 0 4 4 4 4 0 0 0 4-4 4 4 0 0 0-4-4m0 10a6 6 0 0 1-6-6 6 6 0 0 1 6-6 6 6 0 0 1 6 6 6 6 0 0 1-6 6m8-9.31V4h-4.69L12 .69 8.69 4H4v4.69L.69 12 4 15.31V20h4.69L12 23.31 15.31 20H20v-4.69L23.31 12z"/></svg>
    </label>
    <input class="md-option" data-md-color-media="" data-md-color-scheme="slate" data-md-color-primary="indigo" data-md-color-accent="indigo" aria-label="Switch to light mode" type="radio" name="__palette" id="__palette_1">
    <label class="md-header__button md-icon" title="Switch to light mode" for="__palette_0" hidden>
      <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" aria-hidden="true"><path d="M12 18c-.89 0-1.74-.2-2.5-.55C11.56 16.5 13 14.42 13 12s-1.44-4.5-3.5-5.45C10.26 6.2 11.11 6 12 6a6 6 0 0 1 6 6 6 6 0 0 1-6 6m8-9.31V4h-4.69L12 .69 8.69 4H4v4.69L.69 12 4 15.31V20h4.69L12 23.31 15.31 20H20v-4.69L23.31 12z"/></svg>
    </label>
  </form>
  <script>
    // Re-apply the stored palette, read through __md_get("__palette"), to <body>.
    // Identifiers and `var` declarations are kept exactly as generated so the
    // same globals exist afterwards.
    var palette = __md_get("__palette");
    if (palette && palette.color) {
      // A stored "(prefers-color-scheme)" media value is replaced by the attributes
      // of the input whose data-md-color-media matches the light/dark media query.
      if ("(prefers-color-scheme)" === palette.color.media) {
        var media = matchMedia("(prefers-color-scheme: light)"),
            input = document.querySelector(media.matches ? "[data-md-color-media='(prefers-color-scheme: light)']" : "[data-md-color-media='(prefers-color-scheme: dark)']");
        palette.color.media = input.getAttribute("data-md-color-media");
        palette.color.scheme = input.getAttribute("data-md-color-scheme");
        palette.color.primary = input.getAttribute("data-md-color-primary");
        palette.color.accent = input.getAttribute("data-md-color-accent");
      }
      // Copy every palette entry to a data-md-color-<key> attribute on <body>.
      for (var [key, value] of Object.entries(palette.color))
        document.body.setAttribute("data-md-color-" + key, value);
    }
  </script>
  <!-- Label for the #__search checkbox. -->
  <label class="md-header__button md-icon" for="__search">
    <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" aria-hidden="true"><path d="M9.5 3A6.5 6.5 0 0 1 16 9.5c0 1.61-.59 3.09-1.56 4.23l.27.27h.79l5 5-1.5 1.5-5-5v-.79l-.27-.27A6.52 6.52 0 0 1 9.5 16 6.5 6.5 0 0 1 3 9.5 6.5 6.5 0 0 1 9.5 3m0 2C7 5 5 7 5 9.5S7 14 9.5 14 14 12 14 9.5 12 5 9.5 5"/></svg>
  </label>
  <!-- Search dialog. aria-label supplies the accessible name that role="dialog" requires. -->
  <div class="md-search" data-md-component="search" role="dialog" aria-label="Search">
    <label class="md-search__overlay" for="__search"></label>
    <div class="md-search__inner" role="search">
      <form class="md-search__form" name="search">
        <input type="text" class="md-search__input" name="query" aria-label="Search" placeholder="Search" autocapitalize="off" autocorrect="off" autocomplete="off" spellcheck="false" data-md-component="search-query" required>
        <label class="md-search__icon md-icon" for="__search">
          <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" aria-hidden="true"><path d="M9.5 3A6.5 6.5 0 0 1 16 9.5c0 1.61-.59 3.09-1.56 4.23l.27.27h.79l5 5-1.5 1.5-5-5v-.79l-.27-.27A6.52 6.52 0 0 1 9.5 16 6.5 6.5 0 0 1 3 9.5 6.5 6.5 0 0 1 9.5 3m0 2C7 5 5 7 5 9.5S7 14 9.5 14 14 12 14 9.5 12 5 9.5 5"/></svg>
          <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" aria-hidden="true"><path d="M20 11v2H8l5.5 5.5-1.42 1.42L4.16 12l7.92-7.92L13.5 5.5 8 11z"/></svg>
        </label>
        <nav class="md-search__options" aria-label="Search">
          <button type="reset" class="md-search__icon md-icon" title="Clear" aria-label="Clear" tabindex="-1">
            <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" aria-hidden="true"><path d="M19 6.41 17.59 5 12 10.59 6.41 5 5 6.41 10.59 12 5 17.59 6.41 19 12 13.41 17.59 19 19 17.59 13.41 12z"/></svg>
          </button>
        </nav>
      </form>
      <div class="md-search__output">
        <div class="md-search__scrollwrap" tabindex="0" data-md-scrollfix>
          <div class="md-search-result" data-md-component="search-result">
            <div class="md-search-result__meta">
              Initializing search
            </div>
            <ol class="md-search-result__list" role="presentation"></ol>
          </div>
        </div>
      </div>
    </div>
  </div>
  <!-- Link to the source repository. -->
  <div class="md-header__source">
    <a href="https://github.com/apache/iceberg-python" title="Go to repository" class="md-source" data-md-component="source">
      <div class="md-source__icon md-icon">
        <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512" aria-hidden="true"><!--! Font Awesome Free 6.7.2 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License) Copyright 2024 Fonticons, Inc.--><path d="M439.55 236.05 244 40.45a28.87 28.87 0 0 0-40.81 0l-40.66 40.63 51.52 51.52c27.06-9.14 52.68 16.77 43.39 43.68l49.66 49.66c34.23-11.8 61.18 31 35.47 56.69-26.49 26.49-70.21-2.87-56-37.34L240.22 199v121.85c25.3 12.54 22.26 41.85 9.08 55a34.34 34.34 0 0 1-48.55 0c-17.57-17.6-11.07-46.91 11.25-56v-123c-20.8-8.51-24.6-30.74-18.64-45L142.57 101 8.45 235.14a28.86 28.86 0 0 0 0 40.81l195.61 195.6a28.86 28.86 0 0 0 40.8 0l194.69-194.69a28.86 28.86 0 0 0 0-40.81"/></svg>
      </div>
      <div class="md-source__repository">
        apache/iceberg-python
      </div>
    </a>
  </div>
</nav>
<!-- Top-level section tabs; the item carrying md-tabs__item--active is "Code Reference". -->
<nav class="md-tabs" data-md-component="tabs" aria-label="Tabs">
  <div class="md-grid">
    <ul class="md-tabs__list">
      <li class="md-tabs__item">
        <a class="md-tabs__link" href="../../../..">
          Getting started
        </a>
      </li>
      <li class="md-tabs__item">
        <a class="md-tabs__link" href="../../../../configuration/">
          Configuration
        </a>
      </li>
      <li class="md-tabs__item">
        <a class="md-tabs__link" href="../../../../cli/">
          CLI
        </a>
      </li>
      <li class="md-tabs__item">
        <a class="md-tabs__link" href="../../../../api/">
          API
        </a>
      </li>
      <li class="md-tabs__item">
        <a class="md-tabs__link" href="../../../../contributing/">
          Contributing
        </a>
      </li>
      <li class="md-tabs__item">
        <a class="md-tabs__link" href="../../../../community/">
          Community
        </a>
      </li>
      <li class="md-tabs__item">
        <a class="md-tabs__link" href="../../../../verify-release/">
          Releases
        </a>
      </li>
      <li class="md-tabs__item md-tabs__item--active">
        <a class="md-tabs__link" href="../../">
          Code Reference
        </a>
      </li>
    </ul>
  </div>
</nav>
</header>
<div class="md-container" data-md-component="container">
<main class="md-main" data-md-component="main">
<div class="md-main__inner md-grid">
<div class="md-sidebar md-sidebar--primary" data-md-component="sidebar" data-md-type="navigation" >
<div class="md-sidebar__scrollwrap">
<div class="md-sidebar__inner">
<nav class="md-nav md-nav--primary md-nav--lifted" aria-label="Navigation" data-md-level="0">
<label class="md-nav__title" for="__drawer">
<a href="../../../.." title="PyIceberg" class="md-nav__button md-logo" aria-label="PyIceberg" data-md-component="logo">
<img src="../../../../assets/images/iceberg-logo-icon.png" alt="logo">
</a>
PyIceberg
</label>
<div class="md-nav__source">
<a href="https://github.com/apache/iceberg-python" title="Go to repository" class="md-source" data-md-component="source">
<div class="md-source__icon md-icon">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512"><!--! Font Awesome Free 6.7.2 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License) Copyright 2024 Fonticons, Inc.--><path d="M439.55 236.05 244 40.45a28.87 28.87 0 0 0-40.81 0l-40.66 40.63 51.52 51.52c27.06-9.14 52.68 16.77 43.39 43.68l49.66 49.66c34.23-11.8 61.18 31 35.47 56.69-26.49 26.49-70.21-2.87-56-37.34L240.22 199v121.85c25.3 12.54 22.26 41.85 9.08 55a34.34 34.34 0 0 1-48.55 0c-17.57-17.6-11.07-46.91 11.25-56v-123c-20.8-8.51-24.6-30.74-18.64-45L142.57 101 8.45 235.14a28.86 28.86 0 0 0 0 40.81l195.61 195.6a28.86 28.86 0 0 0 40.8 0l194.69-194.69a28.86 28.86 0 0 0 0-40.81"/></svg>
</div>
<div class="md-source__repository">
apache/iceberg-python
</div>
</a>
</div>
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item">
<a href="../../../.." class="md-nav__link">
<span class="md-ellipsis">
Getting started
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../../../configuration/" class="md-nav__link">
<span class="md-ellipsis">
Configuration
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../../../cli/" class="md-nav__link">
<span class="md-ellipsis">
CLI
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../../../api/" class="md-nav__link">
<span class="md-ellipsis">
API
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../../../contributing/" class="md-nav__link">
<span class="md-ellipsis">
Contributing
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../../../community/" class="md-nav__link">
<span class="md-ellipsis">
Community
</span>
</a>
</li>
<li class="md-nav__item md-nav__item--nested">
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_7" >
<label class="md-nav__link" for="__nav_7" id="__nav_7_label" tabindex="0">
<span class="md-ellipsis">
Releases
</span>
<span class="md-nav__icon md-icon"></span>
</label>
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_7_label" aria-expanded="false">
<label class="md-nav__title" for="__nav_7">
<span class="md-nav__icon md-icon"></span>
Releases
</label>
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item">
<a href="../../../../verify-release/" class="md-nav__link">
<span class="md-ellipsis">
Verify a release
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../../../how-to-release/" class="md-nav__link">
<span class="md-ellipsis">
How to release
</span>
</a>
</li>
<li class="md-nav__item">
<a href="https://github.com/apache/iceberg-python/releases" class="md-nav__link">
<span class="md-ellipsis">
Release Notes
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../../../nightly-build/" class="md-nav__link">
<span class="md-ellipsis">
Nightly Build
</span>
</a>
</li>
</ul>
</nav>
</li>
<li class="md-nav__item md-nav__item--active md-nav__item--section md-nav__item--nested">
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_8" checked>
<!-- Label for the #__nav_8 checkbox ("Code Reference" section). The generated tabindex="" was dropped:
     an empty string is not a valid integer, so browsers already treat the attribute as absent.
     NOTE(review): confirm no theme CSS/JS selects this label via [tabindex]. -->
<label class="md-nav__link" for="__nav_8" id="__nav_8_label">
  <span class="md-ellipsis">
    Code Reference
  </span>
  <span class="md-nav__icon md-icon"></span>
</label>
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_8_label" aria-expanded="true">
<label class="md-nav__title" for="__nav_8">
<span class="md-nav__icon md-icon"></span>
Code Reference
</label>
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item md-nav__item--active md-nav__item--nested">
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_8_1" checked>
<div class="md-nav__link md-nav__container">
<a href="../../" class="md-nav__link ">
<span class="md-ellipsis">
pyiceberg
</span>
</a>
<label class="md-nav__link " for="__nav_8_1" id="__nav_8_1_label" tabindex="0">
<span class="md-nav__icon md-icon"></span>
</label>
</div>
<nav class="md-nav" data-md-level="2" aria-labelledby="__nav_8_1_label" aria-expanded="true">
<label class="md-nav__title" for="__nav_8_1">
<span class="md-nav__icon md-icon"></span>
pyiceberg
</label>
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item md-nav__item--nested">
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_8_1_1" >
<div class="md-nav__link md-nav__container">
<a href="../../avro/" class="md-nav__link ">
<span class="md-ellipsis">
avro
</span>
</a>
<label class="md-nav__link " for="__nav_8_1_1" id="__nav_8_1_1_label" tabindex="0">
<span class="md-nav__icon md-icon"></span>
</label>
</div>
<nav class="md-nav" data-md-level="3" aria-labelledby="__nav_8_1_1_label" aria-expanded="false">
<label class="md-nav__title" for="__nav_8_1_1">
<span class="md-nav__icon md-icon"></span>
avro
</label>
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item md-nav__item--nested">
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_8_1_1_1" >
<div class="md-nav__link md-nav__container">
<a href="../../avro/codecs/" class="md-nav__link ">
<span class="md-ellipsis">
codecs
</span>
</a>
<label class="md-nav__link " for="__nav_8_1_1_1" id="__nav_8_1_1_1_label" tabindex="0">
<span class="md-nav__icon md-icon"></span>
</label>
</div>
<nav class="md-nav" data-md-level="4" aria-labelledby="__nav_8_1_1_1_label" aria-expanded="false">
<label class="md-nav__title" for="__nav_8_1_1_1">
<span class="md-nav__icon md-icon"></span>
codecs
</label>
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item">
<a href="../../avro/codecs/bzip2/" class="md-nav__link">
<span class="md-ellipsis">
bzip2
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../avro/codecs/codec/" class="md-nav__link">
<span class="md-ellipsis">
codec
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../avro/codecs/deflate/" class="md-nav__link">
<span class="md-ellipsis">
deflate
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../avro/codecs/snappy_codec/" class="md-nav__link">
<span class="md-ellipsis">
snappy_codec
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../avro/codecs/zstandard_codec/" class="md-nav__link">
<span class="md-ellipsis">
zstandard_codec
</span>
</a>
</li>
</ul>
</nav>
</li>
<li class="md-nav__item">
<a href="../../avro/decoder/" class="md-nav__link">
<span class="md-ellipsis">
decoder
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../avro/encoder/" class="md-nav__link">
<span class="md-ellipsis">
encoder
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../avro/file/" class="md-nav__link">
<span class="md-ellipsis">
file
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../avro/reader/" class="md-nav__link">
<span class="md-ellipsis">
reader
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../avro/resolver/" class="md-nav__link">
<span class="md-ellipsis">
resolver
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../avro/writer/" class="md-nav__link">
<span class="md-ellipsis">
writer
</span>
</a>
</li>
</ul>
</nav>
</li>
<li class="md-nav__item md-nav__item--nested">
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_8_1_2" >
<div class="md-nav__link md-nav__container">
<a href="../../catalog/" class="md-nav__link ">
<span class="md-ellipsis">
catalog
</span>
</a>
<label class="md-nav__link " for="__nav_8_1_2" id="__nav_8_1_2_label" tabindex="0">
<span class="md-nav__icon md-icon"></span>
</label>
</div>
<nav class="md-nav" data-md-level="3" aria-labelledby="__nav_8_1_2_label" aria-expanded="false">
<label class="md-nav__title" for="__nav_8_1_2">
<span class="md-nav__icon md-icon"></span>
catalog
</label>
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item">
<a href="../../catalog/dynamodb/" class="md-nav__link">
<span class="md-ellipsis">
dynamodb
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../catalog/glue/" class="md-nav__link">
<span class="md-ellipsis">
glue
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../catalog/hive/" class="md-nav__link">
<span class="md-ellipsis">
hive
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../catalog/memory/" class="md-nav__link">
<span class="md-ellipsis">
memory
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../catalog/noop/" class="md-nav__link">
<span class="md-ellipsis">
noop
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../catalog/rest/" class="md-nav__link">
<span class="md-ellipsis">
rest
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../catalog/sql/" class="md-nav__link">
<span class="md-ellipsis">
sql
</span>
</a>
</li>
</ul>
</nav>
</li>
<li class="md-nav__item md-nav__item--nested">
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_8_1_3" >
<div class="md-nav__link md-nav__container">
<a href="../../cli/" class="md-nav__link ">
<span class="md-ellipsis">
cli
</span>
</a>
<label class="md-nav__link " for="__nav_8_1_3" id="__nav_8_1_3_label" tabindex="0">
<span class="md-nav__icon md-icon"></span>
</label>
</div>
<nav class="md-nav" data-md-level="3" aria-labelledby="__nav_8_1_3_label" aria-expanded="false">
<label class="md-nav__title" for="__nav_8_1_3">
<span class="md-nav__icon md-icon"></span>
cli
</label>
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item">
<a href="../../cli/console/" class="md-nav__link">
<span class="md-ellipsis">
console
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../cli/output/" class="md-nav__link">
<span class="md-ellipsis">
output
</span>
</a>
</li>
</ul>
</nav>
</li>
<li class="md-nav__item">
<a href="../../conversions/" class="md-nav__link">
<span class="md-ellipsis">
conversions
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../exceptions/" class="md-nav__link">
<span class="md-ellipsis">
exceptions
</span>
</a>
</li>
<li class="md-nav__item md-nav__item--nested">
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_8_1_6" >
<div class="md-nav__link md-nav__container">
<a href="../../expressions/" class="md-nav__link ">
<span class="md-ellipsis">
expressions
</span>
</a>
<label class="md-nav__link " for="__nav_8_1_6" id="__nav_8_1_6_label" tabindex="0">
<span class="md-nav__icon md-icon"></span>
</label>
</div>
<nav class="md-nav" data-md-level="3" aria-labelledby="__nav_8_1_6_label" aria-expanded="false">
<label class="md-nav__title" for="__nav_8_1_6">
<span class="md-nav__icon md-icon"></span>
expressions
</label>
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item">
<a href="../../expressions/literals/" class="md-nav__link">
<span class="md-ellipsis">
literals
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../expressions/parser/" class="md-nav__link">
<span class="md-ellipsis">
parser
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../expressions/visitors/" class="md-nav__link">
<span class="md-ellipsis">
visitors
</span>
</a>
</li>
</ul>
</nav>
</li>
<li class="md-nav__item md-nav__item--active md-nav__item--nested">
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_8_1_7" checked>
<div class="md-nav__link md-nav__container">
<a href="../" class="md-nav__link ">
<span class="md-ellipsis">
io
</span>
</a>
<label class="md-nav__link " for="__nav_8_1_7" id="__nav_8_1_7_label" tabindex="0">
<span class="md-nav__icon md-icon"></span>
</label>
</div>
<nav class="md-nav" data-md-level="3" aria-labelledby="__nav_8_1_7_label" aria-expanded="true">
<label class="md-nav__title" for="__nav_8_1_7">
<span class="md-nav__icon md-icon"></span>
io
</label>
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item">
<a href="../fsspec/" class="md-nav__link">
<span class="md-ellipsis">
fsspec
</span>
</a>
</li>
<li class="md-nav__item md-nav__item--active">
<input class="md-nav__toggle md-toggle" type="checkbox" id="__toc">
<label class="md-nav__link md-nav__link--active" for="__toc">
<span class="md-ellipsis">
pyarrow
</span>
<span class="md-nav__icon md-icon"></span>
</label>
<a href="./" class="md-nav__link md-nav__link--active">
<span class="md-ellipsis">
pyarrow
</span>
</a>
<nav class="md-nav md-nav--secondary" aria-label="Table of contents">
<label class="md-nav__title" for="__toc">
<span class="md-nav__icon md-icon"></span>
Table of contents
</label>
<ul class="md-nav__list" data-md-component="toc" data-md-scrollfix>
<li class="md-nav__item">
<a href="#pyiceberg.io.pyarrow" class="md-nav__link">
<span class="md-ellipsis">
pyarrow
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#pyiceberg.io.pyarrow.ArrowScan" class="md-nav__link">
<span class="md-ellipsis">
ArrowScan
</span>
</a>
<nav class="md-nav" aria-label="ArrowScan">
<ul class="md-nav__list">
<li class="md-nav__item">
<a href="#pyiceberg.io.pyarrow.ArrowScan._limit" class="md-nav__link">
<span class="md-ellipsis">
_limit
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#pyiceberg.io.pyarrow.ArrowScan._projected_field_ids" class="md-nav__link">
<span class="md-ellipsis">
_projected_field_ids
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#pyiceberg.io.pyarrow.ArrowScan._use_large_types" class="md-nav__link">
<span class="md-ellipsis">
_use_large_types
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#pyiceberg.io.pyarrow.ArrowScan.to_record_batches" class="md-nav__link">
<span class="md-ellipsis">
to_record_batches
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#pyiceberg.io.pyarrow.ArrowScan.to_table" class="md-nav__link">
<span class="md-ellipsis">
to_table
</span>
</a>
</li>
</ul>
</nav>
</li>
<li class="md-nav__item">
<a href="#pyiceberg.io.pyarrow.PyArrowFile" class="md-nav__link">
<span class="md-ellipsis">
PyArrowFile
</span>
</a>
<nav class="md-nav" aria-label="PyArrowFile">
<ul class="md-nav__list">
<li class="md-nav__item">
<a href="#pyiceberg.io.pyarrow.PyArrowFile.__len__" class="md-nav__link">
<span class="md-ellipsis">
__len__
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#pyiceberg.io.pyarrow.PyArrowFile._file_info" class="md-nav__link">
<span class="md-ellipsis">
_file_info
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#pyiceberg.io.pyarrow.PyArrowFile.create" class="md-nav__link">
<span class="md-ellipsis">
create
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#pyiceberg.io.pyarrow.PyArrowFile.exists" class="md-nav__link">
<span class="md-ellipsis">
exists
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#pyiceberg.io.pyarrow.PyArrowFile.open" class="md-nav__link">
<span class="md-ellipsis">
open
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#pyiceberg.io.pyarrow.PyArrowFile.to_input_file" class="md-nav__link">
<span class="md-ellipsis">
to_input_file
</span>
</a>
</li>
</ul>
</nav>
</li>
<li class="md-nav__item">
<a href="#pyiceberg.io.pyarrow.PyArrowFileIO" class="md-nav__link">
<span class="md-ellipsis">
PyArrowFileIO
</span>
</a>
<nav class="md-nav" aria-label="PyArrowFileIO">
<ul class="md-nav__list">
<li class="md-nav__item">
<a href="#pyiceberg.io.pyarrow.PyArrowFileIO.__getstate__" class="md-nav__link">
<span class="md-ellipsis">
__getstate__
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#pyiceberg.io.pyarrow.PyArrowFileIO.__setstate__" class="md-nav__link">
<span class="md-ellipsis">
__setstate__
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#pyiceberg.io.pyarrow.PyArrowFileIO._initialize_fs" class="md-nav__link">
<span class="md-ellipsis">
_initialize_fs
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#pyiceberg.io.pyarrow.PyArrowFileIO.delete" class="md-nav__link">
<span class="md-ellipsis">
delete
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#pyiceberg.io.pyarrow.PyArrowFileIO.new_input" class="md-nav__link">
<span class="md-ellipsis">
new_input
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#pyiceberg.io.pyarrow.PyArrowFileIO.new_output" class="md-nav__link">
<span class="md-ellipsis">
new_output
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#pyiceberg.io.pyarrow.PyArrowFileIO.parse_location" class="md-nav__link">
<span class="md-ellipsis">
parse_location
</span>
</a>
</li>
</ul>
</nav>
</li>
<li class="md-nav__item">
<a href="#pyiceberg.io.pyarrow.PyArrowSchemaVisitor" class="md-nav__link">
<span class="md-ellipsis">
PyArrowSchemaVisitor
</span>
</a>
<nav class="md-nav" aria-label="PyArrowSchemaVisitor">
<ul class="md-nav__list">
<li class="md-nav__item">
<a href="#pyiceberg.io.pyarrow.PyArrowSchemaVisitor.after_field" class="md-nav__link">
<span class="md-ellipsis">
after_field
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#pyiceberg.io.pyarrow.PyArrowSchemaVisitor.after_list_element" class="md-nav__link">
<span class="md-ellipsis">
after_list_element
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#pyiceberg.io.pyarrow.PyArrowSchemaVisitor.after_map_key" class="md-nav__link">
<span class="md-ellipsis">
after_map_key
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#pyiceberg.io.pyarrow.PyArrowSchemaVisitor.after_map_value" class="md-nav__link">
<span class="md-ellipsis">
after_map_value
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#pyiceberg.io.pyarrow.PyArrowSchemaVisitor.before_field" class="md-nav__link">
<span class="md-ellipsis">
before_field
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#pyiceberg.io.pyarrow.PyArrowSchemaVisitor.before_list_element" class="md-nav__link">
<span class="md-ellipsis">
before_list_element
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#pyiceberg.io.pyarrow.PyArrowSchemaVisitor.before_map_key" class="md-nav__link">
<span class="md-ellipsis">
before_map_key
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#pyiceberg.io.pyarrow.PyArrowSchemaVisitor.before_map_value" class="md-nav__link">
<span class="md-ellipsis">
before_map_value
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#pyiceberg.io.pyarrow.PyArrowSchemaVisitor.field" class="md-nav__link">
<span class="md-ellipsis">
field
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#pyiceberg.io.pyarrow.PyArrowSchemaVisitor.list" class="md-nav__link">
<span class="md-ellipsis">
list
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#pyiceberg.io.pyarrow.PyArrowSchemaVisitor.map" class="md-nav__link">
<span class="md-ellipsis">
map
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#pyiceberg.io.pyarrow.PyArrowSchemaVisitor.primitive" class="md-nav__link">
<span class="md-ellipsis">
primitive
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#pyiceberg.io.pyarrow.PyArrowSchemaVisitor.schema" class="md-nav__link">
<span class="md-ellipsis">
schema
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#pyiceberg.io.pyarrow.PyArrowSchemaVisitor.struct" class="md-nav__link">
<span class="md-ellipsis">
struct
</span>
</a>
</li>
</ul>
</nav>
</li>
<li class="md-nav__item">
<a href="#pyiceberg.io.pyarrow.UnsupportedPyArrowTypeException" class="md-nav__link">
<span class="md-ellipsis">
UnsupportedPyArrowTypeException
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#pyiceberg.io.pyarrow._ConvertToIceberg" class="md-nav__link">
<span class="md-ellipsis">
_ConvertToIceberg
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#pyiceberg.io.pyarrow._ConvertToIcebergWithoutIDs" class="md-nav__link">
<span class="md-ellipsis">
_ConvertToIcebergWithoutIDs
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#pyiceberg.io.pyarrow._NullNaNUnmentionedTermsCollector" class="md-nav__link">
<span class="md-ellipsis">
_NullNaNUnmentionedTermsCollector
</span>
</a>
<nav class="md-nav" aria-label="_NullNaNUnmentionedTermsCollector">
<ul class="md-nav__list">
<li class="md-nav__item">
<a href="#pyiceberg.io.pyarrow._NullNaNUnmentionedTermsCollector._handle_explicit_is_nan_or_not" class="md-nav__link">
<span class="md-ellipsis">
_handle_explicit_is_nan_or_not
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#pyiceberg.io.pyarrow._NullNaNUnmentionedTermsCollector._handle_explicit_is_null_or_not" class="md-nav__link">
<span class="md-ellipsis">
_handle_explicit_is_null_or_not
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#pyiceberg.io.pyarrow._NullNaNUnmentionedTermsCollector._handle_nan_unmentioned" class="md-nav__link">
<span class="md-ellipsis">
_handle_nan_unmentioned
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#pyiceberg.io.pyarrow._NullNaNUnmentionedTermsCollector._handle_null_unmentioned" class="md-nav__link">
<span class="md-ellipsis">
_handle_null_unmentioned
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#pyiceberg.io.pyarrow._NullNaNUnmentionedTermsCollector.collect" class="md-nav__link">
<span class="md-ellipsis">
collect
</span>
</a>
</li>
</ul>
</nav>
</li>
<li class="md-nav__item">
<a href="#pyiceberg.io.pyarrow._check_pyarrow_schema_compatible" class="md-nav__link">
<span class="md-ellipsis">
_check_pyarrow_schema_compatible
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#pyiceberg.io.pyarrow._dataframe_to_data_files" class="md-nav__link">
<span class="md-ellipsis">
_dataframe_to_data_files
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#pyiceberg.io.pyarrow._determine_partitions" class="md-nav__link">
<span class="md-ellipsis">
_determine_partitions
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#pyiceberg.io.pyarrow._expression_to_complementary_pyarrow" class="md-nav__link">
<span class="md-ellipsis">
_expression_to_complementary_pyarrow
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#pyiceberg.io.pyarrow._get_column_projection_values" class="md-nav__link">
<span class="md-ellipsis">
_get_column_projection_values
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#pyiceberg.io.pyarrow.compute_statistics_plan" class="md-nav__link">
<span class="md-ellipsis">
compute_statistics_plan
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#pyiceberg.io.pyarrow.data_file_statistics_from_parquet_metadata" class="md-nav__link">
<span class="md-ellipsis">
data_file_statistics_from_parquet_metadata
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#pyiceberg.io.pyarrow.parquet_path_to_id_mapping" class="md-nav__link">
<span class="md-ellipsis">
parquet_path_to_id_mapping
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#pyiceberg.io.pyarrow.visit_pyarrow" class="md-nav__link">
<span class="md-ellipsis">
visit_pyarrow
</span>
</a>
</li>
</ul>
</nav>
</li>
</ul>
</nav>
</li>
<li class="md-nav__item">
<a href="../../manifest/" class="md-nav__link">
<span class="md-ellipsis">
manifest
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../partitioning/" class="md-nav__link">
<span class="md-ellipsis">
partitioning
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../schema/" class="md-nav__link">
<span class="md-ellipsis">
schema
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../serializers/" class="md-nav__link">
<span class="md-ellipsis">
serializers
</span>
</a>
</li>
<li class="md-nav__item md-nav__item--nested">
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_8_1_12" >
<div class="md-nav__link md-nav__container">
<a href="../../table/" class="md-nav__link ">
<span class="md-ellipsis">
table
</span>
</a>
<label class="md-nav__link " for="__nav_8_1_12" id="__nav_8_1_12_label" tabindex="0">
<span class="md-nav__icon md-icon"></span>
</label>
</div>
<nav class="md-nav" data-md-level="3" aria-labelledby="__nav_8_1_12_label" aria-expanded="false">
<label class="md-nav__title" for="__nav_8_1_12">
<span class="md-nav__icon md-icon"></span>
table
</label>
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item">
<a href="../../table/inspect/" class="md-nav__link">
<span class="md-ellipsis">
inspect
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../table/locations/" class="md-nav__link">
<span class="md-ellipsis">
locations
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../table/metadata/" class="md-nav__link">
<span class="md-ellipsis">
metadata
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../table/name_mapping/" class="md-nav__link">
<span class="md-ellipsis">
name_mapping
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../table/refs/" class="md-nav__link">
<span class="md-ellipsis">
refs
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../table/snapshots/" class="md-nav__link">
<span class="md-ellipsis">
snapshots
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../table/sorting/" class="md-nav__link">
<span class="md-ellipsis">
sorting
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../table/statistics/" class="md-nav__link">
<span class="md-ellipsis">
statistics
</span>
</a>
</li>
<li class="md-nav__item md-nav__item--nested">
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_8_1_12_9" >
<div class="md-nav__link md-nav__container">
<a href="../../table/update/" class="md-nav__link ">
<span class="md-ellipsis">
update
</span>
</a>
<label class="md-nav__link " for="__nav_8_1_12_9" id="__nav_8_1_12_9_label" tabindex="0">
<span class="md-nav__icon md-icon"></span>
</label>
</div>
<nav class="md-nav" data-md-level="4" aria-labelledby="__nav_8_1_12_9_label" aria-expanded="false">
<label class="md-nav__title" for="__nav_8_1_12_9">
<span class="md-nav__icon md-icon"></span>
update
</label>
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item">
<a href="../../table/update/schema/" class="md-nav__link">
<span class="md-ellipsis">
schema
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../table/update/snapshot/" class="md-nav__link">
<span class="md-ellipsis">
snapshot
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../table/update/spec/" class="md-nav__link">
<span class="md-ellipsis">
spec
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../table/update/statistics/" class="md-nav__link">
<span class="md-ellipsis">
statistics
</span>
</a>
</li>
</ul>
</nav>
</li>
<li class="md-nav__item">
<a href="../../table/upsert_util/" class="md-nav__link">
<span class="md-ellipsis">
upsert_util
</span>
</a>
</li>
</ul>
</nav>
</li>
<li class="md-nav__item">
<a href="../../transforms/" class="md-nav__link">
<span class="md-ellipsis">
transforms
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../typedef/" class="md-nav__link">
<span class="md-ellipsis">
typedef
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../types/" class="md-nav__link">
<span class="md-ellipsis">
types
</span>
</a>
</li>
<li class="md-nav__item md-nav__item--nested">
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_8_1_16" >
<div class="md-nav__link md-nav__container">
<a href="../../utils/" class="md-nav__link ">
<span class="md-ellipsis">
utils
</span>
</a>
<label class="md-nav__link " for="__nav_8_1_16" id="__nav_8_1_16_label" tabindex="0">
<span class="md-nav__icon md-icon"></span>
</label>
</div>
<nav class="md-nav" data-md-level="3" aria-labelledby="__nav_8_1_16_label" aria-expanded="false">
<label class="md-nav__title" for="__nav_8_1_16">
<span class="md-nav__icon md-icon"></span>
utils
</label>
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item">
<a href="../../utils/bin_packing/" class="md-nav__link">
<span class="md-ellipsis">
bin_packing
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../utils/concurrent/" class="md-nav__link">
<span class="md-ellipsis">
concurrent
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../utils/config/" class="md-nav__link">
<span class="md-ellipsis">
config
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../utils/datetime/" class="md-nav__link">
<span class="md-ellipsis">
datetime
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../utils/decimal/" class="md-nav__link">
<span class="md-ellipsis">
decimal
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../utils/deprecated/" class="md-nav__link">
<span class="md-ellipsis">
deprecated
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../utils/lazydict/" class="md-nav__link">
<span class="md-ellipsis">
lazydict
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../utils/parsing/" class="md-nav__link">
<span class="md-ellipsis">
parsing
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../utils/properties/" class="md-nav__link">
<span class="md-ellipsis">
properties
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../utils/schema_conversion/" class="md-nav__link">
<span class="md-ellipsis">
schema_conversion
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../utils/singleton/" class="md-nav__link">
<span class="md-ellipsis">
singleton
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../utils/truncate/" class="md-nav__link">
<span class="md-ellipsis">
truncate
</span>
</a>
</li>
</ul>
</nav>
</li>
</ul>
</nav>
</li>
</ul>
</nav>
</li>
</ul>
</nav>
</div>
</div>
</div>
<div class="md-sidebar md-sidebar--secondary" data-md-component="sidebar" data-md-type="toc" >
<div class="md-sidebar__scrollwrap">
<div class="md-sidebar__inner">
<nav class="md-nav md-nav--secondary" aria-label="Table of contents">
<label class="md-nav__title" for="__toc">
<span class="md-nav__icon md-icon"></span>
Table of contents
</label>
<ul class="md-nav__list" data-md-component="toc" data-md-scrollfix>
<li class="md-nav__item">
<a href="#pyiceberg.io.pyarrow" class="md-nav__link">
<span class="md-ellipsis">
pyarrow
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#pyiceberg.io.pyarrow.ArrowScan" class="md-nav__link">
<span class="md-ellipsis">
ArrowScan
</span>
</a>
<nav class="md-nav" aria-label="ArrowScan">
<ul class="md-nav__list">
<li class="md-nav__item">
<a href="#pyiceberg.io.pyarrow.ArrowScan._limit" class="md-nav__link">
<span class="md-ellipsis">
_limit
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#pyiceberg.io.pyarrow.ArrowScan._projected_field_ids" class="md-nav__link">
<span class="md-ellipsis">
_projected_field_ids
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#pyiceberg.io.pyarrow.ArrowScan._use_large_types" class="md-nav__link">
<span class="md-ellipsis">
_use_large_types
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#pyiceberg.io.pyarrow.ArrowScan.to_record_batches" class="md-nav__link">
<span class="md-ellipsis">
to_record_batches
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#pyiceberg.io.pyarrow.ArrowScan.to_table" class="md-nav__link">
<span class="md-ellipsis">
to_table
</span>
</a>
</li>
</ul>
</nav>
</li>
<li class="md-nav__item">
<a href="#pyiceberg.io.pyarrow.PyArrowFile" class="md-nav__link">
<span class="md-ellipsis">
PyArrowFile
</span>
</a>
<nav class="md-nav" aria-label="PyArrowFile">
<ul class="md-nav__list">
<li class="md-nav__item">
<a href="#pyiceberg.io.pyarrow.PyArrowFile.__len__" class="md-nav__link">
<span class="md-ellipsis">
__len__
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#pyiceberg.io.pyarrow.PyArrowFile._file_info" class="md-nav__link">
<span class="md-ellipsis">
_file_info
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#pyiceberg.io.pyarrow.PyArrowFile.create" class="md-nav__link">
<span class="md-ellipsis">
create
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#pyiceberg.io.pyarrow.PyArrowFile.exists" class="md-nav__link">
<span class="md-ellipsis">
exists
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#pyiceberg.io.pyarrow.PyArrowFile.open" class="md-nav__link">
<span class="md-ellipsis">
open
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#pyiceberg.io.pyarrow.PyArrowFile.to_input_file" class="md-nav__link">
<span class="md-ellipsis">
to_input_file
</span>
</a>
</li>
</ul>
</nav>
</li>
<li class="md-nav__item">
<a href="#pyiceberg.io.pyarrow.PyArrowFileIO" class="md-nav__link">
<span class="md-ellipsis">
PyArrowFileIO
</span>
</a>
<nav class="md-nav" aria-label="PyArrowFileIO">
<ul class="md-nav__list">
<li class="md-nav__item">
<a href="#pyiceberg.io.pyarrow.PyArrowFileIO.__getstate__" class="md-nav__link">
<span class="md-ellipsis">
__getstate__
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#pyiceberg.io.pyarrow.PyArrowFileIO.__setstate__" class="md-nav__link">
<span class="md-ellipsis">
__setstate__
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#pyiceberg.io.pyarrow.PyArrowFileIO._initialize_fs" class="md-nav__link">
<span class="md-ellipsis">
_initialize_fs
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#pyiceberg.io.pyarrow.PyArrowFileIO.delete" class="md-nav__link">
<span class="md-ellipsis">
delete
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#pyiceberg.io.pyarrow.PyArrowFileIO.new_input" class="md-nav__link">
<span class="md-ellipsis">
new_input
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#pyiceberg.io.pyarrow.PyArrowFileIO.new_output" class="md-nav__link">
<span class="md-ellipsis">
new_output
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#pyiceberg.io.pyarrow.PyArrowFileIO.parse_location" class="md-nav__link">
<span class="md-ellipsis">
parse_location
</span>
</a>
</li>
</ul>
</nav>
</li>
<li class="md-nav__item">
<a href="#pyiceberg.io.pyarrow.PyArrowSchemaVisitor" class="md-nav__link">
<span class="md-ellipsis">
PyArrowSchemaVisitor
</span>
</a>
<nav class="md-nav" aria-label="PyArrowSchemaVisitor">
<ul class="md-nav__list">
<li class="md-nav__item">
<a href="#pyiceberg.io.pyarrow.PyArrowSchemaVisitor.after_field" class="md-nav__link">
<span class="md-ellipsis">
after_field
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#pyiceberg.io.pyarrow.PyArrowSchemaVisitor.after_list_element" class="md-nav__link">
<span class="md-ellipsis">
after_list_element
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#pyiceberg.io.pyarrow.PyArrowSchemaVisitor.after_map_key" class="md-nav__link">
<span class="md-ellipsis">
after_map_key
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#pyiceberg.io.pyarrow.PyArrowSchemaVisitor.after_map_value" class="md-nav__link">
<span class="md-ellipsis">
after_map_value
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#pyiceberg.io.pyarrow.PyArrowSchemaVisitor.before_field" class="md-nav__link">
<span class="md-ellipsis">
before_field
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#pyiceberg.io.pyarrow.PyArrowSchemaVisitor.before_list_element" class="md-nav__link">
<span class="md-ellipsis">
before_list_element
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#pyiceberg.io.pyarrow.PyArrowSchemaVisitor.before_map_key" class="md-nav__link">
<span class="md-ellipsis">
before_map_key
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#pyiceberg.io.pyarrow.PyArrowSchemaVisitor.before_map_value" class="md-nav__link">
<span class="md-ellipsis">
before_map_value
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#pyiceberg.io.pyarrow.PyArrowSchemaVisitor.field" class="md-nav__link">
<span class="md-ellipsis">
field
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#pyiceberg.io.pyarrow.PyArrowSchemaVisitor.list" class="md-nav__link">
<span class="md-ellipsis">
list
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#pyiceberg.io.pyarrow.PyArrowSchemaVisitor.map" class="md-nav__link">
<span class="md-ellipsis">
map
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#pyiceberg.io.pyarrow.PyArrowSchemaVisitor.primitive" class="md-nav__link">
<span class="md-ellipsis">
primitive
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#pyiceberg.io.pyarrow.PyArrowSchemaVisitor.schema" class="md-nav__link">
<span class="md-ellipsis">
schema
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#pyiceberg.io.pyarrow.PyArrowSchemaVisitor.struct" class="md-nav__link">
<span class="md-ellipsis">
struct
</span>
</a>
</li>
</ul>
</nav>
</li>
<li class="md-nav__item">
<a href="#pyiceberg.io.pyarrow.UnsupportedPyArrowTypeException" class="md-nav__link">
<span class="md-ellipsis">
UnsupportedPyArrowTypeException
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#pyiceberg.io.pyarrow._ConvertToIceberg" class="md-nav__link">
<span class="md-ellipsis">
_ConvertToIceberg
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#pyiceberg.io.pyarrow._ConvertToIcebergWithoutIDs" class="md-nav__link">
<span class="md-ellipsis">
_ConvertToIcebergWithoutIDs
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#pyiceberg.io.pyarrow._NullNaNUnmentionedTermsCollector" class="md-nav__link">
<span class="md-ellipsis">
_NullNaNUnmentionedTermsCollector
</span>
</a>
<nav class="md-nav" aria-label="_NullNaNUnmentionedTermsCollector">
<ul class="md-nav__list">
<li class="md-nav__item">
<a href="#pyiceberg.io.pyarrow._NullNaNUnmentionedTermsCollector._handle_explicit_is_nan_or_not" class="md-nav__link">
<span class="md-ellipsis">
_handle_explicit_is_nan_or_not
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#pyiceberg.io.pyarrow._NullNaNUnmentionedTermsCollector._handle_explicit_is_null_or_not" class="md-nav__link">
<span class="md-ellipsis">
_handle_explicit_is_null_or_not
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#pyiceberg.io.pyarrow._NullNaNUnmentionedTermsCollector._handle_nan_unmentioned" class="md-nav__link">
<span class="md-ellipsis">
_handle_nan_unmentioned
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#pyiceberg.io.pyarrow._NullNaNUnmentionedTermsCollector._handle_null_unmentioned" class="md-nav__link">
<span class="md-ellipsis">
_handle_null_unmentioned
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#pyiceberg.io.pyarrow._NullNaNUnmentionedTermsCollector.collect" class="md-nav__link">
<span class="md-ellipsis">
collect
</span>
</a>
</li>
</ul>
</nav>
</li>
<li class="md-nav__item">
<a href="#pyiceberg.io.pyarrow._check_pyarrow_schema_compatible" class="md-nav__link">
<span class="md-ellipsis">
_check_pyarrow_schema_compatible
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#pyiceberg.io.pyarrow._dataframe_to_data_files" class="md-nav__link">
<span class="md-ellipsis">
_dataframe_to_data_files
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#pyiceberg.io.pyarrow._determine_partitions" class="md-nav__link">
<span class="md-ellipsis">
_determine_partitions
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#pyiceberg.io.pyarrow._expression_to_complementary_pyarrow" class="md-nav__link">
<span class="md-ellipsis">
_expression_to_complementary_pyarrow
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#pyiceberg.io.pyarrow._get_column_projection_values" class="md-nav__link">
<span class="md-ellipsis">
_get_column_projection_values
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#pyiceberg.io.pyarrow.compute_statistics_plan" class="md-nav__link">
<span class="md-ellipsis">
compute_statistics_plan
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#pyiceberg.io.pyarrow.data_file_statistics_from_parquet_metadata" class="md-nav__link">
<span class="md-ellipsis">
data_file_statistics_from_parquet_metadata
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#pyiceberg.io.pyarrow.parquet_path_to_id_mapping" class="md-nav__link">
<span class="md-ellipsis">
parquet_path_to_id_mapping
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#pyiceberg.io.pyarrow.visit_pyarrow" class="md-nav__link">
<span class="md-ellipsis">
visit_pyarrow
</span>
</a>
</li>
</ul>
</nav>
</div>
</div>
</div>
<div class="md-content" data-md-component="content">
<article class="md-content__inner md-typeset">
<h1>pyarrow</h1>
<div class="doc doc-object doc-module">
<a id="pyiceberg.io.pyarrow"></a>
<div class="doc doc-contents first">
<p>FileIO implementation for reading and writing table files that uses pyarrow.fs.</p>
<p>This module contains a FileIO implementation that relies on the filesystem interface provided
by PyArrow. It uses PyArrow's <code>from_uri</code> method, which infers the correct filesystem
type to use. In theory, this allows the set of supported storage types to grow naturally
with the PyArrow library.</p>
<div class="doc doc-children">
<div class="doc doc-object doc-class">
<h2 id="pyiceberg.io.pyarrow.ArrowScan" class="doc doc-heading">
<code>ArrowScan</code>
<a href="#pyiceberg.io.pyarrow.ArrowScan" class="headerlink" title="Permanent link">&para;</a></h2>
<div class="doc doc-contents ">
<details class="quote">
<summary>Source code in <code>pyiceberg/io/pyarrow.py</code></summary>
<div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-1484">1484</a></span>
<span class="normal"><a href="#__codelineno-0-1485">1485</a></span>
<span class="normal"><a href="#__codelineno-0-1486">1486</a></span>
<span class="normal"><a href="#__codelineno-0-1487">1487</a></span>
<span class="normal"><a href="#__codelineno-0-1488">1488</a></span>
<span class="normal"><a href="#__codelineno-0-1489">1489</a></span>
<span class="normal"><a href="#__codelineno-0-1490">1490</a></span>
<span class="normal"><a href="#__codelineno-0-1491">1491</a></span>
<span class="normal"><a href="#__codelineno-0-1492">1492</a></span>
<span class="normal"><a href="#__codelineno-0-1493">1493</a></span>
<span class="normal"><a href="#__codelineno-0-1494">1494</a></span>
<span class="normal"><a href="#__codelineno-0-1495">1495</a></span>
<span class="normal"><a href="#__codelineno-0-1496">1496</a></span>
<span class="normal"><a href="#__codelineno-0-1497">1497</a></span>
<span class="normal"><a href="#__codelineno-0-1498">1498</a></span>
<span class="normal"><a href="#__codelineno-0-1499">1499</a></span>
<span class="normal"><a href="#__codelineno-0-1500">1500</a></span>
<span class="normal"><a href="#__codelineno-0-1501">1501</a></span>
<span class="normal"><a href="#__codelineno-0-1502">1502</a></span>
<span class="normal"><a href="#__codelineno-0-1503">1503</a></span>
<span class="normal"><a href="#__codelineno-0-1504">1504</a></span>
<span class="normal"><a href="#__codelineno-0-1505">1505</a></span>
<span class="normal"><a href="#__codelineno-0-1506">1506</a></span>
<span class="normal"><a href="#__codelineno-0-1507">1507</a></span>
<span class="normal"><a href="#__codelineno-0-1508">1508</a></span>
<span class="normal"><a href="#__codelineno-0-1509">1509</a></span>
<span class="normal"><a href="#__codelineno-0-1510">1510</a></span>
<span class="normal"><a href="#__codelineno-0-1511">1511</a></span>
<span class="normal"><a href="#__codelineno-0-1512">1512</a></span>
<span class="normal"><a href="#__codelineno-0-1513">1513</a></span>
<span class="normal"><a href="#__codelineno-0-1514">1514</a></span>
<span class="normal"><a href="#__codelineno-0-1515">1515</a></span>
<span class="normal"><a href="#__codelineno-0-1516">1516</a></span>
<span class="normal"><a href="#__codelineno-0-1517">1517</a></span>
<span class="normal"><a href="#__codelineno-0-1518">1518</a></span>
<span class="normal"><a href="#__codelineno-0-1519">1519</a></span>
<span class="normal"><a href="#__codelineno-0-1520">1520</a></span>
<span class="normal"><a href="#__codelineno-0-1521">1521</a></span>
<span class="normal"><a href="#__codelineno-0-1522">1522</a></span>
<span class="normal"><a href="#__codelineno-0-1523">1523</a></span>
<span class="normal"><a href="#__codelineno-0-1524">1524</a></span>
<span class="normal"><a href="#__codelineno-0-1525">1525</a></span>
<span class="normal"><a href="#__codelineno-0-1526">1526</a></span>
<span class="normal"><a href="#__codelineno-0-1527">1527</a></span>
<span class="normal"><a href="#__codelineno-0-1528">1528</a></span>
<span class="normal"><a href="#__codelineno-0-1529">1529</a></span>
<span class="normal"><a href="#__codelineno-0-1530">1530</a></span>
<span class="normal"><a href="#__codelineno-0-1531">1531</a></span>
<span class="normal"><a href="#__codelineno-0-1532">1532</a></span>
<span class="normal"><a href="#__codelineno-0-1533">1533</a></span>
<span class="normal"><a href="#__codelineno-0-1534">1534</a></span>
<span class="normal"><a href="#__codelineno-0-1535">1535</a></span>
<span class="normal"><a href="#__codelineno-0-1536">1536</a></span>
<span class="normal"><a href="#__codelineno-0-1537">1537</a></span>
<span class="normal"><a href="#__codelineno-0-1538">1538</a></span>
<span class="normal"><a href="#__codelineno-0-1539">1539</a></span>
<span class="normal"><a href="#__codelineno-0-1540">1540</a></span>
<span class="normal"><a href="#__codelineno-0-1541">1541</a></span>
<span class="normal"><a href="#__codelineno-0-1542">1542</a></span>
<span class="normal"><a href="#__codelineno-0-1543">1543</a></span>
<span class="normal"><a href="#__codelineno-0-1544">1544</a></span>
<span class="normal"><a href="#__codelineno-0-1545">1545</a></span>
<span class="normal"><a href="#__codelineno-0-1546">1546</a></span>
<span class="normal"><a href="#__codelineno-0-1547">1547</a></span>
<span class="normal"><a href="#__codelineno-0-1548">1548</a></span>
<span class="normal"><a href="#__codelineno-0-1549">1549</a></span>
<span class="normal"><a href="#__codelineno-0-1550">1550</a></span>
<span class="normal"><a href="#__codelineno-0-1551">1551</a></span>
<span class="normal"><a href="#__codelineno-0-1552">1552</a></span>
<span class="normal"><a href="#__codelineno-0-1553">1553</a></span>
<span class="normal"><a href="#__codelineno-0-1554">1554</a></span>
<span class="normal"><a href="#__codelineno-0-1555">1555</a></span>
<span class="normal"><a href="#__codelineno-0-1556">1556</a></span>
<span class="normal"><a href="#__codelineno-0-1557">1557</a></span>
<span class="normal"><a href="#__codelineno-0-1558">1558</a></span>
<span class="normal"><a href="#__codelineno-0-1559">1559</a></span>
<span class="normal"><a href="#__codelineno-0-1560">1560</a></span>
<span class="normal"><a href="#__codelineno-0-1561">1561</a></span>
<span class="normal"><a href="#__codelineno-0-1562">1562</a></span>
<span class="normal"><a href="#__codelineno-0-1563">1563</a></span>
<span class="normal"><a href="#__codelineno-0-1564">1564</a></span>
<span class="normal"><a href="#__codelineno-0-1565">1565</a></span>
<span class="normal"><a href="#__codelineno-0-1566">1566</a></span>
<span class="normal"><a href="#__codelineno-0-1567">1567</a></span>
<span class="normal"><a href="#__codelineno-0-1568">1568</a></span>
<span class="normal"><a href="#__codelineno-0-1569">1569</a></span>
<span class="normal"><a href="#__codelineno-0-1570">1570</a></span>
<span class="normal"><a href="#__codelineno-0-1571">1571</a></span>
<span class="normal"><a href="#__codelineno-0-1572">1572</a></span>
<span class="normal"><a href="#__codelineno-0-1573">1573</a></span>
<span class="normal"><a href="#__codelineno-0-1574">1574</a></span>
<span class="normal"><a href="#__codelineno-0-1575">1575</a></span>
<span class="normal"><a href="#__codelineno-0-1576">1576</a></span>
<span class="normal"><a href="#__codelineno-0-1577">1577</a></span>
<span class="normal"><a href="#__codelineno-0-1578">1578</a></span>
<span class="normal"><a href="#__codelineno-0-1579">1579</a></span>
<span class="normal"><a href="#__codelineno-0-1580">1580</a></span>
<span class="normal"><a href="#__codelineno-0-1581">1581</a></span>
<span class="normal"><a href="#__codelineno-0-1582">1582</a></span>
<span class="normal"><a href="#__codelineno-0-1583">1583</a></span>
<span class="normal"><a href="#__codelineno-0-1584">1584</a></span>
<span class="normal"><a href="#__codelineno-0-1585">1585</a></span>
<span class="normal"><a href="#__codelineno-0-1586">1586</a></span>
<span class="normal"><a href="#__codelineno-0-1587">1587</a></span>
<span class="normal"><a href="#__codelineno-0-1588">1588</a></span>
<span class="normal"><a href="#__codelineno-0-1589">1589</a></span>
<span class="normal"><a href="#__codelineno-0-1590">1590</a></span>
<span class="normal"><a href="#__codelineno-0-1591">1591</a></span>
<span class="normal"><a href="#__codelineno-0-1592">1592</a></span>
<span class="normal"><a href="#__codelineno-0-1593">1593</a></span>
<span class="normal"><a href="#__codelineno-0-1594">1594</a></span>
<span class="normal"><a href="#__codelineno-0-1595">1595</a></span>
<span class="normal"><a href="#__codelineno-0-1596">1596</a></span>
<span class="normal"><a href="#__codelineno-0-1597">1597</a></span>
<span class="normal"><a href="#__codelineno-0-1598">1598</a></span>
<span class="normal"><a href="#__codelineno-0-1599">1599</a></span>
<span class="normal"><a href="#__codelineno-0-1600">1600</a></span>
<span class="normal"><a href="#__codelineno-0-1601">1601</a></span>
<span class="normal"><a href="#__codelineno-0-1602">1602</a></span>
<span class="normal"><a href="#__codelineno-0-1603">1603</a></span>
<span class="normal"><a href="#__codelineno-0-1604">1604</a></span>
<span class="normal"><a href="#__codelineno-0-1605">1605</a></span>
<span class="normal"><a href="#__codelineno-0-1606">1606</a></span>
<span class="normal"><a href="#__codelineno-0-1607">1607</a></span>
<span class="normal"><a href="#__codelineno-0-1608">1608</a></span>
<span class="normal"><a href="#__codelineno-0-1609">1609</a></span>
<span class="normal"><a href="#__codelineno-0-1610">1610</a></span>
<span class="normal"><a href="#__codelineno-0-1611">1611</a></span>
<span class="normal"><a href="#__codelineno-0-1612">1612</a></span>
<span class="normal"><a href="#__codelineno-0-1613">1613</a></span>
<span class="normal"><a href="#__codelineno-0-1614">1614</a></span>
<span class="normal"><a href="#__codelineno-0-1615">1615</a></span>
<span class="normal"><a href="#__codelineno-0-1616">1616</a></span>
<span class="normal"><a href="#__codelineno-0-1617">1617</a></span>
<span class="normal"><a href="#__codelineno-0-1618">1618</a></span>
<span class="normal"><a href="#__codelineno-0-1619">1619</a></span>
<span class="normal"><a href="#__codelineno-0-1620">1620</a></span>
<span class="normal"><a href="#__codelineno-0-1621">1621</a></span>
<span class="normal"><a href="#__codelineno-0-1622">1622</a></span>
<span class="normal"><a href="#__codelineno-0-1623">1623</a></span>
<span class="normal"><a href="#__codelineno-0-1624">1624</a></span>
<span class="normal"><a href="#__codelineno-0-1625">1625</a></span>
<span class="normal"><a href="#__codelineno-0-1626">1626</a></span>
<span class="normal"><a href="#__codelineno-0-1627">1627</a></span>
<span class="normal"><a href="#__codelineno-0-1628">1628</a></span>
<span class="normal"><a href="#__codelineno-0-1629">1629</a></span>
<span class="normal"><a href="#__codelineno-0-1630">1630</a></span>
<span class="normal"><a href="#__codelineno-0-1631">1631</a></span>
<span class="normal"><a href="#__codelineno-0-1632">1632</a></span>
<span class="normal"><a href="#__codelineno-0-1633">1633</a></span>
<span class="normal"><a href="#__codelineno-0-1634">1634</a></span>
<span class="normal"><a href="#__codelineno-0-1635">1635</a></span>
<span class="normal"><a href="#__codelineno-0-1636">1636</a></span>
<span class="normal"><a href="#__codelineno-0-1637">1637</a></span>
<span class="normal"><a href="#__codelineno-0-1638">1638</a></span>
<span class="normal"><a href="#__codelineno-0-1639">1639</a></span>
<span class="normal"><a href="#__codelineno-0-1640">1640</a></span>
<span class="normal"><a href="#__codelineno-0-1641">1641</a></span>
<span class="normal"><a href="#__codelineno-0-1642">1642</a></span>
<span class="normal"><a href="#__codelineno-0-1643">1643</a></span>
<span class="normal"><a href="#__codelineno-0-1644">1644</a></span></pre></div></td><td class="code"><div><pre><span></span><code><a id="__codelineno-0-1484" name="__codelineno-0-1484"></a><span class="k">class</span><span class="w"> </span><span class="nc">ArrowScan</span><span class="p">:</span>
<a id="__codelineno-0-1485" name="__codelineno-0-1485"></a> <span class="n">_table_metadata</span><span class="p">:</span> <span class="n">TableMetadata</span>
<a id="__codelineno-0-1486" name="__codelineno-0-1486"></a> <span class="n">_io</span><span class="p">:</span> <span class="n">FileIO</span>
<a id="__codelineno-0-1487" name="__codelineno-0-1487"></a> <span class="n">_projected_schema</span><span class="p">:</span> <span class="n">Schema</span>
<a id="__codelineno-0-1488" name="__codelineno-0-1488"></a> <span class="n">_bound_row_filter</span><span class="p">:</span> <span class="n">BooleanExpression</span>
<a id="__codelineno-0-1489" name="__codelineno-0-1489"></a> <span class="n">_case_sensitive</span><span class="p">:</span> <span class="nb">bool</span>
<a id="__codelineno-0-1490" name="__codelineno-0-1490"></a> <span class="n">_limit</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">int</span><span class="p">]</span>
<a id="__codelineno-0-1491" name="__codelineno-0-1491"></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;Scan the Iceberg Table and create an Arrow construct.</span>
<a id="__codelineno-0-1492" name="__codelineno-0-1492"></a>
<a id="__codelineno-0-1493" name="__codelineno-0-1493"></a><span class="sd"> Attributes:</span>
<a id="__codelineno-0-1494" name="__codelineno-0-1494"></a><span class="sd"> _table_metadata: Current table metadata of the Iceberg table</span>
<a id="__codelineno-0-1495" name="__codelineno-0-1495"></a><span class="sd"> _io: PyIceberg FileIO implementation from which to fetch the io properties</span>
<a id="__codelineno-0-1496" name="__codelineno-0-1496"></a><span class="sd"> _projected_schema: Iceberg Schema to project onto the data files</span>
<a id="__codelineno-0-1497" name="__codelineno-0-1497"></a><span class="sd"> _bound_row_filter: Schema bound row expression to filter the data with</span>
<a id="__codelineno-0-1498" name="__codelineno-0-1498"></a><span class="sd"> _case_sensitive: Case sensitivity when looking up column names</span>
<a id="__codelineno-0-1499" name="__codelineno-0-1499"></a><span class="sd"> _limit: Limit the number of records.</span>
<a id="__codelineno-0-1500" name="__codelineno-0-1500"></a><span class="sd"> &quot;&quot;&quot;</span>
<a id="__codelineno-0-1501" name="__codelineno-0-1501"></a>
<a id="__codelineno-0-1502" name="__codelineno-0-1502"></a> <span class="k">def</span><span class="w"> </span><span class="fm">__init__</span><span class="p">(</span>
<a id="__codelineno-0-1503" name="__codelineno-0-1503"></a> <span class="bp">self</span><span class="p">,</span>
<a id="__codelineno-0-1504" name="__codelineno-0-1504"></a> <span class="n">table_metadata</span><span class="p">:</span> <span class="n">TableMetadata</span><span class="p">,</span>
<a id="__codelineno-0-1505" name="__codelineno-0-1505"></a> <span class="n">io</span><span class="p">:</span> <span class="n">FileIO</span><span class="p">,</span>
<a id="__codelineno-0-1506" name="__codelineno-0-1506"></a> <span class="n">projected_schema</span><span class="p">:</span> <span class="n">Schema</span><span class="p">,</span>
<a id="__codelineno-0-1507" name="__codelineno-0-1507"></a> <span class="n">row_filter</span><span class="p">:</span> <span class="n">BooleanExpression</span><span class="p">,</span>
<a id="__codelineno-0-1508" name="__codelineno-0-1508"></a> <span class="n">case_sensitive</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">True</span><span class="p">,</span>
<a id="__codelineno-0-1509" name="__codelineno-0-1509"></a> <span class="n">limit</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">int</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<a id="__codelineno-0-1510" name="__codelineno-0-1510"></a> <span class="p">)</span> <span class="o">-&gt;</span> <span class="kc">None</span><span class="p">:</span>
<a id="__codelineno-0-1511" name="__codelineno-0-1511"></a> <span class="bp">self</span><span class="o">.</span><span class="n">_table_metadata</span> <span class="o">=</span> <span class="n">table_metadata</span>
<a id="__codelineno-0-1512" name="__codelineno-0-1512"></a> <span class="bp">self</span><span class="o">.</span><span class="n">_io</span> <span class="o">=</span> <span class="n">io</span>
<a id="__codelineno-0-1513" name="__codelineno-0-1513"></a> <span class="bp">self</span><span class="o">.</span><span class="n">_projected_schema</span> <span class="o">=</span> <span class="n">projected_schema</span>
<a id="__codelineno-0-1514" name="__codelineno-0-1514"></a> <span class="bp">self</span><span class="o">.</span><span class="n">_bound_row_filter</span> <span class="o">=</span> <span class="n">bind</span><span class="p">(</span><span class="n">table_metadata</span><span class="o">.</span><span class="n">schema</span><span class="p">(),</span> <span class="n">row_filter</span><span class="p">,</span> <span class="n">case_sensitive</span><span class="o">=</span><span class="n">case_sensitive</span><span class="p">)</span>
<a id="__codelineno-0-1515" name="__codelineno-0-1515"></a> <span class="bp">self</span><span class="o">.</span><span class="n">_case_sensitive</span> <span class="o">=</span> <span class="n">case_sensitive</span>
<a id="__codelineno-0-1516" name="__codelineno-0-1516"></a> <span class="bp">self</span><span class="o">.</span><span class="n">_limit</span> <span class="o">=</span> <span class="n">limit</span>
<a id="__codelineno-0-1517" name="__codelineno-0-1517"></a>
<a id="__codelineno-0-1518" name="__codelineno-0-1518"></a> <span class="nd">@property</span>
<a id="__codelineno-0-1519" name="__codelineno-0-1519"></a> <span class="k">def</span><span class="w"> </span><span class="nf">_use_large_types</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="nb">bool</span><span class="p">:</span>
<a id="__codelineno-0-1520" name="__codelineno-0-1520"></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;Whether to represent data as large arrow types.</span>
<a id="__codelineno-0-1521" name="__codelineno-0-1521"></a>
<a id="__codelineno-0-1522" name="__codelineno-0-1522"></a><span class="sd"> Defaults to True.</span>
<a id="__codelineno-0-1523" name="__codelineno-0-1523"></a><span class="sd"> &quot;&quot;&quot;</span>
<a id="__codelineno-0-1524" name="__codelineno-0-1524"></a> <span class="k">return</span> <span class="n">property_as_bool</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_io</span><span class="o">.</span><span class="n">properties</span><span class="p">,</span> <span class="n">PYARROW_USE_LARGE_TYPES_ON_READ</span><span class="p">,</span> <span class="kc">True</span><span class="p">)</span>
<a id="__codelineno-0-1525" name="__codelineno-0-1525"></a>
<a id="__codelineno-0-1526" name="__codelineno-0-1526"></a> <span class="nd">@property</span>
<a id="__codelineno-0-1527" name="__codelineno-0-1527"></a> <span class="k">def</span><span class="w"> </span><span class="nf">_projected_field_ids</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">Set</span><span class="p">[</span><span class="nb">int</span><span class="p">]:</span>
<a id="__codelineno-0-1528" name="__codelineno-0-1528"></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;Set of field IDs that should be projected from the data files.&quot;&quot;&quot;</span>
<a id="__codelineno-0-1529" name="__codelineno-0-1529"></a> <span class="k">return</span> <span class="p">{</span>
<a id="__codelineno-0-1530" name="__codelineno-0-1530"></a> <span class="nb">id</span>
<a id="__codelineno-0-1531" name="__codelineno-0-1531"></a> <span class="k">for</span> <span class="nb">id</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">_projected_schema</span><span class="o">.</span><span class="n">field_ids</span>
<a id="__codelineno-0-1532" name="__codelineno-0-1532"></a> <span class="k">if</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_projected_schema</span><span class="o">.</span><span class="n">find_type</span><span class="p">(</span><span class="nb">id</span><span class="p">),</span> <span class="p">(</span><span class="n">MapType</span><span class="p">,</span> <span class="n">ListType</span><span class="p">))</span>
<a id="__codelineno-0-1533" name="__codelineno-0-1533"></a> <span class="p">}</span><span class="o">.</span><span class="n">union</span><span class="p">(</span><span class="n">extract_field_ids</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_bound_row_filter</span><span class="p">))</span>
<a id="__codelineno-0-1534" name="__codelineno-0-1534"></a>
<a id="__codelineno-0-1535" name="__codelineno-0-1535"></a> <span class="k">def</span><span class="w"> </span><span class="nf">to_table</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">tasks</span><span class="p">:</span> <span class="n">Iterable</span><span class="p">[</span><span class="n">FileScanTask</span><span class="p">])</span> <span class="o">-&gt;</span> <span class="n">pa</span><span class="o">.</span><span class="n">Table</span><span class="p">:</span>
<a id="__codelineno-0-1536" name="__codelineno-0-1536"></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;Scan the Iceberg table and return a pa.Table.</span>
<a id="__codelineno-0-1537" name="__codelineno-0-1537"></a>
<a id="__codelineno-0-1538" name="__codelineno-0-1538"></a><span class="sd"> Returns a pa.Table with data from the Iceberg table by resolving the</span>
<a id="__codelineno-0-1539" name="__codelineno-0-1539"></a><span class="sd"> right columns that match the current table schema. Only data that</span>
<a id="__codelineno-0-1540" name="__codelineno-0-1540"></a><span class="sd"> matches the provided row_filter expression is returned.</span>
<a id="__codelineno-0-1541" name="__codelineno-0-1541"></a>
<a id="__codelineno-0-1542" name="__codelineno-0-1542"></a><span class="sd"> Args:</span>
<a id="__codelineno-0-1543" name="__codelineno-0-1543"></a><span class="sd"> tasks: FileScanTasks representing the data files and delete files to read from.</span>
<a id="__codelineno-0-1544" name="__codelineno-0-1544"></a>
<a id="__codelineno-0-1545" name="__codelineno-0-1545"></a><span class="sd"> Returns:</span>
<a id="__codelineno-0-1546" name="__codelineno-0-1546"></a><span class="sd"> A PyArrow table. Total number of rows will be capped if specified.</span>
<a id="__codelineno-0-1547" name="__codelineno-0-1547"></a>
<a id="__codelineno-0-1548" name="__codelineno-0-1548"></a><span class="sd"> Raises:</span>
<a id="__codelineno-0-1549" name="__codelineno-0-1549"></a><span class="sd"> ResolveError: When a required field cannot be found in the file</span>
<a id="__codelineno-0-1550" name="__codelineno-0-1550"></a><span class="sd"> ValueError: When a field type in the file cannot be projected to the schema type</span>
<a id="__codelineno-0-1551" name="__codelineno-0-1551"></a><span class="sd"> &quot;&quot;&quot;</span>
<a id="__codelineno-0-1552" name="__codelineno-0-1552"></a> <span class="n">deletes_per_file</span> <span class="o">=</span> <span class="n">_read_all_delete_files</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_io</span><span class="p">,</span> <span class="n">tasks</span><span class="p">)</span>
<a id="__codelineno-0-1553" name="__codelineno-0-1553"></a> <span class="n">executor</span> <span class="o">=</span> <span class="n">ExecutorFactory</span><span class="o">.</span><span class="n">get_or_create</span><span class="p">()</span>
<a id="__codelineno-0-1554" name="__codelineno-0-1554"></a>
<a id="__codelineno-0-1555" name="__codelineno-0-1555"></a> <span class="k">def</span><span class="w"> </span><span class="nf">_table_from_scan_task</span><span class="p">(</span><span class="n">task</span><span class="p">:</span> <span class="n">FileScanTask</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">pa</span><span class="o">.</span><span class="n">Table</span><span class="p">:</span>
<a id="__codelineno-0-1556" name="__codelineno-0-1556"></a> <span class="n">batches</span> <span class="o">=</span> <span class="nb">list</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_record_batches_from_scan_tasks_and_deletes</span><span class="p">([</span><span class="n">task</span><span class="p">],</span> <span class="n">deletes_per_file</span><span class="p">))</span>
<a id="__codelineno-0-1557" name="__codelineno-0-1557"></a> <span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">batches</span><span class="p">)</span> <span class="o">&gt;</span> <span class="mi">0</span><span class="p">:</span>
<a id="__codelineno-0-1558" name="__codelineno-0-1558"></a> <span class="k">return</span> <span class="n">pa</span><span class="o">.</span><span class="n">Table</span><span class="o">.</span><span class="n">from_batches</span><span class="p">(</span><span class="n">batches</span><span class="p">)</span>
<a id="__codelineno-0-1559" name="__codelineno-0-1559"></a> <span class="k">else</span><span class="p">:</span>
<a id="__codelineno-0-1560" name="__codelineno-0-1560"></a> <span class="k">return</span> <span class="kc">None</span>
<a id="__codelineno-0-1561" name="__codelineno-0-1561"></a>
<a id="__codelineno-0-1562" name="__codelineno-0-1562"></a> <span class="n">futures</span> <span class="o">=</span> <span class="p">[</span>
<a id="__codelineno-0-1563" name="__codelineno-0-1563"></a> <span class="n">executor</span><span class="o">.</span><span class="n">submit</span><span class="p">(</span>
<a id="__codelineno-0-1564" name="__codelineno-0-1564"></a> <span class="n">_table_from_scan_task</span><span class="p">,</span>
<a id="__codelineno-0-1565" name="__codelineno-0-1565"></a> <span class="n">task</span><span class="p">,</span>
<a id="__codelineno-0-1566" name="__codelineno-0-1566"></a> <span class="p">)</span>
<a id="__codelineno-0-1567" name="__codelineno-0-1567"></a> <span class="k">for</span> <span class="n">task</span> <span class="ow">in</span> <span class="n">tasks</span>
<a id="__codelineno-0-1568" name="__codelineno-0-1568"></a> <span class="p">]</span>
<a id="__codelineno-0-1569" name="__codelineno-0-1569"></a> <span class="n">total_row_count</span> <span class="o">=</span> <span class="mi">0</span>
<a id="__codelineno-0-1570" name="__codelineno-0-1570"></a> <span class="c1"># for consistent ordering, we need to maintain future order</span>
<a id="__codelineno-0-1571" name="__codelineno-0-1571"></a> <span class="n">futures_index</span> <span class="o">=</span> <span class="p">{</span><span class="n">f</span><span class="p">:</span> <span class="n">i</span> <span class="k">for</span> <span class="n">i</span><span class="p">,</span> <span class="n">f</span> <span class="ow">in</span> <span class="nb">enumerate</span><span class="p">(</span><span class="n">futures</span><span class="p">)}</span>
<a id="__codelineno-0-1572" name="__codelineno-0-1572"></a> <span class="n">completed_futures</span><span class="p">:</span> <span class="n">SortedList</span><span class="p">[</span><span class="n">Future</span><span class="p">[</span><span class="n">pa</span><span class="o">.</span><span class="n">Table</span><span class="p">]]</span> <span class="o">=</span> <span class="n">SortedList</span><span class="p">(</span><span class="n">iterable</span><span class="o">=</span><span class="p">[],</span> <span class="n">key</span><span class="o">=</span><span class="k">lambda</span> <span class="n">f</span><span class="p">:</span> <span class="n">futures_index</span><span class="p">[</span><span class="n">f</span><span class="p">])</span>
<a id="__codelineno-0-1573" name="__codelineno-0-1573"></a> <span class="k">for</span> <span class="n">future</span> <span class="ow">in</span> <span class="n">concurrent</span><span class="o">.</span><span class="n">futures</span><span class="o">.</span><span class="n">as_completed</span><span class="p">(</span><span class="n">futures</span><span class="p">):</span>
<a id="__codelineno-0-1574" name="__codelineno-0-1574"></a> <span class="n">completed_futures</span><span class="o">.</span><span class="n">add</span><span class="p">(</span><span class="n">future</span><span class="p">)</span>
<a id="__codelineno-0-1575" name="__codelineno-0-1575"></a> <span class="k">if</span> <span class="n">table_result</span> <span class="o">:=</span> <span class="n">future</span><span class="o">.</span><span class="n">result</span><span class="p">():</span>
<a id="__codelineno-0-1576" name="__codelineno-0-1576"></a> <span class="n">total_row_count</span> <span class="o">+=</span> <span class="nb">len</span><span class="p">(</span><span class="n">table_result</span><span class="p">)</span>
<a id="__codelineno-0-1577" name="__codelineno-0-1577"></a> <span class="c1"># stop early if limit is satisfied</span>
<a id="__codelineno-0-1578" name="__codelineno-0-1578"></a> <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">_limit</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span> <span class="ow">and</span> <span class="n">total_row_count</span> <span class="o">&gt;=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_limit</span><span class="p">:</span>
<a id="__codelineno-0-1579" name="__codelineno-0-1579"></a> <span class="k">break</span>
<a id="__codelineno-0-1580" name="__codelineno-0-1580"></a>
<a id="__codelineno-0-1581" name="__codelineno-0-1581"></a> <span class="c1"># by now, we&#39;ve either completed all tasks or satisfied the limit</span>
<a id="__codelineno-0-1582" name="__codelineno-0-1582"></a> <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">_limit</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
<a id="__codelineno-0-1583" name="__codelineno-0-1583"></a> <span class="n">_</span> <span class="o">=</span> <span class="p">[</span><span class="n">f</span><span class="o">.</span><span class="n">cancel</span><span class="p">()</span> <span class="k">for</span> <span class="n">f</span> <span class="ow">in</span> <span class="n">futures</span> <span class="k">if</span> <span class="ow">not</span> <span class="n">f</span><span class="o">.</span><span class="n">done</span><span class="p">()]</span>
<a id="__codelineno-0-1584" name="__codelineno-0-1584"></a>
<a id="__codelineno-0-1585" name="__codelineno-0-1585"></a> <span class="n">tables</span> <span class="o">=</span> <span class="p">[</span><span class="n">f</span><span class="o">.</span><span class="n">result</span><span class="p">()</span> <span class="k">for</span> <span class="n">f</span> <span class="ow">in</span> <span class="n">completed_futures</span> <span class="k">if</span> <span class="n">f</span><span class="o">.</span><span class="n">result</span><span class="p">()]</span>
<a id="__codelineno-0-1586" name="__codelineno-0-1586"></a>
<a id="__codelineno-0-1587" name="__codelineno-0-1587"></a> <span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">tables</span><span class="p">)</span> <span class="o">&lt;</span> <span class="mi">1</span><span class="p">:</span>
<a id="__codelineno-0-1588" name="__codelineno-0-1588"></a> <span class="k">return</span> <span class="n">pa</span><span class="o">.</span><span class="n">Table</span><span class="o">.</span><span class="n">from_batches</span><span class="p">([],</span> <span class="n">schema</span><span class="o">=</span><span class="n">schema_to_pyarrow</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_projected_schema</span><span class="p">,</span> <span class="n">include_field_ids</span><span class="o">=</span><span class="kc">False</span><span class="p">))</span>
<a id="__codelineno-0-1589" name="__codelineno-0-1589"></a>
<a id="__codelineno-0-1590" name="__codelineno-0-1590"></a> <span class="n">result</span> <span class="o">=</span> <span class="n">pa</span><span class="o">.</span><span class="n">concat_tables</span><span class="p">(</span><span class="n">tables</span><span class="p">,</span> <span class="n">promote_options</span><span class="o">=</span><span class="s2">&quot;permissive&quot;</span><span class="p">)</span>
<a id="__codelineno-0-1591" name="__codelineno-0-1591"></a>
<a id="__codelineno-0-1592" name="__codelineno-0-1592"></a> <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">_limit</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
<a id="__codelineno-0-1593" name="__codelineno-0-1593"></a> <span class="k">return</span> <span class="n">result</span><span class="o">.</span><span class="n">slice</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">_limit</span><span class="p">)</span>
<a id="__codelineno-0-1594" name="__codelineno-0-1594"></a>
<a id="__codelineno-0-1595" name="__codelineno-0-1595"></a> <span class="k">return</span> <span class="n">result</span>
<a id="__codelineno-0-1596" name="__codelineno-0-1596"></a>
<a id="__codelineno-0-1597" name="__codelineno-0-1597"></a> <span class="k">def</span><span class="w"> </span><span class="nf">to_record_batches</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">tasks</span><span class="p">:</span> <span class="n">Iterable</span><span class="p">[</span><span class="n">FileScanTask</span><span class="p">])</span> <span class="o">-&gt;</span> <span class="n">Iterator</span><span class="p">[</span><span class="n">pa</span><span class="o">.</span><span class="n">RecordBatch</span><span class="p">]:</span>
<a id="__codelineno-0-1598" name="__codelineno-0-1598"></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;Scan the Iceberg table and return an Iterator[pa.RecordBatch].</span>
<a id="__codelineno-0-1599" name="__codelineno-0-1599"></a>
<a id="__codelineno-0-1600" name="__codelineno-0-1600"></a><span class="sd"> Returns an Iterator of pa.RecordBatch with data from the Iceberg table</span>
<a id="__codelineno-0-1601" name="__codelineno-0-1601"></a><span class="sd"> by resolving the right columns that match the current table schema.</span>
<a id="__codelineno-0-1602" name="__codelineno-0-1602"></a><span class="sd"> Only data that matches the provided row_filter expression is returned.</span>
<a id="__codelineno-0-1603" name="__codelineno-0-1603"></a>
<a id="__codelineno-0-1604" name="__codelineno-0-1604"></a><span class="sd"> Args:</span>
<a id="__codelineno-0-1605" name="__codelineno-0-1605"></a><span class="sd"> tasks: FileScanTasks representing the data files and delete files to read from.</span>
<a id="__codelineno-0-1606" name="__codelineno-0-1606"></a>
<a id="__codelineno-0-1607" name="__codelineno-0-1607"></a><span class="sd"> Returns:</span>
<a id="__codelineno-0-1608" name="__codelineno-0-1608"></a><span class="sd"> An Iterator of PyArrow RecordBatches.</span>
<a id="__codelineno-0-1609" name="__codelineno-0-1609"></a><span class="sd"> Total number of rows will be capped if specified.</span>
<a id="__codelineno-0-1610" name="__codelineno-0-1610"></a>
<a id="__codelineno-0-1611" name="__codelineno-0-1611"></a><span class="sd"> Raises:</span>
<a id="__codelineno-0-1612" name="__codelineno-0-1612"></a><span class="sd"> ResolveError: When a required field cannot be found in the file</span>
<a id="__codelineno-0-1613" name="__codelineno-0-1613"></a><span class="sd"> ValueError: When a field type in the file cannot be projected to the schema type</span>
<a id="__codelineno-0-1614" name="__codelineno-0-1614"></a><span class="sd"> &quot;&quot;&quot;</span>
<a id="__codelineno-0-1615" name="__codelineno-0-1615"></a> <span class="n">deletes_per_file</span> <span class="o">=</span> <span class="n">_read_all_delete_files</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_io</span><span class="p">,</span> <span class="n">tasks</span><span class="p">)</span>
<a id="__codelineno-0-1616" name="__codelineno-0-1616"></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_record_batches_from_scan_tasks_and_deletes</span><span class="p">(</span><span class="n">tasks</span><span class="p">,</span> <span class="n">deletes_per_file</span><span class="p">)</span>
<a id="__codelineno-0-1617" name="__codelineno-0-1617"></a>
<a id="__codelineno-0-1618" name="__codelineno-0-1618"></a> <span class="k">def</span><span class="w"> </span><span class="nf">_record_batches_from_scan_tasks_and_deletes</span><span class="p">(</span>
<a id="__codelineno-0-1619" name="__codelineno-0-1619"></a> <span class="bp">self</span><span class="p">,</span> <span class="n">tasks</span><span class="p">:</span> <span class="n">Iterable</span><span class="p">[</span><span class="n">FileScanTask</span><span class="p">],</span> <span class="n">deletes_per_file</span><span class="p">:</span> <span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">List</span><span class="p">[</span><span class="n">ChunkedArray</span><span class="p">]]</span>
<a id="__codelineno-0-1620" name="__codelineno-0-1620"></a> <span class="p">)</span> <span class="o">-&gt;</span> <span class="n">Iterator</span><span class="p">[</span><span class="n">pa</span><span class="o">.</span><span class="n">RecordBatch</span><span class="p">]:</span>
<a id="__codelineno-0-1621" name="__codelineno-0-1621"></a> <span class="n">total_row_count</span> <span class="o">=</span> <span class="mi">0</span>
<a id="__codelineno-0-1622" name="__codelineno-0-1622"></a> <span class="k">for</span> <span class="n">task</span> <span class="ow">in</span> <span class="n">tasks</span><span class="p">:</span>
<a id="__codelineno-0-1623" name="__codelineno-0-1623"></a> <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">_limit</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span> <span class="ow">and</span> <span class="n">total_row_count</span> <span class="o">&gt;=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_limit</span><span class="p">:</span>
<a id="__codelineno-0-1624" name="__codelineno-0-1624"></a> <span class="k">break</span>
<a id="__codelineno-0-1625" name="__codelineno-0-1625"></a> <span class="n">batches</span> <span class="o">=</span> <span class="n">_task_to_record_batches</span><span class="p">(</span>
<a id="__codelineno-0-1626" name="__codelineno-0-1626"></a> <span class="n">_fs_from_file_path</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_io</span><span class="p">,</span> <span class="n">task</span><span class="o">.</span><span class="n">file</span><span class="o">.</span><span class="n">file_path</span><span class="p">),</span>
<a id="__codelineno-0-1627" name="__codelineno-0-1627"></a> <span class="n">task</span><span class="p">,</span>
<a id="__codelineno-0-1628" name="__codelineno-0-1628"></a> <span class="bp">self</span><span class="o">.</span><span class="n">_bound_row_filter</span><span class="p">,</span>
<a id="__codelineno-0-1629" name="__codelineno-0-1629"></a> <span class="bp">self</span><span class="o">.</span><span class="n">_projected_schema</span><span class="p">,</span>
<a id="__codelineno-0-1630" name="__codelineno-0-1630"></a> <span class="bp">self</span><span class="o">.</span><span class="n">_projected_field_ids</span><span class="p">,</span>
<a id="__codelineno-0-1631" name="__codelineno-0-1631"></a> <span class="n">deletes_per_file</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">task</span><span class="o">.</span><span class="n">file</span><span class="o">.</span><span class="n">file_path</span><span class="p">),</span>
<a id="__codelineno-0-1632" name="__codelineno-0-1632"></a> <span class="bp">self</span><span class="o">.</span><span class="n">_case_sensitive</span><span class="p">,</span>
<a id="__codelineno-0-1633" name="__codelineno-0-1633"></a> <span class="bp">self</span><span class="o">.</span><span class="n">_table_metadata</span><span class="o">.</span><span class="n">name_mapping</span><span class="p">(),</span>
<a id="__codelineno-0-1634" name="__codelineno-0-1634"></a> <span class="bp">self</span><span class="o">.</span><span class="n">_use_large_types</span><span class="p">,</span>
<a id="__codelineno-0-1635" name="__codelineno-0-1635"></a> <span class="bp">self</span><span class="o">.</span><span class="n">_table_metadata</span><span class="o">.</span><span class="n">spec</span><span class="p">(),</span>
<a id="__codelineno-0-1636" name="__codelineno-0-1636"></a> <span class="p">)</span>
<a id="__codelineno-0-1637" name="__codelineno-0-1637"></a> <span class="k">for</span> <span class="n">batch</span> <span class="ow">in</span> <span class="n">batches</span><span class="p">:</span>
<a id="__codelineno-0-1638" name="__codelineno-0-1638"></a> <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">_limit</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
<a id="__codelineno-0-1639" name="__codelineno-0-1639"></a> <span class="k">if</span> <span class="n">total_row_count</span> <span class="o">&gt;=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_limit</span><span class="p">:</span>
<a id="__codelineno-0-1640" name="__codelineno-0-1640"></a> <span class="k">break</span>
<a id="__codelineno-0-1641" name="__codelineno-0-1641"></a> <span class="k">elif</span> <span class="n">total_row_count</span> <span class="o">+</span> <span class="nb">len</span><span class="p">(</span><span class="n">batch</span><span class="p">)</span> <span class="o">&gt;=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_limit</span><span class="p">:</span>
<a id="__codelineno-0-1642" name="__codelineno-0-1642"></a> <span class="n">batch</span> <span class="o">=</span> <span class="n">batch</span><span class="o">.</span><span class="n">slice</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">_limit</span> <span class="o">-</span> <span class="n">total_row_count</span><span class="p">)</span>
<a id="__codelineno-0-1643" name="__codelineno-0-1643"></a> <span class="k">yield</span> <span class="n">batch</span>
<a id="__codelineno-0-1644" name="__codelineno-0-1644"></a> <span class="n">total_row_count</span> <span class="o">+=</span> <span class="nb">len</span><span class="p">(</span><span class="n">batch</span><span class="p">)</span>
</code></pre></div></td></tr></table></div>
</details>
<div class="doc doc-children">
<div class="doc doc-object doc-attribute">
<h3 id="pyiceberg.io.pyarrow.ArrowScan._limit" class="doc doc-heading">
<code class="highlight language-python"><span class="n">_limit</span> <span class="o">=</span> <span class="n">limit</span></code>
<span class="doc doc-labels">
<small class="doc doc-label doc-label-instance-attribute"><code>instance-attribute</code></small>
</span>
<a href="#pyiceberg.io.pyarrow.ArrowScan._limit" class="headerlink" title="Permanent link">&para;</a></h3>
<div class="doc doc-contents ">
<!-- NOTE(review): the summary and attribute table below read like the ArrowScan class docstring
     (compare the "Attributes:" section in the source listing above) rendered under the _limit
     attribute. Presumably the docstring sits directly after the `_limit` annotation in the Python
     source; confirm and move it to the top of the class body so it renders on the class entry. -->
<p>Scan the Iceberg Table and create an Arrow construct.</p>
<p><span class="doc-section-title">Attributes:</span></p>
<table>
<thead>
<tr>
<!-- scope="col" ties each header cell to its column for assistive technology. -->
<th scope="col">Name</th>
<th scope="col">Type</th>
<th scope="col">Description</th>
</tr>
</thead>
<tbody>
<!-- Tooltip paths point at the ArrowScan attributes themselves rather than at children of _limit. -->
<tr class="doc-section-item">
<td><code><span title="pyiceberg.io.pyarrow.ArrowScan._table_metadata">_table_metadata</span></code></td>
<td>
</td>
<td>
<div class="doc-md-description">
<p>Current table metadata of the Iceberg table</p>
</div>
</td>
</tr>
<tr class="doc-section-item">
<td><code><span title="pyiceberg.io.pyarrow.ArrowScan._io">_io</span></code></td>
<td>
</td>
<td>
<div class="doc-md-description">
<p>PyIceberg FileIO implementation from which to fetch the io properties</p>
</div>
</td>
</tr>
<tr class="doc-section-item">
<td><code><span title="pyiceberg.io.pyarrow.ArrowScan._projected_schema">_projected_schema</span></code></td>
<td>
</td>
<td>
<div class="doc-md-description">
<p>Iceberg Schema to project onto the data files</p>
</div>
</td>
</tr>
<tr class="doc-section-item">
<td><code><span title="pyiceberg.io.pyarrow.ArrowScan._bound_row_filter">_bound_row_filter</span></code></td>
<td>
</td>
<td>
<div class="doc-md-description">
<p>Schema bound row expression to filter the data with</p>
</div>
</td>
</tr>
<tr class="doc-section-item">
<td><code><span title="pyiceberg.io.pyarrow.ArrowScan._case_sensitive">_case_sensitive</span></code></td>
<td>
</td>
<td>
<div class="doc-md-description">
<p>Case sensitivity when looking up column names</p>
</div>
</td>
</tr>
<tr class="doc-section-item">
<td><code><span title="pyiceberg.io.pyarrow.ArrowScan._limit">_limit</span></code></td>
<td>
</td>
<td>
<div class="doc-md-description">
<p>Limit the number of records.</p>
</div>
</td>
</tr>
</tbody>
</table>
</div>
</div>
<!-- API entry: ArrowScan._projected_field_ids (property) -->
<div class="doc doc-object doc-attribute">
  <h3 class="doc doc-heading" id="pyiceberg.io.pyarrow.ArrowScan._projected_field_ids">
    <code class="highlight language-python"><span class="n">_projected_field_ids</span></code>
    <span class="doc doc-labels">
      <small class="doc doc-label doc-label-property"><code>property</code></small>
    </span>
    <a class="headerlink" href="#pyiceberg.io.pyarrow.ArrowScan._projected_field_ids" title="Permanent link">¶</a>
  </h3>
  <div class="doc doc-contents ">
    <p>Set of field IDs that should be projected from the data files.</p>
  </div>
</div>
<!-- API entry: ArrowScan._use_large_types (property) -->
<div class="doc doc-object doc-attribute">
  <h3 class="doc doc-heading" id="pyiceberg.io.pyarrow.ArrowScan._use_large_types">
    <code class="highlight language-python"><span class="n">_use_large_types</span></code>
    <span class="doc doc-labels">
      <small class="doc doc-label doc-label-property"><code>property</code></small>
    </span>
    <a class="headerlink" href="#pyiceberg.io.pyarrow.ArrowScan._use_large_types" title="Permanent link">¶</a>
  </h3>
  <div class="doc doc-contents ">
    <p>Whether to represent data as large arrow types.</p>
    <p>Defaults to True.</p>
  </div>
</div>
<div class="doc doc-object doc-function">
<h3 id="pyiceberg.io.pyarrow.ArrowScan.to_record_batches" class="doc doc-heading">
<code class="highlight language-python"><span class="n">to_record_batches</span><span class="p">(</span><span class="n">tasks</span><span class="p">)</span></code>
<a href="#pyiceberg.io.pyarrow.ArrowScan.to_record_batches" class="headerlink" title="Permanent link">&para;</a></h3>
<div class="doc doc-contents ">
<p>Scan the Iceberg table and return an Iterator[pa.RecordBatch].</p>
<p>Returns an Iterator of pa.RecordBatch with data from the Iceberg table
by resolving the right columns that match the current table schema.
Only data that matches the provided row_filter expression is returned.</p>
<p><span class="doc-section-title">Parameters:</span></p>
<table>
<thead>
<tr>
<th>Name</th>
<th>Type</th>
<th>Description</th>
<th>Default</th>
</tr>
</thead>
<tbody>
<tr class="doc-section-item">
<td>
<code>tasks</code>
</td>
<td>
<code><span title="typing.Iterable">Iterable</span>[<a class="autorefs autorefs-internal" title="pyiceberg.table.FileScanTask" href="../../table/#pyiceberg.table.FileScanTask">FileScanTask</a>]</code>
</td>
<td>
<div class="doc-md-description">
<p>FileScanTasks representing the data files and delete files to read from.</p>
</div>
</td>
<td>
<em>required</em>
</td>
</tr>
</tbody>
</table>
<p><span class="doc-section-title">Returns:</span></p>
<table>
<thead>
<tr>
<th>Type</th>
<th>Description</th>
</tr>
</thead>
<tbody>
<tr class="doc-section-item">
<td>
<code><span title="typing.Iterator">Iterator</span>[<span title="pyarrow.RecordBatch">RecordBatch</span>]</code>
</td>
<td>
<div class="doc-md-description">
<p>An Iterator of PyArrow RecordBatches.
Total number of rows will be capped if specified.</p>
</div>
</td>
</tr>
</tbody>
</table>
<p><span class="doc-section-title">Raises:</span></p>
<table>
<thead>
<tr>
<th>Type</th>
<th>Description</th>
</tr>
</thead>
<tbody>
<tr class="doc-section-item">
<td>
<code><span title="pyiceberg.exceptions.ResolveError">ResolveError</span></code>
</td>
<td>
<div class="doc-md-description">
<p>When a required field cannot be found in the file</p>
</div>
</td>
</tr>
<tr class="doc-section-item">
<td>
<code><span title="ValueError">ValueError</span></code>
</td>
<td>
<div class="doc-md-description">
<p>When a field type in the file cannot be projected to the schema type</p>
</div>
</td>
</tr>
</tbody>
</table>
<details class="quote">
<summary>Source code in <code>pyiceberg/io/pyarrow.py</code></summary>
<div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-1597">1597</a></span>
<span class="normal"><a href="#__codelineno-0-1598">1598</a></span>
<span class="normal"><a href="#__codelineno-0-1599">1599</a></span>
<span class="normal"><a href="#__codelineno-0-1600">1600</a></span>
<span class="normal"><a href="#__codelineno-0-1601">1601</a></span>
<span class="normal"><a href="#__codelineno-0-1602">1602</a></span>
<span class="normal"><a href="#__codelineno-0-1603">1603</a></span>
<span class="normal"><a href="#__codelineno-0-1604">1604</a></span>
<span class="normal"><a href="#__codelineno-0-1605">1605</a></span>
<span class="normal"><a href="#__codelineno-0-1606">1606</a></span>
<span class="normal"><a href="#__codelineno-0-1607">1607</a></span>
<span class="normal"><a href="#__codelineno-0-1608">1608</a></span>
<span class="normal"><a href="#__codelineno-0-1609">1609</a></span>
<span class="normal"><a href="#__codelineno-0-1610">1610</a></span>
<span class="normal"><a href="#__codelineno-0-1611">1611</a></span>
<span class="normal"><a href="#__codelineno-0-1612">1612</a></span>
<span class="normal"><a href="#__codelineno-0-1613">1613</a></span>
<span class="normal"><a href="#__codelineno-0-1614">1614</a></span>
<span class="normal"><a href="#__codelineno-0-1615">1615</a></span>
<span class="normal"><a href="#__codelineno-0-1616">1616</a></span></pre></div></td><td class="code"><div><pre><span></span><code><a id="__codelineno-0-1597" name="__codelineno-0-1597"></a><span class="k">def</span><span class="w"> </span><span class="nf">to_record_batches</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">tasks</span><span class="p">:</span> <span class="n">Iterable</span><span class="p">[</span><span class="n">FileScanTask</span><span class="p">])</span> <span class="o">-&gt;</span> <span class="n">Iterator</span><span class="p">[</span><span class="n">pa</span><span class="o">.</span><span class="n">RecordBatch</span><span class="p">]:</span>
<a id="__codelineno-0-1598" name="__codelineno-0-1598"></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;Scan the Iceberg table and return an Iterator[pa.RecordBatch].</span>
<a id="__codelineno-0-1599" name="__codelineno-0-1599"></a>
<a id="__codelineno-0-1600" name="__codelineno-0-1600"></a><span class="sd"> Returns an Iterator of pa.RecordBatch with data from the Iceberg table</span>
<a id="__codelineno-0-1601" name="__codelineno-0-1601"></a><span class="sd"> by resolving the right columns that match the current table schema.</span>
<a id="__codelineno-0-1602" name="__codelineno-0-1602"></a><span class="sd"> Only data that matches the provided row_filter expression is returned.</span>
<a id="__codelineno-0-1603" name="__codelineno-0-1603"></a>
<a id="__codelineno-0-1604" name="__codelineno-0-1604"></a><span class="sd"> Args:</span>
<a id="__codelineno-0-1605" name="__codelineno-0-1605"></a><span class="sd"> tasks: FileScanTasks representing the data files and delete files to read from.</span>
<a id="__codelineno-0-1606" name="__codelineno-0-1606"></a>
<a id="__codelineno-0-1607" name="__codelineno-0-1607"></a><span class="sd"> Returns:</span>
<a id="__codelineno-0-1608" name="__codelineno-0-1608"></a><span class="sd"> An Iterator of PyArrow RecordBatches.</span>
<a id="__codelineno-0-1609" name="__codelineno-0-1609"></a><span class="sd"> Total number of rows will be capped if specified.</span>
<a id="__codelineno-0-1610" name="__codelineno-0-1610"></a>
<a id="__codelineno-0-1611" name="__codelineno-0-1611"></a><span class="sd"> Raises:</span>
<a id="__codelineno-0-1612" name="__codelineno-0-1612"></a><span class="sd"> ResolveError: When a required field cannot be found in the file</span>
<a id="__codelineno-0-1613" name="__codelineno-0-1613"></a><span class="sd"> ValueError: When a field type in the file cannot be projected to the schema type</span>
<a id="__codelineno-0-1614" name="__codelineno-0-1614"></a><span class="sd"> &quot;&quot;&quot;</span>
<a id="__codelineno-0-1615" name="__codelineno-0-1615"></a> <span class="n">deletes_per_file</span> <span class="o">=</span> <span class="n">_read_all_delete_files</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_io</span><span class="p">,</span> <span class="n">tasks</span><span class="p">)</span>
<a id="__codelineno-0-1616" name="__codelineno-0-1616"></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_record_batches_from_scan_tasks_and_deletes</span><span class="p">(</span><span class="n">tasks</span><span class="p">,</span> <span class="n">deletes_per_file</span><span class="p">)</span>
</code></pre></div></td></tr></table></div>
</details>
</div>
</div>
<div class="doc doc-object doc-function">
<h3 id="pyiceberg.io.pyarrow.ArrowScan.to_table" class="doc doc-heading">
<code class="highlight language-python"><span class="n">to_table</span><span class="p">(</span><span class="n">tasks</span><span class="p">)</span></code>
<a href="#pyiceberg.io.pyarrow.ArrowScan.to_table" class="headerlink" title="Permanent link">&para;</a></h3>
<div class="doc doc-contents ">
<p>Scan the Iceberg table and return a pa.Table.</p>
<p>Returns a pa.Table with data from the Iceberg table by resolving the
right columns that match the current table schema. Only data that
matches the provided row_filter expression is returned.</p>
<p><span class="doc-section-title">Parameters:</span></p>
<table>
<thead>
<tr>
<th>Name</th>
<th>Type</th>
<th>Description</th>
<th>Default</th>
</tr>
</thead>
<tbody>
<tr class="doc-section-item">
<td>
<code>tasks</code>
</td>
<td>
<code><span title="typing.Iterable">Iterable</span>[<a class="autorefs autorefs-internal" title="pyiceberg.table.FileScanTask" href="../../table/#pyiceberg.table.FileScanTask">FileScanTask</a>]</code>
</td>
<td>
<div class="doc-md-description">
<p>FileScanTasks representing the data files and delete files to read from.</p>
</div>
</td>
<td>
<em>required</em>
</td>
</tr>
</tbody>
</table>
<p><span class="doc-section-title">Returns:</span></p>
<table>
<thead>
<tr>
<th>Type</th>
<th>Description</th>
</tr>
</thead>
<tbody>
<tr class="doc-section-item">
<td>
<code><span title="pyarrow.Table">Table</span></code>
</td>
<td>
<div class="doc-md-description">
<p>A PyArrow table. Total number of rows will be capped if specified.</p>
</div>
</td>
</tr>
</tbody>
</table>
<p><span class="doc-section-title">Raises:</span></p>
<table>
<thead>
<tr>
<th>Type</th>
<th>Description</th>
</tr>
</thead>
<tbody>
<tr class="doc-section-item">
<td>
<code><span title="pyiceberg.exceptions.ResolveError">ResolveError</span></code>
</td>
<td>
<div class="doc-md-description">
<p>When a required field cannot be found in the file</p>
</div>
</td>
</tr>
<tr class="doc-section-item">
<td>
<code><span title="ValueError">ValueError</span></code>
</td>
<td>
<div class="doc-md-description">
<p>When a field type in the file cannot be projected to the schema type</p>
</div>
</td>
</tr>
</tbody>
</table>
<details class="quote">
<summary>Source code in <code>pyiceberg/io/pyarrow.py</code></summary>
<div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-1535">1535</a></span>
<span class="normal"><a href="#__codelineno-0-1536">1536</a></span>
<span class="normal"><a href="#__codelineno-0-1537">1537</a></span>
<span class="normal"><a href="#__codelineno-0-1538">1538</a></span>
<span class="normal"><a href="#__codelineno-0-1539">1539</a></span>
<span class="normal"><a href="#__codelineno-0-1540">1540</a></span>
<span class="normal"><a href="#__codelineno-0-1541">1541</a></span>
<span class="normal"><a href="#__codelineno-0-1542">1542</a></span>
<span class="normal"><a href="#__codelineno-0-1543">1543</a></span>
<span class="normal"><a href="#__codelineno-0-1544">1544</a></span>
<span class="normal"><a href="#__codelineno-0-1545">1545</a></span>
<span class="normal"><a href="#__codelineno-0-1546">1546</a></span>
<span class="normal"><a href="#__codelineno-0-1547">1547</a></span>
<span class="normal"><a href="#__codelineno-0-1548">1548</a></span>
<span class="normal"><a href="#__codelineno-0-1549">1549</a></span>
<span class="normal"><a href="#__codelineno-0-1550">1550</a></span>
<span class="normal"><a href="#__codelineno-0-1551">1551</a></span>
<span class="normal"><a href="#__codelineno-0-1552">1552</a></span>
<span class="normal"><a href="#__codelineno-0-1553">1553</a></span>
<span class="normal"><a href="#__codelineno-0-1554">1554</a></span>
<span class="normal"><a href="#__codelineno-0-1555">1555</a></span>
<span class="normal"><a href="#__codelineno-0-1556">1556</a></span>
<span class="normal"><a href="#__codelineno-0-1557">1557</a></span>
<span class="normal"><a href="#__codelineno-0-1558">1558</a></span>
<span class="normal"><a href="#__codelineno-0-1559">1559</a></span>
<span class="normal"><a href="#__codelineno-0-1560">1560</a></span>
<span class="normal"><a href="#__codelineno-0-1561">1561</a></span>
<span class="normal"><a href="#__codelineno-0-1562">1562</a></span>
<span class="normal"><a href="#__codelineno-0-1563">1563</a></span>
<span class="normal"><a href="#__codelineno-0-1564">1564</a></span>
<span class="normal"><a href="#__codelineno-0-1565">1565</a></span>
<span class="normal"><a href="#__codelineno-0-1566">1566</a></span>
<span class="normal"><a href="#__codelineno-0-1567">1567</a></span>
<span class="normal"><a href="#__codelineno-0-1568">1568</a></span>
<span class="normal"><a href="#__codelineno-0-1569">1569</a></span>
<span class="normal"><a href="#__codelineno-0-1570">1570</a></span>
<span class="normal"><a href="#__codelineno-0-1571">1571</a></span>
<span class="normal"><a href="#__codelineno-0-1572">1572</a></span>
<span class="normal"><a href="#__codelineno-0-1573">1573</a></span>
<span class="normal"><a href="#__codelineno-0-1574">1574</a></span>
<span class="normal"><a href="#__codelineno-0-1575">1575</a></span>
<span class="normal"><a href="#__codelineno-0-1576">1576</a></span>
<span class="normal"><a href="#__codelineno-0-1577">1577</a></span>
<span class="normal"><a href="#__codelineno-0-1578">1578</a></span>
<span class="normal"><a href="#__codelineno-0-1579">1579</a></span>
<span class="normal"><a href="#__codelineno-0-1580">1580</a></span>
<span class="normal"><a href="#__codelineno-0-1581">1581</a></span>
<span class="normal"><a href="#__codelineno-0-1582">1582</a></span>
<span class="normal"><a href="#__codelineno-0-1583">1583</a></span>
<span class="normal"><a href="#__codelineno-0-1584">1584</a></span>
<span class="normal"><a href="#__codelineno-0-1585">1585</a></span>
<span class="normal"><a href="#__codelineno-0-1586">1586</a></span>
<span class="normal"><a href="#__codelineno-0-1587">1587</a></span>
<span class="normal"><a href="#__codelineno-0-1588">1588</a></span>
<span class="normal"><a href="#__codelineno-0-1589">1589</a></span>
<span class="normal"><a href="#__codelineno-0-1590">1590</a></span>
<span class="normal"><a href="#__codelineno-0-1591">1591</a></span>
<span class="normal"><a href="#__codelineno-0-1592">1592</a></span>
<span class="normal"><a href="#__codelineno-0-1593">1593</a></span>
<span class="normal"><a href="#__codelineno-0-1594">1594</a></span>
<span class="normal"><a href="#__codelineno-0-1595">1595</a></span></pre></div></td><td class="code"><div><pre><span></span><code><a id="__codelineno-0-1535" name="__codelineno-0-1535"></a><span class="k">def</span><span class="w"> </span><span class="nf">to_table</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">tasks</span><span class="p">:</span> <span class="n">Iterable</span><span class="p">[</span><span class="n">FileScanTask</span><span class="p">])</span> <span class="o">-&gt;</span> <span class="n">pa</span><span class="o">.</span><span class="n">Table</span><span class="p">:</span>
<a id="__codelineno-0-1536" name="__codelineno-0-1536"></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;Scan the Iceberg table and return a pa.Table.</span>
<a id="__codelineno-0-1537" name="__codelineno-0-1537"></a>
<a id="__codelineno-0-1538" name="__codelineno-0-1538"></a><span class="sd"> Returns a pa.Table with data from the Iceberg table by resolving the</span>
<a id="__codelineno-0-1539" name="__codelineno-0-1539"></a><span class="sd"> right columns that match the current table schema. Only data that</span>
<a id="__codelineno-0-1540" name="__codelineno-0-1540"></a><span class="sd"> matches the provided row_filter expression is returned.</span>
<a id="__codelineno-0-1541" name="__codelineno-0-1541"></a>
<a id="__codelineno-0-1542" name="__codelineno-0-1542"></a><span class="sd"> Args:</span>
<a id="__codelineno-0-1543" name="__codelineno-0-1543"></a><span class="sd"> tasks: FileScanTasks representing the data files and delete files to read from.</span>
<a id="__codelineno-0-1544" name="__codelineno-0-1544"></a>
<a id="__codelineno-0-1545" name="__codelineno-0-1545"></a><span class="sd"> Returns:</span>
<a id="__codelineno-0-1546" name="__codelineno-0-1546"></a><span class="sd"> A PyArrow table. Total number of rows will be capped if specified.</span>
<a id="__codelineno-0-1547" name="__codelineno-0-1547"></a>
<a id="__codelineno-0-1548" name="__codelineno-0-1548"></a><span class="sd"> Raises:</span>
<a id="__codelineno-0-1549" name="__codelineno-0-1549"></a><span class="sd"> ResolveError: When a required field cannot be found in the file</span>
<a id="__codelineno-0-1550" name="__codelineno-0-1550"></a><span class="sd"> ValueError: When a field type in the file cannot be projected to the schema type</span>
<a id="__codelineno-0-1551" name="__codelineno-0-1551"></a><span class="sd"> &quot;&quot;&quot;</span>
<a id="__codelineno-0-1552" name="__codelineno-0-1552"></a> <span class="n">deletes_per_file</span> <span class="o">=</span> <span class="n">_read_all_delete_files</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_io</span><span class="p">,</span> <span class="n">tasks</span><span class="p">)</span>
<a id="__codelineno-0-1553" name="__codelineno-0-1553"></a> <span class="n">executor</span> <span class="o">=</span> <span class="n">ExecutorFactory</span><span class="o">.</span><span class="n">get_or_create</span><span class="p">()</span>
<a id="__codelineno-0-1554" name="__codelineno-0-1554"></a>
<a id="__codelineno-0-1555" name="__codelineno-0-1555"></a> <span class="k">def</span><span class="w"> </span><span class="nf">_table_from_scan_task</span><span class="p">(</span><span class="n">task</span><span class="p">:</span> <span class="n">FileScanTask</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">pa</span><span class="o">.</span><span class="n">Table</span><span class="p">:</span>
<a id="__codelineno-0-1556" name="__codelineno-0-1556"></a> <span class="n">batches</span> <span class="o">=</span> <span class="nb">list</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_record_batches_from_scan_tasks_and_deletes</span><span class="p">([</span><span class="n">task</span><span class="p">],</span> <span class="n">deletes_per_file</span><span class="p">))</span>
<a id="__codelineno-0-1557" name="__codelineno-0-1557"></a> <span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">batches</span><span class="p">)</span> <span class="o">&gt;</span> <span class="mi">0</span><span class="p">:</span>
<a id="__codelineno-0-1558" name="__codelineno-0-1558"></a> <span class="k">return</span> <span class="n">pa</span><span class="o">.</span><span class="n">Table</span><span class="o">.</span><span class="n">from_batches</span><span class="p">(</span><span class="n">batches</span><span class="p">)</span>
<a id="__codelineno-0-1559" name="__codelineno-0-1559"></a> <span class="k">else</span><span class="p">:</span>
<a id="__codelineno-0-1560" name="__codelineno-0-1560"></a> <span class="k">return</span> <span class="kc">None</span>
<a id="__codelineno-0-1561" name="__codelineno-0-1561"></a>
<a id="__codelineno-0-1562" name="__codelineno-0-1562"></a> <span class="n">futures</span> <span class="o">=</span> <span class="p">[</span>
<a id="__codelineno-0-1563" name="__codelineno-0-1563"></a> <span class="n">executor</span><span class="o">.</span><span class="n">submit</span><span class="p">(</span>
<a id="__codelineno-0-1564" name="__codelineno-0-1564"></a> <span class="n">_table_from_scan_task</span><span class="p">,</span>
<a id="__codelineno-0-1565" name="__codelineno-0-1565"></a> <span class="n">task</span><span class="p">,</span>
<a id="__codelineno-0-1566" name="__codelineno-0-1566"></a> <span class="p">)</span>
<a id="__codelineno-0-1567" name="__codelineno-0-1567"></a> <span class="k">for</span> <span class="n">task</span> <span class="ow">in</span> <span class="n">tasks</span>
<a id="__codelineno-0-1568" name="__codelineno-0-1568"></a> <span class="p">]</span>
<a id="__codelineno-0-1569" name="__codelineno-0-1569"></a> <span class="n">total_row_count</span> <span class="o">=</span> <span class="mi">0</span>
<a id="__codelineno-0-1570" name="__codelineno-0-1570"></a> <span class="c1"># for consistent ordering, we need to maintain future order</span>
<a id="__codelineno-0-1571" name="__codelineno-0-1571"></a> <span class="n">futures_index</span> <span class="o">=</span> <span class="p">{</span><span class="n">f</span><span class="p">:</span> <span class="n">i</span> <span class="k">for</span> <span class="n">i</span><span class="p">,</span> <span class="n">f</span> <span class="ow">in</span> <span class="nb">enumerate</span><span class="p">(</span><span class="n">futures</span><span class="p">)}</span>
<a id="__codelineno-0-1572" name="__codelineno-0-1572"></a> <span class="n">completed_futures</span><span class="p">:</span> <span class="n">SortedList</span><span class="p">[</span><span class="n">Future</span><span class="p">[</span><span class="n">pa</span><span class="o">.</span><span class="n">Table</span><span class="p">]]</span> <span class="o">=</span> <span class="n">SortedList</span><span class="p">(</span><span class="n">iterable</span><span class="o">=</span><span class="p">[],</span> <span class="n">key</span><span class="o">=</span><span class="k">lambda</span> <span class="n">f</span><span class="p">:</span> <span class="n">futures_index</span><span class="p">[</span><span class="n">f</span><span class="p">])</span>
<a id="__codelineno-0-1573" name="__codelineno-0-1573"></a> <span class="k">for</span> <span class="n">future</span> <span class="ow">in</span> <span class="n">concurrent</span><span class="o">.</span><span class="n">futures</span><span class="o">.</span><span class="n">as_completed</span><span class="p">(</span><span class="n">futures</span><span class="p">):</span>
<a id="__codelineno-0-1574" name="__codelineno-0-1574"></a> <span class="n">completed_futures</span><span class="o">.</span><span class="n">add</span><span class="p">(</span><span class="n">future</span><span class="p">)</span>
<a id="__codelineno-0-1575" name="__codelineno-0-1575"></a> <span class="k">if</span> <span class="n">table_result</span> <span class="o">:=</span> <span class="n">future</span><span class="o">.</span><span class="n">result</span><span class="p">():</span>
<a id="__codelineno-0-1576" name="__codelineno-0-1576"></a> <span class="n">total_row_count</span> <span class="o">+=</span> <span class="nb">len</span><span class="p">(</span><span class="n">table_result</span><span class="p">)</span>
<a id="__codelineno-0-1577" name="__codelineno-0-1577"></a> <span class="c1"># stop early if limit is satisfied</span>
<a id="__codelineno-0-1578" name="__codelineno-0-1578"></a> <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">_limit</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span> <span class="ow">and</span> <span class="n">total_row_count</span> <span class="o">&gt;=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_limit</span><span class="p">:</span>
<a id="__codelineno-0-1579" name="__codelineno-0-1579"></a> <span class="k">break</span>
<a id="__codelineno-0-1580" name="__codelineno-0-1580"></a>
<a id="__codelineno-0-1581" name="__codelineno-0-1581"></a> <span class="c1"># by now, we&#39;ve either completed all tasks or satisfied the limit</span>
<a id="__codelineno-0-1582" name="__codelineno-0-1582"></a> <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">_limit</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
<a id="__codelineno-0-1583" name="__codelineno-0-1583"></a> <span class="n">_</span> <span class="o">=</span> <span class="p">[</span><span class="n">f</span><span class="o">.</span><span class="n">cancel</span><span class="p">()</span> <span class="k">for</span> <span class="n">f</span> <span class="ow">in</span> <span class="n">futures</span> <span class="k">if</span> <span class="ow">not</span> <span class="n">f</span><span class="o">.</span><span class="n">done</span><span class="p">()]</span>
<a id="__codelineno-0-1584" name="__codelineno-0-1584"></a>
<a id="__codelineno-0-1585" name="__codelineno-0-1585"></a> <span class="n">tables</span> <span class="o">=</span> <span class="p">[</span><span class="n">f</span><span class="o">.</span><span class="n">result</span><span class="p">()</span> <span class="k">for</span> <span class="n">f</span> <span class="ow">in</span> <span class="n">completed_futures</span> <span class="k">if</span> <span class="n">f</span><span class="o">.</span><span class="n">result</span><span class="p">()]</span>
<a id="__codelineno-0-1586" name="__codelineno-0-1586"></a>
<a id="__codelineno-0-1587" name="__codelineno-0-1587"></a> <span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">tables</span><span class="p">)</span> <span class="o">&lt;</span> <span class="mi">1</span><span class="p">:</span>
<a id="__codelineno-0-1588" name="__codelineno-0-1588"></a> <span class="k">return</span> <span class="n">pa</span><span class="o">.</span><span class="n">Table</span><span class="o">.</span><span class="n">from_batches</span><span class="p">([],</span> <span class="n">schema</span><span class="o">=</span><span class="n">schema_to_pyarrow</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_projected_schema</span><span class="p">,</span> <span class="n">include_field_ids</span><span class="o">=</span><span class="kc">False</span><span class="p">))</span>
<a id="__codelineno-0-1589" name="__codelineno-0-1589"></a>
<a id="__codelineno-0-1590" name="__codelineno-0-1590"></a> <span class="n">result</span> <span class="o">=</span> <span class="n">pa</span><span class="o">.</span><span class="n">concat_tables</span><span class="p">(</span><span class="n">tables</span><span class="p">,</span> <span class="n">promote_options</span><span class="o">=</span><span class="s2">&quot;permissive&quot;</span><span class="p">)</span>
<a id="__codelineno-0-1591" name="__codelineno-0-1591"></a>
<a id="__codelineno-0-1592" name="__codelineno-0-1592"></a> <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">_limit</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
<a id="__codelineno-0-1593" name="__codelineno-0-1593"></a> <span class="k">return</span> <span class="n">result</span><span class="o">.</span><span class="n">slice</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">_limit</span><span class="p">)</span>
<a id="__codelineno-0-1594" name="__codelineno-0-1594"></a>
<a id="__codelineno-0-1595" name="__codelineno-0-1595"></a> <span class="k">return</span> <span class="n">result</span>
</code></pre></div></td></tr></table></div>
</details>
</div>
</div>
</div>
</div>
</div>
<div class="doc doc-object doc-class">
<h2 id="pyiceberg.io.pyarrow.PyArrowFile" class="doc doc-heading">
<code>PyArrowFile</code>
<a href="#pyiceberg.io.pyarrow.PyArrowFile" class="headerlink" title="Permanent link">&para;</a></h2>
<div class="doc doc-contents ">
<p class="doc doc-class-bases">
Bases: <code><a class="autorefs autorefs-internal" title="pyiceberg.io.InputFile" href="../#pyiceberg.io.InputFile">InputFile</a></code>, <code><a class="autorefs autorefs-internal" title="pyiceberg.io.OutputFile" href="../#pyiceberg.io.OutputFile">OutputFile</a></code></p>
<p>A combined InputFile and OutputFile implementation that uses a pyarrow filesystem to generate pyarrow.lib.NativeFile instances.</p>
<p><span class="doc-section-title">Parameters:</span></p>
<table>
<thead>
<tr>
<th>Name</th>
<th>Type</th>
<th>Description</th>
<th>Default</th>
</tr>
</thead>
<tbody>
<tr class="doc-section-item">
<td>
<code>location</code>
</td>
<td>
<code><span title="str">str</span></code>
</td>
<td>
<div class="doc-md-description">
<p>A URI or a path to a local file.</p>
</div>
</td>
<td>
<em>required</em>
</td>
</tr>
</tbody>
</table>
<p><span class="doc-section-title">Attributes:</span></p>
<table>
<thead>
<tr>
<th>Name</th>
<th>Type</th>
<th>Description</th>
</tr>
</thead>
<tbody>
<tr class="doc-section-item">
<td><code><span title="pyiceberg.io.pyarrow.PyArrowFile.location">location</span></code></td>
<td>
<code><span title="str">str</span></code>
</td>
<td>
<div class="doc-md-description">
<p>The URI or path to a local file for a PyArrowFile instance.</p>
</div>
</td>
</tr>
</tbody>
</table>
<p><span class="doc-section-title">Examples:</span></p>
<div class="highlight"><pre><span></span><code><a id="__codelineno-0-1" name="__codelineno-0-1" href="#__codelineno-0-1"></a><span class="gp">&gt;&gt;&gt; </span><span class="kn">from</span><span class="w"> </span><span class="nn">pyiceberg.io.pyarrow</span><span class="w"> </span><span class="kn">import</span> <span class="n">PyArrowFile</span>
<a id="__codelineno-0-2" name="__codelineno-0-2" href="#__codelineno-0-2"></a><span class="gp">&gt;&gt;&gt; </span><span class="c1"># input_file = PyArrowFile(&quot;s3://foo/bar.txt&quot;)</span>
<a id="__codelineno-0-3" name="__codelineno-0-3" href="#__codelineno-0-3"></a><span class="gp">&gt;&gt;&gt; </span><span class="c1"># Read the contents of the PyArrowFile instance</span>
<a id="__codelineno-0-4" name="__codelineno-0-4" href="#__codelineno-0-4"></a><span class="gp">&gt;&gt;&gt; </span><span class="c1"># Make sure that you have permissions to read/write</span>
<a id="__codelineno-0-5" name="__codelineno-0-5" href="#__codelineno-0-5"></a><span class="gp">&gt;&gt;&gt; </span><span class="c1"># file_content = input_file.open().read()</span>
</code></pre></div>
<div class="highlight"><pre><span></span><code><a id="__codelineno-0-1" name="__codelineno-0-1" href="#__codelineno-0-1"></a><span class="gp">&gt;&gt;&gt; </span><span class="c1"># output_file = PyArrowFile(&quot;s3://baz/qux.txt&quot;)</span>
<a id="__codelineno-0-2" name="__codelineno-0-2" href="#__codelineno-0-2"></a><span class="gp">&gt;&gt;&gt; </span><span class="c1"># Write bytes to a file</span>
<a id="__codelineno-0-3" name="__codelineno-0-3" href="#__codelineno-0-3"></a><span class="gp">&gt;&gt;&gt; </span><span class="c1"># Make sure that you have permissions to read/write</span>
<a id="__codelineno-0-4" name="__codelineno-0-4" href="#__codelineno-0-4"></a><span class="gp">&gt;&gt;&gt; </span><span class="c1"># output_file.create().write(b&#39;foobytes&#39;)</span>
</code></pre></div>
<details class="quote">
<summary>Source code in <code>pyiceberg/io/pyarrow.py</code></summary>
<div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-224">224</a></span>
<span class="normal"><a href="#__codelineno-0-225">225</a></span>
<span class="normal"><a href="#__codelineno-0-226">226</a></span>
<span class="normal"><a href="#__codelineno-0-227">227</a></span>
<span class="normal"><a href="#__codelineno-0-228">228</a></span>
<span class="normal"><a href="#__codelineno-0-229">229</a></span>
<span class="normal"><a href="#__codelineno-0-230">230</a></span>
<span class="normal"><a href="#__codelineno-0-231">231</a></span>
<span class="normal"><a href="#__codelineno-0-232">232</a></span>
<span class="normal"><a href="#__codelineno-0-233">233</a></span>
<span class="normal"><a href="#__codelineno-0-234">234</a></span>
<span class="normal"><a href="#__codelineno-0-235">235</a></span>
<span class="normal"><a href="#__codelineno-0-236">236</a></span>
<span class="normal"><a href="#__codelineno-0-237">237</a></span>
<span class="normal"><a href="#__codelineno-0-238">238</a></span>
<span class="normal"><a href="#__codelineno-0-239">239</a></span>
<span class="normal"><a href="#__codelineno-0-240">240</a></span>
<span class="normal"><a href="#__codelineno-0-241">241</a></span>
<span class="normal"><a href="#__codelineno-0-242">242</a></span>
<span class="normal"><a href="#__codelineno-0-243">243</a></span>
<span class="normal"><a href="#__codelineno-0-244">244</a></span>
<span class="normal"><a href="#__codelineno-0-245">245</a></span>
<span class="normal"><a href="#__codelineno-0-246">246</a></span>
<span class="normal"><a href="#__codelineno-0-247">247</a></span>
<span class="normal"><a href="#__codelineno-0-248">248</a></span>
<span class="normal"><a href="#__codelineno-0-249">249</a></span>
<span class="normal"><a href="#__codelineno-0-250">250</a></span>
<span class="normal"><a href="#__codelineno-0-251">251</a></span>
<span class="normal"><a href="#__codelineno-0-252">252</a></span>
<span class="normal"><a href="#__codelineno-0-253">253</a></span>
<span class="normal"><a href="#__codelineno-0-254">254</a></span>
<span class="normal"><a href="#__codelineno-0-255">255</a></span>
<span class="normal"><a href="#__codelineno-0-256">256</a></span>
<span class="normal"><a href="#__codelineno-0-257">257</a></span>
<span class="normal"><a href="#__codelineno-0-258">258</a></span>
<span class="normal"><a href="#__codelineno-0-259">259</a></span>
<span class="normal"><a href="#__codelineno-0-260">260</a></span>
<span class="normal"><a href="#__codelineno-0-261">261</a></span>
<span class="normal"><a href="#__codelineno-0-262">262</a></span>
<span class="normal"><a href="#__codelineno-0-263">263</a></span>
<span class="normal"><a href="#__codelineno-0-264">264</a></span>
<span class="normal"><a href="#__codelineno-0-265">265</a></span>
<span class="normal"><a href="#__codelineno-0-266">266</a></span>
<span class="normal"><a href="#__codelineno-0-267">267</a></span>
<span class="normal"><a href="#__codelineno-0-268">268</a></span>
<span class="normal"><a href="#__codelineno-0-269">269</a></span>
<span class="normal"><a href="#__codelineno-0-270">270</a></span>
<span class="normal"><a href="#__codelineno-0-271">271</a></span>
<span class="normal"><a href="#__codelineno-0-272">272</a></span>
<span class="normal"><a href="#__codelineno-0-273">273</a></span>
<span class="normal"><a href="#__codelineno-0-274">274</a></span>
<span class="normal"><a href="#__codelineno-0-275">275</a></span>
<span class="normal"><a href="#__codelineno-0-276">276</a></span>
<span class="normal"><a href="#__codelineno-0-277">277</a></span>
<span class="normal"><a href="#__codelineno-0-278">278</a></span>
<span class="normal"><a href="#__codelineno-0-279">279</a></span>
<span class="normal"><a href="#__codelineno-0-280">280</a></span>
<span class="normal"><a href="#__codelineno-0-281">281</a></span>
<span class="normal"><a href="#__codelineno-0-282">282</a></span>
<span class="normal"><a href="#__codelineno-0-283">283</a></span>
<span class="normal"><a href="#__codelineno-0-284">284</a></span>
<span class="normal"><a href="#__codelineno-0-285">285</a></span>
<span class="normal"><a href="#__codelineno-0-286">286</a></span>
<span class="normal"><a href="#__codelineno-0-287">287</a></span>
<span class="normal"><a href="#__codelineno-0-288">288</a></span>
<span class="normal"><a href="#__codelineno-0-289">289</a></span>
<span class="normal"><a href="#__codelineno-0-290">290</a></span>
<span class="normal"><a href="#__codelineno-0-291">291</a></span>
<span class="normal"><a href="#__codelineno-0-292">292</a></span>
<span class="normal"><a href="#__codelineno-0-293">293</a></span>
<span class="normal"><a href="#__codelineno-0-294">294</a></span>
<span class="normal"><a href="#__codelineno-0-295">295</a></span>
<span class="normal"><a href="#__codelineno-0-296">296</a></span>
<span class="normal"><a href="#__codelineno-0-297">297</a></span>
<span class="normal"><a href="#__codelineno-0-298">298</a></span>
<span class="normal"><a href="#__codelineno-0-299">299</a></span>
<span class="normal"><a href="#__codelineno-0-300">300</a></span>
<span class="normal"><a href="#__codelineno-0-301">301</a></span>
<span class="normal"><a href="#__codelineno-0-302">302</a></span>
<span class="normal"><a href="#__codelineno-0-303">303</a></span>
<span class="normal"><a href="#__codelineno-0-304">304</a></span>
<span class="normal"><a href="#__codelineno-0-305">305</a></span>
<span class="normal"><a href="#__codelineno-0-306">306</a></span>
<span class="normal"><a href="#__codelineno-0-307">307</a></span>
<span class="normal"><a href="#__codelineno-0-308">308</a></span>
<span class="normal"><a href="#__codelineno-0-309">309</a></span>
<span class="normal"><a href="#__codelineno-0-310">310</a></span>
<span class="normal"><a href="#__codelineno-0-311">311</a></span>
<span class="normal"><a href="#__codelineno-0-312">312</a></span>
<span class="normal"><a href="#__codelineno-0-313">313</a></span>
<span class="normal"><a href="#__codelineno-0-314">314</a></span>
<span class="normal"><a href="#__codelineno-0-315">315</a></span>
<span class="normal"><a href="#__codelineno-0-316">316</a></span>
<span class="normal"><a href="#__codelineno-0-317">317</a></span>
<span class="normal"><a href="#__codelineno-0-318">318</a></span>
<span class="normal"><a href="#__codelineno-0-319">319</a></span>
<span class="normal"><a href="#__codelineno-0-320">320</a></span>
<span class="normal"><a href="#__codelineno-0-321">321</a></span>
<span class="normal"><a href="#__codelineno-0-322">322</a></span>
<span class="normal"><a href="#__codelineno-0-323">323</a></span>
<span class="normal"><a href="#__codelineno-0-324">324</a></span>
<span class="normal"><a href="#__codelineno-0-325">325</a></span>
<span class="normal"><a href="#__codelineno-0-326">326</a></span>
<span class="normal"><a href="#__codelineno-0-327">327</a></span>
<span class="normal"><a href="#__codelineno-0-328">328</a></span>
<span class="normal"><a href="#__codelineno-0-329">329</a></span>
<span class="normal"><a href="#__codelineno-0-330">330</a></span>
<span class="normal"><a href="#__codelineno-0-331">331</a></span>
<span class="normal"><a href="#__codelineno-0-332">332</a></span>
<span class="normal"><a href="#__codelineno-0-333">333</a></span>
<span class="normal"><a href="#__codelineno-0-334">334</a></span>
<span class="normal"><a href="#__codelineno-0-335">335</a></span>
<span class="normal"><a href="#__codelineno-0-336">336</a></span>
<span class="normal"><a href="#__codelineno-0-337">337</a></span>
<span class="normal"><a href="#__codelineno-0-338">338</a></span>
<span class="normal"><a href="#__codelineno-0-339">339</a></span>
<span class="normal"><a href="#__codelineno-0-340">340</a></span>
<span class="normal"><a href="#__codelineno-0-341">341</a></span>
<span class="normal"><a href="#__codelineno-0-342">342</a></span>
<span class="normal"><a href="#__codelineno-0-343">343</a></span>
<span class="normal"><a href="#__codelineno-0-344">344</a></span>
<span class="normal"><a href="#__codelineno-0-345">345</a></span>
<span class="normal"><a href="#__codelineno-0-346">346</a></span>
<span class="normal"><a href="#__codelineno-0-347">347</a></span>
<span class="normal"><a href="#__codelineno-0-348">348</a></span>
<span class="normal"><a href="#__codelineno-0-349">349</a></span>
<span class="normal"><a href="#__codelineno-0-350">350</a></span>
<span class="normal"><a href="#__codelineno-0-351">351</a></span>
<span class="normal"><a href="#__codelineno-0-352">352</a></span>
<span class="normal"><a href="#__codelineno-0-353">353</a></span>
<span class="normal"><a href="#__codelineno-0-354">354</a></span>
<span class="normal"><a href="#__codelineno-0-355">355</a></span>
<span class="normal"><a href="#__codelineno-0-356">356</a></span></pre></div></td><td class="code"><div><pre><span></span><code><a id="__codelineno-0-224" name="__codelineno-0-224"></a><span class="k">class</span><span class="w"> </span><span class="nc">PyArrowFile</span><span class="p">(</span><span class="n">InputFile</span><span class="p">,</span> <span class="n">OutputFile</span><span class="p">):</span>
<a id="__codelineno-0-225" name="__codelineno-0-225"></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;A combined InputFile and OutputFile implementation that uses a pyarrow filesystem to generate pyarrow.lib.NativeFile instances.</span>
<a id="__codelineno-0-226" name="__codelineno-0-226"></a>
<a id="__codelineno-0-227" name="__codelineno-0-227"></a><span class="sd"> Args:</span>
<a id="__codelineno-0-228" name="__codelineno-0-228"></a><span class="sd"> location (str): A URI or a path to a local file.</span>
<a id="__codelineno-0-229" name="__codelineno-0-229"></a>
<a id="__codelineno-0-230" name="__codelineno-0-230"></a><span class="sd"> Attributes:</span>
<a id="__codelineno-0-231" name="__codelineno-0-231"></a><span class="sd"> location (str): The URI or path to a local file for a PyArrowFile instance.</span>
<a id="__codelineno-0-232" name="__codelineno-0-232"></a>
<a id="__codelineno-0-233" name="__codelineno-0-233"></a><span class="sd"> Examples:</span>
<a id="__codelineno-0-234" name="__codelineno-0-234"></a><span class="sd"> &gt;&gt;&gt; from pyiceberg.io.pyarrow import PyArrowFile</span>
<a id="__codelineno-0-235" name="__codelineno-0-235"></a><span class="sd"> &gt;&gt;&gt; # input_file = PyArrowFile(&quot;s3://foo/bar.txt&quot;)</span>
<a id="__codelineno-0-236" name="__codelineno-0-236"></a><span class="sd"> &gt;&gt;&gt; # Read the contents of the PyArrowFile instance</span>
<a id="__codelineno-0-237" name="__codelineno-0-237"></a><span class="sd"> &gt;&gt;&gt; # Make sure that you have permissions to read/write</span>
<a id="__codelineno-0-238" name="__codelineno-0-238"></a><span class="sd"> &gt;&gt;&gt; # file_content = input_file.open().read()</span>
<a id="__codelineno-0-239" name="__codelineno-0-239"></a>
<a id="__codelineno-0-240" name="__codelineno-0-240"></a><span class="sd"> &gt;&gt;&gt; # output_file = PyArrowFile(&quot;s3://baz/qux.txt&quot;)</span>
<a id="__codelineno-0-241" name="__codelineno-0-241"></a><span class="sd"> &gt;&gt;&gt; # Write bytes to a file</span>
<a id="__codelineno-0-242" name="__codelineno-0-242"></a><span class="sd"> &gt;&gt;&gt; # Make sure that you have permissions to read/write</span>
<a id="__codelineno-0-243" name="__codelineno-0-243"></a><span class="sd"> &gt;&gt;&gt; # output_file.create().write(b&#39;foobytes&#39;)</span>
<a id="__codelineno-0-244" name="__codelineno-0-244"></a><span class="sd"> &quot;&quot;&quot;</span>
<a id="__codelineno-0-245" name="__codelineno-0-245"></a>
<a id="__codelineno-0-246" name="__codelineno-0-246"></a> <span class="n">_filesystem</span><span class="p">:</span> <span class="n">FileSystem</span>
<a id="__codelineno-0-247" name="__codelineno-0-247"></a> <span class="n">_path</span><span class="p">:</span> <span class="nb">str</span>
<a id="__codelineno-0-248" name="__codelineno-0-248"></a> <span class="n">_buffer_size</span><span class="p">:</span> <span class="nb">int</span>
<a id="__codelineno-0-249" name="__codelineno-0-249"></a>
<a id="__codelineno-0-250" name="__codelineno-0-250"></a> <span class="k">def</span><span class="w"> </span><span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">location</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span> <span class="n">path</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span> <span class="n">fs</span><span class="p">:</span> <span class="n">FileSystem</span><span class="p">,</span> <span class="n">buffer_size</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="n">ONE_MEGABYTE</span><span class="p">):</span>
<a id="__codelineno-0-251" name="__codelineno-0-251"></a> <span class="bp">self</span><span class="o">.</span><span class="n">_filesystem</span> <span class="o">=</span> <span class="n">fs</span>
<a id="__codelineno-0-252" name="__codelineno-0-252"></a> <span class="bp">self</span><span class="o">.</span><span class="n">_path</span> <span class="o">=</span> <span class="n">path</span>
<a id="__codelineno-0-253" name="__codelineno-0-253"></a> <span class="bp">self</span><span class="o">.</span><span class="n">_buffer_size</span> <span class="o">=</span> <span class="n">buffer_size</span>
<a id="__codelineno-0-254" name="__codelineno-0-254"></a> <span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="fm">__init__</span><span class="p">(</span><span class="n">location</span><span class="o">=</span><span class="n">location</span><span class="p">)</span>
<a id="__codelineno-0-255" name="__codelineno-0-255"></a>
<a id="__codelineno-0-256" name="__codelineno-0-256"></a> <span class="k">def</span><span class="w"> </span><span class="nf">_file_info</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">FileInfo</span><span class="p">:</span>
<a id="__codelineno-0-257" name="__codelineno-0-257"></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;Retrieve a pyarrow.fs.FileInfo object for the location.</span>
<a id="__codelineno-0-258" name="__codelineno-0-258"></a>
<a id="__codelineno-0-259" name="__codelineno-0-259"></a><span class="sd"> Raises:</span>
<a id="__codelineno-0-260" name="__codelineno-0-260"></a><span class="sd"> PermissionError: If the file at self.location cannot be accessed due to a permission error such as</span>
<a id="__codelineno-0-261" name="__codelineno-0-261"></a><span class="sd"> an AWS error code 15.</span>
<a id="__codelineno-0-262" name="__codelineno-0-262"></a><span class="sd"> &quot;&quot;&quot;</span>
<a id="__codelineno-0-263" name="__codelineno-0-263"></a> <span class="k">try</span><span class="p">:</span>
<a id="__codelineno-0-264" name="__codelineno-0-264"></a> <span class="n">file_info</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_filesystem</span><span class="o">.</span><span class="n">get_file_info</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_path</span><span class="p">)</span>
<a id="__codelineno-0-265" name="__codelineno-0-265"></a> <span class="k">except</span> <span class="ne">OSError</span> <span class="k">as</span> <span class="n">e</span><span class="p">:</span>
<a id="__codelineno-0-266" name="__codelineno-0-266"></a> <span class="k">if</span> <span class="n">e</span><span class="o">.</span><span class="n">errno</span> <span class="o">==</span> <span class="mi">13</span> <span class="ow">or</span> <span class="s2">&quot;AWS Error [code 15]&quot;</span> <span class="ow">in</span> <span class="nb">str</span><span class="p">(</span><span class="n">e</span><span class="p">):</span>
<a id="__codelineno-0-267" name="__codelineno-0-267"></a> <span class="k">raise</span> <span class="ne">PermissionError</span><span class="p">(</span><span class="sa">f</span><span class="s2">&quot;Cannot get file info, access denied: </span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">location</span><span class="si">}</span><span class="s2">&quot;</span><span class="p">)</span> <span class="kn">from</span><span class="w"> </span><span class="nn">e</span>
<a id="__codelineno-0-268" name="__codelineno-0-268"></a> <span class="k">raise</span> <span class="c1"># pragma: no cover - If some other kind of OSError, raise the raw error</span>
<a id="__codelineno-0-269" name="__codelineno-0-269"></a>
<a id="__codelineno-0-270" name="__codelineno-0-270"></a> <span class="k">if</span> <span class="n">file_info</span><span class="o">.</span><span class="n">type</span> <span class="o">==</span> <span class="n">FileType</span><span class="o">.</span><span class="n">NotFound</span><span class="p">:</span>
<a id="__codelineno-0-271" name="__codelineno-0-271"></a> <span class="k">raise</span> <span class="ne">FileNotFoundError</span><span class="p">(</span><span class="sa">f</span><span class="s2">&quot;Cannot get file info, file not found: </span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">location</span><span class="si">}</span><span class="s2">&quot;</span><span class="p">)</span>
<a id="__codelineno-0-272" name="__codelineno-0-272"></a> <span class="k">return</span> <span class="n">file_info</span>
<a id="__codelineno-0-273" name="__codelineno-0-273"></a>
<a id="__codelineno-0-274" name="__codelineno-0-274"></a> <span class="k">def</span><span class="w"> </span><span class="fm">__len__</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="nb">int</span><span class="p">:</span>
<a id="__codelineno-0-275" name="__codelineno-0-275"></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;Return the total length of the file, in bytes.&quot;&quot;&quot;</span>
<a id="__codelineno-0-276" name="__codelineno-0-276"></a> <span class="n">file_info</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_file_info</span><span class="p">()</span>
<a id="__codelineno-0-277" name="__codelineno-0-277"></a> <span class="k">return</span> <span class="n">file_info</span><span class="o">.</span><span class="n">size</span>
<a id="__codelineno-0-278" name="__codelineno-0-278"></a>
<a id="__codelineno-0-279" name="__codelineno-0-279"></a> <span class="k">def</span><span class="w"> </span><span class="nf">exists</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="nb">bool</span><span class="p">:</span>
<a id="__codelineno-0-280" name="__codelineno-0-280"></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;Check whether the location exists.&quot;&quot;&quot;</span>
<a id="__codelineno-0-281" name="__codelineno-0-281"></a> <span class="k">try</span><span class="p">:</span>
<a id="__codelineno-0-282" name="__codelineno-0-282"></a> <span class="bp">self</span><span class="o">.</span><span class="n">_file_info</span><span class="p">()</span> <span class="c1"># raises FileNotFoundError if it does not exist</span>
<a id="__codelineno-0-283" name="__codelineno-0-283"></a> <span class="k">return</span> <span class="kc">True</span>
<a id="__codelineno-0-284" name="__codelineno-0-284"></a> <span class="k">except</span> <span class="ne">FileNotFoundError</span><span class="p">:</span>
<a id="__codelineno-0-285" name="__codelineno-0-285"></a> <span class="k">return</span> <span class="kc">False</span>
<a id="__codelineno-0-286" name="__codelineno-0-286"></a>
<a id="__codelineno-0-287" name="__codelineno-0-287"></a> <span class="k">def</span><span class="w"> </span><span class="nf">open</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">seekable</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">True</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">InputStream</span><span class="p">:</span>
<a id="__codelineno-0-288" name="__codelineno-0-288"></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;Open the location using a PyArrow FileSystem inferred from the location.</span>
<a id="__codelineno-0-289" name="__codelineno-0-289"></a>
<a id="__codelineno-0-290" name="__codelineno-0-290"></a><span class="sd"> Args:</span>
<a id="__codelineno-0-291" name="__codelineno-0-291"></a><span class="sd"> seekable: If the stream should support seek, or if it is consumed sequentially.</span>
<a id="__codelineno-0-292" name="__codelineno-0-292"></a>
<a id="__codelineno-0-293" name="__codelineno-0-293"></a><span class="sd"> Returns:</span>
<a id="__codelineno-0-294" name="__codelineno-0-294"></a><span class="sd"> pyarrow.lib.NativeFile: A NativeFile instance for the file located at `self.location`.</span>
<a id="__codelineno-0-295" name="__codelineno-0-295"></a>
<a id="__codelineno-0-296" name="__codelineno-0-296"></a><span class="sd"> Raises:</span>
<a id="__codelineno-0-297" name="__codelineno-0-297"></a><span class="sd"> FileNotFoundError: If the file at self.location does not exist.</span>
<a id="__codelineno-0-298" name="__codelineno-0-298"></a><span class="sd"> PermissionError: If the file at self.location cannot be accessed due to a permission error such as</span>
<a id="__codelineno-0-299" name="__codelineno-0-299"></a><span class="sd"> an AWS error code 15.</span>
<a id="__codelineno-0-300" name="__codelineno-0-300"></a><span class="sd"> &quot;&quot;&quot;</span>
<a id="__codelineno-0-301" name="__codelineno-0-301"></a> <span class="k">try</span><span class="p">:</span>
<a id="__codelineno-0-302" name="__codelineno-0-302"></a> <span class="k">if</span> <span class="n">seekable</span><span class="p">:</span>
<a id="__codelineno-0-303" name="__codelineno-0-303"></a> <span class="n">input_file</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_filesystem</span><span class="o">.</span><span class="n">open_input_file</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_path</span><span class="p">)</span>
<a id="__codelineno-0-304" name="__codelineno-0-304"></a> <span class="k">else</span><span class="p">:</span>
<a id="__codelineno-0-305" name="__codelineno-0-305"></a> <span class="n">input_file</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_filesystem</span><span class="o">.</span><span class="n">open_input_stream</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_path</span><span class="p">,</span> <span class="n">buffer_size</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">_buffer_size</span><span class="p">)</span>
<a id="__codelineno-0-306" name="__codelineno-0-306"></a> <span class="k">except</span> <span class="ne">FileNotFoundError</span><span class="p">:</span>
<a id="__codelineno-0-307" name="__codelineno-0-307"></a> <span class="k">raise</span>
<a id="__codelineno-0-308" name="__codelineno-0-308"></a> <span class="k">except</span> <span class="ne">PermissionError</span><span class="p">:</span>
<a id="__codelineno-0-309" name="__codelineno-0-309"></a> <span class="k">raise</span>
<a id="__codelineno-0-310" name="__codelineno-0-310"></a> <span class="k">except</span> <span class="ne">OSError</span> <span class="k">as</span> <span class="n">e</span><span class="p">:</span>
<a id="__codelineno-0-311" name="__codelineno-0-311"></a> <span class="k">if</span> <span class="n">e</span><span class="o">.</span><span class="n">errno</span> <span class="o">==</span> <span class="mi">2</span> <span class="ow">or</span> <span class="s2">&quot;Path does not exist&quot;</span> <span class="ow">in</span> <span class="nb">str</span><span class="p">(</span><span class="n">e</span><span class="p">):</span>
<a id="__codelineno-0-312" name="__codelineno-0-312"></a> <span class="k">raise</span> <span class="ne">FileNotFoundError</span><span class="p">(</span><span class="sa">f</span><span class="s2">&quot;Cannot open file, does not exist: </span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">location</span><span class="si">}</span><span class="s2">&quot;</span><span class="p">)</span> <span class="kn">from</span><span class="w"> </span><span class="nn">e</span>
<a id="__codelineno-0-313" name="__codelineno-0-313"></a> <span class="k">elif</span> <span class="n">e</span><span class="o">.</span><span class="n">errno</span> <span class="o">==</span> <span class="mi">13</span> <span class="ow">or</span> <span class="s2">&quot;AWS Error [code 15]&quot;</span> <span class="ow">in</span> <span class="nb">str</span><span class="p">(</span><span class="n">e</span><span class="p">):</span>
<a id="__codelineno-0-314" name="__codelineno-0-314"></a> <span class="k">raise</span> <span class="ne">PermissionError</span><span class="p">(</span><span class="sa">f</span><span class="s2">&quot;Cannot open file, access denied: </span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">location</span><span class="si">}</span><span class="s2">&quot;</span><span class="p">)</span> <span class="kn">from</span><span class="w"> </span><span class="nn">e</span>
<a id="__codelineno-0-315" name="__codelineno-0-315"></a> <span class="k">raise</span> <span class="c1"># pragma: no cover - If some other kind of OSError, raise the raw error</span>
<a id="__codelineno-0-316" name="__codelineno-0-316"></a> <span class="k">return</span> <span class="n">input_file</span>
<a id="__codelineno-0-317" name="__codelineno-0-317"></a>
<a id="__codelineno-0-318" name="__codelineno-0-318"></a> <span class="k">def</span><span class="w"> </span><span class="nf">create</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">overwrite</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">False</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">OutputStream</span><span class="p">:</span>
<a id="__codelineno-0-319" name="__codelineno-0-319"></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;Create a writable pyarrow.lib.NativeFile for this PyArrowFile&#39;s location.</span>
<a id="__codelineno-0-320" name="__codelineno-0-320"></a>
<a id="__codelineno-0-321" name="__codelineno-0-321"></a><span class="sd"> Args:</span>
<a id="__codelineno-0-322" name="__codelineno-0-322"></a><span class="sd"> overwrite (bool): Whether to overwrite the file if it already exists.</span>
<a id="__codelineno-0-323" name="__codelineno-0-323"></a>
<a id="__codelineno-0-324" name="__codelineno-0-324"></a><span class="sd"> Returns:</span>
<a id="__codelineno-0-325" name="__codelineno-0-325"></a><span class="sd"> pyarrow.lib.NativeFile: A NativeFile instance for the file located at self.location.</span>
<a id="__codelineno-0-326" name="__codelineno-0-326"></a>
<a id="__codelineno-0-327" name="__codelineno-0-327"></a><span class="sd"> Raises:</span>
<a id="__codelineno-0-328" name="__codelineno-0-328"></a><span class="sd"> FileExistsError: If the file already exists at `self.location` and `overwrite` is False.</span>
<a id="__codelineno-0-329" name="__codelineno-0-329"></a>
<a id="__codelineno-0-330" name="__codelineno-0-330"></a><span class="sd"> Note:</span>
<a id="__codelineno-0-331" name="__codelineno-0-331"></a><span class="sd"> This retrieves a pyarrow NativeFile by opening an output stream. If overwrite is set to False,</span>
<a id="__codelineno-0-332" name="__codelineno-0-332"></a><span class="sd"> a check is first performed to verify that the file does not exist. This is not thread-safe and</span>
<a id="__codelineno-0-333" name="__codelineno-0-333"></a><span class="sd"> a possibility does exist that the file can be created by a concurrent process after the existence</span>
<a id="__codelineno-0-334" name="__codelineno-0-334"></a><span class="sd"> check yet before the output stream is created. In such a case, the default pyarrow behavior will</span>
<a id="__codelineno-0-335" name="__codelineno-0-335"></a><span class="sd"> truncate the contents of the existing file when opening the output stream.</span>
<a id="__codelineno-0-336" name="__codelineno-0-336"></a><span class="sd"> &quot;&quot;&quot;</span>
<a id="__codelineno-0-337" name="__codelineno-0-337"></a> <span class="k">try</span><span class="p">:</span>
<a id="__codelineno-0-338" name="__codelineno-0-338"></a> <span class="k">if</span> <span class="ow">not</span> <span class="n">overwrite</span> <span class="ow">and</span> <span class="bp">self</span><span class="o">.</span><span class="n">exists</span><span class="p">()</span> <span class="ow">is</span> <span class="kc">True</span><span class="p">:</span>
<a id="__codelineno-0-339" name="__codelineno-0-339"></a> <span class="k">raise</span> <span class="ne">FileExistsError</span><span class="p">(</span><span class="sa">f</span><span class="s2">&quot;Cannot create file, already exists: </span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">location</span><span class="si">}</span><span class="s2">&quot;</span><span class="p">)</span>
<a id="__codelineno-0-340" name="__codelineno-0-340"></a> <span class="n">output_file</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_filesystem</span><span class="o">.</span><span class="n">open_output_stream</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_path</span><span class="p">,</span> <span class="n">buffer_size</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">_buffer_size</span><span class="p">)</span>
<a id="__codelineno-0-341" name="__codelineno-0-341"></a> <span class="k">except</span> <span class="ne">PermissionError</span><span class="p">:</span>
<a id="__codelineno-0-342" name="__codelineno-0-342"></a> <span class="k">raise</span>
<a id="__codelineno-0-343" name="__codelineno-0-343"></a> <span class="k">except</span> <span class="ne">OSError</span> <span class="k">as</span> <span class="n">e</span><span class="p">:</span>
<a id="__codelineno-0-344" name="__codelineno-0-344"></a> <span class="k">if</span> <span class="n">e</span><span class="o">.</span><span class="n">errno</span> <span class="o">==</span> <span class="mi">13</span> <span class="ow">or</span> <span class="s2">&quot;AWS Error [code 15]&quot;</span> <span class="ow">in</span> <span class="nb">str</span><span class="p">(</span><span class="n">e</span><span class="p">):</span>
<a id="__codelineno-0-345" name="__codelineno-0-345"></a> <span class="k">raise</span> <span class="ne">PermissionError</span><span class="p">(</span><span class="sa">f</span><span class="s2">&quot;Cannot create file, access denied: </span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">location</span><span class="si">}</span><span class="s2">&quot;</span><span class="p">)</span> <span class="kn">from</span><span class="w"> </span><span class="nn">e</span>
<a id="__codelineno-0-346" name="__codelineno-0-346"></a> <span class="k">raise</span> <span class="c1"># pragma: no cover - If some other kind of OSError, raise the raw error</span>
<a id="__codelineno-0-347" name="__codelineno-0-347"></a> <span class="k">return</span> <span class="n">output_file</span>
<a id="__codelineno-0-348" name="__codelineno-0-348"></a>
<a id="__codelineno-0-349" name="__codelineno-0-349"></a> <span class="k">def</span><span class="w"> </span><span class="nf">to_input_file</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">PyArrowFile</span><span class="p">:</span>
<a id="__codelineno-0-350" name="__codelineno-0-350"></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;Return this PyArrowFile instance for use as an InputFile.</span>
<a id="__codelineno-0-351" name="__codelineno-0-351"></a>
<a id="__codelineno-0-352" name="__codelineno-0-352"></a><span class="sd"> This method is included to abide by the OutputFile abstract base class. Since this implementation uses a single</span>
<a id="__codelineno-0-353" name="__codelineno-0-353"></a><span class="sd"> PyArrowFile class (as opposed to separate InputFile and OutputFile implementations), this method simply returns</span>
<a id="__codelineno-0-354" name="__codelineno-0-354"></a><span class="sd"> the same instance (self) rather than creating a copy.</span>
<a id="__codelineno-0-355" name="__codelineno-0-355"></a><span class="sd"> &quot;&quot;&quot;</span>
<a id="__codelineno-0-356" name="__codelineno-0-356"></a> <span class="k">return</span> <span class="bp">self</span>
</code></pre></div></td></tr></table></div>
</details>
<div class="doc doc-children">
<div class="doc doc-object doc-function">
<h3 id="pyiceberg.io.pyarrow.PyArrowFile.__len__" class="doc doc-heading">
<code class="highlight language-python"><span class="fm">__len__</span><span class="p">()</span></code>
<a href="#pyiceberg.io.pyarrow.PyArrowFile.__len__" class="headerlink" title="Permanent link">&para;</a></h3>
<div class="doc doc-contents ">
<p>Return the total length of the file, in bytes.</p>
<details class="quote">
<summary>Source code in <code>pyiceberg/io/pyarrow.py</code></summary>
<div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-274">274</a></span>
<span class="normal"><a href="#__codelineno-0-275">275</a></span>
<span class="normal"><a href="#__codelineno-0-276">276</a></span>
<span class="normal"><a href="#__codelineno-0-277">277</a></span></pre></div></td><td class="code"><div><pre><span></span><code><a id="__codelineno-0-274" name="__codelineno-0-274"></a><span class="k">def</span><span class="w"> </span><span class="fm">__len__</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="nb">int</span><span class="p">:</span>
<a id="__codelineno-0-275" name="__codelineno-0-275"></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;Return the total length of the file, in bytes.&quot;&quot;&quot;</span>
<a id="__codelineno-0-276" name="__codelineno-0-276"></a> <span class="n">file_info</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_file_info</span><span class="p">()</span>
<a id="__codelineno-0-277" name="__codelineno-0-277"></a> <span class="k">return</span> <span class="n">file_info</span><span class="o">.</span><span class="n">size</span>
</code></pre></div></td></tr></table></div>
</details>
</div>
</div>
<div class="doc doc-object doc-function">
<h3 id="pyiceberg.io.pyarrow.PyArrowFile._file_info" class="doc doc-heading">
<code class="highlight language-python"><span class="n">_file_info</span><span class="p">()</span></code>
<a href="#pyiceberg.io.pyarrow.PyArrowFile._file_info" class="headerlink" title="Permanent link">&para;</a></h3>
<div class="doc doc-contents ">
<p>Retrieve a pyarrow.fs.FileInfo object for the location.</p>
<p><span class="doc-section-title">Raises:</span></p>
<table>
<thead>
<tr>
<th>Type</th>
<th>Description</th>
</tr>
</thead>
<tbody>
<tr class="doc-section-item">
<td>
<code><span title="PermissionError">PermissionError</span></code>
</td>
<td>
<div class="doc-md-description">
<p>If the file at self.location cannot be accessed due to a permission error such as
an AWS error code 15.</p>
</div>
</td>
</tr>
</tbody>
</table>
<details class="quote">
<summary>Source code in <code>pyiceberg/io/pyarrow.py</code></summary>
<div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-256">256</a></span>
<span class="normal"><a href="#__codelineno-0-257">257</a></span>
<span class="normal"><a href="#__codelineno-0-258">258</a></span>
<span class="normal"><a href="#__codelineno-0-259">259</a></span>
<span class="normal"><a href="#__codelineno-0-260">260</a></span>
<span class="normal"><a href="#__codelineno-0-261">261</a></span>
<span class="normal"><a href="#__codelineno-0-262">262</a></span>
<span class="normal"><a href="#__codelineno-0-263">263</a></span>
<span class="normal"><a href="#__codelineno-0-264">264</a></span>
<span class="normal"><a href="#__codelineno-0-265">265</a></span>
<span class="normal"><a href="#__codelineno-0-266">266</a></span>
<span class="normal"><a href="#__codelineno-0-267">267</a></span>
<span class="normal"><a href="#__codelineno-0-268">268</a></span>
<span class="normal"><a href="#__codelineno-0-269">269</a></span>
<span class="normal"><a href="#__codelineno-0-270">270</a></span>
<span class="normal"><a href="#__codelineno-0-271">271</a></span>
<span class="normal"><a href="#__codelineno-0-272">272</a></span></pre></div></td><td class="code"><div><pre><span></span><code><a id="__codelineno-0-256" name="__codelineno-0-256"></a><span class="k">def</span><span class="w"> </span><span class="nf">_file_info</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">FileInfo</span><span class="p">:</span>
<a id="__codelineno-0-257" name="__codelineno-0-257"></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;Retrieve a pyarrow.fs.FileInfo object for the location.</span>
<a id="__codelineno-0-258" name="__codelineno-0-258"></a>
<a id="__codelineno-0-259" name="__codelineno-0-259"></a><span class="sd"> Raises:</span>
<a id="__codelineno-0-260" name="__codelineno-0-260"></a><span class="sd"> PermissionError: If the file at self.location cannot be accessed due to a permission error such as</span>
<a id="__codelineno-0-261" name="__codelineno-0-261"></a><span class="sd"> an AWS error code 15.</span>
<a id="__codelineno-0-262" name="__codelineno-0-262"></a><span class="sd"> &quot;&quot;&quot;</span>
<a id="__codelineno-0-263" name="__codelineno-0-263"></a> <span class="k">try</span><span class="p">:</span>
<a id="__codelineno-0-264" name="__codelineno-0-264"></a> <span class="n">file_info</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_filesystem</span><span class="o">.</span><span class="n">get_file_info</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_path</span><span class="p">)</span>
<a id="__codelineno-0-265" name="__codelineno-0-265"></a> <span class="k">except</span> <span class="ne">OSError</span> <span class="k">as</span> <span class="n">e</span><span class="p">:</span>
<a id="__codelineno-0-266" name="__codelineno-0-266"></a> <span class="k">if</span> <span class="n">e</span><span class="o">.</span><span class="n">errno</span> <span class="o">==</span> <span class="mi">13</span> <span class="ow">or</span> <span class="s2">&quot;AWS Error [code 15]&quot;</span> <span class="ow">in</span> <span class="nb">str</span><span class="p">(</span><span class="n">e</span><span class="p">):</span>
<a id="__codelineno-0-267" name="__codelineno-0-267"></a> <span class="k">raise</span> <span class="ne">PermissionError</span><span class="p">(</span><span class="sa">f</span><span class="s2">&quot;Cannot get file info, access denied: </span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">location</span><span class="si">}</span><span class="s2">&quot;</span><span class="p">)</span> <span class="kn">from</span><span class="w"> </span><span class="nn">e</span>
<a id="__codelineno-0-268" name="__codelineno-0-268"></a> <span class="k">raise</span> <span class="c1"># pragma: no cover - If some other kind of OSError, raise the raw error</span>
<a id="__codelineno-0-269" name="__codelineno-0-269"></a>
<a id="__codelineno-0-270" name="__codelineno-0-270"></a> <span class="k">if</span> <span class="n">file_info</span><span class="o">.</span><span class="n">type</span> <span class="o">==</span> <span class="n">FileType</span><span class="o">.</span><span class="n">NotFound</span><span class="p">:</span>
<a id="__codelineno-0-271" name="__codelineno-0-271"></a> <span class="k">raise</span> <span class="ne">FileNotFoundError</span><span class="p">(</span><span class="sa">f</span><span class="s2">&quot;Cannot get file info, file not found: </span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">location</span><span class="si">}</span><span class="s2">&quot;</span><span class="p">)</span>
<a id="__codelineno-0-272" name="__codelineno-0-272"></a> <span class="k">return</span> <span class="n">file_info</span>
</code></pre></div></td></tr></table></div>
</details>
</div>
</div>
<div class="doc doc-object doc-function">
<h3 id="pyiceberg.io.pyarrow.PyArrowFile.create" class="doc doc-heading">
<code class="highlight language-python"><span class="n">create</span><span class="p">(</span><span class="n">overwrite</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span></code>
<a href="#pyiceberg.io.pyarrow.PyArrowFile.create" class="headerlink" title="Permanent link">&para;</a></h3>
<div class="doc doc-contents ">
<p>Create a writable pyarrow.lib.NativeFile for this PyArrowFile's location.</p>
<p><span class="doc-section-title">Parameters:</span></p>
<table>
<thead>
<tr>
<th>Name</th>
<th>Type</th>
<th>Description</th>
<th>Default</th>
</tr>
</thead>
<tbody>
<tr class="doc-section-item">
<td>
<code>overwrite</code>
</td>
<td>
<code><span title="bool">bool</span></code>
</td>
<td>
<div class="doc-md-description">
<p>Whether to overwrite the file if it already exists.</p>
</div>
</td>
<td>
<code>False</code>
</td>
</tr>
</tbody>
</table>
<p><span class="doc-section-title">Returns:</span></p>
<table>
<thead>
<tr>
<th>Type</th>
<th>Description</th>
</tr>
</thead>
<tbody>
<tr class="doc-section-item">
<td>
<code><a class="autorefs autorefs-internal" title="pyiceberg.io.OutputStream" href="../#pyiceberg.io.OutputStream">OutputStream</a></code>
</td>
<td>
<div class="doc-md-description">
<p><code>pyarrow.lib.NativeFile</code>: A NativeFile instance for the file located at <code>self.location</code>.</p>
</div>
</td>
</tr>
</tbody>
</table>
<p><span class="doc-section-title">Raises:</span></p>
<table>
<thead>
<tr>
<th>Type</th>
<th>Description</th>
</tr>
</thead>
<tbody>
<tr class="doc-section-item">
<td>
<code><span title="FileExistsError">FileExistsError</span></code>
</td>
<td>
<div class="doc-md-description">
<p>If the file already exists at <code>self.location</code> and <code>overwrite</code> is False.</p>
</div>
</td>
</tr>
</tbody>
</table>
<details class="note" open>
<summary>Note</summary>
<p>This retrieves a pyarrow NativeFile by opening an output stream. If <code>overwrite</code> is False,
a check is first performed to verify that the file does not already exist. This check-then-create
sequence is not atomic and therefore not thread-safe: a concurrent process can create the file after
the existence check but before the output stream is opened. In that case, the default pyarrow
behavior truncates the contents of the existing file when the output stream is opened.</p>
</details>
<details class="quote">
<summary>Source code in <code>pyiceberg/io/pyarrow.py</code></summary>
<div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-318">318</a></span>
<span class="normal"><a href="#__codelineno-0-319">319</a></span>
<span class="normal"><a href="#__codelineno-0-320">320</a></span>
<span class="normal"><a href="#__codelineno-0-321">321</a></span>
<span class="normal"><a href="#__codelineno-0-322">322</a></span>
<span class="normal"><a href="#__codelineno-0-323">323</a></span>
<span class="normal"><a href="#__codelineno-0-324">324</a></span>
<span class="normal"><a href="#__codelineno-0-325">325</a></span>
<span class="normal"><a href="#__codelineno-0-326">326</a></span>
<span class="normal"><a href="#__codelineno-0-327">327</a></span>
<span class="normal"><a href="#__codelineno-0-328">328</a></span>
<span class="normal"><a href="#__codelineno-0-329">329</a></span>
<span class="normal"><a href="#__codelineno-0-330">330</a></span>
<span class="normal"><a href="#__codelineno-0-331">331</a></span>
<span class="normal"><a href="#__codelineno-0-332">332</a></span>
<span class="normal"><a href="#__codelineno-0-333">333</a></span>
<span class="normal"><a href="#__codelineno-0-334">334</a></span>
<span class="normal"><a href="#__codelineno-0-335">335</a></span>
<span class="normal"><a href="#__codelineno-0-336">336</a></span>
<span class="normal"><a href="#__codelineno-0-337">337</a></span>
<span class="normal"><a href="#__codelineno-0-338">338</a></span>
<span class="normal"><a href="#__codelineno-0-339">339</a></span>
<span class="normal"><a href="#__codelineno-0-340">340</a></span>
<span class="normal"><a href="#__codelineno-0-341">341</a></span>
<span class="normal"><a href="#__codelineno-0-342">342</a></span>
<span class="normal"><a href="#__codelineno-0-343">343</a></span>
<span class="normal"><a href="#__codelineno-0-344">344</a></span>
<span class="normal"><a href="#__codelineno-0-345">345</a></span>
<span class="normal"><a href="#__codelineno-0-346">346</a></span>
<span class="normal"><a href="#__codelineno-0-347">347</a></span></pre></div></td><td class="code"><div><pre><span></span><code><a id="__codelineno-0-318" name="__codelineno-0-318"></a><span class="k">def</span><span class="w"> </span><span class="nf">create</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">overwrite</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">False</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">OutputStream</span><span class="p">:</span>
<a id="__codelineno-0-319" name="__codelineno-0-319"></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;Create a writable pyarrow.lib.NativeFile for this PyArrowFile&#39;s location.</span>
<a id="__codelineno-0-320" name="__codelineno-0-320"></a>
<a id="__codelineno-0-321" name="__codelineno-0-321"></a><span class="sd"> Args:</span>
<a id="__codelineno-0-322" name="__codelineno-0-322"></a><span class="sd"> overwrite (bool): Whether to overwrite the file if it already exists.</span>
<a id="__codelineno-0-323" name="__codelineno-0-323"></a>
<a id="__codelineno-0-324" name="__codelineno-0-324"></a><span class="sd"> Returns:</span>
<a id="__codelineno-0-325" name="__codelineno-0-325"></a><span class="sd"> pyarrow.lib.NativeFile: A NativeFile instance for the file located at self.location.</span>
<a id="__codelineno-0-326" name="__codelineno-0-326"></a>
<a id="__codelineno-0-327" name="__codelineno-0-327"></a><span class="sd"> Raises:</span>
<a id="__codelineno-0-328" name="__codelineno-0-328"></a><span class="sd"> FileExistsError: If the file already exists at `self.location` and `overwrite` is False.</span>
<a id="__codelineno-0-329" name="__codelineno-0-329"></a>
<a id="__codelineno-0-330" name="__codelineno-0-330"></a><span class="sd"> Note:</span>
<a id="__codelineno-0-331" name="__codelineno-0-331"></a><span class="sd"> This retrieves a pyarrow NativeFile by opening an output stream. If overwrite is set to False,</span>
<a id="__codelineno-0-332" name="__codelineno-0-332"></a><span class="sd"> a check is first performed to verify that the file does not exist. This is not thread-safe and</span>
<a id="__codelineno-0-333" name="__codelineno-0-333"></a><span class="sd"> a possibility does exist that the file can be created by a concurrent process after the existence</span>
<a id="__codelineno-0-334" name="__codelineno-0-334"></a><span class="sd"> check yet before the output stream is created. In such a case, the default pyarrow behavior will</span>
<a id="__codelineno-0-335" name="__codelineno-0-335"></a><span class="sd"> truncate the contents of the existing file when opening the output stream.</span>
<a id="__codelineno-0-336" name="__codelineno-0-336"></a><span class="sd"> &quot;&quot;&quot;</span>
<a id="__codelineno-0-337" name="__codelineno-0-337"></a> <span class="k">try</span><span class="p">:</span>
<a id="__codelineno-0-338" name="__codelineno-0-338"></a> <span class="k">if</span> <span class="ow">not</span> <span class="n">overwrite</span> <span class="ow">and</span> <span class="bp">self</span><span class="o">.</span><span class="n">exists</span><span class="p">()</span> <span class="ow">is</span> <span class="kc">True</span><span class="p">:</span>
<a id="__codelineno-0-339" name="__codelineno-0-339"></a> <span class="k">raise</span> <span class="ne">FileExistsError</span><span class="p">(</span><span class="sa">f</span><span class="s2">&quot;Cannot create file, already exists: </span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">location</span><span class="si">}</span><span class="s2">&quot;</span><span class="p">)</span>
<a id="__codelineno-0-340" name="__codelineno-0-340"></a> <span class="n">output_file</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_filesystem</span><span class="o">.</span><span class="n">open_output_stream</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_path</span><span class="p">,</span> <span class="n">buffer_size</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">_buffer_size</span><span class="p">)</span>
<a id="__codelineno-0-341" name="__codelineno-0-341"></a> <span class="k">except</span> <span class="ne">PermissionError</span><span class="p">:</span>
<a id="__codelineno-0-342" name="__codelineno-0-342"></a> <span class="k">raise</span>
<a id="__codelineno-0-343" name="__codelineno-0-343"></a> <span class="k">except</span> <span class="ne">OSError</span> <span class="k">as</span> <span class="n">e</span><span class="p">:</span>
<a id="__codelineno-0-344" name="__codelineno-0-344"></a> <span class="k">if</span> <span class="n">e</span><span class="o">.</span><span class="n">errno</span> <span class="o">==</span> <span class="mi">13</span> <span class="ow">or</span> <span class="s2">&quot;AWS Error [code 15]&quot;</span> <span class="ow">in</span> <span class="nb">str</span><span class="p">(</span><span class="n">e</span><span class="p">):</span>
<a id="__codelineno-0-345" name="__codelineno-0-345"></a> <span class="k">raise</span> <span class="ne">PermissionError</span><span class="p">(</span><span class="sa">f</span><span class="s2">&quot;Cannot create file, access denied: </span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">location</span><span class="si">}</span><span class="s2">&quot;</span><span class="p">)</span> <span class="kn">from</span><span class="w"> </span><span class="nn">e</span>
<a id="__codelineno-0-346" name="__codelineno-0-346"></a> <span class="k">raise</span> <span class="c1"># pragma: no cover - If some other kind of OSError, raise the raw error</span>
<a id="__codelineno-0-347" name="__codelineno-0-347"></a> <span class="k">return</span> <span class="n">output_file</span>
</code></pre></div></td></tr></table></div>
</details>
</div>
</div>
<div class="doc doc-object doc-function">
<h3 id="pyiceberg.io.pyarrow.PyArrowFile.exists" class="doc doc-heading">
<code class="highlight language-python"><span class="n">exists</span><span class="p">()</span></code>
<a href="#pyiceberg.io.pyarrow.PyArrowFile.exists" class="headerlink" title="Permanent link">&para;</a></h3>
<div class="doc doc-contents ">
<p>Check whether the location exists.</p>
<details class="quote">
<summary>Source code in <code>pyiceberg/io/pyarrow.py</code></summary>
<div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-279">279</a></span>
<span class="normal"><a href="#__codelineno-0-280">280</a></span>
<span class="normal"><a href="#__codelineno-0-281">281</a></span>
<span class="normal"><a href="#__codelineno-0-282">282</a></span>
<span class="normal"><a href="#__codelineno-0-283">283</a></span>
<span class="normal"><a href="#__codelineno-0-284">284</a></span>
<span class="normal"><a href="#__codelineno-0-285">285</a></span></pre></div></td><td class="code"><div><pre><span></span><code><a id="__codelineno-0-279" name="__codelineno-0-279"></a><span class="k">def</span><span class="w"> </span><span class="nf">exists</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="nb">bool</span><span class="p">:</span>
<a id="__codelineno-0-280" name="__codelineno-0-280"></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;Check whether the location exists.&quot;&quot;&quot;</span>
<a id="__codelineno-0-281" name="__codelineno-0-281"></a> <span class="k">try</span><span class="p">:</span>
<a id="__codelineno-0-282" name="__codelineno-0-282"></a> <span class="bp">self</span><span class="o">.</span><span class="n">_file_info</span><span class="p">()</span> <span class="c1"># raises FileNotFoundError if it does not exist</span>
<a id="__codelineno-0-283" name="__codelineno-0-283"></a> <span class="k">return</span> <span class="kc">True</span>
<a id="__codelineno-0-284" name="__codelineno-0-284"></a> <span class="k">except</span> <span class="ne">FileNotFoundError</span><span class="p">:</span>
<a id="__codelineno-0-285" name="__codelineno-0-285"></a> <span class="k">return</span> <span class="kc">False</span>
</code></pre></div></td></tr></table></div>
</details>
</div>
</div>
<div class="doc doc-object doc-function">
<h3 id="pyiceberg.io.pyarrow.PyArrowFile.open" class="doc doc-heading">
<code class="highlight language-python"><span class="nb">open</span><span class="p">(</span><span class="n">seekable</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span></code>
<a href="#pyiceberg.io.pyarrow.PyArrowFile.open" class="headerlink" title="Permanent link">&para;</a></h3>
<div class="doc doc-contents ">
<p>Open the location using a PyArrow FileSystem inferred from the location.</p>
<p><span class="doc-section-title">Parameters:</span></p>
<table>
<thead>
<tr>
<th>Name</th>
<th>Type</th>
<th>Description</th>
<th>Default</th>
</tr>
</thead>
<tbody>
<tr class="doc-section-item">
<td>
<code>seekable</code>
</td>
<td>
<code><span title="bool">bool</span></code>
</td>
<td>
<div class="doc-md-description">
<p>Whether the stream should support seeking, or is only consumed sequentially.</p>
</div>
</td>
<td>
<code>True</code>
</td>
</tr>
</tbody>
</table>
<p><span class="doc-section-title">Returns:</span></p>
<table>
<thead>
<tr>
<th>Type</th>
<th>Description</th>
</tr>
</thead>
<tbody>
<tr class="doc-section-item">
<td>
<code><a class="autorefs autorefs-internal" title="pyiceberg.io.InputStream" href="../#pyiceberg.io.InputStream">InputStream</a></code>
</td>
<td>
<div class="doc-md-description">
<p>pyarrow.lib.NativeFile: A NativeFile instance for the file located at <code>self.location</code>.</p>
</div>
</td>
</tr>
</tbody>
</table>
<p><span class="doc-section-title">Raises:</span></p>
<table>
<thead>
<tr>
<th>Type</th>
<th>Description</th>
</tr>
</thead>
<tbody>
<tr class="doc-section-item">
<td>
<code><span title="FileNotFoundError">FileNotFoundError</span></code>
</td>
<td>
<div class="doc-md-description">
<p>If the file at self.location does not exist.</p>
</div>
</td>
</tr>
<tr class="doc-section-item">
<td>
<code><span title="PermissionError">PermissionError</span></code>
</td>
<td>
<div class="doc-md-description">
<p>If the file at self.location cannot be accessed due to a permission error such as
an AWS error code 15.</p>
</div>
</td>
</tr>
</tbody>
</table>
<details class="quote">
<summary>Source code in <code>pyiceberg/io/pyarrow.py</code></summary>
<div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-287">287</a></span>
<span class="normal"><a href="#__codelineno-0-288">288</a></span>
<span class="normal"><a href="#__codelineno-0-289">289</a></span>
<span class="normal"><a href="#__codelineno-0-290">290</a></span>
<span class="normal"><a href="#__codelineno-0-291">291</a></span>
<span class="normal"><a href="#__codelineno-0-292">292</a></span>
<span class="normal"><a href="#__codelineno-0-293">293</a></span>
<span class="normal"><a href="#__codelineno-0-294">294</a></span>
<span class="normal"><a href="#__codelineno-0-295">295</a></span>
<span class="normal"><a href="#__codelineno-0-296">296</a></span>
<span class="normal"><a href="#__codelineno-0-297">297</a></span>
<span class="normal"><a href="#__codelineno-0-298">298</a></span>
<span class="normal"><a href="#__codelineno-0-299">299</a></span>
<span class="normal"><a href="#__codelineno-0-300">300</a></span>
<span class="normal"><a href="#__codelineno-0-301">301</a></span>
<span class="normal"><a href="#__codelineno-0-302">302</a></span>
<span class="normal"><a href="#__codelineno-0-303">303</a></span>
<span class="normal"><a href="#__codelineno-0-304">304</a></span>
<span class="normal"><a href="#__codelineno-0-305">305</a></span>
<span class="normal"><a href="#__codelineno-0-306">306</a></span>
<span class="normal"><a href="#__codelineno-0-307">307</a></span>
<span class="normal"><a href="#__codelineno-0-308">308</a></span>
<span class="normal"><a href="#__codelineno-0-309">309</a></span>
<span class="normal"><a href="#__codelineno-0-310">310</a></span>
<span class="normal"><a href="#__codelineno-0-311">311</a></span>
<span class="normal"><a href="#__codelineno-0-312">312</a></span>
<span class="normal"><a href="#__codelineno-0-313">313</a></span>
<span class="normal"><a href="#__codelineno-0-314">314</a></span>
<span class="normal"><a href="#__codelineno-0-315">315</a></span>
<span class="normal"><a href="#__codelineno-0-316">316</a></span></pre></div></td><td class="code"><div><pre><span></span><code><a id="__codelineno-0-287" name="__codelineno-0-287"></a><span class="k">def</span><span class="w"> </span><span class="nf">open</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">seekable</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">True</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">InputStream</span><span class="p">:</span>
<a id="__codelineno-0-288" name="__codelineno-0-288"></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;Open the location using a PyArrow FileSystem inferred from the location.</span>
<a id="__codelineno-0-289" name="__codelineno-0-289"></a>
<a id="__codelineno-0-290" name="__codelineno-0-290"></a><span class="sd"> Args:</span>
<a id="__codelineno-0-291" name="__codelineno-0-291"></a><span class="sd"> seekable: If the stream should support seek, or if it is consumed sequential.</span>
<a id="__codelineno-0-292" name="__codelineno-0-292"></a>
<a id="__codelineno-0-293" name="__codelineno-0-293"></a><span class="sd"> Returns:</span>
<a id="__codelineno-0-294" name="__codelineno-0-294"></a><span class="sd"> pyarrow.lib.NativeFile: A NativeFile instance for the file located at `self.location`.</span>
<a id="__codelineno-0-295" name="__codelineno-0-295"></a>
<a id="__codelineno-0-296" name="__codelineno-0-296"></a><span class="sd"> Raises:</span>
<a id="__codelineno-0-297" name="__codelineno-0-297"></a><span class="sd"> FileNotFoundError: If the file at self.location does not exist.</span>
<a id="__codelineno-0-298" name="__codelineno-0-298"></a><span class="sd"> PermissionError: If the file at self.location cannot be accessed due to a permission error such as</span>
<a id="__codelineno-0-299" name="__codelineno-0-299"></a><span class="sd"> an AWS error code 15.</span>
<a id="__codelineno-0-300" name="__codelineno-0-300"></a><span class="sd"> &quot;&quot;&quot;</span>
<a id="__codelineno-0-301" name="__codelineno-0-301"></a> <span class="k">try</span><span class="p">:</span>
<a id="__codelineno-0-302" name="__codelineno-0-302"></a> <span class="k">if</span> <span class="n">seekable</span><span class="p">:</span>
<a id="__codelineno-0-303" name="__codelineno-0-303"></a> <span class="n">input_file</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_filesystem</span><span class="o">.</span><span class="n">open_input_file</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_path</span><span class="p">)</span>
<a id="__codelineno-0-304" name="__codelineno-0-304"></a> <span class="k">else</span><span class="p">:</span>
<a id="__codelineno-0-305" name="__codelineno-0-305"></a> <span class="n">input_file</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_filesystem</span><span class="o">.</span><span class="n">open_input_stream</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_path</span><span class="p">,</span> <span class="n">buffer_size</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">_buffer_size</span><span class="p">)</span>
<a id="__codelineno-0-306" name="__codelineno-0-306"></a> <span class="k">except</span> <span class="ne">FileNotFoundError</span><span class="p">:</span>
<a id="__codelineno-0-307" name="__codelineno-0-307"></a> <span class="k">raise</span>
<a id="__codelineno-0-308" name="__codelineno-0-308"></a> <span class="k">except</span> <span class="ne">PermissionError</span><span class="p">:</span>
<a id="__codelineno-0-309" name="__codelineno-0-309"></a> <span class="k">raise</span>
<a id="__codelineno-0-310" name="__codelineno-0-310"></a> <span class="k">except</span> <span class="ne">OSError</span> <span class="k">as</span> <span class="n">e</span><span class="p">:</span>
<a id="__codelineno-0-311" name="__codelineno-0-311"></a> <span class="k">if</span> <span class="n">e</span><span class="o">.</span><span class="n">errno</span> <span class="o">==</span> <span class="mi">2</span> <span class="ow">or</span> <span class="s2">&quot;Path does not exist&quot;</span> <span class="ow">in</span> <span class="nb">str</span><span class="p">(</span><span class="n">e</span><span class="p">):</span>
<a id="__codelineno-0-312" name="__codelineno-0-312"></a> <span class="k">raise</span> <span class="ne">FileNotFoundError</span><span class="p">(</span><span class="sa">f</span><span class="s2">&quot;Cannot open file, does not exist: </span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">location</span><span class="si">}</span><span class="s2">&quot;</span><span class="p">)</span> <span class="kn">from</span><span class="w"> </span><span class="nn">e</span>
<a id="__codelineno-0-313" name="__codelineno-0-313"></a> <span class="k">elif</span> <span class="n">e</span><span class="o">.</span><span class="n">errno</span> <span class="o">==</span> <span class="mi">13</span> <span class="ow">or</span> <span class="s2">&quot;AWS Error [code 15]&quot;</span> <span class="ow">in</span> <span class="nb">str</span><span class="p">(</span><span class="n">e</span><span class="p">):</span>
<a id="__codelineno-0-314" name="__codelineno-0-314"></a> <span class="k">raise</span> <span class="ne">PermissionError</span><span class="p">(</span><span class="sa">f</span><span class="s2">&quot;Cannot open file, access denied: </span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">location</span><span class="si">}</span><span class="s2">&quot;</span><span class="p">)</span> <span class="kn">from</span><span class="w"> </span><span class="nn">e</span>
<a id="__codelineno-0-315" name="__codelineno-0-315"></a> <span class="k">raise</span> <span class="c1"># pragma: no cover - If some other kind of OSError, raise the raw error</span>
<a id="__codelineno-0-316" name="__codelineno-0-316"></a> <span class="k">return</span> <span class="n">input_file</span>
</code></pre></div></td></tr></table></div>
</details>
</div>
</div>
<div class="doc doc-object doc-function">
<h3 id="pyiceberg.io.pyarrow.PyArrowFile.to_input_file" class="doc doc-heading">
<code class="highlight language-python"><span class="n">to_input_file</span><span class="p">()</span></code>
<a href="#pyiceberg.io.pyarrow.PyArrowFile.to_input_file" class="headerlink" title="Permanent link">&para;</a></h3>
<div class="doc doc-contents ">
<p>Return a PyArrowFile for the location of an existing PyArrowFile instance.</p>
<p>This method is included to abide by the OutputFile abstract base class. Since this implementation uses a single
PyArrowFile class (as opposed to separate InputFile and OutputFile implementations), this method simply returns
the same instance (<code>self</code>) rather than creating a new copy.</p>
<details class="quote">
<summary>Source code in <code>pyiceberg/io/pyarrow.py</code></summary>
<div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-349">349</a></span>
<span class="normal"><a href="#__codelineno-0-350">350</a></span>
<span class="normal"><a href="#__codelineno-0-351">351</a></span>
<span class="normal"><a href="#__codelineno-0-352">352</a></span>
<span class="normal"><a href="#__codelineno-0-353">353</a></span>
<span class="normal"><a href="#__codelineno-0-354">354</a></span>
<span class="normal"><a href="#__codelineno-0-355">355</a></span>
<span class="normal"><a href="#__codelineno-0-356">356</a></span></pre></div></td><td class="code"><div><pre><span></span><code><a id="__codelineno-0-349" name="__codelineno-0-349"></a><span class="k">def</span><span class="w"> </span><span class="nf">to_input_file</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">PyArrowFile</span><span class="p">:</span>
<a id="__codelineno-0-350" name="__codelineno-0-350"></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;Return a new PyArrowFile for the location of an existing PyArrowFile instance.</span>
<a id="__codelineno-0-351" name="__codelineno-0-351"></a>
<a id="__codelineno-0-352" name="__codelineno-0-352"></a><span class="sd"> This method is included to abide by the OutputFile abstract base class. Since this implementation uses a single</span>
<a id="__codelineno-0-353" name="__codelineno-0-353"></a><span class="sd"> PyArrowFile class (as opposed to separate InputFile and OutputFile implementations), this method effectively returns</span>
<a id="__codelineno-0-354" name="__codelineno-0-354"></a><span class="sd"> a copy of the same instance.</span>
<a id="__codelineno-0-355" name="__codelineno-0-355"></a><span class="sd"> &quot;&quot;&quot;</span>
<a id="__codelineno-0-356" name="__codelineno-0-356"></a> <span class="k">return</span> <span class="bp">self</span>
</code></pre></div></td></tr></table></div>
</details>
</div>
</div>
</div>
</div>
</div>
<div class="doc doc-object doc-class">
<h2 id="pyiceberg.io.pyarrow.PyArrowFileIO" class="doc doc-heading">
<code>PyArrowFileIO</code>
<a href="#pyiceberg.io.pyarrow.PyArrowFileIO" class="headerlink" title="Permanent link">&para;</a></h2>
<div class="doc doc-contents ">
<p class="doc doc-class-bases">
Bases: <code><a class="autorefs autorefs-internal" title="pyiceberg.io.FileIO" href="../#pyiceberg.io.FileIO">FileIO</a></code></p>
<details class="quote">
<summary>Source code in <code>pyiceberg/io/pyarrow.py</code></summary>
<div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-359">359</a></span>
<span class="normal"><a href="#__codelineno-0-360">360</a></span>
<span class="normal"><a href="#__codelineno-0-361">361</a></span>
<span class="normal"><a href="#__codelineno-0-362">362</a></span>
<span class="normal"><a href="#__codelineno-0-363">363</a></span>
<span class="normal"><a href="#__codelineno-0-364">364</a></span>
<span class="normal"><a href="#__codelineno-0-365">365</a></span>
<span class="normal"><a href="#__codelineno-0-366">366</a></span>
<span class="normal"><a href="#__codelineno-0-367">367</a></span>
<span class="normal"><a href="#__codelineno-0-368">368</a></span>
<span class="normal"><a href="#__codelineno-0-369">369</a></span>
<span class="normal"><a href="#__codelineno-0-370">370</a></span>
<span class="normal"><a href="#__codelineno-0-371">371</a></span>
<span class="normal"><a href="#__codelineno-0-372">372</a></span>
<span class="normal"><a href="#__codelineno-0-373">373</a></span>
<span class="normal"><a href="#__codelineno-0-374">374</a></span>
<span class="normal"><a href="#__codelineno-0-375">375</a></span>
<span class="normal"><a href="#__codelineno-0-376">376</a></span>
<span class="normal"><a href="#__codelineno-0-377">377</a></span>
<span class="normal"><a href="#__codelineno-0-378">378</a></span>
<span class="normal"><a href="#__codelineno-0-379">379</a></span>
<span class="normal"><a href="#__codelineno-0-380">380</a></span>
<span class="normal"><a href="#__codelineno-0-381">381</a></span>
<span class="normal"><a href="#__codelineno-0-382">382</a></span>
<span class="normal"><a href="#__codelineno-0-383">383</a></span>
<span class="normal"><a href="#__codelineno-0-384">384</a></span>
<span class="normal"><a href="#__codelineno-0-385">385</a></span>
<span class="normal"><a href="#__codelineno-0-386">386</a></span>
<span class="normal"><a href="#__codelineno-0-387">387</a></span>
<span class="normal"><a href="#__codelineno-0-388">388</a></span>
<span class="normal"><a href="#__codelineno-0-389">389</a></span>
<span class="normal"><a href="#__codelineno-0-390">390</a></span>
<span class="normal"><a href="#__codelineno-0-391">391</a></span>
<span class="normal"><a href="#__codelineno-0-392">392</a></span>
<span class="normal"><a href="#__codelineno-0-393">393</a></span>
<span class="normal"><a href="#__codelineno-0-394">394</a></span>
<span class="normal"><a href="#__codelineno-0-395">395</a></span>
<span class="normal"><a href="#__codelineno-0-396">396</a></span>
<span class="normal"><a href="#__codelineno-0-397">397</a></span>
<span class="normal"><a href="#__codelineno-0-398">398</a></span>
<span class="normal"><a href="#__codelineno-0-399">399</a></span>
<span class="normal"><a href="#__codelineno-0-400">400</a></span>
<span class="normal"><a href="#__codelineno-0-401">401</a></span>
<span class="normal"><a href="#__codelineno-0-402">402</a></span>
<span class="normal"><a href="#__codelineno-0-403">403</a></span>
<span class="normal"><a href="#__codelineno-0-404">404</a></span>
<span class="normal"><a href="#__codelineno-0-405">405</a></span>
<span class="normal"><a href="#__codelineno-0-406">406</a></span>
<span class="normal"><a href="#__codelineno-0-407">407</a></span>
<span class="normal"><a href="#__codelineno-0-408">408</a></span>
<span class="normal"><a href="#__codelineno-0-409">409</a></span>
<span class="normal"><a href="#__codelineno-0-410">410</a></span>
<span class="normal"><a href="#__codelineno-0-411">411</a></span>
<span class="normal"><a href="#__codelineno-0-412">412</a></span>
<span class="normal"><a href="#__codelineno-0-413">413</a></span>
<span class="normal"><a href="#__codelineno-0-414">414</a></span>
<span class="normal"><a href="#__codelineno-0-415">415</a></span>
<span class="normal"><a href="#__codelineno-0-416">416</a></span>
<span class="normal"><a href="#__codelineno-0-417">417</a></span>
<span class="normal"><a href="#__codelineno-0-418">418</a></span>
<span class="normal"><a href="#__codelineno-0-419">419</a></span>
<span class="normal"><a href="#__codelineno-0-420">420</a></span>
<span class="normal"><a href="#__codelineno-0-421">421</a></span>
<span class="normal"><a href="#__codelineno-0-422">422</a></span>
<span class="normal"><a href="#__codelineno-0-423">423</a></span>
<span class="normal"><a href="#__codelineno-0-424">424</a></span>
<span class="normal"><a href="#__codelineno-0-425">425</a></span>
<span class="normal"><a href="#__codelineno-0-426">426</a></span>
<span class="normal"><a href="#__codelineno-0-427">427</a></span>
<span class="normal"><a href="#__codelineno-0-428">428</a></span>
<span class="normal"><a href="#__codelineno-0-429">429</a></span>
<span class="normal"><a href="#__codelineno-0-430">430</a></span>
<span class="normal"><a href="#__codelineno-0-431">431</a></span>
<span class="normal"><a href="#__codelineno-0-432">432</a></span>
<span class="normal"><a href="#__codelineno-0-433">433</a></span>
<span class="normal"><a href="#__codelineno-0-434">434</a></span>
<span class="normal"><a href="#__codelineno-0-435">435</a></span>
<span class="normal"><a href="#__codelineno-0-436">436</a></span>
<span class="normal"><a href="#__codelineno-0-437">437</a></span>
<span class="normal"><a href="#__codelineno-0-438">438</a></span>
<span class="normal"><a href="#__codelineno-0-439">439</a></span>
<span class="normal"><a href="#__codelineno-0-440">440</a></span>
<span class="normal"><a href="#__codelineno-0-441">441</a></span>
<span class="normal"><a href="#__codelineno-0-442">442</a></span>
<span class="normal"><a href="#__codelineno-0-443">443</a></span>
<span class="normal"><a href="#__codelineno-0-444">444</a></span>
<span class="normal"><a href="#__codelineno-0-445">445</a></span>
<span class="normal"><a href="#__codelineno-0-446">446</a></span>
<span class="normal"><a href="#__codelineno-0-447">447</a></span>
<span class="normal"><a href="#__codelineno-0-448">448</a></span>
<span class="normal"><a href="#__codelineno-0-449">449</a></span>
<span class="normal"><a href="#__codelineno-0-450">450</a></span>
<span class="normal"><a href="#__codelineno-0-451">451</a></span>
<span class="normal"><a href="#__codelineno-0-452">452</a></span>
<span class="normal"><a href="#__codelineno-0-453">453</a></span>
<span class="normal"><a href="#__codelineno-0-454">454</a></span>
<span class="normal"><a href="#__codelineno-0-455">455</a></span>
<span class="normal"><a href="#__codelineno-0-456">456</a></span>
<span class="normal"><a href="#__codelineno-0-457">457</a></span>
<span class="normal"><a href="#__codelineno-0-458">458</a></span>
<span class="normal"><a href="#__codelineno-0-459">459</a></span>
<span class="normal"><a href="#__codelineno-0-460">460</a></span>
<span class="normal"><a href="#__codelineno-0-461">461</a></span>
<span class="normal"><a href="#__codelineno-0-462">462</a></span>
<span class="normal"><a href="#__codelineno-0-463">463</a></span>
<span class="normal"><a href="#__codelineno-0-464">464</a></span>
<span class="normal"><a href="#__codelineno-0-465">465</a></span>
<span class="normal"><a href="#__codelineno-0-466">466</a></span>
<span class="normal"><a href="#__codelineno-0-467">467</a></span>
<span class="normal"><a href="#__codelineno-0-468">468</a></span>
<span class="normal"><a href="#__codelineno-0-469">469</a></span>
<span class="normal"><a href="#__codelineno-0-470">470</a></span>
<span class="normal"><a href="#__codelineno-0-471">471</a></span>
<span class="normal"><a href="#__codelineno-0-472">472</a></span>
<span class="normal"><a href="#__codelineno-0-473">473</a></span>
<span class="normal"><a href="#__codelineno-0-474">474</a></span>
<span class="normal"><a href="#__codelineno-0-475">475</a></span>
<span class="normal"><a href="#__codelineno-0-476">476</a></span>
<span class="normal"><a href="#__codelineno-0-477">477</a></span>
<span class="normal"><a href="#__codelineno-0-478">478</a></span>
<span class="normal"><a href="#__codelineno-0-479">479</a></span>
<span class="normal"><a href="#__codelineno-0-480">480</a></span>
<span class="normal"><a href="#__codelineno-0-481">481</a></span>
<span class="normal"><a href="#__codelineno-0-482">482</a></span>
<span class="normal"><a href="#__codelineno-0-483">483</a></span>
<span class="normal"><a href="#__codelineno-0-484">484</a></span>
<span class="normal"><a href="#__codelineno-0-485">485</a></span>
<span class="normal"><a href="#__codelineno-0-486">486</a></span>
<span class="normal"><a href="#__codelineno-0-487">487</a></span>
<span class="normal"><a href="#__codelineno-0-488">488</a></span>
<span class="normal"><a href="#__codelineno-0-489">489</a></span>
<span class="normal"><a href="#__codelineno-0-490">490</a></span>
<span class="normal"><a href="#__codelineno-0-491">491</a></span>
<span class="normal"><a href="#__codelineno-0-492">492</a></span>
<span class="normal"><a href="#__codelineno-0-493">493</a></span>
<span class="normal"><a href="#__codelineno-0-494">494</a></span>
<span class="normal"><a href="#__codelineno-0-495">495</a></span>
<span class="normal"><a href="#__codelineno-0-496">496</a></span>
<span class="normal"><a href="#__codelineno-0-497">497</a></span>
<span class="normal"><a href="#__codelineno-0-498">498</a></span>
<span class="normal"><a href="#__codelineno-0-499">499</a></span>
<span class="normal"><a href="#__codelineno-0-500">500</a></span>
<span class="normal"><a href="#__codelineno-0-501">501</a></span>
<span class="normal"><a href="#__codelineno-0-502">502</a></span>
<span class="normal"><a href="#__codelineno-0-503">503</a></span>
<span class="normal"><a href="#__codelineno-0-504">504</a></span>
<span class="normal"><a href="#__codelineno-0-505">505</a></span>
<span class="normal"><a href="#__codelineno-0-506">506</a></span>
<span class="normal"><a href="#__codelineno-0-507">507</a></span>
<span class="normal"><a href="#__codelineno-0-508">508</a></span>
<span class="normal"><a href="#__codelineno-0-509">509</a></span>
<span class="normal"><a href="#__codelineno-0-510">510</a></span>
<span class="normal"><a href="#__codelineno-0-511">511</a></span>
<span class="normal"><a href="#__codelineno-0-512">512</a></span>
<span class="normal"><a href="#__codelineno-0-513">513</a></span>
<span class="normal"><a href="#__codelineno-0-514">514</a></span>
<span class="normal"><a href="#__codelineno-0-515">515</a></span>
<span class="normal"><a href="#__codelineno-0-516">516</a></span>
<span class="normal"><a href="#__codelineno-0-517">517</a></span>
<span class="normal"><a href="#__codelineno-0-518">518</a></span>
<span class="normal"><a href="#__codelineno-0-519">519</a></span>
<span class="normal"><a href="#__codelineno-0-520">520</a></span>
<span class="normal"><a href="#__codelineno-0-521">521</a></span>
<span class="normal"><a href="#__codelineno-0-522">522</a></span>
<span class="normal"><a href="#__codelineno-0-523">523</a></span>
<span class="normal"><a href="#__codelineno-0-524">524</a></span>
<span class="normal"><a href="#__codelineno-0-525">525</a></span>
<span class="normal"><a href="#__codelineno-0-526">526</a></span>
<span class="normal"><a href="#__codelineno-0-527">527</a></span>
<span class="normal"><a href="#__codelineno-0-528">528</a></span>
<span class="normal"><a href="#__codelineno-0-529">529</a></span>
<span class="normal"><a href="#__codelineno-0-530">530</a></span>
<span class="normal"><a href="#__codelineno-0-531">531</a></span>
<span class="normal"><a href="#__codelineno-0-532">532</a></span>
<span class="normal"><a href="#__codelineno-0-533">533</a></span>
<span class="normal"><a href="#__codelineno-0-534">534</a></span>
<span class="normal"><a href="#__codelineno-0-535">535</a></span>
<span class="normal"><a href="#__codelineno-0-536">536</a></span>
<span class="normal"><a href="#__codelineno-0-537">537</a></span>
<span class="normal"><a href="#__codelineno-0-538">538</a></span>
<span class="normal"><a href="#__codelineno-0-539">539</a></span>
<span class="normal"><a href="#__codelineno-0-540">540</a></span>
<span class="normal"><a href="#__codelineno-0-541">541</a></span>
<span class="normal"><a href="#__codelineno-0-542">542</a></span>
<span class="normal"><a href="#__codelineno-0-543">543</a></span>
<span class="normal"><a href="#__codelineno-0-544">544</a></span>
<span class="normal"><a href="#__codelineno-0-545">545</a></span>
<span class="normal"><a href="#__codelineno-0-546">546</a></span>
<span class="normal"><a href="#__codelineno-0-547">547</a></span>
<span class="normal"><a href="#__codelineno-0-548">548</a></span>
<span class="normal"><a href="#__codelineno-0-549">549</a></span>
<span class="normal"><a href="#__codelineno-0-550">550</a></span>
<span class="normal"><a href="#__codelineno-0-551">551</a></span>
<span class="normal"><a href="#__codelineno-0-552">552</a></span>
<span class="normal"><a href="#__codelineno-0-553">553</a></span>
<span class="normal"><a href="#__codelineno-0-554">554</a></span>
<span class="normal"><a href="#__codelineno-0-555">555</a></span>
<span class="normal"><a href="#__codelineno-0-556">556</a></span>
<span class="normal"><a href="#__codelineno-0-557">557</a></span>
<span class="normal"><a href="#__codelineno-0-558">558</a></span>
<span class="normal"><a href="#__codelineno-0-559">559</a></span>
<span class="normal"><a href="#__codelineno-0-560">560</a></span>
<span class="normal"><a href="#__codelineno-0-561">561</a></span>
<span class="normal"><a href="#__codelineno-0-562">562</a></span>
<span class="normal"><a href="#__codelineno-0-563">563</a></span>
<span class="normal"><a href="#__codelineno-0-564">564</a></span>
<span class="normal"><a href="#__codelineno-0-565">565</a></span>
<span class="normal"><a href="#__codelineno-0-566">566</a></span>
<span class="normal"><a href="#__codelineno-0-567">567</a></span>
<span class="normal"><a href="#__codelineno-0-568">568</a></span>
<span class="normal"><a href="#__codelineno-0-569">569</a></span>
<span class="normal"><a href="#__codelineno-0-570">570</a></span>
<span class="normal"><a href="#__codelineno-0-571">571</a></span>
<span class="normal"><a href="#__codelineno-0-572">572</a></span>
<span class="normal"><a href="#__codelineno-0-573">573</a></span>
<span class="normal"><a href="#__codelineno-0-574">574</a></span>
<span class="normal"><a href="#__codelineno-0-575">575</a></span>
<span class="normal"><a href="#__codelineno-0-576">576</a></span>
<span class="normal"><a href="#__codelineno-0-577">577</a></span>
<span class="normal"><a href="#__codelineno-0-578">578</a></span>
<span class="normal"><a href="#__codelineno-0-579">579</a></span>
<span class="normal"><a href="#__codelineno-0-580">580</a></span>
<span class="normal"><a href="#__codelineno-0-581">581</a></span>
<span class="normal"><a href="#__codelineno-0-582">582</a></span>
<span class="normal"><a href="#__codelineno-0-583">583</a></span>
<span class="normal"><a href="#__codelineno-0-584">584</a></span></pre></div></td><td class="code"><div><pre><span></span><code><a id="__codelineno-0-359" name="__codelineno-0-359"></a><span class="k">class</span><span class="w"> </span><span class="nc">PyArrowFileIO</span><span class="p">(</span><span class="n">FileIO</span><span class="p">):</span>
<a id="__codelineno-0-360" name="__codelineno-0-360"></a> <span class="n">fs_by_scheme</span><span class="p">:</span> <span class="n">Callable</span><span class="p">[[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]],</span> <span class="n">FileSystem</span><span class="p">]</span>
<a id="__codelineno-0-361" name="__codelineno-0-361"></a>
<a id="__codelineno-0-362" name="__codelineno-0-362"></a> <span class="k">def</span><span class="w"> </span><span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">properties</span><span class="p">:</span> <span class="n">Properties</span> <span class="o">=</span> <span class="n">EMPTY_DICT</span><span class="p">):</span>
<a id="__codelineno-0-363" name="__codelineno-0-363"></a> <span class="bp">self</span><span class="o">.</span><span class="n">fs_by_scheme</span><span class="p">:</span> <span class="n">Callable</span><span class="p">[[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]],</span> <span class="n">FileSystem</span><span class="p">]</span> <span class="o">=</span> <span class="n">lru_cache</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_initialize_fs</span><span class="p">)</span>
<a id="__codelineno-0-364" name="__codelineno-0-364"></a> <span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="fm">__init__</span><span class="p">(</span><span class="n">properties</span><span class="o">=</span><span class="n">properties</span><span class="p">)</span>
<a id="__codelineno-0-365" name="__codelineno-0-365"></a>
<a id="__codelineno-0-366" name="__codelineno-0-366"></a> <span class="nd">@staticmethod</span>
<a id="__codelineno-0-367" name="__codelineno-0-367"></a> <span class="k">def</span><span class="w"> </span><span class="nf">parse_location</span><span class="p">(</span><span class="n">location</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">Tuple</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="nb">str</span><span class="p">,</span> <span class="nb">str</span><span class="p">]:</span>
<a id="__codelineno-0-368" name="__codelineno-0-368"></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;Return the path without the scheme.&quot;&quot;&quot;</span>
<a id="__codelineno-0-369" name="__codelineno-0-369"></a> <span class="n">uri</span> <span class="o">=</span> <span class="n">urlparse</span><span class="p">(</span><span class="n">location</span><span class="p">)</span>
<a id="__codelineno-0-370" name="__codelineno-0-370"></a> <span class="k">if</span> <span class="ow">not</span> <span class="n">uri</span><span class="o">.</span><span class="n">scheme</span><span class="p">:</span>
<a id="__codelineno-0-371" name="__codelineno-0-371"></a> <span class="k">return</span> <span class="s2">&quot;file&quot;</span><span class="p">,</span> <span class="n">uri</span><span class="o">.</span><span class="n">netloc</span><span class="p">,</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">abspath</span><span class="p">(</span><span class="n">location</span><span class="p">)</span>
<a id="__codelineno-0-372" name="__codelineno-0-372"></a> <span class="k">elif</span> <span class="n">uri</span><span class="o">.</span><span class="n">scheme</span> <span class="ow">in</span> <span class="p">(</span><span class="s2">&quot;hdfs&quot;</span><span class="p">,</span> <span class="s2">&quot;viewfs&quot;</span><span class="p">):</span>
<a id="__codelineno-0-373" name="__codelineno-0-373"></a> <span class="k">return</span> <span class="n">uri</span><span class="o">.</span><span class="n">scheme</span><span class="p">,</span> <span class="n">uri</span><span class="o">.</span><span class="n">netloc</span><span class="p">,</span> <span class="n">uri</span><span class="o">.</span><span class="n">path</span>
<a id="__codelineno-0-374" name="__codelineno-0-374"></a> <span class="k">else</span><span class="p">:</span>
<a id="__codelineno-0-375" name="__codelineno-0-375"></a> <span class="k">return</span> <span class="n">uri</span><span class="o">.</span><span class="n">scheme</span><span class="p">,</span> <span class="n">uri</span><span class="o">.</span><span class="n">netloc</span><span class="p">,</span> <span class="sa">f</span><span class="s2">&quot;</span><span class="si">{</span><span class="n">uri</span><span class="o">.</span><span class="n">netloc</span><span class="si">}{</span><span class="n">uri</span><span class="o">.</span><span class="n">path</span><span class="si">}</span><span class="s2">&quot;</span>
<a id="__codelineno-0-376" name="__codelineno-0-376"></a>
<a id="__codelineno-0-377" name="__codelineno-0-377"></a> <span class="k">def</span><span class="w"> </span><span class="nf">_initialize_fs</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">scheme</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span> <span class="n">netloc</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">FileSystem</span><span class="p">:</span>
<a id="__codelineno-0-378" name="__codelineno-0-378"></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;Initialize FileSystem for different scheme.&quot;&quot;&quot;</span>
<a id="__codelineno-0-379" name="__codelineno-0-379"></a> <span class="k">if</span> <span class="n">scheme</span> <span class="ow">in</span> <span class="p">{</span><span class="s2">&quot;oss&quot;</span><span class="p">}:</span>
<a id="__codelineno-0-380" name="__codelineno-0-380"></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_initialize_oss_fs</span><span class="p">()</span>
<a id="__codelineno-0-381" name="__codelineno-0-381"></a>
<a id="__codelineno-0-382" name="__codelineno-0-382"></a> <span class="k">elif</span> <span class="n">scheme</span> <span class="ow">in</span> <span class="p">{</span><span class="s2">&quot;s3&quot;</span><span class="p">,</span> <span class="s2">&quot;s3a&quot;</span><span class="p">,</span> <span class="s2">&quot;s3n&quot;</span><span class="p">}:</span>
<a id="__codelineno-0-383" name="__codelineno-0-383"></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_initialize_s3_fs</span><span class="p">(</span><span class="n">netloc</span><span class="p">)</span>
<a id="__codelineno-0-384" name="__codelineno-0-384"></a>
<a id="__codelineno-0-385" name="__codelineno-0-385"></a> <span class="k">elif</span> <span class="n">scheme</span> <span class="ow">in</span> <span class="p">{</span><span class="s2">&quot;hdfs&quot;</span><span class="p">,</span> <span class="s2">&quot;viewfs&quot;</span><span class="p">}:</span>
<a id="__codelineno-0-386" name="__codelineno-0-386"></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_initialize_hdfs_fs</span><span class="p">(</span><span class="n">scheme</span><span class="p">,</span> <span class="n">netloc</span><span class="p">)</span>
<a id="__codelineno-0-387" name="__codelineno-0-387"></a>
<a id="__codelineno-0-388" name="__codelineno-0-388"></a> <span class="k">elif</span> <span class="n">scheme</span> <span class="ow">in</span> <span class="p">{</span><span class="s2">&quot;gs&quot;</span><span class="p">,</span> <span class="s2">&quot;gcs&quot;</span><span class="p">}:</span>
<a id="__codelineno-0-389" name="__codelineno-0-389"></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_initialize_gcs_fs</span><span class="p">()</span>
<a id="__codelineno-0-390" name="__codelineno-0-390"></a>
<a id="__codelineno-0-391" name="__codelineno-0-391"></a> <span class="k">elif</span> <span class="n">scheme</span> <span class="ow">in</span> <span class="p">{</span><span class="s2">&quot;file&quot;</span><span class="p">}:</span>
<a id="__codelineno-0-392" name="__codelineno-0-392"></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_initialize_local_fs</span><span class="p">()</span>
<a id="__codelineno-0-393" name="__codelineno-0-393"></a>
<a id="__codelineno-0-394" name="__codelineno-0-394"></a> <span class="k">else</span><span class="p">:</span>
<a id="__codelineno-0-395" name="__codelineno-0-395"></a> <span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="sa">f</span><span class="s2">&quot;Unrecognized filesystem type in URI: </span><span class="si">{</span><span class="n">scheme</span><span class="si">}</span><span class="s2">&quot;</span><span class="p">)</span>
<a id="__codelineno-0-396" name="__codelineno-0-396"></a>
<a id="__codelineno-0-397" name="__codelineno-0-397"></a> <span class="k">def</span><span class="w"> </span><span class="nf">_initialize_oss_fs</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">FileSystem</span><span class="p">:</span>
<a id="__codelineno-0-398" name="__codelineno-0-398"></a> <span class="kn">from</span><span class="w"> </span><span class="nn">pyarrow.fs</span><span class="w"> </span><span class="kn">import</span> <span class="n">S3FileSystem</span>
<a id="__codelineno-0-399" name="__codelineno-0-399"></a>
<a id="__codelineno-0-400" name="__codelineno-0-400"></a> <span class="n">client_kwargs</span><span class="p">:</span> <span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Any</span><span class="p">]</span> <span class="o">=</span> <span class="p">{</span>
<a id="__codelineno-0-401" name="__codelineno-0-401"></a> <span class="s2">&quot;endpoint_override&quot;</span><span class="p">:</span> <span class="bp">self</span><span class="o">.</span><span class="n">properties</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">S3_ENDPOINT</span><span class="p">),</span>
<a id="__codelineno-0-402" name="__codelineno-0-402"></a> <span class="s2">&quot;access_key&quot;</span><span class="p">:</span> <span class="n">get_first_property_value</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">properties</span><span class="p">,</span> <span class="n">S3_ACCESS_KEY_ID</span><span class="p">,</span> <span class="n">AWS_ACCESS_KEY_ID</span><span class="p">),</span>
<a id="__codelineno-0-403" name="__codelineno-0-403"></a> <span class="s2">&quot;secret_key&quot;</span><span class="p">:</span> <span class="n">get_first_property_value</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">properties</span><span class="p">,</span> <span class="n">S3_SECRET_ACCESS_KEY</span><span class="p">,</span> <span class="n">AWS_SECRET_ACCESS_KEY</span><span class="p">),</span>
<a id="__codelineno-0-404" name="__codelineno-0-404"></a> <span class="s2">&quot;session_token&quot;</span><span class="p">:</span> <span class="n">get_first_property_value</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">properties</span><span class="p">,</span> <span class="n">S3_SESSION_TOKEN</span><span class="p">,</span> <span class="n">AWS_SESSION_TOKEN</span><span class="p">),</span>
<a id="__codelineno-0-405" name="__codelineno-0-405"></a> <span class="s2">&quot;region&quot;</span><span class="p">:</span> <span class="n">get_first_property_value</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">properties</span><span class="p">,</span> <span class="n">S3_REGION</span><span class="p">,</span> <span class="n">AWS_REGION</span><span class="p">),</span>
<a id="__codelineno-0-406" name="__codelineno-0-406"></a> <span class="p">}</span>
<a id="__codelineno-0-407" name="__codelineno-0-407"></a>
<a id="__codelineno-0-408" name="__codelineno-0-408"></a> <span class="k">if</span> <span class="n">proxy_uri</span> <span class="o">:=</span> <span class="bp">self</span><span class="o">.</span><span class="n">properties</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">S3_PROXY_URI</span><span class="p">):</span>
<a id="__codelineno-0-409" name="__codelineno-0-409"></a> <span class="n">client_kwargs</span><span class="p">[</span><span class="s2">&quot;proxy_options&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="n">proxy_uri</span>
<a id="__codelineno-0-410" name="__codelineno-0-410"></a>
<a id="__codelineno-0-411" name="__codelineno-0-411"></a> <span class="k">if</span> <span class="n">connect_timeout</span> <span class="o">:=</span> <span class="bp">self</span><span class="o">.</span><span class="n">properties</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">S3_CONNECT_TIMEOUT</span><span class="p">):</span>
<a id="__codelineno-0-412" name="__codelineno-0-412"></a> <span class="n">client_kwargs</span><span class="p">[</span><span class="s2">&quot;connect_timeout&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="nb">float</span><span class="p">(</span><span class="n">connect_timeout</span><span class="p">)</span>
<a id="__codelineno-0-413" name="__codelineno-0-413"></a>
<a id="__codelineno-0-414" name="__codelineno-0-414"></a> <span class="k">if</span> <span class="n">request_timeout</span> <span class="o">:=</span> <span class="bp">self</span><span class="o">.</span><span class="n">properties</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">S3_REQUEST_TIMEOUT</span><span class="p">):</span>
<a id="__codelineno-0-415" name="__codelineno-0-415"></a> <span class="n">client_kwargs</span><span class="p">[</span><span class="s2">&quot;request_timeout&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="nb">float</span><span class="p">(</span><span class="n">request_timeout</span><span class="p">)</span>
<a id="__codelineno-0-416" name="__codelineno-0-416"></a>
<a id="__codelineno-0-417" name="__codelineno-0-417"></a> <span class="k">if</span> <span class="n">role_arn</span> <span class="o">:=</span> <span class="n">get_first_property_value</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">properties</span><span class="p">,</span> <span class="n">S3_ROLE_ARN</span><span class="p">,</span> <span class="n">AWS_ROLE_ARN</span><span class="p">):</span>
<a id="__codelineno-0-418" name="__codelineno-0-418"></a> <span class="n">client_kwargs</span><span class="p">[</span><span class="s2">&quot;role_arn&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="n">role_arn</span>
<a id="__codelineno-0-419" name="__codelineno-0-419"></a>
<a id="__codelineno-0-420" name="__codelineno-0-420"></a> <span class="k">if</span> <span class="n">session_name</span> <span class="o">:=</span> <span class="n">get_first_property_value</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">properties</span><span class="p">,</span> <span class="n">S3_ROLE_SESSION_NAME</span><span class="p">,</span> <span class="n">AWS_ROLE_SESSION_NAME</span><span class="p">):</span>
<a id="__codelineno-0-421" name="__codelineno-0-421"></a> <span class="n">client_kwargs</span><span class="p">[</span><span class="s2">&quot;session_name&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="n">session_name</span>
<a id="__codelineno-0-422" name="__codelineno-0-422"></a>
<a id="__codelineno-0-423" name="__codelineno-0-423"></a> <span class="k">if</span> <span class="n">force_virtual_addressing</span> <span class="o">:=</span> <span class="bp">self</span><span class="o">.</span><span class="n">properties</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">S3_FORCE_VIRTUAL_ADDRESSING</span><span class="p">):</span>
<a id="__codelineno-0-424" name="__codelineno-0-424"></a> <span class="n">client_kwargs</span><span class="p">[</span><span class="s2">&quot;force_virtual_addressing&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="n">property_as_bool</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">properties</span><span class="p">,</span> <span class="n">force_virtual_addressing</span><span class="p">,</span> <span class="kc">False</span><span class="p">)</span>
<a id="__codelineno-0-425" name="__codelineno-0-425"></a>
<a id="__codelineno-0-426" name="__codelineno-0-426"></a> <span class="k">return</span> <span class="n">S3FileSystem</span><span class="p">(</span><span class="o">**</span><span class="n">client_kwargs</span><span class="p">)</span>
<a id="__codelineno-0-427" name="__codelineno-0-427"></a>
<a id="__codelineno-0-428" name="__codelineno-0-428"></a> <span class="k">def</span><span class="w"> </span><span class="nf">_initialize_s3_fs</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">netloc</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">])</span> <span class="o">-&gt;</span> <span class="n">FileSystem</span><span class="p">:</span><!-- Listing of _initialize_s3_fs(netloc): collects keyword arguments for pyarrow.fs.S3FileSystem from self.properties and returns the constructed filesystem. Optional settings (proxy, timeouts, role, session name, virtual addressing) are only added when the matching property is truthy. -->
<a id="__codelineno-0-429" name="__codelineno-0-429"></a> <span class="kn">from</span><span class="w"> </span><span class="nn">pyarrow.fs</span><span class="w"> </span><span class="kn">import</span> <span class="n">S3FileSystem</span>
<a id="__codelineno-0-430" name="__codelineno-0-430"></a>
<a id="__codelineno-0-431" name="__codelineno-0-431"></a> <span class="n">provided_region</span> <span class="o">=</span> <span class="n">get_first_property_value</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">properties</span><span class="p">,</span> <span class="n">S3_REGION</span><span class="p">,</span> <span class="n">AWS_REGION</span><span class="p">)</span>
<a id="__codelineno-0-432" name="__codelineno-0-432"></a>
<a id="__codelineno-0-433" name="__codelineno-0-433"></a> <span class="c1"># Do this when we don&#39;t provide the region at all, or when we explicitly enable it</span>
<a id="__codelineno-0-434" name="__codelineno-0-434"></a> <span class="k">if</span> <span class="n">provided_region</span> <span class="ow">is</span> <span class="kc">None</span> <span class="ow">or</span> <span class="n">property_as_bool</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">properties</span><span class="p">,</span> <span class="n">S3_RESOLVE_REGION</span><span class="p">,</span> <span class="kc">False</span><span class="p">)</span> <span class="ow">is</span> <span class="kc">True</span><span class="p">:</span>
<a id="__codelineno-0-435" name="__codelineno-0-435"></a> <span class="c1"># Resolve region from netloc(bucket), fallback to user-provided region</span>
<a id="__codelineno-0-436" name="__codelineno-0-436"></a> <span class="c1"># Only supported by buckets hosted by S3</span>
<a id="__codelineno-0-437" name="__codelineno-0-437"></a> <span class="n">bucket_region</span> <span class="o">=</span> <span class="n">_cached_resolve_s3_region</span><span class="p">(</span><span class="n">bucket</span><span class="o">=</span><span class="n">netloc</span><span class="p">)</span> <span class="ow">or</span> <span class="n">provided_region</span><!-- A falsy result from _cached_resolve_s3_region falls back to the configured region, which may itself be None. -->
<a id="__codelineno-0-438" name="__codelineno-0-438"></a> <span class="k">if</span> <span class="n">provided_region</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span> <span class="ow">and</span> <span class="n">bucket_region</span> <span class="o">!=</span> <span class="n">provided_region</span><span class="p">:</span>
<a id="__codelineno-0-439" name="__codelineno-0-439"></a> <span class="n">logger</span><span class="o">.</span><span class="n">warning</span><span class="p">(</span>
<a id="__codelineno-0-440" name="__codelineno-0-440"></a> <span class="sa">f</span><span class="s2">&quot;PyArrow FileIO overriding S3 bucket region for bucket </span><span class="si">{</span><span class="n">netloc</span><span class="si">}</span><span class="s2">: &quot;</span>
<a id="__codelineno-0-441" name="__codelineno-0-441"></a> <span class="sa">f</span><span class="s2">&quot;provided region </span><span class="si">{</span><span class="n">provided_region</span><span class="si">}</span><span class="s2">, actual region </span><span class="si">{</span><span class="n">bucket_region</span><span class="si">}</span><span class="s2">&quot;</span>
<a id="__codelineno-0-442" name="__codelineno-0-442"></a> <span class="p">)</span>
<a id="__codelineno-0-443" name="__codelineno-0-443"></a> <span class="k">else</span><span class="p">:</span>
<a id="__codelineno-0-444" name="__codelineno-0-444"></a> <span class="n">bucket_region</span> <span class="o">=</span> <span class="n">provided_region</span>
<a id="__codelineno-0-445" name="__codelineno-0-445"></a>
<a id="__codelineno-0-446" name="__codelineno-0-446"></a> <span class="n">client_kwargs</span><span class="p">:</span> <span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Any</span><span class="p">]</span> <span class="o">=</span> <span class="p">{</span>
<a id="__codelineno-0-447" name="__codelineno-0-447"></a> <span class="s2">&quot;endpoint_override&quot;</span><span class="p">:</span> <span class="bp">self</span><span class="o">.</span><span class="n">properties</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">S3_ENDPOINT</span><span class="p">),</span>
<a id="__codelineno-0-448" name="__codelineno-0-448"></a> <span class="s2">&quot;access_key&quot;</span><span class="p">:</span> <span class="n">get_first_property_value</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">properties</span><span class="p">,</span> <span class="n">S3_ACCESS_KEY_ID</span><span class="p">,</span> <span class="n">AWS_ACCESS_KEY_ID</span><span class="p">),</span>
<a id="__codelineno-0-449" name="__codelineno-0-449"></a> <span class="s2">&quot;secret_key&quot;</span><span class="p">:</span> <span class="n">get_first_property_value</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">properties</span><span class="p">,</span> <span class="n">S3_SECRET_ACCESS_KEY</span><span class="p">,</span> <span class="n">AWS_SECRET_ACCESS_KEY</span><span class="p">),</span>
<a id="__codelineno-0-450" name="__codelineno-0-450"></a> <span class="s2">&quot;session_token&quot;</span><span class="p">:</span> <span class="n">get_first_property_value</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">properties</span><span class="p">,</span> <span class="n">S3_SESSION_TOKEN</span><span class="p">,</span> <span class="n">AWS_SESSION_TOKEN</span><span class="p">),</span>
<a id="__codelineno-0-451" name="__codelineno-0-451"></a> <span class="s2">&quot;region&quot;</span><span class="p">:</span> <span class="n">bucket_region</span><span class="p">,</span>
<a id="__codelineno-0-452" name="__codelineno-0-452"></a> <span class="p">}</span>
<a id="__codelineno-0-453" name="__codelineno-0-453"></a>
<a id="__codelineno-0-454" name="__codelineno-0-454"></a> <span class="k">if</span> <span class="n">proxy_uri</span> <span class="o">:=</span> <span class="bp">self</span><span class="o">.</span><span class="n">properties</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">S3_PROXY_URI</span><span class="p">):</span>
<a id="__codelineno-0-455" name="__codelineno-0-455"></a> <span class="n">client_kwargs</span><span class="p">[</span><span class="s2">&quot;proxy_options&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="n">proxy_uri</span>
<a id="__codelineno-0-456" name="__codelineno-0-456"></a>
<a id="__codelineno-0-457" name="__codelineno-0-457"></a> <span class="k">if</span> <span class="n">connect_timeout</span> <span class="o">:=</span> <span class="bp">self</span><span class="o">.</span><span class="n">properties</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">S3_CONNECT_TIMEOUT</span><span class="p">):</span>
<a id="__codelineno-0-458" name="__codelineno-0-458"></a> <span class="n">client_kwargs</span><span class="p">[</span><span class="s2">&quot;connect_timeout&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="nb">float</span><span class="p">(</span><span class="n">connect_timeout</span><span class="p">)</span>
<a id="__codelineno-0-459" name="__codelineno-0-459"></a>
<a id="__codelineno-0-460" name="__codelineno-0-460"></a> <span class="k">if</span> <span class="n">request_timeout</span> <span class="o">:=</span> <span class="bp">self</span><span class="o">.</span><span class="n">properties</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">S3_REQUEST_TIMEOUT</span><span class="p">):</span>
<a id="__codelineno-0-461" name="__codelineno-0-461"></a> <span class="n">client_kwargs</span><span class="p">[</span><span class="s2">&quot;request_timeout&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="nb">float</span><span class="p">(</span><span class="n">request_timeout</span><span class="p">)</span>
<a id="__codelineno-0-462" name="__codelineno-0-462"></a>
<a id="__codelineno-0-463" name="__codelineno-0-463"></a> <span class="k">if</span> <span class="n">role_arn</span> <span class="o">:=</span> <span class="n">get_first_property_value</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">properties</span><span class="p">,</span> <span class="n">S3_ROLE_ARN</span><span class="p">,</span> <span class="n">AWS_ROLE_ARN</span><span class="p">):</span>
<a id="__codelineno-0-464" name="__codelineno-0-464"></a> <span class="n">client_kwargs</span><span class="p">[</span><span class="s2">&quot;role_arn&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="n">role_arn</span>
<a id="__codelineno-0-465" name="__codelineno-0-465"></a>
<a id="__codelineno-0-466" name="__codelineno-0-466"></a> <span class="k">if</span> <span class="n">session_name</span> <span class="o">:=</span> <span class="n">get_first_property_value</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">properties</span><span class="p">,</span> <span class="n">S3_ROLE_SESSION_NAME</span><span class="p">,</span> <span class="n">AWS_ROLE_SESSION_NAME</span><span class="p">):</span>
<a id="__codelineno-0-467" name="__codelineno-0-467"></a> <span class="n">client_kwargs</span><span class="p">[</span><span class="s2">&quot;session_name&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="n">session_name</span>
<a id="__codelineno-0-468" name="__codelineno-0-468"></a>
<a id="__codelineno-0-469" name="__codelineno-0-469"></a> <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">properties</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">S3_FORCE_VIRTUAL_ADDRESSING</span><span class="p">):</span><!-- The raw value only gates the branch, so it is no longer bound to a local name. -->
<a id="__codelineno-0-470" name="__codelineno-0-470"></a> <span class="n">client_kwargs</span><span class="p">[</span><span class="s2">&quot;force_virtual_addressing&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="n">property_as_bool</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">properties</span><span class="p">,</span> <span class="n">S3_FORCE_VIRTUAL_ADDRESSING</span><span class="p">,</span> <span class="kc">False</span><span class="p">)</span><!-- The second argument is the property key, matching the S3_RESOLVE_REGION call above. The previous listing passed the value that had just been read, so the lookup was keyed by that value instead of the setting name. -->
<a id="__codelineno-0-471" name="__codelineno-0-471"></a>
<a id="__codelineno-0-472" name="__codelineno-0-472"></a> <span class="k">return</span> <span class="n">S3FileSystem</span><span class="p">(</span><span class="o">**</span><span class="n">client_kwargs</span><span class="p">)</span>
<a id="__codelineno-0-473" name="__codelineno-0-473"></a>
<a id="__codelineno-0-474" name="__codelineno-0-474"></a> <span class="k">def</span><span class="w"> </span><span class="nf">_initialize_hdfs_fs</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">scheme</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span> <span class="n">netloc</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">])</span> <span class="o">-&gt;</span> <span class="n">FileSystem</span><span class="p">:</span><!-- Listing of _initialize_hdfs_fs(scheme, netloc): when netloc is truthy the filesystem comes from HadoopFileSystem.from_uri and the HDFS_* properties are not consulted; otherwise keyword arguments are built from HDFS_HOST, HDFS_PORT, HDFS_USER and HDFS_KERB_TICKET and passed to HadoopFileSystem. -->
<a id="__codelineno-0-475" name="__codelineno-0-475"></a> <span class="kn">from</span><span class="w"> </span><span class="nn">pyarrow.fs</span><span class="w"> </span><span class="kn">import</span> <span class="n">HadoopFileSystem</span>
<a id="__codelineno-0-476" name="__codelineno-0-476"></a>
<a id="__codelineno-0-477" name="__codelineno-0-477"></a> <span class="n">hdfs_kwargs</span><span class="p">:</span> <span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Any</span><span class="p">]</span> <span class="o">=</span> <span class="p">{}</span><!-- Only used on the property-based path after the netloc early return. -->
<a id="__codelineno-0-478" name="__codelineno-0-478"></a> <span class="k">if</span> <span class="n">netloc</span><span class="p">:</span>
<a id="__codelineno-0-479" name="__codelineno-0-479"></a> <span class="k">return</span> <span class="n">HadoopFileSystem</span><span class="o">.</span><span class="n">from_uri</span><span class="p">(</span><span class="sa">f</span><span class="s2">&quot;</span><span class="si">{</span><span class="n">scheme</span><span class="si">}</span><span class="s2">://</span><span class="si">{</span><span class="n">netloc</span><span class="si">}</span><span class="s2">&quot;</span><span class="p">)</span>
<a id="__codelineno-0-480" name="__codelineno-0-480"></a> <span class="k">if</span> <span class="n">host</span> <span class="o">:=</span> <span class="bp">self</span><span class="o">.</span><span class="n">properties</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">HDFS_HOST</span><span class="p">):</span>
<a id="__codelineno-0-481" name="__codelineno-0-481"></a> <span class="n">hdfs_kwargs</span><span class="p">[</span><span class="s2">&quot;host&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="n">host</span>
<a id="__codelineno-0-482" name="__codelineno-0-482"></a> <span class="k">if</span> <span class="n">port</span> <span class="o">:=</span> <span class="bp">self</span><span class="o">.</span><span class="n">properties</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">HDFS_PORT</span><span class="p">):</span>
<a id="__codelineno-0-483" name="__codelineno-0-483"></a> <span class="c1"># port should be an integer type</span>
<a id="__codelineno-0-484" name="__codelineno-0-484"></a> <span class="n">hdfs_kwargs</span><span class="p">[</span><span class="s2">&quot;port&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="nb">int</span><span class="p">(</span><span class="n">port</span><span class="p">)</span>
<a id="__codelineno-0-485" name="__codelineno-0-485"></a> <span class="k">if</span> <span class="n">user</span> <span class="o">:=</span> <span class="bp">self</span><span class="o">.</span><span class="n">properties</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">HDFS_USER</span><span class="p">):</span>
<a id="__codelineno-0-486" name="__codelineno-0-486"></a> <span class="n">hdfs_kwargs</span><span class="p">[</span><span class="s2">&quot;user&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="n">user</span>
<a id="__codelineno-0-487" name="__codelineno-0-487"></a> <span class="k">if</span> <span class="n">kerb_ticket</span> <span class="o">:=</span> <span class="bp">self</span><span class="o">.</span><span class="n">properties</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">HDFS_KERB_TICKET</span><span class="p">):</span>
<a id="__codelineno-0-488" name="__codelineno-0-488"></a> <span class="n">hdfs_kwargs</span><span class="p">[</span><span class="s2">&quot;kerb_ticket&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="n">kerb_ticket</span>
<a id="__codelineno-0-489" name="__codelineno-0-489"></a>
<a id="__codelineno-0-490" name="__codelineno-0-490"></a> <span class="k">return</span> <span class="n">HadoopFileSystem</span><span class="p">(</span><span class="o">**</span><span class="n">hdfs_kwargs</span><span class="p">)</span><!-- Reached only when netloc is falsy; properties that were unset or falsy are simply omitted from the call. -->
<a id="__codelineno-0-491" name="__codelineno-0-491"></a>
<a id="__codelineno-0-492" name="__codelineno-0-492"></a> <span class="k">def</span><span class="w"> </span><span class="nf">_initialize_gcs_fs</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">FileSystem</span><span class="p">:</span><!-- Listing of _initialize_gcs_fs(): builds keyword arguments for pyarrow.fs.GcsFileSystem from whichever GCS_* properties are truthy and returns the constructed filesystem. -->
<a id="__codelineno-0-493" name="__codelineno-0-493"></a> <span class="kn">from</span><span class="w"> </span><span class="nn">pyarrow.fs</span><span class="w"> </span><span class="kn">import</span> <span class="n">GcsFileSystem</span>
<a id="__codelineno-0-494" name="__codelineno-0-494"></a>
<a id="__codelineno-0-495" name="__codelineno-0-495"></a> <span class="n">gcs_kwargs</span><span class="p">:</span> <span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Any</span><span class="p">]</span> <span class="o">=</span> <span class="p">{}</span>
<a id="__codelineno-0-496" name="__codelineno-0-496"></a> <span class="k">if</span> <span class="n">access_token</span> <span class="o">:=</span> <span class="bp">self</span><span class="o">.</span><span class="n">properties</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">GCS_TOKEN</span><span class="p">):</span>
<a id="__codelineno-0-497" name="__codelineno-0-497"></a> <span class="n">gcs_kwargs</span><span class="p">[</span><span class="s2">&quot;access_token&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="n">access_token</span>
<a id="__codelineno-0-498" name="__codelineno-0-498"></a> <span class="k">if</span> <span class="n">expiration</span> <span class="o">:=</span> <span class="bp">self</span><span class="o">.</span><span class="n">properties</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">GCS_TOKEN_EXPIRES_AT_MS</span><span class="p">):</span>
<a id="__codelineno-0-499" name="__codelineno-0-499"></a> <span class="n">gcs_kwargs</span><span class="p">[</span><span class="s2">&quot;credential_token_expiration&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="n">millis_to_datetime</span><span class="p">(</span><span class="nb">int</span><span class="p">(</span><span class="n">expiration</span><span class="p">))</span><!-- The property value is parsed with int() before conversion. NOTE(review): the names suggest epoch milliseconds; confirm against millis_to_datetime. -->
<a id="__codelineno-0-500" name="__codelineno-0-500"></a> <span class="k">if</span> <span class="n">bucket_location</span> <span class="o">:=</span> <span class="bp">self</span><span class="o">.</span><span class="n">properties</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">GCS_DEFAULT_LOCATION</span><span class="p">):</span>
<a id="__codelineno-0-501" name="__codelineno-0-501"></a> <span class="n">gcs_kwargs</span><span class="p">[</span><span class="s2">&quot;default_bucket_location&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="n">bucket_location</span>
<a id="__codelineno-0-502" name="__codelineno-0-502"></a> <span class="k">if</span> <span class="n">endpoint</span> <span class="o">:=</span> <span class="bp">self</span><span class="o">.</span><span class="n">properties</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">GCS_SERVICE_HOST</span><span class="p">):</span>
<a id="__codelineno-0-503" name="__codelineno-0-503"></a> <span class="n">url_parts</span> <span class="o">=</span> <span class="n">urlparse</span><span class="p">(</span><span class="n">endpoint</span><span class="p">)</span><!-- The endpoint is split so its URL scheme and network location can be passed as separate arguments. NOTE(review): a value without "://" leaves netloc empty; confirm callers always supply a full URL. -->
<a id="__codelineno-0-504" name="__codelineno-0-504"></a> <span class="n">gcs_kwargs</span><span class="p">[</span><span class="s2">&quot;scheme&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="n">url_parts</span><span class="o">.</span><span class="n">scheme</span>
<a id="__codelineno-0-505" name="__codelineno-0-505"></a> <span class="n">gcs_kwargs</span><span class="p">[</span><span class="s2">&quot;endpoint_override&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="n">url_parts</span><span class="o">.</span><span class="n">netloc</span>
<a id="__codelineno-0-506" name="__codelineno-0-506"></a>
<a id="__codelineno-0-507" name="__codelineno-0-507"></a> <span class="k">return</span> <span class="n">GcsFileSystem</span><span class="p">(</span><span class="o">**</span><span class="n">gcs_kwargs</span><span class="p">)</span>
<a id="__codelineno-0-508" name="__codelineno-0-508"></a>
<a id="__codelineno-0-509" name="__codelineno-0-509"></a> <span class="k">def</span><span class="w"> </span><span class="nf">_initialize_local_fs</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">FileSystem</span><span class="p">:</span><!-- Listing of _initialize_local_fs(): returns a new PyArrowLocalFileSystem instance and reads no properties. -->
<a id="__codelineno-0-510" name="__codelineno-0-510"></a> <span class="k">return</span> <span class="n">PyArrowLocalFileSystem</span><span class="p">()</span>
<a id="__codelineno-0-511" name="__codelineno-0-511"></a>
<a id="__codelineno-0-512" name="__codelineno-0-512"></a> <span class="k">def</span><span class="w"> </span><span class="nf">new_input</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">location</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">PyArrowFile</span><span class="p">:</span><!-- Listing of new_input(location): parses the location, picks the filesystem for its scheme and netloc, and wraps both in a PyArrowFile. -->
<a id="__codelineno-0-513" name="__codelineno-0-513"></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;Get a PyArrowFile instance to read bytes from the file at the given location.</span>
<a id="__codelineno-0-514" name="__codelineno-0-514"></a>
<a id="__codelineno-0-515" name="__codelineno-0-515"></a><span class="sd"> Args:</span>
<a id="__codelineno-0-516" name="__codelineno-0-516"></a><span class="sd"> location (str): A URI or a path to a local file.</span>
<a id="__codelineno-0-517" name="__codelineno-0-517"></a>
<a id="__codelineno-0-518" name="__codelineno-0-518"></a><span class="sd"> Returns:</span>
<a id="__codelineno-0-519" name="__codelineno-0-519"></a><span class="sd"> PyArrowFile: A PyArrowFile instance for the given location.</span>
<a id="__codelineno-0-520" name="__codelineno-0-520"></a><span class="sd"> &quot;&quot;&quot;</span>
<a id="__codelineno-0-521" name="__codelineno-0-521"></a> <span class="n">scheme</span><span class="p">,</span> <span class="n">netloc</span><span class="p">,</span> <span class="n">path</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">parse_location</span><span class="p">(</span><span class="n">location</span><span class="p">)</span>
<a id="__codelineno-0-522" name="__codelineno-0-522"></a> <span class="k">return</span> <span class="n">PyArrowFile</span><span class="p">(</span>
<a id="__codelineno-0-523" name="__codelineno-0-523"></a> <span class="n">fs</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">fs_by_scheme</span><span class="p">(</span><span class="n">scheme</span><span class="p">,</span> <span class="n">netloc</span><span class="p">),</span>
<a id="__codelineno-0-524" name="__codelineno-0-524"></a> <span class="n">location</span><span class="o">=</span><span class="n">location</span><span class="p">,</span><!-- The original, unparsed location string is passed through alongside the parsed path. -->
<a id="__codelineno-0-525" name="__codelineno-0-525"></a> <span class="n">path</span><span class="o">=</span><span class="n">path</span><span class="p">,</span>
<a id="__codelineno-0-526" name="__codelineno-0-526"></a> <span class="n">buffer_size</span><span class="o">=</span><span class="nb">int</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">properties</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">BUFFER_SIZE</span><span class="p">,</span> <span class="n">ONE_MEGABYTE</span><span class="p">)),</span><!-- Falls back to ONE_MEGABYTE when BUFFER_SIZE is not set; int() is applied to whichever value is used. -->
<a id="__codelineno-0-527" name="__codelineno-0-527"></a> <span class="p">)</span>
<a id="__codelineno-0-528" name="__codelineno-0-528"></a>
<a id="__codelineno-0-529" name="__codelineno-0-529"></a> <span class="k">def</span><span class="w"> </span><span class="nf">new_output</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">location</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">PyArrowFile</span><span class="p">:</span><!-- Listing of new_output(location): builds the PyArrowFile with the same steps and arguments as new_input; only the docstring differs. -->
<a id="__codelineno-0-530" name="__codelineno-0-530"></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;Get a PyArrowFile instance to write bytes to the file at the given location.</span>
<a id="__codelineno-0-531" name="__codelineno-0-531"></a>
<a id="__codelineno-0-532" name="__codelineno-0-532"></a><span class="sd"> Args:</span>
<a id="__codelineno-0-533" name="__codelineno-0-533"></a><span class="sd"> location (str): A URI or a path to a local file.</span>
<a id="__codelineno-0-534" name="__codelineno-0-534"></a>
<a id="__codelineno-0-535" name="__codelineno-0-535"></a><span class="sd"> Returns:</span>
<a id="__codelineno-0-536" name="__codelineno-0-536"></a><span class="sd"> PyArrowFile: A PyArrowFile instance for the given location.</span>
<a id="__codelineno-0-537" name="__codelineno-0-537"></a><span class="sd"> &quot;&quot;&quot;</span>
<a id="__codelineno-0-538" name="__codelineno-0-538"></a> <span class="n">scheme</span><span class="p">,</span> <span class="n">netloc</span><span class="p">,</span> <span class="n">path</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">parse_location</span><span class="p">(</span><span class="n">location</span><span class="p">)</span>
<a id="__codelineno-0-539" name="__codelineno-0-539"></a> <span class="k">return</span> <span class="n">PyArrowFile</span><span class="p">(</span>
<a id="__codelineno-0-540" name="__codelineno-0-540"></a> <span class="n">fs</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">fs_by_scheme</span><span class="p">(</span><span class="n">scheme</span><span class="p">,</span> <span class="n">netloc</span><span class="p">),</span>
<a id="__codelineno-0-541" name="__codelineno-0-541"></a> <span class="n">location</span><span class="o">=</span><span class="n">location</span><span class="p">,</span>
<a id="__codelineno-0-542" name="__codelineno-0-542"></a> <span class="n">path</span><span class="o">=</span><span class="n">path</span><span class="p">,</span>
<a id="__codelineno-0-543" name="__codelineno-0-543"></a> <span class="n">buffer_size</span><span class="o">=</span><span class="nb">int</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">properties</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">BUFFER_SIZE</span><span class="p">,</span> <span class="n">ONE_MEGABYTE</span><span class="p">)),</span><!-- Falls back to ONE_MEGABYTE when BUFFER_SIZE is not set; int() is applied to whichever value is used. -->
<a id="__codelineno-0-544" name="__codelineno-0-544"></a> <span class="p">)</span>
<a id="__codelineno-0-545" name="__codelineno-0-545"></a>
<a id="__codelineno-0-546" name="__codelineno-0-546"></a> <span class="k">def</span><span class="w"> </span><span class="nf">delete</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">location</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">InputFile</span><span class="p">,</span> <span class="n">OutputFile</span><span class="p">])</span> <span class="o">-&gt;</span> <span class="kc">None</span><span class="p">:</span>
<a id="__codelineno-0-547" name="__codelineno-0-547"></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;Delete the file at the given location.</span>
<a id="__codelineno-0-548" name="__codelineno-0-548"></a>
<a id="__codelineno-0-549" name="__codelineno-0-549"></a><span class="sd"> Args:</span>
<a id="__codelineno-0-550" name="__codelineno-0-550"></a><span class="sd"> location (Union[str, InputFile, OutputFile]): The URI to the file--if an InputFile instance or an OutputFile instance is provided,</span>
<a id="__codelineno-0-551" name="__codelineno-0-551"></a><span class="sd"> the location attribute for that instance is used as the location to delete.</span>
<a id="__codelineno-0-552" name="__codelineno-0-552"></a>
<a id="__codelineno-0-553" name="__codelineno-0-553"></a><span class="sd"> Raises:</span>
<a id="__codelineno-0-554" name="__codelineno-0-554"></a><span class="sd"> FileNotFoundError: When the file at the provided location does not exist.</span>
<a id="__codelineno-0-555" name="__codelineno-0-555"></a><span class="sd"> PermissionError: If the file at the provided location cannot be accessed due to a permission error such as</span>
<a id="__codelineno-0-556" name="__codelineno-0-556"></a><span class="sd"> an AWS error code 15.</span>
<a id="__codelineno-0-557" name="__codelineno-0-557"></a><span class="sd"> &quot;&quot;&quot;</span>
<a id="__codelineno-0-558" name="__codelineno-0-558"></a> <span class="n">str_location</span> <span class="o">=</span> <span class="n">location</span><span class="o">.</span><span class="n">location</span> <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">location</span><span class="p">,</span> <span class="p">(</span><span class="n">InputFile</span><span class="p">,</span> <span class="n">OutputFile</span><span class="p">))</span> <span class="k">else</span> <span class="n">location</span>
<a id="__codelineno-0-559" name="__codelineno-0-559"></a> <span class="n">scheme</span><span class="p">,</span> <span class="n">netloc</span><span class="p">,</span> <span class="n">path</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">parse_location</span><span class="p">(</span><span class="n">str_location</span><span class="p">)</span>
<a id="__codelineno-0-560" name="__codelineno-0-560"></a> <span class="n">fs</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">fs_by_scheme</span><span class="p">(</span><span class="n">scheme</span><span class="p">,</span> <span class="n">netloc</span><span class="p">)</span>
<a id="__codelineno-0-561" name="__codelineno-0-561"></a>
<a id="__codelineno-0-562" name="__codelineno-0-562"></a> <span class="k">try</span><span class="p">:</span>
<a id="__codelineno-0-563" name="__codelineno-0-563"></a> <span class="n">fs</span><span class="o">.</span><span class="n">delete_file</span><span class="p">(</span><span class="n">path</span><span class="p">)</span>
<a id="__codelineno-0-564" name="__codelineno-0-564"></a> <span class="k">except</span> <span class="ne">FileNotFoundError</span><span class="p">:</span>
<a id="__codelineno-0-565" name="__codelineno-0-565"></a> <span class="k">raise</span>
<a id="__codelineno-0-566" name="__codelineno-0-566"></a> <span class="k">except</span> <span class="ne">PermissionError</span><span class="p">:</span>
<a id="__codelineno-0-567" name="__codelineno-0-567"></a> <span class="k">raise</span>
<a id="__codelineno-0-568" name="__codelineno-0-568"></a> <span class="k">except</span> <span class="ne">OSError</span> <span class="k">as</span> <span class="n">e</span><span class="p">:</span>
<a id="__codelineno-0-569" name="__codelineno-0-569"></a> <span class="k">if</span> <span class="n">e</span><span class="o">.</span><span class="n">errno</span> <span class="o">==</span> <span class="mi">2</span> <span class="ow">or</span> <span class="s2">&quot;Path does not exist&quot;</span> <span class="ow">in</span> <span class="nb">str</span><span class="p">(</span><span class="n">e</span><span class="p">):</span>
<a id="__codelineno-0-570" name="__codelineno-0-570"></a> <span class="k">raise</span> <span class="ne">FileNotFoundError</span><span class="p">(</span><span class="sa">f</span><span class="s2">&quot;Cannot delete file, does not exist: </span><span class="si">{</span><span class="n">location</span><span class="si">}</span><span class="s2">&quot;</span><span class="p">)</span> <span class="kn">from</span><span class="w"> </span><span class="nn">e</span>
<a id="__codelineno-0-571" name="__codelineno-0-571"></a> <span class="k">elif</span> <span class="n">e</span><span class="o">.</span><span class="n">errno</span> <span class="o">==</span> <span class="mi">13</span> <span class="ow">or</span> <span class="s2">&quot;AWS Error [code 15]&quot;</span> <span class="ow">in</span> <span class="nb">str</span><span class="p">(</span><span class="n">e</span><span class="p">):</span>
<a id="__codelineno-0-572" name="__codelineno-0-572"></a> <span class="k">raise</span> <span class="ne">PermissionError</span><span class="p">(</span><span class="sa">f</span><span class="s2">&quot;Cannot delete file, access denied: </span><span class="si">{</span><span class="n">location</span><span class="si">}</span><span class="s2">&quot;</span><span class="p">)</span> <span class="kn">from</span><span class="w"> </span><span class="nn">e</span>
<a id="__codelineno-0-573" name="__codelineno-0-573"></a> <span class="k">raise</span> <span class="c1"># pragma: no cover - If some other kind of OSError, raise the raw error</span>
<a id="__codelineno-0-574" name="__codelineno-0-574"></a>
<a id="__codelineno-0-575" name="__codelineno-0-575"></a> <span class="k">def</span><span class="w"> </span><span class="nf">__getstate__</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Any</span><span class="p">]:</span>
<a id="__codelineno-0-576" name="__codelineno-0-576"></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;Create a dictionary of the PyArrowFileIO fields used when pickling.&quot;&quot;&quot;</span>
<a id="__codelineno-0-577" name="__codelineno-0-577"></a> <span class="n">fileio_copy</span> <span class="o">=</span> <span class="n">copy</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="vm">__dict__</span><span class="p">)</span>
<a id="__codelineno-0-578" name="__codelineno-0-578"></a> <span class="n">fileio_copy</span><span class="p">[</span><span class="s2">&quot;fs_by_scheme&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span>
<a id="__codelineno-0-579" name="__codelineno-0-579"></a> <span class="k">return</span> <span class="n">fileio_copy</span>
<a id="__codelineno-0-580" name="__codelineno-0-580"></a>
<a id="__codelineno-0-581" name="__codelineno-0-581"></a> <span class="k">def</span><span class="w"> </span><span class="nf">__setstate__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">state</span><span class="p">:</span> <span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Any</span><span class="p">])</span> <span class="o">-&gt;</span> <span class="kc">None</span><span class="p">:</span>
<a id="__codelineno-0-582" name="__codelineno-0-582"></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;Deserialize the state into a PyArrowFileIO instance.&quot;&quot;&quot;</span>
<a id="__codelineno-0-583" name="__codelineno-0-583"></a> <span class="bp">self</span><span class="o">.</span><span class="vm">__dict__</span> <span class="o">=</span> <span class="n">state</span>
<a id="__codelineno-0-584" name="__codelineno-0-584"></a> <span class="bp">self</span><span class="o">.</span><span class="n">fs_by_scheme</span> <span class="o">=</span> <span class="n">lru_cache</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_initialize_fs</span><span class="p">)</span>
</code></pre></div></td></tr></table></div>
</details>
<div class="doc doc-children">
<div class="doc doc-object doc-function">
<h3 id="pyiceberg.io.pyarrow.PyArrowFileIO.__getstate__" class="doc doc-heading">
<code class="highlight language-python"><span class="n">__getstate__</span><span class="p">()</span></code>
<a href="#pyiceberg.io.pyarrow.PyArrowFileIO.__getstate__" class="headerlink" title="Permanent link">&para;</a></h3>
<div class="doc doc-contents ">
<p>Create a dictionary of the PyArrowFileIO fields used when pickling.</p>
<details class="quote">
<summary>Source code in <code>pyiceberg/io/pyarrow.py</code></summary>
<div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-575">575</a></span>
<span class="normal"><a href="#__codelineno-0-576">576</a></span>
<span class="normal"><a href="#__codelineno-0-577">577</a></span>
<span class="normal"><a href="#__codelineno-0-578">578</a></span>
<span class="normal"><a href="#__codelineno-0-579">579</a></span></pre></div></td><td class="code"><div><pre><span></span><code><a id="__codelineno-0-575" name="__codelineno-0-575"></a><span class="k">def</span><span class="w"> </span><span class="nf">__getstate__</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Any</span><span class="p">]:</span>
<a id="__codelineno-0-576" name="__codelineno-0-576"></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;Create a dictionary of the PyArrowFileIO fields used when pickling.&quot;&quot;&quot;</span>
<a id="__codelineno-0-577" name="__codelineno-0-577"></a> <span class="n">fileio_copy</span> <span class="o">=</span> <span class="n">copy</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="vm">__dict__</span><span class="p">)</span>
<a id="__codelineno-0-578" name="__codelineno-0-578"></a> <span class="n">fileio_copy</span><span class="p">[</span><span class="s2">&quot;fs_by_scheme&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span>
<a id="__codelineno-0-579" name="__codelineno-0-579"></a> <span class="k">return</span> <span class="n">fileio_copy</span>
</code></pre></div></td></tr></table></div>
</details>
</div>
</div>
<div class="doc doc-object doc-function">
<h3 id="pyiceberg.io.pyarrow.PyArrowFileIO.__setstate__" class="doc doc-heading">
<code class="highlight language-python"><span class="n">__setstate__</span><span class="p">(</span><span class="n">state</span><span class="p">)</span></code>
<a href="#pyiceberg.io.pyarrow.PyArrowFileIO.__setstate__" class="headerlink" title="Permanent link">&para;</a></h3>
<div class="doc doc-contents ">
<p>Deserialize the state into a PyArrowFileIO instance.</p>
<details class="quote">
<summary>Source code in <code>pyiceberg/io/pyarrow.py</code></summary>
<div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-581">581</a></span>
<span class="normal"><a href="#__codelineno-0-582">582</a></span>
<span class="normal"><a href="#__codelineno-0-583">583</a></span>
<span class="normal"><a href="#__codelineno-0-584">584</a></span></pre></div></td><td class="code"><div><pre><span></span><code><a id="__codelineno-0-581" name="__codelineno-0-581"></a><span class="k">def</span><span class="w"> </span><span class="nf">__setstate__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">state</span><span class="p">:</span> <span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Any</span><span class="p">])</span> <span class="o">-&gt;</span> <span class="kc">None</span><span class="p">:</span>
<a id="__codelineno-0-582" name="__codelineno-0-582"></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;Deserialize the state into a PyArrowFileIO instance.&quot;&quot;&quot;</span>
<a id="__codelineno-0-583" name="__codelineno-0-583"></a> <span class="bp">self</span><span class="o">.</span><span class="vm">__dict__</span> <span class="o">=</span> <span class="n">state</span>
<a id="__codelineno-0-584" name="__codelineno-0-584"></a> <span class="bp">self</span><span class="o">.</span><span class="n">fs_by_scheme</span> <span class="o">=</span> <span class="n">lru_cache</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_initialize_fs</span><span class="p">)</span>
</code></pre></div></td></tr></table></div>
</details>
</div>
</div>
<div class="doc doc-object doc-function">
<h3 id="pyiceberg.io.pyarrow.PyArrowFileIO._initialize_fs" class="doc doc-heading">
<code class="highlight language-python"><span class="n">_initialize_fs</span><span class="p">(</span><span class="n">scheme</span><span class="p">,</span> <span class="n">netloc</span><span class="o">=</span><span class="kc">None</span><span class="p">)</span></code>
<a href="#pyiceberg.io.pyarrow.PyArrowFileIO._initialize_fs" class="headerlink" title="Permanent link">&para;</a></h3>
<div class="doc doc-contents ">
<p>Initialize the FileSystem that corresponds to the given URI scheme.</p>
<details class="quote">
<summary>Source code in <code>pyiceberg/io/pyarrow.py</code></summary>
<div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-377">377</a></span>
<span class="normal"><a href="#__codelineno-0-378">378</a></span>
<span class="normal"><a href="#__codelineno-0-379">379</a></span>
<span class="normal"><a href="#__codelineno-0-380">380</a></span>
<span class="normal"><a href="#__codelineno-0-381">381</a></span>
<span class="normal"><a href="#__codelineno-0-382">382</a></span>
<span class="normal"><a href="#__codelineno-0-383">383</a></span>
<span class="normal"><a href="#__codelineno-0-384">384</a></span>
<span class="normal"><a href="#__codelineno-0-385">385</a></span>
<span class="normal"><a href="#__codelineno-0-386">386</a></span>
<span class="normal"><a href="#__codelineno-0-387">387</a></span>
<span class="normal"><a href="#__codelineno-0-388">388</a></span>
<span class="normal"><a href="#__codelineno-0-389">389</a></span>
<span class="normal"><a href="#__codelineno-0-390">390</a></span>
<span class="normal"><a href="#__codelineno-0-391">391</a></span>
<span class="normal"><a href="#__codelineno-0-392">392</a></span>
<span class="normal"><a href="#__codelineno-0-393">393</a></span>
<span class="normal"><a href="#__codelineno-0-394">394</a></span>
<span class="normal"><a href="#__codelineno-0-395">395</a></span></pre></div></td><td class="code"><div><pre><span></span><code><a id="__codelineno-0-377" name="__codelineno-0-377"></a><span class="k">def</span><span class="w"> </span><span class="nf">_initialize_fs</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">scheme</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span> <span class="n">netloc</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">FileSystem</span><span class="p">:</span>
<a id="__codelineno-0-378" name="__codelineno-0-378"></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;Initialize FileSystem for different scheme.&quot;&quot;&quot;</span>
<a id="__codelineno-0-379" name="__codelineno-0-379"></a> <span class="k">if</span> <span class="n">scheme</span> <span class="ow">in</span> <span class="p">{</span><span class="s2">&quot;oss&quot;</span><span class="p">}:</span>
<a id="__codelineno-0-380" name="__codelineno-0-380"></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_initialize_oss_fs</span><span class="p">()</span>
<a id="__codelineno-0-381" name="__codelineno-0-381"></a>
<a id="__codelineno-0-382" name="__codelineno-0-382"></a> <span class="k">elif</span> <span class="n">scheme</span> <span class="ow">in</span> <span class="p">{</span><span class="s2">&quot;s3&quot;</span><span class="p">,</span> <span class="s2">&quot;s3a&quot;</span><span class="p">,</span> <span class="s2">&quot;s3n&quot;</span><span class="p">}:</span>
<a id="__codelineno-0-383" name="__codelineno-0-383"></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_initialize_s3_fs</span><span class="p">(</span><span class="n">netloc</span><span class="p">)</span>
<a id="__codelineno-0-384" name="__codelineno-0-384"></a>
<a id="__codelineno-0-385" name="__codelineno-0-385"></a> <span class="k">elif</span> <span class="n">scheme</span> <span class="ow">in</span> <span class="p">{</span><span class="s2">&quot;hdfs&quot;</span><span class="p">,</span> <span class="s2">&quot;viewfs&quot;</span><span class="p">}:</span>
<a id="__codelineno-0-386" name="__codelineno-0-386"></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_initialize_hdfs_fs</span><span class="p">(</span><span class="n">scheme</span><span class="p">,</span> <span class="n">netloc</span><span class="p">)</span>
<a id="__codelineno-0-387" name="__codelineno-0-387"></a>
<a id="__codelineno-0-388" name="__codelineno-0-388"></a> <span class="k">elif</span> <span class="n">scheme</span> <span class="ow">in</span> <span class="p">{</span><span class="s2">&quot;gs&quot;</span><span class="p">,</span> <span class="s2">&quot;gcs&quot;</span><span class="p">}:</span>
<a id="__codelineno-0-389" name="__codelineno-0-389"></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_initialize_gcs_fs</span><span class="p">()</span>
<a id="__codelineno-0-390" name="__codelineno-0-390"></a>
<a id="__codelineno-0-391" name="__codelineno-0-391"></a> <span class="k">elif</span> <span class="n">scheme</span> <span class="ow">in</span> <span class="p">{</span><span class="s2">&quot;file&quot;</span><span class="p">}:</span>
<a id="__codelineno-0-392" name="__codelineno-0-392"></a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_initialize_local_fs</span><span class="p">()</span>
<a id="__codelineno-0-393" name="__codelineno-0-393"></a>
<a id="__codelineno-0-394" name="__codelineno-0-394"></a> <span class="k">else</span><span class="p">:</span>
<a id="__codelineno-0-395" name="__codelineno-0-395"></a> <span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="sa">f</span><span class="s2">&quot;Unrecognized filesystem type in URI: </span><span class="si">{</span><span class="n">scheme</span><span class="si">}</span><span class="s2">&quot;</span><span class="p">)</span>
</code></pre></div></td></tr></table></div>
</details>
</div>
</div>
<div class="doc doc-object doc-function">
<h3 id="pyiceberg.io.pyarrow.PyArrowFileIO.delete" class="doc doc-heading">
<code class="highlight language-python"><span class="n">delete</span><span class="p">(</span><span class="n">location</span><span class="p">)</span></code>
<a href="#pyiceberg.io.pyarrow.PyArrowFileIO.delete" class="headerlink" title="Permanent link">&para;</a></h3>
<div class="doc doc-contents ">
<p>Delete the file at the given location.</p>
<p><span class="doc-section-title">Parameters:</span></p>
<table>
<thead>
<tr>
<th>Name</th>
<th>Type</th>
<th>Description</th>
<th>Default</th>
</tr>
</thead>
<tbody>
<tr class="doc-section-item">
<td>
<code>location</code>
</td>
<td>
<code><span title="typing.Union">Union</span>[<span title="str">str</span>, <a class="autorefs autorefs-internal" title="pyiceberg.io.InputFile" href="../#pyiceberg.io.InputFile">InputFile</a>, <a class="autorefs autorefs-internal" title="pyiceberg.io.OutputFile" href="../#pyiceberg.io.OutputFile">OutputFile</a>]</code>
</td>
<td>
<div class="doc-md-description">
<p>The URI to the file. If an InputFile instance or an OutputFile instance is provided,
the location attribute of that instance is used as the location to delete.</p>
</div>
</td>
<td>
<em>required</em>
</td>
</tr>
</tbody>
</table>
<p><span class="doc-section-title">Raises:</span></p>
<table>
<thead>
<tr>
<th>Type</th>
<th>Description</th>
</tr>
</thead>
<tbody>
<tr class="doc-section-item">
<td>
<code><span title="FileNotFoundError">FileNotFoundError</span></code>
</td>
<td>
<div class="doc-md-description">
<p>When the file at the provided location does not exist.</p>
</div>
</td>
</tr>
<tr class="doc-section-item">
<td>
<code><span title="PermissionError">PermissionError</span></code>
</td>
<td>
<div class="doc-md-description">
<p>If the file at the provided location cannot be accessed due to a permission error, such as
errno 13 or an AWS error code 15.</p>
</div>
</td>
</tr>
</tbody>
</table>
<details class="quote">
<summary>Source code in <code>pyiceberg/io/pyarrow.py</code></summary>
<div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-546">546</a></span>
<span class="normal"><a href="#__codelineno-0-547">547</a></span>
<span class="normal"><a href="#__codelineno-0-548">548</a></span>
<span class="normal"><a href="#__codelineno-0-549">549</a></span>
<span class="normal"><a href="#__codelineno-0-550">550</a></span>
<span class="normal"><a href="#__codelineno-0-551">551</a></span>
<span class="normal"><a href="#__codelineno-0-552">552</a></span>
<span class="normal"><a href="#__codelineno-0-553">553</a></span>
<span class="normal"><a href="#__codelineno-0-554">554</a></span>
<span class="normal"><a href="#__codelineno-0-555">555</a></span>
<span class="normal"><a href="#__codelineno-0-556">556</a></span>
<span class="normal"><a href="#__codelineno-0-557">557</a></span>
<span class="normal"><a href="#__codelineno-0-558">558</a></span>
<span class="normal"><a href="#__codelineno-0-559">559</a></span>
<span class="normal"><a href="#__codelineno-0-560">560</a></span>
<span class="normal"><a href="#__codelineno-0-561">561</a></span>
<span class="normal"><a href="#__codelineno-0-562">562</a></span>
<span class="normal"><a href="#__codelineno-0-563">563</a></span>
<span class="normal"><a href="#__codelineno-0-564">564</a></span>
<span class="normal"><a href="#__codelineno-0-565">565</a></span>
<span class="normal"><a href="#__codelineno-0-566">566</a></span>
<span class="normal"><a href="#__codelineno-0-567">567</a></span>
<span class="normal"><a href="#__codelineno-0-568">568</a></span>
<span class="normal"><a href="#__codelineno-0-569">569</a></span>
<span class="normal"><a href="#__codelineno-0-570">570</a></span>
<span class="normal"><a href="#__codelineno-0-571">571</a></span>
<span class="normal"><a href="#__codelineno-0-572">572</a></span>
<span class="normal"><a href="#__codelineno-0-573">573</a></span></pre></div></td><td class="code"><div><pre><span></span><code><a id="__codelineno-0-546" name="__codelineno-0-546"></a><span class="k">def</span><span class="w"> </span><span class="nf">delete</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">location</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">InputFile</span><span class="p">,</span> <span class="n">OutputFile</span><span class="p">])</span> <span class="o">-&gt;</span> <span class="kc">None</span><span class="p">:</span>
<a id="__codelineno-0-547" name="__codelineno-0-547"></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;Delete the file at the given location.</span>
<a id="__codelineno-0-548" name="__codelineno-0-548"></a>
<a id="__codelineno-0-549" name="__codelineno-0-549"></a><span class="sd"> Args:</span>
<a id="__codelineno-0-550" name="__codelineno-0-550"></a><span class="sd"> location (Union[str, InputFile, OutputFile]): The URI to the file--if an InputFile instance or an OutputFile instance is provided,</span>
<a id="__codelineno-0-551" name="__codelineno-0-551"></a><span class="sd"> the location attribute for that instance is used as the location to delete.</span>
<a id="__codelineno-0-552" name="__codelineno-0-552"></a>
<a id="__codelineno-0-553" name="__codelineno-0-553"></a><span class="sd"> Raises:</span>
<a id="__codelineno-0-554" name="__codelineno-0-554"></a><span class="sd"> FileNotFoundError: When the file at the provided location does not exist.</span>
<a id="__codelineno-0-555" name="__codelineno-0-555"></a><span class="sd"> PermissionError: If the file at the provided location cannot be accessed due to a permission error such as</span>
<a id="__codelineno-0-556" name="__codelineno-0-556"></a><span class="sd"> an AWS error code 15.</span>
<a id="__codelineno-0-557" name="__codelineno-0-557"></a><span class="sd"> &quot;&quot;&quot;</span>
<a id="__codelineno-0-558" name="__codelineno-0-558"></a> <span class="n">str_location</span> <span class="o">=</span> <span class="n">location</span><span class="o">.</span><span class="n">location</span> <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">location</span><span class="p">,</span> <span class="p">(</span><span class="n">InputFile</span><span class="p">,</span> <span class="n">OutputFile</span><span class="p">))</span> <span class="k">else</span> <span class="n">location</span>
<a id="__codelineno-0-559" name="__codelineno-0-559"></a> <span class="n">scheme</span><span class="p">,</span> <span class="n">netloc</span><span class="p">,</span> <span class="n">path</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">parse_location</span><span class="p">(</span><span class="n">str_location</span><span class="p">)</span>
<a id="__codelineno-0-560" name="__codelineno-0-560"></a> <span class="n">fs</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">fs_by_scheme</span><span class="p">(</span><span class="n">scheme</span><span class="p">,</span> <span class="n">netloc</span><span class="p">)</span>
<a id="__codelineno-0-561" name="__codelineno-0-561"></a>
<a id="__codelineno-0-562" name="__codelineno-0-562"></a> <span class="k">try</span><span class="p">:</span>
<a id="__codelineno-0-563" name="__codelineno-0-563"></a> <span class="n">fs</span><span class="o">.</span><span class="n">delete_file</span><span class="p">(</span><span class="n">path</span><span class="p">)</span>
<a id="__codelineno-0-564" name="__codelineno-0-564"></a> <span class="k">except</span> <span class="ne">FileNotFoundError</span><span class="p">:</span>
<a id="__codelineno-0-565" name="__codelineno-0-565"></a> <span class="k">raise</span>
<a id="__codelineno-0-566" name="__codelineno-0-566"></a> <span class="k">except</span> <span class="ne">PermissionError</span><span class="p">:</span>
<a id="__codelineno-0-567" name="__codelineno-0-567"></a> <span class="k">raise</span>
<a id="__codelineno-0-568" name="__codelineno-0-568"></a> <span class="k">except</span> <span class="ne">OSError</span> <span class="k">as</span> <span class="n">e</span><span class="p">:</span>
<a id="__codelineno-0-569" name="__codelineno-0-569"></a> <span class="k">if</span> <span class="n">e</span><span class="o">.</span><span class="n">errno</span> <span class="o">==</span> <span class="mi">2</span> <span class="ow">or</span> <span class="s2">&quot;Path does not exist&quot;</span> <span class="ow">in</span> <span class="nb">str</span><span class="p">(</span><span class="n">e</span><span class="p">):</span>
<a id="__codelineno-0-570" name="__codelineno-0-570"></a> <span class="k">raise</span> <span class="ne">FileNotFoundError</span><span class="p">(</span><span class="sa">f</span><span class="s2">&quot;Cannot delete file, does not exist: </span><span class="si">{</span><span class="n">location</span><span class="si">}</span><span class="s2">&quot;</span><span class="p">)</span> <span class="kn">from</span><span class="w"> </span><span class="nn">e</span>
<a id="__codelineno-0-571" name="__codelineno-0-571"></a> <span class="k">elif</span> <span class="n">e</span><span class="o">.</span><span class="n">errno</span> <span class="o">==</span> <span class="mi">13</span> <span class="ow">or</span> <span class="s2">&quot;AWS Error [code 15]&quot;</span> <span class="ow">in</span> <span class="nb">str</span><span class="p">(</span><span class="n">e</span><span class="p">):</span>
<a id="__codelineno-0-572" name="__codelineno-0-572"></a> <span class="k">raise</span> <span class="ne">PermissionError</span><span class="p">(</span><span class="sa">f</span><span class="s2">&quot;Cannot delete file, access denied: </span><span class="si">{</span><span class="n">location</span><span class="si">}</span><span class="s2">&quot;</span><span class="p">)</span> <span class="kn">from</span><span class="w"> </span><span class="nn">e</span>
<a id="__codelineno-0-573" name="__codelineno-0-573"></a> <span class="k">raise</span> <span class="c1"># pragma: no cover - If some other kind of OSError, raise the raw error</span>
</code></pre></div></td></tr></table></div>
</details>
</div>
</div>
<div class="doc doc-object doc-function">
<h3 id="pyiceberg.io.pyarrow.PyArrowFileIO.new_input" class="doc doc-heading">
<code class="highlight language-python"><span class="n">new_input</span><span class="p">(</span><span class="n">location</span><span class="p">)</span></code>
<a href="#pyiceberg.io.pyarrow.PyArrowFileIO.new_input" class="headerlink" title="Permanent link">&para;</a></h3>
<div class="doc doc-contents ">
<p>Get a PyArrowFile instance to read bytes from the file at the given location.</p>
<p><span class="doc-section-title">Parameters:</span></p>
<table>
<thead>
<tr>
<th>Name</th>
<th>Type</th>
<th>Description</th>
<th>Default</th>
</tr>
</thead>
<tbody>
<tr class="doc-section-item">
<td>
<code>location</code>
</td>
<td>
<code><span title="str">str</span></code>
</td>
<td>
<div class="doc-md-description">
<p>A URI or a path to a local file.</p>
</div>
</td>
<td>
<em>required</em>
</td>
</tr>
</tbody>
</table>
<p><span class="doc-section-title">Returns:</span></p>
<table>
<thead>
<tr>
<th>Name</th> <th>Type</th>
<th>Description</th>
</tr>
</thead>
<tbody>
<tr class="doc-section-item">
<td><code>PyArrowFile</code></td> <td>
<code><a class="autorefs autorefs-internal" title="pyiceberg.io.pyarrow.PyArrowFile" href="#pyiceberg.io.pyarrow.PyArrowFile">PyArrowFile</a></code>
</td>
<td>
<div class="doc-md-description">
<p>A PyArrowFile instance for the given location.</p>
</div>
</td>
</tr>
</tbody>
</table>
<details class="quote">
<summary>Source code in <code>pyiceberg/io/pyarrow.py</code></summary>
<div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-512">512</a></span>
<span class="normal"><a href="#__codelineno-0-513">513</a></span>
<span class="normal"><a href="#__codelineno-0-514">514</a></span>
<span class="normal"><a href="#__codelineno-0-515">515</a></span>
<span class="normal"><a href="#__codelineno-0-516">516</a></span>
<span class="normal"><a href="#__codelineno-0-517">517</a></span>
<span class="normal"><a href="#__codelineno-0-518">518</a></span>
<span class="normal"><a href="#__codelineno-0-519">519</a></span>
<span class="normal"><a href="#__codelineno-0-520">520</a></span>
<span class="normal"><a href="#__codelineno-0-521">521</a></span>
<span class="normal"><a href="#__codelineno-0-522">522</a></span>
<span class="normal"><a href="#__codelineno-0-523">523</a></span>
<span class="normal"><a href="#__codelineno-0-524">524</a></span>
<span class="normal"><a href="#__codelineno-0-525">525</a></span>
<span class="normal"><a href="#__codelineno-0-526">526</a></span>
<span class="normal"><a href="#__codelineno-0-527">527</a></span></pre></div></td><td class="code"><div><pre><span></span><code><a id="__codelineno-0-512" name="__codelineno-0-512"></a><span class="k">def</span><span class="w"> </span><span class="nf">new_input</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">location</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">PyArrowFile</span><span class="p">:</span>
<a id="__codelineno-0-513" name="__codelineno-0-513"></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;Get a PyArrowFile instance to read bytes from the file at the given location.</span>
<a id="__codelineno-0-514" name="__codelineno-0-514"></a>
<a id="__codelineno-0-515" name="__codelineno-0-515"></a><span class="sd"> Args:</span>
<a id="__codelineno-0-516" name="__codelineno-0-516"></a><span class="sd"> location (str): A URI or a path to a local file.</span>
<a id="__codelineno-0-517" name="__codelineno-0-517"></a>
<a id="__codelineno-0-518" name="__codelineno-0-518"></a><span class="sd"> Returns:</span>
<a id="__codelineno-0-519" name="__codelineno-0-519"></a><span class="sd"> PyArrowFile: A PyArrowFile instance for the given location.</span>
<a id="__codelineno-0-520" name="__codelineno-0-520"></a><span class="sd"> &quot;&quot;&quot;</span>
<a id="__codelineno-0-521" name="__codelineno-0-521"></a> <span class="n">scheme</span><span class="p">,</span> <span class="n">netloc</span><span class="p">,</span> <span class="n">path</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">parse_location</span><span class="p">(</span><span class="n">location</span><span class="p">)</span>
<a id="__codelineno-0-522" name="__codelineno-0-522"></a> <span class="k">return</span> <span class="n">PyArrowFile</span><span class="p">(</span>
<a id="__codelineno-0-523" name="__codelineno-0-523"></a> <span class="n">fs</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">fs_by_scheme</span><span class="p">(</span><span class="n">scheme</span><span class="p">,</span> <span class="n">netloc</span><span class="p">),</span>
<a id="__codelineno-0-524" name="__codelineno-0-524"></a> <span class="n">location</span><span class="o">=</span><span class="n">location</span><span class="p">,</span>
<a id="__codelineno-0-525" name="__codelineno-0-525"></a> <span class="n">path</span><span class="o">=</span><span class="n">path</span><span class="p">,</span>
<a id="__codelineno-0-526" name="__codelineno-0-526"></a> <span class="n">buffer_size</span><span class="o">=</span><span class="nb">int</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">properties</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">BUFFER_SIZE</span><span class="p">,</span> <span class="n">ONE_MEGABYTE</span><span class="p">)),</span>
<a id="__codelineno-0-527" name="__codelineno-0-527"></a> <span class="p">)</span>
</code></pre></div></td></tr></table></div>
</details>
</div>
</div>
<div class="doc doc-object doc-function">
<h3 id="pyiceberg.io.pyarrow.PyArrowFileIO.new_output" class="doc doc-heading">
<code class="highlight language-python"><span class="n">new_output</span><span class="p">(</span><span class="n">location</span><span class="p">)</span></code>
<a href="#pyiceberg.io.pyarrow.PyArrowFileIO.new_output" class="headerlink" title="Permanent link">&para;</a></h3>
<div class="doc doc-contents ">
<p>Get a PyArrowFile instance to write bytes to the file at the given location.</p>
<p><span class="doc-section-title">Parameters:</span></p>
<table>
<thead>
<tr>
<th>Name</th>
<th>Type</th>
<th>Description</th>
<th>Default</th>
</tr>
</thead>
<tbody>
<tr class="doc-section-item">
<td>
<code>location</code>
</td>
<td>
<code><span title="str">str</span></code>
</td>
<td>
<div class="doc-md-description">
<p>A URI or a path to a local file.</p>
</div>
</td>
<td>
<em>required</em>
</td>
</tr>
</tbody>
</table>
<p><span class="doc-section-title">Returns:</span></p>
<table>
<thead>
<tr>
<th>Name</th> <th>Type</th>
<th>Description</th>
</tr>
</thead>
<tbody>
<tr class="doc-section-item">
<td><code>PyArrowFile</code></td> <td>
<code><a class="autorefs autorefs-internal" title="pyiceberg.io.pyarrow.PyArrowFile" href="#pyiceberg.io.pyarrow.PyArrowFile">PyArrowFile</a></code>
</td>
<td>
<div class="doc-md-description">
<p>A PyArrowFile instance for the given location.</p>
</div>
</td>
</tr>
</tbody>
</table>
<details class="quote">
<summary>Source code in <code>pyiceberg/io/pyarrow.py</code></summary>
<div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-529">529</a></span>
<span class="normal"><a href="#__codelineno-0-530">530</a></span>
<span class="normal"><a href="#__codelineno-0-531">531</a></span>
<span class="normal"><a href="#__codelineno-0-532">532</a></span>
<span class="normal"><a href="#__codelineno-0-533">533</a></span>
<span class="normal"><a href="#__codelineno-0-534">534</a></span>
<span class="normal"><a href="#__codelineno-0-535">535</a></span>
<span class="normal"><a href="#__codelineno-0-536">536</a></span>
<span class="normal"><a href="#__codelineno-0-537">537</a></span>
<span class="normal"><a href="#__codelineno-0-538">538</a></span>
<span class="normal"><a href="#__codelineno-0-539">539</a></span>
<span class="normal"><a href="#__codelineno-0-540">540</a></span>
<span class="normal"><a href="#__codelineno-0-541">541</a></span>
<span class="normal"><a href="#__codelineno-0-542">542</a></span>
<span class="normal"><a href="#__codelineno-0-543">543</a></span>
<span class="normal"><a href="#__codelineno-0-544">544</a></span></pre></div></td><td class="code"><div><pre><span></span><code><a id="__codelineno-0-529" name="__codelineno-0-529"></a><span class="k">def</span><span class="w"> </span><span class="nf">new_output</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">location</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">PyArrowFile</span><span class="p">:</span>
<a id="__codelineno-0-530" name="__codelineno-0-530"></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;Get a PyArrowFile instance to write bytes to the file at the given location.</span>
<a id="__codelineno-0-531" name="__codelineno-0-531"></a>
<a id="__codelineno-0-532" name="__codelineno-0-532"></a><span class="sd"> Args:</span>
<a id="__codelineno-0-533" name="__codelineno-0-533"></a><span class="sd"> location (str): A URI or a path to a local file.</span>
<a id="__codelineno-0-534" name="__codelineno-0-534"></a>
<a id="__codelineno-0-535" name="__codelineno-0-535"></a><span class="sd"> Returns:</span>
<a id="__codelineno-0-536" name="__codelineno-0-536"></a><span class="sd"> PyArrowFile: A PyArrowFile instance for the given location.</span>
<a id="__codelineno-0-537" name="__codelineno-0-537"></a><span class="sd"> &quot;&quot;&quot;</span>
<a id="__codelineno-0-538" name="__codelineno-0-538"></a> <span class="n">scheme</span><span class="p">,</span> <span class="n">netloc</span><span class="p">,</span> <span class="n">path</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">parse_location</span><span class="p">(</span><span class="n">location</span><span class="p">)</span>
<a id="__codelineno-0-539" name="__codelineno-0-539"></a> <span class="k">return</span> <span class="n">PyArrowFile</span><span class="p">(</span>
<a id="__codelineno-0-540" name="__codelineno-0-540"></a> <span class="n">fs</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">fs_by_scheme</span><span class="p">(</span><span class="n">scheme</span><span class="p">,</span> <span class="n">netloc</span><span class="p">),</span>
<a id="__codelineno-0-541" name="__codelineno-0-541"></a> <span class="n">location</span><span class="o">=</span><span class="n">location</span><span class="p">,</span>
<a id="__codelineno-0-542" name="__codelineno-0-542"></a> <span class="n">path</span><span class="o">=</span><span class="n">path</span><span class="p">,</span>
<a id="__codelineno-0-543" name="__codelineno-0-543"></a> <span class="n">buffer_size</span><span class="o">=</span><span class="nb">int</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">properties</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">BUFFER_SIZE</span><span class="p">,</span> <span class="n">ONE_MEGABYTE</span><span class="p">)),</span>
<a id="__codelineno-0-544" name="__codelineno-0-544"></a> <span class="p">)</span>
</code></pre></div></td></tr></table></div>
</details>
</div>
</div>
<div class="doc doc-object doc-function">
<h3 id="pyiceberg.io.pyarrow.PyArrowFileIO.parse_location" class="doc doc-heading">
<code class="highlight language-python"><span class="n">parse_location</span><span class="p">(</span><span class="n">location</span><span class="p">)</span></code>
<span class="doc doc-labels">
<small class="doc doc-label doc-label-staticmethod"><code>staticmethod</code></small>
</span>
<a href="#pyiceberg.io.pyarrow.PyArrowFileIO.parse_location" class="headerlink" title="Permanent link">&para;</a></h3>
<div class="doc doc-contents ">
<p>Split a location into its scheme, netloc, and path; the returned path does not include the scheme.</p>
<details class="quote">
<summary>Source code in <code>pyiceberg/io/pyarrow.py</code></summary>
<div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-366">366</a></span>
<span class="normal"><a href="#__codelineno-0-367">367</a></span>
<span class="normal"><a href="#__codelineno-0-368">368</a></span>
<span class="normal"><a href="#__codelineno-0-369">369</a></span>
<span class="normal"><a href="#__codelineno-0-370">370</a></span>
<span class="normal"><a href="#__codelineno-0-371">371</a></span>
<span class="normal"><a href="#__codelineno-0-372">372</a></span>
<span class="normal"><a href="#__codelineno-0-373">373</a></span>
<span class="normal"><a href="#__codelineno-0-374">374</a></span>
<span class="normal"><a href="#__codelineno-0-375">375</a></span></pre></div></td><td class="code"><div><pre><span></span><code><a id="__codelineno-0-366" name="__codelineno-0-366"></a><span class="nd">@staticmethod</span>
<a id="__codelineno-0-367" name="__codelineno-0-367"></a><span class="k">def</span><span class="w"> </span><span class="nf">parse_location</span><span class="p">(</span><span class="n">location</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">Tuple</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="nb">str</span><span class="p">,</span> <span class="nb">str</span><span class="p">]:</span>
<a id="__codelineno-0-368" name="__codelineno-0-368"></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;Return the path without the scheme.&quot;&quot;&quot;</span>
<a id="__codelineno-0-369" name="__codelineno-0-369"></a> <span class="n">uri</span> <span class="o">=</span> <span class="n">urlparse</span><span class="p">(</span><span class="n">location</span><span class="p">)</span>
<a id="__codelineno-0-370" name="__codelineno-0-370"></a> <span class="k">if</span> <span class="ow">not</span> <span class="n">uri</span><span class="o">.</span><span class="n">scheme</span><span class="p">:</span>
<a id="__codelineno-0-371" name="__codelineno-0-371"></a> <span class="k">return</span> <span class="s2">&quot;file&quot;</span><span class="p">,</span> <span class="n">uri</span><span class="o">.</span><span class="n">netloc</span><span class="p">,</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">abspath</span><span class="p">(</span><span class="n">location</span><span class="p">)</span>
<a id="__codelineno-0-372" name="__codelineno-0-372"></a> <span class="k">elif</span> <span class="n">uri</span><span class="o">.</span><span class="n">scheme</span> <span class="ow">in</span> <span class="p">(</span><span class="s2">&quot;hdfs&quot;</span><span class="p">,</span> <span class="s2">&quot;viewfs&quot;</span><span class="p">):</span>
<a id="__codelineno-0-373" name="__codelineno-0-373"></a> <span class="k">return</span> <span class="n">uri</span><span class="o">.</span><span class="n">scheme</span><span class="p">,</span> <span class="n">uri</span><span class="o">.</span><span class="n">netloc</span><span class="p">,</span> <span class="n">uri</span><span class="o">.</span><span class="n">path</span>
<a id="__codelineno-0-374" name="__codelineno-0-374"></a> <span class="k">else</span><span class="p">:</span>
<a id="__codelineno-0-375" name="__codelineno-0-375"></a> <span class="k">return</span> <span class="n">uri</span><span class="o">.</span><span class="n">scheme</span><span class="p">,</span> <span class="n">uri</span><span class="o">.</span><span class="n">netloc</span><span class="p">,</span> <span class="sa">f</span><span class="s2">&quot;</span><span class="si">{</span><span class="n">uri</span><span class="o">.</span><span class="n">netloc</span><span class="si">}{</span><span class="n">uri</span><span class="o">.</span><span class="n">path</span><span class="si">}</span><span class="s2">&quot;</span>
</code></pre></div></td></tr></table></div>
</details>
</div>
</div>
</div>
</div>
</div>
<div class="doc doc-object doc-class">
<h2 id="pyiceberg.io.pyarrow.PyArrowSchemaVisitor" class="doc doc-heading">
<code>PyArrowSchemaVisitor</code>
<a href="#pyiceberg.io.pyarrow.PyArrowSchemaVisitor" class="headerlink" title="Permanent link">&para;</a></h2>
<div class="doc doc-contents ">
<p class="doc doc-class-bases">
Bases: <code><span title="typing.Generic">Generic</span>[<span title="pyiceberg.io.pyarrow.T">T</span>]</code>, <code><span title="abc.ABC">ABC</span></code></p>
<details class="quote">
<summary>Source code in <code>pyiceberg/io/pyarrow.py</code></summary>
<div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-1044">1044</a></span>
<span class="normal"><a href="#__codelineno-0-1045">1045</a></span>
<span class="normal"><a href="#__codelineno-0-1046">1046</a></span>
<span class="normal"><a href="#__codelineno-0-1047">1047</a></span>
<span class="normal"><a href="#__codelineno-0-1048">1048</a></span>
<span class="normal"><a href="#__codelineno-0-1049">1049</a></span>
<span class="normal"><a href="#__codelineno-0-1050">1050</a></span>
<span class="normal"><a href="#__codelineno-0-1051">1051</a></span>
<span class="normal"><a href="#__codelineno-0-1052">1052</a></span>
<span class="normal"><a href="#__codelineno-0-1053">1053</a></span>
<span class="normal"><a href="#__codelineno-0-1054">1054</a></span>
<span class="normal"><a href="#__codelineno-0-1055">1055</a></span>
<span class="normal"><a href="#__codelineno-0-1056">1056</a></span>
<span class="normal"><a href="#__codelineno-0-1057">1057</a></span>
<span class="normal"><a href="#__codelineno-0-1058">1058</a></span>
<span class="normal"><a href="#__codelineno-0-1059">1059</a></span>
<span class="normal"><a href="#__codelineno-0-1060">1060</a></span>
<span class="normal"><a href="#__codelineno-0-1061">1061</a></span>
<span class="normal"><a href="#__codelineno-0-1062">1062</a></span>
<span class="normal"><a href="#__codelineno-0-1063">1063</a></span>
<span class="normal"><a href="#__codelineno-0-1064">1064</a></span>
<span class="normal"><a href="#__codelineno-0-1065">1065</a></span>
<span class="normal"><a href="#__codelineno-0-1066">1066</a></span>
<span class="normal"><a href="#__codelineno-0-1067">1067</a></span>
<span class="normal"><a href="#__codelineno-0-1068">1068</a></span>
<span class="normal"><a href="#__codelineno-0-1069">1069</a></span>
<span class="normal"><a href="#__codelineno-0-1070">1070</a></span>
<span class="normal"><a href="#__codelineno-0-1071">1071</a></span>
<span class="normal"><a href="#__codelineno-0-1072">1072</a></span>
<span class="normal"><a href="#__codelineno-0-1073">1073</a></span>
<span class="normal"><a href="#__codelineno-0-1074">1074</a></span>
<span class="normal"><a href="#__codelineno-0-1075">1075</a></span>
<span class="normal"><a href="#__codelineno-0-1076">1076</a></span>
<span class="normal"><a href="#__codelineno-0-1077">1077</a></span>
<span class="normal"><a href="#__codelineno-0-1078">1078</a></span>
<span class="normal"><a href="#__codelineno-0-1079">1079</a></span>
<span class="normal"><a href="#__codelineno-0-1080">1080</a></span>
<span class="normal"><a href="#__codelineno-0-1081">1081</a></span>
<span class="normal"><a href="#__codelineno-0-1082">1082</a></span>
<span class="normal"><a href="#__codelineno-0-1083">1083</a></span>
<span class="normal"><a href="#__codelineno-0-1084">1084</a></span>
<span class="normal"><a href="#__codelineno-0-1085">1085</a></span>
<span class="normal"><a href="#__codelineno-0-1086">1086</a></span>
<span class="normal"><a href="#__codelineno-0-1087">1087</a></span>
<span class="normal"><a href="#__codelineno-0-1088">1088</a></span>
<span class="normal"><a href="#__codelineno-0-1089">1089</a></span>
<span class="normal"><a href="#__codelineno-0-1090">1090</a></span>
<span class="normal"><a href="#__codelineno-0-1091">1091</a></span></pre></div></td><td class="code"><div><pre><span></span><code><a id="__codelineno-0-1044" name="__codelineno-0-1044"></a><span class="k">class</span><span class="w"> </span><span class="nc">PyArrowSchemaVisitor</span><span class="p">(</span><span class="n">Generic</span><span class="p">[</span><span class="n">T</span><span class="p">],</span> <span class="n">ABC</span><span class="p">):</span>
<a id="__codelineno-0-1045" name="__codelineno-0-1045"></a> <span class="k">def</span><span class="w"> </span><span class="nf">before_field</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">field</span><span class="p">:</span> <span class="n">pa</span><span class="o">.</span><span class="n">Field</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="kc">None</span><span class="p">:</span>
<a id="__codelineno-0-1046" name="__codelineno-0-1046"></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;Override this method to perform an action immediately before visiting a field.&quot;&quot;&quot;</span>
<a id="__codelineno-0-1047" name="__codelineno-0-1047"></a>
<a id="__codelineno-0-1048" name="__codelineno-0-1048"></a> <span class="k">def</span><span class="w"> </span><span class="nf">after_field</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">field</span><span class="p">:</span> <span class="n">pa</span><span class="o">.</span><span class="n">Field</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="kc">None</span><span class="p">:</span>
<a id="__codelineno-0-1049" name="__codelineno-0-1049"></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;Override this method to perform an action immediately after visiting a field.&quot;&quot;&quot;</span>
<a id="__codelineno-0-1050" name="__codelineno-0-1050"></a>
<a id="__codelineno-0-1051" name="__codelineno-0-1051"></a> <span class="k">def</span><span class="w"> </span><span class="nf">before_list_element</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">element</span><span class="p">:</span> <span class="n">pa</span><span class="o">.</span><span class="n">Field</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="kc">None</span><span class="p">:</span>
<a id="__codelineno-0-1052" name="__codelineno-0-1052"></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;Override this method to perform an action immediately before visiting an element within a ListType.&quot;&quot;&quot;</span>
<a id="__codelineno-0-1053" name="__codelineno-0-1053"></a>
<a id="__codelineno-0-1054" name="__codelineno-0-1054"></a> <span class="k">def</span><span class="w"> </span><span class="nf">after_list_element</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">element</span><span class="p">:</span> <span class="n">pa</span><span class="o">.</span><span class="n">Field</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="kc">None</span><span class="p">:</span>
<a id="__codelineno-0-1055" name="__codelineno-0-1055"></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;Override this method to perform an action immediately after visiting an element within a ListType.&quot;&quot;&quot;</span>
<a id="__codelineno-0-1056" name="__codelineno-0-1056"></a>
<a id="__codelineno-0-1057" name="__codelineno-0-1057"></a> <span class="k">def</span><span class="w"> </span><span class="nf">before_map_key</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">key</span><span class="p">:</span> <span class="n">pa</span><span class="o">.</span><span class="n">Field</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="kc">None</span><span class="p">:</span>
<a id="__codelineno-0-1058" name="__codelineno-0-1058"></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;Override this method to perform an action immediately before visiting a key within a MapType.&quot;&quot;&quot;</span>
<a id="__codelineno-0-1059" name="__codelineno-0-1059"></a>
<a id="__codelineno-0-1060" name="__codelineno-0-1060"></a> <span class="k">def</span><span class="w"> </span><span class="nf">after_map_key</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">key</span><span class="p">:</span> <span class="n">pa</span><span class="o">.</span><span class="n">Field</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="kc">None</span><span class="p">:</span>
<a id="__codelineno-0-1061" name="__codelineno-0-1061"></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;Override this method to perform an action immediately after visiting a key within a MapType.&quot;&quot;&quot;</span>
<a id="__codelineno-0-1062" name="__codelineno-0-1062"></a>
<a id="__codelineno-0-1063" name="__codelineno-0-1063"></a> <span class="k">def</span><span class="w"> </span><span class="nf">before_map_value</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="n">pa</span><span class="o">.</span><span class="n">Field</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="kc">None</span><span class="p">:</span>
<a id="__codelineno-0-1064" name="__codelineno-0-1064"></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;Override this method to perform an action immediately before visiting a value within a MapType.&quot;&quot;&quot;</span>
<a id="__codelineno-0-1065" name="__codelineno-0-1065"></a>
<a id="__codelineno-0-1066" name="__codelineno-0-1066"></a> <span class="k">def</span><span class="w"> </span><span class="nf">after_map_value</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="n">pa</span><span class="o">.</span><span class="n">Field</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="kc">None</span><span class="p">:</span>
<a id="__codelineno-0-1067" name="__codelineno-0-1067"></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;Override this method to perform an action immediately after visiting a value within a MapType.&quot;&quot;&quot;</span>
<a id="__codelineno-0-1068" name="__codelineno-0-1068"></a>
<a id="__codelineno-0-1069" name="__codelineno-0-1069"></a> <span class="nd">@abstractmethod</span>
<a id="__codelineno-0-1070" name="__codelineno-0-1070"></a> <span class="k">def</span><span class="w"> </span><span class="nf">schema</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">schema</span><span class="p">:</span> <span class="n">pa</span><span class="o">.</span><span class="n">Schema</span><span class="p">,</span> <span class="n">struct_result</span><span class="p">:</span> <span class="n">T</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">T</span><span class="p">:</span>
<a id="__codelineno-0-1071" name="__codelineno-0-1071"></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;Visit a schema.&quot;&quot;&quot;</span>
<a id="__codelineno-0-1072" name="__codelineno-0-1072"></a>
<a id="__codelineno-0-1073" name="__codelineno-0-1073"></a> <span class="nd">@abstractmethod</span>
<a id="__codelineno-0-1074" name="__codelineno-0-1074"></a> <span class="k">def</span><span class="w"> </span><span class="nf">struct</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">struct</span><span class="p">:</span> <span class="n">pa</span><span class="o">.</span><span class="n">StructType</span><span class="p">,</span> <span class="n">field_results</span><span class="p">:</span> <span class="n">List</span><span class="p">[</span><span class="n">T</span><span class="p">])</span> <span class="o">-&gt;</span> <span class="n">T</span><span class="p">:</span>
<a id="__codelineno-0-1075" name="__codelineno-0-1075"></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;Visit a struct.&quot;&quot;&quot;</span>
<a id="__codelineno-0-1076" name="__codelineno-0-1076"></a>
<a id="__codelineno-0-1077" name="__codelineno-0-1077"></a> <span class="nd">@abstractmethod</span>
<a id="__codelineno-0-1078" name="__codelineno-0-1078"></a> <span class="k">def</span><span class="w"> </span><span class="nf">field</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">field</span><span class="p">:</span> <span class="n">pa</span><span class="o">.</span><span class="n">Field</span><span class="p">,</span> <span class="n">field_result</span><span class="p">:</span> <span class="n">T</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">T</span><span class="p">:</span>
<a id="__codelineno-0-1079" name="__codelineno-0-1079"></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;Visit a field.&quot;&quot;&quot;</span>
<a id="__codelineno-0-1080" name="__codelineno-0-1080"></a>
<a id="__codelineno-0-1081" name="__codelineno-0-1081"></a> <span class="nd">@abstractmethod</span>
<a id="__codelineno-0-1082" name="__codelineno-0-1082"></a> <span class="k">def</span><span class="w"> </span><span class="nf">list</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">list_type</span><span class="p">:</span> <span class="n">pa</span><span class="o">.</span><span class="n">ListType</span><span class="p">,</span> <span class="n">element_result</span><span class="p">:</span> <span class="n">T</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">T</span><span class="p">:</span>
<a id="__codelineno-0-1083" name="__codelineno-0-1083"></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;Visit a list.&quot;&quot;&quot;</span>
<a id="__codelineno-0-1084" name="__codelineno-0-1084"></a>
<a id="__codelineno-0-1085" name="__codelineno-0-1085"></a> <span class="nd">@abstractmethod</span>
<a id="__codelineno-0-1086" name="__codelineno-0-1086"></a> <span class="k">def</span><span class="w"> </span><span class="nf">map</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">map_type</span><span class="p">:</span> <span class="n">pa</span><span class="o">.</span><span class="n">MapType</span><span class="p">,</span> <span class="n">key_result</span><span class="p">:</span> <span class="n">T</span><span class="p">,</span> <span class="n">value_result</span><span class="p">:</span> <span class="n">T</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">T</span><span class="p">:</span>
<a id="__codelineno-0-1087" name="__codelineno-0-1087"></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;Visit a map.&quot;&quot;&quot;</span>
<a id="__codelineno-0-1088" name="__codelineno-0-1088"></a>
<a id="__codelineno-0-1089" name="__codelineno-0-1089"></a> <span class="nd">@abstractmethod</span>
<a id="__codelineno-0-1090" name="__codelineno-0-1090"></a> <span class="k">def</span><span class="w"> </span><span class="nf">primitive</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">primitive</span><span class="p">:</span> <span class="n">pa</span><span class="o">.</span><span class="n">DataType</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">T</span><span class="p">:</span>
<a id="__codelineno-0-1091" name="__codelineno-0-1091"></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;Visit a primitive type.&quot;&quot;&quot;</span>
</code></pre></div></td></tr></table></div>
</details>
<div class="doc doc-children">
<div class="doc doc-object doc-function">
<h3 id="pyiceberg.io.pyarrow.PyArrowSchemaVisitor.after_field" class="doc doc-heading">
<code class="highlight language-python"><span class="n">after_field</span><span class="p">(</span><span class="n">field</span><span class="p">)</span></code>
<a href="#pyiceberg.io.pyarrow.PyArrowSchemaVisitor.after_field" class="headerlink" title="Permanent link">&para;</a></h3>
<div class="doc doc-contents ">
<p>Override this method to perform an action immediately after visiting a field.</p>
<details class="quote">
<summary>Source code in <code>pyiceberg/io/pyarrow.py</code></summary>
<div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-1048">1048</a></span>
<span class="normal"><a href="#__codelineno-0-1049">1049</a></span></pre></div></td><td class="code"><div><pre><span></span><code><a id="__codelineno-0-1048" name="__codelineno-0-1048"></a><span class="k">def</span><span class="w"> </span><span class="nf">after_field</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">field</span><span class="p">:</span> <span class="n">pa</span><span class="o">.</span><span class="n">Field</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="kc">None</span><span class="p">:</span>
<a id="__codelineno-0-1049" name="__codelineno-0-1049"></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;Override this method to perform an action immediately after visiting a field.&quot;&quot;&quot;</span>
</code></pre></div></td></tr></table></div>
</details>
</div>
</div>
<div class="doc doc-object doc-function">
<h3 id="pyiceberg.io.pyarrow.PyArrowSchemaVisitor.after_list_element" class="doc doc-heading">
<code class="highlight language-python"><span class="n">after_list_element</span><span class="p">(</span><span class="n">element</span><span class="p">)</span></code>
<a href="#pyiceberg.io.pyarrow.PyArrowSchemaVisitor.after_list_element" class="headerlink" title="Permanent link">&para;</a></h3>
<div class="doc doc-contents ">
<p>Override this method to perform an action immediately after visiting an element within a ListType.</p>
<details class="quote">
<summary>Source code in <code>pyiceberg/io/pyarrow.py</code></summary>
<div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-1054">1054</a></span>
<span class="normal"><a href="#__codelineno-0-1055">1055</a></span></pre></div></td><td class="code"><div><pre><span></span><code><a id="__codelineno-0-1054" name="__codelineno-0-1054"></a><span class="k">def</span><span class="w"> </span><span class="nf">after_list_element</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">element</span><span class="p">:</span> <span class="n">pa</span><span class="o">.</span><span class="n">Field</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="kc">None</span><span class="p">:</span>
<a id="__codelineno-0-1055" name="__codelineno-0-1055"></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;Override this method to perform an action immediately after visiting an element within a ListType.&quot;&quot;&quot;</span>
</code></pre></div></td></tr></table></div>
</details>
</div>
</div>
<div class="doc doc-object doc-function">
<h3 id="pyiceberg.io.pyarrow.PyArrowSchemaVisitor.after_map_key" class="doc doc-heading">
<code class="highlight language-python"><span class="n">after_map_key</span><span class="p">(</span><span class="n">key</span><span class="p">)</span></code>
<a href="#pyiceberg.io.pyarrow.PyArrowSchemaVisitor.after_map_key" class="headerlink" title="Permanent link">&para;</a></h3>
<div class="doc doc-contents ">
<p>Override this method to perform an action immediately after visiting a key within a MapType.</p>
<details class="quote">
<summary>Source code in <code>pyiceberg/io/pyarrow.py</code></summary>
<div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-1060">1060</a></span>
<span class="normal"><a href="#__codelineno-0-1061">1061</a></span></pre></div></td><td class="code"><div><pre><span></span><code><a id="__codelineno-0-1060" name="__codelineno-0-1060"></a><span class="k">def</span><span class="w"> </span><span class="nf">after_map_key</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">key</span><span class="p">:</span> <span class="n">pa</span><span class="o">.</span><span class="n">Field</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="kc">None</span><span class="p">:</span>
<a id="__codelineno-0-1061" name="__codelineno-0-1061"></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;Override this method to perform an action immediately after visiting a key within a MapType.&quot;&quot;&quot;</span>
</code></pre></div></td></tr></table></div>
</details>
</div>
</div>
<div class="doc doc-object doc-function">
<h3 id="pyiceberg.io.pyarrow.PyArrowSchemaVisitor.after_map_value" class="doc doc-heading">
<code class="highlight language-python"><span class="n">after_map_value</span><span class="p">(</span><span class="n">value</span><span class="p">)</span></code>
<a href="#pyiceberg.io.pyarrow.PyArrowSchemaVisitor.after_map_value" class="headerlink" title="Permanent link">&para;</a></h3>
<div class="doc doc-contents ">
<p>Override this method to perform an action immediately after visiting a value within a MapType.</p>
<details class="quote">
<summary>Source code in <code>pyiceberg/io/pyarrow.py</code></summary>
<div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-1066">1066</a></span>
<span class="normal"><a href="#__codelineno-0-1067">1067</a></span></pre></div></td><td class="code"><div><pre><span></span><code><a id="__codelineno-0-1066" name="__codelineno-0-1066"></a><span class="k">def</span><span class="w"> </span><span class="nf">after_map_value</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="n">pa</span><span class="o">.</span><span class="n">Field</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="kc">None</span><span class="p">:</span>
<a id="__codelineno-0-1067" name="__codelineno-0-1067"></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;Override this method to perform an action immediately after visiting a value within a MapType.&quot;&quot;&quot;</span>
</code></pre></div></td></tr></table></div>
</details>
</div>
</div>
<div class="doc doc-object doc-function">
<h3 id="pyiceberg.io.pyarrow.PyArrowSchemaVisitor.before_field" class="doc doc-heading">
<code class="highlight language-python"><span class="n">before_field</span><span class="p">(</span><span class="n">field</span><span class="p">)</span></code>
<a href="#pyiceberg.io.pyarrow.PyArrowSchemaVisitor.before_field" class="headerlink" title="Permanent link">&para;</a></h3>
<div class="doc doc-contents ">
<p>Override this method to perform an action immediately before visiting a field.</p>
<details class="quote">
<summary>Source code in <code>pyiceberg/io/pyarrow.py</code></summary>
<div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-1045">1045</a></span>
<span class="normal"><a href="#__codelineno-0-1046">1046</a></span></pre></div></td><td class="code"><div><pre><span></span><code><a id="__codelineno-0-1045" name="__codelineno-0-1045"></a><span class="k">def</span><span class="w"> </span><span class="nf">before_field</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">field</span><span class="p">:</span> <span class="n">pa</span><span class="o">.</span><span class="n">Field</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="kc">None</span><span class="p">:</span>
<a id="__codelineno-0-1046" name="__codelineno-0-1046"></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;Override this method to perform an action immediately before visiting a field.&quot;&quot;&quot;</span>
</code></pre></div></td></tr></table></div>
</details>
</div>
</div>
<div class="doc doc-object doc-function">
<h3 id="pyiceberg.io.pyarrow.PyArrowSchemaVisitor.before_list_element" class="doc doc-heading">
<code class="highlight language-python"><span class="n">before_list_element</span><span class="p">(</span><span class="n">element</span><span class="p">)</span></code>
<a href="#pyiceberg.io.pyarrow.PyArrowSchemaVisitor.before_list_element" class="headerlink" title="Permanent link">&para;</a></h3>
<div class="doc doc-contents ">
<p>Override this method to perform an action immediately before visiting an element within a ListType.</p>
<details class="quote">
<summary>Source code in <code>pyiceberg/io/pyarrow.py</code></summary>
<div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-1051">1051</a></span>
<span class="normal"><a href="#__codelineno-0-1052">1052</a></span></pre></div></td><td class="code"><div><pre><span></span><code><a id="__codelineno-0-1051" name="__codelineno-0-1051"></a><span class="k">def</span><span class="w"> </span><span class="nf">before_list_element</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">element</span><span class="p">:</span> <span class="n">pa</span><span class="o">.</span><span class="n">Field</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="kc">None</span><span class="p">:</span>
<a id="__codelineno-0-1052" name="__codelineno-0-1052"></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;Override this method to perform an action immediately before visiting an element within a ListType.&quot;&quot;&quot;</span>
</code></pre></div></td></tr></table></div>
</details>
</div>
</div>
<div class="doc doc-object doc-function">
<h3 id="pyiceberg.io.pyarrow.PyArrowSchemaVisitor.before_map_key" class="doc doc-heading">
<code class="highlight language-python"><span class="n">before_map_key</span><span class="p">(</span><span class="n">key</span><span class="p">)</span></code>
<a href="#pyiceberg.io.pyarrow.PyArrowSchemaVisitor.before_map_key" class="headerlink" title="Permanent link">&para;</a></h3>
<div class="doc doc-contents ">
<p>Override this method to perform an action immediately before visiting a key within a MapType.</p>
<details class="quote">
<summary>Source code in <code>pyiceberg/io/pyarrow.py</code></summary>
<div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-1057">1057</a></span>
<span class="normal"><a href="#__codelineno-0-1058">1058</a></span></pre></div></td><td class="code"><div><pre><span></span><code><a id="__codelineno-0-1057" name="__codelineno-0-1057"></a><span class="k">def</span><span class="w"> </span><span class="nf">before_map_key</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">key</span><span class="p">:</span> <span class="n">pa</span><span class="o">.</span><span class="n">Field</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="kc">None</span><span class="p">:</span>
<a id="__codelineno-0-1058" name="__codelineno-0-1058"></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;Override this method to perform an action immediately before visiting a key within a MapType.&quot;&quot;&quot;</span>
</code></pre></div></td></tr></table></div>
</details>
</div>
</div>
<div class="doc doc-object doc-function">
<h3 id="pyiceberg.io.pyarrow.PyArrowSchemaVisitor.before_map_value" class="doc doc-heading">
<code class="highlight language-python"><span class="n">before_map_value</span><span class="p">(</span><span class="n">value</span><span class="p">)</span></code>
<a href="#pyiceberg.io.pyarrow.PyArrowSchemaVisitor.before_map_value" class="headerlink" title="Permanent link">&para;</a></h3>
<div class="doc doc-contents ">
<p>Override this method to perform an action immediately before visiting a value within a MapType.</p>
<details class="quote">
<summary>Source code in <code>pyiceberg/io/pyarrow.py</code></summary>
<div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-1063">1063</a></span>
<span class="normal"><a href="#__codelineno-0-1064">1064</a></span></pre></div></td><td class="code"><div><pre><span></span><code><a id="__codelineno-0-1063" name="__codelineno-0-1063"></a><span class="k">def</span><span class="w"> </span><span class="nf">before_map_value</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="n">pa</span><span class="o">.</span><span class="n">Field</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="kc">None</span><span class="p">:</span>
<a id="__codelineno-0-1064" name="__codelineno-0-1064"></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;Override this method to perform an action immediately before visiting a value within a MapType.&quot;&quot;&quot;</span>
</code></pre></div></td></tr></table></div>
</details>
</div>
</div>
<div class="doc doc-object doc-function">
<h3 id="pyiceberg.io.pyarrow.PyArrowSchemaVisitor.field" class="doc doc-heading">
<code class="highlight language-python"><span class="n">field</span><span class="p">(</span><span class="n">field</span><span class="p">,</span> <span class="n">field_result</span><span class="p">)</span></code>
<span class="doc doc-labels">
<small class="doc doc-label doc-label-abstractmethod"><code>abstractmethod</code></small>
</span>
<a href="#pyiceberg.io.pyarrow.PyArrowSchemaVisitor.field" class="headerlink" title="Permanent link">&para;</a></h3>
<div class="doc doc-contents ">
<p>Visit a field.</p>
<details class="quote">
<summary>Source code in <code>pyiceberg/io/pyarrow.py</code></summary>
<div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-1077">1077</a></span>
<span class="normal"><a href="#__codelineno-0-1078">1078</a></span>
<span class="normal"><a href="#__codelineno-0-1079">1079</a></span></pre></div></td><td class="code"><div><pre><span></span><code><a id="__codelineno-0-1077" name="__codelineno-0-1077"></a><span class="nd">@abstractmethod</span>
<a id="__codelineno-0-1078" name="__codelineno-0-1078"></a><span class="k">def</span><span class="w"> </span><span class="nf">field</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">field</span><span class="p">:</span> <span class="n">pa</span><span class="o">.</span><span class="n">Field</span><span class="p">,</span> <span class="n">field_result</span><span class="p">:</span> <span class="n">T</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">T</span><span class="p">:</span>
<a id="__codelineno-0-1079" name="__codelineno-0-1079"></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;Visit a field.&quot;&quot;&quot;</span>
</code></pre></div></td></tr></table></div>
</details>
</div>
</div>
<div class="doc doc-object doc-function">
<h3 id="pyiceberg.io.pyarrow.PyArrowSchemaVisitor.list" class="doc doc-heading">
<code class="highlight language-python"><span class="nb">list</span><span class="p">(</span><span class="n">list_type</span><span class="p">,</span> <span class="n">element_result</span><span class="p">)</span></code>
<span class="doc doc-labels">
<small class="doc doc-label doc-label-abstractmethod"><code>abstractmethod</code></small>
</span>
<a href="#pyiceberg.io.pyarrow.PyArrowSchemaVisitor.list" class="headerlink" title="Permanent link">&para;</a></h3>
<div class="doc doc-contents ">
<p>Visit a list.</p>
<details class="quote">
<summary>Source code in <code>pyiceberg/io/pyarrow.py</code></summary>
<div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-1081">1081</a></span>
<span class="normal"><a href="#__codelineno-0-1082">1082</a></span>
<span class="normal"><a href="#__codelineno-0-1083">1083</a></span></pre></div></td><td class="code"><div><pre><span></span><code><a id="__codelineno-0-1081" name="__codelineno-0-1081"></a><span class="nd">@abstractmethod</span>
<a id="__codelineno-0-1082" name="__codelineno-0-1082"></a><span class="k">def</span><span class="w"> </span><span class="nf">list</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">list_type</span><span class="p">:</span> <span class="n">pa</span><span class="o">.</span><span class="n">ListType</span><span class="p">,</span> <span class="n">element_result</span><span class="p">:</span> <span class="n">T</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">T</span><span class="p">:</span>
<a id="__codelineno-0-1083" name="__codelineno-0-1083"></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;Visit a list.&quot;&quot;&quot;</span>
</code></pre></div></td></tr></table></div>
</details>
</div>
</div>
<div class="doc doc-object doc-function">
<h3 id="pyiceberg.io.pyarrow.PyArrowSchemaVisitor.map" class="doc doc-heading">
<code class="highlight language-python"><span class="nb">map</span><span class="p">(</span><span class="n">map_type</span><span class="p">,</span> <span class="n">key_result</span><span class="p">,</span> <span class="n">value_result</span><span class="p">)</span></code>
<span class="doc doc-labels">
<small class="doc doc-label doc-label-abstractmethod"><code>abstractmethod</code></small>
</span>
<a href="#pyiceberg.io.pyarrow.PyArrowSchemaVisitor.map" class="headerlink" title="Permanent link">&para;</a></h3>
<div class="doc doc-contents ">
<p>Visit a map.</p>
<details class="quote">
<summary>Source code in <code>pyiceberg/io/pyarrow.py</code></summary>
<div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-1085">1085</a></span>
<span class="normal"><a href="#__codelineno-0-1086">1086</a></span>
<span class="normal"><a href="#__codelineno-0-1087">1087</a></span></pre></div></td><td class="code"><div><pre><span></span><code><a id="__codelineno-0-1085" name="__codelineno-0-1085"></a><span class="nd">@abstractmethod</span>
<a id="__codelineno-0-1086" name="__codelineno-0-1086"></a><span class="k">def</span><span class="w"> </span><span class="nf">map</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">map_type</span><span class="p">:</span> <span class="n">pa</span><span class="o">.</span><span class="n">MapType</span><span class="p">,</span> <span class="n">key_result</span><span class="p">:</span> <span class="n">T</span><span class="p">,</span> <span class="n">value_result</span><span class="p">:</span> <span class="n">T</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">T</span><span class="p">:</span>
<a id="__codelineno-0-1087" name="__codelineno-0-1087"></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;Visit a map.&quot;&quot;&quot;</span>
</code></pre></div></td></tr></table></div>
</details>
</div>
</div>
<div class="doc doc-object doc-function">
<h3 id="pyiceberg.io.pyarrow.PyArrowSchemaVisitor.primitive" class="doc doc-heading">
<code class="highlight language-python"><span class="n">primitive</span><span class="p">(</span><span class="n">primitive</span><span class="p">)</span></code>
<span class="doc doc-labels">
<small class="doc doc-label doc-label-abstractmethod"><code>abstractmethod</code></small>
</span>
<a href="#pyiceberg.io.pyarrow.PyArrowSchemaVisitor.primitive" class="headerlink" title="Permanent link">&para;</a></h3>
<div class="doc doc-contents ">
<p>Visit a primitive type.</p>
<details class="quote">
<summary>Source code in <code>pyiceberg/io/pyarrow.py</code></summary>
<div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-1089">1089</a></span>
<span class="normal"><a href="#__codelineno-0-1090">1090</a></span>
<span class="normal"><a href="#__codelineno-0-1091">1091</a></span></pre></div></td><td class="code"><div><pre><span></span><code><a id="__codelineno-0-1089" name="__codelineno-0-1089"></a><span class="nd">@abstractmethod</span>
<a id="__codelineno-0-1090" name="__codelineno-0-1090"></a><span class="k">def</span><span class="w"> </span><span class="nf">primitive</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">primitive</span><span class="p">:</span> <span class="n">pa</span><span class="o">.</span><span class="n">DataType</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">T</span><span class="p">:</span>
<a id="__codelineno-0-1091" name="__codelineno-0-1091"></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;Visit a primitive type.&quot;&quot;&quot;</span>
</code></pre></div></td></tr></table></div>
</details>
</div>
</div>
<div class="doc doc-object doc-function">
<h3 id="pyiceberg.io.pyarrow.PyArrowSchemaVisitor.schema" class="doc doc-heading">
<code class="highlight language-python"><span class="n">schema</span><span class="p">(</span><span class="n">schema</span><span class="p">,</span> <span class="n">struct_result</span><span class="p">)</span></code>
<span class="doc doc-labels">
<small class="doc doc-label doc-label-abstractmethod"><code>abstractmethod</code></small>
</span>
<a href="#pyiceberg.io.pyarrow.PyArrowSchemaVisitor.schema" class="headerlink" title="Permanent link">&para;</a></h3>
<div class="doc doc-contents ">
<p>Visit a schema.</p>
<details class="quote">
<summary>Source code in <code>pyiceberg/io/pyarrow.py</code></summary>
<div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-1069">1069</a></span>
<span class="normal"><a href="#__codelineno-0-1070">1070</a></span>
<span class="normal"><a href="#__codelineno-0-1071">1071</a></span></pre></div></td><td class="code"><div><pre><span></span><code><a id="__codelineno-0-1069" name="__codelineno-0-1069"></a><span class="nd">@abstractmethod</span>
<a id="__codelineno-0-1070" name="__codelineno-0-1070"></a><span class="k">def</span><span class="w"> </span><span class="nf">schema</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">schema</span><span class="p">:</span> <span class="n">pa</span><span class="o">.</span><span class="n">Schema</span><span class="p">,</span> <span class="n">struct_result</span><span class="p">:</span> <span class="n">T</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">T</span><span class="p">:</span>
<a id="__codelineno-0-1071" name="__codelineno-0-1071"></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;Visit a schema.&quot;&quot;&quot;</span>
</code></pre></div></td></tr></table></div>
</details>
</div>
</div>
<div class="doc doc-object doc-function">
<h3 id="pyiceberg.io.pyarrow.PyArrowSchemaVisitor.struct" class="doc doc-heading">
<code class="highlight language-python"><span class="n">struct</span><span class="p">(</span><span class="n">struct</span><span class="p">,</span> <span class="n">field_results</span><span class="p">)</span></code>
<span class="doc doc-labels">
<small class="doc doc-label doc-label-abstractmethod"><code>abstractmethod</code></small>
</span>
<a href="#pyiceberg.io.pyarrow.PyArrowSchemaVisitor.struct" class="headerlink" title="Permanent link">&para;</a></h3>
<div class="doc doc-contents ">
<p>Visit a struct.</p>
<details class="quote">
<summary>Source code in <code>pyiceberg/io/pyarrow.py</code></summary>
<div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-1073">1073</a></span>
<span class="normal"><a href="#__codelineno-0-1074">1074</a></span>
<span class="normal"><a href="#__codelineno-0-1075">1075</a></span></pre></div></td><td class="code"><div><pre><span></span><code><a id="__codelineno-0-1073" name="__codelineno-0-1073"></a><span class="nd">@abstractmethod</span>
<a id="__codelineno-0-1074" name="__codelineno-0-1074"></a><span class="k">def</span><span class="w"> </span><span class="nf">struct</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">struct</span><span class="p">:</span> <span class="n">pa</span><span class="o">.</span><span class="n">StructType</span><span class="p">,</span> <span class="n">field_results</span><span class="p">:</span> <span class="n">List</span><span class="p">[</span><span class="n">T</span><span class="p">])</span> <span class="o">-&gt;</span> <span class="n">T</span><span class="p">:</span>
<a id="__codelineno-0-1075" name="__codelineno-0-1075"></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;Visit a struct.&quot;&quot;&quot;</span>
</code></pre></div></td></tr></table></div>
</details>
</div>
</div>
</div>
</div>
</div>
<div class="doc doc-object doc-class">
<h2 id="pyiceberg.io.pyarrow.UnsupportedPyArrowTypeException" class="doc doc-heading">
<code>UnsupportedPyArrowTypeException</code>
<a href="#pyiceberg.io.pyarrow.UnsupportedPyArrowTypeException" class="headerlink" title="Permanent link">&para;</a></h2>
<div class="doc doc-contents ">
<p class="doc doc-class-bases">
Bases: <code><span title="Exception">Exception</span></code></p>
<p>Exception indicating that a PyArrow type cannot be converted to the corresponding Iceberg type.</p>
<details class="quote">
<summary>Source code in <code>pyiceberg/io/pyarrow.py</code></summary>
<div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-209">209</a></span>
<span class="normal"><a href="#__codelineno-0-210">210</a></span>
<span class="normal"><a href="#__codelineno-0-211">211</a></span>
<span class="normal"><a href="#__codelineno-0-212">212</a></span>
<span class="normal"><a href="#__codelineno-0-213">213</a></span>
<span class="normal"><a href="#__codelineno-0-214">214</a></span></pre></div></td><td class="code"><div><pre><span></span><code><a id="__codelineno-0-209" name="__codelineno-0-209"></a><span class="k">class</span><span class="w"> </span><span class="nc">UnsupportedPyArrowTypeException</span><span class="p">(</span><span class="ne">Exception</span><span class="p">):</span>
<a id="__codelineno-0-210" name="__codelineno-0-210"></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;Cannot convert PyArrow type to corresponding Iceberg type.&quot;&quot;&quot;</span>
<a id="__codelineno-0-211" name="__codelineno-0-211"></a>
<a id="__codelineno-0-212" name="__codelineno-0-212"></a> <span class="k">def</span><span class="w"> </span><span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">field</span><span class="p">:</span> <span class="n">pa</span><span class="o">.</span><span class="n">Field</span><span class="p">,</span> <span class="o">*</span><span class="n">args</span><span class="p">:</span> <span class="n">Any</span><span class="p">):</span>
<a id="__codelineno-0-213" name="__codelineno-0-213"></a> <span class="bp">self</span><span class="o">.</span><span class="n">field</span> <span class="o">=</span> <span class="n">field</span>
<a id="__codelineno-0-214" name="__codelineno-0-214"></a> <span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="fm">__init__</span><span class="p">(</span><span class="o">*</span><span class="n">args</span><span class="p">)</span>
</code></pre></div></td></tr></table></div>
</details>
<div class="doc doc-children">
</div>
</div>
</div>
<div class="doc doc-object doc-class">
<h2 id="pyiceberg.io.pyarrow._ConvertToIceberg" class="doc doc-heading">
<code>_ConvertToIceberg</code>
<a href="#pyiceberg.io.pyarrow._ConvertToIceberg" class="headerlink" title="Permanent link">&para;</a></h2>
<div class="doc doc-contents ">
<p class="doc doc-class-bases">
Bases: <code><a class="autorefs autorefs-internal" title="pyiceberg.io.pyarrow.PyArrowSchemaVisitor" href="#pyiceberg.io.pyarrow.PyArrowSchemaVisitor">PyArrowSchemaVisitor</a>[<span title="typing.Union">Union</span>[<a class="autorefs autorefs-internal" title="pyiceberg.types.IcebergType" href="../../types/#pyiceberg.types.IcebergType">IcebergType</a>, <a class="autorefs autorefs-internal" title="pyiceberg.schema.Schema" href="../../schema/#pyiceberg.schema.Schema">Schema</a>]]</code></p>
<p>Converts a PyArrow schema to an Iceberg schema. Applies the IDs from <code>name_mapping</code> if provided.</p>
<details class="quote">
<summary>Source code in <code>pyiceberg/io/pyarrow.py</code></summary>
<div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-1128">1128</a></span>
<span class="normal"><a href="#__codelineno-0-1129">1129</a></span>
<span class="normal"><a href="#__codelineno-0-1130">1130</a></span>
<span class="normal"><a href="#__codelineno-0-1131">1131</a></span>
<span class="normal"><a href="#__codelineno-0-1132">1132</a></span>
<span class="normal"><a href="#__codelineno-0-1133">1133</a></span>
<span class="normal"><a href="#__codelineno-0-1134">1134</a></span>
<span class="normal"><a href="#__codelineno-0-1135">1135</a></span>
<span class="normal"><a href="#__codelineno-0-1136">1136</a></span>
<span class="normal"><a href="#__codelineno-0-1137">1137</a></span>
<span class="normal"><a href="#__codelineno-0-1138">1138</a></span>
<span class="normal"><a href="#__codelineno-0-1139">1139</a></span>
<span class="normal"><a href="#__codelineno-0-1140">1140</a></span>
<span class="normal"><a href="#__codelineno-0-1141">1141</a></span>
<span class="normal"><a href="#__codelineno-0-1142">1142</a></span>
<span class="normal"><a href="#__codelineno-0-1143">1143</a></span>
<span class="normal"><a href="#__codelineno-0-1144">1144</a></span>
<span class="normal"><a href="#__codelineno-0-1145">1145</a></span>
<span class="normal"><a href="#__codelineno-0-1146">1146</a></span>
<span class="normal"><a href="#__codelineno-0-1147">1147</a></span>
<span class="normal"><a href="#__codelineno-0-1148">1148</a></span>
<span class="normal"><a href="#__codelineno-0-1149">1149</a></span>
<span class="normal"><a href="#__codelineno-0-1150">1150</a></span>
<span class="normal"><a href="#__codelineno-0-1151">1151</a></span>
<span class="normal"><a href="#__codelineno-0-1152">1152</a></span>
<span class="normal"><a href="#__codelineno-0-1153">1153</a></span>
<span class="normal"><a href="#__codelineno-0-1154">1154</a></span>
<span class="normal"><a href="#__codelineno-0-1155">1155</a></span>
<span class="normal"><a href="#__codelineno-0-1156">1156</a></span>
<span class="normal"><a href="#__codelineno-0-1157">1157</a></span>
<span class="normal"><a href="#__codelineno-0-1158">1158</a></span>
<span class="normal"><a href="#__codelineno-0-1159">1159</a></span>
<span class="normal"><a href="#__codelineno-0-1160">1160</a></span>
<span class="normal"><a href="#__codelineno-0-1161">1161</a></span>
<span class="normal"><a href="#__codelineno-0-1162">1162</a></span>
<span class="normal"><a href="#__codelineno-0-1163">1163</a></span>
<span class="normal"><a href="#__codelineno-0-1164">1164</a></span>
<span class="normal"><a href="#__codelineno-0-1165">1165</a></span>
<span class="normal"><a href="#__codelineno-0-1166">1166</a></span>
<span class="normal"><a href="#__codelineno-0-1167">1167</a></span>
<span class="normal"><a href="#__codelineno-0-1168">1168</a></span>
<span class="normal"><a href="#__codelineno-0-1169">1169</a></span>
<span class="normal"><a href="#__codelineno-0-1170">1170</a></span>
<span class="normal"><a href="#__codelineno-0-1171">1171</a></span>
<span class="normal"><a href="#__codelineno-0-1172">1172</a></span>
<span class="normal"><a href="#__codelineno-0-1173">1173</a></span>
<span class="normal"><a href="#__codelineno-0-1174">1174</a></span>
<span class="normal"><a href="#__codelineno-0-1175">1175</a></span>
<span class="normal"><a href="#__codelineno-0-1176">1176</a></span>
<span class="normal"><a href="#__codelineno-0-1177">1177</a></span>
<span class="normal"><a href="#__codelineno-0-1178">1178</a></span>
<span class="normal"><a href="#__codelineno-0-1179">1179</a></span>
<span class="normal"><a href="#__codelineno-0-1180">1180</a></span>
<span class="normal"><a href="#__codelineno-0-1181">1181</a></span>
<span class="normal"><a href="#__codelineno-0-1182">1182</a></span>
<span class="normal"><a href="#__codelineno-0-1183">1183</a></span>
<span class="normal"><a href="#__codelineno-0-1184">1184</a></span>
<span class="normal"><a href="#__codelineno-0-1185">1185</a></span>
<span class="normal"><a href="#__codelineno-0-1186">1186</a></span>
<span class="normal"><a href="#__codelineno-0-1187">1187</a></span>
<span class="normal"><a href="#__codelineno-0-1188">1188</a></span>
<span class="normal"><a href="#__codelineno-0-1189">1189</a></span>
<span class="normal"><a href="#__codelineno-0-1190">1190</a></span>
<span class="normal"><a href="#__codelineno-0-1191">1191</a></span>
<span class="normal"><a href="#__codelineno-0-1192">1192</a></span>
<span class="normal"><a href="#__codelineno-0-1193">1193</a></span>
<span class="normal"><a href="#__codelineno-0-1194">1194</a></span>
<span class="normal"><a href="#__codelineno-0-1195">1195</a></span>
<span class="normal"><a href="#__codelineno-0-1196">1196</a></span>
<span class="normal"><a href="#__codelineno-0-1197">1197</a></span>
<span class="normal"><a href="#__codelineno-0-1198">1198</a></span>
<span class="normal"><a href="#__codelineno-0-1199">1199</a></span>
<span class="normal"><a href="#__codelineno-0-1200">1200</a></span>
<span class="normal"><a href="#__codelineno-0-1201">1201</a></span>
<span class="normal"><a href="#__codelineno-0-1202">1202</a></span>
<span class="normal"><a href="#__codelineno-0-1203">1203</a></span>
<span class="normal"><a href="#__codelineno-0-1204">1204</a></span>
<span class="normal"><a href="#__codelineno-0-1205">1205</a></span>
<span class="normal"><a href="#__codelineno-0-1206">1206</a></span>
<span class="normal"><a href="#__codelineno-0-1207">1207</a></span>
<span class="normal"><a href="#__codelineno-0-1208">1208</a></span>
<span class="normal"><a href="#__codelineno-0-1209">1209</a></span>
<span class="normal"><a href="#__codelineno-0-1210">1210</a></span>
<span class="normal"><a href="#__codelineno-0-1211">1211</a></span>
<span class="normal"><a href="#__codelineno-0-1212">1212</a></span>
<span class="normal"><a href="#__codelineno-0-1213">1213</a></span>
<span class="normal"><a href="#__codelineno-0-1214">1214</a></span>
<span class="normal"><a href="#__codelineno-0-1215">1215</a></span>
<span class="normal"><a href="#__codelineno-0-1216">1216</a></span>
<span class="normal"><a href="#__codelineno-0-1217">1217</a></span>
<span class="normal"><a href="#__codelineno-0-1218">1218</a></span>
<span class="normal"><a href="#__codelineno-0-1219">1219</a></span>
<span class="normal"><a href="#__codelineno-0-1220">1220</a></span>
<span class="normal"><a href="#__codelineno-0-1221">1221</a></span>
<span class="normal"><a href="#__codelineno-0-1222">1222</a></span>
<span class="normal"><a href="#__codelineno-0-1223">1223</a></span>
<span class="normal"><a href="#__codelineno-0-1224">1224</a></span>
<span class="normal"><a href="#__codelineno-0-1225">1225</a></span>
<span class="normal"><a href="#__codelineno-0-1226">1226</a></span>
<span class="normal"><a href="#__codelineno-0-1227">1227</a></span>
<span class="normal"><a href="#__codelineno-0-1228">1228</a></span>
<span class="normal"><a href="#__codelineno-0-1229">1229</a></span>
<span class="normal"><a href="#__codelineno-0-1230">1230</a></span>
<span class="normal"><a href="#__codelineno-0-1231">1231</a></span>
<span class="normal"><a href="#__codelineno-0-1232">1232</a></span>
<span class="normal"><a href="#__codelineno-0-1233">1233</a></span>
<span class="normal"><a href="#__codelineno-0-1234">1234</a></span>
<span class="normal"><a href="#__codelineno-0-1235">1235</a></span>
<span class="normal"><a href="#__codelineno-0-1236">1236</a></span>
<span class="normal"><a href="#__codelineno-0-1237">1237</a></span>
<span class="normal"><a href="#__codelineno-0-1238">1238</a></span>
<span class="normal"><a href="#__codelineno-0-1239">1239</a></span>
<span class="normal"><a href="#__codelineno-0-1240">1240</a></span>
<span class="normal"><a href="#__codelineno-0-1241">1241</a></span>
<span class="normal"><a href="#__codelineno-0-1242">1242</a></span>
<span class="normal"><a href="#__codelineno-0-1243">1243</a></span>
<span class="normal"><a href="#__codelineno-0-1244">1244</a></span>
<span class="normal"><a href="#__codelineno-0-1245">1245</a></span>
<span class="normal"><a href="#__codelineno-0-1246">1246</a></span>
<span class="normal"><a href="#__codelineno-0-1247">1247</a></span>
<span class="normal"><a href="#__codelineno-0-1248">1248</a></span></pre></div></td><td class="code"><div><pre><span></span><code><a id="__codelineno-0-1128" name="__codelineno-0-1128"></a><span class="k">class</span><span class="w"> </span><span class="nc">_ConvertToIceberg</span><span class="p">(</span><span class="n">PyArrowSchemaVisitor</span><span class="p">[</span><span class="n">Union</span><span class="p">[</span><span class="n">IcebergType</span><span class="p">,</span> <span class="n">Schema</span><span class="p">]]):</span>
<a id="__codelineno-0-1129" name="__codelineno-0-1129"></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;Converts PyArrowSchema to Iceberg Schema. Applies the IDs from name_mapping if provided.&quot;&quot;&quot;</span>
<a id="__codelineno-0-1130" name="__codelineno-0-1130"></a>
<a id="__codelineno-0-1131" name="__codelineno-0-1131"></a> <span class="n">_field_names</span><span class="p">:</span> <span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span>
<a id="__codelineno-0-1132" name="__codelineno-0-1132"></a>
<a id="__codelineno-0-1133" name="__codelineno-0-1133"></a> <span class="k">def</span><span class="w"> </span><span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">downcast_ns_timestamp_to_us</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">False</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="kc">None</span><span class="p">:</span>
<a id="__codelineno-0-1134" name="__codelineno-0-1134"></a> <span class="bp">self</span><span class="o">.</span><span class="n">_field_names</span> <span class="o">=</span> <span class="p">[]</span>
<a id="__codelineno-0-1135" name="__codelineno-0-1135"></a> <span class="bp">self</span><span class="o">.</span><span class="n">_downcast_ns_timestamp_to_us</span> <span class="o">=</span> <span class="n">downcast_ns_timestamp_to_us</span>
<a id="__codelineno-0-1136" name="__codelineno-0-1136"></a>
<a id="__codelineno-0-1137" name="__codelineno-0-1137"></a> <span class="k">def</span><span class="w"> </span><span class="nf">_field_id</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">field</span><span class="p">:</span> <span class="n">pa</span><span class="o">.</span><span class="n">Field</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="nb">int</span><span class="p">:</span>
<a id="__codelineno-0-1138" name="__codelineno-0-1138"></a> <span class="k">if</span> <span class="p">(</span><span class="n">field_id</span> <span class="o">:=</span> <span class="n">_get_field_id</span><span class="p">(</span><span class="n">field</span><span class="p">))</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
<a id="__codelineno-0-1139" name="__codelineno-0-1139"></a> <span class="k">return</span> <span class="n">field_id</span>
<a id="__codelineno-0-1140" name="__codelineno-0-1140"></a> <span class="k">else</span><span class="p">:</span>
<a id="__codelineno-0-1141" name="__codelineno-0-1141"></a> <span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="sa">f</span><span class="s2">&quot;Cannot convert </span><span class="si">{</span><span class="n">field</span><span class="si">}</span><span class="s2"> to Iceberg Field as field_id is empty.&quot;</span><span class="p">)</span>
<a id="__codelineno-0-1142" name="__codelineno-0-1142"></a>
<a id="__codelineno-0-1143" name="__codelineno-0-1143"></a> <span class="k">def</span><span class="w"> </span><span class="nf">schema</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">schema</span><span class="p">:</span> <span class="n">pa</span><span class="o">.</span><span class="n">Schema</span><span class="p">,</span> <span class="n">struct_result</span><span class="p">:</span> <span class="n">StructType</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">Schema</span><span class="p">:</span>
<a id="__codelineno-0-1144" name="__codelineno-0-1144"></a> <span class="k">return</span> <span class="n">Schema</span><span class="p">(</span><span class="o">*</span><span class="n">struct_result</span><span class="o">.</span><span class="n">fields</span><span class="p">)</span>
<a id="__codelineno-0-1145" name="__codelineno-0-1145"></a>
<a id="__codelineno-0-1146" name="__codelineno-0-1146"></a> <span class="k">def</span><span class="w"> </span><span class="nf">struct</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">struct</span><span class="p">:</span> <span class="n">pa</span><span class="o">.</span><span class="n">StructType</span><span class="p">,</span> <span class="n">field_results</span><span class="p">:</span> <span class="n">List</span><span class="p">[</span><span class="n">NestedField</span><span class="p">])</span> <span class="o">-&gt;</span> <span class="n">StructType</span><span class="p">:</span>
<a id="__codelineno-0-1147" name="__codelineno-0-1147"></a> <span class="k">return</span> <span class="n">StructType</span><span class="p">(</span><span class="o">*</span><span class="n">field_results</span><span class="p">)</span>
<a id="__codelineno-0-1148" name="__codelineno-0-1148"></a>
<a id="__codelineno-0-1149" name="__codelineno-0-1149"></a> <span class="k">def</span><span class="w"> </span><span class="nf">field</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">field</span><span class="p">:</span> <span class="n">pa</span><span class="o">.</span><span class="n">Field</span><span class="p">,</span> <span class="n">field_result</span><span class="p">:</span> <span class="n">IcebergType</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">NestedField</span><span class="p">:</span>
<a id="__codelineno-0-1150" name="__codelineno-0-1150"></a> <span class="n">field_id</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_field_id</span><span class="p">(</span><span class="n">field</span><span class="p">)</span>
<a id="__codelineno-0-1151" name="__codelineno-0-1151"></a> <span class="n">field_doc</span> <span class="o">=</span> <span class="n">doc_str</span><span class="o">.</span><span class="n">decode</span><span class="p">()</span> <span class="k">if</span> <span class="p">(</span><span class="n">field</span><span class="o">.</span><span class="n">metadata</span> <span class="ow">and</span> <span class="p">(</span><span class="n">doc_str</span> <span class="o">:=</span> <span class="n">field</span><span class="o">.</span><span class="n">metadata</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">PYARROW_FIELD_DOC_KEY</span><span class="p">)))</span> <span class="k">else</span> <span class="kc">None</span>
<a id="__codelineno-0-1152" name="__codelineno-0-1152"></a> <span class="n">field_type</span> <span class="o">=</span> <span class="n">field_result</span>
<a id="__codelineno-0-1153" name="__codelineno-0-1153"></a> <span class="k">return</span> <span class="n">NestedField</span><span class="p">(</span><span class="n">field_id</span><span class="p">,</span> <span class="n">field</span><span class="o">.</span><span class="n">name</span><span class="p">,</span> <span class="n">field_type</span><span class="p">,</span> <span class="n">required</span><span class="o">=</span><span class="ow">not</span> <span class="n">field</span><span class="o">.</span><span class="n">nullable</span><span class="p">,</span> <span class="n">doc</span><span class="o">=</span><span class="n">field_doc</span><span class="p">)</span>
<a id="__codelineno-0-1154" name="__codelineno-0-1154"></a>
<a id="__codelineno-0-1155" name="__codelineno-0-1155"></a> <span class="k">def</span><span class="w"> </span><span class="nf">list</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">list_type</span><span class="p">:</span> <span class="n">pa</span><span class="o">.</span><span class="n">ListType</span><span class="p">,</span> <span class="n">element_result</span><span class="p">:</span> <span class="n">IcebergType</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">ListType</span><span class="p">:</span>
<a id="__codelineno-0-1156" name="__codelineno-0-1156"></a> <span class="n">element_field</span> <span class="o">=</span> <span class="n">list_type</span><span class="o">.</span><span class="n">value_field</span>
<a id="__codelineno-0-1157" name="__codelineno-0-1157"></a> <span class="bp">self</span><span class="o">.</span><span class="n">_field_names</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">LIST_ELEMENT_NAME</span><span class="p">)</span>
<a id="__codelineno-0-1158" name="__codelineno-0-1158"></a> <span class="n">element_id</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_field_id</span><span class="p">(</span><span class="n">element_field</span><span class="p">)</span>
<a id="__codelineno-0-1159" name="__codelineno-0-1159"></a> <span class="bp">self</span><span class="o">.</span><span class="n">_field_names</span><span class="o">.</span><span class="n">pop</span><span class="p">()</span>
<a id="__codelineno-0-1160" name="__codelineno-0-1160"></a> <span class="k">return</span> <span class="n">ListType</span><span class="p">(</span><span class="n">element_id</span><span class="p">,</span> <span class="n">element_result</span><span class="p">,</span> <span class="n">element_required</span><span class="o">=</span><span class="ow">not</span> <span class="n">element_field</span><span class="o">.</span><span class="n">nullable</span><span class="p">)</span>
<a id="__codelineno-0-1161" name="__codelineno-0-1161"></a>
<a id="__codelineno-0-1162" name="__codelineno-0-1162"></a> <span class="k">def</span><span class="w"> </span><span class="nf">map</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">map_type</span><span class="p">:</span> <span class="n">pa</span><span class="o">.</span><span class="n">MapType</span><span class="p">,</span> <span class="n">key_result</span><span class="p">:</span> <span class="n">IcebergType</span><span class="p">,</span> <span class="n">value_result</span><span class="p">:</span> <span class="n">IcebergType</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">MapType</span><span class="p">:</span>
<a id="__codelineno-0-1163" name="__codelineno-0-1163"></a> <span class="n">key_field</span> <span class="o">=</span> <span class="n">map_type</span><span class="o">.</span><span class="n">key_field</span>
<a id="__codelineno-0-1164" name="__codelineno-0-1164"></a> <span class="bp">self</span><span class="o">.</span><span class="n">_field_names</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">MAP_KEY_NAME</span><span class="p">)</span>
<a id="__codelineno-0-1165" name="__codelineno-0-1165"></a> <span class="n">key_id</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_field_id</span><span class="p">(</span><span class="n">key_field</span><span class="p">)</span>
<a id="__codelineno-0-1166" name="__codelineno-0-1166"></a> <span class="bp">self</span><span class="o">.</span><span class="n">_field_names</span><span class="o">.</span><span class="n">pop</span><span class="p">()</span>
<a id="__codelineno-0-1167" name="__codelineno-0-1167"></a> <span class="n">value_field</span> <span class="o">=</span> <span class="n">map_type</span><span class="o">.</span><span class="n">item_field</span>
<a id="__codelineno-0-1168" name="__codelineno-0-1168"></a> <span class="bp">self</span><span class="o">.</span><span class="n">_field_names</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">MAP_VALUE_NAME</span><span class="p">)</span>
<a id="__codelineno-0-1169" name="__codelineno-0-1169"></a> <span class="n">value_id</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_field_id</span><span class="p">(</span><span class="n">value_field</span><span class="p">)</span>
<a id="__codelineno-0-1170" name="__codelineno-0-1170"></a> <span class="bp">self</span><span class="o">.</span><span class="n">_field_names</span><span class="o">.</span><span class="n">pop</span><span class="p">()</span>
<a id="__codelineno-0-1171" name="__codelineno-0-1171"></a> <span class="k">return</span> <span class="n">MapType</span><span class="p">(</span><span class="n">key_id</span><span class="p">,</span> <span class="n">key_result</span><span class="p">,</span> <span class="n">value_id</span><span class="p">,</span> <span class="n">value_result</span><span class="p">,</span> <span class="n">value_required</span><span class="o">=</span><span class="ow">not</span> <span class="n">value_field</span><span class="o">.</span><span class="n">nullable</span><span class="p">)</span>
<a id="__codelineno-0-1172" name="__codelineno-0-1172"></a>
<a id="__codelineno-0-1173" name="__codelineno-0-1173"></a> <span class="k">def</span><span class="w"> </span><span class="nf">primitive</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">primitive</span><span class="p">:</span> <span class="n">pa</span><span class="o">.</span><span class="n">DataType</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">PrimitiveType</span><span class="p">:</span>
<a id="__codelineno-0-1174" name="__codelineno-0-1174"></a> <span class="k">if</span> <span class="n">pa</span><span class="o">.</span><span class="n">types</span><span class="o">.</span><span class="n">is_boolean</span><span class="p">(</span><span class="n">primitive</span><span class="p">):</span>
<a id="__codelineno-0-1175" name="__codelineno-0-1175"></a> <span class="k">return</span> <span class="n">BooleanType</span><span class="p">()</span>
<a id="__codelineno-0-1176" name="__codelineno-0-1176"></a> <span class="k">elif</span> <span class="n">pa</span><span class="o">.</span><span class="n">types</span><span class="o">.</span><span class="n">is_integer</span><span class="p">(</span><span class="n">primitive</span><span class="p">):</span>
<a id="__codelineno-0-1177" name="__codelineno-0-1177"></a> <span class="n">width</span> <span class="o">=</span> <span class="n">primitive</span><span class="o">.</span><span class="n">bit_width</span>
<a id="__codelineno-0-1178" name="__codelineno-0-1178"></a> <span class="k">if</span> <span class="n">width</span> <span class="o">&lt;=</span> <span class="mi">32</span><span class="p">:</span>
<a id="__codelineno-0-1179" name="__codelineno-0-1179"></a> <span class="k">return</span> <span class="n">IntegerType</span><span class="p">()</span>
<a id="__codelineno-0-1180" name="__codelineno-0-1180"></a> <span class="k">elif</span> <span class="n">width</span> <span class="o">&lt;=</span> <span class="mi">64</span><span class="p">:</span>
<a id="__codelineno-0-1181" name="__codelineno-0-1181"></a> <span class="k">return</span> <span class="n">LongType</span><span class="p">()</span>
<a id="__codelineno-0-1182" name="__codelineno-0-1182"></a> <span class="k">else</span><span class="p">:</span>
<a id="__codelineno-0-1183" name="__codelineno-0-1183"></a> <span class="c1"># Does not exist (yet)</span>
<a id="__codelineno-0-1184" name="__codelineno-0-1184"></a> <span class="k">raise</span> <span class="ne">TypeError</span><span class="p">(</span><span class="sa">f</span><span class="s2">&quot;Unsupported integer type: </span><span class="si">{</span><span class="n">primitive</span><span class="si">}</span><span class="s2">&quot;</span><span class="p">)</span>
<a id="__codelineno-0-1185" name="__codelineno-0-1185"></a> <span class="k">elif</span> <span class="n">pa</span><span class="o">.</span><span class="n">types</span><span class="o">.</span><span class="n">is_float32</span><span class="p">(</span><span class="n">primitive</span><span class="p">):</span>
<a id="__codelineno-0-1186" name="__codelineno-0-1186"></a> <span class="k">return</span> <span class="n">FloatType</span><span class="p">()</span>
<a id="__codelineno-0-1187" name="__codelineno-0-1187"></a> <span class="k">elif</span> <span class="n">pa</span><span class="o">.</span><span class="n">types</span><span class="o">.</span><span class="n">is_float64</span><span class="p">(</span><span class="n">primitive</span><span class="p">):</span>
<a id="__codelineno-0-1188" name="__codelineno-0-1188"></a> <span class="k">return</span> <span class="n">DoubleType</span><span class="p">()</span>
<a id="__codelineno-0-1189" name="__codelineno-0-1189"></a> <span class="k">elif</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">primitive</span><span class="p">,</span> <span class="n">pa</span><span class="o">.</span><span class="n">Decimal128Type</span><span class="p">):</span>
<a id="__codelineno-0-1190" name="__codelineno-0-1190"></a> <span class="n">primitive</span> <span class="o">=</span> <span class="n">cast</span><span class="p">(</span><span class="n">pa</span><span class="o">.</span><span class="n">Decimal128Type</span><span class="p">,</span> <span class="n">primitive</span><span class="p">)</span>
<a id="__codelineno-0-1191" name="__codelineno-0-1191"></a> <span class="k">return</span> <span class="n">DecimalType</span><span class="p">(</span><span class="n">primitive</span><span class="o">.</span><span class="n">precision</span><span class="p">,</span> <span class="n">primitive</span><span class="o">.</span><span class="n">scale</span><span class="p">)</span>
<a id="__codelineno-0-1192" name="__codelineno-0-1192"></a> <span class="k">elif</span> <span class="n">pa</span><span class="o">.</span><span class="n">types</span><span class="o">.</span><span class="n">is_string</span><span class="p">(</span><span class="n">primitive</span><span class="p">)</span> <span class="ow">or</span> <span class="n">pa</span><span class="o">.</span><span class="n">types</span><span class="o">.</span><span class="n">is_large_string</span><span class="p">(</span><span class="n">primitive</span><span class="p">):</span>
<a id="__codelineno-0-1193" name="__codelineno-0-1193"></a> <span class="k">return</span> <span class="n">StringType</span><span class="p">()</span>
<a id="__codelineno-0-1194" name="__codelineno-0-1194"></a> <span class="k">elif</span> <span class="n">pa</span><span class="o">.</span><span class="n">types</span><span class="o">.</span><span class="n">is_date32</span><span class="p">(</span><span class="n">primitive</span><span class="p">):</span>
<a id="__codelineno-0-1195" name="__codelineno-0-1195"></a> <span class="k">return</span> <span class="n">DateType</span><span class="p">()</span>
<a id="__codelineno-0-1196" name="__codelineno-0-1196"></a> <span class="k">elif</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">primitive</span><span class="p">,</span> <span class="n">pa</span><span class="o">.</span><span class="n">Time64Type</span><span class="p">)</span> <span class="ow">and</span> <span class="n">primitive</span><span class="o">.</span><span class="n">unit</span> <span class="o">==</span> <span class="s2">&quot;us&quot;</span><span class="p">:</span>
<a id="__codelineno-0-1197" name="__codelineno-0-1197"></a> <span class="k">return</span> <span class="n">TimeType</span><span class="p">()</span>
<a id="__codelineno-0-1198" name="__codelineno-0-1198"></a> <span class="k">elif</span> <span class="n">pa</span><span class="o">.</span><span class="n">types</span><span class="o">.</span><span class="n">is_timestamp</span><span class="p">(</span><span class="n">primitive</span><span class="p">):</span>
<a id="__codelineno-0-1199" name="__codelineno-0-1199"></a> <span class="n">primitive</span> <span class="o">=</span> <span class="n">cast</span><span class="p">(</span><span class="n">pa</span><span class="o">.</span><span class="n">TimestampType</span><span class="p">,</span> <span class="n">primitive</span><span class="p">)</span>
<a id="__codelineno-0-1200" name="__codelineno-0-1200"></a> <span class="k">if</span> <span class="n">primitive</span><span class="o">.</span><span class="n">unit</span> <span class="ow">in</span> <span class="p">(</span><span class="s2">&quot;s&quot;</span><span class="p">,</span> <span class="s2">&quot;ms&quot;</span><span class="p">,</span> <span class="s2">&quot;us&quot;</span><span class="p">):</span>
<a id="__codelineno-0-1201" name="__codelineno-0-1201"></a> <span class="c1"># Supported types, will be upcast automatically to &#39;us&#39;</span>
<a id="__codelineno-0-1202" name="__codelineno-0-1202"></a> <span class="k">pass</span>
<a id="__codelineno-0-1203" name="__codelineno-0-1203"></a> <span class="k">elif</span> <span class="n">primitive</span><span class="o">.</span><span class="n">unit</span> <span class="o">==</span> <span class="s2">&quot;ns&quot;</span><span class="p">:</span>
<a id="__codelineno-0-1204" name="__codelineno-0-1204"></a> <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">_downcast_ns_timestamp_to_us</span><span class="p">:</span>
<a id="__codelineno-0-1205" name="__codelineno-0-1205"></a> <span class="n">logger</span><span class="o">.</span><span class="n">warning</span><span class="p">(</span><span class="s2">&quot;Iceberg does not yet support &#39;ns&#39; timestamp precision. Downcasting to &#39;us&#39;.&quot;</span><span class="p">)</span>
<a id="__codelineno-0-1206" name="__codelineno-0-1206"></a> <span class="k">else</span><span class="p">:</span>
<a id="__codelineno-0-1207" name="__codelineno-0-1207"></a> <span class="k">raise</span> <span class="ne">TypeError</span><span class="p">(</span>
<a id="__codelineno-0-1208" name="__codelineno-0-1208"></a> <span class="s2">&quot;Iceberg does not yet support &#39;ns&#39; timestamp precision. Use &#39;downcast-ns-timestamp-to-us-on-write&#39; configuration property to automatically downcast &#39;ns&#39; to &#39;us&#39; on write.&quot;</span><span class="p">,</span>
<a id="__codelineno-0-1209" name="__codelineno-0-1209"></a> <span class="p">)</span>
<a id="__codelineno-0-1210" name="__codelineno-0-1210"></a> <span class="k">else</span><span class="p">:</span>
<a id="__codelineno-0-1211" name="__codelineno-0-1211"></a> <span class="k">raise</span> <span class="ne">TypeError</span><span class="p">(</span><span class="sa">f</span><span class="s2">&quot;Unsupported precision for timestamp type: </span><span class="si">{</span><span class="n">primitive</span><span class="o">.</span><span class="n">unit</span><span class="si">}</span><span class="s2">&quot;</span><span class="p">)</span>
<a id="__codelineno-0-1212" name="__codelineno-0-1212"></a>
<a id="__codelineno-0-1213" name="__codelineno-0-1213"></a> <span class="k">if</span> <span class="n">primitive</span><span class="o">.</span><span class="n">tz</span> <span class="ow">in</span> <span class="n">UTC_ALIASES</span><span class="p">:</span>
<a id="__codelineno-0-1214" name="__codelineno-0-1214"></a> <span class="k">return</span> <span class="n">TimestamptzType</span><span class="p">()</span>
<a id="__codelineno-0-1215" name="__codelineno-0-1215"></a> <span class="k">elif</span> <span class="n">primitive</span><span class="o">.</span><span class="n">tz</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
<a id="__codelineno-0-1216" name="__codelineno-0-1216"></a> <span class="k">return</span> <span class="n">TimestampType</span><span class="p">()</span>
<a id="__codelineno-0-1217" name="__codelineno-0-1217"></a>
<a id="__codelineno-0-1218" name="__codelineno-0-1218"></a> <span class="k">elif</span> <span class="n">pa</span><span class="o">.</span><span class="n">types</span><span class="o">.</span><span class="n">is_binary</span><span class="p">(</span><span class="n">primitive</span><span class="p">)</span> <span class="ow">or</span> <span class="n">pa</span><span class="o">.</span><span class="n">types</span><span class="o">.</span><span class="n">is_large_binary</span><span class="p">(</span><span class="n">primitive</span><span class="p">):</span>
<a id="__codelineno-0-1219" name="__codelineno-0-1219"></a> <span class="k">return</span> <span class="n">BinaryType</span><span class="p">()</span>
<a id="__codelineno-0-1220" name="__codelineno-0-1220"></a> <span class="k">elif</span> <span class="n">pa</span><span class="o">.</span><span class="n">types</span><span class="o">.</span><span class="n">is_fixed_size_binary</span><span class="p">(</span><span class="n">primitive</span><span class="p">):</span>
<a id="__codelineno-0-1221" name="__codelineno-0-1221"></a> <span class="n">primitive</span> <span class="o">=</span> <span class="n">cast</span><span class="p">(</span><span class="n">pa</span><span class="o">.</span><span class="n">FixedSizeBinaryType</span><span class="p">,</span> <span class="n">primitive</span><span class="p">)</span>
<a id="__codelineno-0-1222" name="__codelineno-0-1222"></a> <span class="k">return</span> <span class="n">FixedType</span><span class="p">(</span><span class="n">primitive</span><span class="o">.</span><span class="n">byte_width</span><span class="p">)</span>
<a id="__codelineno-0-1223" name="__codelineno-0-1223"></a>
<a id="__codelineno-0-1224" name="__codelineno-0-1224"></a> <span class="k">raise</span> <span class="ne">TypeError</span><span class="p">(</span><span class="sa">f</span><span class="s2">&quot;Unsupported type: </span><span class="si">{</span><span class="n">primitive</span><span class="si">}</span><span class="s2">&quot;</span><span class="p">)</span>
<a id="__codelineno-0-1225" name="__codelineno-0-1225"></a>
<a id="__codelineno-0-1226" name="__codelineno-0-1226"></a> <span class="k">def</span><span class="w"> </span><span class="nf">before_field</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">field</span><span class="p">:</span> <span class="n">pa</span><span class="o">.</span><span class="n">Field</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="kc">None</span><span class="p">:</span>
<a id="__codelineno-0-1227" name="__codelineno-0-1227"></a> <span class="bp">self</span><span class="o">.</span><span class="n">_field_names</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">field</span><span class="o">.</span><span class="n">name</span><span class="p">)</span>
<a id="__codelineno-0-1228" name="__codelineno-0-1228"></a>
<a id="__codelineno-0-1229" name="__codelineno-0-1229"></a> <span class="k">def</span><span class="w"> </span><span class="nf">after_field</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">field</span><span class="p">:</span> <span class="n">pa</span><span class="o">.</span><span class="n">Field</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="kc">None</span><span class="p">:</span>
<a id="__codelineno-0-1230" name="__codelineno-0-1230"></a> <span class="bp">self</span><span class="o">.</span><span class="n">_field_names</span><span class="o">.</span><span class="n">pop</span><span class="p">()</span>
<a id="__codelineno-0-1231" name="__codelineno-0-1231"></a>
<a id="__codelineno-0-1232" name="__codelineno-0-1232"></a> <span class="k">def</span><span class="w"> </span><span class="nf">before_list_element</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">element</span><span class="p">:</span> <span class="n">pa</span><span class="o">.</span><span class="n">Field</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="kc">None</span><span class="p">:</span>
<a id="__codelineno-0-1233" name="__codelineno-0-1233"></a> <span class="bp">self</span><span class="o">.</span><span class="n">_field_names</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">LIST_ELEMENT_NAME</span><span class="p">)</span>
<a id="__codelineno-0-1234" name="__codelineno-0-1234"></a>
<a id="__codelineno-0-1235" name="__codelineno-0-1235"></a> <span class="k">def</span><span class="w"> </span><span class="nf">after_list_element</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">element</span><span class="p">:</span> <span class="n">pa</span><span class="o">.</span><span class="n">Field</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="kc">None</span><span class="p">:</span>
<a id="__codelineno-0-1236" name="__codelineno-0-1236"></a> <span class="bp">self</span><span class="o">.</span><span class="n">_field_names</span><span class="o">.</span><span class="n">pop</span><span class="p">()</span>
<a id="__codelineno-0-1237" name="__codelineno-0-1237"></a>
<a id="__codelineno-0-1238" name="__codelineno-0-1238"></a> <span class="k">def</span><span class="w"> </span><span class="nf">before_map_key</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">key</span><span class="p">:</span> <span class="n">pa</span><span class="o">.</span><span class="n">Field</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="kc">None</span><span class="p">:</span>
<a id="__codelineno-0-1239" name="__codelineno-0-1239"></a> <span class="bp">self</span><span class="o">.</span><span class="n">_field_names</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">MAP_KEY_NAME</span><span class="p">)</span>
<a id="__codelineno-0-1240" name="__codelineno-0-1240"></a>
<a id="__codelineno-0-1241" name="__codelineno-0-1241"></a> <span class="k">def</span><span class="w"> </span><span class="nf">after_map_key</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">element</span><span class="p">:</span> <span class="n">pa</span><span class="o">.</span><span class="n">Field</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="kc">None</span><span class="p">:</span>
<a id="__codelineno-0-1242" name="__codelineno-0-1242"></a> <span class="bp">self</span><span class="o">.</span><span class="n">_field_names</span><span class="o">.</span><span class="n">pop</span><span class="p">()</span>
<a id="__codelineno-0-1243" name="__codelineno-0-1243"></a>
<a id="__codelineno-0-1244" name="__codelineno-0-1244"></a> <span class="k">def</span><span class="w"> </span><span class="nf">before_map_value</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="n">pa</span><span class="o">.</span><span class="n">Field</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="kc">None</span><span class="p">:</span>
<a id="__codelineno-0-1245" name="__codelineno-0-1245"></a> <span class="bp">self</span><span class="o">.</span><span class="n">_field_names</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">MAP_VALUE_NAME</span><span class="p">)</span>
<a id="__codelineno-0-1246" name="__codelineno-0-1246"></a>
<a id="__codelineno-0-1247" name="__codelineno-0-1247"></a> <span class="k">def</span><span class="w"> </span><span class="nf">after_map_value</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">element</span><span class="p">:</span> <span class="n">pa</span><span class="o">.</span><span class="n">Field</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="kc">None</span><span class="p">:</span>
<a id="__codelineno-0-1248" name="__codelineno-0-1248"></a> <span class="bp">self</span><span class="o">.</span><span class="n">_field_names</span><span class="o">.</span><span class="n">pop</span><span class="p">()</span>
</code></pre></div></td></tr></table></div>
</details>
<div class="doc doc-children">
</div>
</div>
</div>
<div class="doc doc-object doc-class">
<h2 id="pyiceberg.io.pyarrow._ConvertToIcebergWithoutIDs" class="doc doc-heading">
<code>_ConvertToIcebergWithoutIDs</code>
<a href="#pyiceberg.io.pyarrow._ConvertToIcebergWithoutIDs" class="headerlink" title="Permanent link">&para;</a></h2>
<div class="doc doc-contents ">
<p class="doc doc-class-bases">
Bases: <code><a class="autorefs autorefs-internal" title="pyiceberg.io.pyarrow._ConvertToIceberg" href="#pyiceberg.io.pyarrow._ConvertToIceberg">_ConvertToIceberg</a></code></p>
<p>Converts PyArrowSchema to Iceberg Schema with all -1 ids.</p>
<p>The schema generated through this visitor should always be
used in conjunction with the <code>new_table_metadata</code> function to
assign new field ids in order. This is currently used only
when creating an Iceberg Schema from a PyArrow schema when
creating a new Iceberg table.</p>
<details class="quote">
<summary>Source code in <code>pyiceberg/io/pyarrow.py</code></summary>
<div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-1299">1299</a></span>
<span class="normal"><a href="#__codelineno-0-1300">1300</a></span>
<span class="normal"><a href="#__codelineno-0-1301">1301</a></span>
<span class="normal"><a href="#__codelineno-0-1302">1302</a></span>
<span class="normal"><a href="#__codelineno-0-1303">1303</a></span>
<span class="normal"><a href="#__codelineno-0-1304">1304</a></span>
<span class="normal"><a href="#__codelineno-0-1305">1305</a></span>
<span class="normal"><a href="#__codelineno-0-1306">1306</a></span>
<span class="normal"><a href="#__codelineno-0-1307">1307</a></span>
<span class="normal"><a href="#__codelineno-0-1308">1308</a></span>
<span class="normal"><a href="#__codelineno-0-1309">1309</a></span>
<span class="normal"><a href="#__codelineno-0-1310">1310</a></span>
<span class="normal"><a href="#__codelineno-0-1311">1311</a></span></pre></div></td><td class="code"><div><pre><span></span><code><a id="__codelineno-0-1299" name="__codelineno-0-1299"></a><span class="k">class</span><span class="w"> </span><span class="nc">_ConvertToIcebergWithoutIDs</span><span class="p">(</span><span class="n">_ConvertToIceberg</span><span class="p">):</span>
<a id="__codelineno-0-1300" name="__codelineno-0-1300"></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<a id="__codelineno-0-1301" name="__codelineno-0-1301"></a><span class="sd"> Converts PyArrowSchema to Iceberg Schema with all -1 ids.</span>
<a id="__codelineno-0-1302" name="__codelineno-0-1302"></a>
<a id="__codelineno-0-1303" name="__codelineno-0-1303"></a><span class="sd"> The schema generated through this visitor should always be</span>
<a id="__codelineno-0-1304" name="__codelineno-0-1304"></a><span class="sd"> used in conjunction with the `new_table_metadata` function to</span>
<a id="__codelineno-0-1305" name="__codelineno-0-1305"></a><span class="sd"> assign new field ids in order. This is currently used only</span>
<a id="__codelineno-0-1306" name="__codelineno-0-1306"></a><span class="sd"> when creating an Iceberg Schema from a PyArrow schema when</span>
<a id="__codelineno-0-1307" name="__codelineno-0-1307"></a><span class="sd"> creating a new Iceberg table.</span>
<a id="__codelineno-0-1308" name="__codelineno-0-1308"></a><span class="sd"> &quot;&quot;&quot;</span>
<a id="__codelineno-0-1309" name="__codelineno-0-1309"></a>
<a id="__codelineno-0-1310" name="__codelineno-0-1310"></a> <span class="k">def</span><span class="w"> </span><span class="nf">_field_id</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">field</span><span class="p">:</span> <span class="n">pa</span><span class="o">.</span><span class="n">Field</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="nb">int</span><span class="p">:</span>
<a id="__codelineno-0-1311" name="__codelineno-0-1311"></a> <span class="k">return</span> <span class="o">-</span><span class="mi">1</span>
</code></pre></div></td></tr></table></div>
</details>
<div class="doc doc-children">
</div>
</div>
</div>
<div class="doc doc-object doc-class">
<h2 id="pyiceberg.io.pyarrow._NullNaNUnmentionedTermsCollector" class="doc doc-heading">
<code>_NullNaNUnmentionedTermsCollector</code>
<a href="#pyiceberg.io.pyarrow._NullNaNUnmentionedTermsCollector" class="headerlink" title="Permanent link">&para;</a></h2>
<div class="doc doc-contents ">
<p class="doc doc-class-bases">
Bases: <code><a class="autorefs autorefs-internal" title="pyiceberg.expressions.visitors.BoundBooleanExpressionVisitor" href="../../expressions/visitors/#pyiceberg.expressions.visitors.BoundBooleanExpressionVisitor">BoundBooleanExpressionVisitor</a>[None]</code></p>
<details class="quote">
<summary>Source code in <code>pyiceberg/io/pyarrow.py</code></summary>
<div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-746">746</a></span>
<span class="normal"><a href="#__codelineno-0-747">747</a></span>
<span class="normal"><a href="#__codelineno-0-748">748</a></span>
<span class="normal"><a href="#__codelineno-0-749">749</a></span>
<span class="normal"><a href="#__codelineno-0-750">750</a></span>
<span class="normal"><a href="#__codelineno-0-751">751</a></span>
<span class="normal"><a href="#__codelineno-0-752">752</a></span>
<span class="normal"><a href="#__codelineno-0-753">753</a></span>
<span class="normal"><a href="#__codelineno-0-754">754</a></span>
<span class="normal"><a href="#__codelineno-0-755">755</a></span>
<span class="normal"><a href="#__codelineno-0-756">756</a></span>
<span class="normal"><a href="#__codelineno-0-757">757</a></span>
<span class="normal"><a href="#__codelineno-0-758">758</a></span>
<span class="normal"><a href="#__codelineno-0-759">759</a></span>
<span class="normal"><a href="#__codelineno-0-760">760</a></span>
<span class="normal"><a href="#__codelineno-0-761">761</a></span>
<span class="normal"><a href="#__codelineno-0-762">762</a></span>
<span class="normal"><a href="#__codelineno-0-763">763</a></span>
<span class="normal"><a href="#__codelineno-0-764">764</a></span>
<span class="normal"><a href="#__codelineno-0-765">765</a></span>
<span class="normal"><a href="#__codelineno-0-766">766</a></span>
<span class="normal"><a href="#__codelineno-0-767">767</a></span>
<span class="normal"><a href="#__codelineno-0-768">768</a></span>
<span class="normal"><a href="#__codelineno-0-769">769</a></span>
<span class="normal"><a href="#__codelineno-0-770">770</a></span>
<span class="normal"><a href="#__codelineno-0-771">771</a></span>
<span class="normal"><a href="#__codelineno-0-772">772</a></span>
<span class="normal"><a href="#__codelineno-0-773">773</a></span>
<span class="normal"><a href="#__codelineno-0-774">774</a></span>
<span class="normal"><a href="#__codelineno-0-775">775</a></span>
<span class="normal"><a href="#__codelineno-0-776">776</a></span>
<span class="normal"><a href="#__codelineno-0-777">777</a></span>
<span class="normal"><a href="#__codelineno-0-778">778</a></span>
<span class="normal"><a href="#__codelineno-0-779">779</a></span>
<span class="normal"><a href="#__codelineno-0-780">780</a></span>
<span class="normal"><a href="#__codelineno-0-781">781</a></span>
<span class="normal"><a href="#__codelineno-0-782">782</a></span>
<span class="normal"><a href="#__codelineno-0-783">783</a></span>
<span class="normal"><a href="#__codelineno-0-784">784</a></span>
<span class="normal"><a href="#__codelineno-0-785">785</a></span>
<span class="normal"><a href="#__codelineno-0-786">786</a></span>
<span class="normal"><a href="#__codelineno-0-787">787</a></span>
<span class="normal"><a href="#__codelineno-0-788">788</a></span>
<span class="normal"><a href="#__codelineno-0-789">789</a></span>
<span class="normal"><a href="#__codelineno-0-790">790</a></span>
<span class="normal"><a href="#__codelineno-0-791">791</a></span>
<span class="normal"><a href="#__codelineno-0-792">792</a></span>
<span class="normal"><a href="#__codelineno-0-793">793</a></span>
<span class="normal"><a href="#__codelineno-0-794">794</a></span>
<span class="normal"><a href="#__codelineno-0-795">795</a></span>
<span class="normal"><a href="#__codelineno-0-796">796</a></span>
<span class="normal"><a href="#__codelineno-0-797">797</a></span>
<span class="normal"><a href="#__codelineno-0-798">798</a></span>
<span class="normal"><a href="#__codelineno-0-799">799</a></span>
<span class="normal"><a href="#__codelineno-0-800">800</a></span>
<span class="normal"><a href="#__codelineno-0-801">801</a></span>
<span class="normal"><a href="#__codelineno-0-802">802</a></span>
<span class="normal"><a href="#__codelineno-0-803">803</a></span>
<span class="normal"><a href="#__codelineno-0-804">804</a></span>
<span class="normal"><a href="#__codelineno-0-805">805</a></span>
<span class="normal"><a href="#__codelineno-0-806">806</a></span>
<span class="normal"><a href="#__codelineno-0-807">807</a></span>
<span class="normal"><a href="#__codelineno-0-808">808</a></span>
<span class="normal"><a href="#__codelineno-0-809">809</a></span>
<span class="normal"><a href="#__codelineno-0-810">810</a></span>
<span class="normal"><a href="#__codelineno-0-811">811</a></span>
<span class="normal"><a href="#__codelineno-0-812">812</a></span>
<span class="normal"><a href="#__codelineno-0-813">813</a></span>
<span class="normal"><a href="#__codelineno-0-814">814</a></span>
<span class="normal"><a href="#__codelineno-0-815">815</a></span>
<span class="normal"><a href="#__codelineno-0-816">816</a></span>
<span class="normal"><a href="#__codelineno-0-817">817</a></span>
<span class="normal"><a href="#__codelineno-0-818">818</a></span>
<span class="normal"><a href="#__codelineno-0-819">819</a></span>
<span class="normal"><a href="#__codelineno-0-820">820</a></span>
<span class="normal"><a href="#__codelineno-0-821">821</a></span>
<span class="normal"><a href="#__codelineno-0-822">822</a></span>
<span class="normal"><a href="#__codelineno-0-823">823</a></span>
<span class="normal"><a href="#__codelineno-0-824">824</a></span>
<span class="normal"><a href="#__codelineno-0-825">825</a></span>
<span class="normal"><a href="#__codelineno-0-826">826</a></span>
<span class="normal"><a href="#__codelineno-0-827">827</a></span>
<span class="normal"><a href="#__codelineno-0-828">828</a></span>
<span class="normal"><a href="#__codelineno-0-829">829</a></span>
<span class="normal"><a href="#__codelineno-0-830">830</a></span>
<span class="normal"><a href="#__codelineno-0-831">831</a></span>
<span class="normal"><a href="#__codelineno-0-832">832</a></span>
<span class="normal"><a href="#__codelineno-0-833">833</a></span>
<span class="normal"><a href="#__codelineno-0-834">834</a></span>
<span class="normal"><a href="#__codelineno-0-835">835</a></span>
<span class="normal"><a href="#__codelineno-0-836">836</a></span>
<span class="normal"><a href="#__codelineno-0-837">837</a></span>
<span class="normal"><a href="#__codelineno-0-838">838</a></span>
<span class="normal"><a href="#__codelineno-0-839">839</a></span>
<span class="normal"><a href="#__codelineno-0-840">840</a></span>
<span class="normal"><a href="#__codelineno-0-841">841</a></span>
<span class="normal"><a href="#__codelineno-0-842">842</a></span>
<span class="normal"><a href="#__codelineno-0-843">843</a></span>
<span class="normal"><a href="#__codelineno-0-844">844</a></span>
<span class="normal"><a href="#__codelineno-0-845">845</a></span>
<span class="normal"><a href="#__codelineno-0-846">846</a></span>
<span class="normal"><a href="#__codelineno-0-847">847</a></span>
<span class="normal"><a href="#__codelineno-0-848">848</a></span>
<span class="normal"><a href="#__codelineno-0-849">849</a></span>
<span class="normal"><a href="#__codelineno-0-850">850</a></span>
<span class="normal"><a href="#__codelineno-0-851">851</a></span>
<span class="normal"><a href="#__codelineno-0-852">852</a></span>
<span class="normal"><a href="#__codelineno-0-853">853</a></span>
<span class="normal"><a href="#__codelineno-0-854">854</a></span>
<span class="normal"><a href="#__codelineno-0-855">855</a></span>
<span class="normal"><a href="#__codelineno-0-856">856</a></span>
<span class="normal"><a href="#__codelineno-0-857">857</a></span>
<span class="normal"><a href="#__codelineno-0-858">858</a></span>
<span class="normal"><a href="#__codelineno-0-859">859</a></span>
<span class="normal"><a href="#__codelineno-0-860">860</a></span>
<span class="normal"><a href="#__codelineno-0-861">861</a></span></pre></div></td><td class="code"><div><pre><span></span><code><a id="__codelineno-0-746" name="__codelineno-0-746"></a><span class="k">class</span><span class="w"> </span><span class="nc">_NullNaNUnmentionedTermsCollector</span><span class="p">(</span><span class="n">BoundBooleanExpressionVisitor</span><span class="p">[</span><span class="kc">None</span><span class="p">]):</span>
<a id="__codelineno-0-747" name="__codelineno-0-747"></a> <span class="c1"># BoundTerms which have either is_null or is_not_null appearing at least once in the boolean expr.</span>
<a id="__codelineno-0-748" name="__codelineno-0-748"></a> <span class="n">is_null_or_not_bound_terms</span><span class="p">:</span> <span class="nb">set</span><span class="p">[</span><span class="n">BoundTerm</span><span class="p">[</span><span class="n">Any</span><span class="p">]]</span>
<a id="__codelineno-0-749" name="__codelineno-0-749"></a> <span class="c1"># The remaining BoundTerms appearing in the boolean expr.</span>
<a id="__codelineno-0-750" name="__codelineno-0-750"></a> <span class="n">null_unmentioned_bound_terms</span><span class="p">:</span> <span class="nb">set</span><span class="p">[</span><span class="n">BoundTerm</span><span class="p">[</span><span class="n">Any</span><span class="p">]]</span>
<a id="__codelineno-0-751" name="__codelineno-0-751"></a> <span class="c1"># BoundTerms which have either is_nan or is_not_nan appearing at least once in the boolean expr.</span>
<a id="__codelineno-0-752" name="__codelineno-0-752"></a> <span class="n">is_nan_or_not_bound_terms</span><span class="p">:</span> <span class="nb">set</span><span class="p">[</span><span class="n">BoundTerm</span><span class="p">[</span><span class="n">Any</span><span class="p">]]</span>
<a id="__codelineno-0-753" name="__codelineno-0-753"></a> <span class="c1"># The remaining BoundTerms appearing in the boolean expr.</span>
<a id="__codelineno-0-754" name="__codelineno-0-754"></a> <span class="n">nan_unmentioned_bound_terms</span><span class="p">:</span> <span class="nb">set</span><span class="p">[</span><span class="n">BoundTerm</span><span class="p">[</span><span class="n">Any</span><span class="p">]]</span>
<a id="__codelineno-0-755" name="__codelineno-0-755"></a>
<a id="__codelineno-0-756" name="__codelineno-0-756"></a> <span class="k">def</span><span class="w"> </span><span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="kc">None</span><span class="p">:</span>
<a id="__codelineno-0-757" name="__codelineno-0-757"></a> <span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="fm">__init__</span><span class="p">()</span>
<a id="__codelineno-0-758" name="__codelineno-0-758"></a> <span class="bp">self</span><span class="o">.</span><span class="n">is_null_or_not_bound_terms</span> <span class="o">=</span> <span class="nb">set</span><span class="p">()</span>
<a id="__codelineno-0-759" name="__codelineno-0-759"></a> <span class="bp">self</span><span class="o">.</span><span class="n">null_unmentioned_bound_terms</span> <span class="o">=</span> <span class="nb">set</span><span class="p">()</span>
<a id="__codelineno-0-760" name="__codelineno-0-760"></a> <span class="bp">self</span><span class="o">.</span><span class="n">is_nan_or_not_bound_terms</span> <span class="o">=</span> <span class="nb">set</span><span class="p">()</span>
<a id="__codelineno-0-761" name="__codelineno-0-761"></a> <span class="bp">self</span><span class="o">.</span><span class="n">nan_unmentioned_bound_terms</span> <span class="o">=</span> <span class="nb">set</span><span class="p">()</span>
<a id="__codelineno-0-762" name="__codelineno-0-762"></a>
<a id="__codelineno-0-763" name="__codelineno-0-763"></a> <span class="k">def</span><span class="w"> </span><span class="nf">_handle_explicit_is_null_or_not</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">term</span><span class="p">:</span> <span class="n">BoundTerm</span><span class="p">[</span><span class="n">Any</span><span class="p">])</span> <span class="o">-&gt;</span> <span class="kc">None</span><span class="p">:</span>
<a id="__codelineno-0-764" name="__codelineno-0-764"></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;Handle the predicate case where either is_null or is_not_null is included.&quot;&quot;&quot;</span>
<a id="__codelineno-0-765" name="__codelineno-0-765"></a> <span class="k">if</span> <span class="n">term</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">null_unmentioned_bound_terms</span><span class="p">:</span>
<a id="__codelineno-0-766" name="__codelineno-0-766"></a> <span class="bp">self</span><span class="o">.</span><span class="n">null_unmentioned_bound_terms</span><span class="o">.</span><span class="n">remove</span><span class="p">(</span><span class="n">term</span><span class="p">)</span>
<a id="__codelineno-0-767" name="__codelineno-0-767"></a> <span class="bp">self</span><span class="o">.</span><span class="n">is_null_or_not_bound_terms</span><span class="o">.</span><span class="n">add</span><span class="p">(</span><span class="n">term</span><span class="p">)</span>
<a id="__codelineno-0-768" name="__codelineno-0-768"></a>
<a id="__codelineno-0-769" name="__codelineno-0-769"></a> <span class="k">def</span><span class="w"> </span><span class="nf">_handle_null_unmentioned</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">term</span><span class="p">:</span> <span class="n">BoundTerm</span><span class="p">[</span><span class="n">Any</span><span class="p">])</span> <span class="o">-&gt;</span> <span class="kc">None</span><span class="p">:</span>
<a id="__codelineno-0-770" name="__codelineno-0-770"></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;Handle the predicate case where neither is_null nor is_not_null is included.&quot;&quot;&quot;</span>
<a id="__codelineno-0-771" name="__codelineno-0-771"></a> <span class="k">if</span> <span class="n">term</span> <span class="ow">not</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">is_null_or_not_bound_terms</span><span class="p">:</span>
<a id="__codelineno-0-772" name="__codelineno-0-772"></a> <span class="bp">self</span><span class="o">.</span><span class="n">null_unmentioned_bound_terms</span><span class="o">.</span><span class="n">add</span><span class="p">(</span><span class="n">term</span><span class="p">)</span>
<a id="__codelineno-0-773" name="__codelineno-0-773"></a>
<a id="__codelineno-0-774" name="__codelineno-0-774"></a> <span class="k">def</span><span class="w"> </span><span class="nf">_handle_explicit_is_nan_or_not</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">term</span><span class="p">:</span> <span class="n">BoundTerm</span><span class="p">[</span><span class="n">Any</span><span class="p">])</span> <span class="o">-&gt;</span> <span class="kc">None</span><span class="p">:</span>
<a id="__codelineno-0-775" name="__codelineno-0-775"></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;Handle the predicate case where either is_nan or is_not_nan is included.&quot;&quot;&quot;</span>
<a id="__codelineno-0-776" name="__codelineno-0-776"></a> <span class="k">if</span> <span class="n">term</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">nan_unmentioned_bound_terms</span><span class="p">:</span>
<a id="__codelineno-0-777" name="__codelineno-0-777"></a> <span class="bp">self</span><span class="o">.</span><span class="n">nan_unmentioned_bound_terms</span><span class="o">.</span><span class="n">remove</span><span class="p">(</span><span class="n">term</span><span class="p">)</span>
<a id="__codelineno-0-778" name="__codelineno-0-778"></a> <span class="bp">self</span><span class="o">.</span><span class="n">is_nan_or_not_bound_terms</span><span class="o">.</span><span class="n">add</span><span class="p">(</span><span class="n">term</span><span class="p">)</span>
<a id="__codelineno-0-779" name="__codelineno-0-779"></a>
<a id="__codelineno-0-780" name="__codelineno-0-780"></a> <span class="k">def</span><span class="w"> </span><span class="nf">_handle_nan_unmentioned</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">term</span><span class="p">:</span> <span class="n">BoundTerm</span><span class="p">[</span><span class="n">Any</span><span class="p">])</span> <span class="o">-&gt;</span> <span class="kc">None</span><span class="p">:</span>
<a id="__codelineno-0-781" name="__codelineno-0-781"></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;Handle the predicate case where neither is_nan nor is_not_nan is included.&quot;&quot;&quot;</span>
<a id="__codelineno-0-782" name="__codelineno-0-782"></a> <span class="k">if</span> <span class="n">term</span> <span class="ow">not</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">is_nan_or_not_bound_terms</span><span class="p">:</span>
<a id="__codelineno-0-783" name="__codelineno-0-783"></a> <span class="bp">self</span><span class="o">.</span><span class="n">nan_unmentioned_bound_terms</span><span class="o">.</span><span class="n">add</span><span class="p">(</span><span class="n">term</span><span class="p">)</span>
<a id="__codelineno-0-784" name="__codelineno-0-784"></a>
<a id="__codelineno-0-785" name="__codelineno-0-785"></a> <span class="k">def</span><span class="w"> </span><span class="nf">visit_in</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">term</span><span class="p">:</span> <span class="n">BoundTerm</span><span class="p">[</span><span class="n">Any</span><span class="p">],</span> <span class="n">literals</span><span class="p">:</span> <span class="n">Set</span><span class="p">[</span><span class="n">Any</span><span class="p">])</span> <span class="o">-&gt;</span> <span class="kc">None</span><span class="p">:</span>
<a id="__codelineno-0-786" name="__codelineno-0-786"></a> <span class="bp">self</span><span class="o">.</span><span class="n">_handle_null_unmentioned</span><span class="p">(</span><span class="n">term</span><span class="p">)</span>
<a id="__codelineno-0-787" name="__codelineno-0-787"></a> <span class="bp">self</span><span class="o">.</span><span class="n">_handle_nan_unmentioned</span><span class="p">(</span><span class="n">term</span><span class="p">)</span>
<a id="__codelineno-0-788" name="__codelineno-0-788"></a>
<a id="__codelineno-0-789" name="__codelineno-0-789"></a> <span class="k">def</span><span class="w"> </span><span class="nf">visit_not_in</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">term</span><span class="p">:</span> <span class="n">BoundTerm</span><span class="p">[</span><span class="n">Any</span><span class="p">],</span> <span class="n">literals</span><span class="p">:</span> <span class="n">Set</span><span class="p">[</span><span class="n">Any</span><span class="p">])</span> <span class="o">-&gt;</span> <span class="kc">None</span><span class="p">:</span>
<a id="__codelineno-0-790" name="__codelineno-0-790"></a> <span class="bp">self</span><span class="o">.</span><span class="n">_handle_null_unmentioned</span><span class="p">(</span><span class="n">term</span><span class="p">)</span>
<a id="__codelineno-0-791" name="__codelineno-0-791"></a> <span class="bp">self</span><span class="o">.</span><span class="n">_handle_nan_unmentioned</span><span class="p">(</span><span class="n">term</span><span class="p">)</span>
<a id="__codelineno-0-792" name="__codelineno-0-792"></a>
<a id="__codelineno-0-793" name="__codelineno-0-793"></a> <span class="k">def</span><span class="w"> </span><span class="nf">visit_is_nan</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">term</span><span class="p">:</span> <span class="n">BoundTerm</span><span class="p">[</span><span class="n">Any</span><span class="p">])</span> <span class="o">-&gt;</span> <span class="kc">None</span><span class="p">:</span>
<a id="__codelineno-0-794" name="__codelineno-0-794"></a> <span class="bp">self</span><span class="o">.</span><span class="n">_handle_null_unmentioned</span><span class="p">(</span><span class="n">term</span><span class="p">)</span>
<a id="__codelineno-0-795" name="__codelineno-0-795"></a> <span class="bp">self</span><span class="o">.</span><span class="n">_handle_explicit_is_nan_or_not</span><span class="p">(</span><span class="n">term</span><span class="p">)</span>
<a id="__codelineno-0-796" name="__codelineno-0-796"></a>
<a id="__codelineno-0-797" name="__codelineno-0-797"></a> <span class="k">def</span><span class="w"> </span><span class="nf">visit_not_nan</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">term</span><span class="p">:</span> <span class="n">BoundTerm</span><span class="p">[</span><span class="n">Any</span><span class="p">])</span> <span class="o">-&gt;</span> <span class="kc">None</span><span class="p">:</span>
<a id="__codelineno-0-798" name="__codelineno-0-798"></a> <span class="bp">self</span><span class="o">.</span><span class="n">_handle_null_unmentioned</span><span class="p">(</span><span class="n">term</span><span class="p">)</span>
<a id="__codelineno-0-799" name="__codelineno-0-799"></a> <span class="bp">self</span><span class="o">.</span><span class="n">_handle_explicit_is_nan_or_not</span><span class="p">(</span><span class="n">term</span><span class="p">)</span>
<a id="__codelineno-0-800" name="__codelineno-0-800"></a>
<a id="__codelineno-0-801" name="__codelineno-0-801"></a> <span class="k">def</span><span class="w"> </span><span class="nf">visit_is_null</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">term</span><span class="p">:</span> <span class="n">BoundTerm</span><span class="p">[</span><span class="n">Any</span><span class="p">])</span> <span class="o">-&gt;</span> <span class="kc">None</span><span class="p">:</span>
<a id="__codelineno-0-802" name="__codelineno-0-802"></a> <span class="bp">self</span><span class="o">.</span><span class="n">_handle_explicit_is_null_or_not</span><span class="p">(</span><span class="n">term</span><span class="p">)</span>
<a id="__codelineno-0-803" name="__codelineno-0-803"></a> <span class="bp">self</span><span class="o">.</span><span class="n">_handle_nan_unmentioned</span><span class="p">(</span><span class="n">term</span><span class="p">)</span>
<a id="__codelineno-0-804" name="__codelineno-0-804"></a>
<a id="__codelineno-0-805" name="__codelineno-0-805"></a> <span class="k">def</span><span class="w"> </span><span class="nf">visit_not_null</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">term</span><span class="p">:</span> <span class="n">BoundTerm</span><span class="p">[</span><span class="n">Any</span><span class="p">])</span> <span class="o">-&gt;</span> <span class="kc">None</span><span class="p">:</span>
<a id="__codelineno-0-806" name="__codelineno-0-806"></a> <span class="bp">self</span><span class="o">.</span><span class="n">_handle_explicit_is_null_or_not</span><span class="p">(</span><span class="n">term</span><span class="p">)</span>
<a id="__codelineno-0-807" name="__codelineno-0-807"></a> <span class="bp">self</span><span class="o">.</span><span class="n">_handle_nan_unmentioned</span><span class="p">(</span><span class="n">term</span><span class="p">)</span>
<a id="__codelineno-0-808" name="__codelineno-0-808"></a>
<a id="__codelineno-0-809" name="__codelineno-0-809"></a> <span class="k">def</span><span class="w"> </span><span class="nf">visit_equal</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">term</span><span class="p">:</span> <span class="n">BoundTerm</span><span class="p">[</span><span class="n">Any</span><span class="p">],</span> <span class="n">literal</span><span class="p">:</span> <span class="n">Literal</span><span class="p">[</span><span class="n">Any</span><span class="p">])</span> <span class="o">-&gt;</span> <span class="kc">None</span><span class="p">:</span>
<a id="__codelineno-0-810" name="__codelineno-0-810"></a> <span class="bp">self</span><span class="o">.</span><span class="n">_handle_null_unmentioned</span><span class="p">(</span><span class="n">term</span><span class="p">)</span>
<a id="__codelineno-0-811" name="__codelineno-0-811"></a> <span class="bp">self</span><span class="o">.</span><span class="n">_handle_nan_unmentioned</span><span class="p">(</span><span class="n">term</span><span class="p">)</span>
<a id="__codelineno-0-812" name="__codelineno-0-812"></a>
<a id="__codelineno-0-813" name="__codelineno-0-813"></a> <span class="k">def</span><span class="w"> </span><span class="nf">visit_not_equal</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">term</span><span class="p">:</span> <span class="n">BoundTerm</span><span class="p">[</span><span class="n">Any</span><span class="p">],</span> <span class="n">literal</span><span class="p">:</span> <span class="n">Literal</span><span class="p">[</span><span class="n">Any</span><span class="p">])</span> <span class="o">-&gt;</span> <span class="kc">None</span><span class="p">:</span>
<a id="__codelineno-0-814" name="__codelineno-0-814"></a> <span class="bp">self</span><span class="o">.</span><span class="n">_handle_null_unmentioned</span><span class="p">(</span><span class="n">term</span><span class="p">)</span>
<a id="__codelineno-0-815" name="__codelineno-0-815"></a> <span class="bp">self</span><span class="o">.</span><span class="n">_handle_nan_unmentioned</span><span class="p">(</span><span class="n">term</span><span class="p">)</span>
<a id="__codelineno-0-816" name="__codelineno-0-816"></a>
<a id="__codelineno-0-817" name="__codelineno-0-817"></a> <span class="k">def</span><span class="w"> </span><span class="nf">visit_greater_than_or_equal</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">term</span><span class="p">:</span> <span class="n">BoundTerm</span><span class="p">[</span><span class="n">Any</span><span class="p">],</span> <span class="n">literal</span><span class="p">:</span> <span class="n">Literal</span><span class="p">[</span><span class="n">Any</span><span class="p">])</span> <span class="o">-&gt;</span> <span class="kc">None</span><span class="p">:</span>
<a id="__codelineno-0-818" name="__codelineno-0-818"></a> <span class="bp">self</span><span class="o">.</span><span class="n">_handle_null_unmentioned</span><span class="p">(</span><span class="n">term</span><span class="p">)</span>
<a id="__codelineno-0-819" name="__codelineno-0-819"></a> <span class="bp">self</span><span class="o">.</span><span class="n">_handle_nan_unmentioned</span><span class="p">(</span><span class="n">term</span><span class="p">)</span>
<a id="__codelineno-0-820" name="__codelineno-0-820"></a>
<a id="__codelineno-0-821" name="__codelineno-0-821"></a> <span class="k">def</span><span class="w"> </span><span class="nf">visit_greater_than</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">term</span><span class="p">:</span> <span class="n">BoundTerm</span><span class="p">[</span><span class="n">Any</span><span class="p">],</span> <span class="n">literal</span><span class="p">:</span> <span class="n">Literal</span><span class="p">[</span><span class="n">Any</span><span class="p">])</span> <span class="o">-&gt;</span> <span class="kc">None</span><span class="p">:</span>
<a id="__codelineno-0-822" name="__codelineno-0-822"></a> <span class="bp">self</span><span class="o">.</span><span class="n">_handle_null_unmentioned</span><span class="p">(</span><span class="n">term</span><span class="p">)</span>
<a id="__codelineno-0-823" name="__codelineno-0-823"></a> <span class="bp">self</span><span class="o">.</span><span class="n">_handle_nan_unmentioned</span><span class="p">(</span><span class="n">term</span><span class="p">)</span>
<a id="__codelineno-0-824" name="__codelineno-0-824"></a>
<a id="__codelineno-0-825" name="__codelineno-0-825"></a> <span class="k">def</span><span class="w"> </span><span class="nf">visit_less_than</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">term</span><span class="p">:</span> <span class="n">BoundTerm</span><span class="p">[</span><span class="n">Any</span><span class="p">],</span> <span class="n">literal</span><span class="p">:</span> <span class="n">Literal</span><span class="p">[</span><span class="n">Any</span><span class="p">])</span> <span class="o">-&gt;</span> <span class="kc">None</span><span class="p">:</span>
<a id="__codelineno-0-826" name="__codelineno-0-826"></a> <span class="bp">self</span><span class="o">.</span><span class="n">_handle_null_unmentioned</span><span class="p">(</span><span class="n">term</span><span class="p">)</span>
<a id="__codelineno-0-827" name="__codelineno-0-827"></a> <span class="bp">self</span><span class="o">.</span><span class="n">_handle_nan_unmentioned</span><span class="p">(</span><span class="n">term</span><span class="p">)</span>
<a id="__codelineno-0-828" name="__codelineno-0-828"></a>
<a id="__codelineno-0-829" name="__codelineno-0-829"></a> <span class="k">def</span><span class="w"> </span><span class="nf">visit_less_than_or_equal</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">term</span><span class="p">:</span> <span class="n">BoundTerm</span><span class="p">[</span><span class="n">Any</span><span class="p">],</span> <span class="n">literal</span><span class="p">:</span> <span class="n">Literal</span><span class="p">[</span><span class="n">Any</span><span class="p">])</span> <span class="o">-&gt;</span> <span class="kc">None</span><span class="p">:</span>
<a id="__codelineno-0-830" name="__codelineno-0-830"></a> <span class="bp">self</span><span class="o">.</span><span class="n">_handle_null_unmentioned</span><span class="p">(</span><span class="n">term</span><span class="p">)</span>
<a id="__codelineno-0-831" name="__codelineno-0-831"></a> <span class="bp">self</span><span class="o">.</span><span class="n">_handle_nan_unmentioned</span><span class="p">(</span><span class="n">term</span><span class="p">)</span>
<a id="__codelineno-0-832" name="__codelineno-0-832"></a>
<a id="__codelineno-0-833" name="__codelineno-0-833"></a> <span class="k">def</span><span class="w"> </span><span class="nf">visit_starts_with</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">term</span><span class="p">:</span> <span class="n">BoundTerm</span><span class="p">[</span><span class="n">Any</span><span class="p">],</span> <span class="n">literal</span><span class="p">:</span> <span class="n">Literal</span><span class="p">[</span><span class="n">Any</span><span class="p">])</span> <span class="o">-&gt;</span> <span class="kc">None</span><span class="p">:</span>
<a id="__codelineno-0-834" name="__codelineno-0-834"></a> <span class="bp">self</span><span class="o">.</span><span class="n">_handle_null_unmentioned</span><span class="p">(</span><span class="n">term</span><span class="p">)</span>
<a id="__codelineno-0-835" name="__codelineno-0-835"></a> <span class="bp">self</span><span class="o">.</span><span class="n">_handle_nan_unmentioned</span><span class="p">(</span><span class="n">term</span><span class="p">)</span>
<a id="__codelineno-0-836" name="__codelineno-0-836"></a>
<a id="__codelineno-0-837" name="__codelineno-0-837"></a> <span class="k">def</span><span class="w"> </span><span class="nf">visit_not_starts_with</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">term</span><span class="p">:</span> <span class="n">BoundTerm</span><span class="p">[</span><span class="n">Any</span><span class="p">],</span> <span class="n">literal</span><span class="p">:</span> <span class="n">Literal</span><span class="p">[</span><span class="n">Any</span><span class="p">])</span> <span class="o">-&gt;</span> <span class="kc">None</span><span class="p">:</span>
<a id="__codelineno-0-838" name="__codelineno-0-838"></a> <span class="bp">self</span><span class="o">.</span><span class="n">_handle_null_unmentioned</span><span class="p">(</span><span class="n">term</span><span class="p">)</span>
<a id="__codelineno-0-839" name="__codelineno-0-839"></a> <span class="bp">self</span><span class="o">.</span><span class="n">_handle_nan_unmentioned</span><span class="p">(</span><span class="n">term</span><span class="p">)</span>
<a id="__codelineno-0-840" name="__codelineno-0-840"></a>
<a id="__codelineno-0-841" name="__codelineno-0-841"></a> <span class="k">def</span><span class="w"> </span><span class="nf">visit_true</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="kc">None</span><span class="p">:</span>
<a id="__codelineno-0-842" name="__codelineno-0-842"></a> <span class="k">return</span>
<a id="__codelineno-0-843" name="__codelineno-0-843"></a>
<a id="__codelineno-0-844" name="__codelineno-0-844"></a> <span class="k">def</span><span class="w"> </span><span class="nf">visit_false</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="kc">None</span><span class="p">:</span>
<a id="__codelineno-0-845" name="__codelineno-0-845"></a> <span class="k">return</span>
<a id="__codelineno-0-846" name="__codelineno-0-846"></a>
<a id="__codelineno-0-847" name="__codelineno-0-847"></a> <span class="k">def</span><span class="w"> </span><span class="nf">visit_not</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">child_result</span><span class="p">:</span> <span class="kc">None</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="kc">None</span><span class="p">:</span>
<a id="__codelineno-0-848" name="__codelineno-0-848"></a> <span class="k">return</span>
<a id="__codelineno-0-849" name="__codelineno-0-849"></a>
<a id="__codelineno-0-850" name="__codelineno-0-850"></a> <span class="k">def</span><span class="w"> </span><span class="nf">visit_and</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">left_result</span><span class="p">:</span> <span class="kc">None</span><span class="p">,</span> <span class="n">right_result</span><span class="p">:</span> <span class="kc">None</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="kc">None</span><span class="p">:</span>
<a id="__codelineno-0-851" name="__codelineno-0-851"></a> <span class="k">return</span>
<a id="__codelineno-0-852" name="__codelineno-0-852"></a>
<a id="__codelineno-0-853" name="__codelineno-0-853"></a> <span class="k">def</span><span class="w"> </span><span class="nf">visit_or</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">left_result</span><span class="p">:</span> <span class="kc">None</span><span class="p">,</span> <span class="n">right_result</span><span class="p">:</span> <span class="kc">None</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="kc">None</span><span class="p">:</span>
<a id="__codelineno-0-854" name="__codelineno-0-854"></a> <span class="k">return</span>
<a id="__codelineno-0-855" name="__codelineno-0-855"></a>
<a id="__codelineno-0-856" name="__codelineno-0-856"></a> <span class="k">def</span><span class="w"> </span><span class="nf">collect</span><span class="p">(</span>
<a id="__codelineno-0-857" name="__codelineno-0-857"></a> <span class="bp">self</span><span class="p">,</span>
<a id="__codelineno-0-858" name="__codelineno-0-858"></a> <span class="n">expr</span><span class="p">:</span> <span class="n">BooleanExpression</span><span class="p">,</span>
<a id="__codelineno-0-859" name="__codelineno-0-859"></a> <span class="p">)</span> <span class="o">-&gt;</span> <span class="kc">None</span><span class="p">:</span>
<a id="__codelineno-0-860" name="__codelineno-0-860"></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;Collect the bound references categorized by having at least one is_null or is_not_null in the expr and the remaining.&quot;&quot;&quot;</span>
<a id="__codelineno-0-861" name="__codelineno-0-861"></a> <span class="n">boolean_expression_visit</span><span class="p">(</span><span class="n">expr</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span>
</code></pre></div></td></tr></table></div>
</details>
<div class="doc doc-children">
<div class="doc doc-object doc-function">
<h3 id="pyiceberg.io.pyarrow._NullNaNUnmentionedTermsCollector._handle_explicit_is_nan_or_not" class="doc doc-heading">
<code class="highlight language-python"><span class="n">_handle_explicit_is_nan_or_not</span><span class="p">(</span><span class="n">term</span><span class="p">)</span></code>
<a href="#pyiceberg.io.pyarrow._NullNaNUnmentionedTermsCollector._handle_explicit_is_nan_or_not" class="headerlink" title="Permanent link">&para;</a></h3>
<div class="doc doc-contents ">
<p>Handle the predicate case where either is_nan or is_not_nan is included.</p>
<details class="quote">
<summary>Source code in <code>pyiceberg/io/pyarrow.py</code></summary>
<div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-774">774</a></span>
<span class="normal"><a href="#__codelineno-0-775">775</a></span>
<span class="normal"><a href="#__codelineno-0-776">776</a></span>
<span class="normal"><a href="#__codelineno-0-777">777</a></span>
<span class="normal"><a href="#__codelineno-0-778">778</a></span></pre></div></td><td class="code"><div><pre><span></span><code><a id="__codelineno-0-774" name="__codelineno-0-774"></a><span class="k">def</span><span class="w"> </span><span class="nf">_handle_explicit_is_nan_or_not</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">term</span><span class="p">:</span> <span class="n">BoundTerm</span><span class="p">[</span><span class="n">Any</span><span class="p">])</span> <span class="o">-&gt;</span> <span class="kc">None</span><span class="p">:</span>
<a id="__codelineno-0-775" name="__codelineno-0-775"></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;Handle the predicate case where either is_nan or is_not_nan is included.&quot;&quot;&quot;</span>
<a id="__codelineno-0-776" name="__codelineno-0-776"></a> <span class="k">if</span> <span class="n">term</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">nan_unmentioned_bound_terms</span><span class="p">:</span>
<a id="__codelineno-0-777" name="__codelineno-0-777"></a> <span class="bp">self</span><span class="o">.</span><span class="n">nan_unmentioned_bound_terms</span><span class="o">.</span><span class="n">remove</span><span class="p">(</span><span class="n">term</span><span class="p">)</span>
<a id="__codelineno-0-778" name="__codelineno-0-778"></a> <span class="bp">self</span><span class="o">.</span><span class="n">is_nan_or_not_bound_terms</span><span class="o">.</span><span class="n">add</span><span class="p">(</span><span class="n">term</span><span class="p">)</span>
</code></pre></div></td></tr></table></div>
</details>
</div>
</div>
<div class="doc doc-object doc-function">
<h3 id="pyiceberg.io.pyarrow._NullNaNUnmentionedTermsCollector._handle_explicit_is_null_or_not" class="doc doc-heading">
<code class="highlight language-python"><span class="n">_handle_explicit_is_null_or_not</span><span class="p">(</span><span class="n">term</span><span class="p">)</span></code>
<a href="#pyiceberg.io.pyarrow._NullNaNUnmentionedTermsCollector._handle_explicit_is_null_or_not" class="headerlink" title="Permanent link">&para;</a></h3>
<div class="doc doc-contents ">
<p>Handle the predicate case where either is_null or is_not_null is included.</p>
<details class="quote">
<summary>Source code in <code>pyiceberg/io/pyarrow.py</code></summary>
<div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-763">763</a></span>
<span class="normal"><a href="#__codelineno-0-764">764</a></span>
<span class="normal"><a href="#__codelineno-0-765">765</a></span>
<span class="normal"><a href="#__codelineno-0-766">766</a></span>
<span class="normal"><a href="#__codelineno-0-767">767</a></span></pre></div></td><td class="code"><div><pre><span></span><code><a id="__codelineno-0-763" name="__codelineno-0-763"></a><span class="k">def</span><span class="w"> </span><span class="nf">_handle_explicit_is_null_or_not</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">term</span><span class="p">:</span> <span class="n">BoundTerm</span><span class="p">[</span><span class="n">Any</span><span class="p">])</span> <span class="o">-&gt;</span> <span class="kc">None</span><span class="p">:</span>
<a id="__codelineno-0-764" name="__codelineno-0-764"></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;Handle the predicate case where either is_null or is_not_null is included.&quot;&quot;&quot;</span>
<a id="__codelineno-0-765" name="__codelineno-0-765"></a> <span class="k">if</span> <span class="n">term</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">null_unmentioned_bound_terms</span><span class="p">:</span>
<a id="__codelineno-0-766" name="__codelineno-0-766"></a> <span class="bp">self</span><span class="o">.</span><span class="n">null_unmentioned_bound_terms</span><span class="o">.</span><span class="n">remove</span><span class="p">(</span><span class="n">term</span><span class="p">)</span>
<a id="__codelineno-0-767" name="__codelineno-0-767"></a> <span class="bp">self</span><span class="o">.</span><span class="n">is_null_or_not_bound_terms</span><span class="o">.</span><span class="n">add</span><span class="p">(</span><span class="n">term</span><span class="p">)</span>
</code></pre></div></td></tr></table></div>
</details>
</div>
</div>
<div class="doc doc-object doc-function">
<h3 id="pyiceberg.io.pyarrow._NullNaNUnmentionedTermsCollector._handle_nan_unmentioned" class="doc doc-heading">
<code class="highlight language-python"><span class="n">_handle_nan_unmentioned</span><span class="p">(</span><span class="n">term</span><span class="p">)</span></code>
<a href="#pyiceberg.io.pyarrow._NullNaNUnmentionedTermsCollector._handle_nan_unmentioned" class="headerlink" title="Permanent link">&para;</a></h3>
<div class="doc doc-contents ">
<p>Handle the predicate case where neither is_nan nor is_not_nan is included.</p>
<details class="quote">
<summary>Source code in <code>pyiceberg/io/pyarrow.py</code></summary>
<div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-780">780</a></span>
<span class="normal"><a href="#__codelineno-0-781">781</a></span>
<span class="normal"><a href="#__codelineno-0-782">782</a></span>
<span class="normal"><a href="#__codelineno-0-783">783</a></span></pre></div></td><td class="code"><div><pre><span></span><code><a id="__codelineno-0-780" name="__codelineno-0-780"></a><span class="k">def</span><span class="w"> </span><span class="nf">_handle_nan_unmentioned</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">term</span><span class="p">:</span> <span class="n">BoundTerm</span><span class="p">[</span><span class="n">Any</span><span class="p">])</span> <span class="o">-&gt;</span> <span class="kc">None</span><span class="p">:</span>
<a id="__codelineno-0-781" name="__codelineno-0-781"></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;Handle the predicate case where neither is_nan or is_not_nan is included.&quot;&quot;&quot;</span>
<a id="__codelineno-0-782" name="__codelineno-0-782"></a> <span class="k">if</span> <span class="n">term</span> <span class="ow">not</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">is_nan_or_not_bound_terms</span><span class="p">:</span>
<a id="__codelineno-0-783" name="__codelineno-0-783"></a> <span class="bp">self</span><span class="o">.</span><span class="n">nan_unmentioned_bound_terms</span><span class="o">.</span><span class="n">add</span><span class="p">(</span><span class="n">term</span><span class="p">)</span>
</code></pre></div></td></tr></table></div>
</details>
</div>
</div>
<div class="doc doc-object doc-function">
<h3 id="pyiceberg.io.pyarrow._NullNaNUnmentionedTermsCollector._handle_null_unmentioned" class="doc doc-heading">
<code class="highlight language-python"><span class="n">_handle_null_unmentioned</span><span class="p">(</span><span class="n">term</span><span class="p">)</span></code>
<a href="#pyiceberg.io.pyarrow._NullNaNUnmentionedTermsCollector._handle_null_unmentioned" class="headerlink" title="Permanent link">&para;</a></h3>
<div class="doc doc-contents ">
<p>Handle the predicate case where neither is_null nor is_not_null is included.</p>
<details class="quote">
<summary>Source code in <code>pyiceberg/io/pyarrow.py</code></summary>
<div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-769">769</a></span>
<span class="normal"><a href="#__codelineno-0-770">770</a></span>
<span class="normal"><a href="#__codelineno-0-771">771</a></span>
<span class="normal"><a href="#__codelineno-0-772">772</a></span></pre></div></td><td class="code"><div><pre><span></span><code><a id="__codelineno-0-769" name="__codelineno-0-769"></a><span class="k">def</span><span class="w"> </span><span class="nf">_handle_null_unmentioned</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">term</span><span class="p">:</span> <span class="n">BoundTerm</span><span class="p">[</span><span class="n">Any</span><span class="p">])</span> <span class="o">-&gt;</span> <span class="kc">None</span><span class="p">:</span>
<a id="__codelineno-0-770" name="__codelineno-0-770"></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;Handle the predicate case where neither is_null or is_not_null is included.&quot;&quot;&quot;</span>
<a id="__codelineno-0-771" name="__codelineno-0-771"></a> <span class="k">if</span> <span class="n">term</span> <span class="ow">not</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">is_null_or_not_bound_terms</span><span class="p">:</span>
<a id="__codelineno-0-772" name="__codelineno-0-772"></a> <span class="bp">self</span><span class="o">.</span><span class="n">null_unmentioned_bound_terms</span><span class="o">.</span><span class="n">add</span><span class="p">(</span><span class="n">term</span><span class="p">)</span>
</code></pre></div></td></tr></table></div>
</details>
</div>
</div>
<div class="doc doc-object doc-function">
<h3 id="pyiceberg.io.pyarrow._NullNaNUnmentionedTermsCollector.collect" class="doc doc-heading">
<code class="highlight language-python"><span class="n">collect</span><span class="p">(</span><span class="n">expr</span><span class="p">)</span></code>
<a href="#pyiceberg.io.pyarrow._NullNaNUnmentionedTermsCollector.collect" class="headerlink" title="Permanent link">&para;</a></h3>
<div class="doc doc-contents ">
<p>Collect the bound references in the expr, categorized into those that have at least one is_null or is_not_null and the remaining ones.</p>
<details class="quote">
<summary>Source code in <code>pyiceberg/io/pyarrow.py</code></summary>
<div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-856">856</a></span>
<span class="normal"><a href="#__codelineno-0-857">857</a></span>
<span class="normal"><a href="#__codelineno-0-858">858</a></span>
<span class="normal"><a href="#__codelineno-0-859">859</a></span>
<span class="normal"><a href="#__codelineno-0-860">860</a></span>
<span class="normal"><a href="#__codelineno-0-861">861</a></span></pre></div></td><td class="code"><div><pre><span></span><code><a id="__codelineno-0-856" name="__codelineno-0-856"></a><span class="k">def</span><span class="w"> </span><span class="nf">collect</span><span class="p">(</span>
<a id="__codelineno-0-857" name="__codelineno-0-857"></a> <span class="bp">self</span><span class="p">,</span>
<a id="__codelineno-0-858" name="__codelineno-0-858"></a> <span class="n">expr</span><span class="p">:</span> <span class="n">BooleanExpression</span><span class="p">,</span>
<a id="__codelineno-0-859" name="__codelineno-0-859"></a><span class="p">)</span> <span class="o">-&gt;</span> <span class="kc">None</span><span class="p">:</span>
<a id="__codelineno-0-860" name="__codelineno-0-860"></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;Collect the bound references categorized by having at least one is_null or is_not_null in the expr and the remaining.&quot;&quot;&quot;</span>
<a id="__codelineno-0-861" name="__codelineno-0-861"></a> <span class="n">boolean_expression_visit</span><span class="p">(</span><span class="n">expr</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span>
</code></pre></div></td></tr></table></div>
</details>
</div>
</div>
</div>
</div>
</div>
<div class="doc doc-object doc-function">
<h2 id="pyiceberg.io.pyarrow._check_pyarrow_schema_compatible" class="doc doc-heading">
<code class="highlight language-python"><span class="n">_check_pyarrow_schema_compatible</span><span class="p">(</span><span class="n">requested_schema</span><span class="p">,</span> <span class="n">provided_schema</span><span class="p">,</span> <span class="n">downcast_ns_timestamp_to_us</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span></code>
<a href="#pyiceberg.io.pyarrow._check_pyarrow_schema_compatible" class="headerlink" title="Permanent link">&para;</a></h2>
<div class="doc doc-contents ">
<p>Check if the <code>requested_schema</code> is compatible with <code>provided_schema</code>.</p>
<p>Two schemas are considered compatible when they are equal in terms of the Iceberg Schema type.</p>
<p><span class="doc-section-title">Raises:</span></p>
<table>
<thead>
<tr>
<th>Type</th>
<th>Description</th>
</tr>
</thead>
<tbody>
<tr class="doc-section-item">
<td>
<code><span title="ValueError">ValueError</span></code>
</td>
<td>
<div class="doc-md-description">
<p>If the schemas are not compatible.</p>
</div>
</td>
</tr>
</tbody>
</table>
<details class="quote">
<summary>Source code in <code>pyiceberg/io/pyarrow.py</code></summary>
<div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-2442">2442</a></span>
<span class="normal"><a href="#__codelineno-0-2443">2443</a></span>
<span class="normal"><a href="#__codelineno-0-2444">2444</a></span>
<span class="normal"><a href="#__codelineno-0-2445">2445</a></span>
<span class="normal"><a href="#__codelineno-0-2446">2446</a></span>
<span class="normal"><a href="#__codelineno-0-2447">2447</a></span>
<span class="normal"><a href="#__codelineno-0-2448">2448</a></span>
<span class="normal"><a href="#__codelineno-0-2449">2449</a></span>
<span class="normal"><a href="#__codelineno-0-2450">2450</a></span>
<span class="normal"><a href="#__codelineno-0-2451">2451</a></span>
<span class="normal"><a href="#__codelineno-0-2452">2452</a></span>
<span class="normal"><a href="#__codelineno-0-2453">2453</a></span>
<span class="normal"><a href="#__codelineno-0-2454">2454</a></span>
<span class="normal"><a href="#__codelineno-0-2455">2455</a></span>
<span class="normal"><a href="#__codelineno-0-2456">2456</a></span>
<span class="normal"><a href="#__codelineno-0-2457">2457</a></span>
<span class="normal"><a href="#__codelineno-0-2458">2458</a></span>
<span class="normal"><a href="#__codelineno-0-2459">2459</a></span>
<span class="normal"><a href="#__codelineno-0-2460">2460</a></span>
<span class="normal"><a href="#__codelineno-0-2461">2461</a></span>
<span class="normal"><a href="#__codelineno-0-2462">2462</a></span>
<span class="normal"><a href="#__codelineno-0-2463">2463</a></span>
<span class="normal"><a href="#__codelineno-0-2464">2464</a></span></pre></div></td><td class="code"><div><pre><span></span><code><a id="__codelineno-0-2442" name="__codelineno-0-2442"></a><span class="k">def</span><span class="w"> </span><span class="nf">_check_pyarrow_schema_compatible</span><span class="p">(</span>
<a id="__codelineno-0-2443" name="__codelineno-0-2443"></a> <span class="n">requested_schema</span><span class="p">:</span> <span class="n">Schema</span><span class="p">,</span> <span class="n">provided_schema</span><span class="p">:</span> <span class="n">pa</span><span class="o">.</span><span class="n">Schema</span><span class="p">,</span> <span class="n">downcast_ns_timestamp_to_us</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">False</span>
<a id="__codelineno-0-2444" name="__codelineno-0-2444"></a><span class="p">)</span> <span class="o">-&gt;</span> <span class="kc">None</span><span class="p">:</span>
<a id="__codelineno-0-2445" name="__codelineno-0-2445"></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<a id="__codelineno-0-2446" name="__codelineno-0-2446"></a><span class="sd"> Check if the `requested_schema` is compatible with `provided_schema`.</span>
<a id="__codelineno-0-2447" name="__codelineno-0-2447"></a>
<a id="__codelineno-0-2448" name="__codelineno-0-2448"></a><span class="sd"> Two schemas are considered compatible when they are equal in terms of the Iceberg Schema type.</span>
<a id="__codelineno-0-2449" name="__codelineno-0-2449"></a>
<a id="__codelineno-0-2450" name="__codelineno-0-2450"></a><span class="sd"> Raises:</span>
<a id="__codelineno-0-2451" name="__codelineno-0-2451"></a><span class="sd"> ValueError: If the schemas are not compatible.</span>
<a id="__codelineno-0-2452" name="__codelineno-0-2452"></a><span class="sd"> &quot;&quot;&quot;</span>
<a id="__codelineno-0-2453" name="__codelineno-0-2453"></a> <span class="n">name_mapping</span> <span class="o">=</span> <span class="n">requested_schema</span><span class="o">.</span><span class="n">name_mapping</span>
<a id="__codelineno-0-2454" name="__codelineno-0-2454"></a> <span class="k">try</span><span class="p">:</span>
<a id="__codelineno-0-2455" name="__codelineno-0-2455"></a> <span class="n">provided_schema</span> <span class="o">=</span> <span class="n">pyarrow_to_schema</span><span class="p">(</span>
<a id="__codelineno-0-2456" name="__codelineno-0-2456"></a> <span class="n">provided_schema</span><span class="p">,</span> <span class="n">name_mapping</span><span class="o">=</span><span class="n">name_mapping</span><span class="p">,</span> <span class="n">downcast_ns_timestamp_to_us</span><span class="o">=</span><span class="n">downcast_ns_timestamp_to_us</span>
<a id="__codelineno-0-2457" name="__codelineno-0-2457"></a> <span class="p">)</span>
<a id="__codelineno-0-2458" name="__codelineno-0-2458"></a> <span class="k">except</span> <span class="ne">ValueError</span> <span class="k">as</span> <span class="n">e</span><span class="p">:</span>
<a id="__codelineno-0-2459" name="__codelineno-0-2459"></a> <span class="n">provided_schema</span> <span class="o">=</span> <span class="n">_pyarrow_to_schema_without_ids</span><span class="p">(</span><span class="n">provided_schema</span><span class="p">,</span> <span class="n">downcast_ns_timestamp_to_us</span><span class="o">=</span><span class="n">downcast_ns_timestamp_to_us</span><span class="p">)</span>
<a id="__codelineno-0-2460" name="__codelineno-0-2460"></a> <span class="n">additional_names</span> <span class="o">=</span> <span class="nb">set</span><span class="p">(</span><span class="n">provided_schema</span><span class="o">.</span><span class="n">_name_to_id</span><span class="o">.</span><span class="n">keys</span><span class="p">())</span> <span class="o">-</span> <span class="nb">set</span><span class="p">(</span><span class="n">requested_schema</span><span class="o">.</span><span class="n">_name_to_id</span><span class="o">.</span><span class="n">keys</span><span class="p">())</span>
<a id="__codelineno-0-2461" name="__codelineno-0-2461"></a> <span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span>
<a id="__codelineno-0-2462" name="__codelineno-0-2462"></a> <span class="sa">f</span><span class="s2">&quot;PyArrow table contains more columns: </span><span class="si">{</span><span class="s1">&#39;, &#39;</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="nb">sorted</span><span class="p">(</span><span class="n">additional_names</span><span class="p">))</span><span class="si">}</span><span class="s2">. Update the schema first (hint, use union_by_name).&quot;</span>
<a id="__codelineno-0-2463" name="__codelineno-0-2463"></a> <span class="p">)</span> <span class="kn">from</span><span class="w"> </span><span class="nn">e</span>
<a id="__codelineno-0-2464" name="__codelineno-0-2464"></a> <span class="n">_check_schema_compatible</span><span class="p">(</span><span class="n">requested_schema</span><span class="p">,</span> <span class="n">provided_schema</span><span class="p">)</span>
</code></pre></div></td></tr></table></div>
</details>
</div>
</div>
<div class="doc doc-object doc-function">
<h2 id="pyiceberg.io.pyarrow._dataframe_to_data_files" class="doc doc-heading">
<code class="highlight language-python"><span class="n">_dataframe_to_data_files</span><span class="p">(</span><span class="n">table_metadata</span><span class="p">,</span> <span class="n">df</span><span class="p">,</span> <span class="n">io</span><span class="p">,</span> <span class="n">write_uuid</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">counter</span><span class="o">=</span><span class="kc">None</span><span class="p">)</span></code>
<a href="#pyiceberg.io.pyarrow._dataframe_to_data_files" class="headerlink" title="Permanent link">&para;</a></h2>
<div class="doc doc-contents ">
<p>Convert a PyArrow table into a DataFile.</p>
<p><span class="doc-section-title">Returns:</span></p>
<table>
<thead>
<tr>
<th>Type</th>
<th>Description</th>
</tr>
</thead>
<tbody>
<tr class="doc-section-item">
<td>
<code><span title="typing.Iterable">Iterable</span>[<a class="autorefs autorefs-internal" title="pyiceberg.manifest.DataFile" href="../../manifest/#pyiceberg.manifest.DataFile">DataFile</a>]</code>
</td>
<td>
<div class="doc-md-description">
<p>An iterable that supplies datafiles that represent the table.</p>
</div>
</td>
</tr>
</tbody>
</table>
<details class="quote">
<summary>Source code in <code>pyiceberg/io/pyarrow.py</code></summary>
<div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-2546">2546</a></span>
<span class="normal"><a href="#__codelineno-0-2547">2547</a></span>
<span class="normal"><a href="#__codelineno-0-2548">2548</a></span>
<span class="normal"><a href="#__codelineno-0-2549">2549</a></span>
<span class="normal"><a href="#__codelineno-0-2550">2550</a></span>
<span class="normal"><a href="#__codelineno-0-2551">2551</a></span>
<span class="normal"><a href="#__codelineno-0-2552">2552</a></span>
<span class="normal"><a href="#__codelineno-0-2553">2553</a></span>
<span class="normal"><a href="#__codelineno-0-2554">2554</a></span>
<span class="normal"><a href="#__codelineno-0-2555">2555</a></span>
<span class="normal"><a href="#__codelineno-0-2556">2556</a></span>
<span class="normal"><a href="#__codelineno-0-2557">2557</a></span>
<span class="normal"><a href="#__codelineno-0-2558">2558</a></span>
<span class="normal"><a href="#__codelineno-0-2559">2559</a></span>
<span class="normal"><a href="#__codelineno-0-2560">2560</a></span>
<span class="normal"><a href="#__codelineno-0-2561">2561</a></span>
<span class="normal"><a href="#__codelineno-0-2562">2562</a></span>
<span class="normal"><a href="#__codelineno-0-2563">2563</a></span>
<span class="normal"><a href="#__codelineno-0-2564">2564</a></span>
<span class="normal"><a href="#__codelineno-0-2565">2565</a></span>
<span class="normal"><a href="#__codelineno-0-2566">2566</a></span>
<span class="normal"><a href="#__codelineno-0-2567">2567</a></span>
<span class="normal"><a href="#__codelineno-0-2568">2568</a></span>
<span class="normal"><a href="#__codelineno-0-2569">2569</a></span>
<span class="normal"><a href="#__codelineno-0-2570">2570</a></span>
<span class="normal"><a href="#__codelineno-0-2571">2571</a></span>
<span class="normal"><a href="#__codelineno-0-2572">2572</a></span>
<span class="normal"><a href="#__codelineno-0-2573">2573</a></span>
<span class="normal"><a href="#__codelineno-0-2574">2574</a></span>
<span class="normal"><a href="#__codelineno-0-2575">2575</a></span>
<span class="normal"><a href="#__codelineno-0-2576">2576</a></span>
<span class="normal"><a href="#__codelineno-0-2577">2577</a></span>
<span class="normal"><a href="#__codelineno-0-2578">2578</a></span>
<span class="normal"><a href="#__codelineno-0-2579">2579</a></span>
<span class="normal"><a href="#__codelineno-0-2580">2580</a></span>
<span class="normal"><a href="#__codelineno-0-2581">2581</a></span>
<span class="normal"><a href="#__codelineno-0-2582">2582</a></span>
<span class="normal"><a href="#__codelineno-0-2583">2583</a></span>
<span class="normal"><a href="#__codelineno-0-2584">2584</a></span>
<span class="normal"><a href="#__codelineno-0-2585">2585</a></span>
<span class="normal"><a href="#__codelineno-0-2586">2586</a></span>
<span class="normal"><a href="#__codelineno-0-2587">2587</a></span>
<span class="normal"><a href="#__codelineno-0-2588">2588</a></span>
<span class="normal"><a href="#__codelineno-0-2589">2589</a></span>
<span class="normal"><a href="#__codelineno-0-2590">2590</a></span>
<span class="normal"><a href="#__codelineno-0-2591">2591</a></span>
<span class="normal"><a href="#__codelineno-0-2592">2592</a></span>
<span class="normal"><a href="#__codelineno-0-2593">2593</a></span>
<span class="normal"><a href="#__codelineno-0-2594">2594</a></span>
<span class="normal"><a href="#__codelineno-0-2595">2595</a></span>
<span class="normal"><a href="#__codelineno-0-2596">2596</a></span>
<span class="normal"><a href="#__codelineno-0-2597">2597</a></span>
<span class="normal"><a href="#__codelineno-0-2598">2598</a></span>
<span class="normal"><a href="#__codelineno-0-2599">2599</a></span>
<span class="normal"><a href="#__codelineno-0-2600">2600</a></span></pre></div></td><td class="code"><div><pre><span></span><code><a id="__codelineno-0-2546" name="__codelineno-0-2546"></a><span class="k">def</span><span class="w"> </span><span class="nf">_dataframe_to_data_files</span><span class="p">(</span>
<a id="__codelineno-0-2547" name="__codelineno-0-2547"></a> <span class="n">table_metadata</span><span class="p">:</span> <span class="n">TableMetadata</span><span class="p">,</span>
<a id="__codelineno-0-2548" name="__codelineno-0-2548"></a> <span class="n">df</span><span class="p">:</span> <span class="n">pa</span><span class="o">.</span><span class="n">Table</span><span class="p">,</span>
<a id="__codelineno-0-2549" name="__codelineno-0-2549"></a> <span class="n">io</span><span class="p">:</span> <span class="n">FileIO</span><span class="p">,</span>
<a id="__codelineno-0-2550" name="__codelineno-0-2550"></a> <span class="n">write_uuid</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">uuid</span><span class="o">.</span><span class="n">UUID</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<a id="__codelineno-0-2551" name="__codelineno-0-2551"></a> <span class="n">counter</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">itertools</span><span class="o">.</span><span class="n">count</span><span class="p">[</span><span class="nb">int</span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<a id="__codelineno-0-2552" name="__codelineno-0-2552"></a><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">Iterable</span><span class="p">[</span><span class="n">DataFile</span><span class="p">]:</span>
<a id="__codelineno-0-2553" name="__codelineno-0-2553"></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;Convert a PyArrow table into a DataFile.</span>
<a id="__codelineno-0-2554" name="__codelineno-0-2554"></a>
<a id="__codelineno-0-2555" name="__codelineno-0-2555"></a><span class="sd"> Returns:</span>
<a id="__codelineno-0-2556" name="__codelineno-0-2556"></a><span class="sd"> An iterable that supplies datafiles that represent the table.</span>
<a id="__codelineno-0-2557" name="__codelineno-0-2557"></a><span class="sd"> &quot;&quot;&quot;</span>
<a id="__codelineno-0-2558" name="__codelineno-0-2558"></a> <span class="kn">from</span><span class="w"> </span><span class="nn">pyiceberg.table</span><span class="w"> </span><span class="kn">import</span> <span class="n">DOWNCAST_NS_TIMESTAMP_TO_US_ON_WRITE</span><span class="p">,</span> <span class="n">TableProperties</span><span class="p">,</span> <span class="n">WriteTask</span>
<a id="__codelineno-0-2559" name="__codelineno-0-2559"></a>
<a id="__codelineno-0-2560" name="__codelineno-0-2560"></a> <span class="n">counter</span> <span class="o">=</span> <span class="n">counter</span> <span class="ow">or</span> <span class="n">itertools</span><span class="o">.</span><span class="n">count</span><span class="p">(</span><span class="mi">0</span><span class="p">)</span>
<a id="__codelineno-0-2561" name="__codelineno-0-2561"></a> <span class="n">write_uuid</span> <span class="o">=</span> <span class="n">write_uuid</span> <span class="ow">or</span> <span class="n">uuid</span><span class="o">.</span><span class="n">uuid4</span><span class="p">()</span>
<a id="__codelineno-0-2562" name="__codelineno-0-2562"></a> <span class="n">target_file_size</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="n">property_as_int</span><span class="p">(</span> <span class="c1"># type: ignore # The property is set with non-None value.</span>
<a id="__codelineno-0-2563" name="__codelineno-0-2563"></a> <span class="n">properties</span><span class="o">=</span><span class="n">table_metadata</span><span class="o">.</span><span class="n">properties</span><span class="p">,</span>
<a id="__codelineno-0-2564" name="__codelineno-0-2564"></a> <span class="n">property_name</span><span class="o">=</span><span class="n">TableProperties</span><span class="o">.</span><span class="n">WRITE_TARGET_FILE_SIZE_BYTES</span><span class="p">,</span>
<a id="__codelineno-0-2565" name="__codelineno-0-2565"></a> <span class="n">default</span><span class="o">=</span><span class="n">TableProperties</span><span class="o">.</span><span class="n">WRITE_TARGET_FILE_SIZE_BYTES_DEFAULT</span><span class="p">,</span>
<a id="__codelineno-0-2566" name="__codelineno-0-2566"></a> <span class="p">)</span>
<a id="__codelineno-0-2567" name="__codelineno-0-2567"></a> <span class="n">name_mapping</span> <span class="o">=</span> <span class="n">table_metadata</span><span class="o">.</span><span class="n">schema</span><span class="p">()</span><span class="o">.</span><span class="n">name_mapping</span>
<a id="__codelineno-0-2568" name="__codelineno-0-2568"></a> <span class="n">downcast_ns_timestamp_to_us</span> <span class="o">=</span> <span class="n">Config</span><span class="p">()</span><span class="o">.</span><span class="n">get_bool</span><span class="p">(</span><span class="n">DOWNCAST_NS_TIMESTAMP_TO_US_ON_WRITE</span><span class="p">)</span> <span class="ow">or</span> <span class="kc">False</span>
<a id="__codelineno-0-2569" name="__codelineno-0-2569"></a> <span class="n">task_schema</span> <span class="o">=</span> <span class="n">pyarrow_to_schema</span><span class="p">(</span><span class="n">df</span><span class="o">.</span><span class="n">schema</span><span class="p">,</span> <span class="n">name_mapping</span><span class="o">=</span><span class="n">name_mapping</span><span class="p">,</span> <span class="n">downcast_ns_timestamp_to_us</span><span class="o">=</span><span class="n">downcast_ns_timestamp_to_us</span><span class="p">)</span>
<a id="__codelineno-0-2570" name="__codelineno-0-2570"></a>
<a id="__codelineno-0-2571" name="__codelineno-0-2571"></a> <span class="k">if</span> <span class="n">table_metadata</span><span class="o">.</span><span class="n">spec</span><span class="p">()</span><span class="o">.</span><span class="n">is_unpartitioned</span><span class="p">():</span>
<a id="__codelineno-0-2572" name="__codelineno-0-2572"></a> <span class="k">yield from</span> <span class="n">write_file</span><span class="p">(</span>
<a id="__codelineno-0-2573" name="__codelineno-0-2573"></a> <span class="n">io</span><span class="o">=</span><span class="n">io</span><span class="p">,</span>
<a id="__codelineno-0-2574" name="__codelineno-0-2574"></a> <span class="n">table_metadata</span><span class="o">=</span><span class="n">table_metadata</span><span class="p">,</span>
<a id="__codelineno-0-2575" name="__codelineno-0-2575"></a> <span class="n">tasks</span><span class="o">=</span><span class="nb">iter</span><span class="p">(</span>
<a id="__codelineno-0-2576" name="__codelineno-0-2576"></a> <span class="p">[</span>
<a id="__codelineno-0-2577" name="__codelineno-0-2577"></a> <span class="n">WriteTask</span><span class="p">(</span><span class="n">write_uuid</span><span class="o">=</span><span class="n">write_uuid</span><span class="p">,</span> <span class="n">task_id</span><span class="o">=</span><span class="nb">next</span><span class="p">(</span><span class="n">counter</span><span class="p">),</span> <span class="n">record_batches</span><span class="o">=</span><span class="n">batches</span><span class="p">,</span> <span class="n">schema</span><span class="o">=</span><span class="n">task_schema</span><span class="p">)</span>
<a id="__codelineno-0-2578" name="__codelineno-0-2578"></a> <span class="k">for</span> <span class="n">batches</span> <span class="ow">in</span> <span class="n">bin_pack_arrow_table</span><span class="p">(</span><span class="n">df</span><span class="p">,</span> <span class="n">target_file_size</span><span class="p">)</span>
<a id="__codelineno-0-2579" name="__codelineno-0-2579"></a> <span class="p">]</span>
<a id="__codelineno-0-2580" name="__codelineno-0-2580"></a> <span class="p">),</span>
<a id="__codelineno-0-2581" name="__codelineno-0-2581"></a> <span class="p">)</span>
<a id="__codelineno-0-2582" name="__codelineno-0-2582"></a> <span class="k">else</span><span class="p">:</span>
<a id="__codelineno-0-2583" name="__codelineno-0-2583"></a> <span class="n">partitions</span> <span class="o">=</span> <span class="n">_determine_partitions</span><span class="p">(</span><span class="n">spec</span><span class="o">=</span><span class="n">table_metadata</span><span class="o">.</span><span class="n">spec</span><span class="p">(),</span> <span class="n">schema</span><span class="o">=</span><span class="n">table_metadata</span><span class="o">.</span><span class="n">schema</span><span class="p">(),</span> <span class="n">arrow_table</span><span class="o">=</span><span class="n">df</span><span class="p">)</span>
<a id="__codelineno-0-2584" name="__codelineno-0-2584"></a> <span class="k">yield from</span> <span class="n">write_file</span><span class="p">(</span>
<a id="__codelineno-0-2585" name="__codelineno-0-2585"></a> <span class="n">io</span><span class="o">=</span><span class="n">io</span><span class="p">,</span>
<a id="__codelineno-0-2586" name="__codelineno-0-2586"></a> <span class="n">table_metadata</span><span class="o">=</span><span class="n">table_metadata</span><span class="p">,</span>
<a id="__codelineno-0-2587" name="__codelineno-0-2587"></a> <span class="n">tasks</span><span class="o">=</span><span class="nb">iter</span><span class="p">(</span>
<a id="__codelineno-0-2588" name="__codelineno-0-2588"></a> <span class="p">[</span>
<a id="__codelineno-0-2589" name="__codelineno-0-2589"></a> <span class="n">WriteTask</span><span class="p">(</span>
<a id="__codelineno-0-2590" name="__codelineno-0-2590"></a> <span class="n">write_uuid</span><span class="o">=</span><span class="n">write_uuid</span><span class="p">,</span>
<a id="__codelineno-0-2591" name="__codelineno-0-2591"></a> <span class="n">task_id</span><span class="o">=</span><span class="nb">next</span><span class="p">(</span><span class="n">counter</span><span class="p">),</span>
<a id="__codelineno-0-2592" name="__codelineno-0-2592"></a> <span class="n">record_batches</span><span class="o">=</span><span class="n">batches</span><span class="p">,</span>
<a id="__codelineno-0-2593" name="__codelineno-0-2593"></a> <span class="n">partition_key</span><span class="o">=</span><span class="n">partition</span><span class="o">.</span><span class="n">partition_key</span><span class="p">,</span>
<a id="__codelineno-0-2594" name="__codelineno-0-2594"></a> <span class="n">schema</span><span class="o">=</span><span class="n">task_schema</span><span class="p">,</span>
<a id="__codelineno-0-2595" name="__codelineno-0-2595"></a> <span class="p">)</span>
<a id="__codelineno-0-2596" name="__codelineno-0-2596"></a> <span class="k">for</span> <span class="n">partition</span> <span class="ow">in</span> <span class="n">partitions</span>
<a id="__codelineno-0-2597" name="__codelineno-0-2597"></a> <span class="k">for</span> <span class="n">batches</span> <span class="ow">in</span> <span class="n">bin_pack_arrow_table</span><span class="p">(</span><span class="n">partition</span><span class="o">.</span><span class="n">arrow_table_partition</span><span class="p">,</span> <span class="n">target_file_size</span><span class="p">)</span>
<a id="__codelineno-0-2598" name="__codelineno-0-2598"></a> <span class="p">]</span>
<a id="__codelineno-0-2599" name="__codelineno-0-2599"></a> <span class="p">),</span>
<a id="__codelineno-0-2600" name="__codelineno-0-2600"></a> <span class="p">)</span>
</code></pre></div></td></tr></table></div>
</details>
</div>
</div>
<div class="doc doc-object doc-function">
<h2 id="pyiceberg.io.pyarrow._determine_partitions" class="doc doc-heading">
<code class="highlight language-python"><span class="n">_determine_partitions</span><span class="p">(</span><span class="n">spec</span><span class="p">,</span> <span class="n">schema</span><span class="p">,</span> <span class="n">arrow_table</span><span class="p">)</span></code>
<a href="#pyiceberg.io.pyarrow._determine_partitions" class="headerlink" title="Permanent link">&para;</a></h2>
<div class="doc doc-contents ">
<p>Based on the iceberg table partition spec, filter the arrow table into partitions with their keys.</p>
<p>Example:</p>
<p>Input:
An arrow table with partition key of ['n_legs', 'year'] and with data of:</p>
<pre><code>{'year': [2020, 2022, 2022, 2021, 2022, 2022, 2022, 2019, 2021],
 'n_legs': [2, 2, 2, 4, 4, 4, 4, 5, 100],
 'animal': ["Flamingo", "Parrot", "Parrot", "Dog", "Horse", "Horse", "Horse","Brittle stars", "Centipede"]}</code></pre>
<p>The algorithm:</p>
<ul>
<li>We determine the set of unique partition keys</li>
<li>Then we produce a set of partitions by filtering on each of the combinations</li>
<li>We combine the chunks to create a copy to avoid GIL congestion on the original table</li>
</ul>
<details class="quote">
<summary>Source code in <code>pyiceberg/io/pyarrow.py</code></summary>
<div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-2609">2609</a></span>
<span class="normal"><a href="#__codelineno-0-2610">2610</a></span>
<span class="normal"><a href="#__codelineno-0-2611">2611</a></span>
<span class="normal"><a href="#__codelineno-0-2612">2612</a></span>
<span class="normal"><a href="#__codelineno-0-2613">2613</a></span>
<span class="normal"><a href="#__codelineno-0-2614">2614</a></span>
<span class="normal"><a href="#__codelineno-0-2615">2615</a></span>
<span class="normal"><a href="#__codelineno-0-2616">2616</a></span>
<span class="normal"><a href="#__codelineno-0-2617">2617</a></span>
<span class="normal"><a href="#__codelineno-0-2618">2618</a></span>
<span class="normal"><a href="#__codelineno-0-2619">2619</a></span>
<span class="normal"><a href="#__codelineno-0-2620">2620</a></span>
<span class="normal"><a href="#__codelineno-0-2621">2621</a></span>
<span class="normal"><a href="#__codelineno-0-2622">2622</a></span>
<span class="normal"><a href="#__codelineno-0-2623">2623</a></span>
<span class="normal"><a href="#__codelineno-0-2624">2624</a></span>
<span class="normal"><a href="#__codelineno-0-2625">2625</a></span>
<span class="normal"><a href="#__codelineno-0-2626">2626</a></span>
<span class="normal"><a href="#__codelineno-0-2627">2627</a></span>
<span class="normal"><a href="#__codelineno-0-2628">2628</a></span>
<span class="normal"><a href="#__codelineno-0-2629">2629</a></span>
<span class="normal"><a href="#__codelineno-0-2630">2630</a></span>
<span class="normal"><a href="#__codelineno-0-2631">2631</a></span>
<span class="normal"><a href="#__codelineno-0-2632">2632</a></span>
<span class="normal"><a href="#__codelineno-0-2633">2633</a></span>
<span class="normal"><a href="#__codelineno-0-2634">2634</a></span>
<span class="normal"><a href="#__codelineno-0-2635">2635</a></span>
<span class="normal"><a href="#__codelineno-0-2636">2636</a></span>
<span class="normal"><a href="#__codelineno-0-2637">2637</a></span>
<span class="normal"><a href="#__codelineno-0-2638">2638</a></span>
<span class="normal"><a href="#__codelineno-0-2639">2639</a></span>
<span class="normal"><a href="#__codelineno-0-2640">2640</a></span>
<span class="normal"><a href="#__codelineno-0-2641">2641</a></span>
<span class="normal"><a href="#__codelineno-0-2642">2642</a></span>
<span class="normal"><a href="#__codelineno-0-2643">2643</a></span>
<span class="normal"><a href="#__codelineno-0-2644">2644</a></span>
<span class="normal"><a href="#__codelineno-0-2645">2645</a></span>
<span class="normal"><a href="#__codelineno-0-2646">2646</a></span>
<span class="normal"><a href="#__codelineno-0-2647">2647</a></span>
<span class="normal"><a href="#__codelineno-0-2648">2648</a></span>
<span class="normal"><a href="#__codelineno-0-2649">2649</a></span>
<span class="normal"><a href="#__codelineno-0-2650">2650</a></span>
<span class="normal"><a href="#__codelineno-0-2651">2651</a></span>
<span class="normal"><a href="#__codelineno-0-2652">2652</a></span>
<span class="normal"><a href="#__codelineno-0-2653">2653</a></span>
<span class="normal"><a href="#__codelineno-0-2654">2654</a></span>
<span class="normal"><a href="#__codelineno-0-2655">2655</a></span>
<span class="normal"><a href="#__codelineno-0-2656">2656</a></span>
<span class="normal"><a href="#__codelineno-0-2657">2657</a></span>
<span class="normal"><a href="#__codelineno-0-2658">2658</a></span>
<span class="normal"><a href="#__codelineno-0-2659">2659</a></span>
<span class="normal"><a href="#__codelineno-0-2660">2660</a></span>
<span class="normal"><a href="#__codelineno-0-2661">2661</a></span>
<span class="normal"><a href="#__codelineno-0-2662">2662</a></span>
<span class="normal"><a href="#__codelineno-0-2663">2663</a></span>
<span class="normal"><a href="#__codelineno-0-2664">2664</a></span>
<span class="normal"><a href="#__codelineno-0-2665">2665</a></span></pre></div></td><td class="code"><div><pre><span></span><code><a id="__codelineno-0-2609" name="__codelineno-0-2609"></a><span class="k">def</span><span class="w"> </span><span class="nf">_determine_partitions</span><span class="p">(</span><span class="n">spec</span><span class="p">:</span> <span class="n">PartitionSpec</span><span class="p">,</span> <span class="n">schema</span><span class="p">:</span> <span class="n">Schema</span><span class="p">,</span> <span class="n">arrow_table</span><span class="p">:</span> <span class="n">pa</span><span class="o">.</span><span class="n">Table</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">List</span><span class="p">[</span><span class="n">_TablePartition</span><span class="p">]:</span>
<a id="__codelineno-0-2610" name="__codelineno-0-2610"></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;Based on the iceberg table partition spec, filter the arrow table into partitions with their keys.</span>
<a id="__codelineno-0-2611" name="__codelineno-0-2611"></a>
<a id="__codelineno-0-2612" name="__codelineno-0-2612"></a><span class="sd"> Example:</span>
<a id="__codelineno-0-2613" name="__codelineno-0-2613"></a><span class="sd"> Input:</span>
<a id="__codelineno-0-2614" name="__codelineno-0-2614"></a><span class="sd"> An arrow table with partition key of [&#39;n_legs&#39;, &#39;year&#39;] and with data of</span>
<a id="__codelineno-0-2615" name="__codelineno-0-2615"></a><span class="sd"> {&#39;year&#39;: [2020, 2022, 2022, 2021, 2022, 2022, 2022, 2019, 2021],</span>
<a id="__codelineno-0-2616" name="__codelineno-0-2616"></a><span class="sd"> &#39;n_legs&#39;: [2, 2, 2, 4, 4, 4, 4, 5, 100],</span>
<a id="__codelineno-0-2617" name="__codelineno-0-2617"></a><span class="sd"> &#39;animal&#39;: [&quot;Flamingo&quot;, &quot;Parrot&quot;, &quot;Parrot&quot;, &quot;Dog&quot;, &quot;Horse&quot;, &quot;Horse&quot;, &quot;Horse&quot;,&quot;Brittle stars&quot;, &quot;Centipede&quot;]}.</span>
<a id="__codelineno-0-2618" name="__codelineno-0-2618"></a><span class="sd"> The algorithm:</span>
<a id="__codelineno-0-2619" name="__codelineno-0-2619"></a><span class="sd"> - We determine the set of unique partition keys</span>
<a id="__codelineno-0-2620" name="__codelineno-0-2620"></a><span class="sd"> - Then we produce a set of partitions by filtering on each of the combinations</span>
<a id="__codelineno-0-2621" name="__codelineno-0-2621"></a><span class="sd"> - We combine the chunks to create a copy to avoid GIL congestion on the original table</span>
<a id="__codelineno-0-2622" name="__codelineno-0-2622"></a><span class="sd"> &quot;&quot;&quot;</span>
<a id="__codelineno-0-2623" name="__codelineno-0-2623"></a> <span class="c1"># Assign unique names to columns where the partition transform has been applied</span>
<a id="__codelineno-0-2624" name="__codelineno-0-2624"></a> <span class="c1"># to avoid conflicts</span>
<a id="__codelineno-0-2625" name="__codelineno-0-2625"></a> <span class="n">partition_fields</span> <span class="o">=</span> <span class="p">[</span><span class="sa">f</span><span class="s2">&quot;_partition_</span><span class="si">{</span><span class="n">field</span><span class="o">.</span><span class="n">name</span><span class="si">}</span><span class="s2">&quot;</span> <span class="k">for</span> <span class="n">field</span> <span class="ow">in</span> <span class="n">spec</span><span class="o">.</span><span class="n">fields</span><span class="p">]</span>
<a id="__codelineno-0-2626" name="__codelineno-0-2626"></a>
<a id="__codelineno-0-2627" name="__codelineno-0-2627"></a> <span class="k">for</span> <span class="n">partition</span><span class="p">,</span> <span class="n">name</span> <span class="ow">in</span> <span class="nb">zip</span><span class="p">(</span><span class="n">spec</span><span class="o">.</span><span class="n">fields</span><span class="p">,</span> <span class="n">partition_fields</span><span class="p">):</span>
<a id="__codelineno-0-2628" name="__codelineno-0-2628"></a> <span class="n">source_field</span> <span class="o">=</span> <span class="n">schema</span><span class="o">.</span><span class="n">find_field</span><span class="p">(</span><span class="n">partition</span><span class="o">.</span><span class="n">source_id</span><span class="p">)</span>
<a id="__codelineno-0-2629" name="__codelineno-0-2629"></a> <span class="n">arrow_table</span> <span class="o">=</span> <span class="n">arrow_table</span><span class="o">.</span><span class="n">append_column</span><span class="p">(</span>
<a id="__codelineno-0-2630" name="__codelineno-0-2630"></a> <span class="n">name</span><span class="p">,</span> <span class="n">partition</span><span class="o">.</span><span class="n">transform</span><span class="o">.</span><span class="n">pyarrow_transform</span><span class="p">(</span><span class="n">source_field</span><span class="o">.</span><span class="n">field_type</span><span class="p">)(</span><span class="n">arrow_table</span><span class="p">[</span><span class="n">source_field</span><span class="o">.</span><span class="n">name</span><span class="p">])</span>
<a id="__codelineno-0-2631" name="__codelineno-0-2631"></a> <span class="p">)</span>
<a id="__codelineno-0-2632" name="__codelineno-0-2632"></a>
<a id="__codelineno-0-2633" name="__codelineno-0-2633"></a> <span class="n">unique_partition_fields</span> <span class="o">=</span> <span class="n">arrow_table</span><span class="o">.</span><span class="n">select</span><span class="p">(</span><span class="n">partition_fields</span><span class="p">)</span><span class="o">.</span><span class="n">group_by</span><span class="p">(</span><span class="n">partition_fields</span><span class="p">)</span><span class="o">.</span><span class="n">aggregate</span><span class="p">([])</span>
<a id="__codelineno-0-2634" name="__codelineno-0-2634"></a>
<a id="__codelineno-0-2635" name="__codelineno-0-2635"></a> <span class="n">table_partitions</span> <span class="o">=</span> <span class="p">[]</span>
<a id="__codelineno-0-2636" name="__codelineno-0-2636"></a> <span class="c1"># TODO: As a next step, we could also play around with yielding instead of materializing the full list</span>
<a id="__codelineno-0-2637" name="__codelineno-0-2637"></a> <span class="k">for</span> <span class="n">unique_partition</span> <span class="ow">in</span> <span class="n">unique_partition_fields</span><span class="o">.</span><span class="n">to_pylist</span><span class="p">():</span>
<a id="__codelineno-0-2638" name="__codelineno-0-2638"></a> <span class="n">partition_key</span> <span class="o">=</span> <span class="n">PartitionKey</span><span class="p">(</span>
<a id="__codelineno-0-2639" name="__codelineno-0-2639"></a> <span class="n">field_values</span><span class="o">=</span><span class="p">[</span>
<a id="__codelineno-0-2640" name="__codelineno-0-2640"></a> <span class="n">PartitionFieldValue</span><span class="p">(</span><span class="n">field</span><span class="o">=</span><span class="n">field</span><span class="p">,</span> <span class="n">value</span><span class="o">=</span><span class="n">unique_partition</span><span class="p">[</span><span class="n">name</span><span class="p">])</span>
<a id="__codelineno-0-2641" name="__codelineno-0-2641"></a> <span class="k">for</span> <span class="n">field</span><span class="p">,</span> <span class="n">name</span> <span class="ow">in</span> <span class="nb">zip</span><span class="p">(</span><span class="n">spec</span><span class="o">.</span><span class="n">fields</span><span class="p">,</span> <span class="n">partition_fields</span><span class="p">)</span>
<a id="__codelineno-0-2642" name="__codelineno-0-2642"></a> <span class="p">],</span>
<a id="__codelineno-0-2643" name="__codelineno-0-2643"></a> <span class="n">partition_spec</span><span class="o">=</span><span class="n">spec</span><span class="p">,</span>
<a id="__codelineno-0-2644" name="__codelineno-0-2644"></a> <span class="n">schema</span><span class="o">=</span><span class="n">schema</span><span class="p">,</span>
<a id="__codelineno-0-2645" name="__codelineno-0-2645"></a> <span class="p">)</span>
<a id="__codelineno-0-2646" name="__codelineno-0-2646"></a> <span class="n">filtered_table</span> <span class="o">=</span> <span class="n">arrow_table</span><span class="o">.</span><span class="n">filter</span><span class="p">(</span>
<a id="__codelineno-0-2647" name="__codelineno-0-2647"></a> <span class="n">functools</span><span class="o">.</span><span class="n">reduce</span><span class="p">(</span>
<a id="__codelineno-0-2648" name="__codelineno-0-2648"></a> <span class="n">operator</span><span class="o">.</span><span class="n">and_</span><span class="p">,</span>
<a id="__codelineno-0-2649" name="__codelineno-0-2649"></a> <span class="p">[</span>
<a id="__codelineno-0-2650" name="__codelineno-0-2650"></a> <span class="n">pc</span><span class="o">.</span><span class="n">field</span><span class="p">(</span><span class="n">partition_field_name</span><span class="p">)</span> <span class="o">==</span> <span class="n">unique_partition</span><span class="p">[</span><span class="n">partition_field_name</span><span class="p">]</span>
<a id="__codelineno-0-2651" name="__codelineno-0-2651"></a> <span class="k">if</span> <span class="n">unique_partition</span><span class="p">[</span><span class="n">partition_field_name</span><span class="p">]</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span>
<a id="__codelineno-0-2652" name="__codelineno-0-2652"></a> <span class="k">else</span> <span class="n">pc</span><span class="o">.</span><span class="n">field</span><span class="p">(</span><span class="n">partition_field_name</span><span class="p">)</span><span class="o">.</span><span class="n">is_null</span><span class="p">()</span>
<a id="__codelineno-0-2653" name="__codelineno-0-2653"></a> <span class="k">for</span> <span class="n">field</span><span class="p">,</span> <span class="n">partition_field_name</span> <span class="ow">in</span> <span class="nb">zip</span><span class="p">(</span><span class="n">spec</span><span class="o">.</span><span class="n">fields</span><span class="p">,</span> <span class="n">partition_fields</span><span class="p">)</span>
<a id="__codelineno-0-2654" name="__codelineno-0-2654"></a> <span class="p">],</span>
<a id="__codelineno-0-2655" name="__codelineno-0-2655"></a> <span class="p">)</span>
<a id="__codelineno-0-2656" name="__codelineno-0-2656"></a> <span class="p">)</span>
<a id="__codelineno-0-2657" name="__codelineno-0-2657"></a> <span class="n">filtered_table</span> <span class="o">=</span> <span class="n">filtered_table</span><span class="o">.</span><span class="n">drop_columns</span><span class="p">(</span><span class="n">partition_fields</span><span class="p">)</span>
<a id="__codelineno-0-2658" name="__codelineno-0-2658"></a>
<a id="__codelineno-0-2659" name="__codelineno-0-2659"></a> <span class="c1"># The combine_chunks seems to be counter-intuitive to do, but it actually returns</span>
<a id="__codelineno-0-2660" name="__codelineno-0-2660"></a> <span class="c1"># fresh buffers that don&#39;t interfere with each other when it is written out to file</span>
<a id="__codelineno-0-2661" name="__codelineno-0-2661"></a> <span class="n">table_partitions</span><span class="o">.</span><span class="n">append</span><span class="p">(</span>
<a id="__codelineno-0-2662" name="__codelineno-0-2662"></a> <span class="n">_TablePartition</span><span class="p">(</span><span class="n">partition_key</span><span class="o">=</span><span class="n">partition_key</span><span class="p">,</span> <span class="n">arrow_table_partition</span><span class="o">=</span><span class="n">filtered_table</span><span class="o">.</span><span class="n">combine_chunks</span><span class="p">())</span>
<a id="__codelineno-0-2663" name="__codelineno-0-2663"></a> <span class="p">)</span>
<a id="__codelineno-0-2664" name="__codelineno-0-2664"></a>
<a id="__codelineno-0-2665" name="__codelineno-0-2665"></a> <span class="k">return</span> <span class="n">table_partitions</span>
</code></pre></div></td></tr></table></div>
</details>
</div>
</div>
<div class="doc doc-object doc-function">
<h2 id="pyiceberg.io.pyarrow._expression_to_complementary_pyarrow" class="doc doc-heading">
<code class="highlight language-python"><span class="n">_expression_to_complementary_pyarrow</span><span class="p">(</span><span class="n">expr</span><span class="p">)</span></code>
<a href="#pyiceberg.io.pyarrow._expression_to_complementary_pyarrow" class="headerlink" title="Permanent link">&para;</a></h2>
<div class="doc doc-contents ">
<p>Complementary filter conversion function of expression_to_pyarrow.</p>
<p><code>expression_to_pyarrow(Not(expr))</code> cannot be used to achieve this complementary effect because <code>~</code> in <code>pyarrow.compute.Expression</code> does not handle null.</p>
<details class="quote">
<summary>Source code in <code>pyiceberg/io/pyarrow.py</code></summary>
<div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-868">868</a></span>
<span class="normal"><a href="#__codelineno-0-869">869</a></span>
<span class="normal"><a href="#__codelineno-0-870">870</a></span>
<span class="normal"><a href="#__codelineno-0-871">871</a></span>
<span class="normal"><a href="#__codelineno-0-872">872</a></span>
<span class="normal"><a href="#__codelineno-0-873">873</a></span>
<span class="normal"><a href="#__codelineno-0-874">874</a></span>
<span class="normal"><a href="#__codelineno-0-875">875</a></span>
<span class="normal"><a href="#__codelineno-0-876">876</a></span>
<span class="normal"><a href="#__codelineno-0-877">877</a></span>
<span class="normal"><a href="#__codelineno-0-878">878</a></span>
<span class="normal"><a href="#__codelineno-0-879">879</a></span>
<span class="normal"><a href="#__codelineno-0-880">880</a></span>
<span class="normal"><a href="#__codelineno-0-881">881</a></span>
<span class="normal"><a href="#__codelineno-0-882">882</a></span>
<span class="normal"><a href="#__codelineno-0-883">883</a></span>
<span class="normal"><a href="#__codelineno-0-884">884</a></span>
<span class="normal"><a href="#__codelineno-0-885">885</a></span>
<span class="normal"><a href="#__codelineno-0-886">886</a></span>
<span class="normal"><a href="#__codelineno-0-887">887</a></span>
<span class="normal"><a href="#__codelineno-0-888">888</a></span>
<span class="normal"><a href="#__codelineno-0-889">889</a></span></pre></div></td><td class="code"><div><pre><span></span><code><a id="__codelineno-0-868" name="__codelineno-0-868"></a><span class="k">def</span><span class="w"> </span><span class="nf">_expression_to_complementary_pyarrow</span><span class="p">(</span><span class="n">expr</span><span class="p">:</span> <span class="n">BooleanExpression</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">pc</span><span class="o">.</span><span class="n">Expression</span><span class="p">:</span>
<a id="__codelineno-0-869" name="__codelineno-0-869"></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;Complementary filter conversion function of expression_to_pyarrow.</span>
<a id="__codelineno-0-870" name="__codelineno-0-870"></a>
<a id="__codelineno-0-871" name="__codelineno-0-871"></a><span class="sd"> Could not use expression_to_pyarrow(Not(expr)) to achieve this complementary effect because ~ in pyarrow.compute.Expression does not handle null.</span>
<a id="__codelineno-0-872" name="__codelineno-0-872"></a><span class="sd"> &quot;&quot;&quot;</span>
<a id="__codelineno-0-873" name="__codelineno-0-873"></a> <span class="n">collector</span> <span class="o">=</span> <span class="n">_NullNaNUnmentionedTermsCollector</span><span class="p">()</span>
<a id="__codelineno-0-874" name="__codelineno-0-874"></a> <span class="n">collector</span><span class="o">.</span><span class="n">collect</span><span class="p">(</span><span class="n">expr</span><span class="p">)</span>
<a id="__codelineno-0-875" name="__codelineno-0-875"></a>
<a id="__codelineno-0-876" name="__codelineno-0-876"></a> <span class="c1"># Convert the set of terms to a sorted list so that layout of the expression to build is deterministic.</span>
<a id="__codelineno-0-877" name="__codelineno-0-877"></a> <span class="n">null_unmentioned_bound_terms</span><span class="p">:</span> <span class="n">List</span><span class="p">[</span><span class="n">BoundTerm</span><span class="p">[</span><span class="n">Any</span><span class="p">]]</span> <span class="o">=</span> <span class="nb">sorted</span><span class="p">(</span>
<a id="__codelineno-0-878" name="__codelineno-0-878"></a> <span class="n">collector</span><span class="o">.</span><span class="n">null_unmentioned_bound_terms</span><span class="p">,</span> <span class="n">key</span><span class="o">=</span><span class="k">lambda</span> <span class="n">term</span><span class="p">:</span> <span class="n">term</span><span class="o">.</span><span class="n">ref</span><span class="p">()</span><span class="o">.</span><span class="n">field</span><span class="o">.</span><span class="n">name</span>
<a id="__codelineno-0-879" name="__codelineno-0-879"></a> <span class="p">)</span>
<a id="__codelineno-0-880" name="__codelineno-0-880"></a> <span class="n">nan_unmentioned_bound_terms</span><span class="p">:</span> <span class="n">List</span><span class="p">[</span><span class="n">BoundTerm</span><span class="p">[</span><span class="n">Any</span><span class="p">]]</span> <span class="o">=</span> <span class="nb">sorted</span><span class="p">(</span>
<a id="__codelineno-0-881" name="__codelineno-0-881"></a> <span class="n">collector</span><span class="o">.</span><span class="n">nan_unmentioned_bound_terms</span><span class="p">,</span> <span class="n">key</span><span class="o">=</span><span class="k">lambda</span> <span class="n">term</span><span class="p">:</span> <span class="n">term</span><span class="o">.</span><span class="n">ref</span><span class="p">()</span><span class="o">.</span><span class="n">field</span><span class="o">.</span><span class="n">name</span>
<a id="__codelineno-0-882" name="__codelineno-0-882"></a> <span class="p">)</span>
<a id="__codelineno-0-883" name="__codelineno-0-883"></a>
<a id="__codelineno-0-884" name="__codelineno-0-884"></a> <span class="n">preserve_expr</span><span class="p">:</span> <span class="n">BooleanExpression</span> <span class="o">=</span> <span class="n">Not</span><span class="p">(</span><span class="n">expr</span><span class="p">)</span>
<a id="__codelineno-0-885" name="__codelineno-0-885"></a> <span class="k">for</span> <span class="n">term</span> <span class="ow">in</span> <span class="n">null_unmentioned_bound_terms</span><span class="p">:</span>
<a id="__codelineno-0-886" name="__codelineno-0-886"></a> <span class="n">preserve_expr</span> <span class="o">=</span> <span class="n">Or</span><span class="p">(</span><span class="n">preserve_expr</span><span class="p">,</span> <span class="n">BoundIsNull</span><span class="p">(</span><span class="n">term</span><span class="o">=</span><span class="n">term</span><span class="p">))</span>
<a id="__codelineno-0-887" name="__codelineno-0-887"></a> <span class="k">for</span> <span class="n">term</span> <span class="ow">in</span> <span class="n">nan_unmentioned_bound_terms</span><span class="p">:</span>
<a id="__codelineno-0-888" name="__codelineno-0-888"></a> <span class="n">preserve_expr</span> <span class="o">=</span> <span class="n">Or</span><span class="p">(</span><span class="n">preserve_expr</span><span class="p">,</span> <span class="n">BoundIsNaN</span><span class="p">(</span><span class="n">term</span><span class="o">=</span><span class="n">term</span><span class="p">))</span>
<a id="__codelineno-0-889" name="__codelineno-0-889"></a> <span class="k">return</span> <span class="n">expression_to_pyarrow</span><span class="p">(</span><span class="n">preserve_expr</span><span class="p">)</span>
</code></pre></div></td></tr></table></div>
</details>
</div>
</div>
<div class="doc doc-object doc-function">
<h2 id="pyiceberg.io.pyarrow._get_column_projection_values" class="doc doc-heading">
<code class="highlight language-python"><span class="n">_get_column_projection_values</span><span class="p">(</span><span class="n">file</span><span class="p">,</span> <span class="n">projected_schema</span><span class="p">,</span> <span class="n">partition_spec</span><span class="p">,</span> <span class="n">file_project_field_ids</span><span class="p">)</span></code>
<a href="#pyiceberg.io.pyarrow._get_column_projection_values" class="headerlink" title="Permanent link">&para;</a></h2>
<div class="doc doc-contents ">
<p>Apply Column Projection rules to File Schema.</p>
<details class="quote">
<summary>Source code in <code>pyiceberg/io/pyarrow.py</code></summary>
<div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-1314">1314</a></span>
<span class="normal"><a href="#__codelineno-0-1315">1315</a></span>
<span class="normal"><a href="#__codelineno-0-1316">1316</a></span>
<span class="normal"><a href="#__codelineno-0-1317">1317</a></span>
<span class="normal"><a href="#__codelineno-0-1318">1318</a></span>
<span class="normal"><a href="#__codelineno-0-1319">1319</a></span>
<span class="normal"><a href="#__codelineno-0-1320">1320</a></span>
<span class="normal"><a href="#__codelineno-0-1321">1321</a></span>
<span class="normal"><a href="#__codelineno-0-1322">1322</a></span>
<span class="normal"><a href="#__codelineno-0-1323">1323</a></span>
<span class="normal"><a href="#__codelineno-0-1324">1324</a></span>
<span class="normal"><a href="#__codelineno-0-1325">1325</a></span>
<span class="normal"><a href="#__codelineno-0-1326">1326</a></span>
<span class="normal"><a href="#__codelineno-0-1327">1327</a></span>
<span class="normal"><a href="#__codelineno-0-1328">1328</a></span>
<span class="normal"><a href="#__codelineno-0-1329">1329</a></span>
<span class="normal"><a href="#__codelineno-0-1330">1330</a></span>
<span class="normal"><a href="#__codelineno-0-1331">1331</a></span>
<span class="normal"><a href="#__codelineno-0-1332">1332</a></span>
<span class="normal"><a href="#__codelineno-0-1333">1333</a></span>
<span class="normal"><a href="#__codelineno-0-1334">1334</a></span>
<span class="normal"><a href="#__codelineno-0-1335">1335</a></span>
<span class="normal"><a href="#__codelineno-0-1336">1336</a></span>
<span class="normal"><a href="#__codelineno-0-1337">1337</a></span>
<span class="normal"><a href="#__codelineno-0-1338">1338</a></span>
<span class="normal"><a href="#__codelineno-0-1339">1339</a></span>
<span class="normal"><a href="#__codelineno-0-1340">1340</a></span>
<span class="normal"><a href="#__codelineno-0-1341">1341</a></span>
<span class="normal"><a href="#__codelineno-0-1342">1342</a></span>
<span class="normal"><a href="#__codelineno-0-1343">1343</a></span>
<span class="normal"><a href="#__codelineno-0-1344">1344</a></span>
<span class="normal"><a href="#__codelineno-0-1345">1345</a></span>
<span class="normal"><a href="#__codelineno-0-1346">1346</a></span>
<span class="normal"><a href="#__codelineno-0-1347">1347</a></span>
<span class="normal"><a href="#__codelineno-0-1348">1348</a></span>
<span class="normal"><a href="#__codelineno-0-1349">1349</a></span>
<span class="normal"><a href="#__codelineno-0-1350">1350</a></span></pre></div></td><td class="code"><div><pre><span></span><code><a id="__codelineno-0-1314" name="__codelineno-0-1314"></a><span class="k">def</span><span class="w"> </span><span class="nf">_get_column_projection_values</span><span class="p">(</span>
<a id="__codelineno-0-1315" name="__codelineno-0-1315"></a> <span class="n">file</span><span class="p">:</span> <span class="n">DataFile</span><span class="p">,</span> <span class="n">projected_schema</span><span class="p">:</span> <span class="n">Schema</span><span class="p">,</span> <span class="n">partition_spec</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">PartitionSpec</span><span class="p">],</span> <span class="n">file_project_field_ids</span><span class="p">:</span> <span class="n">Set</span><span class="p">[</span><span class="nb">int</span><span class="p">]</span>
<a id="__codelineno-0-1316" name="__codelineno-0-1316"></a><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">Tuple</span><span class="p">[</span><span class="nb">bool</span><span class="p">,</span> <span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Any</span><span class="p">]]:</span>
<a id="__codelineno-0-1317" name="__codelineno-0-1317"></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;Apply Column Projection rules to File Schema.&quot;&quot;&quot;</span>
<a id="__codelineno-0-1318" name="__codelineno-0-1318"></a> <span class="n">project_schema_diff</span> <span class="o">=</span> <span class="n">projected_schema</span><span class="o">.</span><span class="n">field_ids</span><span class="o">.</span><span class="n">difference</span><span class="p">(</span><span class="n">file_project_field_ids</span><span class="p">)</span>
<a id="__codelineno-0-1319" name="__codelineno-0-1319"></a> <span class="n">should_project_columns</span> <span class="o">=</span> <span class="nb">len</span><span class="p">(</span><span class="n">project_schema_diff</span><span class="p">)</span> <span class="o">&gt;</span> <span class="mi">0</span>
<a id="__codelineno-0-1320" name="__codelineno-0-1320"></a> <span class="n">projected_missing_fields</span><span class="p">:</span> <span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Any</span><span class="p">]</span> <span class="o">=</span> <span class="p">{}</span>
<a id="__codelineno-0-1321" name="__codelineno-0-1321"></a>
<a id="__codelineno-0-1322" name="__codelineno-0-1322"></a> <span class="k">if</span> <span class="ow">not</span> <span class="n">should_project_columns</span><span class="p">:</span>
<a id="__codelineno-0-1323" name="__codelineno-0-1323"></a> <span class="k">return</span> <span class="kc">False</span><span class="p">,</span> <span class="p">{}</span>
<a id="__codelineno-0-1324" name="__codelineno-0-1324"></a>
<a id="__codelineno-0-1325" name="__codelineno-0-1325"></a> <span class="n">partition_schema</span><span class="p">:</span> <span class="n">StructType</span>
<a id="__codelineno-0-1326" name="__codelineno-0-1326"></a> <span class="n">accessors</span><span class="p">:</span> <span class="n">Dict</span><span class="p">[</span><span class="nb">int</span><span class="p">,</span> <span class="n">Accessor</span><span class="p">]</span>
<a id="__codelineno-0-1327" name="__codelineno-0-1327"></a>
<a id="__codelineno-0-1328" name="__codelineno-0-1328"></a> <span class="k">if</span> <span class="n">partition_spec</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
<a id="__codelineno-0-1329" name="__codelineno-0-1329"></a> <span class="n">partition_schema</span> <span class="o">=</span> <span class="n">partition_spec</span><span class="o">.</span><span class="n">partition_type</span><span class="p">(</span><span class="n">projected_schema</span><span class="p">)</span>
<a id="__codelineno-0-1330" name="__codelineno-0-1330"></a> <span class="n">accessors</span> <span class="o">=</span> <span class="n">build_position_accessors</span><span class="p">(</span><span class="n">partition_schema</span><span class="p">)</span>
<a id="__codelineno-0-1331" name="__codelineno-0-1331"></a> <span class="k">else</span><span class="p">:</span>
<a id="__codelineno-0-1332" name="__codelineno-0-1332"></a> <span class="k">return</span> <span class="kc">False</span><span class="p">,</span> <span class="p">{}</span>
<a id="__codelineno-0-1333" name="__codelineno-0-1333"></a>
<a id="__codelineno-0-1334" name="__codelineno-0-1334"></a> <span class="k">for</span> <span class="n">field_id</span> <span class="ow">in</span> <span class="n">project_schema_diff</span><span class="p">:</span>
<a id="__codelineno-0-1335" name="__codelineno-0-1335"></a> <span class="k">for</span> <span class="n">partition_field</span> <span class="ow">in</span> <span class="n">partition_spec</span><span class="o">.</span><span class="n">fields_by_source_id</span><span class="p">(</span><span class="n">field_id</span><span class="p">):</span>
<a id="__codelineno-0-1336" name="__codelineno-0-1336"></a> <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">partition_field</span><span class="o">.</span><span class="n">transform</span><span class="p">,</span> <span class="n">IdentityTransform</span><span class="p">):</span>
<a id="__codelineno-0-1337" name="__codelineno-0-1337"></a> <span class="n">accessor</span> <span class="o">=</span> <span class="n">accessors</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">partition_field</span><span class="o">.</span><span class="n">field_id</span><span class="p">)</span>
<a id="__codelineno-0-1338" name="__codelineno-0-1338"></a>
<a id="__codelineno-0-1339" name="__codelineno-0-1339"></a> <span class="k">if</span> <span class="n">accessor</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
<a id="__codelineno-0-1340" name="__codelineno-0-1340"></a> <span class="k">continue</span>
<a id="__codelineno-0-1341" name="__codelineno-0-1341"></a>
<a id="__codelineno-0-1342" name="__codelineno-0-1342"></a> <span class="c1"># The partition field may not exist in the partition record of the data file.</span>
<a id="__codelineno-0-1343" name="__codelineno-0-1343"></a> <span class="c1"># This can happen when new partition fields are introduced after the file was written.</span>
<a id="__codelineno-0-1344" name="__codelineno-0-1344"></a> <span class="k">try</span><span class="p">:</span>
<a id="__codelineno-0-1345" name="__codelineno-0-1345"></a> <span class="k">if</span> <span class="n">partition_value</span> <span class="o">:=</span> <span class="n">accessor</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">file</span><span class="o">.</span><span class="n">partition</span><span class="p">):</span>
<a id="__codelineno-0-1346" name="__codelineno-0-1346"></a> <span class="n">projected_missing_fields</span><span class="p">[</span><span class="n">partition_field</span><span class="o">.</span><span class="n">name</span><span class="p">]</span> <span class="o">=</span> <span class="n">partition_value</span>
<a id="__codelineno-0-1347" name="__codelineno-0-1347"></a> <span class="k">except</span> <span class="ne">IndexError</span><span class="p">:</span>
<a id="__codelineno-0-1348" name="__codelineno-0-1348"></a> <span class="k">continue</span>
<a id="__codelineno-0-1349" name="__codelineno-0-1349"></a>
<a id="__codelineno-0-1350" name="__codelineno-0-1350"></a> <span class="k">return</span> <span class="kc">True</span><span class="p">,</span> <span class="n">projected_missing_fields</span>
</code></pre></div></td></tr></table></div>
</details>
</div>
</div>
<div class="doc doc-object doc-function">
<h2 id="pyiceberg.io.pyarrow.compute_statistics_plan" class="doc doc-heading">
<code class="highlight language-python"><span class="n">compute_statistics_plan</span><span class="p">(</span><span class="n">schema</span><span class="p">,</span> <span class="n">table_properties</span><span class="p">)</span></code>
<a href="#pyiceberg.io.pyarrow.compute_statistics_plan" class="headerlink" title="Permanent link">&para;</a></h2>
<div class="doc doc-contents ">
<p>Compute the statistics plan for all columns.</p>
<p>The resulting list is assumed to have the same length and same order as the columns in the pyarrow table.
This allows the list to map from the column index to the Iceberg column ID.
For each element, the desired metrics collection that was provided by the user in the configuration
is computed and then adjusted according to the data type of the column. For nested columns the minimum
and maximum values are not computed. And truncation is only applied to text or binary strings.</p>
<p><span class="doc-section-title">Parameters:</span></p>
<table>
<thead>
<tr>
<th>Name</th>
<th>Type</th>
<th>Description</th>
<th>Default</th>
</tr>
</thead>
<tbody>
<tr class="doc-section-item">
<td>
<code>table_properties</code>
</td>
<td>
<code>from pyiceberg.table.metadata.TableMetadata</code>
</td>
<td>
<div class="doc-md-description">
<p>The Iceberg table metadata properties.
They are required to compute the mapping of column position to Iceberg schema type ID. They are also
used to set the mode for column metrics collection.</p>
</div>
</td>
<td>
<em>required</em>
</td>
</tr>
</tbody>
</table>
<details class="quote">
<summary>Source code in <code>pyiceberg/io/pyarrow.py</code></summary>
<div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-2092">2092</a></span>
<span class="normal"><a href="#__codelineno-0-2093">2093</a></span>
<span class="normal"><a href="#__codelineno-0-2094">2094</a></span>
<span class="normal"><a href="#__codelineno-0-2095">2095</a></span>
<span class="normal"><a href="#__codelineno-0-2096">2096</a></span>
<span class="normal"><a href="#__codelineno-0-2097">2097</a></span>
<span class="normal"><a href="#__codelineno-0-2098">2098</a></span>
<span class="normal"><a href="#__codelineno-0-2099">2099</a></span>
<span class="normal"><a href="#__codelineno-0-2100">2100</a></span>
<span class="normal"><a href="#__codelineno-0-2101">2101</a></span>
<span class="normal"><a href="#__codelineno-0-2102">2102</a></span>
<span class="normal"><a href="#__codelineno-0-2103">2103</a></span>
<span class="normal"><a href="#__codelineno-0-2104">2104</a></span>
<span class="normal"><a href="#__codelineno-0-2105">2105</a></span>
<span class="normal"><a href="#__codelineno-0-2106">2106</a></span>
<span class="normal"><a href="#__codelineno-0-2107">2107</a></span>
<span class="normal"><a href="#__codelineno-0-2108">2108</a></span>
<span class="normal"><a href="#__codelineno-0-2109">2109</a></span>
<span class="normal"><a href="#__codelineno-0-2110">2110</a></span>
<span class="normal"><a href="#__codelineno-0-2111">2111</a></span>
<span class="normal"><a href="#__codelineno-0-2112">2112</a></span>
<span class="normal"><a href="#__codelineno-0-2113">2113</a></span>
<span class="normal"><a href="#__codelineno-0-2114">2114</a></span></pre></div></td><td class="code"><div><pre><span></span><code><a id="__codelineno-0-2092" name="__codelineno-0-2092"></a><span class="k">def</span><span class="w"> </span><span class="nf">compute_statistics_plan</span><span class="p">(</span>
<a id="__codelineno-0-2093" name="__codelineno-0-2093"></a> <span class="n">schema</span><span class="p">:</span> <span class="n">Schema</span><span class="p">,</span>
<a id="__codelineno-0-2094" name="__codelineno-0-2094"></a> <span class="n">table_properties</span><span class="p">:</span> <span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="nb">str</span><span class="p">],</span>
<a id="__codelineno-0-2095" name="__codelineno-0-2095"></a><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">Dict</span><span class="p">[</span><span class="nb">int</span><span class="p">,</span> <span class="n">StatisticsCollector</span><span class="p">]:</span>
<a id="__codelineno-0-2096" name="__codelineno-0-2096"></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<a id="__codelineno-0-2097" name="__codelineno-0-2097"></a><span class="sd"> Compute the statistics plan for all columns.</span>
<a id="__codelineno-0-2098" name="__codelineno-0-2098"></a>
<a id="__codelineno-0-2099" name="__codelineno-0-2099"></a><span class="sd"> The resulting list is assumed to have the same length and same order as the columns in the pyarrow table.</span>
<a id="__codelineno-0-2100" name="__codelineno-0-2100"></a><span class="sd"> This allows the list to map from the column index to the Iceberg column ID.</span>
<a id="__codelineno-0-2101" name="__codelineno-0-2101"></a><span class="sd"> For each element, the desired metrics collection that was provided by the user in the configuration</span>
<a id="__codelineno-0-2102" name="__codelineno-0-2102"></a><span class="sd"> is computed and then adjusted according to the data type of the column. For nested columns the minimum</span>
<a id="__codelineno-0-2103" name="__codelineno-0-2103"></a><span class="sd"> and maximum values are not computed. And truncation is only applied to text of binary strings.</span>
<a id="__codelineno-0-2104" name="__codelineno-0-2104"></a>
<a id="__codelineno-0-2105" name="__codelineno-0-2105"></a><span class="sd"> Args:</span>
<a id="__codelineno-0-2106" name="__codelineno-0-2106"></a><span class="sd"> table_properties (from pyiceberg.table.metadata.TableMetadata): The Iceberg table metadata properties.</span>
<a id="__codelineno-0-2107" name="__codelineno-0-2107"></a><span class="sd"> They are required to compute the mapping of column position to iceberg schema type id. It&#39;s also</span>
<a id="__codelineno-0-2108" name="__codelineno-0-2108"></a><span class="sd"> used to set the mode for column metrics collection</span>
<a id="__codelineno-0-2109" name="__codelineno-0-2109"></a><span class="sd"> &quot;&quot;&quot;</span>
<a id="__codelineno-0-2110" name="__codelineno-0-2110"></a> <span class="n">stats_cols</span> <span class="o">=</span> <span class="n">pre_order_visit</span><span class="p">(</span><span class="n">schema</span><span class="p">,</span> <span class="n">PyArrowStatisticsCollector</span><span class="p">(</span><span class="n">schema</span><span class="p">,</span> <span class="n">table_properties</span><span class="p">))</span>
<a id="__codelineno-0-2111" name="__codelineno-0-2111"></a> <span class="n">result</span><span class="p">:</span> <span class="n">Dict</span><span class="p">[</span><span class="nb">int</span><span class="p">,</span> <span class="n">StatisticsCollector</span><span class="p">]</span> <span class="o">=</span> <span class="p">{}</span>
<a id="__codelineno-0-2112" name="__codelineno-0-2112"></a> <span class="k">for</span> <span class="n">stats_col</span> <span class="ow">in</span> <span class="n">stats_cols</span><span class="p">:</span>
<a id="__codelineno-0-2113" name="__codelineno-0-2113"></a> <span class="n">result</span><span class="p">[</span><span class="n">stats_col</span><span class="o">.</span><span class="n">field_id</span><span class="p">]</span> <span class="o">=</span> <span class="n">stats_col</span>
<a id="__codelineno-0-2114" name="__codelineno-0-2114"></a> <span class="k">return</span> <span class="n">result</span>
</code></pre></div></td></tr></table></div>
</details>
</div>
</div>
<div class="doc doc-object doc-function">
<h2 id="pyiceberg.io.pyarrow.data_file_statistics_from_parquet_metadata" class="doc doc-heading">
<code class="highlight language-python"><span class="n">data_file_statistics_from_parquet_metadata</span><span class="p">(</span><span class="n">parquet_metadata</span><span class="p">,</span> <span class="n">stats_columns</span><span class="p">,</span> <span class="n">parquet_column_mapping</span><span class="p">)</span></code>
<a href="#pyiceberg.io.pyarrow.data_file_statistics_from_parquet_metadata" class="headerlink" title="Permanent link">&para;</a></h2>
<div class="doc doc-contents ">
<p>Compute and return DataFileStatistics that includes the following.</p>
<ul>
<li>record_count</li>
<li>column_sizes</li>
<li>value_counts</li>
<li>null_value_counts</li>
<li>nan_value_counts</li>
<li>column_aggregates</li>
<li>split_offsets</li>
</ul>
<p><span class="doc-section-title">Parameters:</span></p>
<table>
<thead>
<tr>
<th>Name</th>
<th>Type</th>
<th>Description</th>
<th>Default</th>
</tr>
</thead>
<tbody>
<tr class="doc-section-item">
<td>
<code>parquet_metadata</code>
</td>
<td>
<code><span title="pyarrow.parquet.FileMetaData">FileMetaData</span></code>
</td>
<td>
<div class="doc-md-description">
<p>A pyarrow metadata object.</p>
</div>
</td>
<td>
<em>required</em>
</td>
</tr>
<tr class="doc-section-item">
<td>
<code>stats_columns</code>
</td>
<td>
<code><span title="typing.Dict">Dict</span>[<span title="int">int</span>, <span title="pyiceberg.io.pyarrow.StatisticsCollector">StatisticsCollector</span>]</code>
</td>
<td>
<div class="doc-md-description">
<p>The statistics gathering plan. It is required to
set the mode for column metrics collection.</p>
</div>
</td>
<td>
<em>required</em>
</td>
</tr>
<tr class="doc-section-item">
<td>
<code>parquet_column_mapping</code>
</td>
<td>
<code><span title="typing.Dict">Dict</span>[<span title="str">str</span>, <span title="int">int</span>]</code>
</td>
<td>
<div class="doc-md-description">
<p>The mapping of the parquet column path (<code>path_in_schema</code>) to the field ID.</p>
</div>
</td>
<td>
<em>required</em>
</td>
</tr>
</tbody>
</table>
<details class="quote">
<summary>Source code in <code>pyiceberg/io/pyarrow.py</code></summary>
<div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-2253">2253</a></span>
<span class="normal"><a href="#__codelineno-0-2254">2254</a></span>
<span class="normal"><a href="#__codelineno-0-2255">2255</a></span>
<span class="normal"><a href="#__codelineno-0-2256">2256</a></span>
<span class="normal"><a href="#__codelineno-0-2257">2257</a></span>
<span class="normal"><a href="#__codelineno-0-2258">2258</a></span>
<span class="normal"><a href="#__codelineno-0-2259">2259</a></span>
<span class="normal"><a href="#__codelineno-0-2260">2260</a></span>
<span class="normal"><a href="#__codelineno-0-2261">2261</a></span>
<span class="normal"><a href="#__codelineno-0-2262">2262</a></span>
<span class="normal"><a href="#__codelineno-0-2263">2263</a></span>
<span class="normal"><a href="#__codelineno-0-2264">2264</a></span>
<span class="normal"><a href="#__codelineno-0-2265">2265</a></span>
<span class="normal"><a href="#__codelineno-0-2266">2266</a></span>
<span class="normal"><a href="#__codelineno-0-2267">2267</a></span>
<span class="normal"><a href="#__codelineno-0-2268">2268</a></span>
<span class="normal"><a href="#__codelineno-0-2269">2269</a></span>
<span class="normal"><a href="#__codelineno-0-2270">2270</a></span>
<span class="normal"><a href="#__codelineno-0-2271">2271</a></span>
<span class="normal"><a href="#__codelineno-0-2272">2272</a></span>
<span class="normal"><a href="#__codelineno-0-2273">2273</a></span>
<span class="normal"><a href="#__codelineno-0-2274">2274</a></span>
<span class="normal"><a href="#__codelineno-0-2275">2275</a></span>
<span class="normal"><a href="#__codelineno-0-2276">2276</a></span>
<span class="normal"><a href="#__codelineno-0-2277">2277</a></span>
<span class="normal"><a href="#__codelineno-0-2278">2278</a></span>
<span class="normal"><a href="#__codelineno-0-2279">2279</a></span>
<span class="normal"><a href="#__codelineno-0-2280">2280</a></span>
<span class="normal"><a href="#__codelineno-0-2281">2281</a></span>
<span class="normal"><a href="#__codelineno-0-2282">2282</a></span>
<span class="normal"><a href="#__codelineno-0-2283">2283</a></span>
<span class="normal"><a href="#__codelineno-0-2284">2284</a></span>
<span class="normal"><a href="#__codelineno-0-2285">2285</a></span>
<span class="normal"><a href="#__codelineno-0-2286">2286</a></span>
<span class="normal"><a href="#__codelineno-0-2287">2287</a></span>
<span class="normal"><a href="#__codelineno-0-2288">2288</a></span>
<span class="normal"><a href="#__codelineno-0-2289">2289</a></span>
<span class="normal"><a href="#__codelineno-0-2290">2290</a></span>
<span class="normal"><a href="#__codelineno-0-2291">2291</a></span>
<span class="normal"><a href="#__codelineno-0-2292">2292</a></span>
<span class="normal"><a href="#__codelineno-0-2293">2293</a></span>
<span class="normal"><a href="#__codelineno-0-2294">2294</a></span>
<span class="normal"><a href="#__codelineno-0-2295">2295</a></span>
<span class="normal"><a href="#__codelineno-0-2296">2296</a></span>
<span class="normal"><a href="#__codelineno-0-2297">2297</a></span>
<span class="normal"><a href="#__codelineno-0-2298">2298</a></span>
<span class="normal"><a href="#__codelineno-0-2299">2299</a></span>
<span class="normal"><a href="#__codelineno-0-2300">2300</a></span>
<span class="normal"><a href="#__codelineno-0-2301">2301</a></span>
<span class="normal"><a href="#__codelineno-0-2302">2302</a></span>
<span class="normal"><a href="#__codelineno-0-2303">2303</a></span>
<span class="normal"><a href="#__codelineno-0-2304">2304</a></span>
<span class="normal"><a href="#__codelineno-0-2305">2305</a></span>
<span class="normal"><a href="#__codelineno-0-2306">2306</a></span>
<span class="normal"><a href="#__codelineno-0-2307">2307</a></span>
<span class="normal"><a href="#__codelineno-0-2308">2308</a></span>
<span class="normal"><a href="#__codelineno-0-2309">2309</a></span>
<span class="normal"><a href="#__codelineno-0-2310">2310</a></span>
<span class="normal"><a href="#__codelineno-0-2311">2311</a></span>
<span class="normal"><a href="#__codelineno-0-2312">2312</a></span>
<span class="normal"><a href="#__codelineno-0-2313">2313</a></span>
<span class="normal"><a href="#__codelineno-0-2314">2314</a></span>
<span class="normal"><a href="#__codelineno-0-2315">2315</a></span>
<span class="normal"><a href="#__codelineno-0-2316">2316</a></span>
<span class="normal"><a href="#__codelineno-0-2317">2317</a></span>
<span class="normal"><a href="#__codelineno-0-2318">2318</a></span>
<span class="normal"><a href="#__codelineno-0-2319">2319</a></span>
<span class="normal"><a href="#__codelineno-0-2320">2320</a></span>
<span class="normal"><a href="#__codelineno-0-2321">2321</a></span>
<span class="normal"><a href="#__codelineno-0-2322">2322</a></span>
<span class="normal"><a href="#__codelineno-0-2323">2323</a></span>
<span class="normal"><a href="#__codelineno-0-2324">2324</a></span>
<span class="normal"><a href="#__codelineno-0-2325">2325</a></span>
<span class="normal"><a href="#__codelineno-0-2326">2326</a></span>
<span class="normal"><a href="#__codelineno-0-2327">2327</a></span>
<span class="normal"><a href="#__codelineno-0-2328">2328</a></span>
<span class="normal"><a href="#__codelineno-0-2329">2329</a></span>
<span class="normal"><a href="#__codelineno-0-2330">2330</a></span>
<span class="normal"><a href="#__codelineno-0-2331">2331</a></span>
<span class="normal"><a href="#__codelineno-0-2332">2332</a></span>
<span class="normal"><a href="#__codelineno-0-2333">2333</a></span>
<span class="normal"><a href="#__codelineno-0-2334">2334</a></span>
<span class="normal"><a href="#__codelineno-0-2335">2335</a></span>
<span class="normal"><a href="#__codelineno-0-2336">2336</a></span>
<span class="normal"><a href="#__codelineno-0-2337">2337</a></span>
<span class="normal"><a href="#__codelineno-0-2338">2338</a></span>
<span class="normal"><a href="#__codelineno-0-2339">2339</a></span>
<span class="normal"><a href="#__codelineno-0-2340">2340</a></span>
<span class="normal"><a href="#__codelineno-0-2341">2341</a></span>
<span class="normal"><a href="#__codelineno-0-2342">2342</a></span>
<span class="normal"><a href="#__codelineno-0-2343">2343</a></span>
<span class="normal"><a href="#__codelineno-0-2344">2344</a></span>
<span class="normal"><a href="#__codelineno-0-2345">2345</a></span>
<span class="normal"><a href="#__codelineno-0-2346">2346</a></span>
<span class="normal"><a href="#__codelineno-0-2347">2347</a></span>
<span class="normal"><a href="#__codelineno-0-2348">2348</a></span>
<span class="normal"><a href="#__codelineno-0-2349">2349</a></span>
<span class="normal"><a href="#__codelineno-0-2350">2350</a></span>
<span class="normal"><a href="#__codelineno-0-2351">2351</a></span>
<span class="normal"><a href="#__codelineno-0-2352">2352</a></span>
<span class="normal"><a href="#__codelineno-0-2353">2353</a></span></pre></div></td><td class="code"><div><pre><span></span><code><a id="__codelineno-0-2253" name="__codelineno-0-2253"></a><span class="k">def</span><span class="w"> </span><span class="nf">data_file_statistics_from_parquet_metadata</span><span class="p">(</span>
<a id="__codelineno-0-2254" name="__codelineno-0-2254"></a> <span class="n">parquet_metadata</span><span class="p">:</span> <span class="n">pq</span><span class="o">.</span><span class="n">FileMetaData</span><span class="p">,</span>
<a id="__codelineno-0-2255" name="__codelineno-0-2255"></a> <span class="n">stats_columns</span><span class="p">:</span> <span class="n">Dict</span><span class="p">[</span><span class="nb">int</span><span class="p">,</span> <span class="n">StatisticsCollector</span><span class="p">],</span>
<a id="__codelineno-0-2256" name="__codelineno-0-2256"></a> <span class="n">parquet_column_mapping</span><span class="p">:</span> <span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="nb">int</span><span class="p">],</span>
<a id="__codelineno-0-2257" name="__codelineno-0-2257"></a><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFileStatistics</span><span class="p">:</span>
<a id="__codelineno-0-2258" name="__codelineno-0-2258"></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<a id="__codelineno-0-2259" name="__codelineno-0-2259"></a><span class="sd"> Compute and return DataFileStatistics that includes the following.</span>
<a id="__codelineno-0-2260" name="__codelineno-0-2260"></a>
<a id="__codelineno-0-2261" name="__codelineno-0-2261"></a><span class="sd"> - record_count</span>
<a id="__codelineno-0-2262" name="__codelineno-0-2262"></a><span class="sd"> - column_sizes</span>
<a id="__codelineno-0-2263" name="__codelineno-0-2263"></a><span class="sd"> - value_counts</span>
<a id="__codelineno-0-2264" name="__codelineno-0-2264"></a><span class="sd"> - null_value_counts</span>
<a id="__codelineno-0-2265" name="__codelineno-0-2265"></a><span class="sd"> - nan_value_counts</span>
<a id="__codelineno-0-2266" name="__codelineno-0-2266"></a><span class="sd"> - column_aggregates</span>
<a id="__codelineno-0-2267" name="__codelineno-0-2267"></a><span class="sd"> - split_offsets</span>
<a id="__codelineno-0-2268" name="__codelineno-0-2268"></a>
<a id="__codelineno-0-2269" name="__codelineno-0-2269"></a><span class="sd"> Args:</span>
<a id="__codelineno-0-2270" name="__codelineno-0-2270"></a><span class="sd"> parquet_metadata (pyarrow.parquet.FileMetaData): A pyarrow metadata object.</span>
<a id="__codelineno-0-2271" name="__codelineno-0-2271"></a><span class="sd"> stats_columns (Dict[int, StatisticsCollector]): The statistics gathering plan. It is required to</span>
<a id="__codelineno-0-2272" name="__codelineno-0-2272"></a><span class="sd"> set the mode for column metrics collection</span>
<a id="__codelineno-0-2273" name="__codelineno-0-2273"></a><span class="sd"> parquet_column_mapping (Dict[str, int]): The mapping of the parquet file name to the field ID</span>
<a id="__codelineno-0-2274" name="__codelineno-0-2274"></a><span class="sd"> &quot;&quot;&quot;</span>
<a id="__codelineno-0-2275" name="__codelineno-0-2275"></a> <span class="n">column_sizes</span><span class="p">:</span> <span class="n">Dict</span><span class="p">[</span><span class="nb">int</span><span class="p">,</span> <span class="nb">int</span><span class="p">]</span> <span class="o">=</span> <span class="p">{}</span>
<a id="__codelineno-0-2276" name="__codelineno-0-2276"></a> <span class="n">value_counts</span><span class="p">:</span> <span class="n">Dict</span><span class="p">[</span><span class="nb">int</span><span class="p">,</span> <span class="nb">int</span><span class="p">]</span> <span class="o">=</span> <span class="p">{}</span>
<a id="__codelineno-0-2277" name="__codelineno-0-2277"></a> <span class="n">split_offsets</span><span class="p">:</span> <span class="n">List</span><span class="p">[</span><span class="nb">int</span><span class="p">]</span> <span class="o">=</span> <span class="p">[]</span>
<a id="__codelineno-0-2278" name="__codelineno-0-2278"></a>
<a id="__codelineno-0-2279" name="__codelineno-0-2279"></a> <span class="n">null_value_counts</span><span class="p">:</span> <span class="n">Dict</span><span class="p">[</span><span class="nb">int</span><span class="p">,</span> <span class="nb">int</span><span class="p">]</span> <span class="o">=</span> <span class="p">{}</span>
<a id="__codelineno-0-2280" name="__codelineno-0-2280"></a> <span class="n">nan_value_counts</span><span class="p">:</span> <span class="n">Dict</span><span class="p">[</span><span class="nb">int</span><span class="p">,</span> <span class="nb">int</span><span class="p">]</span> <span class="o">=</span> <span class="p">{}</span>
<a id="__codelineno-0-2281" name="__codelineno-0-2281"></a>
<a id="__codelineno-0-2282" name="__codelineno-0-2282"></a> <span class="n">col_aggs</span> <span class="o">=</span> <span class="p">{}</span>
<a id="__codelineno-0-2283" name="__codelineno-0-2283"></a>
<a id="__codelineno-0-2284" name="__codelineno-0-2284"></a> <span class="n">invalidate_col</span><span class="p">:</span> <span class="n">Set</span><span class="p">[</span><span class="nb">int</span><span class="p">]</span> <span class="o">=</span> <span class="nb">set</span><span class="p">()</span>
<a id="__codelineno-0-2285" name="__codelineno-0-2285"></a> <span class="k">for</span> <span class="n">r</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">parquet_metadata</span><span class="o">.</span><span class="n">num_row_groups</span><span class="p">):</span>
<a id="__codelineno-0-2286" name="__codelineno-0-2286"></a> <span class="c1"># References:</span>
<a id="__codelineno-0-2287" name="__codelineno-0-2287"></a> <span class="c1"># https://github.com/apache/iceberg/blob/fc381a81a1fdb8f51a0637ca27cd30673bd7aad3/parquet/src/main/java/org/apache/iceberg/parquet/ParquetUtil.java#L232</span>
<a id="__codelineno-0-2288" name="__codelineno-0-2288"></a> <span class="c1"># https://github.com/apache/parquet-mr/blob/ac29db4611f86a07cc6877b416aa4b183e09b353/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/metadata/ColumnChunkMetaData.java#L184</span>
<a id="__codelineno-0-2289" name="__codelineno-0-2289"></a>
<a id="__codelineno-0-2290" name="__codelineno-0-2290"></a> <span class="n">row_group</span> <span class="o">=</span> <span class="n">parquet_metadata</span><span class="o">.</span><span class="n">row_group</span><span class="p">(</span><span class="n">r</span><span class="p">)</span>
<a id="__codelineno-0-2291" name="__codelineno-0-2291"></a>
<a id="__codelineno-0-2292" name="__codelineno-0-2292"></a> <span class="n">data_offset</span> <span class="o">=</span> <span class="n">row_group</span><span class="o">.</span><span class="n">column</span><span class="p">(</span><span class="mi">0</span><span class="p">)</span><span class="o">.</span><span class="n">data_page_offset</span>
<a id="__codelineno-0-2293" name="__codelineno-0-2293"></a> <span class="n">dictionary_offset</span> <span class="o">=</span> <span class="n">row_group</span><span class="o">.</span><span class="n">column</span><span class="p">(</span><span class="mi">0</span><span class="p">)</span><span class="o">.</span><span class="n">dictionary_page_offset</span>
<a id="__codelineno-0-2294" name="__codelineno-0-2294"></a>
<a id="__codelineno-0-2295" name="__codelineno-0-2295"></a> <span class="k">if</span> <span class="n">row_group</span><span class="o">.</span><span class="n">column</span><span class="p">(</span><span class="mi">0</span><span class="p">)</span><span class="o">.</span><span class="n">has_dictionary_page</span> <span class="ow">and</span> <span class="n">dictionary_offset</span> <span class="o">&lt;</span> <span class="n">data_offset</span><span class="p">:</span>
<a id="__codelineno-0-2296" name="__codelineno-0-2296"></a> <span class="n">split_offsets</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">dictionary_offset</span><span class="p">)</span>
<a id="__codelineno-0-2297" name="__codelineno-0-2297"></a> <span class="k">else</span><span class="p">:</span>
<a id="__codelineno-0-2298" name="__codelineno-0-2298"></a> <span class="n">split_offsets</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">data_offset</span><span class="p">)</span>
<a id="__codelineno-0-2299" name="__codelineno-0-2299"></a>
<a id="__codelineno-0-2300" name="__codelineno-0-2300"></a> <span class="k">for</span> <span class="n">pos</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">parquet_metadata</span><span class="o">.</span><span class="n">num_columns</span><span class="p">):</span>
<a id="__codelineno-0-2301" name="__codelineno-0-2301"></a> <span class="n">column</span> <span class="o">=</span> <span class="n">row_group</span><span class="o">.</span><span class="n">column</span><span class="p">(</span><span class="n">pos</span><span class="p">)</span>
<a id="__codelineno-0-2302" name="__codelineno-0-2302"></a> <span class="n">field_id</span> <span class="o">=</span> <span class="n">parquet_column_mapping</span><span class="p">[</span><span class="n">column</span><span class="o">.</span><span class="n">path_in_schema</span><span class="p">]</span>
<a id="__codelineno-0-2303" name="__codelineno-0-2303"></a>
<a id="__codelineno-0-2304" name="__codelineno-0-2304"></a> <span class="n">stats_col</span> <span class="o">=</span> <span class="n">stats_columns</span><span class="p">[</span><span class="n">field_id</span><span class="p">]</span>
<a id="__codelineno-0-2305" name="__codelineno-0-2305"></a>
<a id="__codelineno-0-2306" name="__codelineno-0-2306"></a> <span class="n">column_sizes</span><span class="o">.</span><span class="n">setdefault</span><span class="p">(</span><span class="n">field_id</span><span class="p">,</span> <span class="mi">0</span><span class="p">)</span>
<a id="__codelineno-0-2307" name="__codelineno-0-2307"></a> <span class="n">column_sizes</span><span class="p">[</span><span class="n">field_id</span><span class="p">]</span> <span class="o">+=</span> <span class="n">column</span><span class="o">.</span><span class="n">total_compressed_size</span>
<a id="__codelineno-0-2308" name="__codelineno-0-2308"></a>
<a id="__codelineno-0-2309" name="__codelineno-0-2309"></a> <span class="k">if</span> <span class="n">stats_col</span><span class="o">.</span><span class="n">mode</span> <span class="o">==</span> <span class="n">MetricsMode</span><span class="p">(</span><span class="n">MetricModeTypes</span><span class="o">.</span><span class="n">NONE</span><span class="p">):</span>
<a id="__codelineno-0-2310" name="__codelineno-0-2310"></a> <span class="k">continue</span>
<a id="__codelineno-0-2311" name="__codelineno-0-2311"></a>
<a id="__codelineno-0-2312" name="__codelineno-0-2312"></a> <span class="n">value_counts</span><span class="p">[</span><span class="n">field_id</span><span class="p">]</span> <span class="o">=</span> <span class="n">value_counts</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">field_id</span><span class="p">,</span> <span class="mi">0</span><span class="p">)</span> <span class="o">+</span> <span class="n">column</span><span class="o">.</span><span class="n">num_values</span>
<a id="__codelineno-0-2313" name="__codelineno-0-2313"></a>
<a id="__codelineno-0-2314" name="__codelineno-0-2314"></a> <span class="k">if</span> <span class="n">column</span><span class="o">.</span><span class="n">is_stats_set</span><span class="p">:</span>
<a id="__codelineno-0-2315" name="__codelineno-0-2315"></a> <span class="k">try</span><span class="p">:</span>
<a id="__codelineno-0-2316" name="__codelineno-0-2316"></a> <span class="n">statistics</span> <span class="o">=</span> <span class="n">column</span><span class="o">.</span><span class="n">statistics</span>
<a id="__codelineno-0-2317" name="__codelineno-0-2317"></a>
<a id="__codelineno-0-2318" name="__codelineno-0-2318"></a> <span class="k">if</span> <span class="n">statistics</span><span class="o">.</span><span class="n">has_null_count</span><span class="p">:</span>
<a id="__codelineno-0-2319" name="__codelineno-0-2319"></a> <span class="n">null_value_counts</span><span class="p">[</span><span class="n">field_id</span><span class="p">]</span> <span class="o">=</span> <span class="n">null_value_counts</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">field_id</span><span class="p">,</span> <span class="mi">0</span><span class="p">)</span> <span class="o">+</span> <span class="n">statistics</span><span class="o">.</span><span class="n">null_count</span>
<a id="__codelineno-0-2320" name="__codelineno-0-2320"></a>
<a id="__codelineno-0-2321" name="__codelineno-0-2321"></a> <span class="k">if</span> <span class="n">stats_col</span><span class="o">.</span><span class="n">mode</span> <span class="o">==</span> <span class="n">MetricsMode</span><span class="p">(</span><span class="n">MetricModeTypes</span><span class="o">.</span><span class="n">COUNTS</span><span class="p">):</span>
<a id="__codelineno-0-2322" name="__codelineno-0-2322"></a> <span class="k">continue</span>
<a id="__codelineno-0-2323" name="__codelineno-0-2323"></a>
<a id="__codelineno-0-2324" name="__codelineno-0-2324"></a> <span class="k">if</span> <span class="n">field_id</span> <span class="ow">not</span> <span class="ow">in</span> <span class="n">col_aggs</span><span class="p">:</span>
<a id="__codelineno-0-2325" name="__codelineno-0-2325"></a> <span class="n">col_aggs</span><span class="p">[</span><span class="n">field_id</span><span class="p">]</span> <span class="o">=</span> <span class="n">StatsAggregator</span><span class="p">(</span>
<a id="__codelineno-0-2326" name="__codelineno-0-2326"></a> <span class="n">stats_col</span><span class="o">.</span><span class="n">iceberg_type</span><span class="p">,</span> <span class="n">statistics</span><span class="o">.</span><span class="n">physical_type</span><span class="p">,</span> <span class="n">stats_col</span><span class="o">.</span><span class="n">mode</span><span class="o">.</span><span class="n">length</span>
<a id="__codelineno-0-2327" name="__codelineno-0-2327"></a> <span class="p">)</span>
<a id="__codelineno-0-2328" name="__codelineno-0-2328"></a>
<a id="__codelineno-0-2329" name="__codelineno-0-2329"></a> <span class="n">col_aggs</span><span class="p">[</span><span class="n">field_id</span><span class="p">]</span><span class="o">.</span><span class="n">update_min</span><span class="p">(</span><span class="n">statistics</span><span class="o">.</span><span class="n">min</span><span class="p">)</span>
<a id="__codelineno-0-2330" name="__codelineno-0-2330"></a> <span class="n">col_aggs</span><span class="p">[</span><span class="n">field_id</span><span class="p">]</span><span class="o">.</span><span class="n">update_max</span><span class="p">(</span><span class="n">statistics</span><span class="o">.</span><span class="n">max</span><span class="p">)</span>
<a id="__codelineno-0-2331" name="__codelineno-0-2331"></a>
<a id="__codelineno-0-2332" name="__codelineno-0-2332"></a> <span class="k">except</span> <span class="n">pyarrow</span><span class="o">.</span><span class="n">lib</span><span class="o">.</span><span class="n">ArrowNotImplementedError</span> <span class="k">as</span> <span class="n">e</span><span class="p">:</span>
<a id="__codelineno-0-2333" name="__codelineno-0-2333"></a> <span class="n">invalidate_col</span><span class="o">.</span><span class="n">add</span><span class="p">(</span><span class="n">field_id</span><span class="p">)</span>
<a id="__codelineno-0-2334" name="__codelineno-0-2334"></a> <span class="n">logger</span><span class="o">.</span><span class="n">warning</span><span class="p">(</span><span class="n">e</span><span class="p">)</span>
<a id="__codelineno-0-2335" name="__codelineno-0-2335"></a> <span class="k">else</span><span class="p">:</span>
<a id="__codelineno-0-2336" name="__codelineno-0-2336"></a> <span class="n">invalidate_col</span><span class="o">.</span><span class="n">add</span><span class="p">(</span><span class="n">field_id</span><span class="p">)</span>
<a id="__codelineno-0-2337" name="__codelineno-0-2337"></a> <span class="n">logger</span><span class="o">.</span><span class="n">warning</span><span class="p">(</span><span class="s2">&quot;PyArrow statistics missing for column </span><span class="si">%d</span><span class="s2"> when writing file&quot;</span><span class="p">,</span> <span class="n">pos</span><span class="p">)</span>
<a id="__codelineno-0-2338" name="__codelineno-0-2338"></a>
<a id="__codelineno-0-2339" name="__codelineno-0-2339"></a> <span class="n">split_offsets</span><span class="o">.</span><span class="n">sort</span><span class="p">()</span>
<a id="__codelineno-0-2340" name="__codelineno-0-2340"></a>
<a id="__codelineno-0-2341" name="__codelineno-0-2341"></a> <span class="k">for</span> <span class="n">field_id</span> <span class="ow">in</span> <span class="n">invalidate_col</span><span class="p">:</span>
<a id="__codelineno-0-2342" name="__codelineno-0-2342"></a> <span class="n">col_aggs</span><span class="o">.</span><span class="n">pop</span><span class="p">(</span><span class="n">field_id</span><span class="p">,</span> <span class="kc">None</span><span class="p">)</span>
<a id="__codelineno-0-2343" name="__codelineno-0-2343"></a> <span class="n">null_value_counts</span><span class="o">.</span><span class="n">pop</span><span class="p">(</span><span class="n">field_id</span><span class="p">,</span> <span class="kc">None</span><span class="p">)</span>
<a id="__codelineno-0-2344" name="__codelineno-0-2344"></a>
<a id="__codelineno-0-2345" name="__codelineno-0-2345"></a> <span class="k">return</span> <span class="n">DataFileStatistics</span><span class="p">(</span>
<a id="__codelineno-0-2346" name="__codelineno-0-2346"></a> <span class="n">record_count</span><span class="o">=</span><span class="n">parquet_metadata</span><span class="o">.</span><span class="n">num_rows</span><span class="p">,</span>
<a id="__codelineno-0-2347" name="__codelineno-0-2347"></a> <span class="n">column_sizes</span><span class="o">=</span><span class="n">column_sizes</span><span class="p">,</span>
<a id="__codelineno-0-2348" name="__codelineno-0-2348"></a> <span class="n">value_counts</span><span class="o">=</span><span class="n">value_counts</span><span class="p">,</span>
<a id="__codelineno-0-2349" name="__codelineno-0-2349"></a> <span class="n">null_value_counts</span><span class="o">=</span><span class="n">null_value_counts</span><span class="p">,</span>
<a id="__codelineno-0-2350" name="__codelineno-0-2350"></a> <span class="n">nan_value_counts</span><span class="o">=</span><span class="n">nan_value_counts</span><span class="p">,</span>
<a id="__codelineno-0-2351" name="__codelineno-0-2351"></a> <span class="n">column_aggregates</span><span class="o">=</span><span class="n">col_aggs</span><span class="p">,</span>
<a id="__codelineno-0-2352" name="__codelineno-0-2352"></a> <span class="n">split_offsets</span><span class="o">=</span><span class="n">split_offsets</span><span class="p">,</span>
<a id="__codelineno-0-2353" name="__codelineno-0-2353"></a> <span class="p">)</span>
</code></pre></div></td></tr></table></div>
</details>
</div>
</div>
<div class="doc doc-object doc-function">
<h2 id="pyiceberg.io.pyarrow.parquet_path_to_id_mapping" class="doc doc-heading">
<code class="highlight language-python"><span class="n">parquet_path_to_id_mapping</span><span class="p">(</span><span class="n">schema</span><span class="p">)</span></code>
<a href="#pyiceberg.io.pyarrow.parquet_path_to_id_mapping" class="headerlink" title="Permanent link">&para;</a></h2>
<div class="doc doc-contents ">
<p>Compute the mapping of parquet column path to Iceberg ID.</p>
<p>For each column, the parquet file metadata has a path_in_schema attribute that follows
a specific naming scheme for nested columns. This function computes a mapping of
the full paths to the corresponding Iceberg IDs.</p>
<p><span class="doc-section-title">Parameters:</span></p>
<table>
<thead>
<tr>
<th>Name</th>
<th>Type</th>
<th>Description</th>
<th>Default</th>
</tr>
</thead>
<tbody>
<tr class="doc-section-item">
<td>
<code>schema</code>
</td>
<td>
<code><a class="autorefs autorefs-internal" title="pyiceberg.schema.Schema" href="../../schema/#pyiceberg.schema.Schema">Schema</a></code>
</td>
<td>
<div class="doc-md-description">
<p>The current table schema.</p>
</div>
</td>
<td>
<em>required</em>
</td>
</tr>
</tbody>
</table>
<details class="quote">
<summary>Source code in <code>pyiceberg/io/pyarrow.py</code></summary>
<div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-2170">2170</a></span>
<span class="normal"><a href="#__codelineno-0-2171">2171</a></span>
<span class="normal"><a href="#__codelineno-0-2172">2172</a></span>
<span class="normal"><a href="#__codelineno-0-2173">2173</a></span>
<span class="normal"><a href="#__codelineno-0-2174">2174</a></span>
<span class="normal"><a href="#__codelineno-0-2175">2175</a></span>
<span class="normal"><a href="#__codelineno-0-2176">2176</a></span>
<span class="normal"><a href="#__codelineno-0-2177">2177</a></span>
<span class="normal"><a href="#__codelineno-0-2178">2178</a></span>
<span class="normal"><a href="#__codelineno-0-2179">2179</a></span>
<span class="normal"><a href="#__codelineno-0-2180">2180</a></span>
<span class="normal"><a href="#__codelineno-0-2181">2181</a></span>
<span class="normal"><a href="#__codelineno-0-2182">2182</a></span>
<span class="normal"><a href="#__codelineno-0-2183">2183</a></span>
<span class="normal"><a href="#__codelineno-0-2184">2184</a></span>
<span class="normal"><a href="#__codelineno-0-2185">2185</a></span>
<span class="normal"><a href="#__codelineno-0-2186">2186</a></span></pre></div></td><td class="code"><div><pre><span></span><code><a id="__codelineno-0-2170" name="__codelineno-0-2170"></a><span class="k">def</span><span class="w"> </span><span class="nf">parquet_path_to_id_mapping</span><span class="p">(</span>
<a id="__codelineno-0-2171" name="__codelineno-0-2171"></a> <span class="n">schema</span><span class="p">:</span> <span class="n">Schema</span><span class="p">,</span>
<a id="__codelineno-0-2172" name="__codelineno-0-2172"></a><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="nb">int</span><span class="p">]:</span>
<a id="__codelineno-0-2173" name="__codelineno-0-2173"></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<a id="__codelineno-0-2174" name="__codelineno-0-2174"></a><span class="sd"> Compute the mapping of parquet column path to Iceberg ID.</span>
<a id="__codelineno-0-2175" name="__codelineno-0-2175"></a>
<a id="__codelineno-0-2176" name="__codelineno-0-2176"></a><span class="sd"> For each column, the parquet file metadata has a path_in_schema attribute that follows</span>
<a id="__codelineno-0-2177" name="__codelineno-0-2177"></a><span class="sd"> a specific naming scheme for nested columns. This function computes a mapping of</span>
<a id="__codelineno-0-2178" name="__codelineno-0-2178"></a><span class="sd"> the full paths to the corresponding Iceberg IDs.</span>
<a id="__codelineno-0-2179" name="__codelineno-0-2179"></a>
<a id="__codelineno-0-2180" name="__codelineno-0-2180"></a><span class="sd"> Args:</span>
<a id="__codelineno-0-2181" name="__codelineno-0-2181"></a><span class="sd"> schema (pyiceberg.schema.Schema): The current table schema.</span>
<a id="__codelineno-0-2182" name="__codelineno-0-2182"></a><span class="sd"> &quot;&quot;&quot;</span>
<a id="__codelineno-0-2183" name="__codelineno-0-2183"></a> <span class="n">result</span><span class="p">:</span> <span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="nb">int</span><span class="p">]</span> <span class="o">=</span> <span class="p">{}</span>
<a id="__codelineno-0-2184" name="__codelineno-0-2184"></a> <span class="k">for</span> <span class="n">pair</span> <span class="ow">in</span> <span class="n">pre_order_visit</span><span class="p">(</span><span class="n">schema</span><span class="p">,</span> <span class="n">ID2ParquetPathVisitor</span><span class="p">()):</span>
<a id="__codelineno-0-2185" name="__codelineno-0-2185"></a> <span class="n">result</span><span class="p">[</span><span class="n">pair</span><span class="o">.</span><span class="n">parquet_path</span><span class="p">]</span> <span class="o">=</span> <span class="n">pair</span><span class="o">.</span><span class="n">field_id</span>
<a id="__codelineno-0-2186" name="__codelineno-0-2186"></a> <span class="k">return</span> <span class="n">result</span>
</code></pre></div></td></tr></table></div>
</details>
</div>
</div>
<div class="doc doc-object doc-function">
<h2 id="pyiceberg.io.pyarrow.visit_pyarrow" class="doc doc-heading">
<code class="highlight language-python"><span class="n">visit_pyarrow</span><span class="p">(</span><span class="n">obj</span><span class="p">,</span> <span class="n">visitor</span><span class="p">)</span></code>
<a href="#pyiceberg.io.pyarrow.visit_pyarrow" class="headerlink" title="Permanent link">&para;</a></h2>
<div class="doc doc-contents ">
<p>Apply a pyarrow schema visitor to any point within a schema.</p>
<p>The function traverses the schema in post-order fashion.</p>
<p><span class="doc-section-title">Parameters:</span></p>
<table>
<thead>
<tr>
<th>Name</th>
<th>Type</th>
<th>Description</th>
<th>Default</th>
</tr>
</thead>
<tbody>
<tr class="doc-section-item">
<td>
<code>obj</code>
</td>
<td>
<code><span title="typing.Union">Union</span>[<span title="pyarrow.DataType">DataType</span>, <span title="pyarrow.Schema">Schema</span>]</code>
</td>
<td>
<div class="doc-md-description">
<p>An instance of a pyarrow Schema or DataType.</p>
</div>
</td>
<td>
<em>required</em>
</td>
</tr>
<tr class="doc-section-item">
<td>
<code>visitor</code>
</td>
<td>
<code><a class="autorefs autorefs-internal" title="pyiceberg.io.pyarrow.PyArrowSchemaVisitor" href="#pyiceberg.io.pyarrow.PyArrowSchemaVisitor">PyArrowSchemaVisitor</a>[<span title="pyiceberg.io.pyarrow.T">T</span>]</code>
</td>
<td>
<div class="doc-md-description">
<p>An instance of an implementation of the generic PyArrowSchemaVisitor base class.</p>
</div>
</td>
<td>
<em>required</em>
</td>
</tr>
</tbody>
</table>
<p><span class="doc-section-title">Raises:</span></p>
<table>
<thead>
<tr>
<th>Type</th>
<th>Description</th>
</tr>
</thead>
<tbody>
<tr class="doc-section-item">
<td>
<code><span title="NotImplementedError">NotImplementedError</span></code>
</td>
<td>
<div class="doc-md-description">
<p>If attempting to visit an unrecognized object type.</p>
</div>
</td>
</tr>
</tbody>
</table>
<details class="quote">
<summary>Source code in <code>pyiceberg/io/pyarrow.py</code></summary>
<div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"><a href="#__codelineno-0-962">962</a></span>
<span class="normal"><a href="#__codelineno-0-963">963</a></span>
<span class="normal"><a href="#__codelineno-0-964">964</a></span>
<span class="normal"><a href="#__codelineno-0-965">965</a></span>
<span class="normal"><a href="#__codelineno-0-966">966</a></span>
<span class="normal"><a href="#__codelineno-0-967">967</a></span>
<span class="normal"><a href="#__codelineno-0-968">968</a></span>
<span class="normal"><a href="#__codelineno-0-969">969</a></span>
<span class="normal"><a href="#__codelineno-0-970">970</a></span>
<span class="normal"><a href="#__codelineno-0-971">971</a></span>
<span class="normal"><a href="#__codelineno-0-972">972</a></span>
<span class="normal"><a href="#__codelineno-0-973">973</a></span>
<span class="normal"><a href="#__codelineno-0-974">974</a></span>
<span class="normal"><a href="#__codelineno-0-975">975</a></span></pre></div></td><td class="code"><div><pre><span></span><code><a id="__codelineno-0-962" name="__codelineno-0-962"></a><span class="nd">@singledispatch</span>
<a id="__codelineno-0-963" name="__codelineno-0-963"></a><span class="k">def</span><span class="w"> </span><span class="nf">visit_pyarrow</span><span class="p">(</span><span class="n">obj</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="n">pa</span><span class="o">.</span><span class="n">DataType</span><span class="p">,</span> <span class="n">pa</span><span class="o">.</span><span class="n">Schema</span><span class="p">],</span> <span class="n">visitor</span><span class="p">:</span> <span class="n">PyArrowSchemaVisitor</span><span class="p">[</span><span class="n">T</span><span class="p">])</span> <span class="o">-&gt;</span> <span class="n">T</span><span class="p">:</span>
<a id="__codelineno-0-964" name="__codelineno-0-964"></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;Apply a pyarrow schema visitor to any point within a schema.</span>
<a id="__codelineno-0-965" name="__codelineno-0-965"></a>
<a id="__codelineno-0-966" name="__codelineno-0-966"></a><span class="sd"> The function traverses the schema in post-order fashion.</span>
<a id="__codelineno-0-967" name="__codelineno-0-967"></a>
<a id="__codelineno-0-968" name="__codelineno-0-968"></a><span class="sd"> Args:</span>
<a id="__codelineno-0-969" name="__codelineno-0-969"></a><span class="sd"> obj (Union[pa.DataType, pa.Schema]): An instance of a pyarrow Schema or DataType.</span>
<a id="__codelineno-0-970" name="__codelineno-0-970"></a><span class="sd"> visitor (PyArrowSchemaVisitor[T]): An instance of an implementation of the generic PyArrowSchemaVisitor base class.</span>
<a id="__codelineno-0-971" name="__codelineno-0-971"></a>
<a id="__codelineno-0-972" name="__codelineno-0-972"></a><span class="sd"> Raises:</span>
<a id="__codelineno-0-973" name="__codelineno-0-973"></a><span class="sd"> NotImplementedError: If attempting to visit an unrecognized object type.</span>
<a id="__codelineno-0-974" name="__codelineno-0-974"></a><span class="sd"> &quot;&quot;&quot;</span>
<a id="__codelineno-0-975" name="__codelineno-0-975"></a> <span class="k">raise</span> <span class="ne">NotImplementedError</span><span class="p">(</span><span class="sa">f</span><span class="s2">&quot;Cannot visit non-type: </span><span class="si">{</span><span class="n">obj</span><span class="si">}</span><span class="s2">&quot;</span><span class="p">)</span>
</code></pre></div></td></tr></table></div>
</details>
</div>
</div>
</div>
</div>
</div>
</article>
</div>
<!-- On load, looks up the element whose id equals the URL fragment (location.hash without the leading "#"). If that element exists and has a non-empty name, its checked property is set to whether the name starts with "__tabbed_". NOTE(review): presumably this pre-selects a content-tab input when deep-linking to it; confirm against the theme. -->
<script>var target=document.getElementById(location.hash.slice(1));target&&target.name&&(target.checked=target.name.startsWith("__tabbed_"))</script>
</div>
<button type="button" class="md-top md-icon" data-md-component="top" hidden>
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M13 20h-2V8l-5.5 5.5-1.42-1.42L12 4.16l7.92 7.92-1.42 1.42L13 8z"/></svg>
Back to top
</button>
</main>
<footer class="md-footer">
<div class="md-footer-meta md-typeset">
<div class="md-footer-meta__inner md-grid">
<div class="md-copyright">
Made with
<a href="https://squidfunk.github.io/mkdocs-material/" target="_blank" rel="noopener">
Material for MkDocs
</a>
</div>
</div>
</div>
</footer>
</div>
<div class="md-dialog" data-md-component="dialog">
<div class="md-dialog__inner md-typeset"></div>
</div>
<script id="__config" type="application/json">{"base": "../../../..", "features": ["navigation.top", "navigation.tracking", "navigation.tabs", "navigation.tabs.sticky"], "search": "../../../../assets/javascripts/workers/search.f8cc74c7.min.js", "translations": {"clipboard.copied": "Copied to clipboard", "clipboard.copy": "Copy to clipboard", "search.result.more.one": "1 more on this page", "search.result.more.other": "# more on this page", "search.result.none": "No matching documents", "search.result.one": "1 matching document", "search.result.other": "# matching documents", "search.result.placeholder": "Type to start searching", "search.result.term.missing": "Missing", "select.version": "Select version"}}</script>
<script src="../../../../assets/javascripts/bundle.f1b6f286.min.js"></script>
</body>
</html>